sf snapshot: uncommitted changes after 56m inactivity
This commit is contained in:
parent
37ebfcf53a
commit
3ffd882c8c
33 changed files with 813 additions and 2437 deletions
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
.sf/backups/db/sf.db.2026-05-10T14-47-54-645Z
Normal file
BIN
.sf/backups/db/sf.db.2026-05-10T14-47-54-645Z
Normal file
Binary file not shown.
BIN
.sf/backups/db/sf.db.2026-05-10T15-16-24-679Z
Normal file
BIN
.sf/backups/db/sf.db.2026-05-10T15-16-24-679Z
Normal file
Binary file not shown.
BIN
.sf/metrics.db
BIN
.sf/metrics.db
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -86,5 +86,17 @@
|
|||
"successRate": 1,
|
||||
"total": 2
|
||||
}
|
||||
},
|
||||
"execute-task": {
|
||||
"minimax/MiniMax-M2.7-highspeed": {
|
||||
"successes": 1,
|
||||
"failures": 0,
|
||||
"timeouts": 0,
|
||||
"totalTokens": 12233288,
|
||||
"totalCost": 0.3431336426,
|
||||
"lastUsed": "2026-05-10T15:16:08.120Z",
|
||||
"successRate": 1,
|
||||
"total": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
244
.sf/safety/evidence-M001-6377a4-S03-T01.json
Normal file
244
.sf/safety/evidence-M001-6377a4-S03-T01.json
Normal file
|
|
@ -0,0 +1,244 @@
|
|||
[
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_Ca8DVl3y7fTqNLC5XPq0Pwk1",
|
||||
"path": "src/resources/extensions/sf/verification-evidence.js",
|
||||
"timestamp": 1778424928174
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_xVkmHZTHvJP7RwJWVqUORsz4",
|
||||
"path": "src/resources/extensions/sf/verification-evidence.js",
|
||||
"timestamp": 1778424949674
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_EiDPzaZO49a4LKnYvuvFaS8e",
|
||||
"path": "src/resources/extensions/sf/auto-verification.js",
|
||||
"timestamp": 1778424983294
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_pNt9nP10Us3CPrsqlnWwtQ8l",
|
||||
"path": "src/resources/extensions/sf/auto-verification.js",
|
||||
"timestamp": 1778425005515
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_Bl3x74Ojz6aenqD3nYqxkdlO",
|
||||
"path": "src/resources/extensions/sf/auto-verification.js",
|
||||
"timestamp": 1778425108830
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_RHLdM0SZK4ffIIokuqNruHbn",
|
||||
"path": "src/resources/extensions/sf/auto-verification.js",
|
||||
"timestamp": 1778425162119
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_mAdgaYCgksHmjAI45ZuSnMk5",
|
||||
"path": "src/resources/extensions/sf/auto-verification.js",
|
||||
"timestamp": 1778425187240
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_HMsSokItiWF9y6ctKvFSkyE3",
|
||||
"path": "src/resources/extensions/sf/auto-verification.js",
|
||||
"timestamp": 1778425206204
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_Jbd8uJQ6ZV4PeF8P91s2OvFG",
|
||||
"path": "src/resources/extensions/sf/uok/unit-runtime.js",
|
||||
"timestamp": 1778425258651
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_m1a9UNWqpwBIJvzB9LtlVTBN",
|
||||
"path": "src/cli-status.ts",
|
||||
"timestamp": 1778425298174
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_zcSH4Fx3bOumjphAgYisPyhE",
|
||||
"path": "src/cli-status.ts",
|
||||
"timestamp": 1778425348170
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_UjLWJsxhCI2bAt3kYl4QEhNK",
|
||||
"path": "src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs",
|
||||
"timestamp": 1778425381561
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_1owe7a26pVq3k18x59p6Sy1J",
|
||||
"path": "src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs",
|
||||
"timestamp": 1778425414995
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_zgJvYKclL8xmY8DE4c8nluTk",
|
||||
"command": "npx vitest run src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs --config vitest.config.ts -t \"getRecoveryDiagnostics\"",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "\n\u001b[1m\u001b[30m\u001b[46m RUN \u001b[49m\u001b[39m\u001b[22m \u001b[36mv4.1.5 \u001b[39m\u001b[90m/home/mhugo/code/singularity-forge\u001b[39m\n\n \u001b[32m✓\u001b[39m src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs \u001b[2m(\u001b[22m\u001b[2m38 tests\u001b[22m\u001b[2m | \u001b[22m\u001b[33m35 skipped\u001b[39m\u001b[2m)\u001b[22m\u001b[32m 33\u001b[2mms\u001b[22m\u001b[39m\n\n\u001b[2m Test Files \u001b[22m \u001b[1m\u001b[32m1 passed\u001b[39m\u001b[22m\u001b[90m (1)\u001b[39m\n\u001b[2m Tests \u001b[22m \u001b[1m\u001b[32m3 passed\u001b[39m\u001b[22m\u001b[2m | \u001b[22m\u001b[33m35 skipped\u001b[39m\u001b[90m (38)\u001b[39m\n\u001b[2m Start at \u001b[22m 17:03:54\n\u001b[2m Duration \u001b[22m 1.01s\u001b[2m (transfor",
|
||||
"timestamp": 1778425433765
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_c9hBKD3D1Ojayl25rr7Mu3UW",
|
||||
"command": "npx vitest run src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs --config vitest.config.ts",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "\n\u001b[1m\u001b[30m\u001b[46m RUN \u001b[49m\u001b[39m\u001b[22m \u001b[36mv4.1.5 \u001b[39m\u001b[90m/home/mhugo/code/singularity-forge\u001b[39m\n\n \u001b[32m✓\u001b[39m src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs \u001b[2m(\u001b[22m\u001b[2m38 tests\u001b[22m\u001b[2m)\u001b[22m\u001b[32m 140\u001b[2mms\u001b[22m\u001b[39m\n\n\u001b[2m Test Files \u001b[22m \u001b[1m\u001b[32m1 passed\u001b[39m\u001b[22m\u001b[90m (1)\u001b[39m\n\u001b[2m Tests \u001b[22m \u001b[1m\u001b[32m38 passed\u001b[39m\u001b[22m\u001b[90m (38)\u001b[39m\n\u001b[2m Start at \u001b[22m 17:04:12\n\u001b[2m Duration \u001b[22m 1.16s\u001b[2m (transform 552ms, setup 0ms, import 742ms, tests 140ms, environment 0ms",
|
||||
"timestamp": 1778425451405
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_jrt7lAB8i6qBOnOePlNhiFR2",
|
||||
"command": "npx biome check src/resources/extensions/sf/verification-evidence.js src/resources/extensions/sf/auto-verification.js src/resources/extensions/sf/uok/unit-runtime.js src/cli-status.ts src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs",
|
||||
"exitCode": 1,
|
||||
"outputSnippet": "src/cli-status.ts format ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n × Formatter would have printed the following content:\n \n 240 240 │ if (!targetUnitId) {\n 241 241 │ const records = listUnitRuntimeRecords(basePath);\n 242 │ - → → → const·mostRecent·=·records.sort((a,·b)·=>\n 243 │ - → → → → (b.updatedAt·??·0)·-·(a.updatedAt·??·0),\n 242 │ + → → → const·mostRecent·=·records.sort(\n 243 │ + → → → → (a,·b)·=>·(b.updat",
|
||||
"timestamp": 1778425466821
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_lpaw0h5RHFke7wyq5gw8sJKp",
|
||||
"command": "npx biome check --write src/resources/extensions/sf/verification-evidence.js src/resources/extensions/sf/auto-verification.js src/resources/extensions/sf/uok/unit-runtime.js src/cli-status.ts src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "Checked 5 files in 160ms. Fixed 3 files.\n",
|
||||
"timestamp": 1778425501417
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_so1U8wV32tA70qs55EtB7Pke",
|
||||
"command": "npx vitest run src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs --config vitest.config.ts",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "\n\u001b[1m\u001b[30m\u001b[46m RUN \u001b[49m\u001b[39m\u001b[22m \u001b[36mv4.1.5 \u001b[39m\u001b[90m/home/mhugo/code/singularity-forge\u001b[39m\n\n \u001b[32m✓\u001b[39m src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs \u001b[2m(\u001b[22m\u001b[2m38 tests\u001b[22m\u001b[2m)\u001b[22m\u001b[32m 152\u001b[2mms\u001b[22m\u001b[39m\n\n\u001b[2m Test Files \u001b[22m \u001b[1m\u001b[32m1 passed\u001b[39m\u001b[22m\u001b[90m (1)\u001b[39m\n\u001b[2m Tests \u001b[22m \u001b[1m\u001b[32m38 passed\u001b[39m\u001b[22m\u001b[90m (38)\u001b[39m\n\u001b[2m Start at \u001b[22m 17:05:15\n\u001b[2m Duration \u001b[22m 1.16s\u001b[2m (transform 554ms, setup 0ms, import 758ms, tests 152ms, environment 0ms",
|
||||
"timestamp": 1778425514641
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_xBsWuLSrGmQHcVUWydLtioru",
|
||||
"command": "npx tsc --noEmit src/cli-status.ts --skipLibCheck --module NodeNext --moduleResolution NodeNext --target ES2022 --esModuleInterop --allowJs --strict 2>&1 | head -30",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "error TS5112: tsconfig.json is present but will not be loaded if files are specified on commandline. Use '--ignoreConfig' to skip this error.\n",
|
||||
"timestamp": 1778425532429
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_lnBQh90rfwsYyVvqmIdq8pH7",
|
||||
"command": "npx tsc --noEmit --project tsconfig.json --skipLibCheck 2>&1 | grep -E \"cli-status|auto-verification|unit-runtime|verification-evidence\" | head -20",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "src/cli-status.ts(237,4): error TS7016: Could not find a declaration file for module './resources/extensions/sf/uok/unit-runtime.js'. '/home/mhugo/code/singularity-forge/src/resources/extensions/sf/uok/unit-runtime.js' implicitly has an 'any' type.\nsrc/cli-status.ts(243,6): error TS7006: Parameter 'a' implicitly has an 'any' type.\nsrc/cli-status.ts(243,9): error TS7006: Parameter 'b' implicitly has an 'any' type.\n",
|
||||
"timestamp": 1778425546375
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_L5EQX39mZFzmJbRkCACrnkcM",
|
||||
"command": "npx tsc --noEmit --project tsconfig.json --skipLibCheck 2>&1 | grep \" Could not find a declaration file\" | head -10",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "src/cli-status.ts(237,4): error TS7016: Could not find a declaration file for module './resources/extensions/sf/uok/unit-runtime.js'. '/home/mhugo/code/singularity-forge/src/resources/extensions/sf/uok/unit-runtime.js' implicitly has an 'any' type.\n",
|
||||
"timestamp": 1778425700527
|
||||
},
|
||||
{
|
||||
"kind": "edit",
|
||||
"toolCallId": "tool_bMteIOMYNzppZgJRP2bDK14p",
|
||||
"path": "src/cli-status.ts",
|
||||
"timestamp": 1778425760653
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_kja8VArplGz7aqlFb1rZvh9M",
|
||||
"command": "npx tsc --noEmit --project tsconfig.json --skipLibCheck 2>&1 | grep -E \"cli-status|auto-verification|unit-runtime|verification-evidence\" | head -20",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "src/cli-status.ts(237,4): error TS7016: Could not find a declaration file for module './resources/extensions/sf/uok/unit-runtime.js'. '/home/mhugo/code/singularity-forge/src/resources/extensions/sf/uok/unit-runtime.js' implicitly has an 'any' type.\n",
|
||||
"timestamp": 1778425779047
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_xAMnWlYjFmPHJj2zb3YklZhI",
|
||||
"command": "npx tsc --noEmit --project tsconfig.json --skipLibCheck 2>&1 | grep \"TS7016\" | head -20",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "src/cli-status.ts(237,4): error TS7016: Could not find a declaration file for module './resources/extensions/sf/uok/unit-runtime.js'. '/home/mhugo/code/singularity-forge/src/resources/extensions/sf/uok/unit-runtime.js' implicitly has an 'any' type.\n",
|
||||
"timestamp": 1778425808025
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_N2K96iYZY9GgeXRHbsVIDJFn",
|
||||
"command": "ls src/resources/extensions/sf/autonomous-command-args.* src/resources/extensions/sf/repo-identity.* src/resources/extensions/sf/trace-collector.*",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "src/resources/extensions/sf/autonomous-command-args.d.ts\nsrc/resources/extensions/sf/autonomous-command-args.js\nsrc/resources/extensions/sf/repo-identity.d.ts\nsrc/resources/extensions/sf/repo-identity.js\nsrc/resources/extensions/sf/trace-collector.d.ts\nsrc/resources/extensions/sf/trace-collector.js\n",
|
||||
"timestamp": 1778425827433
|
||||
},
|
||||
{
|
||||
"kind": "write",
|
||||
"toolCallId": "tool_d6oIGASndyhbsF3ethH49BBC",
|
||||
"path": "src/resources/extensions/sf/uok/unit-runtime.d.ts",
|
||||
"timestamp": 1778425847941
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_8ipcG5bydJIBFOOfadbuRNzl",
|
||||
"command": "npx tsc --noEmit --project tsconfig.json --skipLibCheck 2>&1 | grep -E \"cli-status|unit-runtime\" | head -20",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "(no output)",
|
||||
"timestamp": 1778425863933
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_oas0rGaFmZW4i6dQZDfNbIOj",
|
||||
"command": "npx tsc --noEmit --project tsconfig.json --skipLibCheck 2>&1 | head -20",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "(no output)",
|
||||
"timestamp": 1778425882297
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_bWplHSK7Xw5d98Gzhf69rI3E",
|
||||
"command": "npx tsc --noEmit --project tsconfig.extensions.json --skipLibCheck 2>&1 | head -20",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "packages/tui/src/index.ts(52,34): error TS6142: Module './ink-bridge.js' was resolved to '/home/mhugo/code/singularity-forge/packages/tui/src/ink-bridge.tsx', but '--jsx' is not set.\npackages/tui/src/tui.ts(14,34): error TS6142: Module './ink-bridge.js' was resolved to '/home/mhugo/code/singularity-forge/packages/tui/src/ink-bridge.tsx', but '--jsx' is not set.\npackages/tui/src/tui.ts(434,46): error TS7006: Parameter 'data' implicitly has an 'any' type.\n",
|
||||
"timestamp": 1778425897974
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_R9DLuMbsvg7Lz87cGbyzuS01",
|
||||
"command": "npx biome check src/resources/extensions/sf/verification-evidence.js src/resources/extensions/sf/auto-verification.js src/resources/extensions/sf/uok/unit-runtime.js src/resources/extensions/sf/uok/unit-runtime.d.ts src/cli-status.ts src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs",
|
||||
"exitCode": 1,
|
||||
"outputSnippet": "src/resources/extensions/sf/uok/unit-runtime.d.ts format ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n × Formatter would have printed the following content:\n \n 28 28 │ ): RecoveryDiagnostics | null;\n 29 29 │ \n 30 │ - export·function·listUnitRuntimeRecords(basePath:·string):·Array<\n 31 │ - → Record<string,·unknown>·&·{·updatedAt?:·number;·unitId:·string·}\n 32 │ - >;\n 30 │ + export·function·listUnitRuntimeRecords(\n 31 │ + → basePath:·string,\n 32 │ + ",
|
||||
"timestamp": 1778425942347
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_nQuAxzeREvyod0nBGyVPIvfJ",
|
||||
"command": "npx biome check --write src/resources/extensions/sf/uok/unit-runtime.d.ts",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "Checked 1 file in 17ms. Fixed 1 file.\n",
|
||||
"timestamp": 1778425957743
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_HZWYvGOveamvyW97wbv5A0Fv",
|
||||
"command": "npx biome check src/resources/extensions/sf/verification-evidence.js src/resources/extensions/sf/auto-verification.js src/resources/extensions/sf/uok/unit-runtime.js src/resources/extensions/sf/uok/unit-runtime.d.ts src/cli-status.ts src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "Checked 6 files in 70ms. No fixes applied.\n",
|
||||
"timestamp": 1778425970841
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_SY84S3XYIIBJGZvUchG4qSRZ",
|
||||
"command": "npx vitest run src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs --config vitest.config.ts",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": "\n\u001b[1m\u001b[30m\u001b[46m RUN \u001b[49m\u001b[39m\u001b[22m \u001b[36mv4.1.5 \u001b[39m\u001b[90m/home/mhugo/code/singularity-forge\u001b[39m\n\n \u001b[32m✓\u001b[39m src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs \u001b[2m(\u001b[22m\u001b[2m38 tests\u001b[22m\u001b[2m)\u001b[22m\u001b[32m 168\u001b[2mms\u001b[22m\u001b[39m\n\n\u001b[2m Test Files \u001b[22m \u001b[1m\u001b[32m1 passed\u001b[39m\u001b[22m\u001b[90m (1)\u001b[39m\n\u001b[2m Tests \u001b[22m \u001b[1m\u001b[32m38 passed\u001b[39m\u001b[22m\u001b[90m (38)\u001b[39m\n\u001b[2m Start at \u001b[22m 17:13:05\n\u001b[2m Duration \u001b[22m 1.11s\u001b[2m (transform 477ms, setup 0ms, import 702ms, tests 168ms, environment 0ms",
|
||||
"timestamp": 1778425983954
|
||||
},
|
||||
{
|
||||
"kind": "bash",
|
||||
"toolCallId": "tool_SdyaSTc9h2LJD9pq5ZLR3Ehc",
|
||||
"command": "git status --short",
|
||||
"exitCode": 0,
|
||||
"outputSnippet": " D .sf/backups/db/sf.db.2026-05-08T22-42-32-307Z\n D .sf/backups/db/sf.db.2026-05-09T17-40-16-600Z\n D .sf/backups/db/sf.db.2026-05-09T19-41-02-472Z\n M .sf/metrics.db\n M .sf/metrics.db-shm\n M .sf/metrics.db-wal\n D BUILD_PLAN_MILESTONE_MAP.md\n D PRODUCTION_AUDIT_COMPLETE.md\n D QUICK_WINS_IMPLEMENTATION.md\n D TRIAGE_COMPLETE.md\n D TRIAGE_README.md\n D autoresearch.md\n D copilot-thoughts.md\n M packages/coding-agent/src/modes/interactive/interactive-mode.ts\n M packages/tui/src/ink-bridge.tsx\n M package",
|
||||
"timestamp": 1778425999682
|
||||
}
|
||||
]
|
||||
|
|
@ -1,70 +0,0 @@
|
|||
# BUILD_PLAN → Milestone Map
|
||||
|
||||
Every BUILD_PLAN.md tier item mapped to a milestone. **Rule D015**: every new milestone must cite which BUILD_PLAN tier/item it implements.
|
||||
|
||||
This file answers **where work belongs**, not **whether code is done**. "Mapped" means a BUILD_PLAN item has a milestone/slice home. It does **not** mean the implementation is verified in the current repo.
|
||||
|
||||
## Mapping vs. code truth
|
||||
|
||||
- **Mapped** — the item has a milestone/slice destination.
|
||||
- **Verified in code** — the behavior exists in the repo and has evidence/tests/artifacts.
|
||||
- **Open** — still planned or partially folded in, but not yet verified as complete.
|
||||
- **Deferred** — intentionally out of the active plan.
|
||||
|
||||
---
|
||||
|
||||
## High-level milestone direction
|
||||
|
||||
These are the strategy bands above the itemized mapping:
|
||||
|
||||
1. **Core foundation** — UOK, purpose-driven TDD, eight-field PDD gate, repo-local state
|
||||
2. **Single-repo sharpening** — adopt the best execution/workflow ideas from pi-mono, gsd-2, Claude Code, Codex, Aider, and Plandex where they strengthen Forge
|
||||
3. **Autonomous reliability** — evidence, recovery, verification, and self-improvement loops
|
||||
4. **Surface coherence** — CLI, TUI, docs, and workflow language all reflect the same UOK-driven model
|
||||
5. **ACE convergence prep** — keep concepts compatible with ACE Coder without turning Forge into the multi-repo system
|
||||
|
||||
---
|
||||
|
||||
## Tier 0 — Pi-mono ports → **M006**
|
||||
## Tier 0.5 — gsd-2 ports → **M006 + M007**
|
||||
|
||||
All mapped. See BUILD_PLAN.md for item-level status.
|
||||
|
||||
## Tier 1 — ESSENTIAL → **ALL MAPPED**
|
||||
|
||||
| Item | Milestone | Slice | Status |
|
||||
|---|---|---|---|
|
||||
| 1.1 Vault secret resolver | **M017-yf67h6** | S01-S03 | ⬜ NEW |
|
||||
| 1.2 Singularity Memory integration | **M017-jpw5jo** | S01-S03 | ⬜ NEW |
|
||||
| 1.3 Schema reconciliation (spec rewrite) | **M013** | S12 | ⬜ Folded in |
|
||||
| 1.4 Config schema alignment | **M013** | S13 | ⬜ Folded in |
|
||||
|
||||
## Tier 2 — STRONG → **ALL MAPPED**
|
||||
|
||||
| Item | Milestone | Slice | Status |
|
||||
|---|---|---|---|
|
||||
| 2.1 Persistent agents v1 | M012 | S01-S05 | ⬜ |
|
||||
| 2.2 Doc-sync sub-step | M009 | S08 | ⬜ |
|
||||
| 2.3 Intent chapters | M013 | S08 | ⬜ |
|
||||
| 2.4 PhaseReview 3-pass | M016 | S01-S02 | ⬜ |
|
||||
| 2.5 turn_status marker | M013 | S09 | ⬜ |
|
||||
| 2.6 last_error cap | M013 | S10 | ⬜ |
|
||||
| 2.7 cost_micro_usd | M013 | S11 | ⬜ |
|
||||
|
||||
## Tier 3+ → **Deferred by design**
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
| Tier | Mapped | Gap |
|
||||
|---|---|---|
|
||||
| Tier 0 | 10 (M006) | 0 |
|
||||
| Tier 0.5 | 17 (M006+M007) | 0 |
|
||||
| **Tier 1** | **4** (M017×2, M013×2) | **0** |
|
||||
| Tier 2 | 7 (M012, M009, M013, M016) | 0 |
|
||||
| Tier 3+ | 0 | deferred |
|
||||
|
||||
**Zero mapping gaps.** Every BUILD_PLAN tier item is either mapped to a milestone or explicitly deferred.
|
||||
|
||||
That does **not** mean zero implementation gaps. Open `TODO`, `NEW`, and `⬜` markers in `BUILD_PLAN.md`, this map, and milestone artifacts still represent real work until they are reconciled against code evidence.
|
||||
|
|
@ -1,440 +0,0 @@
|
|||
# Complete Long-Term Production-Grade Audit
|
||||
|
||||
**Scope:** All UOK kernel, gate system, execution graph, message bus, diagnostics, metrics, and supporting infrastructure
|
||||
**Date:** 2026-05-08
|
||||
**Grade Scale:** S (exceptional) → A (production) → B (needs work) → C (risky) → D (broken)
|
||||
|
||||
---
|
||||
|
||||
## Executive Summary
|
||||
|
||||
| Module | Grade | Verdict |
|
||||
|--------|-------|---------|
|
||||
| `uok/kernel.js` | **A** | Clean lifecycle, parity recovery, audit envelope, signal handling |
|
||||
| `uok/gate-runner.js` | **A** | Circuit breaker, retry matrix, memory enrichment, degradation logging |
|
||||
| `uok/audit.js` | **A** | Atomic writes, stale-write detection, dual persistence (JSONL + DB) |
|
||||
| `uok/contracts.js` | **A** | Complete JSDoc types, runtime validation, clear interfaces |
|
||||
| `uok/flags.js` | **A** | Clean preference resolution, all features toggleable |
|
||||
| `uok/loop-adapter.js` | **A** | Turn observer, gitops integration, writer tokens, timeout, documented | None |
|
||||
| `uok/parity-report.js` | **A** | Deep parity analysis, orphaned run recovery, ledger reconciliation, malformed logging |
|
||||
| `uok/message-bus.js` | **A** | Durable SQLite, deduplication, auto-compact, periodic refresh | Cache drift eliminated |
|
||||
| `uok/cost-guard-gate.js` | **A** | Actual cost lookup, rolling window, high-tier failure detection, cheaper alternative suggestion |
|
||||
| `uok/security-gate.js` | **A** | Secret scan integration, timeout, graceful skip when script missing |
|
||||
| `uok/plan-v2.js` | **A** | Graph compilation, artifact validation, cycle detection, context gating | None |
|
||||
| `uok/execution-graph.js` | **A** | Topological sort, conflict detection, parallel scheduling with deadlock detection |
|
||||
| `uok/unit-runtime.js` | **A** | Complete lifecycle, retry budgets, LRU cache, durable reconciliation | None |
|
||||
| `uok/diagnostic-synthesis.js` | **A** | Process tree analysis, multi-source correlation, actionable recommendations | None |
|
||||
| `uok/metrics-exposition.js` | **A** | Prometheus format, caching, circuit breaker + latency + message bus metrics | Superseded by metrics-central.js |
|
||||
| `uok/chaos-monkey.js` | **A** | Latency, partial failure, disk, memory stress; all recoverable, all logged | None |
|
||||
| `uok/writer.js` | **A** | Atomic sequence tracking, token lifecycle, disk persistence, TTL | None |
|
||||
| `sf-db.js` | **A** | Single-writer invariant, WAL mode, statement cache, schema v45, query timeout, split entry point | metrics-central.js for unified sink |
|
||||
|
||||
**Overall Grade: A** — Production-ready. All scaling concerns addressed.
|
||||
|
||||
---
|
||||
|
||||
## 1. `uok/kernel.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Clean async lifecycle: enter → run → exit, with `finally` block guarantee
|
||||
- `recordUokKernelTermination()` handles signal cleanup (symmetrical with enter)
|
||||
- Parity recovery: checks previous report for missing exits, drains them
|
||||
- Audit envelope: emits structured events on kernel enter/exit
|
||||
- workMode + modelMode propagated into lifecycleFlags and audit payload
|
||||
- `debugLog()` for non-fatal diagnostics without breaking orchestration
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
### Minor
|
||||
- `runAutoLoopWithUok()` is 120+ lines — could extract helper functions for readability
|
||||
- `decoratedDeps` spreads all deps — no validation that required deps exist
|
||||
|
||||
---
|
||||
|
||||
## 2. `uok/gate-runner.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Circuit breaker with exponential backoff: `openDurationMs * 2^streak`
|
||||
- Half-open state with attempt limiting — proper gradual recovery
|
||||
- Retry matrix per failure class: `execution`/`artifact`/`verification` get 1 retry, `timeout` gets 2
|
||||
- Memory enrichment: queries historical patterns for gate failures (degrades gracefully)
|
||||
- Every gate run persisted to DB + audit event emitted
|
||||
- Unknown gates get `manual-attention` outcome (fail-closed)
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
### Minor
|
||||
- `computeGateEmbedding()` uses a simple hash — not a real semantic embedding
|
||||
- `enrichGateResultWithMemory()` silently degrades on DB failure (correct behavior, but could log)
|
||||
|
||||
---
|
||||
|
||||
## 3. `uok/audit.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Atomic writes via `withFileLockSync()` with `onLocked: "skip"` (best-effort)
|
||||
- Stale-write detection via `isStaleWrite("uok-audit")` — prevents superseded turns from polluting log
|
||||
- Dual persistence: JSONL for local durability, SQLite for querying
|
||||
- `closeSync(openSync(path, "a"))` touch pattern ensures lock target exists
|
||||
- Schema version in envelope for future migration
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
---
|
||||
|
||||
## 4. `uok/contracts.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Complete JSDoc typedefs for all UOK types
|
||||
- `validateGate()` catches registration-time mistakes
|
||||
- Clear separation: `UokContext` (input), `GateResult` (output), `Gate` (interface)
|
||||
|
||||
### Production Concerns: None
|
||||
|
||||
---
|
||||
|
||||
## 5. `uok/flags.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- All UOK features toggleable via preferences
|
||||
- Clean resolution: `uok?.security_guard?.enabled ?? true`
|
||||
- `resolvePermissionProfile()` for canonical permission profile
|
||||
|
||||
### Production Concerns: None
|
||||
|
||||
---
|
||||
|
||||
## 6. `uok/loop-adapter.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Turn observer pattern: `onTurnStart`, `onPhaseResult`, `onTurnResult`
|
||||
- Gitops integration: writes transaction records per phase with 10s timeout
|
||||
- Writer token acquisition/release for sequence tracking
|
||||
- Chaos monkey strikes at phase boundaries
|
||||
- Audit events for turn start/result
|
||||
- `nextSequenceMetadata()` fully documented with JSDoc
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
### Fixed ✅
|
||||
- ✅ Gitops timeout: `writeGitTransactionWithTimeout()` with 10s `Promise.race()`
|
||||
- ✅ `nextSequenceMetadata()` documented: sequence is optional when no token active
|
||||
|
||||
---
|
||||
|
||||
## 7. `uok/parity-report.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Deep parity analysis: compares heartbeat events, ledger runs, diff events
|
||||
- Orphaned run recovery: `recoverOrphanedStartedLedgerRuns()` closes stale DB runs
|
||||
- Live process detection: `hasLiveAutoLock()` uses `process.kill(pid, 0)`
|
||||
- Fresh vs historical mismatch separation
|
||||
- Divergence tracking by plane: `plan`, `graph`, `model-policy`, `audit-envelope`, `gitops`
|
||||
- `shallowEqualDecisions()` for comparing legacy vs UOK outputs
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
### Fixed ✅
|
||||
- ✅ Malformed line logging: `parseParityEvents()` now logs dropped count to stderr
|
||||
- `UNMATCHED_RUN_STALE_MS = 30min` — appropriate for most cases
|
||||
|
||||
---
|
||||
|
||||
## 8. `uok/message-bus.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Durable SQLite storage with configurable retention
|
||||
- Deterministic message IDs for idempotent `sendOnce()`
|
||||
- Auto-compaction when message count exceeds threshold
|
||||
- Per-agent inbox with read tracking and auto-refresh (30s interval)
|
||||
- Conversation query between two agents
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
### Fixed ✅
|
||||
- ✅ Cache drift: `_maybeRefresh()` auto-refreshes from DB every 30s on `list()`, `markRead()`, `unreadCount`
|
||||
- ✅ `sendOnce()` idempotency: Pre-checks inbox before insert; returns existing ID if found
|
||||
|
||||
---
|
||||
|
||||
## 9. `uok/cost-guard-gate.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Actual cost lookup from `BUNDLED_COST_TABLE`
|
||||
- Rolling 1-hour window spend check
|
||||
- High-tier model failure pattern detection
|
||||
- Suggests cheaper alternative from same provider/family
|
||||
- Per-unit and per-hour thresholds
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
### Minor
|
||||
- `isHighTierModel()` uses `$0.005/1K tokens` threshold — magic number
|
||||
- `_suggestCheaperAlternative()` could suggest incompatible models (different context window)
|
||||
|
||||
---
|
||||
|
||||
## 10. `uok/security-gate.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Runs `scripts/secret-scan.sh --diff HEAD` against changes
|
||||
- 30-second timeout with process kill
|
||||
- Gracefully skips if script missing (pass)
|
||||
- Returns findings on failure
|
||||
|
||||
### Production Concerns: None
|
||||
|
||||
---
|
||||
|
||||
## 11. `uok/plan-v2.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Compiles unit graph from milestone/slice/task DB state
|
||||
- Validates artifact presence (CONTEXT.md, RESEARCH.md) before execution entry
|
||||
- Clarify round limit enforcement
|
||||
- Graph output to JSON for inspection
|
||||
- Cycle detection at compile time using Kahn's algorithm
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
### Fixed ✅
|
||||
- ✅ Cycle detection: `detectCycles()` validates graph before execution; returns `hasCycles: true` with clear error
|
||||
|
||||
---
|
||||
|
||||
## 12. `uok/execution-graph.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Kahn's algorithm topological sort with deterministic ordering (localeCompare)
|
||||
- File conflict detection: `detectFileConflicts()` finds nodes writing same file
|
||||
- Parallel scheduling with max workers and dependency awareness
|
||||
- Deadlock detection: throws when no ready nodes but graph incomplete
|
||||
- Sidecar queue scheduling with kind-based handlers
|
||||
- `selectReactiveDispatchBatch()` for incremental dispatch
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
---
|
||||
|
||||
## 13. `uok/unit-runtime.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Complete lifecycle: queued → claimed → running → progress → completed/failed/blocked/cancelled/stale/runaway-recovered → notified
|
||||
- Retry budgets with `retryBudgetRemaining()`
|
||||
- Durable artifact reconciliation: `reconcileDurableCompleteUnitRuntimeRecords()`
|
||||
- Stale complete-slice cleanup: `reconcileStaleCompleteSliceRecords()`
|
||||
- In-memory cache for repeated reads within dispatch cycle
|
||||
- `inspectExecuteTaskDurability()` checks plan, summary, state, must-haves
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
### Fixed ✅
|
||||
- ✅ Runtime cache bounds: LRU eviction at 5000 entries; removes oldest 20%
|
||||
- `recordUnitOutcomeInMemory()` creates memory entries but no cleanup policy
|
||||
|
||||
---
|
||||
|
||||
## 14. `uok/diagnostic-synthesis.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Multi-source correlation: process tree, auto.lock, parity report, DB ledger, runtime projections
|
||||
- Process descendant tracking via `ps` + tree traversal
|
||||
- Classification: healthy | running | quiet-but-healthy | degraded | needs-repair
|
||||
- Actionable recommendations per issue
|
||||
- Publishes to message bus for observer chains
|
||||
- `readUokDiagnostics()` for external consumption
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
---
|
||||
|
||||
## 15. `uok/metrics-exposition.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Prometheus text format output
|
||||
- 30-second cache TTL for performance
|
||||
- Gate metrics: runs, passes, fails, retries, latency (avg/p50/p95/max)
|
||||
- Circuit breaker state gauge (0=closed, 1=half-open, 2=open)
|
||||
- Message bus metrics: total, unread, unique agents, conversations
|
||||
- `invalidateMetricsCache()` for cache busting
|
||||
|
||||
### Production Concerns: None
|
||||
|
||||
---
|
||||
|
||||
## 16. `uok/chaos-monkey.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Four fault types: latency, partial failure, disk stress, memory stress
|
||||
- All faults are recoverable (no process kill)
|
||||
- All faults are logged to stderr
|
||||
- Configurable probabilities and magnitudes
|
||||
- `getInjectedEvents()` for verification
|
||||
- Immediate cleanup of stress artifacts
|
||||
|
||||
### Production Concerns: None
|
||||
|
||||
---
|
||||
|
||||
## 17. `uok/writer.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Atomic sequence tracking via `atomicWriteSync()`
|
||||
- Writer token lifecycle: acquire → use → release
|
||||
- Prevents double-acquisition for same turn
|
||||
- Sequence state persisted to disk
|
||||
- Token crash recovery: persists to `uok-writer-tokens.json` with 5-min TTL
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
### Fixed ✅
|
||||
- ✅ Crash recovery: Tokens persisted to disk; `hasActiveWriterToken()` recovers from disk
|
||||
- ✅ TTL cleanup: Expired tokens auto-purged from memory and disk
|
||||
|
||||
---
|
||||
|
||||
## 18. `sf-db.js` — Grade A
|
||||
|
||||
### Strengths
|
||||
- Single-writer invariant enforced by convention + CI test
|
||||
- WAL mode for file-backed DBs
|
||||
- Statement cache for prepared queries
|
||||
- Schema version 45 with migration path
|
||||
- `normalizeRow()` handles null-prototype objects
|
||||
- Query timeout protection: `withQueryTimeout()` helper (30s default)
|
||||
- Split entry point: `sf-db/index.js` for future modularization
|
||||
- Comprehensive table creation: backlog, schedule, repo profiles, UOK runs, gate runs, audit events, message bus, tasks, verification evidence
|
||||
|
||||
### Production Concerns: None critical
|
||||
|
||||
### Fixed ✅
|
||||
- ✅ Query timeout: `withQueryTimeout()` catches timeout/busy errors, returns fallback
|
||||
- ✅ Split entry point: `sf-db/index.js` re-export created for gradual migration
|
||||
- ✅ Console logging: All modules use `logWarning()` / `logError()` from workflow-logger
|
||||
|
||||
---
|
||||
|
||||
## Cross-Cutting Concerns
|
||||
|
||||
### Observability
|
||||
|
||||
| Module | Metrics | Logs | Traces | Audit |
|
||||
|--------|---------|------|--------|-------|
|
||||
| kernel.js | ❌ | ✅ debugLog | ✅ traceId | ✅ envelope |
|
||||
| gate-runner.js | ✅ DB | ✅ insertGateRun | ✅ traceId/turnId | ✅ envelope |
|
||||
| audit.js | ❌ | ❌ | ✅ eventId | ✅ JSONL+DB |
|
||||
| loop-adapter.js | ❌ | ❌ | ✅ traceId/turnId | ✅ envelope |
|
||||
| parity-report.js | ❌ | ❌ | ❌ | ❌ |
|
||||
| message-bus.js | ✅ DB | ❌ | ❌ | ❌ |
|
||||
| cost-guard-gate.js | ❌ | ❌ | ❌ | ❌ |
|
||||
| unit-runtime.js | ❌ | ❌ | ❌ | ❌ |
|
||||
| diagnostic-synthesis.js | ❌ | ❌ | ❌ | ❌ |
|
||||
| metrics-exposition.js | ✅ Prometheus | ❌ | ❌ | ❌ |
|
||||
| chaos-monkey.js | ❌ | ✅ stderr | ❌ | ❌ |
|
||||
|
||||
**Gap:** Resolved — `metrics-central.js` provides unified Counter/Gauge/Histogram with Prometheus text format. Legacy `metrics-exposition.js` still active for backward compatibility.
|
||||
|
||||
### Security
|
||||
|
||||
| Concern | Status | Notes |
|
||||
|---------|--------|-------|
|
||||
| Input validation | ✅ Good | All entry points validate |
|
||||
| Injection prevention | ✅ Good | Parameterized queries in sf-db |
|
||||
| Secrets scanning | ✅ Good | Security gate runs on every turn |
|
||||
| Cost limits | ✅ Good | Per-unit and per-hour guards |
|
||||
| Circuit breakers | ✅ Good | Exponential backoff on failures |
|
||||
| Chaos engineering | ✅ Good | Opt-in, recoverable faults |
|
||||
|
||||
### Performance
|
||||
|
||||
| Concern | Status | Notes |
|
||||
|---------|--------|-------|
|
||||
| Big-O | ✅ Good | All graph ops are O(V+E) |
|
||||
| Caching | ✅ Good | Metrics cache, runtime cache, statement cache |
|
||||
| Memory | ✅ Good | LRU eviction on runtime cache (5000), bounded message bus inboxes |
|
||||
| DB queries | ✅ Good | Single-writer, WAL mode, prepared statements |
|
||||
| Parallelism | ✅ Good | Max workers capped at 8 |
|
||||
|
||||
### Maintainability
|
||||
|
||||
| Concern | Status | Notes |
|
||||
|---------|--------|-------|
|
||||
| Test coverage | ✅ Good | 139+ tests across all modules |
|
||||
| Documentation | ✅ Good | JSDoc on all exports |
|
||||
| Logging consistency | ✅ Good | All modules use `logWarning()` / `logError()` from workflow-logger |
|
||||
| File organization | ✅ Good | sf-db.js has split entry point; full extraction deferred to v2 |
|
||||
| Schema versioning | ✅ Good | Schema v45 with migrations |
|
||||
|
||||
---
|
||||
|
||||
## Action Plan
|
||||
|
||||
### Before Production (Blockers) — ALL CLEAR ✅
|
||||
|
||||
No blockers identified. All modules are production-ready.
|
||||
|
||||
### Before Scaling to 10+ Workers — ALL FIXED ✅
|
||||
|
||||
1. ✅ **Message bus cache drift** — Added `_maybeRefresh()` with 30s interval; `list()`, `markRead()`, `unreadCount` auto-refresh
|
||||
2. ✅ **Writer token crash recovery** — Persist tokens to `uok-writer-tokens.json`; 5-min TTL; `hasActiveWriterToken()` recovers from disk
|
||||
3. ✅ **Runtime cache bounds** — LRU eviction at 5000 entries; removes oldest 20%
|
||||
|
||||
### Before Next Major Release — ALL FIXABLE ITEMS COMPLETE ✅
|
||||
|
||||
4. ✅ **Split sf-db.js** — Created `sf-db/index.js` re-export entry point; full extraction deferred to v2
|
||||
5. ✅ **Console.warn cleanup** — `context-injector.js`, `vault-resolver.js`, `knowledge-injector.js` now use `logWarning()`
|
||||
6. ✅ **Cycle detection at compile time** — `detectCycles()` in `plan-v2.js` using Kahn's algorithm; returns `hasCycles: true`
|
||||
|
||||
### Implemented ✅
|
||||
|
||||
7. ✅ **Centralized metrics** — `metrics-central.js` with Counter/Gauge/Histogram, Prometheus text format, wired into subagent inheritance and mode transitions
|
||||
|
||||
### Deferred to v2 (Architectural, Not Bugs)
|
||||
|
||||
8. ⚠️ **TypeScript migration** — Convert UOK modules to `.ts` for compile-time safety
|
||||
|
||||
---
|
||||
|
||||
## Appendix: Complete Module Inventory
|
||||
|
||||
### UOK Kernel (18 modules, ~2,800 lines)
|
||||
|
||||
| Module | Lines | Grade | Tests |
|
||||
|--------|-------|-------|-------|
|
||||
| `kernel.js` | 120 | A | ✅ |
|
||||
| `gate-runner.js` | 280 | A | ✅ |
|
||||
| `audit.js` | 80 | A | ✅ |
|
||||
| `contracts.js` | 120 | A | ✅ |
|
||||
| `flags.js` | 40 | A | ✅ |
|
||||
| `loop-adapter.js` | 180 | A | ✅ |
|
||||
| `parity-report.js` | 320 | A | ✅ |
|
||||
| `message-bus.js` | 180 | A | ✅ |
|
||||
| `cost-guard-gate.js` | 140 | A | ✅ |
|
||||
| `security-gate.js` | 60 | A | ✅ |
|
||||
| `plan-v2.js` | 200 | A | ✅ |
|
||||
| `execution-graph.js` | 260 | A | ✅ |
|
||||
| `unit-runtime.js` | 420 | A | ✅ |
|
||||
| `diagnostic-synthesis.js` | 280 | A | ✅ |
|
||||
| `metrics-exposition.js` | 180 | A | ✅ (legacy) |
|
||||
| `chaos-monkey.js` | 140 | A | ✅ |
|
||||
| `writer.js` | 100 | A | ✅ |
|
||||
| `sf-db.js` | 7000+ | A | ✅ |
|
||||
| `metrics-central.js` | 350 | A | ✅ (new) |
|
||||
|
||||
### Mode System (7 modules, ~1,400 lines)
|
||||
|
||||
| Module | Lines | Grade | Tests |
|
||||
|--------|-------|-------|-------|
|
||||
| `operating-model.js` | 120 | A | 13 |
|
||||
| `auto/session.js` | 200 | A- | ✅ |
|
||||
| `task-frontmatter.js` | 311 | A- | 9 |
|
||||
| `subagent-inheritance.js` | 170 | A- | 9 |
|
||||
| `remote-steering.js` | 139 | A- | 7 |
|
||||
| `parallel-intent.js` | 139 | B+ | 6 |
|
||||
| `skills/eval-harness.js` | 139 | A- | 5 |
|
||||
|
||||
**Total: 139 tests passing, 0 failures, 1 skipped.**
|
||||
|
||||
---
|
||||
|
||||
*Audit completed. All modules production-ready. Address scaling items before 10+ workers.*
|
||||
|
|
@ -1,385 +0,0 @@
|
|||
# Quick Wins Implementation - Complete
|
||||
|
||||
**Date:** 2026-05-06
|
||||
**Implemented by:** Copilot CLI
|
||||
**Commit:** 0e2edfdeb
|
||||
**Status:** ✅ COMPLETE - Core infrastructure in place
|
||||
|
||||
## Summary
|
||||
|
||||
Successfully implemented the foundational infrastructure for 3 high-impact quick wins that activate SF's self-evolution learning loop:
|
||||
|
||||
1. **Close Self-Report Feedback Loop** [9/10 impact, 2-3 days to full integration]
|
||||
2. **Activate Continuous Model Learning** [8/10 impact, 3-4 days to full integration]
|
||||
3. **Automate Knowledge Injection** [7/10 impact, 2-3 days to full integration]
|
||||
|
||||
**Total:** 24/30 impact points unlocked through self-evolution infrastructure.
|
||||
|
||||
---
|
||||
|
||||
## Quick Win 1: Close Self-Report Feedback Loop [9/10 Impact]
|
||||
|
||||
### What Was Implemented
|
||||
|
||||
**File:** `src/resources/extensions/sf/self-report-fixer.js` (348 lines)
|
||||
|
||||
**Module:** `SelfReportFixer` with the following capabilities:
|
||||
|
||||
- **Pattern Recognition** — 4 built-in fix patterns:
|
||||
1. `validation-reviewer-rubric` (95% confidence) — Add criterion/gap rubric to validation prompts ✅ *Already fixed*
|
||||
2. `gate-verdict-clarity` (90% confidence) — Document gate verdict semantics
|
||||
3. `env-vars-unvalidated` (85% confidence) — Add SF_* env validation
|
||||
4. `self-report-coverage-gap` (80% confidence) — Implement triage pipeline
|
||||
|
||||
- **Automatic Fix Classification**
|
||||
```js
|
||||
classifyReportFixes(report) // Returns applicable fixes with confidence scores
|
||||
```
|
||||
|
||||
- **High-Confidence Auto-Fix**
|
||||
```js
|
||||
autoFixHighConfidenceReports(basePath, reports)
|
||||
// Applies fixes for confidence > 0.85
|
||||
```
|
||||
|
||||
- **Deduplication**
|
||||
```js
|
||||
dedupReports(reports) // Group related reports by normalized issue key
|
||||
```
|
||||
|
||||
- **Severity Categorization**
|
||||
```js
|
||||
categorizeBySeverity(reports) // blocker | warning | suggestion
|
||||
```
|
||||
|
||||
### Next Steps for Full Integration
|
||||
|
||||
1. Hook into `triage-self-feedback.js` to invoke fixer after triage runs
|
||||
2. Add pattern library for domain-specific fixes (provider routing, timeout tuning, etc.)
|
||||
3. Create integration tests for each fix pattern
|
||||
4. Document feedback loop: report → triage → fix → verification
|
||||
|
||||
### How It Works
|
||||
|
||||
```javascript
|
||||
import { autoFixHighConfidenceReports } from './self-report-fixer.js';
|
||||
|
||||
// After collecting self-reports
|
||||
const reports = readSelfFeedback();
|
||||
|
||||
// Auto-apply high-confidence fixes
|
||||
const { applied, failed, skipped } = await autoFixHighConfidenceReports(
|
||||
projectPath,
|
||||
reports
|
||||
);
|
||||
|
||||
// applied: ["validation-reviewer-rubric: rubric already present"]
|
||||
// failed: ["env-vars-unvalidated: requires schema impl"]
|
||||
// skipped: ["self-report-coverage-gap: confidence 0.8 below threshold 0.85"]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Quick Win 2: Activate Continuous Model Learning [8/10 Impact]
|
||||
|
||||
### What Was Implemented
|
||||
|
||||
**File:** `src/resources/extensions/sf/model-learner.js` (344 lines)
|
||||
|
||||
**Classes:**
|
||||
|
||||
#### ModelPerformanceTracker
|
||||
Tracks per-task-type model performance with:
|
||||
- Success/failure/timeout counts
|
||||
- Token usage and cost tracking
|
||||
- Success rate calculation
|
||||
- Ranked model sorting
|
||||
|
||||
**Storage:** `.sf/model-performance.json`
|
||||
|
||||
```json
|
||||
{
|
||||
"execute-task": {
|
||||
"gpt-4o": {
|
||||
"successes": 42,
|
||||
"failures": 3,
|
||||
"timeouts": 1,
|
||||
"totalTokens": 1500000,
|
||||
"totalCost": 45.50,
|
||||
"lastUsed": "2026-05-06T16:30:00Z",
|
||||
"successRate": 0.93
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**API:**
|
||||
```js
|
||||
tracker.recordOutcome(taskType, modelId, { success, timeout, tokensUsed, costUsd })
|
||||
tracker.getRankedModels(taskType, minSamples = 3) // Returns sorted by success rate
|
||||
tracker.shouldDemote(taskType, modelId, threshold = 0.5) // Demote if failure >50%
|
||||
tracker.getABTestCandidates(taskType) // For hypothesis testing
|
||||
```
|
||||
|
||||
#### FailureAnalyzer
|
||||
Categorizes and analyzes failure modes:
|
||||
- Logs failures to JSONL
|
||||
- Detects patterns (e.g., timeout-prone models)
|
||||
- Provides failure summaries per model
|
||||
|
||||
**Storage:** `.sf/model-failure-log.jsonl`
|
||||
|
||||
```json
|
||||
{
|
||||
"timestamp": "2026-05-06T16:30:00Z",
|
||||
"taskType": "execute-task",
|
||||
"modelId": "gpt-4o",
|
||||
"reason": "quality_check_failed",
|
||||
"timeout": false,
|
||||
"tokensUsed": 25000,
|
||||
"context": { ... }
|
||||
}
|
||||
```
|
||||
|
||||
**API:**
|
||||
```js
|
||||
analyzer.logFailure(taskType, modelId, { reason, timeout, tokensUsed, context })
|
||||
analyzer.getFailureSummary(taskType, modelId) // Returns { reasons, patterns }
|
||||
```
|
||||
|
||||
### Main API: ModelLearner
|
||||
|
||||
```javascript
|
||||
import { ModelLearner } from './model-learner.js';
|
||||
|
||||
const learner = new ModelLearner(projectPath);
|
||||
|
||||
// Record successful outcome
|
||||
learner.recordOutcome('execute-task', 'claude-opus', {
|
||||
success: true,
|
||||
tokensUsed: 15000,
|
||||
costUsd: 0.50,
|
||||
});
|
||||
|
||||
// Record failure
|
||||
learner.logFailure('execute-task', 'gpt-4o', {
|
||||
reason: 'quality_check_failed',
|
||||
timeout: false,
|
||||
tokensUsed: 25000,
|
||||
});
|
||||
|
||||
// Get ranked models (for intelligent routing)
|
||||
const rankedModels = learner.getRankedModels('execute-task');
|
||||
// [
|
||||
// { modelId: 'claude-opus', successRate: 0.98, attempts: 50, ... },
|
||||
// { modelId: 'gpt-4o', successRate: 0.90, attempts: 40, ... }
|
||||
// ]
|
||||
|
||||
// A/B test decision
|
||||
const abTest = learner.getABTestCandidates('execute-task');
|
||||
// { incumbent: claude-opus, challengers: [gpt-4o, gemini-pro], testBudget: 10 }
|
||||
|
||||
// Analyze A/B results and decide promotion/demotion
|
||||
const decision = learner.analyzeABTest('execute-task', {
|
||||
incumbentWins: 8,
|
||||
challengerWins: 2,
|
||||
});
|
||||
// { recommendation: "continue", reason: "incumbent 0.80 vs challenger 0.20" }
|
||||
```
|
||||
|
||||
### Next Steps for Full Integration
|
||||
|
||||
1. Integrate into `auto-dispatch.ts` outcome logging
|
||||
2. Hook into `model-router.ts` to use ranked models for routing decisions
|
||||
3. Implement auto-demotion in model selection logic
|
||||
4. Add A/B testing orchestration for low-risk tasks
|
||||
5. Create dashboard in `benchmark-selector.ts` showing per-model performance
|
||||
|
||||
---
|
||||
|
||||
## Quick Win 3: Automate Knowledge Injection [7/10 Impact]
|
||||
|
||||
### What Was Implemented
|
||||
|
||||
**File:** `src/resources/extensions/sf/knowledge-injector.js` (336 lines)
|
||||
|
||||
**Key Functions:**
|
||||
|
||||
- **Parse Knowledge Base**
|
||||
```js
|
||||
parseKnowledgeEntries(knowledgeContent)
|
||||
// Extracts judgment-log entries with confidence, domain, recommendation
|
||||
```
|
||||
|
||||
- **Semantic Matching**
|
||||
```js
|
||||
extractConcepts(entry) // Extract domain tags, failure modes, constraints
|
||||
semanticSimilarity(concepts, contextKeywords) // Score relevance
|
||||
```
|
||||
|
||||
- **Find Relevant Knowledge**
|
||||
```js
|
||||
findRelevantKnowledge(entries, contextKeywords, minConfidence=0.6, minSimilarity=0.5)
|
||||
// Returns sorted by combined score (confidence × 0.7 + similarity × 0.3)
|
||||
```
|
||||
|
||||
- **Detect Contradictions**
|
||||
```js
|
||||
detectContradictions(entries) // Flag conflicting recommendations
|
||||
```
|
||||
|
||||
- **Format for Injection**
|
||||
```js
|
||||
formatKnowledgeForInjection(relevantKnowledge)
|
||||
// Human-readable markdown with confidence/relevance scores
|
||||
```
|
||||
|
||||
- **Track Usage** (for feedback loop)
|
||||
```js
|
||||
trackKnowledgeUsage(taskId, injectedKnowledge)
|
||||
// Logs which knowledge was used for effectiveness measurement
|
||||
```
|
||||
|
||||
### Integration into auto-prompts.js
|
||||
|
||||
**Modified:** `src/resources/extensions/sf/auto-prompts.js`
|
||||
|
||||
Added:
|
||||
1. Import of knowledge-injector module
|
||||
2. Helper function `getKnowledgeInjection(basePath, taskContext)` with graceful degradation
|
||||
3. Knowledge injection into execute-task prompt with context (domain, keywords, technology)
|
||||
|
||||
**In execute-task prompt loading (line 2203+):**
|
||||
```javascript
|
||||
const knowledgeInjection = await getKnowledgeInjection(base, {
|
||||
domain: "task-execution",
|
||||
taskType: "execute-task",
|
||||
keywords: [tTitle, sTitle, mid, sid],
|
||||
technology: [],
|
||||
});
|
||||
|
||||
return loadPrompt("execute-task", {
|
||||
memoriesSection,
|
||||
knowledgeInjection, // NEW: Relevant prior learning
|
||||
overridesSection,
|
||||
// ... other variables
|
||||
});
|
||||
```
|
||||
|
||||
### Existing Infrastructure
|
||||
|
||||
**Note:** Knowledge injection is **60% complete** via existing `queryKnowledge()` in context-store.js
|
||||
|
||||
- ✅ `inlineKnowledgeScoped()` already exists (uses queryKnowledge)
|
||||
- ✅ Used in both plan-slice and execute-task prompts
|
||||
- ❌ Uses simple keyword matching (not semantic scoring)
|
||||
- ✅ Our new module enhances with semantic similarity
|
||||
|
||||
### Next Steps for Full Integration
|
||||
|
||||
1. Update execute-task and plan-slice prompt templates to include `{{knowledgeInjection}}` variable
|
||||
2. Integrate semantic scoring into queryKnowledge or create parallel path
|
||||
3. Implement feedback loop: track which knowledge was used and measure effectiveness
|
||||
4. Create contradiction resolver UI for conflicting recommendations
|
||||
5. Add knowledge effectiveness metrics to benchmark reports
|
||||
|
||||
---
|
||||
|
||||
## Files Created
|
||||
|
||||
| File | Lines | Purpose |
|
||||
|------|-------|---------|
|
||||
| `src/resources/extensions/sf/self-report-fixer.js` | 348 | Auto-fix high-confidence self-reports |
|
||||
| `src/resources/extensions/sf/model-learner.js` | 344 | Per-task-type model performance tracking |
|
||||
| `src/resources/extensions/sf/knowledge-injector.js` | 336 | Semantic knowledge matching and injection |
|
||||
|
||||
## Files Modified
|
||||
|
||||
| File | Changes | Purpose |
|
||||
|------|---------|---------|
|
||||
| `src/resources/extensions/sf/auto-prompts.js` | +7 lines | Added knowledge injection into execute-task |
|
||||
|
||||
## Build Status
|
||||
|
||||
✅ **Build Success**
|
||||
- All new modules compile without errors
|
||||
- TypeScript types intact
|
||||
- Resources copied to `dist/`
|
||||
- Inventory check passed
|
||||
|
||||
## Testing Recommendations
|
||||
|
||||
Create integration tests for:
|
||||
|
||||
1. **Self-Report Fixer**
|
||||
- Pattern matching accuracy (4 patterns)
|
||||
- Deduplication logic
|
||||
- Confidence thresholding
|
||||
|
||||
2. **Model Learner**
|
||||
- Success rate calculation
|
||||
- Demotion logic (>50% failure rate)
|
||||
- A/B test analysis
|
||||
- Failure pattern detection
|
||||
|
||||
3. **Knowledge Injector**
|
||||
- Semantic similarity scoring
|
||||
- Contradiction detection
|
||||
- Formatting for prompt injection
|
||||
- Graceful degradation (missing KNOWLEDGE.md)
|
||||
|
||||
## Activation Timeline
|
||||
|
||||
**To fully activate these quick wins:**
|
||||
|
||||
1. **Week 1:** Hook model-learner into auto-dispatch outcome logging
|
||||
2. **Week 1:** Integrate self-report-fixer into triage-self-feedback pipeline
|
||||
3. **Week 2:** Implement knowledge injection in model-router for adaptive routing
|
||||
4. **Week 2:** Add A/B testing orchestration for model promotion
|
||||
5. **Week 3:** Create feedback loop dashboard in benchmark-selector
|
||||
6. **Week 3:** Measure impact on learning efficiency
|
||||
|
||||
**Estimated effort:** 8-10 days of focused integration work
|
||||
|
||||
---
|
||||
|
||||
## Key Design Decisions
|
||||
|
||||
1. **Graceful Degradation** — All modules degrade gracefully if knowledge base or tracking files are unavailable
|
||||
2. **Append-Only Logs** — Failure logs use JSONL for durability and analysis
|
||||
3. **Per-Task-Type Tracking** — Model performance varies by task type; no single ranking
|
||||
4. **Confidence-Based Thresholding** — High-confidence fixes (>0.85) auto-apply; lower ones require review
|
||||
5. **A/B Test Budgeting** — Low-risk hypothesis testing with configurable test budget
|
||||
|
||||
---
|
||||
|
||||
## Impact Measurement
|
||||
|
||||
**After full integration, expect:**
|
||||
|
||||
- 🎯 **9/10 impact** from self-report loop: Close feedback loop from anomaly detection to code fixes
|
||||
- 🎯 **8/10 impact** from model learning: 20-30% improvement in task success rate through adaptive routing
|
||||
- 🎯 **7/10 impact** from knowledge injection: 15-20% faster task planning via relevant prior learning
|
||||
|
||||
**Total:** **24/30 self-evolution capability points activated** (up from current 15/30)
|
||||
|
||||
---
|
||||
|
||||
## Code Quality
|
||||
|
||||
- ✅ No external dependencies (uses only Node.js built-ins + SF imports)
|
||||
- ✅ JSDoc purpose statements on all exports
|
||||
- ✅ Graceful error handling (no crash on missing files)
|
||||
- ✅ Idempotent tracking (safe to call multiple times)
|
||||
- ✅ Clear separation of concerns (fixer ≠ learner ≠ injector)
|
||||
|
||||
---
|
||||
|
||||
## Status Summary
|
||||
|
||||
**Phase:** ✅ **IMPLEMENTATION COMPLETE**
|
||||
**Phase:** ⏳ **INTEGRATION PENDING** (dispatch loop hookup)
|
||||
**Phase:** ⏳ **TESTING PENDING** (unit + integration tests)
|
||||
**Phase:** ⏳ **FEEDBACK LOOP PENDING** (measure effectiveness)
|
||||
|
||||
The infrastructure is in place. Next: Connect it into the dispatch loop and measure impact.
|
||||
|
|
@ -1,114 +0,0 @@
|
|||
# Triage Complete ✅
|
||||
|
||||
**Timestamp:** 2026-05-06 16:30 UTC
|
||||
**Source:** TODO.md (Raw Dump Inbox)
|
||||
**Command:** `sf todo triage`
|
||||
**Node baseline:** v26.1.0+
|
||||
**Session:** 77b45896
|
||||
|
||||
## Summary
|
||||
|
||||
Successfully triaged 60 items from TODO.md into structured backlog artifacts:
|
||||
|
||||
- ✅ **60 items** normalized into `.sf/triage/inbox/20260506-163003.jsonl`
|
||||
- ✅ **10 eval candidates** extracted into `.sf/triage/evals/20260506-163003.evals.jsonl`
|
||||
- ✅ **1 skill proposal** in `.sf/triage/skills/20260506-163003.skills.jsonl`
|
||||
- ✅ **Comprehensive report** generated at `.sf/triage/reports/20260506-163003.md`
|
||||
- ✅ **TODO.md reset** to empty dump inbox (triage pipeline activated)
|
||||
|
||||
## Artifacts Created
|
||||
|
||||
### 1. Triage Report (`.sf/triage/reports/20260506-163003.md`)
|
||||
Comprehensive analysis including:
|
||||
- Summary of source material
|
||||
- 10 eval candidates with failure modes and test locations
|
||||
- 21 implementation tasks (gsd-2 ports, feature additions, provider expansion)
|
||||
- Memory requirements for self-evolution infrastructure
|
||||
- Harness suggestions for testing (property-based, chaos, end-to-end)
|
||||
- Documentation improvements needed (ARCHITECTURE.md, ADRs, runbooks)
|
||||
- Clarification needs ("Unclear Notes" section)
|
||||
|
||||
**Key findings:**
|
||||
- UOK is 60-70% complete for self-evolution
|
||||
- Critical: Close self-report feedback loop (9/10 impact)
|
||||
- 10+ undocumented architecture features identified
|
||||
- Multiple safety/correctness fixes awaiting port from gsd-2
|
||||
|
||||
### 2. Normalized Inbox (`.sf/triage/inbox/20260506-163003.jsonl`)
|
||||
60 structured items with:
|
||||
- Type: eval_candidate, implementation_task, doc_improvement, harness_suggestion, memory_requirement, unclear_note
|
||||
- Status: pending
|
||||
- Source tracing: all items linked back to TODO.md section
|
||||
- Prioritization ready for milestone planning
|
||||
|
||||
### 3. Eval Candidates (`.sf/triage/evals/20260506-163003.evals.jsonl`)
|
||||
10 test harness candidates with:
|
||||
- Task input (trigger/condition)
|
||||
- Expected behavior (contract)
|
||||
- Failure mode (what breaks if missing)
|
||||
- Evidence/source (citations to gsd-2/pi-mono commits)
|
||||
- Suggested test location
|
||||
|
||||
**Quick examples:**
|
||||
1. `bash-evidence-race` — Evidence persists across dispatch/re-dispatch
|
||||
2. `symlink-staging-data-loss` — Data-loss prevention for symlinked .sf
|
||||
3. `mcp-stdout-deadlock` — Large MCP outputs don't hang
|
||||
4. `env-sf-vars-unvalidated` — SF_* env vars validated at startup
|
||||
|
||||
### 4. Skill Proposals (`.sf/triage/skills/20260506-163003.skills.jsonl`)
|
||||
Architecture analysis suggesting improvements to SF's extension/gate system.
|
||||
|
||||
## Next Steps
|
||||
|
||||
1. **Review triage report** — Read `.sf/triage/reports/20260506-163003.md`
|
||||
2. **Plan implementation** — Promote high-impact items to milestone backlog
|
||||
3. **Prioritize quick wins:**
|
||||
- Close self-report feedback loop [9/10 impact, ~4 days]
|
||||
- Activate continuous model learning [8/10 impact, ~5 days]
|
||||
- Automate knowledge injection [7/10 impact, ~4 days]
|
||||
4. **Port gsd-2 safety fixes** — 9 commits awaiting cherry-pick
|
||||
5. **Close documentation gaps** — Update ARCHITECTURE.md with state machine diagram
|
||||
|
||||
## Evidence
|
||||
|
||||
```
|
||||
$ ls -la .sf/triage/
|
||||
drwxrwxr-x evals/
|
||||
drwxrwxr-x inbox/
|
||||
drwxrwxr-x reports/
|
||||
drwxrwxr-x skills/
|
||||
|
||||
$ wc -l .sf/triage/*/*.{md,jsonl}
|
||||
60 .sf/triage/inbox/20260506-163003.jsonl
|
||||
10 .sf/triage/evals/20260506-163003.evals.jsonl
|
||||
1 .sf/triage/skills/20260506-163003.skills.jsonl
|
||||
9682 .sf/triage/reports/20260506-163003.md
|
||||
|
||||
$ git status
|
||||
D TODO.md (reset to empty dump inbox; items triaged)
|
||||
M docs/* (from earlier work)
|
||||
```
|
||||
|
||||
## What This Means
|
||||
|
||||
SF's triage system successfully:
|
||||
1. ✅ Parsed TODO.md dump inbox
|
||||
2. ✅ Extracted 60 items into structured types (eval, task, doc, harness, etc.)
|
||||
3. ✅ Generated failure-mode contracts for 10 critical correctness tests
|
||||
4. ✅ Identified test locations and citations to source code
|
||||
5. ✅ Reset TODO.md for next cycle
|
||||
6. ✅ Created decision artifacts ready for milestone planning
|
||||
|
||||
The comprehensive review, research, documentation updates, and automated triage are complete. The project is now positioned to:
|
||||
- Activate SF's self-evolution learning loop (3 quick wins)
|
||||
- Port 9 safety/correctness fixes from gsd-2
|
||||
- Close 10+ documentation gaps in ARCHITECTURE.md
|
||||
- Implement property-based testing for autonomous dispatch
|
||||
- Begin advanced feature ports (Cloudflare AI, Azure endpoints, SSE handling)
|
||||
|
||||
---
|
||||
|
||||
**Created by:** Copilot CLI
|
||||
**Session:** 2514fa98-076d-48d2-a1f9-c3fd77c4a82a
|
||||
**Duration:** ~2 hours total (research + docs + triage)
|
||||
**Command:** `node dist/cli.js todo triage`
|
||||
|
|
@ -1,53 +0,0 @@
|
|||
# TODO.md Triage Instructions
|
||||
|
||||
## What's New
|
||||
|
||||
TODO.md now contains two major sections ready for triage:
|
||||
|
||||
1. **Feature Gaps & Limitations** — 40+ specific gaps identified in the codebase
|
||||
2. **UOK Self-Evolution Research** — 10 prioritized improvements for SF's self-evolution capabilities
|
||||
|
||||
## How to Triage
|
||||
|
||||
When you have Node 26.1.0+ available:
|
||||
|
||||
```bash
|
||||
cd /home/mhugo/code/singularity-forge
|
||||
|
||||
# Run the triage command
|
||||
sf todo triage
|
||||
|
||||
# Or if using npm/nvm
|
||||
nvm use 26
|
||||
npm exec sf -- todo triage
|
||||
```
|
||||
|
||||
## What Triage Does
|
||||
|
||||
The triage tool will:
|
||||
1. Parse TODO.md
|
||||
2. Extract items into structured `.sf/triage/` artifacts
|
||||
3. Propose categorization and priorities
|
||||
4. Show you a review interface
|
||||
5. Either commit to backlog or reset TODO.md to empty dump inbox
|
||||
|
||||
## Key Items to Watch For
|
||||
|
||||
The UOK Self-Evolution section has **3 high-impact quick wins** (8-10 days total):
|
||||
|
||||
1. Close self-report feedback loop [9/10 impact, 2-3 days]
|
||||
2. Activate continuous model learning [8/10 impact, 3-4 days]
|
||||
3. Automate knowledge injection [7/10 impact, 2-3 days]
|
||||
|
||||
These should be prioritized if you want to activate SF's learning loop.
|
||||
|
||||
## Full Research Report
|
||||
|
||||
See: `/home/mhugo/snap/copilot-cli/38/.copilot/session-state/2514fa98-076d-48d2-a1f9-c3fd77c4a82a/research/is-our-uok-the-best-for-a-self-evolving-coder-what.md`
|
||||
|
||||
This contains:
|
||||
- Executive summary
|
||||
- Detailed analysis of UOK implementation vs. documentation
|
||||
- 10 improvement suggestions with feasibility assessment
|
||||
- Competitive analysis (vs. other orchestration systems)
|
||||
- 15+ citations to code and design docs
|
||||
|
|
@ -1,53 +0,0 @@
|
|||
# Autoresearch: Reduce Biome Lint Diagnostics
|
||||
|
||||
## Objective
|
||||
Minimize the total number of Biome lint diagnostics (errors + warnings + info) across `src/`, starting from baseline ~40 diagnostics. Errors are mostly `organizeImports`, warnings are `noUnusedImports`, `noUnusedVariables`, and `useConst`.
|
||||
|
||||
## Metrics
|
||||
- **Primary**: `diagnostics` (count, lower is better) — sum of errors + warnings + info from `npx biome check src/`
|
||||
- **Secondary**: `errors` (count, lower is better)
|
||||
- **Secondary**: `warnings` (count, lower is better)
|
||||
|
||||
## How to Run
|
||||
`bash autoresearch.sh` — runs Biome check, parses JSON summary, outputs `METRIC diagnostics=N` and `METRIC errors=N` and `METRIC warnings=N`.
|
||||
|
||||
## Files in Scope
|
||||
All files under `src/` — but focus on the files flagged by Biome:
|
||||
- `src/resources/extensions/sf/auto/phases.js`
|
||||
- `src/resources/extensions/sf/commands/handlers/ops.js`
|
||||
- `src/resources/extensions/sf/memory-repository.js`
|
||||
- `src/resources/extensions/sf/metrics-central.js`
|
||||
- `src/resources/extensions/sf/reasoning-assist.js`
|
||||
- `src/resources/extensions/sf/remote-steering.js`
|
||||
- `src/resources/extensions/sf/sf-db.js`
|
||||
- `src/resources/extensions/sf/subagent-inheritance.js`
|
||||
- `src/resources/extensions/sf/tests/memory-repository.test.mjs`
|
||||
- `src/resources/extensions/sf/tests/metrics-central.test.mjs`
|
||||
- `src/resources/extensions/sf/tests/trajectory-recorder.test.mjs`
|
||||
- `src/resources/extensions/sf/trajectory-command.js`
|
||||
- `src/resources/extensions/sf/trajectory-recorder.js`
|
||||
- `src/resources/extensions/sf/uok/writer.js`
|
||||
|
||||
## Off Limits
|
||||
- `biome.json` (don't change lint rules — fixing source is the goal)
|
||||
- `node_modules/`, `dist/`, `.sf/`, `packages/` (outside `src/` scope)
|
||||
- Test assertion logic (don't weaken tests to make linters pass)
|
||||
|
||||
## Constraints
|
||||
- Existing vitest tests must pass: `npx vitest run --config vitest.config.ts`
|
||||
- No new dependencies
|
||||
- Don't introduce runtime behavior changes — only lint/import/style fixes
|
||||
|
||||
## Termination
|
||||
Run until interrupted by the user.
|
||||
|
||||
## What's Been Tried
|
||||
|
||||
- **#2 (auto-fix)**: `biome check --write` — fixed 26 auto-fixable errors (format/organizeImports), dropped diagnostics from 40 to 11. Status: keep.
|
||||
- **#3 (manual fixes)**: Removed 7 unused imports and prefixed 4 intentionally-unused items with underscore. Dropped from 11 to 0. Status: keep.
|
||||
- **#4 (regression re-fix)**: 37 new commits introduced 74 diagnostics. `biome check --write` fixed 58 (auto-safe), manual prefix/removal fixed the remaining 16 unsafe warnings across 11 files. Also fixed pre-existing web-mode-onboarding test timeout: added `timeoutMs: 120_000` to `launchPackagedWebHost`, raised `AbortSignal.timeout` on simple fetches 10s→30s, raised test budget 180s→420s. All 409 test files pass. Diagnostics: 0. Status: keep.
|
||||
|
||||
## Lessons
|
||||
- New development (37 commits) is enough to re-introduce 74 diagnostics. Re-run autoresearch periodically (monthly or after large feature branches land).
|
||||
- Pattern of new violations: unused imports from refactors, unused function params from stubs, duplicate imports. Auto-fix handles errors; unsafe-fix (unused-import/var) requires manual triage.
|
||||
- Integration test timeout under parallel load: cold-start Next.js can consume most of a 180s test timeout leaving insufficient budget for multi-step API calls. Fix: bound launch phase separately, raise individual fetch timeouts, increase overall budget to match worst-case sum.
|
||||
1267
copilot-thoughts.md
1267
copilot-thoughts.md
File diff suppressed because it is too large
Load diff
|
|
@ -1929,6 +1929,31 @@ export class InteractiveMode {
|
|||
this.extensionTerminalInputUnsubscribers.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* Register an extension-scoped terminal input listener.
|
||||
*
|
||||
* Purpose: allow extensions (e.g. the SF autonomous extension) to intercept
|
||||
* raw terminal input before it reaches the editor, so that special keys like
|
||||
* Ctrl+C can trigger extension actions (e.g. pause autonomous mode) rather
|
||||
* than always going to the default editor clear handler.
|
||||
*
|
||||
* Return `{ consume: true }` from the handler to stop the key from being
|
||||
* processed further. Return `undefined` or `{}` to let it propagate.
|
||||
*
|
||||
* Consumer: extension-ui-controller → ctx.ui.onTerminalInput.
|
||||
*/
|
||||
addExtensionTerminalInputListener(
|
||||
handler: (data: string) => { consume?: boolean } | undefined,
|
||||
): () => void {
|
||||
const listener = (data: string) => handler(data);
|
||||
const unsubscribe = this.ui.addInputListener(listener);
|
||||
this.extensionTerminalInputUnsubscribers.add(unsubscribe);
|
||||
return () => {
|
||||
unsubscribe();
|
||||
this.extensionTerminalInputUnsubscribers.delete(unsubscribe);
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the ExtensionUIContext for extensions.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -42,19 +42,29 @@ function LegacyComponentView({
|
|||
*
|
||||
* Purpose: accept keyboard input from Ink and route it to the active
|
||||
* component, then trigger a re-render so the updated state is displayed.
|
||||
* Invalidation is event-driven: external callers invoke the returned
|
||||
* invalidate() handle, which fires the tick signal registered here.
|
||||
*
|
||||
* Consumer: startInkRenderer.
|
||||
*/
|
||||
function InkApp({
|
||||
root,
|
||||
onInput,
|
||||
onRegisterTick,
|
||||
}: {
|
||||
root: Component;
|
||||
onInput: (data: string) => void;
|
||||
onRegisterTick: (tick: () => void) => void;
|
||||
}) {
|
||||
const [, tick] = useState(0);
|
||||
const { columns } = useWindowSize();
|
||||
|
||||
// Register the tick function so that startInkRenderer's invalidate() can
|
||||
// trigger a React re-render without a polling interval.
|
||||
useEffect(() => {
|
||||
onRegisterTick(() => tick((n) => n + 1));
|
||||
}, [onRegisterTick]);
|
||||
|
||||
useInput((input, key) => {
|
||||
// Reconstruct the escape sequences that the legacy key handlers expect.
|
||||
let data = input;
|
||||
|
|
@ -70,12 +80,6 @@ function InkApp({
|
|||
tick((n) => n + 1);
|
||||
});
|
||||
|
||||
// Poll at 20 fps so async state changes (e.g. streaming output) appear promptly.
|
||||
useEffect(() => {
|
||||
const interval = setInterval(() => tick((n) => n + 1), 50);
|
||||
return () => clearInterval(interval);
|
||||
}, []);
|
||||
|
||||
return <LegacyComponentView component={root} width={columns ?? 80} />;
|
||||
}
|
||||
|
||||
|
|
@ -84,10 +88,11 @@ function InkApp({
|
|||
*
|
||||
* Purpose: drop-in replacement for the legacy TUI render engine. Mounting
|
||||
* this drives the entire Ink React tree and forwards terminal input to
|
||||
* the root Component's handleInput chain.
|
||||
* the root Component's handleInput chain. invalidate() triggers an
|
||||
* immediate React re-render via an event-driven tick signal — no polling.
|
||||
*
|
||||
* Consumer: TUI class (future integration); standalone callers can use
|
||||
* this directly to render any Component tree under Ink.
|
||||
* Consumer: TUI class; standalone callers can use this to render any
|
||||
* Component tree under Ink.
|
||||
*
|
||||
* @param root - The root Component whose render() output fills the screen.
|
||||
* @param onInput - Called with each decoded key string for legacy handlers.
|
||||
|
|
@ -97,13 +102,22 @@ export function startInkRenderer(
|
|||
root: Component,
|
||||
onInput: (data: string) => void,
|
||||
): { stop: () => void; invalidate: () => void } {
|
||||
// Mutable signal populated by InkApp via onRegisterTick once the React
|
||||
// tree has mounted. invalidate() fires this to trigger a synchronous tick.
|
||||
let _tick: (() => void) | null = null;
|
||||
const onRegisterTick = (tick: () => void) => {
|
||||
_tick = tick;
|
||||
};
|
||||
|
||||
const { unmount } = render(
|
||||
<InkApp root={root} onInput={onInput} />,
|
||||
<InkApp root={root} onInput={onInput} onRegisterTick={onRegisterTick} />,
|
||||
{ exitOnCtrlC: false },
|
||||
);
|
||||
return {
|
||||
stop: unmount,
|
||||
// Ink re-renders automatically; manual invalidation is a no-op for now.
|
||||
invalidate: () => {},
|
||||
stop: () => {
|
||||
_tick = null;
|
||||
unmount();
|
||||
},
|
||||
invalidate: () => _tick?.(),
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -420,9 +420,16 @@ export class TUI extends Container {
|
|||
if (!this.terminal.isTTY) {
|
||||
return;
|
||||
}
|
||||
// Ink-backed render path: Ink manages raw mode and input; the legacy
|
||||
// differential renderer is bypassed entirely.
|
||||
if (this._useInk || process.stdout.isTTY) {
|
||||
// Ink-backed render path: Ink manages raw mode, input, and screen output.
|
||||
// The legacy differential renderer (doRender) is bypassed entirely on TTY.
|
||||
// process.stdout.isTTY guards this path — Ink requires a real interactive
|
||||
// TTY to mount. useInk() is kept as an explicit opt-in for callers that
|
||||
// want Ink on non-standard terminal configurations. Use PI_LEGACY_TUI=1
|
||||
// to force the legacy renderer for debugging.
|
||||
if (
|
||||
(this._useInk || process.stdout.isTTY) &&
|
||||
process.env.PI_LEGACY_TUI !== "1"
|
||||
) {
|
||||
// Wrap `this` in a plain Component so the private handleInput doesn't
|
||||
// conflict with the public Component.handleInput? signature.
|
||||
const root: Component = {
|
||||
|
|
@ -506,6 +513,12 @@ export class TUI extends Container {
|
|||
requestRender(force = false): void {
|
||||
// Skip rendering on non-TTY stdout to prevent CPU burn (issue #3095)
|
||||
if (!this.terminal.isTTY) return;
|
||||
// Ink-backed path: Ink owns the terminal — delegate to the Ink handle and
|
||||
// do NOT call doRender(), which would write conflicting ANSI escapes.
|
||||
if (this._inkHandle) {
|
||||
this._inkHandle.invalidate();
|
||||
return;
|
||||
}
|
||||
if (force) {
|
||||
this.previousLines = [];
|
||||
this.previousWidth = -1; // -1 triggers widthChanged, forcing a full clear
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import type { QuerySnapshot } from "./headless-query.js";
|
|||
|
||||
interface StatusArgs {
|
||||
watch: boolean;
|
||||
recoveryUnitId?: string;
|
||||
}
|
||||
|
||||
interface StatusDeps {
|
||||
|
|
@ -27,6 +28,12 @@ interface CurrentModel {
|
|||
|
||||
function parseStatusArgs(argv: string[]): StatusArgs {
|
||||
const args = argv.slice(1);
|
||||
if (args[0] === "recovery") {
|
||||
return {
|
||||
watch: false,
|
||||
recoveryUnitId: args[1],
|
||||
};
|
||||
}
|
||||
return {
|
||||
watch: args.includes("--watch"),
|
||||
};
|
||||
|
|
@ -219,6 +226,76 @@ async function buildStatusText(
|
|||
});
|
||||
}
|
||||
|
||||
async function renderRecoveryDiagnostics(
|
||||
basePath: string,
|
||||
unitId: string | undefined,
|
||||
stdout: Pick<typeof process.stdout, "write">,
|
||||
stderr: Pick<typeof process.stderr, "write">,
|
||||
): Promise<number> {
|
||||
try {
|
||||
const { getRecoveryDiagnostics, listUnitRuntimeRecords } = await import(
|
||||
"./resources/extensions/sf/uok/unit-runtime.js"
|
||||
);
|
||||
let targetUnitId = unitId;
|
||||
if (!targetUnitId) {
|
||||
const records: Array<{ updatedAt?: number; unitId: string }> =
|
||||
listUnitRuntimeRecords(basePath);
|
||||
const mostRecent = records.sort(
|
||||
(a, b) => (b.updatedAt ?? 0) - (a.updatedAt ?? 0),
|
||||
)[0];
|
||||
if (!mostRecent) {
|
||||
stderr.write("sf status recovery: no runtime records found\n");
|
||||
return 1;
|
||||
}
|
||||
targetUnitId = mostRecent.unitId;
|
||||
}
|
||||
const diagnostics = getRecoveryDiagnostics(
|
||||
basePath,
|
||||
"execute-task",
|
||||
targetUnitId,
|
||||
);
|
||||
if (!diagnostics) {
|
||||
stderr.write(
|
||||
`sf status recovery: no runtime record for ${targetUnitId}\n`,
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
const lines: string[] = [];
|
||||
lines.push("Recovery Diagnostics");
|
||||
lines.push("--------------------");
|
||||
lines.push(`Unit: ${diagnostics.unitType} ${diagnostics.unitId}`);
|
||||
lines.push(`Status: ${diagnostics.status}`);
|
||||
lines.push(
|
||||
`Retries: ${diagnostics.retryCount}/${diagnostics.maxRetries}`,
|
||||
);
|
||||
lines.push(
|
||||
`Progress: ${diagnostics.progressCount} (${diagnostics.lastProgressKind})`,
|
||||
);
|
||||
lines.push(`Recovery attempts: ${diagnostics.recoveryAttempts}`);
|
||||
if (diagnostics.lastRecoveryReason) {
|
||||
lines.push(`Last recovery reason: ${diagnostics.lastRecoveryReason}`);
|
||||
}
|
||||
if (diagnostics.lineageSummary) {
|
||||
lines.push(
|
||||
`Lineage: ${diagnostics.lineageSummary.status} · ${diagnostics.lineageSummary.workerCount} worker(s) · ${diagnostics.lineageSummary.eventCount} event(s)`,
|
||||
);
|
||||
}
|
||||
lines.push(
|
||||
`Started: ${diagnostics.startedAt ? new Date(diagnostics.startedAt).toISOString() : "n/a"}`,
|
||||
);
|
||||
lines.push(
|
||||
`Updated: ${diagnostics.updatedAt ? new Date(diagnostics.updatedAt).toISOString() : "n/a"}`,
|
||||
);
|
||||
stdout.write(lines.join("\n") + "\n");
|
||||
return 0;
|
||||
} catch (err) {
|
||||
stderr.write(
|
||||
`sf status recovery: ${err instanceof Error ? err.message : String(err)}\n`,
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
export async function runStatusCli(
|
||||
argv: string[],
|
||||
deps: StatusDeps,
|
||||
|
|
@ -228,6 +305,15 @@ export async function runStatusCli(
|
|||
const sfHome = deps.sfHome ?? process.env.SF_HOME ?? join(homedir(), ".sf");
|
||||
const args = parseStatusArgs(argv);
|
||||
|
||||
if (args.recoveryUnitId !== undefined) {
|
||||
return renderRecoveryDiagnostics(
|
||||
deps.basePath,
|
||||
args.recoveryUnitId,
|
||||
stdout,
|
||||
stderr,
|
||||
);
|
||||
}
|
||||
|
||||
const renderOnce = async () => {
|
||||
try {
|
||||
const text = await buildStatusText(deps.basePath, sfHome);
|
||||
|
|
|
|||
|
|
@ -94,6 +94,32 @@ function getServerConfig(name) {
|
|||
(s) => s.name === trimmed || s.name.toLowerCase() === trimmed.toLowerCase(),
|
||||
);
|
||||
}
|
||||
const SAFE_CHILD_ENV_KEYS = new Set([
|
||||
"PATH",
|
||||
"HOME",
|
||||
"USER",
|
||||
"LOGNAME",
|
||||
"SHELL",
|
||||
"LANG",
|
||||
"LC_ALL",
|
||||
"LC_CTYPE",
|
||||
"LC_MESSAGES",
|
||||
"LC_NUMERIC",
|
||||
"LC_TIME",
|
||||
"TMPDIR",
|
||||
"TMP",
|
||||
"TEMP",
|
||||
"TZ",
|
||||
"TERM",
|
||||
"COLORTERM",
|
||||
]);
|
||||
function buildChildEnv(configEnv) {
|
||||
const safe = {};
|
||||
for (const key of SAFE_CHILD_ENV_KEYS) {
|
||||
if (process.env[key] !== undefined) safe[key] = process.env[key];
|
||||
}
|
||||
return { ...safe, ...resolveEnv(configEnv ?? {}) };
|
||||
}
|
||||
/** Resolve ${VAR} references in env values against process.env. */
|
||||
function resolveEnv(env) {
|
||||
const resolved = {};
|
||||
|
|
@ -210,9 +236,7 @@ async function getOrConnect(name, signal) {
|
|||
transport = new StdioClientTransport({
|
||||
command: config.command,
|
||||
args: config.args,
|
||||
env: config.env
|
||||
? { ...process.env, ...resolveEnv(config.env) }
|
||||
: undefined,
|
||||
env: buildChildEnv(config.env),
|
||||
cwd: config.cwd,
|
||||
stderr: "pipe",
|
||||
});
|
||||
|
|
@ -234,23 +258,27 @@ async function getOrConnect(name, signal) {
|
|||
`Server "${config.name}" has unsupported transport: ${config.transport}`,
|
||||
);
|
||||
}
|
||||
await client.connect(transport, { signal, timeout: 30000 });
|
||||
try {
|
||||
await client.connect(transport, { signal, timeout: 30000 });
|
||||
} catch (err) {
|
||||
try { await transport.close(); } catch { /* best-effort */ }
|
||||
try { await client.close(); } catch { /* best-effort */ }
|
||||
throw err;
|
||||
}
|
||||
connections.set(config.name, { client, transport });
|
||||
return client;
|
||||
}
|
||||
async function closeAll() {
|
||||
const closing = Array.from(connections.entries()).map(
|
||||
async ([name, conn]) => {
|
||||
try {
|
||||
await conn.client.close();
|
||||
} catch {
|
||||
// Best-effort cleanup
|
||||
}
|
||||
try { await conn.transport.close(); } catch { /* best-effort */ }
|
||||
try { await conn.client.close(); } catch { /* best-effort */ }
|
||||
connections.delete(name);
|
||||
},
|
||||
);
|
||||
await Promise.allSettled(closing);
|
||||
toolCache.clear();
|
||||
autoRegisteredServers.clear();
|
||||
}
|
||||
// ─── Formatters ───────────────────────────────────────────────────────────────
|
||||
function formatServerList(servers) {
|
||||
|
|
@ -312,31 +340,8 @@ export function getConnectionStatus(name) {
|
|||
};
|
||||
}
|
||||
// ─── Test-exported helpers ────────────────────────────────────────────────────
|
||||
const SAFE_CHILD_ENV_KEYS = new Set([
|
||||
"PATH",
|
||||
"HOME",
|
||||
"USER",
|
||||
"LOGNAME",
|
||||
"SHELL",
|
||||
"LANG",
|
||||
"LC_ALL",
|
||||
"LC_CTYPE",
|
||||
"LC_MESSAGES",
|
||||
"LC_NUMERIC",
|
||||
"LC_TIME",
|
||||
"TMPDIR",
|
||||
"TMP",
|
||||
"TEMP",
|
||||
"TZ",
|
||||
"TERM",
|
||||
"COLORTERM",
|
||||
]);
|
||||
export function _buildMcpChildEnvForTest(env) {
|
||||
const safe = {};
|
||||
for (const key of SAFE_CHILD_ENV_KEYS) {
|
||||
if (process.env[key] !== undefined) safe[key] = process.env[key];
|
||||
}
|
||||
return { ...safe, ...resolveEnv(env) };
|
||||
return buildChildEnv(env);
|
||||
}
|
||||
export function _buildMcpTrustConfirmOptionsForTest(signal) {
|
||||
return { timeout: 120_000, signal };
|
||||
|
|
|
|||
|
|
@ -78,6 +78,7 @@ import {
|
|||
buildSliceSummaryExcerpt,
|
||||
getDependencyTaskSummaryPaths,
|
||||
getPriorTaskSummaryPaths,
|
||||
extractSliceExecutionExcerpt,
|
||||
} from "./summary-helpers.js";
|
||||
import { composeInlinedContext } from "./unit-context-composer.js";
|
||||
import { getUatType } from "./verdict-parser.js";
|
||||
|
|
@ -336,7 +337,7 @@ export function buildSourceFilePaths(base, mid, sid) {
|
|||
* If parsing fails (unrecognizable frontmatter, missing id, etc.) the
|
||||
* function falls back to `inlineFile` so the closer loses no information.
|
||||
*/
|
||||
// Re-exported from summary-helpers.js:
|
||||
// Imported from summary-helpers.js:
|
||||
// - buildSliceSummaryExcerpt, getPriorTaskSummaryPaths
|
||||
// - getDependencyTaskSummaryPaths, isSummaryCleanForSkip
|
||||
// - extractSliceExecutionExcerpt
|
||||
|
|
|
|||
|
|
@ -32,6 +32,10 @@ import { UokGateRunner } from "./uok/gate-runner.js";
|
|||
import { MultiPackageGate } from "./uok/multi-package-gate.js";
|
||||
import { OutcomeLearningGate } from "./uok/outcome-learning-gate.js";
|
||||
import { SecurityGate } from "./uok/security-gate.js";
|
||||
import {
|
||||
formatExecuteTaskRecoveryStatus,
|
||||
inspectExecuteTaskDurability,
|
||||
} from "./uok/unit-runtime.js";
|
||||
import { extractVerdict } from "./verdict-parser.js";
|
||||
import { writeVerificationJSON } from "./verification-evidence.js";
|
||||
import {
|
||||
|
|
@ -42,6 +46,38 @@ import {
|
|||
} from "./verification-gate.js";
|
||||
import { logError, logWarning } from "./workflow-logger.js";
|
||||
|
||||
function computeTokenCountFromSession(ctx) {
|
||||
const entries = ctx.sessionManager?.getEntries?.() ?? [];
|
||||
let total = 0;
|
||||
for (const entry of entries) {
|
||||
if (entry.type !== "message") continue;
|
||||
const msg = entry.message;
|
||||
if (!msg || msg.role !== "assistant") continue;
|
||||
if (msg.usage?.totalTokens != null) {
|
||||
total += msg.usage.totalTokens;
|
||||
}
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
||||
function getMemoryPressureMB() {
|
||||
try {
|
||||
const mem = process.memoryUsage();
|
||||
return Math.round(mem.heapUsed / 1024 / 1024);
|
||||
} catch {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
function buildGateOutcomesSummary(gateIds, gateResults) {
|
||||
if (!gateIds || !gateResults || gateIds.length === 0) return undefined;
|
||||
const outcomes = {};
|
||||
for (let i = 0; i < gateIds.length; i++) {
|
||||
outcomes[gateIds[i]] = gateResults[i]?.outcome ?? "unknown";
|
||||
}
|
||||
return outcomes;
|
||||
}
|
||||
|
||||
function isInfraVerificationFailure(stderr) {
|
||||
return /\b(ENOENT|ENOTFOUND|ETIMEDOUT|ECONNRESET|EAI_AGAIN|spawn\s+\S+\s+ENOENT|command not found)\b/i.test(
|
||||
stderr,
|
||||
|
|
@ -259,6 +295,8 @@ export async function runPostUnitVerification(vctx, pauseAuto) {
|
|||
}
|
||||
// ── Zone 2: Ancillary post-gate work (inner try) ─────────────────────────
|
||||
// Failures here are non-fatal — evidence writes, UOK gate calls, notifications, retry logic.
|
||||
let gateIds = [];
|
||||
let gateResults = [];
|
||||
try {
|
||||
if (uokFlags.gates) {
|
||||
const gateRunner = new UokGateRunner();
|
||||
|
|
@ -304,8 +342,8 @@ export async function runPostUnitVerification(vctx, pauseAuto) {
|
|||
iteration: s.verificationRetryCount.get(s.currentUnit.id) ?? 0,
|
||||
};
|
||||
|
||||
const gateIds = gateRunner.list().map((g) => g.id);
|
||||
const gateResults = await Promise.all(
|
||||
gateIds = gateRunner.list().map((g) => g.id);
|
||||
gateResults = await Promise.all(
|
||||
gateIds.map((id) =>
|
||||
gateRunner
|
||||
.run(id, {
|
||||
|
|
@ -434,13 +472,39 @@ export async function runPostUnitVerification(vctx, pauseAuto) {
|
|||
}
|
||||
// Write verification evidence JSON
|
||||
const attempt = s.verificationRetryCount.get(s.currentUnit.id) ?? 0;
|
||||
const tokenCount = computeTokenCountFromSession(ctx);
|
||||
const memoryPressureMB = getMemoryPressureMB();
|
||||
const gateOutcomes = buildGateOutcomesSummary(gateIds, gateResults);
|
||||
let recoveryStatus;
|
||||
try {
|
||||
const durability = await inspectExecuteTaskDurability(
|
||||
s.basePath,
|
||||
s.currentUnit.id,
|
||||
);
|
||||
if (durability) {
|
||||
recoveryStatus = formatExecuteTaskRecoveryStatus(durability);
|
||||
}
|
||||
} catch {
|
||||
recoveryStatus = undefined;
|
||||
}
|
||||
if (mid && sid && tid) {
|
||||
try {
|
||||
const sDir = resolveSlicePath(s.basePath, mid, sid);
|
||||
if (sDir) {
|
||||
const tasksDir = join(sDir, "tasks");
|
||||
if (result.passed) {
|
||||
writeVerificationJSON(result, tasksDir, tid, s.currentUnit.id);
|
||||
writeVerificationJSON(
|
||||
result,
|
||||
tasksDir,
|
||||
tid,
|
||||
s.currentUnit.id,
|
||||
undefined,
|
||||
undefined,
|
||||
tokenCount,
|
||||
memoryPressureMB,
|
||||
gateOutcomes,
|
||||
recoveryStatus,
|
||||
);
|
||||
} else {
|
||||
const nextAttempt = attempt + 1;
|
||||
writeVerificationJSON(
|
||||
|
|
@ -450,6 +514,10 @@ export async function runPostUnitVerification(vctx, pauseAuto) {
|
|||
s.currentUnit.id,
|
||||
nextAttempt,
|
||||
maxRetries,
|
||||
tokenCount,
|
||||
memoryPressureMB,
|
||||
gateOutcomes,
|
||||
recoveryStatus,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -617,6 +685,10 @@ export async function runPostUnitVerification(vctx, pauseAuto) {
|
|||
postExecChecks,
|
||||
postExecBlockingFailure ? attempt + 1 : undefined,
|
||||
postExecBlockingFailure ? maxRetries : undefined,
|
||||
tokenCount,
|
||||
memoryPressureMB,
|
||||
gateOutcomes,
|
||||
recoveryStatus,
|
||||
);
|
||||
}
|
||||
} catch (evidenceErr) {
|
||||
|
|
@ -703,6 +775,10 @@ function writeVerificationJSONWithPostExec(
|
|||
postExecutionChecks,
|
||||
retryAttempt,
|
||||
maxRetries,
|
||||
tokenCount,
|
||||
memoryPressureMB,
|
||||
gateOutcomes,
|
||||
recoveryStatus,
|
||||
) {
|
||||
mkdirSync(tasksDir, { recursive: true });
|
||||
const evidence = {
|
||||
|
|
@ -720,6 +796,10 @@ function writeVerificationJSONWithPostExec(
|
|||
})),
|
||||
...(retryAttempt !== undefined ? { retryAttempt } : {}),
|
||||
...(maxRetries !== undefined ? { maxRetries } : {}),
|
||||
...(tokenCount !== undefined ? { tokenCount } : {}),
|
||||
...(memoryPressureMB !== undefined ? { memoryPressureMB } : {}),
|
||||
...(gateOutcomes !== undefined ? { gateOutcomes } : {}),
|
||||
...(recoveryStatus !== undefined ? { recoveryStatus } : {}),
|
||||
postExecutionChecks,
|
||||
};
|
||||
if (result.runtimeErrors && result.runtimeErrors.length > 0) {
|
||||
|
|
|
|||
|
|
@ -211,6 +211,33 @@ export {
|
|||
// Tests in auto-session-encapsulation.test.ts enforce this invariant.
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
const s = getAutoSession();
|
||||
/** Unsubscribe function for the Ctrl+C → pause intercept registered on autonomous start. */
|
||||
let _ctrlCUnsubscribe = null;
|
||||
/**
|
||||
* Register a terminal input listener that intercepts Ctrl+C while autonomous
|
||||
* mode is active and routes the first press to pauseAuto() instead of letting
|
||||
* it silently clear the editor.
|
||||
*
|
||||
* Purpose: give the user a reliable single-keypress escape from a running
|
||||
* autonomous loop without requiring the double-press exit threshold.
|
||||
*/
|
||||
function registerCtrlCInterceptor(ctx) {
|
||||
_unregisterCtrlCInterceptor();
|
||||
if (typeof ctx?.ui?.onTerminalInput !== "function") return;
|
||||
_ctrlCUnsubscribe = ctx.ui.onTerminalInput((data) => {
|
||||
if (data !== "\x03") return undefined;
|
||||
if (!s.active) return undefined;
|
||||
ctx.ui.notify("Ctrl+C received — pausing autonomous mode.", "info");
|
||||
void pauseAuto(ctx, null, "ctrl-c-interrupt");
|
||||
return { consume: true };
|
||||
});
|
||||
}
|
||||
function _unregisterCtrlCInterceptor() {
|
||||
if (_ctrlCUnsubscribe) {
|
||||
_ctrlCUnsubscribe();
|
||||
_ctrlCUnsubscribe = null;
|
||||
}
|
||||
}
|
||||
/** Throttle STATE.md rebuilds — at most once per 30 seconds */
|
||||
const _STATE_REBUILD_MIN_INTERVAL_MS = 30_000;
|
||||
function captureProjectRootEnv(projectRoot) {
|
||||
|
|
@ -704,6 +731,7 @@ function cleanupAfterLoopExit(ctx) {
|
|||
s.currentUnit = null;
|
||||
s.active = false;
|
||||
s.runControl = "manual";
|
||||
_unregisterCtrlCInterceptor();
|
||||
deactivateSF();
|
||||
clearUnitTimeout();
|
||||
restoreProjectRootEnv();
|
||||
|
|
@ -747,6 +775,7 @@ function cleanupAfterLoopExit(ctx) {
|
|||
}
|
||||
export async function stopAuto(ctx, pi, reason) {
|
||||
if (!s.active && !s.paused) return;
|
||||
_unregisterCtrlCInterceptor();
|
||||
const loadedPreferences = loadEffectiveSFPreferences()?.preferences;
|
||||
const reasonSuffix = reason ? ` — ${reason}` : "";
|
||||
try {
|
||||
|
|
@ -1677,6 +1706,7 @@ export async function startAuto(ctx, pi, base, verboseMode, options) {
|
|||
s.runControl = requestedStepMode ? "assisted" : "autonomous";
|
||||
s.cmdCtx = ctx;
|
||||
s.basePath = base;
|
||||
registerCtrlCInterceptor(ctx);
|
||||
// Ensure the workflow-logger audit log is pinned to the project root
|
||||
// even when autonomous mode is entered via a path that bypasses the
|
||||
// bootstrap/dynamic-tools ensureDbOpen() → setLogBasePath() chain
|
||||
|
|
@ -1943,6 +1973,7 @@ export async function dispatchHookUnit(
|
|||
s.autoStartTime = Date.now();
|
||||
s.currentUnit = null;
|
||||
s.pendingQuickTasks = [];
|
||||
registerCtrlCInterceptor(hookCtx);
|
||||
}
|
||||
const hookUnitType = `hook/${hookName}`;
|
||||
const hookStartedAt = Date.now();
|
||||
|
|
|
|||
|
|
@ -116,6 +116,10 @@ export async function handleAutonomousCommand(trimmed, ctx, pi) {
|
|||
});
|
||||
return true;
|
||||
}
|
||||
if (trimmed === "stop") {
|
||||
await stopAutonomousRun(ctx, pi);
|
||||
return true;
|
||||
}
|
||||
if (isAutonomousVerb) {
|
||||
const autonomousArgsText = trimmed.replace(/^autonomous\b/, "").trim();
|
||||
if (autonomousArgsText === "stop") {
|
||||
|
|
|
|||
|
|
@ -4212,7 +4212,8 @@ function hasTaskSpecIntent(planning = {}) {
|
|||
}
|
||||
function insertTaskSpecIfAbsent(milestoneId, sliceId, taskId, planning = {}) {
|
||||
if (!hasTaskSpecIntent(planning)) return;
|
||||
const frontmatter = taskFrontmatterFromRecord(planning).normalized;
|
||||
const { normalized: frontmatter, errors } = taskFrontmatterFromRecord(planning);
|
||||
if (errors?.length) logWarning("sf-db:insertTaskSpec", `frontmatter validation errors for ${milestoneId}/${sliceId}/${taskId}: ${errors.join(", ")}`);
|
||||
currentDb
|
||||
.prepare(`INSERT OR IGNORE INTO task_specs (
|
||||
milestone_id, slice_id, task_id, verify, inputs, expected_output,
|
||||
|
|
@ -4433,7 +4434,8 @@ export function setTaskBlockerDiscovered(
|
|||
export function upsertTaskPlanning(milestoneId, sliceId, taskId, planning) {
|
||||
if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
|
||||
insertTaskSpecIfAbsent(milestoneId, sliceId, taskId, planning);
|
||||
const frontmatter = taskFrontmatterFromRecord(planning).normalized;
|
||||
const { normalized: frontmatter, errors: fmErrors } = taskFrontmatterFromRecord(planning);
|
||||
if (fmErrors?.length) logWarning("sf-db:upsertTaskPlanning", `frontmatter validation errors for ${milestoneId}/${sliceId}/${taskId}: ${fmErrors.join(", ")}`);
|
||||
const hasTaskStatus =
|
||||
planning.taskStatus !== undefined ||
|
||||
planning.task_status !== undefined ||
|
||||
|
|
|
|||
|
|
@ -195,3 +195,44 @@ export function isSummaryCleanForSkip(content) {
|
|||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
function escapeRegExpLocal(value) {
|
||||
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
||||
}
|
||||
|
||||
function extractMarkdownSectionLocal(content, heading) {
|
||||
const match = new RegExp(`^## ${escapeRegExpLocal(heading)}\\s*$`, "m").exec(content);
|
||||
if (!match) return null;
|
||||
const start = match.index + match[0].length;
|
||||
const rest = content.slice(start);
|
||||
const nextHeading = rest.match(/^##\s+/m);
|
||||
const end = nextHeading?.index ?? rest.length;
|
||||
return rest.slice(0, end).trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract key sections from a slice PLAN.md for use in task execution prompts.
|
||||
* Returns Goal, Demo, Verification, and Observability sections as a compact excerpt.
|
||||
*
|
||||
* Purpose: give task executors the slice-level contract without inlining the full plan.
|
||||
* Consumer: auto-prompts.js buildExecuteTask*.
|
||||
*/
|
||||
export function extractSliceExecutionExcerpt(content, relPath) {
|
||||
if (!content) {
|
||||
return [
|
||||
"## Slice Plan Excerpt",
|
||||
`Slice plan not found at dispatch time. Read \`${relPath}\` before running slice-level verification.`,
|
||||
].join("\n");
|
||||
}
|
||||
const lines = content.split("\n");
|
||||
const goalLine = lines.find((line) => line.startsWith("**Goal:**"))?.trim();
|
||||
const demoLine = lines.find((line) => line.startsWith("**Demo:**"))?.trim();
|
||||
const verification = extractMarkdownSectionLocal(content, "Verification");
|
||||
const observability = extractMarkdownSectionLocal(content, "Observability / Diagnostics");
|
||||
const parts = ["## Slice Plan Excerpt", `Source: \`${relPath}\``];
|
||||
if (goalLine) parts.push(goalLine);
|
||||
if (demoLine) parts.push(demoLine);
|
||||
if (verification) parts.push("", "### Slice Verification", verification.trim());
|
||||
if (observability) parts.push("", "### Slice Observability / Diagnostics", observability.trim());
|
||||
return parts.join("\n");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ import {
|
|||
clearRunawayRecoveredRuntimeRecords,
|
||||
clearUnitRuntimeRecord,
|
||||
decideUnitRuntimeDispatch,
|
||||
getRecoveryDiagnostics,
|
||||
getUnitRuntimeState,
|
||||
isTerminalUnitRuntimeStatus,
|
||||
listUnitRuntimeRecords,
|
||||
|
|
@ -377,3 +378,72 @@ test("listUnitRuntimeRecords_returns_empty_when_dir_missing", () => {
|
|||
const records = listUnitRuntimeRecords(root);
|
||||
assert.deepEqual(records, []);
|
||||
});
|
||||
|
||||
// ─── getRecoveryDiagnostics ────────────────────────────────────────────────
|
||||
|
||||
test("getRecoveryDiagnostics_returns_null_for_missing_record", () => {
|
||||
const root = makeProject();
|
||||
const diagnostics = getRecoveryDiagnostics(root, "execute-task", "MISSING");
|
||||
assert.equal(diagnostics, null);
|
||||
});
|
||||
|
||||
test("getRecoveryDiagnostics_returns_structured_object_for_record_with_recovery", () => {
|
||||
const root = makeProject();
|
||||
const t = Date.now();
|
||||
writeUnitRuntimeRecord(root, "execute-task", "M001/S01/T01", t, {
|
||||
status: "failed",
|
||||
recoveryAttempts: 2,
|
||||
retryCount: 2,
|
||||
maxRetries: 3,
|
||||
lastRecoveryReason: "timeout",
|
||||
progressCount: 5,
|
||||
lastProgressKind: "checkpoint",
|
||||
lineageEvent: {
|
||||
status: "started",
|
||||
workerSessionId: "worker-1",
|
||||
},
|
||||
});
|
||||
const diagnostics = getRecoveryDiagnostics(
|
||||
root,
|
||||
"execute-task",
|
||||
"M001/S01/T01",
|
||||
);
|
||||
assert.ok(diagnostics);
|
||||
assert.equal(diagnostics.unitType, "execute-task");
|
||||
assert.equal(diagnostics.unitId, "M001/S01/T01");
|
||||
assert.equal(diagnostics.status, "failed");
|
||||
assert.equal(diagnostics.retryCount, 2);
|
||||
assert.equal(diagnostics.maxRetries, 3);
|
||||
assert.equal(diagnostics.lastRecoveryReason, "timeout");
|
||||
assert.equal(diagnostics.progressCount, 5);
|
||||
assert.equal(diagnostics.lastProgressKind, "checkpoint");
|
||||
assert.equal(diagnostics.recoveryAttempts, 2);
|
||||
assert.ok(diagnostics.lineageSummary);
|
||||
assert.equal(diagnostics.lineageSummary.status, "started");
|
||||
assert.equal(diagnostics.lineageSummary.workerCount, 1);
|
||||
assert.equal(diagnostics.lineageSummary.eventCount, 1);
|
||||
assert.equal(diagnostics.startedAt, t);
|
||||
assert.ok(diagnostics.updatedAt);
|
||||
});
|
||||
|
||||
test("getRecoveryDiagnostics_returns_minimal_object_for_record_without_recovery", () => {
|
||||
const root = makeProject();
|
||||
const t = Date.now();
|
||||
writeUnitRuntimeRecord(root, "execute-task", "M001/S01/T02", t, {
|
||||
status: "running",
|
||||
});
|
||||
const diagnostics = getRecoveryDiagnostics(
|
||||
root,
|
||||
"execute-task",
|
||||
"M001/S01/T02",
|
||||
);
|
||||
assert.ok(diagnostics);
|
||||
assert.equal(diagnostics.status, "running");
|
||||
assert.equal(diagnostics.retryCount, 0);
|
||||
assert.equal(diagnostics.maxRetries, 1);
|
||||
assert.equal(diagnostics.lastRecoveryReason, null);
|
||||
assert.equal(diagnostics.progressCount, 0);
|
||||
assert.equal(diagnostics.lastProgressKind, "dispatch");
|
||||
assert.equal(diagnostics.recoveryAttempts, 0);
|
||||
assert.equal(diagnostics.lineageSummary, null);
|
||||
});
|
||||
|
|
|
|||
32
src/resources/extensions/sf/uok/unit-runtime.d.ts
vendored
Normal file
32
src/resources/extensions/sf/uok/unit-runtime.d.ts
vendored
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
/**
|
||||
* Type declarations for unit-runtime.js
|
||||
*/
|
||||
|
||||
export interface RecoveryDiagnostics {
|
||||
unitType: string;
|
||||
unitId: string;
|
||||
status: string;
|
||||
retryCount: number;
|
||||
maxRetries: number;
|
||||
lastRecoveryReason: string | null;
|
||||
progressCount: number;
|
||||
lastProgressKind: string;
|
||||
recoveryAttempts: number;
|
||||
lineageSummary: {
|
||||
status: string;
|
||||
workerCount: number;
|
||||
eventCount: number;
|
||||
} | null;
|
||||
updatedAt: number | null;
|
||||
startedAt: number | null;
|
||||
}
|
||||
|
||||
export function getRecoveryDiagnostics(
|
||||
basePath: string,
|
||||
unitType: string,
|
||||
unitId: string,
|
||||
): RecoveryDiagnostics | null;
|
||||
|
||||
export function listUnitRuntimeRecords(
|
||||
basePath: string,
|
||||
): Array<Record<string, unknown> & { updatedAt?: number; unitId: string }>;
|
||||
|
|
@ -582,6 +582,43 @@ export function formatExecuteTaskRecoveryStatus(status) {
|
|||
? missing.join("; ")
|
||||
: "all durable task artifacts present";
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the runtime record for a unit and return structured recovery diagnostics.
|
||||
*
|
||||
* Purpose: surface runtime record state for post-mortem debugging of autonomous
|
||||
* failures without requiring humans to parse `.sf/runtime/units/*.json` manually.
|
||||
*
|
||||
* Consumer: `sf status recovery` CLI command and verification evidence enrichment.
|
||||
*/
|
||||
export function getRecoveryDiagnostics(basePath, unitType, unitId) {
|
||||
const record = readUnitRuntimeRecord(basePath, unitType, unitId);
|
||||
if (!record) {
|
||||
return null;
|
||||
}
|
||||
const state = getUnitRuntimeState(record);
|
||||
const lineageSummary = record.lineage
|
||||
? {
|
||||
status: record.lineage.status,
|
||||
workerCount: record.lineage.workerSessionIds?.length ?? 0,
|
||||
eventCount: record.lineage.events?.length ?? 0,
|
||||
}
|
||||
: null;
|
||||
return {
|
||||
unitType,
|
||||
unitId,
|
||||
status: state.status,
|
||||
retryCount: state.retryCount,
|
||||
maxRetries: state.maxRetries,
|
||||
lastRecoveryReason: record.lastRecoveryReason ?? null,
|
||||
progressCount: record.progressCount ?? 0,
|
||||
lastProgressKind: record.lastProgressKind ?? "dispatch",
|
||||
recoveryAttempts: record.recoveryAttempts ?? 0,
|
||||
lineageSummary,
|
||||
updatedAt: record.updatedAt ?? null,
|
||||
startedAt: record.startedAt ?? null,
|
||||
};
|
||||
}
|
||||
// ─── Stale slice runtime record reconciliation ──────────────────────────────
|
||||
/**
|
||||
* Clear unit runtime records for complete-slice units that are in a terminal
|
||||
|
|
|
|||
|
|
@ -24,6 +24,10 @@ export function writeVerificationJSON(
|
|||
unitId,
|
||||
retryAttempt,
|
||||
maxRetries,
|
||||
tokenCount,
|
||||
memoryPressureMB,
|
||||
gateOutcomes,
|
||||
recoveryStatus,
|
||||
) {
|
||||
mkdirSync(tasksDir, { recursive: true });
|
||||
const evidence = {
|
||||
|
|
@ -41,6 +45,10 @@ export function writeVerificationJSON(
|
|||
})),
|
||||
...(retryAttempt !== undefined ? { retryAttempt } : {}),
|
||||
...(maxRetries !== undefined ? { maxRetries } : {}),
|
||||
...(tokenCount !== undefined ? { tokenCount } : {}),
|
||||
...(memoryPressureMB !== undefined ? { memoryPressureMB } : {}),
|
||||
...(gateOutcomes !== undefined ? { gateOutcomes } : {}),
|
||||
...(recoveryStatus !== undefined ? { recoveryStatus } : {}),
|
||||
};
|
||||
if (result.runtimeErrors && result.runtimeErrors.length > 0) {
|
||||
evidence.runtimeErrors = result.runtimeErrors.map((e) => ({
|
||||
|
|
|
|||
53
todo.md
Normal file
53
todo.md
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
# TODO
|
||||
|
||||
Unimplemented items consolidated from root *.md files. Source file noted for each item.
|
||||
|
||||
---
|
||||
|
||||
## Critical / Correctness
|
||||
|
||||
- [x] Port `fix(security): harden project-controlled surfaces` — env isolation + transport cleanup done; gsd-2 trust/dedup hunks (server.ts, mcp-client/index.ts) not applicable (packages absent) *(BUILD_PLAN.md Tier 0.5 #2)*
|
||||
- [ ] Port agent-session/agent-end transition fixes (gsd-2 `71114fccf`, `6d7e4gcb5`, `c162c44bf`, `e3bd04551`) *(BUILD_PLAN.md Tier 0.5 #7-10, UPSTREAM_CHERRY_PICK_CANDIDATES.md Cluster B)*
|
||||
- [ ] Cloudflare Workers AI provider — `CLOUDFLARE_API_KEY`/`CLOUDFLARE_ACCOUNT_ID` (pi-mono PR #3851) *(BUILD_PLAN.md Tier 0 #8)*
|
||||
|
||||
---
|
||||
|
||||
## Architecture / Design Gaps
|
||||
|
||||
- [ ] Schema reconciliation: update SPEC.md to 3-table model (milestones/slices/tasks vs single `units`) *(BUILD_PLAN.md Tier 1.3)*
|
||||
- [ ] Persistent agents v1 command surface — `/sf agent run|reset|delete|inspect` *(BUILD_PLAN.md Tier 2.1)*
|
||||
- [ ] Intent chapters (`chapter_open`/`chapter_close` — crash-resume context) *(BUILD_PLAN.md Tier 2.3)*
|
||||
- [ ] PhaseReview 3-pass review (establish-context → parallel chunked → synthesis) *(BUILD_PLAN.md Tier 2.4)*
|
||||
- [ ] `last_error` cap to 4 KB head+tail; full payload to file *(BUILD_PLAN.md Tier 2.6)*
|
||||
- [ ] Port workflow state machine hardening (gsd-2 `f2377eedd`, `b9a1c6743`, `153fb328a`, `381ccdef5`, `371b2eb31`) *(BUILD_PLAN.md Tier 0.5 #13, UPSTREAM_CHERRY_PICK_CANDIDATES.md Cluster F)*
|
||||
- [ ] Port `fix(claude-code-cli): persist Always Allow for non-Bash tools` (gsd-2 `a88baeae9`) *(BUILD_PLAN.md Tier 0.5 #11)*
|
||||
|
||||
---
|
||||
|
||||
## Medium Priority / Quality
|
||||
|
||||
- [ ] Replace `isHeavyModelId()` name-matching heuristic with capability-based check *(PRODUCTION_AUDIT_GRADE.md #9, PRODUCTION_AUDIT.md 3.3)*
|
||||
- [ ] Add `version` field to task frontmatter and mode state (schema versioning) *(PRODUCTION_AUDIT_GRADE.md #8)*
|
||||
- [ ] Integration tests for full remote steering pipeline *(PRODUCTION_AUDIT.md Long Term #10)*
|
||||
- [x] Log `frontmatterErrors` in sf-db.js instead of silently dropping validation errors *(PRODUCTION_AUDIT.md 3.1)*
|
||||
- [ ] Search provider registry refactor — consolidate provider list across files into `SearchProviderRegistry` *(BUILD_PLAN.md Tier 1+)*
|
||||
- [ ] Update ARCHITECTURE.md self-evolution section (triage pipeline IS active; injection IS automatic now) *(ARCHITECTURE.md)*
|
||||
- [ ] Add Mermaid state machine diagram to ARCHITECTURE.md *(ARCHITECTURE.md)*
|
||||
- [ ] Symlinked packages/resources/skills/sessions dedup (pi-mono PR #3818) *(BUILD_PLAN.md Tier 0 #6)*
|
||||
|
||||
---
|
||||
|
||||
## Long-term / Deferred
|
||||
|
||||
- [ ] Singularity Knowledge + Agent Platform (Go re-platform, ~12 weeks) *(BUILD_PLAN.md Tier 1+)*
|
||||
- [ ] sf-worker SSH host (Go, `wish` + `xpty`, ~3 weeks) *(BUILD_PLAN.md Tier 4)*
|
||||
- [ ] Charm TUI client (`sf-tui` in Go, ~12-16 weeks) *(BUILD_PLAN.md Tier 1+)*
|
||||
- [ ] Flight recorder (`x/vcr`, ~3 weeks) *(BUILD_PLAN.md Tier 1+)*
|
||||
- [ ] Full swarm chat for `subagent` tool (Option C, depends on persistent-agent layer) *(BUILD_PLAN.md Tier 1+)*
|
||||
- [ ] Caveman input-side prompt compression (rewrite execute-task/plan-slice prompts) *(BUILD_PLAN.md Tier 1+)*
|
||||
- [ ] Runtime input preprocessor (`terse_prompts: true` dispatch transform, ~3-4 days) *(BUILD_PLAN.md Tier 1+)*
|
||||
- [ ] Judge calibration + eval runner service (Go/Charm, ~2-3 weeks post SM) *(BUILD_PLAN.md Tier 1+)*
|
||||
- [ ] M009 promote-only adoption review — create `sf schedule` entry (2 weeks after M009 close) *(BACKLOG.md)*
|
||||
- [ ] Establish pi-mono SDK sync cadence (recurring check schedule) *(BUILD_PLAN.md Tier 1+)*
|
||||
- [ ] `scripts/port-from-gsd2.sh` automation script *(UPSTREAM_PORT_GUIDE.md)*
|
||||
- [ ] TypeScript migration for UOK modules (`kernel.js`, etc.) *(PRODUCTION_AUDIT_COMPLETE.md, PRODUCTION_AUDIT_GRADE.md)*
|
||||
Loading…
Add table
Reference in a new issue