diff --git a/.gitignore b/.gitignore index e56cee0fc..e9b58d0cb 100644 --- a/.gitignore +++ b/.gitignore @@ -99,5 +99,6 @@ bun.lock repowise.db .sf/mcp.json .sf.migrating/ +.sf/evals/ .sf/interactive.lock .sf/interactive.lock.d/ diff --git a/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/report.json b/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/report.json deleted file mode 100644 index 5093269a5..000000000 --- a/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/report.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "schemaVersion": "sf-autonomous-solver-eval/v1", - "runId": "auto-2026-05-06T22-58-47-919Z", - "createdAt": "2026-05-06T22:58:48.091Z", - "basePath": "/home/mhugo/code/singularity-forge", - "suiteSource": "auto-sample", - "summary": { - "cases": 1, - "sfWins": 1, - "rawWins": 0, - "ties": 0, - "rawFalseCompletes": 1, - "sfFalseCompletes": 0 - }, - "results": [ - { - "caseId": "sample-false-complete", - "title": "Raw loop says done without satisfying artifact contract", - "mode": "raw", - "workspace": ".sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/raw", - "command": { - "command": [ - "/home/mhugo/.local/share/mise/installs/node/24.15.0/bin/node", - "-e", - "require('node:fs').writeFileSync('done.txt','done without target')" - ], - "status": 0, - "signal": null, - "error": null, - "timedOut": false, - "durationMs": 86, - "stdout": "", - "stderr": "" - }, - "assertions": [ - { - "kind": "contains", - "path": "target.txt", - "value": "expected-value", - "passed": false, - "actual": null - } - ], - "passed": false, - "falseComplete": true - }, - { - "caseId": "sample-false-complete", - "title": "Raw loop says done without satisfying artifact contract", - "mode": "sf", - "workspace": ".sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/sf", - "command": { - "command": [ - "/home/mhugo/.local/share/mise/installs/node/24.15.0/bin/node", - "-e", - "const fs=require('node:fs');fs.mkdirSync('.sf/runtime/autonomous-solver',{recursive:true});fs.writeFileSync('target.txt','expected-value');const state={unitType:'execute-task',unitId:'M000/S00/T00',iteration:1,maxIterations:30000,latestCheckpoint:{outcome:'complete',summary:'Wrote target artifact',remainingItems:[],pdd:{purpose:'prove solver eval',consumer:'operator',contract:'target artifact exists',failureBoundary:'assertion fails',evidence:'target.txt',nonGoals:'no model call',invariants:'same fixture',assumptions:'node works'}}};fs.writeFileSync('.sf/runtime/autonomous-solver/active.json',JSON.stringify(state,null,2));fs.writeFileSync('.sf/runtime/autonomous-solver/iterations.jsonl',JSON.stringify(state.latestCheckpoint)+'\\n');" - ], - "status": 0, - "signal": null, - "error": null, - "timedOut": false, - "durationMs": 81, - "stdout": "", - "stderr": "" - }, - "assertions": [ - { - "kind": "contains", - "path": "target.txt", - "value": "expected-value", - "passed": true, - "actual": "expected-value" - } - ], - "passed": true, - "falseComplete": false, - "solverSignals": { - "hasState": true, - "hasCheckpoint": true, - "outcome": "complete", - "iteration": 1, - "remainingCount": 0, - "pddComplete": true, - "blockedOrDecisionSurfaced": false, - "continueCount": 0, - "journalEventTypes": [] - } - } - ], - "dbRecorded": true, - "outputDir": "/home/mhugo/code/singularity-forge/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z", - "relativeOutputDir": ".sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z", - "reportPath": ".sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/report.json", - "resultsPath": ".sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/results.jsonl" -} diff --git a/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/results.jsonl b/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/results.jsonl deleted file mode 100644 index 4571a1e14..000000000 --- a/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/results.jsonl +++ /dev/null @@ -1,2 +0,0 @@ -{"caseId":"sample-false-complete","title":"Raw loop says done without satisfying artifact contract","mode":"raw","workspace":".sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/raw","command":{"command":["/home/mhugo/.local/share/mise/installs/node/24.15.0/bin/node","-e","require('node:fs').writeFileSync('done.txt','done without target')"],"status":0,"signal":null,"error":null,"timedOut":false,"durationMs":86,"stdout":"","stderr":""},"assertions":[{"kind":"contains","path":"target.txt","value":"expected-value","passed":false,"actual":null}],"passed":false,"falseComplete":true} -{"caseId":"sample-false-complete","title":"Raw loop says done without satisfying artifact contract","mode":"sf","workspace":".sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/sf","command":{"command":["/home/mhugo/.local/share/mise/installs/node/24.15.0/bin/node","-e","const fs=require('node:fs');fs.mkdirSync('.sf/runtime/autonomous-solver',{recursive:true});fs.writeFileSync('target.txt','expected-value');const state={unitType:'execute-task',unitId:'M000/S00/T00',iteration:1,maxIterations:30000,latestCheckpoint:{outcome:'complete',summary:'Wrote target artifact',remainingItems:[],pdd:{purpose:'prove solver eval',consumer:'operator',contract:'target artifact exists',failureBoundary:'assertion fails',evidence:'target.txt',nonGoals:'no model call',invariants:'same fixture',assumptions:'node works'}}};fs.writeFileSync('.sf/runtime/autonomous-solver/active.json',JSON.stringify(state,null,2));fs.writeFileSync('.sf/runtime/autonomous-solver/iterations.jsonl',JSON.stringify(state.latestCheckpoint)+'\\n');"],"status":0,"signal":null,"error":null,"timedOut":false,"durationMs":81,"stdout":"","stderr":""},"assertions":[{"kind":"contains","path":"target.txt","value":"expected-value","passed":true,"actual":"expected-value"}],"passed":true,"falseComplete":false,"solverSignals":{"hasState":true,"hasCheckpoint":true,"outcome":"complete","iteration":1,"remainingCount":0,"pddComplete":true,"blockedOrDecisionSurfaced":false,"continueCount":0,"journalEventTypes":[]}} diff --git a/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/raw/done.txt b/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/raw/done.txt deleted file mode 100644 index 2c4bc3ad6..000000000 --- a/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/raw/done.txt +++ /dev/null @@ -1 +0,0 @@ -done without target \ No newline at end of file diff --git a/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/raw/package.json b/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/raw/package.json deleted file mode 100644 index 0e285efdc..000000000 --- a/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/raw/package.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "name": "solver-eval-sample", - "version": "1.0.0" -} \ No newline at end of file diff --git a/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/sf/.sf/runtime/autonomous-solver/active.json b/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/sf/.sf/runtime/autonomous-solver/active.json deleted file mode 100644 index 4a82826ea..000000000 --- a/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/sf/.sf/runtime/autonomous-solver/active.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "unitType": "execute-task", - "unitId": "M000/S00/T00", - "iteration": 1, - "maxIterations": 30000, - "latestCheckpoint": { - "outcome": "complete", - "summary": "Wrote target artifact", - "remainingItems": [], - "pdd": { - "purpose": "prove solver eval", - "consumer": "operator", - "contract": "target artifact exists", - "failureBoundary": "assertion fails", - "evidence": "target.txt", - "nonGoals": "no model call", - "invariants": "same fixture", - "assumptions": "node works" - } - } -} \ No newline at end of file diff --git a/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/sf/.sf/runtime/autonomous-solver/iterations.jsonl b/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/sf/.sf/runtime/autonomous-solver/iterations.jsonl deleted file mode 100644 index 7fe4557f3..000000000 --- a/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/sf/.sf/runtime/autonomous-solver/iterations.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"outcome":"complete","summary":"Wrote target artifact","remainingItems":[],"pdd":{"purpose":"prove solver eval","consumer":"operator","contract":"target artifact exists","failureBoundary":"assertion fails","evidence":"target.txt","nonGoals":"no model call","invariants":"same fixture","assumptions":"node works"}} diff --git a/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/sf/package.json b/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/sf/package.json deleted file mode 100644 index 0e285efdc..000000000 --- a/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/sf/package.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "name": "solver-eval-sample", - "version": "1.0.0" -} \ No newline at end of file diff --git a/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/sf/target.txt b/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/sf/target.txt deleted file mode 100644 index 12dce33c9..000000000 --- a/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/sf/target.txt +++ /dev/null @@ -1 +0,0 @@ -expected-value \ No newline at end of file diff --git a/src/resources/extensions/sf/sf-db.js b/src/resources/extensions/sf/sf-db.js index a57a669c7..562037868 100644 --- a/src/resources/extensions/sf/sf-db.js +++ b/src/resources/extensions/sf/sf-db.js @@ -4824,8 +4824,8 @@ function headlessRunFromRow(row) { * Persist an autonomous solver eval run and its per-mode case results. * * Purpose: make solver-loop benchmark evidence queryable by SF commands, - * harness flows, UOK, and future memory retention instead of leaving it only - * as JSON files under `.sf/evals`. + * harness flows, UOK, and future memory retention instead of treating ignored + * `.sf/evals` JSON/JSONL evidence files as project state. * * Consumer: `/sf solver-eval` after each run completes. */