chore: ignore generated sf eval outputs
This commit is contained in:
parent
e0d9843cab
commit
a14cd0df29
10 changed files with 3 additions and 131 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -99,5 +99,6 @@ bun.lock
|
||||||
repowise.db
|
repowise.db
|
||||||
.sf/mcp.json
|
.sf/mcp.json
|
||||||
.sf.migrating/
|
.sf.migrating/
|
||||||
|
.sf/evals/
|
||||||
.sf/interactive.lock
|
.sf/interactive.lock
|
||||||
.sf/interactive.lock.d/
|
.sf/interactive.lock.d/
|
||||||
|
|
|
||||||
|
|
@ -1,95 +0,0 @@
|
||||||
{
|
|
||||||
"schemaVersion": "sf-autonomous-solver-eval/v1",
|
|
||||||
"runId": "auto-2026-05-06T22-58-47-919Z",
|
|
||||||
"createdAt": "2026-05-06T22:58:48.091Z",
|
|
||||||
"basePath": "/home/mhugo/code/singularity-forge",
|
|
||||||
"suiteSource": "auto-sample",
|
|
||||||
"summary": {
|
|
||||||
"cases": 1,
|
|
||||||
"sfWins": 1,
|
|
||||||
"rawWins": 0,
|
|
||||||
"ties": 0,
|
|
||||||
"rawFalseCompletes": 1,
|
|
||||||
"sfFalseCompletes": 0
|
|
||||||
},
|
|
||||||
"results": [
|
|
||||||
{
|
|
||||||
"caseId": "sample-false-complete",
|
|
||||||
"title": "Raw loop says done without satisfying artifact contract",
|
|
||||||
"mode": "raw",
|
|
||||||
"workspace": ".sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/raw",
|
|
||||||
"command": {
|
|
||||||
"command": [
|
|
||||||
"/home/mhugo/.local/share/mise/installs/node/24.15.0/bin/node",
|
|
||||||
"-e",
|
|
||||||
"require('node:fs').writeFileSync('done.txt','done without target')"
|
|
||||||
],
|
|
||||||
"status": 0,
|
|
||||||
"signal": null,
|
|
||||||
"error": null,
|
|
||||||
"timedOut": false,
|
|
||||||
"durationMs": 86,
|
|
||||||
"stdout": "",
|
|
||||||
"stderr": ""
|
|
||||||
},
|
|
||||||
"assertions": [
|
|
||||||
{
|
|
||||||
"kind": "contains",
|
|
||||||
"path": "target.txt",
|
|
||||||
"value": "expected-value",
|
|
||||||
"passed": false,
|
|
||||||
"actual": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"passed": false,
|
|
||||||
"falseComplete": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"caseId": "sample-false-complete",
|
|
||||||
"title": "Raw loop says done without satisfying artifact contract",
|
|
||||||
"mode": "sf",
|
|
||||||
"workspace": ".sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/sf",
|
|
||||||
"command": {
|
|
||||||
"command": [
|
|
||||||
"/home/mhugo/.local/share/mise/installs/node/24.15.0/bin/node",
|
|
||||||
"-e",
|
|
||||||
"const fs=require('node:fs');fs.mkdirSync('.sf/runtime/autonomous-solver',{recursive:true});fs.writeFileSync('target.txt','expected-value');const state={unitType:'execute-task',unitId:'M000/S00/T00',iteration:1,maxIterations:30000,latestCheckpoint:{outcome:'complete',summary:'Wrote target artifact',remainingItems:[],pdd:{purpose:'prove solver eval',consumer:'operator',contract:'target artifact exists',failureBoundary:'assertion fails',evidence:'target.txt',nonGoals:'no model call',invariants:'same fixture',assumptions:'node works'}}};fs.writeFileSync('.sf/runtime/autonomous-solver/active.json',JSON.stringify(state,null,2));fs.writeFileSync('.sf/runtime/autonomous-solver/iterations.jsonl',JSON.stringify(state.latestCheckpoint)+'\\n');"
|
|
||||||
],
|
|
||||||
"status": 0,
|
|
||||||
"signal": null,
|
|
||||||
"error": null,
|
|
||||||
"timedOut": false,
|
|
||||||
"durationMs": 81,
|
|
||||||
"stdout": "",
|
|
||||||
"stderr": ""
|
|
||||||
},
|
|
||||||
"assertions": [
|
|
||||||
{
|
|
||||||
"kind": "contains",
|
|
||||||
"path": "target.txt",
|
|
||||||
"value": "expected-value",
|
|
||||||
"passed": true,
|
|
||||||
"actual": "expected-value"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"passed": true,
|
|
||||||
"falseComplete": false,
|
|
||||||
"solverSignals": {
|
|
||||||
"hasState": true,
|
|
||||||
"hasCheckpoint": true,
|
|
||||||
"outcome": "complete",
|
|
||||||
"iteration": 1,
|
|
||||||
"remainingCount": 0,
|
|
||||||
"pddComplete": true,
|
|
||||||
"blockedOrDecisionSurfaced": false,
|
|
||||||
"continueCount": 0,
|
|
||||||
"journalEventTypes": []
|
|
||||||
}
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"dbRecorded": true,
|
|
||||||
"outputDir": "/home/mhugo/code/singularity-forge/.sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z",
|
|
||||||
"relativeOutputDir": ".sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z",
|
|
||||||
"reportPath": ".sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/report.json",
|
|
||||||
"resultsPath": ".sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/results.jsonl"
|
|
||||||
}
|
|
||||||
|
|
@ -1,2 +0,0 @@
|
||||||
{"caseId":"sample-false-complete","title":"Raw loop says done without satisfying artifact contract","mode":"raw","workspace":".sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/raw","command":{"command":["/home/mhugo/.local/share/mise/installs/node/24.15.0/bin/node","-e","require('node:fs').writeFileSync('done.txt','done without target')"],"status":0,"signal":null,"error":null,"timedOut":false,"durationMs":86,"stdout":"","stderr":""},"assertions":[{"kind":"contains","path":"target.txt","value":"expected-value","passed":false,"actual":null}],"passed":false,"falseComplete":true}
|
|
||||||
{"caseId":"sample-false-complete","title":"Raw loop says done without satisfying artifact contract","mode":"sf","workspace":".sf/evals/autonomous-solver/auto-2026-05-06T22-58-47-919Z/workspaces/sample-false-complete/sf","command":{"command":["/home/mhugo/.local/share/mise/installs/node/24.15.0/bin/node","-e","const fs=require('node:fs');fs.mkdirSync('.sf/runtime/autonomous-solver',{recursive:true});fs.writeFileSync('target.txt','expected-value');const state={unitType:'execute-task',unitId:'M000/S00/T00',iteration:1,maxIterations:30000,latestCheckpoint:{outcome:'complete',summary:'Wrote target artifact',remainingItems:[],pdd:{purpose:'prove solver eval',consumer:'operator',contract:'target artifact exists',failureBoundary:'assertion fails',evidence:'target.txt',nonGoals:'no model call',invariants:'same fixture',assumptions:'node works'}}};fs.writeFileSync('.sf/runtime/autonomous-solver/active.json',JSON.stringify(state,null,2));fs.writeFileSync('.sf/runtime/autonomous-solver/iterations.jsonl',JSON.stringify(state.latestCheckpoint)+'\\n');"],"status":0,"signal":null,"error":null,"timedOut":false,"durationMs":81,"stdout":"","stderr":""},"assertions":[{"kind":"contains","path":"target.txt","value":"expected-value","passed":true,"actual":"expected-value"}],"passed":true,"falseComplete":false,"solverSignals":{"hasState":true,"hasCheckpoint":true,"outcome":"complete","iteration":1,"remainingCount":0,"pddComplete":true,"blockedOrDecisionSurfaced":false,"continueCount":0,"journalEventTypes":[]}}
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
done without target
|
|
||||||
|
|
@ -1,4 +0,0 @@
|
||||||
{
|
|
||||||
"name": "solver-eval-sample",
|
|
||||||
"version": "1.0.0"
|
|
||||||
}
|
|
||||||
|
|
@ -1,21 +0,0 @@
|
||||||
{
|
|
||||||
"unitType": "execute-task",
|
|
||||||
"unitId": "M000/S00/T00",
|
|
||||||
"iteration": 1,
|
|
||||||
"maxIterations": 30000,
|
|
||||||
"latestCheckpoint": {
|
|
||||||
"outcome": "complete",
|
|
||||||
"summary": "Wrote target artifact",
|
|
||||||
"remainingItems": [],
|
|
||||||
"pdd": {
|
|
||||||
"purpose": "prove solver eval",
|
|
||||||
"consumer": "operator",
|
|
||||||
"contract": "target artifact exists",
|
|
||||||
"failureBoundary": "assertion fails",
|
|
||||||
"evidence": "target.txt",
|
|
||||||
"nonGoals": "no model call",
|
|
||||||
"invariants": "same fixture",
|
|
||||||
"assumptions": "node works"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
{"outcome":"complete","summary":"Wrote target artifact","remainingItems":[],"pdd":{"purpose":"prove solver eval","consumer":"operator","contract":"target artifact exists","failureBoundary":"assertion fails","evidence":"target.txt","nonGoals":"no model call","invariants":"same fixture","assumptions":"node works"}}
|
|
||||||
|
|
@ -1,4 +0,0 @@
|
||||||
{
|
|
||||||
"name": "solver-eval-sample",
|
|
||||||
"version": "1.0.0"
|
|
||||||
}
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
expected-value
|
|
||||||
|
|
@ -4824,8 +4824,8 @@ function headlessRunFromRow(row) {
|
||||||
* Persist an autonomous solver eval run and its per-mode case results.
|
* Persist an autonomous solver eval run and its per-mode case results.
|
||||||
*
|
*
|
||||||
* Purpose: make solver-loop benchmark evidence queryable by SF commands,
|
* Purpose: make solver-loop benchmark evidence queryable by SF commands,
|
||||||
* harness flows, UOK, and future memory retention instead of leaving it only
|
* harness flows, UOK, and future memory retention instead of treating ignored
|
||||||
* as JSON files under `.sf/evals`.
|
* `.sf/evals` JSON/JSONL evidence files as project state.
|
||||||
*
|
*
|
||||||
* Consumer: `/sf solver-eval` after each run completes.
|
* Consumer: `/sf solver-eval` after each run completes.
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue