Addresses self-feedback entry sf-mp4uzvcd-pazg6v
(architecture-defect:no-reflection-layer-over-self-feedback-corpus): SF
detected symptoms and triaged individual entries but had no layer that
reasoned about the corpus to recognize recurring structural patterns.
The same architectural pressure expressed itself across multiple entries
with different exact-kind strings; nothing escalated the pattern to a
class. The cognitive work fell on the operator.
This commit ships Phase 1A — the data-assembly + prompt half of the
reflection layer + an operator-driven entry point. Phase 1B (LLM dispatch
via the autonomous loop as a real unit type) lands once
sf-mp4rxkwb-l4baga (triage-not-a-first-class-unit-type) is in.
Files:
- src/resources/extensions/sf/reflection.js (new)
- assembleReflectionCorpus(basePath): bundles open + recent-resolved
self-feedback (full json), last 50 commits via git log, milestone +
slice + task state, all milestone validation verdicts, and prior
reflection report into one struct. Returns null on prerequisite
failure (DB closed) so callers downgrade gracefully.
- renderReflectionCorpusBrief(corpus): renders the corpus into a
markdown brief the LLM consumes in one turn.
- writeReflectionReport(basePath, content): persists the report to
.sf/reflection/<timestamp>-report.md so the next pass can detect "what
changed since the last reflection."
- src/resources/extensions/sf/prompts/reflection-pass.md (new)
- {{include:working-directory}} prefix.
- Reasoning order: cluster by structural shape (not exact kind),
identify recurring patterns, identify commit/ledger gaps, identify
stale validation drift, identify the deepest architectural concern,
compare against prior report.
- Output contract: structured markdown report with named sections,
terminator REFLECTION_COMPLETE for clean-finish detection.
- Constraints: don't fix anything (reflection layer not executor),
don't resolve entries without commit-SHA evidence, don't invent IDs.
- src/headless-reflect.ts (new) — sf headless reflect [--json]
- Pre-opens the project DB via auto-start.openProjectDbIfPresent
(one-shot bypass path doesn't run the full SF agent bootstrap).
- Default: emits the rendered prompt brief (template + corpus) for
operators to pipe into any model. Lets the corpus-assembly layer
ship and validate before the LLM-dispatch layer is wired.
- --json: emits raw corpus snapshot for tooling.
- src/headless.ts: registers the new "reflect" command after the
existing usage block.
- src/help-text.ts: documents it in the headless command list.
- src/resources/extensions/sf/tests/reflection.test.mjs (new, 9 tests):
null-when-DB-closed; collects open + recent-resolved; excludes >30d
resolutions; captures milestone/slice/task tree; captures validation
verdicts; commits returned as array (best-effort tmpdir is ok); brief
renders all major sections; entry IDs/severity/kind appear in brief;
writeReflectionReport round-trips through assembleReflectionCorpus's
previousReport read.
Live smoke verified: sf headless reflect against the real .sf/sf.db
returns 15 open + 23 recent-resolved entries, 50 commits, 2 milestones,
1 validation file (correctly surfacing M001's stale needs-attention
verdict against actual 5/5 slices done — exactly the case that
motivated this layer).
Total: +848 LOC, full SF extension suite (1534 tests) passes,
typecheck clean.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
262 lines
7.5 KiB
JavaScript
262 lines
7.5 KiB
JavaScript
/**
 * reflection.test.mjs — corpus assembler + brief renderer.
 *
 * Phase 1A scope: validate the data-assembly half of the reflection layer.
 * The LLM-call half lives in headless-reflect.ts and the future
 * autonomous-loop unit handler; those get their own coverage when they land.
 */
|
|
import {
|
|
mkdirSync,
|
|
mkdtempSync,
|
|
rmSync,
|
|
writeFileSync,
|
|
} from "node:fs";
|
|
import { tmpdir } from "node:os";
|
|
import { join } from "node:path";
|
|
import { afterEach, describe, expect, test } from "vitest";
|
|
import {
|
|
assembleReflectionCorpus,
|
|
renderReflectionCorpusBrief,
|
|
writeReflectionReport,
|
|
} from "../reflection.js";
|
|
import {
|
|
closeDatabase,
|
|
insertMilestone,
|
|
insertSlice,
|
|
insertTask,
|
|
openDatabase,
|
|
} from "../sf-db.js";
|
|
import { recordSelfFeedback, markResolved } from "../self-feedback.js";
|
|
|
|
/** Temp project directories created during the current test; drained by the teardown below. */
const tmpDirs = [];

/**
 * Per-test teardown: closes the database handle, then removes every
 * directory registered in `tmpDirs`.
 */
afterEach(() => {
  try {
    closeDatabase();
  } finally {
    // Cleanup runs even when closeDatabase() throws, so a failing close
    // cannot leak temp directories; the original error still propagates.
    while (tmpDirs.length > 0) {
      const dir = tmpDirs.pop();
      if (dir) rmSync(dir, { recursive: true, force: true });
    }
  }
});
|
|
|
|
/**
 * Builds a throwaway project directory for a test and opens its database.
 *
 * Creates a fresh temp directory (prefix `sf-reflection-`), adds an empty
 * `.sf/` folder and a `package.json` named "singularity-forge", then calls
 * `openDatabase` on `.sf/sf.db` inside it.
 *
 * @returns {string} Absolute path of the created project directory.
 */
function makeForgeProject() {
  const dir = mkdtempSync(join(tmpdir(), "sf-reflection-"));
  // Register immediately so teardown removes the directory even if a later
  // setup step throws.
  tmpDirs.push(dir);

  mkdirSync(join(dir, ".sf"), { recursive: true });
  writeFileSync(
    join(dir, "package.json"),
    JSON.stringify({ name: "singularity-forge" }),
  );

  openDatabase(join(dir, ".sf", "sf.db"));
  return dir;
}
|
|
|
|
/**
 * Inserts one milestone with its slices and tasks into the open database.
 *
 * The milestone is inserted with status "active" and its id as title. Each
 * slice and task also uses its id as title. Slices get risk "medium"; tasks
 * get an empty description, empty estimate and no files.
 *
 * `sequence` follows declaration order (1, 2, …) for slices within the
 * milestone and for tasks within each slice, so the fixture records the
 * order the caller wrote them in rather than giving every row the same value.
 *
 * @param {string} dir - Project directory. Not read by this helper; kept so
 *   existing call sites stay unchanged.
 * @param {string} milestoneId - Id (and title) of the milestone to insert.
 * @param {Iterable<{id: string, status: string, tasks?: Iterable<{id: string, status: string}>}>} slices
 *   Slices to insert, each with optional tasks.
 */
function seedMilestone(dir, milestoneId, slices) {
  insertMilestone({ id: milestoneId, title: milestoneId, status: "active" });

  let sliceSequence = 0;
  for (const slice of slices) {
    sliceSequence += 1;
    insertSlice({
      milestoneId,
      id: slice.id,
      title: slice.id,
      status: slice.status,
      risk: "medium",
      sequence: sliceSequence,
    });

    let taskSequence = 0;
    for (const task of slice.tasks ?? []) {
      taskSequence += 1;
      insertTask({
        milestoneId,
        sliceId: slice.id,
        id: task.id,
        title: task.id,
        status: task.status,
        description: "",
        estimate: "",
        files: [],
        sequence: taskSequence,
      });
    }
  }
}
|
|
|
|
/**
 * Writes a milestone validation file under the project's `.sf` tree.
 *
 * The file lands at `.sf/milestones/<milestoneId>/<milestoneId>-VALIDATION.md`
 * and consists of a front-matter block holding `verdict`, a blank line, then
 * `body` followed by a newline. Missing parent directories are created.
 *
 * @param {string} dir - Project root directory.
 * @param {string} milestoneId - Milestone id; used for the folder and file name.
 * @param {string} verdict - Value written to the front-matter `verdict` key.
 * @param {string} body - Text placed after the front matter.
 */
function writeValidation(dir, milestoneId, verdict, body) {
  const milestoneDir = join(dir, ".sf", "milestones", milestoneId);
  mkdirSync(milestoneDir, { recursive: true });

  const target = join(milestoneDir, `${milestoneId}-VALIDATION.md`);
  writeFileSync(target, `---\nverdict: ${verdict}\n---\n\n${body}\n`);
}
|
|
|
|
/**
 * assembleReflectionCorpus: checks what the assembled corpus contains for
 * self-feedback entries, milestone/slice/task state, validation verdicts and
 * commits, plus the null result when no database is open.
 *
 * Every test that reads fields off the corpus first asserts it is non-null,
 * so a failed assembly shows up as an assertion failure instead of a
 * TypeError on property access.
 */
describe("assembleReflectionCorpus", () => {
  test("returns null when DB is not open", () => {
    // no makeForgeProject → no openDatabase → DB not available
    const corpus = assembleReflectionCorpus("/tmp/does-not-exist");
    expect(corpus).toBe(null);
  });

  test("collects open and recent-resolved forge entries", () => {
    const dir = makeForgeProject();
    // open entry
    const openRes = recordSelfFeedback(
      { kind: "gap:foo", severity: "medium", summary: "open one" },
      dir,
    );
    // resolved entry (today, well within lookback)
    const resolvedRes = recordSelfFeedback(
      {
        kind: "architecture-defect:bar",
        severity: "high",
        summary: "resolved one",
      },
      dir,
    );
    markResolved(
      resolvedRes.entry.id,
      {
        reason: "fixed",
        evidence: { kind: "agent-fix", commitSha: "abc1234" },
      },
      dir,
    );

    const corpus = assembleReflectionCorpus(dir);
    expect(corpus).not.toBe(null);
    expect(corpus.openEntries).toHaveLength(1);
    expect(corpus.openEntries[0].id).toBe(openRes.entry.id);
    expect(corpus.recentResolvedEntries).toHaveLength(1);
    expect(corpus.recentResolvedEntries[0].id).toBe(resolvedRes.entry.id);
    expect(corpus.recentResolvedEntries[0].resolvedEvidence?.kind).toBe(
      "agent-fix",
    );
  });

  test("excludes resolutions older than the lookback window", async () => {
    const dir = makeForgeProject();
    // Seed an entry, mark it resolved, then back-date its resolved_at
    // directly via DB so it falls outside the 30-day lookback.
    const filed = recordSelfFeedback(
      { kind: "gap:old", severity: "low", summary: "old one" },
      dir,
    );
    markResolved(
      filed.entry.id,
      { reason: "old fix", evidence: { kind: "human-clear" } },
      dir,
    );
    // Back-date 60 days. Use the same DB adapter via dynamic ESM import.
    const oldTs = new Date(
      Date.now() - 60 * 24 * 60 * 60 * 1000,
    ).toISOString();
    const { _getAdapter } = await import("../sf-db/sf-db-core.js");
    _getAdapter()
      .prepare("UPDATE self_feedback SET resolved_at = :ts WHERE id = :id")
      .run({ ":ts": oldTs, ":id": filed.entry.id });

    const corpus = assembleReflectionCorpus(dir);
    expect(corpus).not.toBe(null);
    expect(corpus.openEntries).toHaveLength(0);
    expect(corpus.recentResolvedEntries).toHaveLength(0);
  });

  test("captures milestone state and per-slice task counts", () => {
    const dir = makeForgeProject();
    seedMilestone(dir, "M001", [
      {
        id: "S01",
        status: "complete",
        tasks: [
          { id: "T01", status: "complete" },
          { id: "T02", status: "complete" },
        ],
      },
      {
        id: "S02",
        status: "pending",
        tasks: [{ id: "T01", status: "pending" }],
      },
    ]);

    const corpus = assembleReflectionCorpus(dir);
    expect(corpus).not.toBe(null);
    expect(corpus.milestones).toHaveLength(1);
    const m = corpus.milestones[0];
    expect(m.id).toBe("M001");
    expect(m.slices).toHaveLength(2);
    expect(m.slices[0].id).toBe("S01");
    expect(m.slices[0].tasks).toHaveLength(2);
    expect(m.slices[1].tasks).toHaveLength(1);
  });

  test("captures milestone validation verdicts", () => {
    const dir = makeForgeProject();
    writeValidation(
      dir,
      "M001",
      "needs-attention",
      "S02-S05 still pending per this stale file",
    );
    writeValidation(dir, "M002", "pass", "All criteria met");

    const corpus = assembleReflectionCorpus(dir);
    expect(corpus).not.toBe(null);
    expect(corpus.validations).toHaveLength(2);
    const verdictByMilestone = Object.fromEntries(
      corpus.validations.map((v) => [v.milestoneId, v.verdict]),
    );
    expect(verdictByMilestone.M001).toBe("needs-attention");
    expect(verdictByMilestone.M002).toBe("pass");
  });

  test("returns commits as an array (best-effort, may be empty in tmpdir)", () => {
    const dir = makeForgeProject();
    const corpus = assembleReflectionCorpus(dir);
    expect(corpus).not.toBe(null);
    // tmpdir is not a git repo so git log fails → []. Any value other than
    // an array would mean the helper threw out, which it must never.
    expect(Array.isArray(corpus.commits)).toBe(true);
  });
});
|
|
|
|
/**
 * renderReflectionCorpusBrief: checks that the rendered brief carries every
 * section heading, and that a recorded entry's id, kind, severity tag and
 * summary show up in the text.
 *
 * Each test asserts the corpus is non-null before rendering, so an assembly
 * failure is reported here rather than inside the renderer.
 */
describe("renderReflectionCorpusBrief", () => {
  test("renders all major sections even with empty inputs", () => {
    const dir = makeForgeProject();
    const corpus = assembleReflectionCorpus(dir);
    expect(corpus).not.toBe(null);

    const brief = renderReflectionCorpusBrief(corpus);
    expect(brief).toContain("Open self-feedback entries");
    expect(brief).toContain("Recently resolved self-feedback");
    expect(brief).toContain("Recent commits");
    expect(brief).toContain("Milestone state");
    expect(brief).toContain("Milestone validation verdicts");
    expect(brief).toContain("Previous reflection report");
  });

  test("includes entry id, severity, and kind in brief", () => {
    const dir = makeForgeProject();
    recordSelfFeedback(
      {
        kind: "gap:visible-in-brief",
        severity: "high",
        summary: "must appear",
      },
      dir,
    );

    const corpus = assembleReflectionCorpus(dir);
    expect(corpus).not.toBe(null);

    const brief = renderReflectionCorpusBrief(corpus);
    expect(brief).toMatch(/sf-[\w-]+/);
    expect(brief).toContain("gap:visible-in-brief");
    expect(brief).toContain("[high]");
    expect(brief).toContain("must appear");
  });
});
|
|
|
|
/**
 * writeReflectionReport: checks that the returned path ends in `-report.md`
 * and that a later corpus assembly exposes the written content as
 * `previousReport`.
 */
describe("writeReflectionReport", () => {
  test("writes a timestamped report to .sf/reflection/", () => {
    const dir = makeForgeProject();
    const path = writeReflectionReport(
      dir,
      "# Test reflection\n\nSome content.\n",
    );
    expect(path).toBeTruthy();
    expect(path).toMatch(/-report\.md$/);

    // And next-pass should be able to read it as the previous report
    const corpus = assembleReflectionCorpus(dir);
    // Guard first so a failed assembly is an assertion failure, not a TypeError.
    expect(corpus).not.toBe(null);
    expect(corpus.previousReport).toBeTruthy();
    expect(corpus.previousReport.content).toContain("Test reflection");
  });
});
|