fix(headless): bypass rpc for status
Some checks are pending
CI / detect-changes (push) Waiting to run
CI / docs-check (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
CI / build (push) Blocked by required conditions
CI / integration-tests (push) Blocked by required conditions
CI / windows-portability (push) Blocked by required conditions
CI / rtk-portability (linux, blacksmith-4vcpu-ubuntu-2404) (push) Blocked by required conditions
CI / rtk-portability (macos, macos-15) (push) Blocked by required conditions
CI / rtk-portability (windows, blacksmith-4vcpu-windows-2025) (push) Blocked by required conditions

This commit is contained in:
Mikael Hugo 2026-05-15 17:32:21 +02:00
parent cf32e79578
commit 362af3d6a4
9 changed files with 465 additions and 30 deletions

108
src/headless-status.ts Normal file
View file

@ -0,0 +1,108 @@
/**
 * headless-status.ts — direct `sf headless status` implementation.
*
* Purpose: keep the headless status machine surface read-only and
* TTY-independent instead of routing through the interactive `/status` overlay
* command or the long-lived RPC/v2 session handshake.
*/
import { buildQuerySnapshot, type QuerySnapshot } from "./headless-query.js";
export interface HeadlessStatusResult {
  // Process exit code to propagate to the CLI caller (0 on success).
  exitCode: number;
  // The snapshot the status was rendered from, when one was produced.
  data?: QuerySnapshot;
}
/**
 * Render a compact text status from the query snapshot.
 *
 * Purpose: provide the same operator value as `/status` in terminals where no
 * interactive overlay can be displayed.
 *
 * Consumer: handleHeadlessStatus for text-mode `sf headless status`.
 *
 * @param snapshot DB-backed project snapshot from buildQuerySnapshot.
 * @returns Multi-line plain-text report (no trailing newline).
 */
export function formatHeadlessStatus(snapshot: QuerySnapshot): string {
  // Destructure `state` directly instead of casting to `any`: every field
  // read below is part of the QuerySnapshot contract (see the unit-test
  // fixture), so keep the accesses type-checked.
  const { state, next, runtime, uokDiagnostics, schedule } = snapshot;
  const lines: string[] = ["SF Status", ""];
  lines.push(`Phase: ${state.phase}`);
  if (state.activeMilestone) {
    lines.push(
      `Active milestone: ${state.activeMilestone.id} - ${state.activeMilestone.title}`,
    );
  }
  if (state.activeSlice) {
    lines.push(
      `Active slice: ${state.activeSlice.id} - ${state.activeSlice.title}`,
    );
  }
  if (state.activeTask) {
    lines.push(
      `Active task: ${state.activeTask.id} - ${state.activeTask.title}`,
    );
  }
  const progress = state.progress;
  if (progress) {
    const parts = [
      `milestones ${progress.milestones.done}/${progress.milestones.total}`,
    ];
    // Slice/task counters are optional sections of the progress payload.
    if (progress.slices) {
      parts.push(`slices ${progress.slices.done}/${progress.slices.total}`);
    }
    if (progress.tasks) {
      parts.push(`tasks ${progress.tasks.done}/${progress.tasks.total}`);
    }
    lines.push(`Progress: ${parts.join(", ")}`);
  }
  if (state.nextAction) lines.push(`Next: ${state.nextAction}`);
  if (state.blockers.length > 0) {
    lines.push(`Blockers: ${state.blockers.join("; ")}`);
  }
  lines.push("");
  // Dispatch line: optional segments are appended only when present.
  lines.push(
    `Dispatch: ${next.action}${next.unitType ? ` ${next.unitType}` : ""}${next.unitId ? ` ${next.unitId}` : ""}${next.reason ? ` - ${next.reason}` : ""}`,
  );
  if (uokDiagnostics) {
    lines.push(
      `UOK: ${uokDiagnostics.verdict ?? "unknown"} (${uokDiagnostics.classification ?? "unknown"})`,
    );
  }
  if (runtime.units.length > 0) {
    lines.push("");
    lines.push("Runtime units:");
    // Cap the listing at 8 units to keep the report terminal-friendly.
    for (const unit of runtime.units.slice(0, 8)) {
      lines.push(` ${unit.unitType} ${unit.unitId}: ${unit.status}`);
    }
  }
  if (schedule) {
    lines.push("");
    lines.push(
      `Schedule: ${schedule.pending_count} pending, ${schedule.overdue_count} overdue`,
    );
  }
  if (state.registry.length > 0) {
    lines.push("");
    lines.push("Milestones:");
    for (const milestone of state.registry) {
      lines.push(` ${milestone.id}: ${milestone.title} (${milestone.status})`);
    }
  }
  return lines.join("\n");
}
/**
 * Handle `sf headless status` without spawning the interactive RPC child.
 *
 * Purpose: answer the status command entirely from DB-backed project state,
 * sidestepping the long-standing v2 init timeout on the interactive path.
 *
 * Consumer: runHeadlessOnce direct-command bypass.
 *
 * @param basePath Project root handed through to buildQuerySnapshot.
 * @param options  `json: true` emits the raw snapshot as a single JSON line;
 *                 otherwise the compact text rendering is written.
 * @returns Always exitCode 0 plus the snapshot that was rendered.
 */
export async function handleHeadlessStatus(
  basePath: string,
  options: { json?: boolean } = {},
): Promise<HeadlessStatusResult> {
  const snapshot = await buildQuerySnapshot(basePath);
  const rendered = options.json
    ? JSON.stringify(snapshot)
    : formatHeadlessStatus(snapshot);
  process.stdout.write(rendered + "\n");
  return { exitCode: 0, data: snapshot };
}

View file

@ -861,6 +861,22 @@ async function runHeadlessOnce(
return { exitCode: result.exitCode, interrupted: false, timedOut: false };
}
// Generic headless status: read-only project snapshot. This deliberately
// bypasses the interactive RPC/v2 path because `/status` opens a TUI overlay
// in interactive mode and can hang waiting for protocol init in headless.
if (options.command === "status") {
const { handleHeadlessStatus } = await import("./headless-status.js");
const wantsJson =
options.json ||
options.outputFormat === "json" ||
options.outputFormat === "stream-json" ||
options.commandArgs.includes("--json");
const result = await handleHeadlessStatus(process.cwd(), {
json: wantsJson,
});
return { exitCode: result.exitCode, interrupted: false, timedOut: false };
}
// Reflect: assemble the SF reflection corpus snapshot (open + recent
// self-feedback, recent commits, milestone state, validation files,
// prior report) and emit either the rendered prompt brief (default) or

View file

@ -149,19 +149,25 @@ function buildExtractionUserPrompt(
/**
* Extract assistant message text from activity JSONL.
* Returns concatenated text content from assistant role entries.
*
* Truncation strategy (changed from front-preserving): the front of
* an execute-task transcript carries the goal/context (already in
* the prompt), while the tail carries verification, final summary,
 * and the decisions actually committed — the highest-signal slices
* for memory extraction. Front-preserving truncation discarded
* exactly the content the LLM is best at extracting from.
*
* The fix: collect every assistant text block first, then if the
* total exceeds maxChars, keep ~25% from the front (goal echo,
* early decisions) and ~75% from the back (where the durable signal
* lives). The two halves are joined with a `[…truncated middle…]`
* marker so the LLM knows content was elided.
*/
function extractTranscriptFromActivity(raw, maxChars = 30_000) {
const lines = raw.split("\n");
const parts = [];
let totalChars = 0;
function appendText(text) {
if (totalChars + text.length > maxChars) {
parts.push(text.substring(0, maxChars - totalChars));
return false; // signal stop
}
parts.push(text);
totalChars += text.length;
return true;
}
for (const line of lines) {
if (!line.trim()) continue;
@ -178,21 +184,44 @@ function extractTranscriptFromActivity(raw, maxChars = 30_000) {
if (Array.isArray(entry.content)) {
for (const block of entry.content) {
if (block.type === "text" && block.text) {
if (!appendText(block.text)) return parts.join("\n\n");
appendText(block.text);
}
}
} else if (typeof entry.content === "string" && entry.content) {
if (!appendText(entry.content)) return parts.join("\n\n");
appendText(entry.content);
}
// Also read plain text/content field on custom_message entries
if (entry.text && typeof entry.text === "string") {
if (!appendText(entry.text)) return parts.join("\n\n");
appendText(entry.text);
}
} catch {
// Skip malformed lines
}
}
return parts.join("\n\n");
const joined = parts.join("\n\n");
if (joined.length <= maxChars) return joined;
return truncateMiddle(joined, maxChars);
}
/**
 * Keep N chars from the front + M from the back of `text`, drop the middle.
 *
 * Split ratio is back-weighted (default 25/75) because in an execute-task
 * transcript the durable signal — what was committed, what verified, what
 * the operator decided — concentrates at the tail. The front share keeps
 * enough goal/early-reasoning context to anchor the extraction; the back
 * share keeps the outcomes.
 *
 * Fix: the previous version clamped `budget` to 0 when maxChars was smaller
 * than the marker itself and then returned the bare marker — an output
 * LONGER than maxChars. Now a too-small cap falls back to a plain head cut,
 * so the result never exceeds maxChars.
 *
 * Exported (named export) for the dedicated unit test that pins the
 * ratio + the truncation marker.
 *
 * @param text Input transcript text.
 * @param maxChars Hard upper bound on the returned string's length.
 * @param frontShare Fraction of the post-marker budget given to the front.
 * @returns `text` unchanged when it fits, else front + marker + back.
 */
export function truncateMiddle(text, maxChars, frontShare = 0.25) {
  if (text.length <= maxChars) return text;
  const marker = "\n\n[…truncated middle…]\n\n";
  // Degenerate cap: the marker alone would blow the budget, so just cut.
  if (maxChars <= marker.length) return text.slice(0, maxChars);
  const budget = maxChars - marker.length;
  const frontLen = Math.floor(budget * frontShare);
  const backLen = budget - frontLen;
  return text.slice(0, frontLen) + marker + text.slice(text.length - backLen);
}
// ─── Response Parsing ───────────────────────────────────────────────────────
/**

View file

@ -5,6 +5,7 @@
import { createMemoryRelation } from "./memory-relations.js";
import {
_getAdapter,
computeStaticMemoryScore,
decayMemoriesBefore,
deleteMemoryEmbedding,
incrementMemoryHitCount,
@ -78,7 +79,10 @@ function rankMemoriesByLexicalQuery(memories, query, limit) {
0,
);
const lexicalScore = lexicalHits / queryTokens.length;
const staticScore = memory.confidence * (1 + memory.hit_count * 0.1);
const staticScore = computeStaticMemoryScore(
memory.confidence,
memory.hit_count,
);
return {
memory,
index,
@ -125,8 +129,13 @@ export function getActiveMemories() {
}
}
/**
* Get active memories ordered by ranking score: confidence * (1 + hit_count * 0.1).
* Get active memories ordered by ranking score (computeStaticMemoryScore).
* Higher-scored memories are more relevant and frequently confirmed.
*
* Sorting happens in JS because the canonical scoring formula uses
* log(), which the SQLite adapter doesn't reliably ship as a function
* across builds. The pool is bounded by `WHERE superseded_by IS NULL`
 * which for typical projects (10s–1000s of rows) is cheap to sort.
*/
export function getActiveMemoriesRanked(limit = 30) {
if (!isDbAvailable()) return [];
@ -134,12 +143,17 @@ export function getActiveMemoriesRanked(limit = 30) {
if (!adapter) return [];
try {
const rows = adapter
.prepare(`SELECT * FROM memories
WHERE superseded_by IS NULL
ORDER BY (confidence * (1.0 + hit_count * 0.1)) DESC
LIMIT :limit`)
.all({ ":limit": limit });
return rows.map(rowToMemory);
.prepare(`SELECT * FROM memories WHERE superseded_by IS NULL`)
.all();
return rows
.map(rowToMemory)
.map((m) => ({
m,
score: computeStaticMemoryScore(m.confidence, m.hit_count),
}))
.sort((a, b) => b.score - a.score)
.slice(0, limit)
.map((entry) => entry.m);
} catch {
return [];
}
@ -211,7 +225,7 @@ export async function getRelevantMemoriesRanked(query, limit = 10) {
let ranked = rankMemoriesByEmbedding(
mergedPool.map((m) => ({
id: m.id,
staticScore: m.confidence * (1 + m.hit_count * 0.1),
staticScore: computeStaticMemoryScore(m.confidence, m.hit_count),
})),
queryVec,
embeddingMap,

View file

@ -1,6 +1,33 @@
import { SF_STALE_STATE, SFError } from "../errors.js";
import { _getAdapter, intBool, parseJsonObject } from "./sf-db-core.js";
/**
 * Canonical static-score formula for an active memory:
 *
 *   confidence * (1 + log(1 + hit_count) * 0.5)
 *
 * The earlier linear form, confidence * (1 + hit_count * 0.1), compounded:
 * a memory that surfaced once got hit_count++, ranked higher, surfaced
 * again, ranked higher still — self-reinforcing popularity, not relevance.
 * The log curve matches it at hit_count=0 (factor 1.0), stays close at low
 * counts (hit_count=2 → 1.55 vs the old 1.2), flattens fast
 * (hit_count=10 → 2.20 vs 2.0), and remains bounded under runaway hits
 * (hit_count=100 → 3.30 vs 11.0).
 *
 * Used by: memory-store ranked queries, memory-tools search ranker, and the
 * supersedeLowestRankedMemories pruner in this file — one formula
 * everywhere, so SQL and JS can never disagree on ranking.
 *
 * Confidence is in [0..1] by convention; hit_count is a non-negative
 * integer. Both inputs are clamped defensively because the DB columns are
 * free-form numeric (NaN / negative values collapse to zero).
 */
export function computeStaticMemoryScore(confidence, hitCount) {
  const c = Number.isFinite(confidence) && confidence > 0 ? confidence : 0;
  const h = Number.isFinite(hitCount) && hitCount > 0 ? hitCount : 0;
  return c * (1 + Math.log(1 + h) * 0.5);
}
export function getActiveMemories({ category, limit = 200 } = {}) {
const currentDb = _getAdapter();
if (!currentDb) return [];
@ -198,15 +225,38 @@ export function expireStaleMemories(unstartedTtlDays = 28, maxTtlDays = 90) {
/**
 * Supersede the `limit` lowest-ranked active memories (cap enforcement).
 *
 * Ranking happens in JS via computeStaticMemoryScore so the formula is
 * shared with memory-store's read paths — no SQL/JS skew possible.
 *
 * Defect fixed: the rendered block chained the legacy subselect UPDATE
 * (old linear formula) AND the new parameterised UPDATE onto the same
 * prepared-statement chain, which is invalid and would have run the
 * deprecated formula. Only the parameterised, JS-ranked UPDATE remains.
 *
 * @param limit Number of lowest-scoring memories to cap out; non-positive
 *              or non-numeric values are a no-op.
 * @param now   Timestamp written to `updated_at` on the superseded rows.
 * @throws SFError(SF_STALE_STATE) when no database is open.
 */
export function supersedeLowestRankedMemories(limit, now) {
  const currentDb = _getAdapter();
  if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
  if (!Number.isFinite(limit) || limit <= 0) return;
  // Fetch only the scoring inputs for every active row. The pool is
  // bounded by `superseded_by IS NULL`, which for typical projects is
  // small enough to rank in memory.
  const candidates = currentDb
    .prepare(
      `SELECT id, confidence, hit_count FROM memories WHERE superseded_by IS NULL`,
    )
    .all();
  if (candidates.length === 0) return;
  // Ascending sort → the first `limit` entries are the lowest-scoring.
  const ranked = candidates
    .map((r) => ({
      id: r.id,
      score: computeStaticMemoryScore(r.confidence, r.hit_count),
    }))
    .sort((a, b) => a.score - b.score)
    .slice(0, limit);
  if (ranked.length === 0) return;
  // IN-list with parameterised placeholders, one per id, so we never
  // build the SQL with string-concatenated values.
  const placeholders = ranked.map((_, i) => `:id${i}`).join(", ");
  const params = { ":now": now };
  ranked.forEach((r, i) => {
    params[`:id${i}`] = r.id;
  });
  currentDb
    .prepare(
      `UPDATE memories SET superseded_by = 'CAP_EXCEEDED', updated_at = :now
       WHERE id IN (${placeholders})`,
    )
    .run(params);
}
export function insertMemorySourceRow(args) {

View file

@ -0,0 +1,79 @@
/**
 * Test the canonical static memory score formula.
 *
 * Previous formula `confidence * (1 + hit_count * 0.1)` was linear
 * in hit_count — self-reinforcing popularity (a memory that surfaces
 * once gets hit_count++, ranks higher, surfaces again, hits again...).
 *
 * Replacement `confidence * (1 + log(1 + hit_count) * 0.5)` keeps
 * the curve at hit_count=0 identical, climbs noticeably at low hits
 * (1.55× at h=2 vs 1.2× before — the old curve was *too* flat there),
 * and flattens fast (3.30× at h=100 vs the runaway 11.0× under the
 * linear formula).
 *
 * This test pins:
 * - identity at h=0
 * - monotonic non-decreasing in h
 * - bounded growth (no value above an upper guardrail)
 * - defensiveness against NaN / negative inputs
 */
import assert from "node:assert/strict";
import { test } from "vitest";
import { computeStaticMemoryScore } from "../sf-db/sf-db-memory.js";

// At h=0 the log term vanishes, so the score must equal raw confidence.
test("identity at hit_count=0", () => {
  assert.equal(computeStaticMemoryScore(0.8, 0), 0.8);
  assert.equal(computeStaticMemoryScore(0.5, 0), 0.5);
  assert.equal(computeStaticMemoryScore(1.0, 0), 1.0);
});

// More hits must never lower the score — sweep a wide range of counts.
test("monotonic non-decreasing in hit_count", () => {
  const c = 0.7;
  let prev = computeStaticMemoryScore(c, 0);
  for (let h = 1; h <= 1000; h++) {
    const curr = computeStaticMemoryScore(c, h);
    assert.ok(curr >= prev, `non-monotonic at h=${h}: ${curr} < ${prev}`);
    prev = curr;
  }
});

test("growth is bounded under runaway hit counts (log-shaped)", () => {
  // Hand-computed pins: confidence * (1 + log(1+h)*0.5).
  // h=100 → 1 + log(101)*0.5 ≈ 1 + 2.3081 ≈ 3.308
  // h=10000 → 1 + log(10001)*0.5 ≈ 1 + 4.605 ≈ 5.605
  const heavy = computeStaticMemoryScore(1.0, 10000);
  assert.ok(heavy < 7, `score at h=10000 too high: ${heavy}`);
  const veryHeavy = computeStaticMemoryScore(1.0, 1_000_000);
  assert.ok(veryHeavy < 10, `score at h=1M too high: ${veryHeavy}`);
});

// Confidence scales the whole curve, so ordering by confidence must hold
// at every fixed hit count.
test("monotonic in confidence at fixed hit_count", () => {
  for (let h = 0; h <= 100; h += 10) {
    const low = computeStaticMemoryScore(0.2, h);
    const mid = computeStaticMemoryScore(0.5, h);
    const high = computeStaticMemoryScore(0.9, h);
    assert.ok(low <= mid, `low<=mid at h=${h}: ${low} ${mid}`);
    assert.ok(mid <= high, `mid<=high at h=${h}: ${mid} ${high}`);
  }
});

test("NaN / negative inputs collapse to 0 rather than poisoning sorts", () => {
  assert.equal(computeStaticMemoryScore(Number.NaN, 5), 0);
  assert.equal(computeStaticMemoryScore(-0.5, 5), 0);
  // Negative hit_count clamps to 0 → returns base confidence.
  assert.equal(computeStaticMemoryScore(0.7, -3), 0.7);
  assert.equal(computeStaticMemoryScore(0.7, Number.NaN), 0.7);
});

test("regression: new formula tames the runaway compared to linear", () => {
  // Old: confidence * (1 + hit_count * 0.1) → at h=100 was 11×.
  // New: cap at 1M hits stays under 10× (effective ceiling for any
  // realistic memory). This pins the central property of the fix.
  const linearAtHundred = 0.5 * (1 + 100 * 0.1); // 5.5
  const newAtHundred = computeStaticMemoryScore(0.5, 100);
  assert.ok(
    newAtHundred < linearAtHundred,
    `new (${newAtHundred}) should be < linear (${linearAtHundred}) at h=100`,
  );
});

View file

@ -0,0 +1,80 @@
/**
 * Test the front+back transcript truncation in memory-extractor.
 *
 * Previously: front-preserving truncation dropped the tail, where
 * the highest-signal content for memory extraction lives (final
 * summary, what got committed, verification outcome).
 *
 * Now: keep ~25% from front + ~75% from back with a marker in
 * between. This test pins the invariants:
 * - identity for short inputs
 * - both ends survive when text exceeds the cap
 * - the marker is present so the LLM knows about the elision
 * - output length never exceeds maxChars
 */
import assert from "node:assert/strict";
import { test } from "vitest";
import { truncateMiddle } from "../memory-extractor.js";

// Inputs at or under the cap must come back byte-identical.
test("identity when text fits", () => {
  assert.equal(truncateMiddle("hello world", 30_000), "hello world");
  assert.equal(truncateMiddle("", 30_000), "");
});

test("keeps front and back, drops middle, inserts marker", () => {
  // Pick sizes so the surviving front + back ≪ middle, so the
  // middle is meaningfully dropped (not just partly nibbled).
  const front = "FRONT".repeat(1000); // 5K
  const middle = "MIDDLE".repeat(20_000); // 120K
  const back = "BACK".repeat(1000); // 4K
  const text = front + middle + back; // 129K
  const out = truncateMiddle(text, 10_000);
  assert.ok(out.length <= 10_000, `output too long: ${out.length}`);
  assert.match(out, /\[…truncated middle…\]/, "marker missing");
  // Original front survives (first slice of FRONT pattern).
  assert.ok(out.startsWith("FRONT"), "front not preserved");
  // Original back survives (last slice ends with BACK pattern).
  assert.ok(out.endsWith("BACK"), "back not preserved");
  // Most of the giant middle is dropped — output is dramatically
  // smaller than the input (>10× compression).
  assert.ok(out.length < text.length / 10, `expected heavy compression`);
});

test("back-weighted split (75% back vs 25% front by default)", () => {
  // Construct a text where each char identifies its position bucket.
  const text = "F".repeat(10_000) + "M".repeat(10_000) + "B".repeat(10_000); // 30K
  const out = truncateMiddle(text, 10_000);
  const [head, tail] = out.split("[…truncated middle…]");
  // Front share ≈ 25% of (10_000 - marker_len) ≈ ~2.5K. Back ≈ 7.5K.
  assert.ok(
    head.length < tail.length,
    `expected back to be larger than front: front=${head.length} back=${tail.length}`,
  );
  assert.ok(
    tail.length > head.length * 2,
    `expected back to be > 2× front: front=${head.length} back=${tail.length}`,
  );
});

// frontShare is an explicit knob — a 0.5 share must yield a balanced split.
test("respects custom frontShare", () => {
  const text = "X".repeat(20_000);
  const out = truncateMiddle(text, 5_000, 0.5);
  const [head, tail] = out.split("[…truncated middle…]");
  // 50/50 split.
  assert.ok(
    Math.abs(head.length - tail.length) <= 1,
    `50/50 split should be balanced: front=${head.length} back=${tail.length}`,
  );
});

// The cap is a hard contract: marker overhead must never push past it.
test("output length never exceeds maxChars even with marker", () => {
  for (const cap of [100, 500, 1_000, 30_000]) {
    const text = "Z".repeat(cap * 3);
    const out = truncateMiddle(text, cap);
    assert.ok(
      out.length <= cap,
      `cap=${cap}: output length ${out.length} exceeded cap`,
    );
  }
});

View file

@ -9,7 +9,7 @@ import {
getActiveMemoriesRanked,
reinforceMemory,
} from "../memory-store.js";
import { isDbAvailable } from "../sf-db.js";
import { computeStaticMemoryScore, isDbAvailable } from "../sf-db.js";
function dbUnavailable(operation) {
return {
@ -97,7 +97,7 @@ export function executeMemoryQuery(params) {
});
const ranked = filtered.slice(0, k).map((memory) => ({
memory,
score: memory.confidence * (1 + memory.hit_count * 0.1),
score: computeStaticMemoryScore(memory.confidence, memory.hit_count),
}));
const hits = ranked.map((r) => ({
id: r.memory.id,

View file

@ -0,0 +1,59 @@
/**
 * headless-status.test.ts — direct headless status rendering.
*
* Purpose: prevent `sf headless status` from regressing back to the
* interactive RPC/v2 path when it only needs DB-backed project state.
*/
import assert from "node:assert/strict";
import { test } from "vitest";
import type { QuerySnapshot } from "../headless-query.js";
import { formatHeadlessStatus } from "../headless-status.js";
/**
 * Build a fully-populated QuerySnapshot fixture. Tests may replace whole
 * top-level sections (state, next, runtime, ...) via `overrides`; the
 * spread happens last so overrides win.
 */
function snapshot(overrides: Partial<QuerySnapshot> = {}): QuerySnapshot {
  return {
    schemaVersion: 1,
    state: {
      activeMilestone: { id: "M001", title: "Runtime Hardening" },
      activeSlice: { id: "S01", title: "Headless Status" },
      activeTask: { id: "T01", title: "Bypass v2 init" },
      phase: "executing",
      recentDecisions: [],
      blockers: [],
      nextAction: "Execute T01.",
      registry: [{ id: "M001", title: "Runtime Hardening", status: "active" }],
      requirements: {
        active: 0,
        validated: 0,
        deferred: 0,
        outOfScope: 0,
        blocked: 0,
        total: 0,
      },
      progress: {
        milestones: { done: 0, total: 1 },
        slices: { done: 0, total: 1 },
        tasks: { done: 0, total: 1 },
      },
    },
    next: {
      action: "dispatch",
      unitType: "execute-task",
      unitId: "M001/S01/T01",
    },
    cost: { workers: [], total: 0 },
    runtime: { units: [] },
    uokDiagnostics: { verdict: "clear", classification: "healthy" },
    schedule: { pending_count: 0, overdue_count: 0, due: [], upcoming: [] },
    ...overrides,
  };
}

// Pin the renderer's key lines: the text path needs only this snapshot,
// so the command must never regress to the interactive RPC/v2 handshake.
test("formatHeadlessStatus_when_snapshot_available_renders_text_without_rpc", () => {
  const rendered = formatHeadlessStatus(snapshot());
  assert.match(rendered, /^SF Status/);
  assert.match(rendered, /Phase: executing/);
  assert.match(rendered, /Active milestone: M001 - Runtime Hardening/);
  assert.match(rendered, /Dispatch: dispatch execute-task M001\/S01\/T01/);
  assert.match(rendered, /UOK: clear \(healthy\)/);
});