fix(headless): bypass rpc for status
Some checks are pending
CI / detect-changes (push) Waiting to run
CI / docs-check (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
CI / build (push) Blocked by required conditions
CI / integration-tests (push) Blocked by required conditions
CI / windows-portability (push) Blocked by required conditions
CI / rtk-portability (linux, blacksmith-4vcpu-ubuntu-2404) (push) Blocked by required conditions
CI / rtk-portability (macos, macos-15) (push) Blocked by required conditions
CI / rtk-portability (windows, blacksmith-4vcpu-windows-2025) (push) Blocked by required conditions
Some checks are pending
CI / detect-changes (push) Waiting to run
CI / docs-check (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
CI / build (push) Blocked by required conditions
CI / integration-tests (push) Blocked by required conditions
CI / windows-portability (push) Blocked by required conditions
CI / rtk-portability (linux, blacksmith-4vcpu-ubuntu-2404) (push) Blocked by required conditions
CI / rtk-portability (macos, macos-15) (push) Blocked by required conditions
CI / rtk-portability (windows, blacksmith-4vcpu-windows-2025) (push) Blocked by required conditions
This commit is contained in:
parent
cf32e79578
commit
362af3d6a4
9 changed files with 465 additions and 30 deletions
108
src/headless-status.ts
Normal file
108
src/headless-status.ts
Normal file
|
|
@ -0,0 +1,108 @@
|
||||||
|
/**
|
||||||
|
* headless-status.ts — direct `sf headless status` implementation.
|
||||||
|
*
|
||||||
|
* Purpose: keep the headless status machine surface read-only and
|
||||||
|
* TTY-independent instead of routing through the interactive `/status` overlay
|
||||||
|
* command or the long-lived RPC/v2 session handshake.
|
||||||
|
*/
|
||||||
|
import { buildQuerySnapshot, type QuerySnapshot } from "./headless-query.js";
|
||||||
|
|
||||||
|
export interface HeadlessStatusResult {
|
||||||
|
exitCode: number;
|
||||||
|
data?: QuerySnapshot;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Render a compact text status from the query snapshot.
|
||||||
|
*
|
||||||
|
* Purpose: provide the same operator value as `/status` in terminals where no
|
||||||
|
* interactive overlay can be displayed.
|
||||||
|
*
|
||||||
|
* Consumer: handleHeadlessStatus for text-mode `sf headless status`.
|
||||||
|
*/
|
||||||
|
export function formatHeadlessStatus(snapshot: QuerySnapshot): string {
|
||||||
|
const { next, runtime, uokDiagnostics, schedule } = snapshot;
|
||||||
|
const state = snapshot.state as any;
|
||||||
|
const lines = ["SF Status", ""];
|
||||||
|
lines.push(`Phase: ${state.phase}`);
|
||||||
|
if (state.activeMilestone) {
|
||||||
|
lines.push(
|
||||||
|
`Active milestone: ${state.activeMilestone.id} - ${state.activeMilestone.title}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if (state.activeSlice) {
|
||||||
|
lines.push(
|
||||||
|
`Active slice: ${state.activeSlice.id} - ${state.activeSlice.title}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if (state.activeTask) {
|
||||||
|
lines.push(
|
||||||
|
`Active task: ${state.activeTask.id} - ${state.activeTask.title}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
const progress = state.progress;
|
||||||
|
if (progress) {
|
||||||
|
const parts = [
|
||||||
|
`milestones ${progress.milestones.done}/${progress.milestones.total}`,
|
||||||
|
];
|
||||||
|
if (progress.slices)
|
||||||
|
parts.push(`slices ${progress.slices.done}/${progress.slices.total}`);
|
||||||
|
if (progress.tasks)
|
||||||
|
parts.push(`tasks ${progress.tasks.done}/${progress.tasks.total}`);
|
||||||
|
lines.push(`Progress: ${parts.join(", ")}`);
|
||||||
|
}
|
||||||
|
if (state.nextAction) lines.push(`Next: ${state.nextAction}`);
|
||||||
|
if (state.blockers.length > 0)
|
||||||
|
lines.push(`Blockers: ${state.blockers.join("; ")}`);
|
||||||
|
lines.push("");
|
||||||
|
lines.push(
|
||||||
|
`Dispatch: ${next.action}${next.unitType ? ` ${next.unitType}` : ""}${next.unitId ? ` ${next.unitId}` : ""}${next.reason ? ` - ${next.reason}` : ""}`,
|
||||||
|
);
|
||||||
|
if (uokDiagnostics) {
|
||||||
|
lines.push(
|
||||||
|
`UOK: ${uokDiagnostics.verdict ?? "unknown"} (${uokDiagnostics.classification ?? "unknown"})`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if (runtime.units.length > 0) {
|
||||||
|
lines.push("");
|
||||||
|
lines.push("Runtime units:");
|
||||||
|
for (const unit of runtime.units.slice(0, 8)) {
|
||||||
|
lines.push(` ${unit.unitType} ${unit.unitId}: ${unit.status}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (schedule) {
|
||||||
|
lines.push("");
|
||||||
|
lines.push(
|
||||||
|
`Schedule: ${schedule.pending_count} pending, ${schedule.overdue_count} overdue`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if (state.registry.length > 0) {
|
||||||
|
lines.push("");
|
||||||
|
lines.push("Milestones:");
|
||||||
|
for (const milestone of state.registry) {
|
||||||
|
lines.push(` ${milestone.id}: ${milestone.title} (${milestone.status})`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return lines.join("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Handle `sf headless status` without spawning the interactive RPC child.
|
||||||
|
*
|
||||||
|
* Purpose: avoid the long-standing v2 init timeout for a command whose answer
|
||||||
|
* is fully available from DB-backed project state.
|
||||||
|
*
|
||||||
|
* Consumer: runHeadlessOnce direct-command bypass.
|
||||||
|
*/
|
||||||
|
export async function handleHeadlessStatus(
|
||||||
|
basePath: string,
|
||||||
|
options: { json?: boolean } = {},
|
||||||
|
): Promise<HeadlessStatusResult> {
|
||||||
|
const snapshot = await buildQuerySnapshot(basePath);
|
||||||
|
if (options.json) {
|
||||||
|
process.stdout.write(JSON.stringify(snapshot) + "\n");
|
||||||
|
} else {
|
||||||
|
process.stdout.write(formatHeadlessStatus(snapshot) + "\n");
|
||||||
|
}
|
||||||
|
return { exitCode: 0, data: snapshot };
|
||||||
|
}
|
||||||
|
|
@ -861,6 +861,22 @@ async function runHeadlessOnce(
|
||||||
return { exitCode: result.exitCode, interrupted: false, timedOut: false };
|
return { exitCode: result.exitCode, interrupted: false, timedOut: false };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Generic headless status: read-only project snapshot. This deliberately
|
||||||
|
// bypasses the interactive RPC/v2 path because `/status` opens a TUI overlay
|
||||||
|
// in interactive mode and can hang waiting for protocol init in headless.
|
||||||
|
if (options.command === "status") {
|
||||||
|
const { handleHeadlessStatus } = await import("./headless-status.js");
|
||||||
|
const wantsJson =
|
||||||
|
options.json ||
|
||||||
|
options.outputFormat === "json" ||
|
||||||
|
options.outputFormat === "stream-json" ||
|
||||||
|
options.commandArgs.includes("--json");
|
||||||
|
const result = await handleHeadlessStatus(process.cwd(), {
|
||||||
|
json: wantsJson,
|
||||||
|
});
|
||||||
|
return { exitCode: result.exitCode, interrupted: false, timedOut: false };
|
||||||
|
}
|
||||||
|
|
||||||
// Reflect: assemble the SF reflection corpus snapshot (open + recent
|
// Reflect: assemble the SF reflection corpus snapshot (open + recent
|
||||||
// self-feedback, recent commits, milestone state, validation files,
|
// self-feedback, recent commits, milestone state, validation files,
|
||||||
// prior report) and emit either the rendered prompt brief (default) or
|
// prior report) and emit either the rendered prompt brief (default) or
|
||||||
|
|
|
||||||
|
|
@ -149,19 +149,25 @@ function buildExtractionUserPrompt(
|
||||||
/**
|
/**
|
||||||
* Extract assistant message text from activity JSONL.
|
* Extract assistant message text from activity JSONL.
|
||||||
* Returns concatenated text content from assistant role entries.
|
* Returns concatenated text content from assistant role entries.
|
||||||
|
*
|
||||||
|
* Truncation strategy (changed from front-preserving): the front of
|
||||||
|
* an execute-task transcript carries the goal/context (already in
|
||||||
|
* the prompt), while the tail carries verification, final summary,
|
||||||
|
* and the decisions actually committed — the highest-signal slices
|
||||||
|
* for memory extraction. Front-preserving truncation discarded
|
||||||
|
* exactly the content the LLM is best at extracting from.
|
||||||
|
*
|
||||||
|
* The fix: collect every assistant text block first, then if the
|
||||||
|
* total exceeds maxChars, keep ~25% from the front (goal echo,
|
||||||
|
* early decisions) and ~75% from the back (where the durable signal
|
||||||
|
* lives). The two halves are joined with a `[…truncated middle…]`
|
||||||
|
* marker so the LLM knows content was elided.
|
||||||
*/
|
*/
|
||||||
function extractTranscriptFromActivity(raw, maxChars = 30_000) {
|
function extractTranscriptFromActivity(raw, maxChars = 30_000) {
|
||||||
const lines = raw.split("\n");
|
const lines = raw.split("\n");
|
||||||
const parts = [];
|
const parts = [];
|
||||||
let totalChars = 0;
|
|
||||||
function appendText(text) {
|
function appendText(text) {
|
||||||
if (totalChars + text.length > maxChars) {
|
|
||||||
parts.push(text.substring(0, maxChars - totalChars));
|
|
||||||
return false; // signal stop
|
|
||||||
}
|
|
||||||
parts.push(text);
|
parts.push(text);
|
||||||
totalChars += text.length;
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
for (const line of lines) {
|
for (const line of lines) {
|
||||||
if (!line.trim()) continue;
|
if (!line.trim()) continue;
|
||||||
|
|
@ -178,21 +184,44 @@ function extractTranscriptFromActivity(raw, maxChars = 30_000) {
|
||||||
if (Array.isArray(entry.content)) {
|
if (Array.isArray(entry.content)) {
|
||||||
for (const block of entry.content) {
|
for (const block of entry.content) {
|
||||||
if (block.type === "text" && block.text) {
|
if (block.type === "text" && block.text) {
|
||||||
if (!appendText(block.text)) return parts.join("\n\n");
|
appendText(block.text);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (typeof entry.content === "string" && entry.content) {
|
} else if (typeof entry.content === "string" && entry.content) {
|
||||||
if (!appendText(entry.content)) return parts.join("\n\n");
|
appendText(entry.content);
|
||||||
}
|
}
|
||||||
// Also read plain text/content field on custom_message entries
|
// Also read plain text/content field on custom_message entries
|
||||||
if (entry.text && typeof entry.text === "string") {
|
if (entry.text && typeof entry.text === "string") {
|
||||||
if (!appendText(entry.text)) return parts.join("\n\n");
|
appendText(entry.text);
|
||||||
}
|
}
|
||||||
} catch {
|
} catch {
|
||||||
// Skip malformed lines
|
// Skip malformed lines
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return parts.join("\n\n");
|
const joined = parts.join("\n\n");
|
||||||
|
if (joined.length <= maxChars) return joined;
|
||||||
|
return truncateMiddle(joined, maxChars);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Keep N from front + M from back of `text`, drop the middle.
 *
 * Split ratio is back-weighted (default 25/75) because in an
 * execute-task transcript the durable signal — what was committed,
 * what verified, what the operator decided — concentrates at the
 * tail. The front share keeps enough goal/early-reasoning context
 * to anchor the extraction; the back share keeps the outcomes.
 *
 * Exported (named export below) for the dedicated unit test that
 * pins the ratio + the truncation marker.
 *
 * @param text - Input string to shorten.
 * @param maxChars - Hard cap on the returned string's length.
 * @param frontShare - Fraction of the character budget kept from the front.
 * @returns `text` unchanged when it fits, else front + marker + back,
 *   never longer than `maxChars`.
 */
export function truncateMiddle(text, maxChars, frontShare = 0.25) {
  if (text.length <= maxChars) return text;
  const marker = "\n\n[…truncated middle…]\n\n";
  // Degenerate cap: if maxChars can't even fit the marker, the
  // front+marker+back form would EXCEED the cap (the old
  // `Math.max(0, …)` budget clamp returned just the marker, which is
  // longer than maxChars). Fall back to plain front truncation so the
  // hard `maxChars` guarantee always holds.
  if (maxChars <= marker.length) return text.slice(0, maxChars);
  const budget = maxChars - marker.length;
  const frontLen = Math.floor(budget * frontShare);
  const backLen = budget - frontLen;
  return text.slice(0, frontLen) + marker + text.slice(text.length - backLen);
}
|
||||||
// ─── Response Parsing ───────────────────────────────────────────────────────
|
// ─── Response Parsing ───────────────────────────────────────────────────────
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@
|
||||||
import { createMemoryRelation } from "./memory-relations.js";
|
import { createMemoryRelation } from "./memory-relations.js";
|
||||||
import {
|
import {
|
||||||
_getAdapter,
|
_getAdapter,
|
||||||
|
computeStaticMemoryScore,
|
||||||
decayMemoriesBefore,
|
decayMemoriesBefore,
|
||||||
deleteMemoryEmbedding,
|
deleteMemoryEmbedding,
|
||||||
incrementMemoryHitCount,
|
incrementMemoryHitCount,
|
||||||
|
|
@ -78,7 +79,10 @@ function rankMemoriesByLexicalQuery(memories, query, limit) {
|
||||||
0,
|
0,
|
||||||
);
|
);
|
||||||
const lexicalScore = lexicalHits / queryTokens.length;
|
const lexicalScore = lexicalHits / queryTokens.length;
|
||||||
const staticScore = memory.confidence * (1 + memory.hit_count * 0.1);
|
const staticScore = computeStaticMemoryScore(
|
||||||
|
memory.confidence,
|
||||||
|
memory.hit_count,
|
||||||
|
);
|
||||||
return {
|
return {
|
||||||
memory,
|
memory,
|
||||||
index,
|
index,
|
||||||
|
|
@ -125,8 +129,13 @@ export function getActiveMemories() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Get active memories ordered by ranking score: confidence * (1 + hit_count * 0.1).
|
* Get active memories ordered by ranking score (computeStaticMemoryScore).
|
||||||
* Higher-scored memories are more relevant and frequently confirmed.
|
* Higher-scored memories are more relevant and frequently confirmed.
|
||||||
|
*
|
||||||
|
* Sorting happens in JS because the canonical scoring formula uses
|
||||||
|
* log(), which the SQLite adapter doesn't reliably ship as a function
|
||||||
|
* across builds. The pool is bounded by `WHERE superseded_by IS NULL`
|
||||||
|
* which for typical projects (10s–1000s of rows) is cheap to sort.
|
||||||
*/
|
*/
|
||||||
export function getActiveMemoriesRanked(limit = 30) {
|
export function getActiveMemoriesRanked(limit = 30) {
|
||||||
if (!isDbAvailable()) return [];
|
if (!isDbAvailable()) return [];
|
||||||
|
|
@ -134,12 +143,17 @@ export function getActiveMemoriesRanked(limit = 30) {
|
||||||
if (!adapter) return [];
|
if (!adapter) return [];
|
||||||
try {
|
try {
|
||||||
const rows = adapter
|
const rows = adapter
|
||||||
.prepare(`SELECT * FROM memories
|
.prepare(`SELECT * FROM memories WHERE superseded_by IS NULL`)
|
||||||
WHERE superseded_by IS NULL
|
.all();
|
||||||
ORDER BY (confidence * (1.0 + hit_count * 0.1)) DESC
|
return rows
|
||||||
LIMIT :limit`)
|
.map(rowToMemory)
|
||||||
.all({ ":limit": limit });
|
.map((m) => ({
|
||||||
return rows.map(rowToMemory);
|
m,
|
||||||
|
score: computeStaticMemoryScore(m.confidence, m.hit_count),
|
||||||
|
}))
|
||||||
|
.sort((a, b) => b.score - a.score)
|
||||||
|
.slice(0, limit)
|
||||||
|
.map((entry) => entry.m);
|
||||||
} catch {
|
} catch {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
@ -211,7 +225,7 @@ export async function getRelevantMemoriesRanked(query, limit = 10) {
|
||||||
let ranked = rankMemoriesByEmbedding(
|
let ranked = rankMemoriesByEmbedding(
|
||||||
mergedPool.map((m) => ({
|
mergedPool.map((m) => ({
|
||||||
id: m.id,
|
id: m.id,
|
||||||
staticScore: m.confidence * (1 + m.hit_count * 0.1),
|
staticScore: computeStaticMemoryScore(m.confidence, m.hit_count),
|
||||||
})),
|
})),
|
||||||
queryVec,
|
queryVec,
|
||||||
embeddingMap,
|
embeddingMap,
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,33 @@
|
||||||
import { SF_STALE_STATE, SFError } from "../errors.js";
|
import { SF_STALE_STATE, SFError } from "../errors.js";
|
||||||
import { _getAdapter, intBool, parseJsonObject } from "./sf-db-core.js";
|
import { _getAdapter, intBool, parseJsonObject } from "./sf-db-core.js";
|
||||||
|
|
||||||
|
/**
 * Canonical static-score formula for an active memory.
 *
 * Previously: confidence * (1 + hit_count * 0.1). Linear in hit_count,
 * which compounds: a memory that surfaces once gets hit_count++, ranks
 * higher, surfaces again, ranks higher still — self-reinforcing
 * popularity, not relevance.
 *
 * Now: confidence * (1 + log(1 + hit_count) * 0.5). Identical at
 * hit_count=0 (factor 1.0), close at low counts (hit_count=2 ≈ 1.55 vs
 * old 1.2), flattens fast (hit_count=10 ≈ 2.20 vs old 2.0), and stays
 * bounded under runaway hits (hit_count=100 ≈ 3.30 vs old 11.0).
 *
 * Used by: memory-store ranked queries, memory-tools search ranker, and
 * the supersedeLowestRankedMemories pruner in this file — one formula
 * everywhere, so no SQL/JS skew is possible.
 *
 * Confidence in [0..1] by convention; hit_count a non-negative integer.
 * Defensive against NaN / negative inputs because the DB columns are
 * free-form numeric.
 */
export function computeStaticMemoryScore(confidence, hitCount) {
  // Collapse NaN / negative inputs to 0 so a bad row can't poison sorts.
  const safeConfidence =
    Number.isFinite(confidence) && confidence > 0 ? confidence : 0;
  const safeHits = Number.isFinite(hitCount) && hitCount > 0 ? hitCount : 0;
  return safeConfidence * (1 + Math.log(1 + safeHits) * 0.5);
}
|
||||||
|
|
||||||
export function getActiveMemories({ category, limit = 200 } = {}) {
|
export function getActiveMemories({ category, limit = 200 } = {}) {
|
||||||
const currentDb = _getAdapter();
|
const currentDb = _getAdapter();
|
||||||
if (!currentDb) return [];
|
if (!currentDb) return [];
|
||||||
|
|
@ -198,15 +225,38 @@ export function expireStaleMemories(unstartedTtlDays = 28, maxTtlDays = 90) {
|
||||||
/**
 * Retire the `limit` lowest-ranked active memories (marks them
 * superseded by 'CAP_EXCEEDED').
 *
 * Ranking happens in JS via computeStaticMemoryScore so the pruner
 * shares one scoring formula with memory-store's read paths.
 *
 * @param limit - How many memories to retire; non-positive/NaN is a no-op.
 * @param now - Timestamp written into updated_at.
 * @throws SFError(SF_STALE_STATE) when no database is open.
 */
export function supersedeLowestRankedMemories(limit, now) {
  const currentDb = _getAdapter();
  if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
  if (!Number.isFinite(limit) || limit <= 0) return;

  // Score every active row client-side with the shared formula, then
  // keep only the `limit` lowest-scoring ids as victims.
  const rows = currentDb
    .prepare(
      `SELECT id, confidence, hit_count FROM memories WHERE superseded_by IS NULL`,
    )
    .all();
  if (rows.length === 0) return;

  const victims = rows
    .map((row) => ({
      id: row.id,
      score: computeStaticMemoryScore(row.confidence, row.hit_count),
    }))
    .sort((left, right) => left.score - right.score)
    .slice(0, limit);
  if (victims.length === 0) return;

  // Parameterised IN-list — one named placeholder per id, so the SQL is
  // never built by concatenating values into the string.
  const placeholders = victims.map((_, i) => `:id${i}`).join(", ");
  const params = { ":now": now };
  victims.forEach((victim, i) => {
    params[`:id${i}`] = victim.id;
  });
  currentDb
    .prepare(
      `UPDATE memories SET superseded_by = 'CAP_EXCEEDED', updated_at = :now
      WHERE id IN (${placeholders})`,
    )
    .run(params);
}
|
||||||
|
|
||||||
export function insertMemorySourceRow(args) {
|
export function insertMemorySourceRow(args) {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,79 @@
|
||||||
|
/**
|
||||||
|
* Test the canonical static memory score formula.
|
||||||
|
*
|
||||||
|
* Previous formula `confidence * (1 + hit_count * 0.1)` was linear
|
||||||
|
* in hit_count → self-reinforcing popularity (a memory that surfaces
|
||||||
|
* once gets hit_count++, ranks higher, surfaces again, hits again...).
|
||||||
|
*
|
||||||
|
* Replacement `confidence * (1 + log(1 + hit_count) * 0.5)` keeps
|
||||||
|
* the curve at hit_count=0 identical, climbs noticeably at low hits
|
||||||
|
* (1.55× at h=2 vs 1.2× before — old was *too* flat there), and
|
||||||
|
* flattens fast (3.30× at h=100 vs the runaway 11.0× under the
|
||||||
|
* linear formula).
|
||||||
|
*
|
||||||
|
* This test pins:
|
||||||
|
* - identity at h=0
|
||||||
|
* - monotonic non-decreasing in h
|
||||||
|
* - bounded growth (no value above an upper guardrail)
|
||||||
|
* - defensiveness against NaN / negative inputs
|
||||||
|
*/
|
||||||
|
|
||||||
|
import assert from "node:assert/strict";
|
||||||
|
import { test } from "vitest";
|
||||||
|
import { computeStaticMemoryScore } from "../sf-db/sf-db-memory.js";
|
||||||
|
|
||||||
|
// At zero hits the log term vanishes, so the score must be exactly the
// raw confidence for any confidence value.
test("identity at hit_count=0", () => {
  for (const confidence of [0.8, 0.5, 1.0]) {
    assert.equal(computeStaticMemoryScore(confidence, 0), confidence);
  }
});

// More hits must never lower the score.
test("monotonic non-decreasing in hit_count", () => {
  const c = 0.7;
  let prev = computeStaticMemoryScore(c, 0);
  for (let h = 1; h <= 1000; h++) {
    const curr = computeStaticMemoryScore(c, h);
    assert.ok(curr >= prev, `non-monotonic at h=${h}: ${curr} < ${prev}`);
    prev = curr;
  }
});

// Hand-computed pins for confidence * (1 + log(1+h)*0.5):
//   h=100   → 1 + log(101)*0.5   ≈ 3.308
//   h=10000 → 1 + log(10001)*0.5 ≈ 5.605
test("growth is bounded under runaway hit counts (log-shaped)", () => {
  const heavy = computeStaticMemoryScore(1.0, 10000);
  assert.ok(heavy < 7, `score at h=10000 too high: ${heavy}`);
  const veryHeavy = computeStaticMemoryScore(1.0, 1_000_000);
  assert.ok(veryHeavy < 10, `score at h=1M too high: ${veryHeavy}`);
});

// Higher confidence must never rank below lower confidence at the same
// hit count.
test("monotonic in confidence at fixed hit_count", () => {
  for (let h = 0; h <= 100; h += 10) {
    const low = computeStaticMemoryScore(0.2, h);
    const mid = computeStaticMemoryScore(0.5, h);
    const high = computeStaticMemoryScore(0.9, h);
    assert.ok(low <= mid, `low<=mid at h=${h}: ${low} ${mid}`);
    assert.ok(mid <= high, `mid<=high at h=${h}: ${mid} ${high}`);
  }
});

// The DB columns are free-form numeric, so garbage must degrade to a
// harmless score instead of producing NaN (which breaks Array.sort).
test("NaN / negative inputs collapse to 0 rather than poisoning sorts", () => {
  assert.equal(computeStaticMemoryScore(Number.NaN, 5), 0);
  assert.equal(computeStaticMemoryScore(-0.5, 5), 0);
  // Negative or NaN hit_count clamps to 0 → returns base confidence.
  assert.equal(computeStaticMemoryScore(0.7, -3), 0.7);
  assert.equal(computeStaticMemoryScore(0.7, Number.NaN), 0.7);
});

// Central property of the fix: at h=100 the old linear formula gave
// 11× amplification; the log formula must come in strictly below it.
test("regression: new formula tames the runaway compared to linear", () => {
  const linearAtHundred = 0.5 * (1 + 100 * 0.1); // old formula → 5.5
  const newAtHundred = computeStaticMemoryScore(0.5, 100);
  assert.ok(
    newAtHundred < linearAtHundred,
    `new (${newAtHundred}) should be < linear (${linearAtHundred}) at h=100`,
  );
});
|
||||||
|
|
@ -0,0 +1,80 @@
|
||||||
|
/**
|
||||||
|
* Test the front+back transcript truncation in memory-extractor.
|
||||||
|
*
|
||||||
|
* Previously: front-preserving truncation dropped the tail, where
|
||||||
|
* the highest-signal content for memory extraction lives (final
|
||||||
|
* summary, what got committed, verification outcome).
|
||||||
|
*
|
||||||
|
* Now: keep ~25% from front + ~75% from back with a marker in
|
||||||
|
* between. This test pins the invariants:
|
||||||
|
* - identity for short inputs
|
||||||
|
* - both ends survive when text exceeds cap
|
||||||
|
* - the marker is present so the LLM knows about the elision
|
||||||
|
* - output length never exceeds maxChars
|
||||||
|
*/
|
||||||
|
|
||||||
|
import assert from "node:assert/strict";
|
||||||
|
import { test } from "vitest";
|
||||||
|
import { truncateMiddle } from "../memory-extractor.js";
|
||||||
|
|
||||||
|
// Short inputs must pass through untouched — including the empty string.
test("identity when text fits", () => {
  assert.equal(truncateMiddle("hello world", 30_000), "hello world");
  assert.equal(truncateMiddle("", 30_000), "");
});

// Both ends must survive a heavy cut, with the marker standing in for
// the elided middle.
test("keeps front and back, drops middle, inserts marker", () => {
  // Sizes chosen so the surviving front + back ≪ middle, so the
  // middle is meaningfully dropped (not just partly nibbled).
  const head = "FRONT".repeat(1000); // 5K
  const bulk = "MIDDLE".repeat(20_000); // 120K
  const tailEnd = "BACK".repeat(1000); // 4K
  const input = head + bulk + tailEnd; // 129K
  const result = truncateMiddle(input, 10_000);
  assert.ok(result.length <= 10_000, `output too long: ${result.length}`);
  assert.match(result, /\[…truncated middle…\]/, "marker missing");
  // Original front survives (first slice of FRONT pattern).
  assert.ok(result.startsWith("FRONT"), "front not preserved");
  // Original back survives (last slice ends with BACK pattern).
  assert.ok(result.endsWith("BACK"), "back not preserved");
  // Most of the giant middle is dropped — output is dramatically
  // smaller than the input (>10× compression).
  assert.ok(result.length < input.length / 10, `expected heavy compression`);
});

// Default split keeps noticeably more tail than front.
test("back-weighted split (75% back vs 25% front by default)", () => {
  // Each char identifies its position bucket: F=front, M=middle, B=back.
  const input = "F".repeat(10_000) + "M".repeat(10_000) + "B".repeat(10_000); // 30K
  const result = truncateMiddle(input, 10_000);
  const [head, tail] = result.split("[…truncated middle…]");
  // Front share ≈ 25% of (10_000 - marker_len) ≈ ~2.5K. Back ≈ 7.5K.
  assert.ok(
    head.length < tail.length,
    `expected back to be larger than front: front=${head.length} back=${tail.length}`,
  );
  assert.ok(
    tail.length > head.length * 2,
    `expected back to be > 2× front: front=${head.length} back=${tail.length}`,
  );
});

// The frontShare parameter controls the split ratio directly.
test("respects custom frontShare", () => {
  const result = truncateMiddle("X".repeat(20_000), 5_000, 0.5);
  const [head, tail] = result.split("[…truncated middle…]");
  // 50/50 split.
  assert.ok(
    Math.abs(head.length - tail.length) <= 1,
    `50/50 split should be balanced: front=${head.length} back=${tail.length}`,
  );
});

// Hard invariant: the marker's own length never pushes output past the cap.
test("output length never exceeds maxChars even with marker", () => {
  for (const cap of [100, 500, 1_000, 30_000]) {
    const result = truncateMiddle("Z".repeat(cap * 3), cap);
    assert.ok(
      result.length <= cap,
      `cap=${cap}: output length ${result.length} exceeded cap`,
    );
  }
});
|
||||||
|
|
@ -9,7 +9,7 @@ import {
|
||||||
getActiveMemoriesRanked,
|
getActiveMemoriesRanked,
|
||||||
reinforceMemory,
|
reinforceMemory,
|
||||||
} from "../memory-store.js";
|
} from "../memory-store.js";
|
||||||
import { isDbAvailable } from "../sf-db.js";
|
import { computeStaticMemoryScore, isDbAvailable } from "../sf-db.js";
|
||||||
|
|
||||||
function dbUnavailable(operation) {
|
function dbUnavailable(operation) {
|
||||||
return {
|
return {
|
||||||
|
|
@ -97,7 +97,7 @@ export function executeMemoryQuery(params) {
|
||||||
});
|
});
|
||||||
const ranked = filtered.slice(0, k).map((memory) => ({
|
const ranked = filtered.slice(0, k).map((memory) => ({
|
||||||
memory,
|
memory,
|
||||||
score: memory.confidence * (1 + memory.hit_count * 0.1),
|
score: computeStaticMemoryScore(memory.confidence, memory.hit_count),
|
||||||
}));
|
}));
|
||||||
const hits = ranked.map((r) => ({
|
const hits = ranked.map((r) => ({
|
||||||
id: r.memory.id,
|
id: r.memory.id,
|
||||||
|
|
|
||||||
59
src/tests/headless-status.test.ts
Normal file
59
src/tests/headless-status.test.ts
Normal file
|
|
@ -0,0 +1,59 @@
|
||||||
|
/**
|
||||||
|
* headless-status.test.ts — direct headless status rendering.
|
||||||
|
*
|
||||||
|
* Purpose: prevent `sf headless status` from regressing back to the
|
||||||
|
* interactive RPC/v2 path when it only needs DB-backed project state.
|
||||||
|
*/
|
||||||
|
import assert from "node:assert/strict";
|
||||||
|
import { test } from "vitest";
|
||||||
|
import type { QuerySnapshot } from "../headless-query.js";
|
||||||
|
import { formatHeadlessStatus } from "../headless-status.js";
|
||||||
|
|
||||||
|
function snapshot(overrides: Partial<QuerySnapshot> = {}): QuerySnapshot {
|
||||||
|
return {
|
||||||
|
schemaVersion: 1,
|
||||||
|
state: {
|
||||||
|
activeMilestone: { id: "M001", title: "Runtime Hardening" },
|
||||||
|
activeSlice: { id: "S01", title: "Headless Status" },
|
||||||
|
activeTask: { id: "T01", title: "Bypass v2 init" },
|
||||||
|
phase: "executing",
|
||||||
|
recentDecisions: [],
|
||||||
|
blockers: [],
|
||||||
|
nextAction: "Execute T01.",
|
||||||
|
registry: [{ id: "M001", title: "Runtime Hardening", status: "active" }],
|
||||||
|
requirements: {
|
||||||
|
active: 0,
|
||||||
|
validated: 0,
|
||||||
|
deferred: 0,
|
||||||
|
outOfScope: 0,
|
||||||
|
blocked: 0,
|
||||||
|
total: 0,
|
||||||
|
},
|
||||||
|
progress: {
|
||||||
|
milestones: { done: 0, total: 1 },
|
||||||
|
slices: { done: 0, total: 1 },
|
||||||
|
tasks: { done: 0, total: 1 },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
next: {
|
||||||
|
action: "dispatch",
|
||||||
|
unitType: "execute-task",
|
||||||
|
unitId: "M001/S01/T01",
|
||||||
|
},
|
||||||
|
cost: { workers: [], total: 0 },
|
||||||
|
runtime: { units: [] },
|
||||||
|
uokDiagnostics: { verdict: "clear", classification: "healthy" },
|
||||||
|
schedule: { pending_count: 0, overdue_count: 0, due: [], upcoming: [] },
|
||||||
|
...overrides,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
test("formatHeadlessStatus_when_snapshot_available_renders_text_without_rpc", () => {
|
||||||
|
const rendered = formatHeadlessStatus(snapshot());
|
||||||
|
|
||||||
|
assert.match(rendered, /^SF Status/);
|
||||||
|
assert.match(rendered, /Phase: executing/);
|
||||||
|
assert.match(rendered, /Active milestone: M001 - Runtime Hardening/);
|
||||||
|
assert.match(rendered, /Dispatch: dispatch execute-task M001\/S01\/T01/);
|
||||||
|
assert.match(rendered, /UOK: clear \(healthy\)/);
|
||||||
|
});
|
||||||
Loading…
Add table
Reference in a new issue