fix: shard sift search and project requirements

This commit is contained in:
Mikael Hugo 2026-05-17 15:38:55 +02:00
parent 3e5b6fc511
commit 87e9729c13
18 changed files with 1630 additions and 153 deletions

View file

@ -1,5 +1,10 @@
.git/**
.sf/**
# Re-include the live requirements corpus projection so agents can semantic-search R-entries.
!.sf/
!.sf/runtime/
!.sf/runtime/sift/
!.sf/runtime/sift/requirements-projected.md
.bg-shell/**
.pytest_cache/**
.venv/**

View file

@ -66,11 +66,34 @@ fi
# automatically when this process exits. Skip in known read-only
# modes (logs, status, dash, sessions, list, version, help) where
# concurrent reads are safe and useful.
case "${1:-}" in
logs|status|dash|sessions|list|--version|-v|--help|-h)
: # read-only — no lock needed
#
# Top-level read-only commands skip the lock. ALSO skip when $1=headless
# AND $2 is a read-only subcommand — otherwise the operator can't even
# check SF's state while autonomous mode is running (regression observed
# 2026-05-17 when `sf headless query` / `feedback list` / --help were
# rejected with "Another sf is already running" despite being pure reads).
case "${1:-} ${2:-}" in
"logs "*|"status "*|"dash "*|"sessions "*|"list "*|"--version "*|"-v "*|"--help "*|"-h "*)
: # top-level read-only — no lock needed
;;
"headless --help"*|"headless -h"*|"headless --version"*|"headless -v"*|"headless query"*|"headless status"*|"headless usage"*|"headless reflect"*|"headless ")
: # headless read-only subcommand — no lock needed
;;
"headless feedback"*|"headless triage"*)
# `feedback list` and `triage --list` are read-only; `feedback add/resolve`
# and `triage --run/--apply` are writes. Allow the read-only forms only.
if [[ "$*" == *" list"* || "$*" == *" --list"* || "$*" == *" --json"* ]]; then
: # read-only inspect
else
__SF_NEEDS_LOCK=1
fi
;;
*)
__SF_NEEDS_LOCK=1
;;
esac
case "${__SF_NEEDS_LOCK:-}" in
1)
if [[ -z "${SF_SKIP_LOCK:-}" ]]; then
SF_PROJECT_LOCK_DIR="$(pwd)/.sf"
mkdir -p "$SF_PROJECT_LOCK_DIR" 2>/dev/null || true

View file

@ -538,6 +538,19 @@ export function registerHooks(pi, ecosystemHandlers = []) {
} catch {
/* non-fatal — codex catalog refresh must never block session start */
}
// Refresh the requirements corpus projection before Sift warms up so
// the live R-entries are in the index from the first search. The
// projection writes .sf/runtime/sift/requirements-projected.md from
// the DB; .siftignore has a negation for that path so root-scope
// warmup picks it up automatically.
try {
const { buildRequirementsProjection } = await import(
"../sf-db/requirements-sift-projection.js"
);
buildRequirementsProjection(process.cwd());
} catch {
/* non-fatal — requirements projection must never block session start */
}
// Pre-warm the Sift search index so the first agent query in this
// session doesn't pay the cold-build cost. ensureSiftIndexWarmup
// (code-intelligence.js) already runs the full hybrid+vector+

View file

@ -15,6 +15,67 @@ import {
writeFileSync,
} from "node:fs";
import { delimiter, isAbsolute, join, relative, resolve } from "node:path";
import { getErrorMessage } from "./error-utils.js";
/**
 * Lists bounded Sift scopes that approximate a whole-repo search.
 *
 * Purpose: keep root-level "search all" responsive by fanning out across
 * top-level project areas, including the generated requirements projection.
 *
 * Consumer: `resolveSiftSearchScopes` for `sift_search` and `codebase_search`.
 *
 * Entries are paths relative to the project root; shards that do not exist
 * on disk are filtered out by `resolveSiftSearchScopes` before use.
 */
export const SIFT_ALL_REPO_SHARDS = [
  "src",
  "packages",
  "web",
  "docs",
  "scripts",
  "rust-engine",
  "tests",
  ".github",
  "docker",
  // Single generated file — a file path is a valid shard scope here.
  ".sf/runtime/sift/requirements-projected.md",
];
/**
 * Determine whether a requested Sift scope resolves to the repository root.
 *
 * Missing, empty, or whitespace-only scopes are treated as ".". Relative
 * scopes are resolved against `projectRoot` before comparison.
 */
export function isSiftRepoRootScope(scopePath, projectRoot) {
  const root = resolve(projectRoot);
  let requested = ".";
  if (typeof scopePath === "string" && scopePath.trim() !== "") {
    requested = scopePath.trim();
  }
  const candidate = isAbsolute(requested)
    ? resolve(requested)
    : resolve(root, requested);
  return candidate === root;
}
/**
 * Resolve a requested Sift scope into one or more executable scopes.
 *
 * Purpose: avoid passing repo root (`.`) directly to Sift in normal agent
 * workflows. Root searches are measurably pathological in this repo, while
 * top-level shard searches remain bounded and responsive.
 *
 * Consumer: `sift_search`, `codebase_search`, and future Sift warmup/search
 * wrappers that need whole-repo coverage without root-scope hangs.
 */
export function resolveSiftSearchScopes(projectRoot, scope) {
  const requested = resolveSiftSearchScope(projectRoot, scope);
  // Non-root scopes run as-is; sharding only applies to whole-repo search.
  if (!isSiftRepoRootScope(requested, projectRoot)) {
    return { originalScope: requested, scopes: [requested], sharded: false };
  }
  // Fan the root scope out across whichever shard paths actually exist.
  const existing = [];
  for (const shard of SIFT_ALL_REPO_SHARDS) {
    if (existsSync(resolve(projectRoot, shard))) existing.push(shard);
  }
  if (existing.length === 0) {
    // No shard exists — fall back to the root scope itself.
    return { originalScope: requested, scopes: [requested], sharded: false };
  }
  return { originalScope: requested, scopes: existing, sharded: true };
}
/**
* Choose sift retrievers based on scope size.
@ -34,21 +95,12 @@ import { delimiter, isAbsolute, join, relative, resolve } from "node:path";
* For scoped paths: { retrievers: "bm25,phrase,vector", reranking: "position-aware" }
*/
export function chooseSiftRetrievers(scopePath, projectRoot) {
const normalizedRoot = resolve(projectRoot);
const requested =
typeof scopePath === "string" && scopePath.trim() ? scopePath.trim() : ".";
const absolute = isAbsolute(requested)
? resolve(requested)
: resolve(normalizedRoot, requested);
const isRepoRoot = absolute === normalizedRoot;
if (isRepoRoot) {
if (isSiftRepoRootScope(scopePath, projectRoot)) {
return { retrievers: "bm25,phrase", reranking: "none" };
}
return { retrievers: "bm25,phrase,vector", reranking: "position-aware" };
}
import { getErrorMessage } from "./error-utils.js";
const SIFT_BINARY_NAME = process.platform === "win32" ? "sift.exe" : "sift";
const DEFAULT_SIFT_WARMUP_TTL_MS = 6 * 60 * 60 * 1000;
const DEFAULT_SIFT_WARMUP_QUERY =

View file

@ -110,8 +110,10 @@ export function generateDecisionsMd(decisions) {
/**
 * Status values that map to specific sections, in display order.
 * Must stay in sync with STATUS_SECTIONS in the parser so a write/parse
 * round-trip preserves every section.
 */
const STATUS_SECTION_MAP = [
  { status: "active", heading: "Active" },
  { status: "resolved", heading: "Resolved" },
  { status: "validated", heading: "Validated" },
  { status: "deferred", heading: "Deferred" },
  { status: "superseded", heading: "Superseded" },
  { status: "cancelled", heading: "Cancelled" },
  // Hyphenated status value renders as a spaced heading.
  { status: "out-of-scope", heading: "Out of Scope" },
];

View file

@ -115,8 +115,10 @@ export function parseDecisionsTable(content) {
// ─── REQUIREMENTS.md Parser ────────────────────────────────────────────────
// Maps "## <heading>" lines to their canonical status value. Keys are
// pre-lowercased — NOTE(review): presumably the parser lowercases heading
// lines before lookup; confirm against the parse loop. Must stay in sync
// with the writer's STATUS_SECTION_MAP (db-writer) for round-trip safety.
const STATUS_SECTIONS = {
  "## active": "active",
  "## resolved": "resolved",
  "## validated": "validated",
  "## deferred": "deferred",
  "## superseded": "superseded",
  "## cancelled": "cancelled",
  "## out of scope": "out-of-scope",
};

View file

@ -0,0 +1,398 @@
/**
* requirement-status-sweeper.js auto-transition R-entries based on DB state.
*
* Purpose: walk requirements rows with status='active' and transition each
* to 'validated' / 'resolved' / 'superseded' when the supporting evidence
* indicates the work is done.
*
* Three rules:
* 1. Operational R with `Source IDs: sf-...` in description if ALL source
* self_feedback rows have resolved_at != NULL set status='resolved'
* and resolved_reason='evidence-backlog-drained-by-sweeper-<date>'.
* 2. Capability/architecture/functional R with non-empty supporting_slices
* JSON array if ALL referenced slice rows have status='complete' set
* status='validated'.
* 3. R with supersedes_by pointing to a row where that row's status
* {'resolved','validated','superseded','cancelled'} set this R's
* status='superseded'.
*
* Logs each transition as self-feedback (kind: 'r-entry-auto-transition',
* severity: 'low') with {fromStatus, toStatus, rule, reason}.
*
* Consumer: UokGate (type=verification, id=requirement-status-sweeper). NOT
* registered here registration is a follow-up slice once Layer A lands
* its parallel edit of gate-registry-bootstrap.js.
*/
import { _getAdapter } from "./sf-db-core.js";
import { insertSelfFeedbackEntry } from "./sf-db-self-feedback.js";
// ─── Constants ────────────────────────────────────────────────────────────────
const ALL_RULES = [
"operational-evidence-drained",
"supporting-slices-complete",
"supersedes-resolved",
];
const SUPERSEDED_TERMINAL_STATUSES = new Set([
"resolved",
"validated",
"superseded",
"cancelled",
]);
// ─── Helpers ──────────────────────────────────────────────────────────────────
/**
 * Parse `Source IDs: sf-xxx, sf-yyy` from a description string.
 * Returns an array of IDs, or [] if the marker is absent.
 */
function parseSourceIds(description) {
  if (typeof description !== "string") return [];
  const MARKER = "Source IDs:";
  const markerAt = description.indexOf(MARKER);
  if (markerAt === -1) return [];
  // Scan from just past the marker up to the first blank line (or the end).
  const tail = description.slice(markerAt + MARKER.length);
  const blankAt = tail.search(/\n\s*\n/);
  const segment = blankAt === -1 ? tail : tail.slice(0, blankAt);
  const ids = [];
  for (const piece of segment.split(",")) {
    const trimmed = piece.trim();
    if (trimmed !== "") ids.push(trimmed);
  }
  return ids;
}
/**
 * Parse the supporting_slices JSON field — an array of
 * "milestoneId/sliceId" strings.
 * Returns [] for any parse failure or empty array.
 */
function parseSupportingSlices(raw) {
  if (typeof raw !== "string") return [];
  const trimmed = raw.trim();
  if (trimmed === "" || trimmed === "[]") return [];
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return [];
  }
  if (!Array.isArray(parsed)) return [];
  // Only keep non-empty string refs; anything else is silently dropped.
  return parsed.filter((entry) => typeof entry === "string" && entry.length > 0);
}
/**
 * Record a self-feedback entry for an auto-transition (non-fatal).
 *
 * @param {object} req - requirement row being transitioned ({id, status, ...}).
 * @param {string} toStatus - destination status.
 * @param {string} rule - id of the rule that fired.
 * @param {string} reason - human-readable justification for the transition.
 */
function logTransition(req, toStatus, rule, reason) {
  try {
    const ts = new Date().toISOString();
    const id = `sf-sweeper-${req.id}-${rule}-${Date.now()}`;
    // Build the shared payload once. The previous version duplicated every
    // field between the top-level entry and full_json, which let the two
    // copies drift; spreading one object guarantees they stay identical.
    const base = {
      id,
      ts,
      kind: "r-entry-auto-transition",
      severity: "low",
      blocking: false,
      repoIdentity: "forge",
      sfVersion: "",
      basePath: "",
      summary: `R-entry ${req.id} auto-transitioned ${req.status}→${toStatus} via rule ${rule}`,
      evidence: JSON.stringify({
        fromStatus: req.status,
        toStatus,
        rule,
        reason,
      }),
      suggestedFix: "",
    };
    insertSelfFeedbackEntry({
      ...base,
      // full_json additionally carries the structured transition fields.
      full_json: JSON.stringify({
        ...base,
        fromStatus: req.status,
        toStatus,
        rule,
        reason,
      }),
    });
  } catch {
    // non-fatal — logging must never break the sweeper.
  }
}
/**
 * Record a dangling-chain warning as self-feedback (non-fatal).
 *
 * @param {string} reqId - id of the R-entry holding the broken reference.
 * @param {string} supersededById - referenced id that does not exist.
 */
function logDanglingChain(reqId, supersededById) {
  try {
    const ts = new Date().toISOString();
    const id = `sf-sweeper-dangling-${reqId}-${Date.now()}`;
    // Single payload shared by the entry and its full_json copy so the two
    // representations cannot drift apart (the previous version duplicated
    // every field by hand).
    const base = {
      id,
      ts,
      kind: "r-entry-dangling-supersedes",
      severity: "low",
      blocking: false,
      repoIdentity: "forge",
      sfVersion: "",
      basePath: "",
      summary: `R-entry ${reqId} references supersedes_by=${supersededById} which does not exist`,
      evidence: JSON.stringify({ reqId, supersededById }),
      suggestedFix: "Review and correct the supersedes_by reference",
    };
    insertSelfFeedbackEntry({ ...base, full_json: JSON.stringify(base) });
  } catch {
    // non-fatal — logging must never break the sweeper.
  }
}
// ─── Rule implementations ─────────────────────────────────────────────────────
/**
 * Rule 1: operational-evidence-drained.
 * Operational R entries carry "Source IDs: sf-..." in their description.
 * If every listed self_feedback row is resolved, the R transitions to
 * 'resolved'. Returns a transition descriptor, or null if the rule
 * does not fire.
 */
function applyOperationalEvidenceDrained(db, req) {
  if (!req.description) return null;
  const sourceIds = parseSourceIds(req.description);
  if (sourceIds.length === 0) return null;
  // Every referenced entry must exist AND carry a resolved_at timestamp;
  // a single missing or unresolved row suppresses the rule.
  const lookup = db.prepare(
    "SELECT resolved_at FROM self_feedback WHERE id = ?",
  );
  const allResolved = sourceIds.every((sfId) => {
    const row = lookup.get(sfId);
    return Boolean(row && row["resolved_at"]);
  });
  if (!allResolved) return null;
  const date = new Date().toISOString().slice(0, 10);
  const reason = `evidence-backlog-drained-by-sweeper-${date}`;
  return {
    toStatus: "resolved",
    rule: "operational-evidence-drained",
    reason,
    updates: { status: "resolved", notes: reason },
  };
}
/**
 * Rule 2: supporting-slices-complete.
 * Capability/architecture/functional R entries carry a non-empty
 * supporting_slices array. If every referenced slice is complete, the R
 * transitions to 'validated'. Returns a transition descriptor or null.
 */
function applySupportingSlicesComplete(db, req) {
  const sliceRefs = parseSupportingSlices(req.supporting_slices);
  if (sliceRefs.length === 0) return null;
  const lookup = db.prepare(
    "SELECT status FROM slices WHERE milestone_id = ? AND id = ?",
  );
  for (const ref of sliceRefs) {
    // Refs are "milestoneId/sliceId"; a ref without a slash is malformed
    // and suppresses the whole rule for this row.
    const cut = ref.indexOf("/");
    if (cut === -1) return null;
    const row = lookup.get(ref.slice(0, cut), ref.slice(cut + 1));
    if (!row || row["status"] !== "complete") return null;
  }
  return {
    toStatus: "validated",
    rule: "supporting-slices-complete",
    reason: `all ${sliceRefs.length} supporting slice(s) complete`,
    updates: { status: "validated" },
  };
}
/**
 * Rule 3: supersedes-resolved.
 * If this R's supersedes_by points at a requirement in a terminal status,
 * mark this R superseded. A reference to a nonexistent row is logged as a
 * dangling-chain warning and never transitions. Returns a transition
 * descriptor or null.
 */
function applySupersedes(db, req) {
  const supersededBy = req.superseded_by;
  if (!supersededBy) return null;
  const target = db
    .prepare("SELECT id, status FROM requirements WHERE id = ?")
    .get(supersededBy);
  if (!target) {
    // Broken reference — surface it as feedback, but do not transition.
    logDanglingChain(req.id, supersededBy);
    return null;
  }
  const targetStatus = target["status"];
  if (!SUPERSEDED_TERMINAL_STATUSES.has(targetStatus)) return null;
  return {
    toStatus: "superseded",
    rule: "supersedes-resolved",
    reason: `superseded by ${supersededBy} (status=${targetStatus})`,
    updates: { status: "superseded" },
  };
}
// ─── Main export ──────────────────────────────────────────────────────────────
/**
 * Walk all active requirements and apply transition rules.
 *
 * Rules run in priority order (operational-evidence-drained, then
 * supporting-slices-complete, then supersedes-resolved); the first rule
 * that fires wins and the rest are skipped for that row.
 *
 * @param {string} _basePath - base path of the project (unused directly;
 * DB adapter is process-global, kept for API symmetry with other sweepers).
 * @param {object} [options]
 * @param {boolean} [options.dryRun=false] - if true, returns transitions but
 * does not write to DB.
 * @param {string[]} [options.includeRules] - subset of rules to apply; defaults
 * to all three.
 * @returns {{ transitioned: Array<{id,fromStatus,toStatus,rule}>, totalChecked: number, durationMs: number }}
 */
export async function sweepRequirementStatuses(_basePath, options = {}) {
  const startMs = Date.now();
  const dryRun = options.dryRun === true;
  const includeRules = Array.isArray(options.includeRules)
    ? options.includeRules
    : ALL_RULES;
  const db = _getAdapter();
  if (!db) {
    // No adapter — report an empty, zero-cost sweep rather than throwing.
    return { transitioned: [], totalChecked: 0, durationMs: 0 };
  }
  // Fetch every row with status='active' straight from the requirements
  // table. NOTE(review): this does NOT filter superseded_by (no
  // active_requirements view here), so rows with supersedes_by set are
  // still scanned — rule 3 relies on that to transition them.
  const rows = db
    .prepare("SELECT * FROM requirements WHERE status = 'active'")
    .all();
  const transitioned = [];
  for (const rawRow of rows) {
    // Normalize the raw row into a plain object so the rule functions can
    // use dot access regardless of the adapter's row representation.
    const req = {
      id: rawRow["id"],
      class: rawRow["class"],
      status: rawRow["status"],
      description: rawRow["description"],
      why: rawRow["why"],
      source: rawRow["source"],
      primary_owner: rawRow["primary_owner"],
      supporting_slices: rawRow["supporting_slices"],
      validation: rawRow["validation"],
      notes: rawRow["notes"],
      full_content: rawRow["full_content"],
      superseded_by: rawRow["superseded_by"] ?? null,
    };
    let transition = null;
    // Apply rules in priority order; first match wins.
    if (!transition && includeRules.includes("operational-evidence-drained")) {
      transition = applyOperationalEvidenceDrained(db, req);
    }
    if (!transition && includeRules.includes("supporting-slices-complete")) {
      transition = applySupportingSlicesComplete(db, req);
    }
    if (!transition && includeRules.includes("supersedes-resolved")) {
      transition = applySupersedes(db, req);
    }
    if (!transition) continue;
    const entry = {
      id: req.id,
      fromStatus: req.status,
      toStatus: transition.toStatus,
      rule: transition.rule,
    };
    transitioned.push(entry);
    if (!dryRun) {
      // Apply the status update; preserve all other columns.
      // NOTE(review): named params are bound with ':'-prefixed keys —
      // confirm the adapter expects prefixed keys (node:sqlite does;
      // better-sqlite3 expects bare names).
      db.prepare(`UPDATE requirements SET status = :status WHERE id = :id`).run(
        { ":status": transition.toStatus, ":id": req.id },
      );
      // For rule 1: write reason into notes field as well.
      if (transition.rule === "operational-evidence-drained") {
        const existingNotes = req.notes ?? "";
        const sep = existingNotes.trim().length > 0 ? "\n" : "";
        db.prepare(`UPDATE requirements SET notes = :notes WHERE id = :id`).run(
          {
            ":notes": `${existingNotes}${sep}${transition.reason}`,
            ":id": req.id,
          },
        );
      }
      logTransition(
        req,
        transition.toStatus,
        transition.rule,
        transition.reason,
      );
    }
  }
  return {
    transitioned,
    totalChecked: rows.length,
    durationMs: Date.now() - startMs,
  };
}
// ─── UokGate wrapper ──────────────────────────────────────────────────────────
/**
 * UokGate descriptor for the requirement status sweeper.
 * Registration is deferred — see module-level comment.
 */
export const requirementStatusSweeperGate = {
  id: "requirement-status-sweeper",
  type: "verification",
  async execute(ctx) {
    const { transitioned, totalChecked } = await sweepRequirementStatuses(
      ctx.basePath,
      ctx.options,
    );
    // A clean sweep passes; any transition asks for operator review.
    if (transitioned.length === 0) {
      return {
        outcome: "pass",
        failureClass: null,
        rationale: `requirement sweep: 0 of ${totalChecked} needed transition`,
      };
    }
    return {
      outcome: "manual-attention",
      failureClass: "verification",
      rationale: `requirement sweep: ${transitioned.length} of ${totalChecked} transitioned`,
      findings: transitioned,
    };
  },
};

View file

@ -0,0 +1,150 @@
/**
* requirements-sift-projection.js emit a fresh sift-friendly markdown of all R-entries.
*
* Purpose: write .sf/runtime/sift/requirements-projected.md from the DB so
* sift's index sees the live state of the requirements corpus, not the
* (possibly stale) checked-in .sf/REQUIREMENTS.md.
*
* Consumer: sift warmup pipeline (bootstrap/register-hooks.js, auto/loop.js).
*
* Failure boundary: read-only on DB; write-only on .sf/runtime/sift/.
*/
import { mkdirSync, writeFileSync } from "node:fs";
import { join } from "node:path";
import { _getAdapter } from "./sf-db-core.js";
/** Canonical output path for the requirements corpus projection. */
export function getRequirementsProjectionPath(basePath) {
  const segments = [".sf", "runtime", "sift", "requirements-projected.md"];
  return join(basePath, ...segments);
}
/**
 * Escape a text value for inline markdown use.
 * Collapses interior newlines into spaces so each field renders on one line
 * when used in bullet form. Backtick sequences are escaped to avoid
 * unintended code-span boundaries. Nullish input yields "".
 */
function mdInline(value) {
  if (value == null) return "";
  let text = String(value);
  text = text.replace(/`/g, "\\`");
  text = text.replace(/\r?\n|\r/g, " ");
  return text.trim();
}
/**
 * Render one R-entry as a markdown section for sift indexing.
 *
 * Uses a flat-heading + bullet-list structure so every field is
 * retrievable by keyword AND the section heading is the canonical ID.
 *
 * Fix: the heading previously concatenated the ID and the description's
 * first line with no separator (producing e.g. "## R12Some description"),
 * fusing the canonical ID into the adjacent word and defeating keyword
 * retrieval on the ID — the module's stated purpose. A literal " — "
 * separator keeps both tokens independently searchable; the Status line
 * gets " · " separators for the same reason.
 */
function renderRow(row) {
  const lines = [];
  const id = String(row["id"] ?? "");
  // First non-blank description line becomes the heading title.
  const descFirst =
    String(row["description"] ?? "")
      .split(/\r?\n/)
      .find((l) => l.trim().length > 0) ?? "Untitled";
  lines.push(`## ${id} — ${mdInline(descFirst)}`);
  lines.push(
    `Status: ${mdInline(row["status"])} · Class: ${mdInline(row["class"])} · Owner: ${mdInline(row["primary_owner"])}`,
  );
  lines.push("");
  if (row["description"]) {
    lines.push(mdInline(row["description"]));
    lines.push("");
  }
  if (row["why"]) {
    lines.push(`**Why:** ${mdInline(row["why"])}`);
    lines.push("");
  }
  const notes = String(row["notes"] ?? "").trim();
  if (notes.length > 0) {
    lines.push(`**Notes:** ${mdInline(notes)}`);
    lines.push("");
  }
  return lines.join("\n");
}
/**
 * Status ordering for grouped output — matches the canonical
 * STATUS_SECTION_MAP used by db-writer.js so sections appear in a
 * consistent order.
 */
const STATUS_ORDER = [
  "active",
  "resolved",
  "validated",
  "deferred",
  "superseded",
  "cancelled",
  "out-of-scope",
];

/** Rank a status for sorting; unknown statuses sort after all known ones. */
function statusRank(status) {
  const normalized = String(status ?? "").toLowerCase();
  const position = STATUS_ORDER.indexOf(normalized);
  if (position === -1) return STATUS_ORDER.length;
  return position;
}

/** Numeric part of an "R<number>" id; non-conforming ids sort last. */
function requirementNumber(id) {
  const match = /^R(\d+)$/.exec(String(id ?? ""));
  if (match) return Number(match[1]);
  return Number.MAX_SAFE_INTEGER;
}
/**
 * Build a fresh markdown projection from the `requirements` DB table and
 * write it to `.sf/runtime/sift/requirements-projected.md`.
 *
 * Returns `{written: true, rows: N, bytes: M, projectionPath}` on success.
 * Returns `{written: false, reason, rows: 0}` when the DB adapter is
 * unavailable. Never throws — failure is non-fatal to the warmup pipeline.
 */
export async function buildRequirementsProjection(basePath) {
  const projectionPath = getRequirementsProjectionPath(basePath);
  try {
    const db = _getAdapter();
    if (!db) {
      return { written: false, reason: "sf-db adapter unavailable", rows: 0 };
    }
    // Pull every non-superseded requirement; ordering happens in JS below.
    const rows = db
      .prepare(
        "SELECT id, class, status, description, why, notes, primary_owner FROM requirements WHERE superseded_by IS NULL",
      )
      .all();
    // Sort: status group first, then numeric R-id within group.
    const byStatusThenNumber = (a, b) =>
      statusRank(a["status"]) - statusRank(b["status"]) ||
      requirementNumber(a["id"]) - requirementNumber(b["id"]);
    rows.sort(byStatusThenNumber);
    const header = [
      "# SF Requirements Corpus — sift projection",
      "",
      "This file is generated from the `requirements` DB table at warmup time.",
      "Do not edit manually — it is overwritten on each SF session start.",
      "",
    ];
    const sections = rows.map((row) => renderRow(row));
    const content = header.concat(sections).join("\n") + "\n";
    // Ensure the runtime dir exists before the write; both are best-effort
    // inside this try — any failure falls through to the error return.
    mkdirSync(join(basePath, ".sf", "runtime", "sift"), { recursive: true });
    writeFileSync(projectionPath, content, "utf-8");
    return {
      written: true,
      rows: rows.length,
      bytes: Buffer.byteLength(content, "utf-8"),
      projectionPath,
    };
  } catch (err) {
    return {
      written: false,
      reason: String(err?.message ?? err),
      rows: 0,
      projectionPath,
    };
  }
}

View file

@ -218,6 +218,7 @@ function ensureSessionTables(db) {
UNIQUE(session_id, ref_type, ref_value)
)
`);
repairOrphanedTurnsFtsTables(db);
// FTS5 external-content table over turns for keyword recall.
// content_rowid links to turns.id; triggers below keep it in sync.
db.exec(`
@ -268,6 +269,30 @@ function ensureSessionTables(db) {
"CREATE INDEX IF NOT EXISTS idx_session_refs_session ON session_refs(session_id, created_at DESC)",
);
}
function repairOrphanedTurnsFtsTables(db) {
const ftsTable = db
.prepare(
"SELECT name FROM sqlite_master WHERE type='table' AND name='turns_fts'",
)
.get();
if (ftsTable) return;
const orphan = db
.prepare(
"SELECT name FROM sqlite_master WHERE name IN ('turns_fts_data', 'turns_fts_idx', 'turns_fts_docsize', 'turns_fts_config', 'turns_fts_content', 'turns_fts_insert', 'turns_fts_update', 'turns_fts_delete') LIMIT 1",
)
.get();
if (!orphan) return;
db.exec("DROP TRIGGER IF EXISTS turns_fts_insert");
db.exec("DROP TRIGGER IF EXISTS turns_fts_update");
db.exec("DROP TRIGGER IF EXISTS turns_fts_delete");
db.exec("DROP TABLE IF EXISTS turns_fts_data");
db.exec("DROP TABLE IF EXISTS turns_fts_idx");
db.exec("DROP TABLE IF EXISTS turns_fts_docsize");
db.exec("DROP TABLE IF EXISTS turns_fts_config");
db.exec("DROP TABLE IF EXISTS turns_fts_content");
}
function ensureSessionSnapshotTable(db) {
db.exec(`
CREATE TABLE IF NOT EXISTS session_snapshots (
@ -764,7 +789,12 @@ export function initSchema(db, fileBacked, options = {}) {
// empty WAL and cannot be recovered. Explicit checkpoints are issued at
// safe loop boundaries instead (post-unit finalize, close).
if (fileBacked) db.exec("PRAGMA wal_autocheckpoint=0");
if (fileBacked) db.exec("PRAGMA auto_vacuum = INCREMENTAL");
// auto_vacuum=NONE removed the ptrmap pages that caused the 2026-05-17
// "Bad ptr map entry" corruption when multiple node:sqlite writers
// concurrently mutated the requirements btree (rebuilt DB has none).
// The incremental_vacuum() calls in sf-db-core.js become no-ops, which
// is fine — the DB is small and disk pressure is handled separately.
if (fileBacked) db.exec("PRAGMA auto_vacuum = NONE");
if (fileBacked) db.exec("PRAGMA cache_size = -8000"); // 8 MB page cache
if (fileBacked && process.platform !== "darwin")
db.exec("PRAGMA mmap_size = 67108864"); // 64 MB mmap

View file

@ -68,7 +68,13 @@ function _findLatestIterOutcomeForUnit(basePath, milestoneId, sliceId, taskId) {
* Append a revert-blocked self-feedback entry directly to
* `<basePath>/.sf/self-feedback.jsonl` (best-effort, never throws).
*/
function _appendRevertBlockedFeedback(basePath, unitId, iterCompleteTs, attemptedRevert, stackFrame) {
function _appendRevertBlockedFeedback(
basePath,
unitId,
iterCompleteTs,
attemptedRevert,
stackFrame,
) {
try {
const ts = Date.now().toString(36);
const rnd = Math.random().toString(36).slice(2, 8);
@ -246,7 +252,8 @@ export function updateTaskStatus(
: null;
// Layer B: block reverts when iterations.jsonl recently recorded outcome=complete
// for this (milestone, slice, task) triple (R072 + T02-clobber fix).
const isRevert = status !== "complete" && status !== "done" && status !== "in_progress";
const isRevert =
status !== "complete" && status !== "done" && status !== "in_progress";
if (isRevert) {
const windowMs =
process.env.SF_REVERT_BLOCK_WINDOW_MS !== undefined
@ -261,7 +268,9 @@ export function updateTaskStatus(
);
if (iterResult && iterResult.outcome === "complete") {
const iterCompleteTs = iterResult.ts;
const iterCompleteMs = iterCompleteTs ? new Date(iterCompleteTs).getTime() : 0;
const iterCompleteMs = iterCompleteTs
? new Date(iterCompleteTs).getTime()
: 0;
const ageMs = Date.now() - iterCompleteMs;
if (ageMs <= windowMs) {
const unitId = `${milestoneId}/${sliceId}/${taskId}`;

View file

@ -32,6 +32,7 @@ import {
resolveSiftBinary,
resolveSiftLogging,
resolveSiftSearchScope,
resolveSiftSearchScopes,
} from "../code-intelligence.js";
import { emitJournalEvent } from "../journal.js";
import { loadEffectiveSFPreferences } from "../preferences.js";
@ -212,7 +213,7 @@ function buildCodebaseSearchArgs(strategy, query, scope, projectRoot) {
// scoped subdirs get vector+reranking for semantic signal. Timeouts are
// sized to accommodate cold-cache embedding builds.
const { retrievers, reranking } = chooseSiftRetrievers(scope, projectRoot);
return [
const args = [
"search",
"--strategy",
strategy,
@ -2641,15 +2642,15 @@ export default function (pi) {
label: "Code Search",
description: [
"Perform Sift-backed hybrid (BM25 + phrase) retrieval over a scoped codebase path.",
" Use this for conceptual, behavioral, or cross-cutting questions only after choosing a narrow scope",
" Use this for conceptual, behavioral, or cross-cutting questions after choosing a scope",
" (e.g. 'how is X handled?', 'where is the logic for Y?', 'find examples of Z').",
" If Sift status is degraded or the scope is broad, prefer grep/find/ls and retry with a narrower scope.",
" Repository-root scope fans out across bounded top-level shards instead of calling Sift on `.` directly.",
].join(""),
promptGuidelines: [
"Use grep/find/ls for broad orientation first, then codebase_search with a specific scope for conceptual patterns.",
"Use grep/find/ls for broad orientation first, then codebase_search with a specific scope for conceptual patterns; root/all repo is sharded.",
" page-index-hybrid (default): Use for 'How' and 'Why' questions (logic, implementation, reasoning).",
" path-hybrid: Use for 'Where' questions (architecture, directory structure, file location).",
" Keep scope narrow enough to avoid root-level Sift timeouts; each repo uses its own SIFT_SEARCH_CACHE under .sf/runtime/sift/.",
" Each repo uses its own SIFT_SEARCH_CACHE under .sf/runtime/sift/.",
" Be descriptive in your query: include function names, types, or intent (e.g. 'auth middleware validation').",
" This tool is read-only and optimized for evidence gathering before you plan or edit.",
],
@ -2711,7 +2712,10 @@ export default function (pi) {
},
async execute(_toolCallId, params, signal) {
const projectRoot = process.cwd();
const scope = resolveSiftSearchScope(projectRoot, params.scope);
const scopePlan = resolveSiftSearchScopes(projectRoot, params.scope);
const scope = scopePlan.sharded
? `shards:${scopePlan.scopes.join(",")}`
: scopePlan.scopes[0];
const strategy = params.strategy ?? "page-index-hybrid";
const query = params.query;
const startedAt = Date.now();
@ -2750,11 +2754,6 @@ export default function (pi) {
},
};
}
const args = buildCodebaseSearchArgs(strategy, query, scope, projectRoot);
const stderr = [];
const stdout = [];
let wasAborted = false;
let timedOut = false;
const runtimeDirs = ensureSiftRuntimeDirs(projectRoot);
const { env: logEnv, logPath } = resolveSiftLogging(projectRoot);
if (logPath) {
@ -2764,121 +2763,148 @@ export default function (pi) {
fs.writeFileSync(logPath, "", "utf-8");
}
// If vector retrievers are in play, start a progress poller that
// writes index-build state into the log file every 30 seconds.
const usesVector = args.includes("vector");
let progressTimer = null;
let lastSectorCount = 0;
if (usesVector && logPath) {
lastSectorCount = countVectorSectors();
progressTimer = setInterval(() => {
const { sectorCount, cacheSizeMb } = countVectorSectors();
const delta = sectorCount - lastSectorCount;
lastSectorCount = sectorCount;
const line = `[${new Date().toISOString()}] vector-index progress: ${sectorCount} sectors (${cacheSizeMb} MB total)${delta > 0 ? ` (+${delta} since last check)` : ""}\n`;
try {
fs.appendFileSync(logPath, line);
} catch {
// ignore
}
}, 30_000);
}
const runScope = async (scopeToRun, scopedTimeoutMs) => {
const args = buildCodebaseSearchArgs(
strategy,
query,
scopeToRun,
projectRoot,
);
const stderr = [];
const stdout = [];
let wasAborted = false;
let timedOut = false;
const childEnv = { ...buildSiftEnv(projectRoot, process.env), ...logEnv };
const proc = spawn(siftBin, args, {
cwd: projectRoot,
env: childEnv,
shell: false,
stdio: ["ignore", "pipe", "pipe"],
});
liveSubagentProcesses.add(proc);
// Collect output; also tee stderr to the log file for operator diagnostics
proc.stdout.on("data", (chunk) => stdout.push(chunk.toString()));
proc.stderr.on("data", (chunk) => {
stderr.push(chunk.toString());
if (logPath) {
try {
fs.appendFileSync(logPath, chunk);
} catch {
// log write failure must not affect search result
}
}
});
// Handle abort signal
const killProc = () => {
wasAborted = true;
try {
proc.kill("SIGTERM");
} catch {
// ignore
}
setTimeout(() => {
if (proc.exitCode === null) {
// If vector retrievers are in play, start a progress poller that
// writes index-build state into the log file every 30 seconds.
const usesVector = args.includes("vector");
let progressTimer = null;
let lastSectorCount = 0;
if (usesVector && logPath) {
lastSectorCount = countVectorSectors();
progressTimer = setInterval(() => {
const { sectorCount, cacheSizeMb } = countVectorSectors();
const delta = sectorCount - lastSectorCount;
lastSectorCount = sectorCount;
const line = `[${new Date().toISOString()}] vector-index progress: ${sectorCount} sectors (${cacheSizeMb} MB total)${delta > 0 ? ` (+${delta} since last check)` : ""}\n`;
try {
proc.kill("SIGKILL");
fs.appendFileSync(logPath, line);
} catch {
// ignore
}
}, 30_000);
}
const childEnv = {
...buildSiftEnv(projectRoot, process.env),
...logEnv,
};
const proc = spawn(siftBin, args, {
cwd: projectRoot,
env: childEnv,
shell: false,
stdio: ["ignore", "pipe", "pipe"],
});
liveSubagentProcesses.add(proc);
proc.stdout.on("data", (chunk) => stdout.push(chunk.toString()));
proc.stderr.on("data", (chunk) => {
stderr.push(chunk.toString());
if (logPath) {
try {
fs.appendFileSync(logPath, chunk);
} catch {
// log write failure must not affect search result
}
}
}, 5000).unref?.();
});
const killProc = () => {
wasAborted = true;
try {
proc.kill("SIGTERM");
} catch {
// ignore
}
setTimeout(() => {
if (proc.exitCode === null) {
try {
proc.kill("SIGKILL");
} catch {
// ignore
}
}
}, 5000).unref?.();
};
const timeout = setTimeout(() => {
timedOut = true;
killProc();
}, scopedTimeoutMs);
timeout.unref?.();
if (signal) {
if (signal.aborted) killProc();
else signal.addEventListener("abort", killProc, { once: true });
}
const exitCode = await new Promise((resolve) => {
proc.on("close", (code) => {
clearTimeout(timeout);
if (progressTimer) clearInterval(progressTimer);
liveSubagentProcesses.delete(proc);
if (signal) signal.removeEventListener("abort", killProc);
resolve(code ?? 0);
});
proc.on("error", () => {
clearTimeout(timeout);
if (progressTimer) clearInterval(progressTimer);
liveSubagentProcesses.delete(proc);
if (signal) signal.removeEventListener("abort", killProc);
resolve(1);
});
});
return {
scope: scopeToRun,
exitCode,
out: stdout.join(""),
err: stderr.join("").trim(),
timedOut,
wasAborted,
timeoutMs: scopedTimeoutMs,
};
};
const timeout = setTimeout(() => {
timedOut = true;
killProc();
}, timeoutMs);
timeout.unref?.();
if (signal) {
if (signal.aborted) killProc();
else signal.addEventListener("abort", killProc, { once: true });
const perScopeTimeoutMs = scopePlan.sharded
? Math.max(5_000, Math.floor(timeoutMs / scopePlan.scopes.length))
: timeoutMs;
const outputs = [];
for (const scopeToRun of scopePlan.scopes) {
const output = await runScope(scopeToRun, perScopeTimeoutMs);
outputs.push(output);
if (output.wasAborted && !output.timedOut) break;
}
const exitCode = await new Promise((resolve) => {
proc.on("close", (code) => {
clearTimeout(timeout);
if (progressTimer) clearInterval(progressTimer);
liveSubagentProcesses.delete(proc);
if (signal) signal.removeEventListener("abort", killProc);
resolve(code ?? 0);
});
proc.on("error", () => {
clearTimeout(timeout);
if (progressTimer) clearInterval(progressTimer);
liveSubagentProcesses.delete(proc);
if (signal) signal.removeEventListener("abort", killProc);
resolve(1);
});
});
if (wasAborted) {
const abortedOutput = outputs.find(
(output) => output.wasAborted && !output.timedOut,
);
if (abortedOutput) {
const logHint = logPath ? `\n(stage diagnostic: ${logPath})` : "";
const text = timedOut
? `Code search timed out after ${Math.round(timeoutMs / 1000)}s. Narrow the query or scope and retry.${logHint}`
: `Code search aborted.${logHint}`;
const text = `Code search aborted.${logHint}`;
await recordRetrievalEvidence(projectRoot, {
backend: "codebase_search",
sourceKind: "code",
query,
strategy,
scope,
status: timedOut ? "timeout" : "aborted",
status: "aborted",
hitCount: 0,
elapsedMs: Date.now() - startedAt,
cachePath: runtimeDirs.searchCache,
error: text,
result: {
siftBin,
timeoutMs,
},
result: { siftBin, timeoutMs },
});
return {
content: [
{
type: "text",
text,
},
],
content: [{ type: "text", text }],
details: {
operation: "codebase_search",
aborted: true,
timedOut,
timedOut: false,
siftBin,
query,
scope,
@ -2888,9 +2914,23 @@ export default function (pi) {
},
};
}
const out = stdout.join("");
const err = stderr.join("").trim();
if (exitCode !== 0 && !out) {
const allTimedOut =
outputs.length > 0 && outputs.every((output) => output.timedOut);
const successfulOutputs = outputs.filter(
(output) => output.out.trim().length > 0,
);
if (
successfulOutputs.length === 0 &&
outputs.some((output) => output.exitCode !== 0)
) {
const err = outputs
.map((output) => output.err)
.filter(Boolean)
.join("\n")
.trim();
const exitCode =
outputs.find((output) => output.exitCode !== 0)?.exitCode ?? 1;
const hint =
err.includes("not found") || err.includes("No such file")
? "\n\nHint: install rupurt/sift and ensure `sift` is on PATH."
@ -2903,36 +2943,56 @@ export default function (pi) {
query,
strategy,
scope,
status: "error",
status: allTimedOut ? "timeout" : "error",
hitCount: 0,
elapsedMs: Date.now() - startedAt,
cachePath: runtimeDirs.searchCache,
error: err || `exit ${exitCode}`,
error: allTimedOut
? "all codebase_search shards timed out"
: err || `exit ${exitCode}`,
result: {
siftBin,
exitCode,
timeoutMs,
scopes: scopePlan.scopes,
},
});
return {
content: [
{
type: "text",
text: `codebase_search failed (exit ${exitCode}). Is sift installed?${hint}`,
text: allTimedOut
? `Code search timed out after ${Math.round(timeoutMs / 1000)}s across all shards. Narrow the query or scope and retry.${logPath ? `\n(stage diagnostic: ${logPath})` : ""}`
: `codebase_search failed (exit ${exitCode}). Is sift installed?${hint}`,
},
],
details: {
operation: "codebase_search",
exitCode,
sharded: scopePlan.sharded,
scopes: scopePlan.scopes,
siftBin,
query,
scope,
strategy,
timeoutMs,
timedOut: allTimedOut,
searchCache: runtimeDirs.searchCache,
},
};
}
const out = scopePlan.sharded
? successfulOutputs
.map((output) => `## ${output.scope}\n\n${output.out.trim()}`)
.join("\n\n")
: (outputs[0]?.out ?? "");
const err = outputs
.map((output) => output.err)
.filter(Boolean)
.join("\n")
.trim();
const exitCode = outputs.some((output) => output.exitCode !== 0) ? 1 : 0;
await recordRetrievalEvidence(projectRoot, {
backend: "codebase_search",
sourceKind: "code",
@ -2940,7 +3000,7 @@ export default function (pi) {
strategy,
scope,
status: exitCode === 0 ? "ok" : "partial",
hitCount: out.trim() ? 1 : 0,
hitCount: successfulOutputs.length,
elapsedMs: Date.now() - startedAt,
cachePath: runtimeDirs.searchCache,
error: err || null,
@ -2948,6 +3008,8 @@ export default function (pi) {
siftBin,
exitCode,
timeoutMs,
sharded: scopePlan.sharded,
scopes: scopePlan.scopes,
outputPreview: out.slice(0, 2_000),
},
});
@ -2966,6 +3028,8 @@ export default function (pi) {
scope,
strategy,
exitCode,
sharded: scopePlan.sharded,
scopes: scopePlan.scopes,
siftBin,
timeoutMs,
searchCache: runtimeDirs.searchCache,

View file

@ -46,8 +46,7 @@ describe("generateRequirementsMd", () => {
id: "R002",
class: "operational",
status: "active",
description:
"Address recurring drift\n\nSource IDs: sf-one, sf-two",
description: "Address recurring drift\n\nSource IDs: sf-one, sf-two",
why: "Threshold reached\nacross multiple runs",
source: "sf-promoter",
primary_owner: "",
@ -120,4 +119,28 @@ describe("generateRequirementsMd", () => {
assert.equal(parsed[0].id, "R069");
assert.equal(parsed[0].status, "cancelled");
});
// Terminal statuses introduced by the status sweeper ("resolved",
// "superseded") must survive a generate → parse round trip, each under
// its own "## <Status>" section heading.
test("round_trips_terminal_sweeper_statuses", () => {
  const markdown = generateRequirementsMd([
    {
      id: "R070",
      class: "operational",
      status: "resolved",
      description: "Backlog evidence drained.",
    },
    {
      id: "R071",
      class: "architecture",
      status: "superseded",
      description: "Folded into a newer requirement.",
    },
  ]);
  // Both terminal sections are emitted...
  assert.match(markdown, /## Resolved/);
  assert.match(markdown, /## Superseded/);
  // ...and parsing recovers both rows with their statuses intact.
  const parsed = parseRequirementsSections(markdown);
  assert.equal(parsed.length, 2);
  assert.equal(parsed[0].status, "resolved");
  assert.equal(parsed[1].status, "superseded");
});
});

View file

@ -0,0 +1,345 @@
/**
 * requirement-status-sweeper.test.mjs — tests for the auto-transition sweeper for R-entries.
 *
 * Tests all three rules:
 * 1. operational-evidence-drained (source IDs all resolved → 'resolved')
 * 2. supporting-slices-complete (all slices complete → 'validated')
 * 3. supersedes-resolved (superseded_by target is terminal → 'superseded')
 *
 * Uses in-memory SQLite fixtures — never touches the real DB.
 */
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, describe, expect, test } from "vitest";
import {
closeDatabase,
getRequirementById,
insertMilestone,
insertSelfFeedbackEntry,
insertSlice,
openDatabase,
resolveSelfFeedbackEntry,
upsertRequirement,
} from "../sf-db.js";
import {
requirementStatusSweeperGate,
sweepRequirementStatuses,
} from "../sf-db/requirement-status-sweeper.js";
// ─── Fixtures ─────────────────────────────────────────────────────────────────
// Temp directories to delete after each test.
// NOTE(review): nothing in this file currently pushes into tmpDirs — every
// test opens an in-memory DB via openMemoryDb — so this drain loop is a
// no-op safety net; confirm whether the fs/tmpdir imports are still needed.
const tmpDirs = [];
afterEach(() => {
  // Close the module-level DB handle so the next test starts fresh.
  closeDatabase();
  while (tmpDirs.length > 0) {
    const dir = tmpDirs.pop();
    if (dir) rmSync(dir, { recursive: true, force: true });
  }
});
/** Open an in-memory DB (no file I/O). */
function openMemoryDb() {
openDatabase(":memory:");
}
/**
 * Seed one self_feedback row under the given id; all other fields are
 * fixed minimal test values. When `resolved` is true, immediately mark
 * the entry resolved so sweeper rule 1 can treat it as drained evidence.
 */
function makeSelfFeedbackRow(id, options = {}) {
  const { resolved = false } = options;
  insertSelfFeedbackEntry({
    id,
    ts: new Date().toISOString(),
    kind: "test:sweeper",
    severity: "low",
    blocking: false,
    repoIdentity: "forge",
    sfVersion: "0.0.0",
    basePath: "/tmp/test",
    summary: `test entry ${id}`,
    evidence: "",
    suggestedFix: "",
  });
  if (!resolved) return;
  resolveSelfFeedbackEntry(id, {
    reason: "resolved-in-test",
    resolvedAt: new Date().toISOString(),
  });
}
/**
 * Seed a requirement row with the given id. Any field the caller does not
 * supply (or supplies as null/undefined) falls back to a minimal
 * active/operational default — `??` semantics, so explicit nulls also
 * take the fallback.
 */
function makeRequirement(id, fields = {}) {
  const orDefault = (key, fallback) => fields[key] ?? fallback;
  upsertRequirement({
    id,
    class: orDefault("class", "operational"),
    status: orDefault("status", "active"),
    description: orDefault("description", ""),
    why: orDefault("why", "test"),
    source: orDefault("source", "test"),
    primary_owner: orDefault("primary_owner", null),
    supporting_slices: orDefault("supporting_slices", "[]"),
    validation: orDefault("validation", ""),
    notes: orDefault("notes", ""),
    full_content: orDefault("full_content", ""),
    superseded_by: orDefault("superseded_by", null),
  });
}
/** Insert a milestone row — slices carry a milestone FK, so this must exist first. */
function makeMilestone(milestoneId) {
  insertMilestone({
    id: milestoneId,
    title: `test milestone ${milestoneId}`,
    status: "active",
  });
}
/** Insert a slice row with the given status; creates its parent milestone first. */
function makeSlice(milestoneId, sliceId, status = "complete") {
  makeMilestone(milestoneId); // satisfy the slice→milestone FK
  insertSlice({
    milestoneId,
    id: sliceId,
    title: `test slice ${sliceId}`,
    status,
    risk: "low",
    depends: [],
    demo: "",
    sequence: 0,
  });
}
// ─── Tests ────────────────────────────────────────────────────────────────────
describe("sweepRequirementStatuses — rule 1: operational-evidence-drained", () => {
  test("transitions to 'resolved' when all 3 source IDs are resolved", async () => {
    openMemoryDb();
    // Every feedback entry named in the description is resolved → evidence drained.
    makeSelfFeedbackRow("sf-aaa1", { resolved: true });
    makeSelfFeedbackRow("sf-bbb2", { resolved: true });
    makeSelfFeedbackRow("sf-ccc3", { resolved: true });
    makeRequirement("R900", {
      class: "operational",
      description:
        "Some recurring issue\n\nSource IDs: sf-aaa1, sf-bbb2, sf-ccc3",
    });
    const result = await sweepRequirementStatuses("/tmp/test");
    expect(result.totalChecked).toBe(1);
    expect(result.transitioned).toHaveLength(1);
    expect(result.transitioned[0]).toMatchObject({
      id: "R900",
      fromStatus: "active",
      toStatus: "resolved",
      rule: "operational-evidence-drained",
    });
    // The transition must be persisted, not just reported.
    const updated = getRequirementById("R900");
    expect(updated.status).toBe("resolved");
  });
  test("stays 'active' when 1 of 3 source IDs is still open", async () => {
    openMemoryDb();
    makeSelfFeedbackRow("sf-aaa1", { resolved: true });
    makeSelfFeedbackRow("sf-bbb2", { resolved: true });
    makeSelfFeedbackRow("sf-ccc3", { resolved: false }); // still open — blocks the rule
    makeRequirement("R901", {
      class: "operational",
      description: "Source IDs: sf-aaa1, sf-bbb2, sf-ccc3",
    });
    const result = await sweepRequirementStatuses("/tmp/test");
    expect(result.transitioned).toHaveLength(0);
    const row = getRequirementById("R901");
    expect(row.status).toBe("active");
  });
});
describe("sweepRequirementStatuses — rule 2: supporting-slices-complete", () => {
  test("transitions to 'validated' when slice M999/S99 is complete", async () => {
    openMemoryDb();
    makeSlice("M999", "S99", "complete");
    makeRequirement("R902", {
      class: "capability",
      // supporting_slices is stored as a JSON array of "milestone/slice" refs.
      supporting_slices: '["M999/S99"]',
    });
    const result = await sweepRequirementStatuses("/tmp/test");
    expect(result.transitioned).toHaveLength(1);
    expect(result.transitioned[0]).toMatchObject({
      id: "R902",
      toStatus: "validated",
      rule: "supporting-slices-complete",
    });
    const updated = getRequirementById("R902");
    expect(updated.status).toBe("validated");
  });
  test("stays 'active' when slice M999/S99 is in_progress", async () => {
    openMemoryDb();
    makeSlice("M999", "S99", "in_progress");
    makeRequirement("R903", {
      class: "capability",
      supporting_slices: '["M999/S99"]',
    });
    const result = await sweepRequirementStatuses("/tmp/test");
    expect(result.transitioned).toHaveLength(0);
    const row = getRequirementById("R903");
    expect(row.status).toBe("active");
  });
  test("stays 'active' when supporting_slices is empty array", async () => {
    openMemoryDb();
    // No slices at all must not count as "all slices complete".
    makeRequirement("R904", {
      class: "capability",
      supporting_slices: "[]",
    });
    const result = await sweepRequirementStatuses("/tmp/test");
    expect(result.transitioned).toHaveLength(0);
    const row = getRequirementById("R904");
    expect(row.status).toBe("active");
  });
});
describe("sweepRequirementStatuses — rule 3: supersedes-resolved", () => {
  test("transitions to 'superseded' when supersedes_by target is resolved", async () => {
    openMemoryDb();
    // Target R200 is already resolved (terminal).
    makeRequirement("R200", { status: "resolved" });
    // R905 is superseded by R200 (superseded_by points at the replacement).
    makeRequirement("R905", { superseded_by: "R200" });
    const result = await sweepRequirementStatuses("/tmp/test");
    // Only R905 is active (R200 is resolved, not active)
    expect(result.transitioned).toHaveLength(1);
    expect(result.transitioned[0]).toMatchObject({
      id: "R905",
      toStatus: "superseded",
      rule: "supersedes-resolved",
    });
    const updated = getRequirementById("R905");
    expect(updated.status).toBe("superseded");
  });
  test("stays 'active' when supersedes_by target does not exist", async () => {
    openMemoryDb();
    // Dangling reference: R999 was never inserted, so the rule must not fire.
    makeRequirement("R906", { superseded_by: "R999" });
    const result = await sweepRequirementStatuses("/tmp/test");
    expect(result.transitioned).toHaveLength(0);
    const row = getRequirementById("R906");
    expect(row.status).toBe("active");
  });
});
describe("sweepRequirementStatuses — dryRun option", () => {
  test("returns transitions but does not write to DB", async () => {
    openMemoryDb();
    makeSelfFeedbackRow("sf-dry1", { resolved: true });
    makeSelfFeedbackRow("sf-dry2", { resolved: true });
    makeSelfFeedbackRow("sf-dry3", { resolved: true });
    makeRequirement("R907", {
      class: "operational",
      description: "Source IDs: sf-dry1, sf-dry2, sf-dry3",
    });
    const result = await sweepRequirementStatuses("/tmp/test", {
      dryRun: true,
    });
    // The would-be transition is still reported...
    expect(result.transitioned).toHaveLength(1);
    expect(result.transitioned[0].toStatus).toBe("resolved");
    // ...but the DB must NOT have been written.
    const row = getRequirementById("R907");
    expect(row.status).toBe("active");
  });
});
describe("requirementStatusSweeperGate — UokGate contract", () => {
  test("returns GateResult pass when no transitions needed", async () => {
    openMemoryDb();
    // A capability with no supporting slices never transitions (rule 2 guard).
    makeRequirement("R908", { class: "capability", supporting_slices: "[]" });
    const result = await requirementStatusSweeperGate.execute({
      basePath: "/tmp/test",
    });
    expect(result.outcome).toBe("pass");
    expect(result.failureClass).toBeNull();
    expect(typeof result.rationale).toBe("string");
    // Rationale reports "<transitioned> of <checked>" — here zero transitions.
    expect(result.rationale).toContain("0 of");
  });
  test("returns GateResult manual-attention when transitions occurred", async () => {
    openMemoryDb();
    makeSelfFeedbackRow("sf-gate1", { resolved: true });
    makeRequirement("R909", {
      class: "operational",
      description: "Source IDs: sf-gate1",
    });
    const result = await requirementStatusSweeperGate.execute({
      basePath: "/tmp/test",
    });
    // Transitions surface as a non-blocking "manual-attention" verification finding.
    expect(result.outcome).toBe("manual-attention");
    expect(result.failureClass).toBe("verification");
    expect(result.findings).toHaveLength(1);
    expect(result.findings[0].id).toBe("R909");
  });
});
describe("sweepRequirementStatuses — includeRules option", () => {
  test("only rule 3 fires when includeRules=['supersedes-resolved']", async () => {
    openMemoryDb();
    // Set up a row that would trigger rule 1 (all source IDs resolved)
    makeSelfFeedbackRow("sf-r1a", { resolved: true });
    makeRequirement("R910", {
      class: "operational",
      description: "Source IDs: sf-r1a",
    });
    // Set up a row that would trigger rule 3
    makeRequirement("R911-target", { status: "resolved" });
    makeRequirement("R912", { superseded_by: "R911-target" });
    // Restricting includeRules must suppress every rule not listed.
    const result = await sweepRequirementStatuses("/tmp/test", {
      includeRules: ["supersedes-resolved"],
    });
    // Only rule 3 should have fired — R910 should remain active
    const r910 = getRequirementById("R910");
    expect(r910.status).toBe("active");
    // R912 should have transitioned
    const r912Updated = getRequirementById("R912");
    expect(r912Updated.status).toBe("superseded");
    const rules = result.transitioned.map((t) => t.rule);
    expect(rules.every((r) => r === "supersedes-resolved")).toBe(true);
    expect(rules).toHaveLength(1);
  });
});

View file

@ -0,0 +1,205 @@
/**
 * requirements-sift-projection.test.mjs — unit tests for the DB-to-sift
 * requirements corpus projection module.
 *
 * Purpose: verify buildRequirementsProjection writes a correct markdown file
 * from a tmp DB fixture without touching the real .sf/sf.db.
 */
import assert from "node:assert/strict";
import {
existsSync,
mkdirSync,
mkdtempSync,
readFileSync,
rmSync,
} from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, test } from "vitest";
import { closeDatabase, insertRequirement, openDatabase } from "../sf-db.js";
import {
buildRequirementsProjection,
getRequirementsProjectionPath,
} from "../sf-db/requirements-sift-projection.js";
// Temp project roots created by makeProject; drained and deleted after each test.
const tmpRoots = [];
afterEach(() => {
  // Release the DB handle before deleting the directory that holds sf.db.
  closeDatabase();
  for (const root of tmpRoots.splice(0)) {
    rmSync(root, { recursive: true, force: true });
  }
});
/** Create a throwaway project root containing an empty .sf/ directory. */
function makeProject() {
  const projectRoot = mkdtempSync(join(tmpdir(), "sf-req-sift-proj-"));
  const sfDir = join(projectRoot, ".sf");
  mkdirSync(sfDir, { recursive: true });
  tmpRoots.push(projectRoot); // registered for afterEach cleanup
  return projectRoot;
}
/** Open the sf database under the given project root (.sf/sf.db). */
function openDb(root) {
  openDatabase(join(root, ".sf", "sf.db"));
}
/**
 * Build a complete requirement row for insertRequirement. Caller-supplied
 * overrides replace the matching defaults (shallow merge, overrides win);
 * a fresh object is returned on every call.
 */
function seedRow(overrides = {}) {
  const base = {
    id: "R001",
    class: "functional",
    status: "active",
    description: "Test requirement",
    why: "Because tests matter",
    source: "operator",
    primary_owner: "S01",
    supporting_slices: "",
    validation: "smoke",
    notes: "",
    full_content: "",
    superseded_by: null,
  };
  return Object.assign(base, overrides);
}
// ── Test 1: 3 rows of varying status → all present, grouped by status ────────
test("buildRequirementsProjection_with_three_rows_writes_all_grouped_by_status", async () => {
  const root = makeProject();
  openDb(root);
  insertRequirement(
    seedRow({ id: "R001", status: "active", description: "Active req" }),
  );
  insertRequirement(
    seedRow({ id: "R002", status: "validated", description: "Validated req" }),
  );
  insertRequirement(
    seedRow({ id: "R003", status: "deferred", description: "Deferred req" }),
  );
  const result = await buildRequirementsProjection(root);
  assert.equal(result.written, true);
  assert.equal(result.rows, 3);
  assert.ok(result.bytes > 0, "bytes should be positive");
  assert.ok(
    result.projectionPath === getRequirementsProjectionPath(root),
    "projectionPath should match helper",
  );
  assert.ok(existsSync(result.projectionPath), "projection file should exist");
  const content = readFileSync(result.projectionPath, "utf-8");
  // All three IDs appear
  assert.match(content, /## R001/);
  assert.match(content, /## R002/);
  assert.match(content, /## R003/);
  // Grouping by status: active before validated before deferred
  // (ordering checked via heading positions, one requirement per status).
  const posActive = content.indexOf("## R001");
  const posValidated = content.indexOf("## R002");
  const posDeferred = content.indexOf("## R003");
  assert.ok(posActive < posValidated, "active should come before validated");
  assert.ok(
    posValidated < posDeferred,
    "validated should come before deferred",
  );
});
// ── Test 2: NULL notes → no "**Notes:**" section and no "null" literal ────────
// NOTE(review): the fixture seeds notes: "" (empty string), not null — the
// literal-null case in this section's title is not directly exercised; consider
// an additional fixture with notes: null.
test("buildRequirementsProjection_with_null_notes_omits_notes_section", async () => {
  const root = makeProject();
  openDb(root);
  insertRequirement(
    seedRow({ id: "R010", notes: "", description: "No notes req" }),
  );
  const result = await buildRequirementsProjection(root);
  assert.equal(result.written, true);
  const content = readFileSync(result.projectionPath, "utf-8");
  assert.doesNotMatch(
    content,
    /\*\*Notes:\*\*/,
    "should not emit Notes section for empty notes",
  );
  assert.doesNotMatch(content, /\bnull\b/, "should not emit literal null");
});
// ── Test 3: Special chars (newlines, backticks) → escaped for MD safety ───────
test("buildRequirementsProjection_escapes_backticks_and_collapses_newlines", async () => {
  const root = makeProject();
  openDb(root);
  insertRequirement(
    seedRow({
      id: "R020",
      description: "Use `node:sqlite`\nAnd another line",
      why: "Backtick `escape` needed",
      notes: "multi\nline\nnote",
    }),
  );
  const result = await buildRequirementsProjection(root);
  assert.equal(result.written, true);
  const content = readFileSync(result.projectionPath, "utf-8");
  // Backticks in description should be escaped
  assert.match(content, /\\`node:sqlite\\`/, "backticks should be escaped");
  // Newline in description should be collapsed to space in heading
  assert.doesNotMatch(
    content,
    /## R020 — .*\n.*And another line/,
    "heading should be single-line",
  );
  // Notes newlines should be collapsed
  assert.doesNotMatch(
    content,
    /\*\*Notes:\*\*.*\nline/,
    "notes should be on one line",
  );
});
// ── Test 4: Returns correct shape {rows, bytes, projectionPath} ───────────────
test("buildRequirementsProjection_returns_correct_shape", async () => {
  const root = makeProject();
  openDb(root);
  insertRequirement(seedRow({ id: "R031" }));
  insertRequirement(seedRow({ id: "R032" }));
  insertRequirement(seedRow({ id: "R033" }));
  const result = await buildRequirementsProjection(root);
  assert.equal(result.written, true);
  assert.equal(result.rows, 3);
  assert.ok(typeof result.bytes === "number" && result.bytes > 0, "bytes > 0");
  assert.ok(
    typeof result.projectionPath === "string" &&
      result.projectionPath.includes("requirements-projected.md"),
    "projectionPath contains expected filename",
  );
});
// ── Test 5: Empty DB → header written, zero R-sections ────────────────────────
test("buildRequirementsProjection_with_empty_db_writes_header_only", async () => {
  const root = makeProject();
  openDb(root);
  const result = await buildRequirementsProjection(root);
  assert.equal(result.written, true);
  assert.equal(result.rows, 0);
  assert.ok(result.bytes > 0, "header itself has bytes");
  const content = readFileSync(result.projectionPath, "utf-8");
  assert.match(content, /# SF Requirements Corpus/, "header should be present");
  assert.doesNotMatch(content, /## R/, "should have no R-sections");
});

View file

@ -0,0 +1,53 @@
/**
 * session-fts-repair.test.mjs — regression tests for session FTS schema repair.
 *
 * Purpose: prove sf-db can reopen a DB that contains orphaned FTS5 shadow
 * tables from an interrupted or partial `turns_fts` migration.
 */
import assert from "node:assert/strict";
import { mkdirSync, mkdtempSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { DatabaseSync } from "node:sqlite";
import { afterEach, test } from "vitest";
import { closeDatabase, openDatabase } from "../sf-db.js";
// Temp project roots created by makeDbPath; drained and deleted after each test.
const tmpRoots = [];
afterEach(() => {
  // Release the DB handle before deleting the directory that holds sf.db.
  closeDatabase();
  for (const root of tmpRoots.splice(0)) {
    rmSync(root, { recursive: true, force: true });
  }
});
/** Create a temp project root with a .sf/ dir and return the path of its sf.db. */
function makeDbPath() {
  const projectRoot = mkdtempSync(join(tmpdir(), "sf-session-fts-repair-"));
  mkdirSync(join(projectRoot, ".sf"), { recursive: true });
  tmpRoots.push(projectRoot); // cleaned up in afterEach
  return join(projectRoot, ".sf", "sf.db");
}
test("openDatabase_when_turns_fts_shadow_tables_are_orphaned_repairs_schema", () => {
  const dbPath = makeDbPath();
  // Simulate an interrupted migration: create the four FTS5 shadow tables
  // WITHOUT the turns_fts virtual table that normally owns them.
  const raw = new DatabaseSync(dbPath);
  raw.exec(`
    CREATE TABLE turns_fts_data(id INTEGER PRIMARY KEY, block BLOB);
    CREATE TABLE turns_fts_idx(segid INTEGER, term TEXT, pgno INTEGER);
    CREATE TABLE turns_fts_docsize(id INTEGER PRIMARY KEY, sz BLOB);
    CREATE TABLE turns_fts_config(k PRIMARY KEY, v) WITHOUT ROWID;
  `);
  raw.close();
  // Reopening through sf-db must succeed despite the orphaned shadow tables.
  assert.equal(openDatabase(dbPath), true);
  closeDatabase();
  // After repair, the turns_fts table itself must exist in the schema.
  const repaired = new DatabaseSync(dbPath);
  const table = repaired
    .prepare(
      "SELECT name FROM sqlite_master WHERE type='table' AND name='turns_fts'",
    )
    .get();
  assert.equal(table.name, "turns_fts");
  repaired.close();
});

View file

@ -7,8 +7,14 @@
* while respecting explicit caller overrides.
*/
import assert from "node:assert/strict";
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { describe, it, vi } from "vitest";
import { chooseSiftRetrievers } from "../code-intelligence.js";
import {
chooseSiftRetrievers,
resolveSiftSearchScopes,
} from "../code-intelligence.js";
// ── chooseSiftRetrievers unit tests ────────────────────────────────────────
@ -110,3 +116,36 @@ describe("warmup regression guard", () => {
assert.equal(result.reranking, "none");
});
});
describe("resolveSiftSearchScopes", () => {
  it("repo_root_dot_expands_to_existing_top_level_shards", () => {
    const root = mkdtempSync(join(tmpdir(), "sf-sift-shards-"));
    try {
      // Three ordinary top-level dirs plus the requirements projection file.
      mkdirSync(join(root, "src"));
      mkdirSync(join(root, "web"));
      mkdirSync(join(root, "docs"));
      mkdirSync(join(root, ".sf", "runtime", "sift"), { recursive: true });
      writeFileSync(
        join(root, ".sf", "runtime", "sift", "requirements-projected.md"),
        "# requirements\n",
      );
      const result = resolveSiftSearchScopes(root, ".");
      assert.equal(result.originalScope, ".");
      assert.equal(result.sharded, true);
      // Shards are the existing top-level dirs plus the projection file;
      // the rest of .sf/ is not included as a shard.
      assert.deepEqual(result.scopes, [
        "src",
        "web",
        "docs",
        ".sf/runtime/sift/requirements-projected.md",
      ]);
    } finally {
      rmSync(root, { recursive: true, force: true });
    }
  });
  it("non_root_scope_remains_single_scope", () => {
    // Any explicit sub-path bypasses sharding entirely.
    const result = resolveSiftSearchScopes("/repo", "src/resources");
    assert.equal(result.sharded, false);
    assert.deepEqual(result.scopes, ["src/resources"]);
  });
});

View file

@ -66,8 +66,19 @@ function makeProject({ iterLines = null } = {}) {
// Open DB and seed data
openDatabase(join(dir, ".sf", "sf.db"));
insertMilestone({ id: "M001", title: "Test milestone", status: "active" });
insertSlice({ milestoneId: "M001", id: "S01", title: "Test slice", status: "pending" });
insertTask({ milestoneId: "M001", sliceId: "S01", id: "T02", title: "Test task", status: "complete" });
insertSlice({
milestoneId: "M001",
id: "S01",
title: "Test slice",
status: "pending",
});
insertTask({
milestoneId: "M001",
sliceId: "S01",
id: "T02",
title: "Test task",
status: "complete",
});
return dir;
}
@ -170,7 +181,15 @@ test("updateTaskStatus_complete_forward_transition_never_blocked", () => {
// Seed the task as pending so a "complete" update makes sense
const dir = makeProject({ iterLines });
// updateTaskStatus to "complete" should never be blocked
updateTaskStatus("M001", "S01", "T02", "complete", new Date().toISOString(), undefined, {
basePath: dir,
});
updateTaskStatus(
"M001",
"S01",
"T02",
"complete",
new Date().toISOString(),
undefined,
{
basePath: dir,
},
);
});

View file

@ -20,6 +20,7 @@ import {
resolveSiftBinary,
resolveSiftLogging,
resolveSiftSearchScope,
resolveSiftSearchScopes,
} from "../code-intelligence.js";
import { getErrorMessage } from "../error-utils.js";
import { recordRetrievalEvidence } from "../retrieval-evidence.js";
@ -45,8 +46,13 @@ const DEFAULT_TIMEOUT_MS = 600_000;
/**
* Build the sift CLI argument list from tool parameters.
*/
function buildSiftArgs(params, projectRoot = process.cwd()) {
const scope = resolveSiftSearchScope(projectRoot, params.path);
function buildSiftArgs(
params,
projectRoot = process.cwd(),
scopeOverride = null,
) {
const scope =
scopeOverride ?? resolveSiftSearchScope(projectRoot, params.path);
const args = [
"search",
"--json",
@ -63,7 +69,7 @@ function buildSiftArgs(params, projectRoot = process.cwd()) {
// Explicit overrides always win; for repo-root scope the helper returns
// bm25+phrase (no vector) to avoid the full-workspace embedding hang
// (#vector-hang-fix). For scoped subdirs, vector + reranking are enabled.
const scopedDefaults = chooseSiftRetrievers(params.path ?? ".", projectRoot);
const scopedDefaults = chooseSiftRetrievers(scope, projectRoot);
const effectiveRetrievers = params.retrievers ?? scopedDefaults.retrievers;
const effectiveReranking = params.reranking ?? scopedDefaults.reranking;
args.push("--retrievers", String(effectiveRetrievers));
@ -130,6 +136,26 @@ function parseSiftOutput(rawStdout, rawStderr) {
stderr: rawStderr,
};
}
/**
 * Merge hits from multiple per-scope sift runs into one ranked list.
 *
 * Hits are deduplicated by (path, line range, content) — the same match can
 * surface from more than one shard — then sorted by descending score and
 * truncated to `limit`. Non-empty stderr streams are concatenated so shard
 * diagnostics survive the merge.
 *
 * @param {Array<{hits?: Array<object>, stderr?: string, scope?: string}>} results
 *   Parsed per-scope outputs (shape produced by parseSiftOutput).
 * @param {number} limit Maximum number of hits to return.
 * @returns {{hits: Array<object>, stderr: string}}
 */
function mergeSiftResults(results, limit) {
  const seen = new Set();
  const hits = [];
  for (const result of results) {
    // Defensive: a scope whose output failed to parse may lack a hits array.
    for (const hit of result.hits ?? []) {
      const key = `${hit.path}:${hit.lineStart ?? ""}:${hit.lineEnd ?? ""}:${hit.content}`;
      if (seen.has(key)) continue;
      seen.add(key);
      hits.push(hit);
    }
  }
  // NaN-safe score: a non-numeric score ranks as 0 instead of poisoning the
  // comparator (NaN comparisons would make the sort order unspecified).
  const scoreOf = (hit) => {
    const value = Number(hit.score ?? 0);
    return Number.isFinite(value) ? value : 0;
  };
  hits.sort((a, b) => scoreOf(b) - scoreOf(a));
  return {
    hits: hits.slice(0, limit),
    stderr: results
      .map((result) => result.stderr)
      .filter((text) => text && text.trim().length > 0)
      .join("\n"),
  };
}
/**
* Execute a sift search with the given parameters.
@ -272,7 +298,7 @@ export function registerSiftSearchTool(pi) {
path: Type.Optional(
Type.String({
description:
"Directory or file path to search within. Default: repository root ('.'); absolute paths inside the repo are normalized to repo-relative paths so .siftignore applies.",
"Directory or file path to search within. Default/all-repo expands to bounded top-level shards instead of passing '.' directly to Sift.",
default: ".",
}),
),
@ -355,30 +381,43 @@ export function registerSiftSearchTool(pi) {
}
const projectRoot = process.cwd();
const args = buildSiftArgs(params, projectRoot);
const scope = args.at(-2) ?? ".";
const scopePlan = resolveSiftSearchScopes(projectRoot, params.path);
const timeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;
const { env: logEnv, logPath } = resolveSiftLogging(projectRoot);
const startedAt = Date.now();
try {
const { stdout, stderr } = await runSift(
binaryPath,
args,
timeoutMs,
projectRoot,
logEnv,
logPath,
);
const perScopeTimeoutMs = scopePlan.sharded
? Math.max(5_000, Math.floor(timeoutMs / scopePlan.scopes.length))
: timeoutMs;
const scopedResults = [];
for (const scope of scopePlan.scopes) {
const args = buildSiftArgs(params, projectRoot, scope);
const { stdout, stderr } = await runSift(
binaryPath,
args,
perScopeTimeoutMs,
projectRoot,
logEnv,
logPath,
);
const parsed = parseSiftOutput(stdout, stderr);
scopedResults.push({ ...parsed, scope });
}
const elapsedMs = Date.now() - startedAt;
const result = parseSiftOutput(stdout, stderr);
const result = mergeSiftResults(
scopedResults,
params.limit ?? DEFAULT_LIMIT,
);
const runtimeDirs = ensureSiftRuntimeDirs(projectRoot);
await recordRetrievalEvidence(projectRoot, {
backend: "sift",
sourceKind: "code",
query: params.query,
strategy: params.strategy ?? DEFAULT_STRATEGY,
scope,
scope: scopePlan.sharded
? `shards:${scopePlan.scopes.join(",")}`
: scopePlan.scopes[0],
status: "ok",
hitCount: result.hits.length,
elapsedMs,
@ -402,7 +441,7 @@ export function registerSiftSearchTool(pi) {
query: params.query,
strategy: params.strategy ?? DEFAULT_STRATEGY,
agent: params.agent ?? false,
path: scope,
path: scopePlan.sharded ? scopePlan.scopes : scopePlan.scopes[0],
hitCount: result.hits.length,
elapsedMs,
binary: binaryPath,
@ -415,7 +454,7 @@ export function registerSiftSearchTool(pi) {
const lines = [
`Sift search: "${params.query}"`,
`Strategy: ${params.strategy ?? DEFAULT_STRATEGY}${params.agent ? ` | agent: ${params.agentMode ?? "linear"} | planner: ${params.plannerStrategy ?? "heuristic"}` : ""}`,
`Scope: ${scope}`,
`Scope: ${scopePlan.sharded ? `all repo shards (${scopePlan.scopes.join(", ")})` : scopePlan.scopes[0]}`,
`Search cache: ${runtimeDirs.searchCache}`,
`Hits: ${result.hits.length} | Elapsed: ${elapsedMs}ms`,
"",
@ -449,6 +488,8 @@ export function registerSiftSearchTool(pi) {
query: params.query,
strategy: params.strategy ?? DEFAULT_STRATEGY,
agent: params.agent ?? false,
sharded: scopePlan.sharded,
scopes: scopePlan.scopes,
elapsedMs,
hitCount: result.hits.length,
hits: result.hits,
@ -463,7 +504,9 @@ export function registerSiftSearchTool(pi) {
sourceKind: "code",
query: params.query,
strategy: params.strategy ?? DEFAULT_STRATEGY,
scope,
scope: scopePlan.sharded
? `shards:${scopePlan.scopes.join(",")}`
: scopePlan.scopes[0],
status: "error",
hitCount: 0,
elapsedMs,
@ -481,6 +524,8 @@ export function registerSiftSearchTool(pi) {
operation: "sift_search",
query: params.query,
strategy: params.strategy ?? DEFAULT_STRATEGY,
sharded: scopePlan.sharded,
scopes: scopePlan.scopes,
elapsedMs,
error: message,
},