From 87e9729c13417017ce22c1b25f911d7d0e5b3a93 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Sun, 17 May 2026 15:38:55 +0200 Subject: [PATCH] fix: shard sift search and project requirements --- .siftignore | 5 + bin/sf-from-source | 29 +- .../extensions/sf/bootstrap/register-hooks.js | 13 + .../extensions/sf/code-intelligence.js | 72 +++- src/resources/extensions/sf/db-writer.js | 2 + src/resources/extensions/sf/md-importer.js | 2 + .../sf/sf-db/requirement-status-sweeper.js | 398 ++++++++++++++++++ .../sf/sf-db/requirements-sift-projection.js | 150 +++++++ .../extensions/sf/sf-db/sf-db-schema.js | 32 +- .../extensions/sf/sf-db/sf-db-tasks.js | 15 +- src/resources/extensions/sf/subagent/index.js | 282 ++++++++----- .../db-writer-requirements-generator.test.mjs | 27 +- .../tests/requirement-status-sweeper.test.mjs | 345 +++++++++++++++ .../requirements-sift-projection.test.mjs | 205 +++++++++ .../sf/tests/session-fts-repair.test.mjs | 53 +++ .../sf/tests/sift-retriever-scope.test.mjs | 41 +- .../update-task-status-revert-safety.test.mjs | 29 +- .../extensions/sf/tools/sift-search-tool.js | 83 +++- 18 files changed, 1630 insertions(+), 153 deletions(-) create mode 100644 src/resources/extensions/sf/sf-db/requirement-status-sweeper.js create mode 100644 src/resources/extensions/sf/sf-db/requirements-sift-projection.js create mode 100644 src/resources/extensions/sf/tests/requirement-status-sweeper.test.mjs create mode 100644 src/resources/extensions/sf/tests/requirements-sift-projection.test.mjs create mode 100644 src/resources/extensions/sf/tests/session-fts-repair.test.mjs diff --git a/.siftignore b/.siftignore index 2cf4002ba..b8290150d 100644 --- a/.siftignore +++ b/.siftignore @@ -1,5 +1,10 @@ .git/** .sf/** +# Re-include the live requirements corpus projection so agents can semantic-search R-entries. 
+!.sf/ +!.sf/runtime/ +!.sf/runtime/sift/ +!.sf/runtime/sift/requirements-projected.md .bg-shell/** .pytest_cache/** .venv/** diff --git a/bin/sf-from-source b/bin/sf-from-source index b6d3358c9..a2d561f26 100755 --- a/bin/sf-from-source +++ b/bin/sf-from-source @@ -66,11 +66,34 @@ fi # automatically when this process exits. Skip in known read-only # modes (logs, status, dash, sessions, list, version, help) where # concurrent reads are safe and useful. -case "${1:-}" in - logs|status|dash|sessions|list|--version|-v|--help|-h) - : # read-only — no lock needed +# +# Top-level read-only commands skip the lock. ALSO skip when $1=headless +# AND $2 is a read-only subcommand — otherwise the operator can't even +# check SF's state while autonomous mode is running (regression observed +# 2026-05-17 when `sf headless query` / `feedback list` / --help were +# rejected with "Another sf is already running" despite being pure reads). +case "${1:-} ${2:-}" in + "logs "*|"status "*|"dash "*|"sessions "*|"list "*|"--version "*|"-v "*|"--help "*|"-h "*) + : # top-level read-only — no lock needed + ;; + "headless --help"*|"headless -h"*|"headless --version"*|"headless -v"*|"headless query"*|"headless status"*|"headless usage"*|"headless reflect"*|"headless ") + : # headless read-only subcommand — no lock needed + ;; + "headless feedback"*|"headless triage"*) + # `feedback list` and `triage --list` are read-only; `feedback add/resolve` + # and `triage --run/--apply` are writes. Allow the read-only forms only. 
+ if [[ "$*" == *" list"* || "$*" == *" --list"* || "$*" == *" --json"* ]]; then + : # read-only inspect + else + __SF_NEEDS_LOCK=1 + fi ;; *) + __SF_NEEDS_LOCK=1 + ;; +esac +case "${__SF_NEEDS_LOCK:-}" in + 1) if [[ -z "${SF_SKIP_LOCK:-}" ]]; then SF_PROJECT_LOCK_DIR="$(pwd)/.sf" mkdir -p "$SF_PROJECT_LOCK_DIR" 2>/dev/null || true diff --git a/src/resources/extensions/sf/bootstrap/register-hooks.js b/src/resources/extensions/sf/bootstrap/register-hooks.js index f31f8a23a..fce37de70 100644 --- a/src/resources/extensions/sf/bootstrap/register-hooks.js +++ b/src/resources/extensions/sf/bootstrap/register-hooks.js @@ -538,6 +538,19 @@ export function registerHooks(pi, ecosystemHandlers = []) { } catch { /* non-fatal — codex catalog refresh must never block session start */ } + // Refresh the requirements corpus projection before Sift warms up so + // the live R-entries are in the index from the first search. The + // projection writes .sf/runtime/sift/requirements-projected.md from + // the DB; .siftignore has a negation for that path so root-scope + // warmup picks it up automatically. + try { + const { buildRequirementsProjection } = await import( + "../sf-db/requirements-sift-projection.js" + ); + buildRequirementsProjection(process.cwd()); + } catch { + /* non-fatal — requirements projection must never block session start */ + } // Pre-warm the Sift search index so the first agent query in this // session doesn't pay the cold-build cost. 
ensureSiftIndexWarmup // (code-intelligence.js) already runs the full hybrid+vector+ diff --git a/src/resources/extensions/sf/code-intelligence.js b/src/resources/extensions/sf/code-intelligence.js index a652d91a9..65e4bbfbb 100644 --- a/src/resources/extensions/sf/code-intelligence.js +++ b/src/resources/extensions/sf/code-intelligence.js @@ -15,6 +15,67 @@ import { writeFileSync, } from "node:fs"; import { delimiter, isAbsolute, join, relative, resolve } from "node:path"; +import { getErrorMessage } from "./error-utils.js"; + +/** + * Lists bounded Sift scopes that approximate a whole-repo search. + * + * Purpose: keep root-level "search all" responsive by fanning out across + * top-level project areas, including the generated requirements projection. + * + * Consumer: `resolveSiftSearchScopes` for `sift_search` and `codebase_search`. + */ +export const SIFT_ALL_REPO_SHARDS = [ + "src", + "packages", + "web", + "docs", + "scripts", + "rust-engine", + "tests", + ".github", + "docker", + ".sf/runtime/sift/requirements-projected.md", +]; + +export function isSiftRepoRootScope(scopePath, projectRoot) { + const normalizedRoot = resolve(projectRoot); + const requested = + typeof scopePath === "string" && scopePath.trim() ? scopePath.trim() : "."; + const absolute = isAbsolute(requested) + ? resolve(requested) + : resolve(normalizedRoot, requested); + return absolute === normalizedRoot; +} + +/** + * Resolve a requested Sift scope into one or more executable scopes. + * + * Purpose: avoid passing repo root (`.`) directly to Sift in normal agent + * workflows. Root searches are measurably pathological in this repo, while + * top-level shard searches remain bounded and responsive. + * + * Consumer: `sift_search`, `codebase_search`, and future Sift warmup/search + * wrappers that need whole-repo coverage without root-scope hangs. 
+ */ +export function resolveSiftSearchScopes(projectRoot, scope) { + const resolved = resolveSiftSearchScope(projectRoot, scope); + if (!isSiftRepoRootScope(resolved, projectRoot)) { + return { + originalScope: resolved, + scopes: [resolved], + sharded: false, + }; + } + const scopes = SIFT_ALL_REPO_SHARDS.filter((shard) => + existsSync(resolve(projectRoot, shard)), + ); + return { + originalScope: resolved, + scopes: scopes.length > 0 ? scopes : [resolved], + sharded: scopes.length > 0, + }; +} /** * Choose sift retrievers based on scope size. @@ -34,21 +95,12 @@ import { delimiter, isAbsolute, join, relative, resolve } from "node:path"; * For scoped paths: { retrievers: "bm25,phrase,vector", reranking: "position-aware" } */ export function chooseSiftRetrievers(scopePath, projectRoot) { - const normalizedRoot = resolve(projectRoot); - const requested = - typeof scopePath === "string" && scopePath.trim() ? scopePath.trim() : "."; - const absolute = isAbsolute(requested) - ? resolve(requested) - : resolve(normalizedRoot, requested); - const isRepoRoot = absolute === normalizedRoot; - if (isRepoRoot) { + if (isSiftRepoRootScope(scopePath, projectRoot)) { return { retrievers: "bm25,phrase", reranking: "none" }; } return { retrievers: "bm25,phrase,vector", reranking: "position-aware" }; } -import { getErrorMessage } from "./error-utils.js"; - const SIFT_BINARY_NAME = process.platform === "win32" ? "sift.exe" : "sift"; const DEFAULT_SIFT_WARMUP_TTL_MS = 6 * 60 * 60 * 1000; const DEFAULT_SIFT_WARMUP_QUERY = diff --git a/src/resources/extensions/sf/db-writer.js b/src/resources/extensions/sf/db-writer.js index 281db1939..16dd3b017 100644 --- a/src/resources/extensions/sf/db-writer.js +++ b/src/resources/extensions/sf/db-writer.js @@ -110,8 +110,10 @@ export function generateDecisionsMd(decisions) { /** Status values that map to specific sections, in display order. 
*/ const STATUS_SECTION_MAP = [ { status: "active", heading: "Active" }, + { status: "resolved", heading: "Resolved" }, { status: "validated", heading: "Validated" }, { status: "deferred", heading: "Deferred" }, + { status: "superseded", heading: "Superseded" }, { status: "cancelled", heading: "Cancelled" }, { status: "out-of-scope", heading: "Out of Scope" }, ]; diff --git a/src/resources/extensions/sf/md-importer.js b/src/resources/extensions/sf/md-importer.js index a9eb813a6..a353be2d5 100644 --- a/src/resources/extensions/sf/md-importer.js +++ b/src/resources/extensions/sf/md-importer.js @@ -115,8 +115,10 @@ export function parseDecisionsTable(content) { // ─── REQUIREMENTS.md Parser ──────────────────────────────────────────────── const STATUS_SECTIONS = { "## active": "active", + "## resolved": "resolved", "## validated": "validated", "## deferred": "deferred", + "## superseded": "superseded", "## cancelled": "cancelled", "## out of scope": "out-of-scope", }; diff --git a/src/resources/extensions/sf/sf-db/requirement-status-sweeper.js b/src/resources/extensions/sf/sf-db/requirement-status-sweeper.js new file mode 100644 index 000000000..d552d61fc --- /dev/null +++ b/src/resources/extensions/sf/sf-db/requirement-status-sweeper.js @@ -0,0 +1,398 @@ +/** + * requirement-status-sweeper.js — auto-transition R-entries based on DB state. + * + * Purpose: walk requirements rows with status='active' and transition each + * to 'validated' / 'resolved' / 'superseded' when the supporting evidence + * indicates the work is done. + * + * Three rules: + * 1. Operational R with `Source IDs: sf-...` in description → if ALL source + * self_feedback rows have resolved_at != NULL → set status='resolved' + * and resolved_reason='evidence-backlog-drained-by-sweeper-'. + * 2. Capability/architecture/functional R with non-empty supporting_slices + * JSON array → if ALL referenced slice rows have status='complete' → set + * status='validated'. + * 3. 
R with supersedes_by pointing to a row where that row's status ∈ + * {'resolved','validated','superseded','cancelled'} → set this R's + * status='superseded'. + * + * Logs each transition as self-feedback (kind: 'r-entry-auto-transition', + * severity: 'low') with {fromStatus, toStatus, rule, reason}. + * + * Consumer: UokGate (type=verification, id=requirement-status-sweeper). NOT + * registered here — registration is a follow-up slice once Layer A lands + * its parallel edit of gate-registry-bootstrap.js. + */ + +import { _getAdapter } from "./sf-db-core.js"; +import { insertSelfFeedbackEntry } from "./sf-db-self-feedback.js"; + +// ─── Constants ──────────────────────────────────────────────────────────────── + +const ALL_RULES = [ + "operational-evidence-drained", + "supporting-slices-complete", + "supersedes-resolved", +]; + +const SUPERSEDED_TERMINAL_STATUSES = new Set([ + "resolved", + "validated", + "superseded", + "cancelled", +]); + +// ─── Helpers ────────────────────────────────────────────────────────────────── + +/** + * Parse `Source IDs: sf-xxx, sf-yyy` from a description string. + * Returns an array of IDs, or [] if the marker is absent. + */ +function parseSourceIds(description) { + if (typeof description !== "string") return []; + const marker = "Source IDs:"; + const idx = description.indexOf(marker); + if (idx === -1) return []; + // Take everything after the marker up to a blank line or end of string. + const afterMarker = description.slice(idx + marker.length); + const paragraphEnd = afterMarker.search(/\n\s*\n/); + const raw = + paragraphEnd === -1 ? afterMarker : afterMarker.slice(0, paragraphEnd); + return raw + .split(",") + .map((s) => s.trim()) + .filter(Boolean); +} + +/** + * Parse supporting_slices JSON field → array of "milestoneId/sliceId" strings. + * Returns [] for any parse failure or empty array. 
+ */ +function parseSupportingSlices(raw) { + if (typeof raw !== "string" || raw.trim() === "" || raw.trim() === "[]") + return []; + try { + const parsed = JSON.parse(raw); + if (!Array.isArray(parsed)) return []; + return parsed.filter((s) => typeof s === "string" && s.length > 0); + } catch { + return []; + } +} + +/** + * Record a self-feedback entry for an auto-transition (non-fatal). + */ +function logTransition(req, toStatus, rule, reason) { + try { + const ts = new Date().toISOString(); + const id = `sf-sweeper-${req.id}-${rule}-${Date.now()}`; + insertSelfFeedbackEntry({ + id, + ts, + kind: "r-entry-auto-transition", + severity: "low", + blocking: false, + repoIdentity: "forge", + sfVersion: "", + basePath: "", + summary: `R-entry ${req.id} auto-transitioned ${req.status} → ${toStatus} via rule ${rule}`, + evidence: JSON.stringify({ + fromStatus: req.status, + toStatus, + rule, + reason, + }), + suggestedFix: "", + full_json: JSON.stringify({ + id, + ts, + kind: "r-entry-auto-transition", + severity: "low", + blocking: false, + repoIdentity: "forge", + sfVersion: "", + basePath: "", + summary: `R-entry ${req.id} auto-transitioned ${req.status} → ${toStatus} via rule ${rule}`, + evidence: JSON.stringify({ + fromStatus: req.status, + toStatus, + rule, + reason, + }), + suggestedFix: "", + fromStatus: req.status, + toStatus, + rule, + reason, + }), + }); + } catch { + // non-fatal + } +} + +/** + * Record a dangling-chain warning as self-feedback (non-fatal). 
+ */ +function logDanglingChain(reqId, supersededById) { + try { + const ts = new Date().toISOString(); + const id = `sf-sweeper-dangling-${reqId}-${Date.now()}`; + insertSelfFeedbackEntry({ + id, + ts, + kind: "r-entry-dangling-supersedes", + severity: "low", + blocking: false, + repoIdentity: "forge", + sfVersion: "", + basePath: "", + summary: `R-entry ${reqId} references supersedes_by=${supersededById} which does not exist`, + evidence: JSON.stringify({ reqId, supersededById }), + suggestedFix: "Review and correct the supersedes_by reference", + full_json: JSON.stringify({ + id, + ts, + kind: "r-entry-dangling-supersedes", + severity: "low", + blocking: false, + repoIdentity: "forge", + sfVersion: "", + basePath: "", + summary: `R-entry ${reqId} references supersedes_by=${supersededById} which does not exist`, + evidence: JSON.stringify({ reqId, supersededById }), + suggestedFix: "Review and correct the supersedes_by reference", + }), + }); + } catch { + // non-fatal + } +} + +// ─── Rule implementations ───────────────────────────────────────────────────── + +/** + * Rule 1: operational-evidence-drained + * Operational R entries carry "Source IDs: sf-..." in their description. + * If all listed self_feedback rows are resolved, the R transitions to 'resolved'. 
+ */ +function applyOperationalEvidenceDrained(db, req) { + if (!req.description) return null; + const sourceIds = parseSourceIds(req.description); + if (sourceIds.length === 0) return null; + + // Check each source ID — all must be resolved (resolved_at NOT NULL) + for (const sfId of sourceIds) { + const row = db + .prepare("SELECT resolved_at FROM self_feedback WHERE id = ?") + .get(sfId); + // If not found or not resolved, rule doesn't fire + if (!row || !row["resolved_at"]) return null; + } + + const date = new Date().toISOString().slice(0, 10); + return { + toStatus: "resolved", + rule: "operational-evidence-drained", + reason: `evidence-backlog-drained-by-sweeper-${date}`, + updates: { + status: "resolved", + notes: `evidence-backlog-drained-by-sweeper-${date}`, + }, + }; +} + +/** + * Rule 2: supporting-slices-complete + * Capability/architecture/functional R entries with non-empty supporting_slices. + * If all referenced slices are complete, the R transitions to 'validated'. + */ +function applySupportingSlicesComplete(db, req) { + const sliceRefs = parseSupportingSlices(req.supporting_slices); + if (sliceRefs.length === 0) return null; + + for (const ref of sliceRefs) { + // Expected format: "milestoneId/sliceId" + const slashIdx = ref.indexOf("/"); + if (slashIdx === -1) return null; // malformed ref, skip rule + + const milestoneId = ref.slice(0, slashIdx); + const sliceId = ref.slice(slashIdx + 1); + + const row = db + .prepare("SELECT status FROM slices WHERE milestone_id = ? AND id = ?") + .get(milestoneId, sliceId); + + if (!row || row["status"] !== "complete") return null; + } + + return { + toStatus: "validated", + rule: "supporting-slices-complete", + reason: `all ${sliceRefs.length} supporting slice(s) complete`, + updates: { status: "validated" }, + }; +} + +/** + * Rule 3: supersedes-resolved + * If this R has a supersedes_by pointing to a terminal R, mark this one superseded. + * Logs a dangling-chain warning if the target doesn't exist. 
+ */ +function applySupersedes(db, req) { + const supersededBy = req.superseded_by; + if (!supersededBy) return null; + + const targetRow = db + .prepare("SELECT id, status FROM requirements WHERE id = ?") + .get(supersededBy); + + if (!targetRow) { + // Dangling chain — log as self-feedback but don't transition + logDanglingChain(req.id, supersededBy); + return null; + } + + if (!SUPERSEDED_TERMINAL_STATUSES.has(targetRow["status"])) return null; + + return { + toStatus: "superseded", + rule: "supersedes-resolved", + reason: `superseded by ${supersededBy} (status=${targetRow["status"]})`, + updates: { status: "superseded" }, + }; +} + +// ─── Main export ────────────────────────────────────────────────────────────── + +/** + * Walk all active requirements and apply transition rules. + * + * @param {string} _basePath - base path of the project (unused directly; + * DB adapter is process-global, kept for API symmetry with other sweepers). + * @param {object} [options] + * @param {boolean} [options.dryRun=false] - if true, returns transitions but + * does not write to DB. + * @param {string[]} [options.includeRules] - subset of rules to apply; defaults + * to all three. + * @returns {{ transitioned: Array<{id,fromStatus,toStatus,rule}>, totalChecked: number, durationMs: number }} + */ +export async function sweepRequirementStatuses(_basePath, options = {}) { + const startMs = Date.now(); + const dryRun = options.dryRun === true; + const includeRules = Array.isArray(options.includeRules) + ? options.includeRules + : ALL_RULES; + + const db = _getAdapter(); + if (!db) { + return { transitioned: [], totalChecked: 0, durationMs: 0 }; + } + + // Fetch all active requirements (status='active', superseded_by IS NULL via + // the active_requirements view). 
+ const rows = db + .prepare("SELECT * FROM requirements WHERE status = 'active'") + .all(); + + const transitioned = []; + + for (const rawRow of rows) { + const req = { + id: rawRow["id"], + class: rawRow["class"], + status: rawRow["status"], + description: rawRow["description"], + why: rawRow["why"], + source: rawRow["source"], + primary_owner: rawRow["primary_owner"], + supporting_slices: rawRow["supporting_slices"], + validation: rawRow["validation"], + notes: rawRow["notes"], + full_content: rawRow["full_content"], + superseded_by: rawRow["superseded_by"] ?? null, + }; + + let transition = null; + + // Apply rules in priority order; first match wins. + if (!transition && includeRules.includes("operational-evidence-drained")) { + transition = applyOperationalEvidenceDrained(db, req); + } + if (!transition && includeRules.includes("supporting-slices-complete")) { + transition = applySupportingSlicesComplete(db, req); + } + if (!transition && includeRules.includes("supersedes-resolved")) { + transition = applySupersedes(db, req); + } + + if (!transition) continue; + + const entry = { + id: req.id, + fromStatus: req.status, + toStatus: transition.toStatus, + rule: transition.rule, + }; + transitioned.push(entry); + + if (!dryRun) { + // Apply the status update; preserve all other columns. + db.prepare(`UPDATE requirements SET status = :status WHERE id = :id`).run( + { ":status": transition.toStatus, ":id": req.id }, + ); + + // For rule 1: write reason into notes field as well. + if (transition.rule === "operational-evidence-drained") { + const existingNotes = req.notes ?? ""; + const sep = existingNotes.trim().length > 0 ? 
"\n" : ""; + db.prepare(`UPDATE requirements SET notes = :notes WHERE id = :id`).run( + { + ":notes": `${existingNotes}${sep}${transition.reason}`, + ":id": req.id, + }, + ); + } + + logTransition( + req, + transition.toStatus, + transition.rule, + transition.reason, + ); + } + } + + return { + transitioned, + totalChecked: rows.length, + durationMs: Date.now() - startMs, + }; +} + +// ─── UokGate wrapper ────────────────────────────────────────────────────────── + +/** + * UokGate descriptor for the requirement status sweeper. + * Registration is deferred — see module-level comment. + */ +export const requirementStatusSweeperGate = { + id: "requirement-status-sweeper", + type: "verification", + async execute(ctx) { + const result = await sweepRequirementStatuses(ctx.basePath, ctx.options); + return result.transitioned.length === 0 + ? { + outcome: "pass", + failureClass: null, + rationale: `requirement sweep: 0 of ${result.totalChecked} needed transition`, + } + : { + outcome: "manual-attention", + failureClass: "verification", + rationale: `requirement sweep: ${result.transitioned.length} of ${result.totalChecked} transitioned`, + findings: result.transitioned, + }; + }, +}; diff --git a/src/resources/extensions/sf/sf-db/requirements-sift-projection.js b/src/resources/extensions/sf/sf-db/requirements-sift-projection.js new file mode 100644 index 000000000..cbd63c3c7 --- /dev/null +++ b/src/resources/extensions/sf/sf-db/requirements-sift-projection.js @@ -0,0 +1,150 @@ +/** + * requirements-sift-projection.js — emit a fresh sift-friendly markdown of all R-entries. + * + * Purpose: write .sf/runtime/sift/requirements-projected.md from the DB so + * sift's index sees the live state of the requirements corpus, not the + * (possibly stale) checked-in .sf/REQUIREMENTS.md. + * + * Consumer: sift warmup pipeline (bootstrap/register-hooks.js, auto/loop.js). + * + * Failure boundary: read-only on DB; write-only on .sf/runtime/sift/. 
+ */ +import { mkdirSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { _getAdapter } from "./sf-db-core.js"; + +/** Canonical output path for the requirements corpus projection. */ +export function getRequirementsProjectionPath(basePath) { + return join(basePath, ".sf", "runtime", "sift", "requirements-projected.md"); +} + +/** + * Escape a text value for inline markdown use. + * Collapses interior newlines into spaces so each field renders on one line + * when used in bullet form. Backtick sequences are escaped to avoid + * unintended code-span boundaries. + */ +function mdInline(value) { + if (value == null) return ""; + return String(value) + .replace(/`/g, "\\`") + .replace(/\r?\n|\r/g, " ") + .trim(); +} + +/** + * Render one R-entry as a markdown section for sift indexing. + * + * Uses a flat-heading + bullet-list structure so every field is + * retrievable by keyword AND the section heading is the canonical ID. + */ +function renderRow(row) { + const lines = []; + const id = String(row["id"] ?? ""); + const descFirst = + String(row["description"] ?? "") + .split(/\r?\n/) + .find((l) => l.trim().length > 0) ?? "Untitled"; + lines.push(`## ${id} — ${mdInline(descFirst)}`); + lines.push( + `Status: ${mdInline(row["status"])} Class: ${mdInline(row["class"])} Owner: ${mdInline(row["primary_owner"])}`, + ); + lines.push(""); + if (row["description"]) { + lines.push(mdInline(row["description"])); + lines.push(""); + } + if (row["why"]) { + lines.push(`**Why:** ${mdInline(row["why"])}`); + lines.push(""); + } + const notes = String(row["notes"] ?? "").trim(); + if (notes.length > 0) { + lines.push(`**Notes:** ${mdInline(notes)}`); + lines.push(""); + } + return lines.join("\n"); +} + +/** + * Status ordering for grouped output — matches the canonical STATUS_SECTION_MAP + * used by db-writer.js so sections appear in a consistent order. 
+ */ +const STATUS_ORDER = [ + "active", + "resolved", + "validated", + "deferred", + "superseded", + "cancelled", + "out-of-scope", +]; + +function statusRank(status) { + const idx = STATUS_ORDER.indexOf(String(status ?? "").toLowerCase()); + return idx === -1 ? STATUS_ORDER.length : idx; +} + +function requirementNumber(id) { + const match = String(id ?? "").match(/^R(\d+)$/); + return match ? Number(match[1]) : Number.MAX_SAFE_INTEGER; +} + +/** + * Build a fresh markdown projection from the `requirements` DB table and + * write it to `.sf/runtime/sift/requirements-projected.md`. + * + * Returns `{written: true, rows: N, bytes: M, projectionPath}` on success. + * Returns `{written: false, reason, rows: 0}` when the DB adapter is unavailable. + * Never throws — failure is non-fatal to the warmup pipeline. + */ +export async function buildRequirementsProjection(basePath) { + const projectionPath = getRequirementsProjectionPath(basePath); + try { + const db = _getAdapter(); + if (!db) { + return { written: false, reason: "sf-db adapter unavailable", rows: 0 }; + } + // Read all non-superseded requirements ordered by status then R-number. + const rows = db + .prepare( + "SELECT id, class, status, description, why, notes, primary_owner FROM requirements WHERE superseded_by IS NULL", + ) + .all(); + + // Sort: status group first, then numeric R-id within group. 
+ rows.sort((a, b) => { + const sr = statusRank(a["status"]) - statusRank(b["status"]); + if (sr !== 0) return sr; + return requirementNumber(a["id"]) - requirementNumber(b["id"]); + }); + + const headerLines = [ + "# SF Requirements Corpus — sift projection", + "", + "This file is generated from the `requirements` DB table at warmup time.", + "Do not edit manually — it is overwritten on each SF session start.", + "", + ]; + + const sections = rows.map((row) => renderRow(row)); + const content = [...headerLines, ...sections].join("\n") + "\n"; + + mkdirSync(join(basePath, ".sf", "runtime", "sift"), { recursive: true }); + writeFileSync(projectionPath, content, "utf-8"); + + return { + written: true, + rows: rows.length, + bytes: Buffer.byteLength(content, "utf-8"), + projectionPath, + }; + } catch (err) { + return { + written: false, + reason: String(err?.message ?? err), + rows: 0, + projectionPath, + }; + } +} diff --git a/src/resources/extensions/sf/sf-db/sf-db-schema.js b/src/resources/extensions/sf/sf-db/sf-db-schema.js index da88428ba..e333179de 100644 --- a/src/resources/extensions/sf/sf-db/sf-db-schema.js +++ b/src/resources/extensions/sf/sf-db/sf-db-schema.js @@ -218,6 +218,7 @@ function ensureSessionTables(db) { UNIQUE(session_id, ref_type, ref_value) ) `); + repairOrphanedTurnsFtsTables(db); // FTS5 external-content table over turns for keyword recall. // content_rowid links to turns.id; triggers below keep it in sync. 
db.exec(` @@ -268,6 +269,30 @@ function ensureSessionTables(db) { "CREATE INDEX IF NOT EXISTS idx_session_refs_session ON session_refs(session_id, created_at DESC)", ); } + +function repairOrphanedTurnsFtsTables(db) { + const ftsTable = db + .prepare( + "SELECT name FROM sqlite_master WHERE type='table' AND name='turns_fts'", + ) + .get(); + if (ftsTable) return; + const orphan = db + .prepare( + "SELECT name FROM sqlite_master WHERE name IN ('turns_fts_data', 'turns_fts_idx', 'turns_fts_docsize', 'turns_fts_config', 'turns_fts_content', 'turns_fts_insert', 'turns_fts_update', 'turns_fts_delete') LIMIT 1", + ) + .get(); + if (!orphan) return; + db.exec("DROP TRIGGER IF EXISTS turns_fts_insert"); + db.exec("DROP TRIGGER IF EXISTS turns_fts_update"); + db.exec("DROP TRIGGER IF EXISTS turns_fts_delete"); + db.exec("DROP TABLE IF EXISTS turns_fts_data"); + db.exec("DROP TABLE IF EXISTS turns_fts_idx"); + db.exec("DROP TABLE IF EXISTS turns_fts_docsize"); + db.exec("DROP TABLE IF EXISTS turns_fts_config"); + db.exec("DROP TABLE IF EXISTS turns_fts_content"); +} + function ensureSessionSnapshotTable(db) { db.exec(` CREATE TABLE IF NOT EXISTS session_snapshots ( @@ -764,7 +789,12 @@ export function initSchema(db, fileBacked, options = {}) { // empty WAL and cannot be recovered. Explicit checkpoints are issued at // safe loop boundaries instead (post-unit finalize, close). if (fileBacked) db.exec("PRAGMA wal_autocheckpoint=0"); - if (fileBacked) db.exec("PRAGMA auto_vacuum = INCREMENTAL"); + // auto_vacuum=NONE removed the ptrmap pages that caused the 2026-05-17 + // "Bad ptr map entry" corruption when multiple node:sqlite writers + // concurrently mutated the requirements btree (rebuilt DB has none). + // The incremental_vacuum() calls in sf-db-core.js become no-ops, which + // is fine — the DB is small and disk pressure is handled separately. 
+ if (fileBacked) db.exec("PRAGMA auto_vacuum = NONE"); if (fileBacked) db.exec("PRAGMA cache_size = -8000"); // 8 MB page cache if (fileBacked && process.platform !== "darwin") db.exec("PRAGMA mmap_size = 67108864"); // 64 MB mmap diff --git a/src/resources/extensions/sf/sf-db/sf-db-tasks.js b/src/resources/extensions/sf/sf-db/sf-db-tasks.js index 6a0120b4e..0ed3e5008 100644 --- a/src/resources/extensions/sf/sf-db/sf-db-tasks.js +++ b/src/resources/extensions/sf/sf-db/sf-db-tasks.js @@ -68,7 +68,13 @@ function _findLatestIterOutcomeForUnit(basePath, milestoneId, sliceId, taskId) { * Append a revert-blocked self-feedback entry directly to * `/.sf/self-feedback.jsonl` (best-effort, never throws). */ -function _appendRevertBlockedFeedback(basePath, unitId, iterCompleteTs, attemptedRevert, stackFrame) { +function _appendRevertBlockedFeedback( + basePath, + unitId, + iterCompleteTs, + attemptedRevert, + stackFrame, +) { try { const ts = Date.now().toString(36); const rnd = Math.random().toString(36).slice(2, 8); @@ -246,7 +252,8 @@ export function updateTaskStatus( : null; // Layer B: block reverts when iterations.jsonl recently recorded outcome=complete // for this (milestone, slice, task) triple (R072 + T02-clobber fix). - const isRevert = status !== "complete" && status !== "done" && status !== "in_progress"; + const isRevert = + status !== "complete" && status !== "done" && status !== "in_progress"; if (isRevert) { const windowMs = process.env.SF_REVERT_BLOCK_WINDOW_MS !== undefined @@ -261,7 +268,9 @@ export function updateTaskStatus( ); if (iterResult && iterResult.outcome === "complete") { const iterCompleteTs = iterResult.ts; - const iterCompleteMs = iterCompleteTs ? new Date(iterCompleteTs).getTime() : 0; + const iterCompleteMs = iterCompleteTs + ? 
new Date(iterCompleteTs).getTime() + : 0; const ageMs = Date.now() - iterCompleteMs; if (ageMs <= windowMs) { const unitId = `${milestoneId}/${sliceId}/${taskId}`; diff --git a/src/resources/extensions/sf/subagent/index.js b/src/resources/extensions/sf/subagent/index.js index dcddb6e03..c6d4795d4 100644 --- a/src/resources/extensions/sf/subagent/index.js +++ b/src/resources/extensions/sf/subagent/index.js @@ -32,6 +32,7 @@ import { resolveSiftBinary, resolveSiftLogging, resolveSiftSearchScope, + resolveSiftSearchScopes, } from "../code-intelligence.js"; import { emitJournalEvent } from "../journal.js"; import { loadEffectiveSFPreferences } from "../preferences.js"; @@ -212,7 +213,7 @@ function buildCodebaseSearchArgs(strategy, query, scope, projectRoot) { // scoped subdirs get vector+reranking for semantic signal. Timeouts are // sized to accommodate cold-cache embedding builds. const { retrievers, reranking } = chooseSiftRetrievers(scope, projectRoot); - return [ + const args = [ "search", "--strategy", strategy, @@ -2641,15 +2642,15 @@ export default function (pi) { label: "Code Search", description: [ "Perform Sift-backed hybrid (BM25 + phrase) retrieval over a scoped codebase path.", - " Use this for conceptual, behavioral, or cross-cutting questions only after choosing a narrow scope", + " Use this for conceptual, behavioral, or cross-cutting questions after choosing a scope", " (e.g. 
'how is X handled?', 'where is the logic for Y?', 'find examples of Z').", - " If Sift status is degraded or the scope is broad, prefer grep/find/ls and retry with a narrower scope.", + " Repository-root scope fans out across bounded top-level shards instead of calling Sift on `.` directly.", ].join(""), promptGuidelines: [ - "Use grep/find/ls for broad orientation first, then codebase_search with a specific scope for conceptual patterns.", + "Use grep/find/ls for broad orientation first, then codebase_search with a specific scope for conceptual patterns; root/all repo is sharded.", " page-index-hybrid (default): Use for 'How' and 'Why' questions (logic, implementation, reasoning).", " path-hybrid: Use for 'Where' questions (architecture, directory structure, file location).", - " Keep scope narrow enough to avoid root-level Sift timeouts; each repo uses its own SIFT_SEARCH_CACHE under .sf/runtime/sift/.", + " Each repo uses its own SIFT_SEARCH_CACHE under .sf/runtime/sift/.", " Be descriptive in your query: include function names, types, or intent (e.g. 'auth middleware validation').", " This tool is read-only and optimized for evidence gathering before you plan or edit.", ], @@ -2711,7 +2712,10 @@ export default function (pi) { }, async execute(_toolCallId, params, signal) { const projectRoot = process.cwd(); - const scope = resolveSiftSearchScope(projectRoot, params.scope); + const scopePlan = resolveSiftSearchScopes(projectRoot, params.scope); + const scope = scopePlan.sharded + ? `shards:${scopePlan.scopes.join(",")}` + : scopePlan.scopes[0]; const strategy = params.strategy ?? 
"page-index-hybrid"; const query = params.query; const startedAt = Date.now(); @@ -2750,11 +2754,6 @@ export default function (pi) { }, }; } - const args = buildCodebaseSearchArgs(strategy, query, scope, projectRoot); - const stderr = []; - const stdout = []; - let wasAborted = false; - let timedOut = false; const runtimeDirs = ensureSiftRuntimeDirs(projectRoot); const { env: logEnv, logPath } = resolveSiftLogging(projectRoot); if (logPath) { @@ -2764,121 +2763,148 @@ export default function (pi) { fs.writeFileSync(logPath, "", "utf-8"); } - // If vector retrievers are in play, start a progress poller that - // writes index-build state into the log file every 30 seconds. - const usesVector = args.includes("vector"); - let progressTimer = null; - let lastSectorCount = 0; - if (usesVector && logPath) { - lastSectorCount = countVectorSectors(); - progressTimer = setInterval(() => { - const { sectorCount, cacheSizeMb } = countVectorSectors(); - const delta = sectorCount - lastSectorCount; - lastSectorCount = sectorCount; - const line = `[${new Date().toISOString()}] vector-index progress: ${sectorCount} sectors (${cacheSizeMb} MB total)${delta > 0 ? 
` (+${delta} since last check)` : ""}\n`; - try { - fs.appendFileSync(logPath, line); - } catch { - // ignore - } - }, 30_000); - } + const runScope = async (scopeToRun, scopedTimeoutMs) => { + const args = buildCodebaseSearchArgs( + strategy, + query, + scopeToRun, + projectRoot, + ); + const stderr = []; + const stdout = []; + let wasAborted = false; + let timedOut = false; - const childEnv = { ...buildSiftEnv(projectRoot, process.env), ...logEnv }; - const proc = spawn(siftBin, args, { - cwd: projectRoot, - env: childEnv, - shell: false, - stdio: ["ignore", "pipe", "pipe"], - }); - liveSubagentProcesses.add(proc); - // Collect output; also tee stderr to the log file for operator diagnostics - proc.stdout.on("data", (chunk) => stdout.push(chunk.toString())); - proc.stderr.on("data", (chunk) => { - stderr.push(chunk.toString()); - if (logPath) { - try { - fs.appendFileSync(logPath, chunk); - } catch { - // log write failure must not affect search result - } - } - }); - // Handle abort signal - const killProc = () => { - wasAborted = true; - try { - proc.kill("SIGTERM"); - } catch { - // ignore - } - setTimeout(() => { - if (proc.exitCode === null) { + // If vector retrievers are in play, start a progress poller that + // writes index-build state into the log file every 30 seconds. + const usesVector = args.includes("vector"); + let progressTimer = null; + let lastSectorCount = 0; + if (usesVector && logPath) { + lastSectorCount = countVectorSectors(); + progressTimer = setInterval(() => { + const { sectorCount, cacheSizeMb } = countVectorSectors(); + const delta = sectorCount - lastSectorCount; + lastSectorCount = sectorCount; + const line = `[${new Date().toISOString()}] vector-index progress: ${sectorCount} sectors (${cacheSizeMb} MB total)${delta > 0 ? 
` (+${delta} since last check)` : ""}\n`; try { - proc.kill("SIGKILL"); + fs.appendFileSync(logPath, line); } catch { // ignore } + }, 30_000); + } + + const childEnv = { + ...buildSiftEnv(projectRoot, process.env), + ...logEnv, + }; + const proc = spawn(siftBin, args, { + cwd: projectRoot, + env: childEnv, + shell: false, + stdio: ["ignore", "pipe", "pipe"], + }); + liveSubagentProcesses.add(proc); + proc.stdout.on("data", (chunk) => stdout.push(chunk.toString())); + proc.stderr.on("data", (chunk) => { + stderr.push(chunk.toString()); + if (logPath) { + try { + fs.appendFileSync(logPath, chunk); + } catch { + // log write failure must not affect search result + } } - }, 5000).unref?.(); + }); + const killProc = () => { + wasAborted = true; + try { + proc.kill("SIGTERM"); + } catch { + // ignore + } + setTimeout(() => { + if (proc.exitCode === null) { + try { + proc.kill("SIGKILL"); + } catch { + // ignore + } + } + }, 5000).unref?.(); + }; + const timeout = setTimeout(() => { + timedOut = true; + killProc(); + }, scopedTimeoutMs); + timeout.unref?.(); + if (signal) { + if (signal.aborted) killProc(); + else signal.addEventListener("abort", killProc, { once: true }); + } + const exitCode = await new Promise((resolve) => { + proc.on("close", (code) => { + clearTimeout(timeout); + if (progressTimer) clearInterval(progressTimer); + liveSubagentProcesses.delete(proc); + if (signal) signal.removeEventListener("abort", killProc); + resolve(code ?? 
0); + }); + proc.on("error", () => { + clearTimeout(timeout); + if (progressTimer) clearInterval(progressTimer); + liveSubagentProcesses.delete(proc); + if (signal) signal.removeEventListener("abort", killProc); + resolve(1); + }); + }); + return { + scope: scopeToRun, + exitCode, + out: stdout.join(""), + err: stderr.join("").trim(), + timedOut, + wasAborted, + timeoutMs: scopedTimeoutMs, + }; }; - const timeout = setTimeout(() => { - timedOut = true; - killProc(); - }, timeoutMs); - timeout.unref?.(); - if (signal) { - if (signal.aborted) killProc(); - else signal.addEventListener("abort", killProc, { once: true }); + + const perScopeTimeoutMs = scopePlan.sharded + ? Math.max(5_000, Math.floor(timeoutMs / scopePlan.scopes.length)) + : timeoutMs; + const outputs = []; + for (const scopeToRun of scopePlan.scopes) { + const output = await runScope(scopeToRun, perScopeTimeoutMs); + outputs.push(output); + if (output.wasAborted && !output.timedOut) break; } - const exitCode = await new Promise((resolve) => { - proc.on("close", (code) => { - clearTimeout(timeout); - if (progressTimer) clearInterval(progressTimer); - liveSubagentProcesses.delete(proc); - if (signal) signal.removeEventListener("abort", killProc); - resolve(code ?? 0); - }); - proc.on("error", () => { - clearTimeout(timeout); - if (progressTimer) clearInterval(progressTimer); - liveSubagentProcesses.delete(proc); - if (signal) signal.removeEventListener("abort", killProc); - resolve(1); - }); - }); - if (wasAborted) { + + const abortedOutput = outputs.find( + (output) => output.wasAborted && !output.timedOut, + ); + if (abortedOutput) { const logHint = logPath ? `\n(stage diagnostic: ${logPath})` : ""; - const text = timedOut - ? `Code search timed out after ${Math.round(timeoutMs / 1000)}s. 
Narrow the query or scope and retry.${logHint}` - : `Code search aborted.${logHint}`; + const text = `Code search aborted.${logHint}`; await recordRetrievalEvidence(projectRoot, { backend: "codebase_search", sourceKind: "code", query, strategy, scope, - status: timedOut ? "timeout" : "aborted", + status: "aborted", hitCount: 0, elapsedMs: Date.now() - startedAt, cachePath: runtimeDirs.searchCache, error: text, - result: { - siftBin, - timeoutMs, - }, + result: { siftBin, timeoutMs }, }); return { - content: [ - { - type: "text", - text, - }, - ], + content: [{ type: "text", text }], details: { operation: "codebase_search", aborted: true, - timedOut, + timedOut: false, siftBin, query, scope, @@ -2888,9 +2914,23 @@ export default function (pi) { }, }; } - const out = stdout.join(""); - const err = stderr.join("").trim(); - if (exitCode !== 0 && !out) { + + const allTimedOut = + outputs.length > 0 && outputs.every((output) => output.timedOut); + const successfulOutputs = outputs.filter( + (output) => output.out.trim().length > 0, + ); + if ( + successfulOutputs.length === 0 && + outputs.some((output) => output.exitCode !== 0) + ) { + const err = outputs + .map((output) => output.err) + .filter(Boolean) + .join("\n") + .trim(); + const exitCode = + outputs.find((output) => output.exitCode !== 0)?.exitCode ?? 1; const hint = err.includes("not found") || err.includes("No such file") ? "\n\nHint: install rupurt/sift and ensure `sift` is on PATH." @@ -2903,36 +2943,56 @@ export default function (pi) { query, strategy, scope, - status: "error", + status: allTimedOut ? "timeout" : "error", hitCount: 0, elapsedMs: Date.now() - startedAt, cachePath: runtimeDirs.searchCache, - error: err || `exit ${exitCode}`, + error: allTimedOut + ? "all codebase_search shards timed out" + : err || `exit ${exitCode}`, result: { siftBin, exitCode, timeoutMs, + scopes: scopePlan.scopes, }, }); return { content: [ { type: "text", - text: `codebase_search failed (exit ${exitCode}). 
Is sift installed?${hint}`, + text: allTimedOut + ? `Code search timed out after ${Math.round(timeoutMs / 1000)}s across all shards. Narrow the query or scope and retry.${logPath ? `\n(stage diagnostic: ${logPath})` : ""}` + : `codebase_search failed (exit ${exitCode}). Is sift installed?${hint}`, }, ], details: { operation: "codebase_search", exitCode, + sharded: scopePlan.sharded, + scopes: scopePlan.scopes, siftBin, query, scope, strategy, timeoutMs, + timedOut: allTimedOut, searchCache: runtimeDirs.searchCache, }, }; } + + const out = scopePlan.sharded + ? successfulOutputs + .map((output) => `## ${output.scope}\n\n${output.out.trim()}`) + .join("\n\n") + : (outputs[0]?.out ?? ""); + const err = outputs + .map((output) => output.err) + .filter(Boolean) + .join("\n") + .trim(); + const exitCode = outputs.some((output) => output.exitCode !== 0) ? 1 : 0; await recordRetrievalEvidence(projectRoot, { backend: "codebase_search", sourceKind: "code", @@ -2940,7 +3000,7 @@ export default function (pi) { strategy, scope, status: exitCode === 0 ? "ok" : "partial", - hitCount: out.trim() ? 
1 : 0, + hitCount: successfulOutputs.length, elapsedMs: Date.now() - startedAt, cachePath: runtimeDirs.searchCache, error: err || null, @@ -2948,6 +3008,8 @@ export default function (pi) { siftBin, exitCode, timeoutMs, + sharded: scopePlan.sharded, + scopes: scopePlan.scopes, outputPreview: out.slice(0, 2_000), }, }); @@ -2966,6 +3028,8 @@ export default function (pi) { scope, strategy, exitCode, + sharded: scopePlan.sharded, + scopes: scopePlan.scopes, siftBin, timeoutMs, searchCache: runtimeDirs.searchCache, diff --git a/src/resources/extensions/sf/tests/db-writer-requirements-generator.test.mjs b/src/resources/extensions/sf/tests/db-writer-requirements-generator.test.mjs index 25eb978a9..a3f256543 100644 --- a/src/resources/extensions/sf/tests/db-writer-requirements-generator.test.mjs +++ b/src/resources/extensions/sf/tests/db-writer-requirements-generator.test.mjs @@ -46,8 +46,7 @@ describe("generateRequirementsMd", () => { id: "R002", class: "operational", status: "active", - description: - "Address recurring drift\n\nSource IDs: sf-one, sf-two", + description: "Address recurring drift\n\nSource IDs: sf-one, sf-two", why: "Threshold reached\nacross multiple runs", source: "sf-promoter", primary_owner: "", @@ -120,4 +119,28 @@ describe("generateRequirementsMd", () => { assert.equal(parsed[0].id, "R069"); assert.equal(parsed[0].status, "cancelled"); }); + + test("round_trips_terminal_sweeper_statuses", () => { + const markdown = generateRequirementsMd([ + { + id: "R070", + class: "operational", + status: "resolved", + description: "Backlog evidence drained.", + }, + { + id: "R071", + class: "architecture", + status: "superseded", + description: "Folded into a newer requirement.", + }, + ]); + + assert.match(markdown, /## Resolved/); + assert.match(markdown, /## Superseded/); + const parsed = parseRequirementsSections(markdown); + assert.equal(parsed.length, 2); + assert.equal(parsed[0].status, "resolved"); + assert.equal(parsed[1].status, "superseded"); + }); 
}); diff --git a/src/resources/extensions/sf/tests/requirement-status-sweeper.test.mjs b/src/resources/extensions/sf/tests/requirement-status-sweeper.test.mjs new file mode 100644 index 000000000..c89b38b5a --- /dev/null +++ b/src/resources/extensions/sf/tests/requirement-status-sweeper.test.mjs @@ -0,0 +1,345 @@ +/** + * requirement-status-sweeper.test.mjs — auto-transition sweeper for R-entries. + * + * Tests all three rules: + * 1. operational-evidence-drained (source IDs all resolved → 'resolved') + * 2. supporting-slices-complete (all slices complete → 'validated') + * 3. supersedes-resolved (superseded_by target is terminal → 'superseded') + * + * Uses in-memory SQLite fixtures — never touches the real DB. + */ + +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, describe, expect, test } from "vitest"; +import { + closeDatabase, + getRequirementById, + insertMilestone, + insertSelfFeedbackEntry, + insertSlice, + openDatabase, + resolveSelfFeedbackEntry, + upsertRequirement, +} from "../sf-db.js"; +import { + requirementStatusSweeperGate, + sweepRequirementStatuses, +} from "../sf-db/requirement-status-sweeper.js"; + +// ─── Fixtures ───────────────────────────────────────────────────────────────── + +const tmpDirs = []; + +afterEach(() => { + closeDatabase(); + while (tmpDirs.length > 0) { + const dir = tmpDirs.pop(); + if (dir) rmSync(dir, { recursive: true, force: true }); + } +}); + +/** Open an in-memory DB (no file I/O). */ +function openMemoryDb() { + openDatabase(":memory:"); +} + +/** Insert a minimal self_feedback row with given id and resolved state. 
*/ +function makeSelfFeedbackRow(id, { resolved = false } = {}) { + const ts = new Date().toISOString(); + insertSelfFeedbackEntry({ + id, + ts, + kind: "test:sweeper", + severity: "low", + blocking: false, + repoIdentity: "forge", + sfVersion: "0.0.0", + basePath: "/tmp/test", + summary: `test entry ${id}`, + evidence: "", + suggestedFix: "", + }); + if (resolved) { + resolveSelfFeedbackEntry(id, { + reason: "resolved-in-test", + resolvedAt: new Date().toISOString(), + }); + } +} + +/** Insert an active requirement with the given fields. */ +function makeRequirement(id, fields = {}) { + upsertRequirement({ + id, + class: fields.class ?? "operational", + status: fields.status ?? "active", + description: fields.description ?? "", + why: fields.why ?? "test", + source: fields.source ?? "test", + primary_owner: fields.primary_owner ?? null, + supporting_slices: fields.supporting_slices ?? "[]", + validation: fields.validation ?? "", + notes: fields.notes ?? "", + full_content: fields.full_content ?? "", + superseded_by: fields.superseded_by ?? null, + }); +} + +/** Insert a milestone row (required for slice FK). */ +function makeMilestone(milestoneId) { + insertMilestone({ + id: milestoneId, + title: `test milestone ${milestoneId}`, + status: "active", + }); +} + +/** Insert a slice row with the given status. 
*/ +function makeSlice(milestoneId, sliceId, status = "complete") { + makeMilestone(milestoneId); + insertSlice({ + milestoneId, + id: sliceId, + title: `test slice ${sliceId}`, + status, + risk: "low", + depends: [], + demo: "", + sequence: 0, + }); +} + +// ─── Tests ──────────────────────────────────────────────────────────────────── + +describe("sweepRequirementStatuses — rule 1: operational-evidence-drained", () => { + test("transitions to 'resolved' when all 3 source IDs are resolved", async () => { + openMemoryDb(); + + makeSelfFeedbackRow("sf-aaa1", { resolved: true }); + makeSelfFeedbackRow("sf-bbb2", { resolved: true }); + makeSelfFeedbackRow("sf-ccc3", { resolved: true }); + + makeRequirement("R900", { + class: "operational", + description: + "Some recurring issue\n\nSource IDs: sf-aaa1, sf-bbb2, sf-ccc3", + }); + + const result = await sweepRequirementStatuses("/tmp/test"); + + expect(result.totalChecked).toBe(1); + expect(result.transitioned).toHaveLength(1); + expect(result.transitioned[0]).toMatchObject({ + id: "R900", + fromStatus: "active", + toStatus: "resolved", + rule: "operational-evidence-drained", + }); + + const updated = getRequirementById("R900"); + expect(updated.status).toBe("resolved"); + }); + + test("stays 'active' when 1 of 3 source IDs is still open", async () => { + openMemoryDb(); + + makeSelfFeedbackRow("sf-aaa1", { resolved: true }); + makeSelfFeedbackRow("sf-bbb2", { resolved: true }); + makeSelfFeedbackRow("sf-ccc3", { resolved: false }); // still open + + makeRequirement("R901", { + class: "operational", + description: "Source IDs: sf-aaa1, sf-bbb2, sf-ccc3", + }); + + const result = await sweepRequirementStatuses("/tmp/test"); + + expect(result.transitioned).toHaveLength(0); + const row = getRequirementById("R901"); + expect(row.status).toBe("active"); + }); +}); + +describe("sweepRequirementStatuses — rule 2: supporting-slices-complete", () => { + test("transitions to 'validated' when slice M999/S99 is complete", async () 
=> { + openMemoryDb(); + makeSlice("M999", "S99", "complete"); + makeRequirement("R902", { + class: "capability", + supporting_slices: '["M999/S99"]', + }); + + const result = await sweepRequirementStatuses("/tmp/test"); + + expect(result.transitioned).toHaveLength(1); + expect(result.transitioned[0]).toMatchObject({ + id: "R902", + toStatus: "validated", + rule: "supporting-slices-complete", + }); + + const updated = getRequirementById("R902"); + expect(updated.status).toBe("validated"); + }); + + test("stays 'active' when slice M999/S99 is in_progress", async () => { + openMemoryDb(); + makeSlice("M999", "S99", "in_progress"); + makeRequirement("R903", { + class: "capability", + supporting_slices: '["M999/S99"]', + }); + + const result = await sweepRequirementStatuses("/tmp/test"); + + expect(result.transitioned).toHaveLength(0); + const row = getRequirementById("R903"); + expect(row.status).toBe("active"); + }); + + test("stays 'active' when supporting_slices is empty array", async () => { + openMemoryDb(); + makeRequirement("R904", { + class: "capability", + supporting_slices: "[]", + }); + + const result = await sweepRequirementStatuses("/tmp/test"); + + expect(result.transitioned).toHaveLength(0); + const row = getRequirementById("R904"); + expect(row.status).toBe("active"); + }); +}); + +describe("sweepRequirementStatuses — rule 3: supersedes-resolved", () => { + test("transitions to 'superseded' when supersedes_by target is resolved", async () => { + openMemoryDb(); + + // Target R200 is already resolved + makeRequirement("R200", { status: "resolved" }); + // R905 is superseded by R200 + makeRequirement("R905", { superseded_by: "R200" }); + + const result = await sweepRequirementStatuses("/tmp/test"); + + // Only R905 is active (R200 is resolved, not active) + expect(result.transitioned).toHaveLength(1); + expect(result.transitioned[0]).toMatchObject({ + id: "R905", + toStatus: "superseded", + rule: "supersedes-resolved", + }); + + const updated = 
getRequirementById("R905"); + expect(updated.status).toBe("superseded"); + }); + + test("stays 'active' when supersedes_by target does not exist", async () => { + openMemoryDb(); + + makeRequirement("R906", { superseded_by: "R999" }); + + const result = await sweepRequirementStatuses("/tmp/test"); + + expect(result.transitioned).toHaveLength(0); + const row = getRequirementById("R906"); + expect(row.status).toBe("active"); + }); +}); + +describe("sweepRequirementStatuses — dryRun option", () => { + test("returns transitions but does not write to DB", async () => { + openMemoryDb(); + + makeSelfFeedbackRow("sf-dry1", { resolved: true }); + makeSelfFeedbackRow("sf-dry2", { resolved: true }); + makeSelfFeedbackRow("sf-dry3", { resolved: true }); + + makeRequirement("R907", { + class: "operational", + description: "Source IDs: sf-dry1, sf-dry2, sf-dry3", + }); + + const result = await sweepRequirementStatuses("/tmp/test", { + dryRun: true, + }); + + expect(result.transitioned).toHaveLength(1); + expect(result.transitioned[0].toStatus).toBe("resolved"); + + // DB must NOT have been written + const row = getRequirementById("R907"); + expect(row.status).toBe("active"); + }); +}); + +describe("requirementStatusSweeperGate — UokGate contract", () => { + test("returns GateResult pass when no transitions needed", async () => { + openMemoryDb(); + makeRequirement("R908", { class: "capability", supporting_slices: "[]" }); + + const result = await requirementStatusSweeperGate.execute({ + basePath: "/tmp/test", + }); + + expect(result.outcome).toBe("pass"); + expect(result.failureClass).toBeNull(); + expect(typeof result.rationale).toBe("string"); + expect(result.rationale).toContain("0 of"); + }); + + test("returns GateResult manual-attention when transitions occurred", async () => { + openMemoryDb(); + + makeSelfFeedbackRow("sf-gate1", { resolved: true }); + makeRequirement("R909", { + class: "operational", + description: "Source IDs: sf-gate1", + }); + + const result = await 
requirementStatusSweeperGate.execute({ + basePath: "/tmp/test", + }); + + expect(result.outcome).toBe("manual-attention"); + expect(result.failureClass).toBe("verification"); + expect(result.findings).toHaveLength(1); + expect(result.findings[0].id).toBe("R909"); + }); +}); + +describe("sweepRequirementStatuses — includeRules option", () => { + test("only rule 3 fires when includeRules=['supersedes-resolved']", async () => { + openMemoryDb(); + + // Set up a row that would trigger rule 1 (all source IDs resolved) + makeSelfFeedbackRow("sf-r1a", { resolved: true }); + makeRequirement("R910", { + class: "operational", + description: "Source IDs: sf-r1a", + }); + + // Set up a row that would trigger rule 3 + makeRequirement("R911-target", { status: "resolved" }); + makeRequirement("R912", { superseded_by: "R911-target" }); + + const result = await sweepRequirementStatuses("/tmp/test", { + includeRules: ["supersedes-resolved"], + }); + + // Only rule 3 should have fired — R910 should remain active + const r910 = getRequirementById("R910"); + expect(r910.status).toBe("active"); + + // R912 should have transitioned + const r912Updated = getRequirementById("R912"); + expect(r912Updated.status).toBe("superseded"); + + const rules = result.transitioned.map((t) => t.rule); + expect(rules.every((r) => r === "supersedes-resolved")).toBe(true); + expect(rules).toHaveLength(1); + }); +}); diff --git a/src/resources/extensions/sf/tests/requirements-sift-projection.test.mjs b/src/resources/extensions/sf/tests/requirements-sift-projection.test.mjs new file mode 100644 index 000000000..9e7090d13 --- /dev/null +++ b/src/resources/extensions/sf/tests/requirements-sift-projection.test.mjs @@ -0,0 +1,205 @@ +/** + * requirements-sift-projection.test.mjs — unit tests for the DB-to-sift + * requirements corpus projection module. + * + * Purpose: verify buildRequirementsProjection writes a correct markdown file + * from a tmp DB fixture without touching the real .sf/sf.db. 
+ */ +import assert from "node:assert/strict"; +import { + existsSync, + mkdirSync, + mkdtempSync, + readFileSync, + rmSync, +} from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, test } from "vitest"; +import { closeDatabase, insertRequirement, openDatabase } from "../sf-db.js"; +import { + buildRequirementsProjection, + getRequirementsProjectionPath, +} from "../sf-db/requirements-sift-projection.js"; + +const tmpRoots = []; + +afterEach(() => { + closeDatabase(); + for (const root of tmpRoots.splice(0)) { + rmSync(root, { recursive: true, force: true }); + } +}); + +function makeProject() { + const root = mkdtempSync(join(tmpdir(), "sf-req-sift-proj-")); + mkdirSync(join(root, ".sf"), { recursive: true }); + tmpRoots.push(root); + return root; +} + +function openDb(root) { + openDatabase(join(root, ".sf", "sf.db")); +} + +function seedRow(overrides = {}) { + return { + id: "R001", + class: "functional", + status: "active", + description: "Test requirement", + why: "Because tests matter", + source: "operator", + primary_owner: "S01", + supporting_slices: "", + validation: "smoke", + notes: "", + full_content: "", + superseded_by: null, + ...overrides, + }; +} + +// ── Test 1: 3 rows of varying status → all present, grouped by status ──────── + +test("buildRequirementsProjection_with_three_rows_writes_all_grouped_by_status", async () => { + const root = makeProject(); + openDb(root); + + insertRequirement( + seedRow({ id: "R001", status: "active", description: "Active req" }), + ); + insertRequirement( + seedRow({ id: "R002", status: "validated", description: "Validated req" }), + ); + insertRequirement( + seedRow({ id: "R003", status: "deferred", description: "Deferred req" }), + ); + + const result = await buildRequirementsProjection(root); + + assert.equal(result.written, true); + assert.equal(result.rows, 3); + assert.ok(result.bytes > 0, "bytes should be positive"); + assert.ok( + result.projectionPath 
=== getRequirementsProjectionPath(root), + "projectionPath should match helper", + ); + + assert.ok(existsSync(result.projectionPath), "projection file should exist"); + const content = readFileSync(result.projectionPath, "utf-8"); + + // All three IDs appear + assert.match(content, /## R001/); + assert.match(content, /## R002/); + assert.match(content, /## R003/); + + // Grouping by status: active before validated before deferred + const posActive = content.indexOf("## R001"); + const posValidated = content.indexOf("## R002"); + const posDeferred = content.indexOf("## R003"); + assert.ok(posActive < posValidated, "active should come before validated"); + assert.ok( + posValidated < posDeferred, + "validated should come before deferred", + ); +}); + +// ── Test 2: NULL notes → no "**Notes:**" section and no "null" literal ──────── + +test("buildRequirementsProjection_with_null_notes_omits_notes_section", async () => { + const root = makeProject(); + openDb(root); + + insertRequirement( + seedRow({ id: "R010", notes: "", description: "No notes req" }), + ); + + const result = await buildRequirementsProjection(root); + assert.equal(result.written, true); + + const content = readFileSync(result.projectionPath, "utf-8"); + assert.doesNotMatch( + content, + /\*\*Notes:\*\*/, + "should not emit Notes section for empty notes", + ); + assert.doesNotMatch(content, /\bnull\b/, "should not emit literal null"); +}); + +// ── Test 3: Special chars (newlines, backticks) → escaped for MD safety ─────── + +test("buildRequirementsProjection_escapes_backticks_and_collapses_newlines", async () => { + const root = makeProject(); + openDb(root); + + insertRequirement( + seedRow({ + id: "R020", + description: "Use `node:sqlite`\nAnd another line", + why: "Backtick `escape` needed", + notes: "multi\nline\nnote", + }), + ); + + const result = await buildRequirementsProjection(root); + assert.equal(result.written, true); + + const content = readFileSync(result.projectionPath, "utf-8"); + + 
// Backticks in description should be escaped + assert.match(content, /\\`node:sqlite\\`/, "backticks should be escaped"); + + // Newline in description should be collapsed to space in heading + assert.doesNotMatch( + content, + /## R020 — .*\n.*And another line/, + "heading should be single-line", + ); + + // Notes newlines should be collapsed + assert.doesNotMatch( + content, + /\*\*Notes:\*\*.*\nline/, + "notes should be on one line", + ); +}); + +// ── Test 4: Returns correct shape {rows, bytes, projectionPath} ─────────────── + +test("buildRequirementsProjection_returns_correct_shape", async () => { + const root = makeProject(); + openDb(root); + + insertRequirement(seedRow({ id: "R031" })); + insertRequirement(seedRow({ id: "R032" })); + insertRequirement(seedRow({ id: "R033" })); + + const result = await buildRequirementsProjection(root); + + assert.equal(result.written, true); + assert.equal(result.rows, 3); + assert.ok(typeof result.bytes === "number" && result.bytes > 0, "bytes > 0"); + assert.ok( + typeof result.projectionPath === "string" && + result.projectionPath.includes("requirements-projected.md"), + "projectionPath contains expected filename", + ); +}); + +// ── Test 5: Empty DB → header written, zero R-sections ──────────────────────── + +test("buildRequirementsProjection_with_empty_db_writes_header_only", async () => { + const root = makeProject(); + openDb(root); + + const result = await buildRequirementsProjection(root); + + assert.equal(result.written, true); + assert.equal(result.rows, 0); + assert.ok(result.bytes > 0, "header itself has bytes"); + + const content = readFileSync(result.projectionPath, "utf-8"); + assert.match(content, /# SF Requirements Corpus/, "header should be present"); + assert.doesNotMatch(content, /## R/, "should have no R-sections"); +}); diff --git a/src/resources/extensions/sf/tests/session-fts-repair.test.mjs b/src/resources/extensions/sf/tests/session-fts-repair.test.mjs new file mode 100644 index 
000000000..57b665d9b --- /dev/null +++ b/src/resources/extensions/sf/tests/session-fts-repair.test.mjs @@ -0,0 +1,53 @@ +/** + * session-fts-repair.test.mjs — regression tests for session FTS schema repair. + * + * Purpose: prove sf-db can reopen a DB that contains orphaned FTS5 shadow + * tables from an interrupted or partial `turns_fts` migration. + */ +import assert from "node:assert/strict"; +import { mkdirSync, mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { DatabaseSync } from "node:sqlite"; +import { afterEach, test } from "vitest"; +import { closeDatabase, openDatabase } from "../sf-db.js"; + +const tmpRoots = []; + +afterEach(() => { + closeDatabase(); + for (const root of tmpRoots.splice(0)) { + rmSync(root, { recursive: true, force: true }); + } +}); + +function makeDbPath() { + const root = mkdtempSync(join(tmpdir(), "sf-session-fts-repair-")); + mkdirSync(join(root, ".sf"), { recursive: true }); + tmpRoots.push(root); + return join(root, ".sf", "sf.db"); +} + +test("openDatabase_when_turns_fts_shadow_tables_are_orphaned_repairs_schema", () => { + const dbPath = makeDbPath(); + const raw = new DatabaseSync(dbPath); + raw.exec(` + CREATE TABLE turns_fts_data(id INTEGER PRIMARY KEY, block BLOB); + CREATE TABLE turns_fts_idx(segid INTEGER, term TEXT, pgno INTEGER); + CREATE TABLE turns_fts_docsize(id INTEGER PRIMARY KEY, sz BLOB); + CREATE TABLE turns_fts_config(k PRIMARY KEY, v) WITHOUT ROWID; + `); + raw.close(); + + assert.equal(openDatabase(dbPath), true); + closeDatabase(); + + const repaired = new DatabaseSync(dbPath); + const table = repaired + .prepare( + "SELECT name FROM sqlite_master WHERE type='table' AND name='turns_fts'", + ) + .get(); + assert.equal(table.name, "turns_fts"); + repaired.close(); +}); diff --git a/src/resources/extensions/sf/tests/sift-retriever-scope.test.mjs b/src/resources/extensions/sf/tests/sift-retriever-scope.test.mjs index e147b95c4..209adef23 100644 
--- a/src/resources/extensions/sf/tests/sift-retriever-scope.test.mjs +++ b/src/resources/extensions/sf/tests/sift-retriever-scope.test.mjs @@ -7,8 +7,14 @@ * while respecting explicit caller overrides. */ import assert from "node:assert/strict"; +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; import { describe, it, vi } from "vitest"; -import { chooseSiftRetrievers } from "../code-intelligence.js"; +import { + chooseSiftRetrievers, + resolveSiftSearchScopes, +} from "../code-intelligence.js"; // ── chooseSiftRetrievers unit tests ──────────────────────────────────────── @@ -110,3 +116,36 @@ describe("warmup regression guard", () => { assert.equal(result.reranking, "none"); }); }); + +describe("resolveSiftSearchScopes", () => { + it("repo_root_dot_expands_to_existing_top_level_shards", () => { + const root = mkdtempSync(join(tmpdir(), "sf-sift-shards-")); + try { + mkdirSync(join(root, "src")); + mkdirSync(join(root, "web")); + mkdirSync(join(root, "docs")); + mkdirSync(join(root, ".sf", "runtime", "sift"), { recursive: true }); + writeFileSync( + join(root, ".sf", "runtime", "sift", "requirements-projected.md"), + "# requirements\n", + ); + const result = resolveSiftSearchScopes(root, "."); + assert.equal(result.originalScope, "."); + assert.equal(result.sharded, true); + assert.deepEqual(result.scopes, [ + "src", + "web", + "docs", + ".sf/runtime/sift/requirements-projected.md", + ]); + } finally { + rmSync(root, { recursive: true, force: true }); + } + }); + + it("non_root_scope_remains_single_scope", () => { + const result = resolveSiftSearchScopes("/repo", "src/resources"); + assert.equal(result.sharded, false); + assert.deepEqual(result.scopes, ["src/resources"]); + }); +}); diff --git a/src/resources/extensions/sf/tests/update-task-status-revert-safety.test.mjs b/src/resources/extensions/sf/tests/update-task-status-revert-safety.test.mjs index 622a3340e..2ad26d660 
100644 --- a/src/resources/extensions/sf/tests/update-task-status-revert-safety.test.mjs +++ b/src/resources/extensions/sf/tests/update-task-status-revert-safety.test.mjs @@ -66,8 +66,19 @@ function makeProject({ iterLines = null } = {}) { // Open DB and seed data openDatabase(join(dir, ".sf", "sf.db")); insertMilestone({ id: "M001", title: "Test milestone", status: "active" }); - insertSlice({ milestoneId: "M001", id: "S01", title: "Test slice", status: "pending" }); - insertTask({ milestoneId: "M001", sliceId: "S01", id: "T02", title: "Test task", status: "complete" }); + insertSlice({ + milestoneId: "M001", + id: "S01", + title: "Test slice", + status: "pending", + }); + insertTask({ + milestoneId: "M001", + sliceId: "S01", + id: "T02", + title: "Test task", + status: "complete", + }); return dir; } @@ -170,7 +181,15 @@ test("updateTaskStatus_complete_forward_transition_never_blocked", () => { // Seed the task as pending so a "complete" update makes sense const dir = makeProject({ iterLines }); // updateTaskStatus to "complete" should never be blocked - updateTaskStatus("M001", "S01", "T02", "complete", new Date().toISOString(), undefined, { - basePath: dir, - }); + updateTaskStatus( + "M001", + "S01", + "T02", + "complete", + new Date().toISOString(), + undefined, + { + basePath: dir, + }, + ); }); diff --git a/src/resources/extensions/sf/tools/sift-search-tool.js b/src/resources/extensions/sf/tools/sift-search-tool.js index ccc7efd61..3ad01ade6 100644 --- a/src/resources/extensions/sf/tools/sift-search-tool.js +++ b/src/resources/extensions/sf/tools/sift-search-tool.js @@ -20,6 +20,7 @@ import { resolveSiftBinary, resolveSiftLogging, resolveSiftSearchScope, + resolveSiftSearchScopes, } from "../code-intelligence.js"; import { getErrorMessage } from "../error-utils.js"; import { recordRetrievalEvidence } from "../retrieval-evidence.js"; @@ -45,8 +46,13 @@ const DEFAULT_TIMEOUT_MS = 600_000; /** * Build the sift CLI argument list from tool parameters. 
*/ -function buildSiftArgs(params, projectRoot = process.cwd()) { - const scope = resolveSiftSearchScope(projectRoot, params.path); +function buildSiftArgs( + params, + projectRoot = process.cwd(), + scopeOverride = null, +) { + const scope = + scopeOverride ?? resolveSiftSearchScope(projectRoot, params.path); const args = [ "search", "--json", @@ -63,7 +69,7 @@ function buildSiftArgs(params, projectRoot = process.cwd()) { // Explicit overrides always win; for repo-root scope the helper returns // bm25+phrase (no vector) to avoid the full-workspace embedding hang // (#vector-hang-fix). For scoped subdirs, vector + reranking are enabled. - const scopedDefaults = chooseSiftRetrievers(params.path ?? ".", projectRoot); + const scopedDefaults = chooseSiftRetrievers(scope, projectRoot); const effectiveRetrievers = params.retrievers ?? scopedDefaults.retrievers; const effectiveReranking = params.reranking ?? scopedDefaults.reranking; args.push("--retrievers", String(effectiveRetrievers)); @@ -130,6 +136,26 @@ function parseSiftOutput(rawStdout, rawStderr) { stderr: rawStderr, }; } +function mergeSiftResults(results, limit) { + const seen = new Set(); + const hits = []; + for (const result of results) { + for (const hit of result.hits) { + const key = `${hit.path}:${hit.lineStart ?? ""}:${hit.lineEnd ?? ""}:${hit.content}`; + if (seen.has(key)) continue; + seen.add(key); + hits.push(hit); + } + } + hits.sort((a, b) => Number(b.score ?? 0) - Number(a.score ?? 0)); + return { + hits: hits.slice(0, limit), + stderr: results + .map((result) => result.stderr) + .filter((text) => text && text.trim().length > 0) + .join("\n"), + }; +} /** * Execute a sift search with the given parameters. @@ -272,7 +298,7 @@ export function registerSiftSearchTool(pi) { path: Type.Optional( Type.String({ description: - "Directory or file path to search within. 
Default: repository root ('.'); absolute paths inside the repo are normalized to repo-relative paths so .siftignore applies.", + "Directory or file path to search within. Default/all-repo expands to bounded top-level shards instead of passing '.' directly to Sift.", default: ".", }), ), @@ -355,30 +381,43 @@ export function registerSiftSearchTool(pi) { } const projectRoot = process.cwd(); - const args = buildSiftArgs(params, projectRoot); - const scope = args.at(-2) ?? "."; + const scopePlan = resolveSiftSearchScopes(projectRoot, params.path); const timeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS; const { env: logEnv, logPath } = resolveSiftLogging(projectRoot); const startedAt = Date.now(); try { - const { stdout, stderr } = await runSift( - binaryPath, - args, - timeoutMs, - projectRoot, - logEnv, - logPath, - ); + const perScopeTimeoutMs = scopePlan.sharded + ? Math.max(5_000, Math.floor(timeoutMs / scopePlan.scopes.length)) + : timeoutMs; + const scopedResults = []; + for (const scope of scopePlan.scopes) { + const args = buildSiftArgs(params, projectRoot, scope); + const { stdout, stderr } = await runSift( + binaryPath, + args, + perScopeTimeoutMs, + projectRoot, + logEnv, + logPath, + ); + const parsed = parseSiftOutput(stdout, stderr); + scopedResults.push({ ...parsed, scope }); + } const elapsedMs = Date.now() - startedAt; - const result = parseSiftOutput(stdout, stderr); + const result = mergeSiftResults( + scopedResults, + params.limit ?? DEFAULT_LIMIT, + ); const runtimeDirs = ensureSiftRuntimeDirs(projectRoot); await recordRetrievalEvidence(projectRoot, { backend: "sift", sourceKind: "code", query: params.query, strategy: params.strategy ?? DEFAULT_STRATEGY, - scope, + scope: scopePlan.sharded + ? `shards:${scopePlan.scopes.join(",")}` + : scopePlan.scopes[0], status: "ok", hitCount: result.hits.length, elapsedMs, @@ -402,7 +441,7 @@ export function registerSiftSearchTool(pi) { query: params.query, strategy: params.strategy ?? 
DEFAULT_STRATEGY, agent: params.agent ?? false, - path: scope, + path: scopePlan.sharded ? scopePlan.scopes : scopePlan.scopes[0], hitCount: result.hits.length, elapsedMs, binary: binaryPath, @@ -415,7 +454,7 @@ export function registerSiftSearchTool(pi) { const lines = [ `Sift search: "${params.query}"`, `Strategy: ${params.strategy ?? DEFAULT_STRATEGY}${params.agent ? ` | agent: ${params.agentMode ?? "linear"} | planner: ${params.plannerStrategy ?? "heuristic"}` : ""}`, - `Scope: ${scope}`, + `Scope: ${scopePlan.sharded ? `all repo shards (${scopePlan.scopes.join(", ")})` : scopePlan.scopes[0]}`, `Search cache: ${runtimeDirs.searchCache}`, `Hits: ${result.hits.length} | Elapsed: ${elapsedMs}ms`, "", @@ -449,6 +488,8 @@ export function registerSiftSearchTool(pi) { query: params.query, strategy: params.strategy ?? DEFAULT_STRATEGY, agent: params.agent ?? false, + sharded: scopePlan.sharded, + scopes: scopePlan.scopes, elapsedMs, hitCount: result.hits.length, hits: result.hits, @@ -463,7 +504,9 @@ export function registerSiftSearchTool(pi) { sourceKind: "code", query: params.query, strategy: params.strategy ?? DEFAULT_STRATEGY, - scope, + scope: scopePlan.sharded + ? `shards:${scopePlan.scopes.join(",")}` + : scopePlan.scopes[0], status: "error", hitCount: 0, elapsedMs, @@ -481,6 +524,8 @@ export function registerSiftSearchTool(pi) { operation: "sift_search", query: params.query, strategy: params.strategy ?? DEFAULT_STRATEGY, + sharded: scopePlan.sharded, + scopes: scopePlan.scopes, elapsedMs, error: message, },