2026-05-04 23:27:20 +02:00
/**
 * Optional code-intelligence backends for SF.
 *
 * Sift is the live code retrieval path. CODEBASE.md stays the durable fallback
 * when the live index is unavailable, cold, or degraded.
 */
import { spawn , spawnSync } from "node:child_process" ;
2026-05-05 14:27:03 +02:00
import {
existsSync ,
mkdirSync ,
readdirSync ,
readFileSync ,
statSync ,
writeFileSync ,
} from "node:fs" ;
2026-05-05 13:29:28 +02:00
import { delimiter , isAbsolute , join , relative , resolve } from "node:path" ;
refactor: replace all inline error message ternaries with getErrorMessage()
Eliminates ~120 repetitions of `err instanceof Error ? err.message : String(err)`
across the entire extension source tree. All callers now import and use
`getErrorMessage` from the canonical `./error-utils.js`.
Files updated (56 files):
- auto.js, auto-worktree.js, auto-recovery.js, auto-dashboard.js, auto-timers.js
- auto-prompts.js, auto-start.js, auto-post-unit.js, auto-model-selection.js
- auto/phases.js, auto/loop.js, auto/infra-errors.js
- autonomous-solver-eval.js, bootstrap/agent-end-recovery.js, bootstrap/db-tools.js
- bootstrap/exec-tools.js, bootstrap/journal-tools.js, bootstrap/register-extension.js
- bootstrap/register-hooks.js, canonical-milestone-plan.js, changelog.js
- clean-root-preflight.js, code-intelligence.js, commands-add-tests.js
- commands-debug.js, commands-eval-review.js, commands-handlers.js
- commands-maintenance.js, commands-pr-branch.js, commands-scan.js, commands-ship.js
- commands-todo.js, commands-worktree.js, definition-io.js, doctor.js
- doctor-config-checks.js, doctor-engine-checks.js, ecosystem/loader.js
- eval-review-schema.js, exec-sandbox.js, execution-instruction-guard.js
- graph-context.js, hook-emitter.js, index.js, learning/runtime.js
- lifecycle-hooks.js, onboarding-state.js, orphan-worktree-sweep.js
- planning-depth.js, quick.js, scaffold-keeper.js, sf-db/sf-db-core.js
- slice-cadence.js, sm-client.js, spec-projections.js, subagent/background-jobs.js
- subagent/isolation.js, sync-scheduler.js, tools/exec-tool.js
- tools/sift-search-tool.js, tools/workflow-tool-executors.js, ui/index.js
- uok/a2a-agent-server.js, uok/auto-dispatch.js, uok/auto-unit-closeout.js
- uok/auto-verification.js, uok/chaos-monkey.js, uok/gate-runner.js
- vault-resolver.js, workflow-install.js, workflow-plugins.js, worktree-manager.js
- worktree-resolver.js
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-11 14:46:30 +02:00
import { getErrorMessage } from "./error-utils.js" ;
2026-05-05 14:27:03 +02:00
2026-05-04 23:27:20 +02:00
/** File name of the Sift CLI executable on the current platform. */
const SIFT_BINARY_NAME = process.platform === "win32" ? "sift.exe" : "sift";

/** Default freshness window for a warmup marker: six hours, in milliseconds. */
const DEFAULT_SIFT_WARMUP_TTL_MS = 6 * 60 * 60 * 1000;

/** Default query sent to Sift by the warmup search. */
const DEFAULT_SIFT_WARMUP_QUERY =
  "repo architecture source tests entrypoints configuration";

/** Default `--limit` passed to the warmup search. */
const DEFAULT_SIFT_WARMUP_LIMIT = 1;

/** Default `--retriever-timeout-ms` passed to the warmup search. */
const DEFAULT_SIFT_WARMUP_RETRIEVER_TIMEOUT_MS = 30_000;

/** Default hard cap, in seconds, for the detached warmup process. */
const DEFAULT_SIFT_WARMUP_HARD_TIMEOUT_SEC = 600;

/** Seconds passed to the timeout wrapper's `--kill-after` option. */
const SIFT_WARMUP_KILL_GRACE_SEC = 10;

/** Default timeout, in milliseconds, for the synchronous health probe. */
const DEFAULT_SIFT_HEALTH_TIMEOUT_MS = 60_000;

/** In-process memo of health probe outcomes. */
const SIFT_HEALTH_CACHE = new Map();

/**
 * Path fragments that should never appear in repo-local Sift manifests.
 * Each pattern matches the fragment at the start of a path or after a
 * `/` or `\` separator.
 */
const SIFT_CACHE_POLLUTION_PATTERNS = [
  { label: ".claude worktrees", pattern: /(?:^|[/\\])\.claude[/\\]/ },
  { label: ".git internals", pattern: /(?:^|[/\\])\.git[/\\]/ },
  { label: "dist-test output", pattern: /(?:^|[/\\])dist-test[/\\]/ },
  { label: "node_modules", pattern: /(?:^|[/\\])node_modules[/\\]/ },
  {
    label: "package dist output",
    pattern: /(?:^|[/\\])packages[/\\][^/\\]+[/\\]dist[/\\]/,
  },
];
2026-05-04 23:27:20 +02:00
/**
 * Compute the repo-local directories used for Sift's search cache and temp
 * files. This only builds paths; nothing is created on disk.
 *
 * @param {string} projectRoot - Repository root the paths are joined onto.
 * @returns {{ searchCache: string, tmpDir: string }}
 */
export function resolveSiftWarmupRuntimeDirs(projectRoot) {
  const siftRuntimeSegments = [".sf", "runtime", "sift"];
  const searchCache = join(projectRoot, ...siftRuntimeSegments, "search-cache");
  const tmpDir = join(projectRoot, ...siftRuntimeSegments, "tmp");
  return { searchCache, tmpDir };
}
2026-05-05 13:29:28 +02:00
/**
 * Create the repo-local Sift runtime directories if they are missing.
 *
 * Purpose: keep Sift's search database scoped to the current repository
 * instead of sharing a process-global cache across unrelated projects.
 *
 * Consumer: Sift warmup, status probes, `sift_search`, and `codebase_search`.
 *
 * @param {string} projectRoot - Repository root.
 * @returns {{ searchCache: string, tmpDir: string }} The ensured directories.
 */
export function ensureSiftRuntimeDirs(projectRoot) {
  const runtimeDirs = resolveSiftWarmupRuntimeDirs(projectRoot);
  for (const dir of [runtimeDirs.searchCache, runtimeDirs.tmpDir]) {
    mkdirSync(dir, { recursive: true });
  }
  return runtimeDirs;
}
2026-05-04 23:27:20 +02:00
/**
 * Build the environment for a Sift child process: a copy of `env` with the
 * search cache and temp directory pointed at the repo-local runtime dirs.
 *
 * @param {string} projectRoot - Repository root.
 * @param {Record<string, string | undefined>} env - Base environment to copy.
 * @returns {Record<string, string | undefined>} New environment object.
 */
export function buildSiftEnv(projectRoot, env) {
  const { searchCache, tmpDir } = resolveSiftWarmupRuntimeDirs(projectRoot);
  const overrides = { SIFT_SEARCH_CACHE: searchCache, TMPDIR: tmpDir };
  return { ...env, ...overrides };
}
2026-05-05 13:29:28 +02:00
/**
 * Resolve a Sift search scope to the form Sift's local ignore matcher expects.
 *
 * Purpose: preserve `.siftignore` semantics by running Sift from the repository
 * root with repo-relative scopes instead of absolute paths.
 *
 * Consumer: Sift warmup, `sift_search`, and `codebase_search`.
 *
 * @param {string} projectRoot - Repository root.
 * @param {unknown} scope - Requested scope; blank or non-string values mean ".".
 * @returns {string} "." for the root, a repo-relative path for scopes inside
 *   the root, or the trimmed request unchanged when it points outside the root.
 */
export function resolveSiftSearchScope(projectRoot, scope) {
  // Same normalization as normalizeProjectRoot(); inlined so this function
  // depends only on node:path.
  const normalizedRoot = resolve(projectRoot);
  const requested =
    typeof scope === "string" && scope.trim() ? scope.trim() : ".";
  const absolute = isAbsolute(requested)
    ? resolve(requested)
    : resolve(normalizedRoot, requested);
  const rel = relative(normalizedRoot, absolute);
  if (!rel) return ".";
  // Only a whole leading ".." segment means "outside the root". A plain
  // startsWith("..") would also reject in-repo names such as "..cache".
  const escapesRoot = /^\.\.(?:[\\/]|$)/.test(rel);
  if (!escapesRoot && !isAbsolute(rel)) return rel;
  return requested;
}
/**
 * Turn a project root into an absolute, normalized path.
 *
 * @param {string} projectRoot - Root as supplied by the caller.
 * @returns {string} Result of `path.resolve(projectRoot)`.
 */
function normalizeProjectRoot(projectRoot) {
  const absoluteRoot = resolve(projectRoot);
  return absoluteRoot;
}
/**
 * Report whether `command` can be located by lookupExecutable().
 *
 * @param {string | undefined | null} command - Command name or path.
 * @param {Record<string, string | undefined>} [env] - Environment supplying PATH.
 * @returns {boolean} False for an empty command or when the lookup finds nothing.
 */
function commandExists(command, env = process.env) {
  return Boolean(command) && lookupExecutable(command, env) !== null;
}
/**
 * Locate a command either as a literal path or by scanning `env.PATH`.
 *
 * Only regular files count as a hit. A bare existence check would also accept
 * a directory that happens to be named like the binary, which cannot be
 * spawned.
 *
 * @param {string} command - Command name, or a path containing `/` or `\`.
 * @param {Record<string, string | undefined>} [env] - Environment supplying PATH.
 * @returns {string | null} The matching path, or null when nothing matches.
 */
function lookupExecutable(command, env = process.env) {
  const isRegularFile = (candidate) => {
    try {
      // statSync follows symlinks, so a link to the binary still qualifies.
      return statSync(candidate).isFile();
    } catch {
      // Missing or unreadable entries are simply not a match.
      return false;
    }
  };
  if (/[\\/]/.test(command) && isRegularFile(command)) return command;
  const pathValue = env.PATH ?? "";
  for (const dir of pathValue.split(delimiter)) {
    if (!dir) continue;
    const candidate = join(dir, command);
    if (isRegularFile(candidate)) return candidate;
  }
  return null;
}
/**
 * Decide the hard wall-clock cap, in whole seconds, for the warmup process.
 *
 * Precedence: `SF_SIFT_HARD_TIMEOUT_DISABLE=1` disables the cap, then the
 * explicit `override`, then `SF_SIFT_HARD_TIMEOUT_SEC`, then the default.
 *
 * @param {Record<string, string | undefined>} env - Environment to read.
 * @param {number} [override] - Caller-supplied cap; a non-finite or
 *   non-positive value disables the cap.
 * @returns {number | null} Positive integer seconds, or null for "no cap".
 */
function resolveSiftWarmupHardTimeoutSec(env, override) {
  if (env.SF_SIFT_HARD_TIMEOUT_DISABLE === "1") return null;
  if (override !== undefined) {
    if (!Number.isFinite(override) || override <= 0) return null;
    // A fractional override below one second would floor to 0, which
    // timeout(1) treats as "no timeout". Clamp to 1 so a requested cap is
    // never silently dropped.
    return Math.max(1, Math.floor(override));
  }
  const raw = env.SF_SIFT_HARD_TIMEOUT_SEC?.trim();
  if (raw) {
    const parsed = Number.parseInt(raw, 10);
    if (parsed === 0) return null;
    if (Number.isFinite(parsed) && parsed > 0) return parsed;
  }
  return DEFAULT_SIFT_WARMUP_HARD_TIMEOUT_SEC;
}
/**
 * Find a `timeout`-style wrapper binary to enforce the warmup hard cap.
 *
 * Returns null on Windows. Elsewhere it looks up `gtimeout` then `timeout`
 * on macOS, and `timeout` then `gtimeout` on other platforms.
 *
 * @param {Record<string, string | undefined>} env - Environment supplying PATH.
 * @param {number} timeoutSec - Hard cap, in seconds, passed to the wrapper.
 * @returns {{ binary: string, wrapperArgs: string[], timeoutSec: number } | null}
 */
function resolveSiftWarmupTimeoutWrapper(env, timeoutSec) {
  if (process.platform === "win32") return null;
  const names =
    process.platform === "darwin"
      ? ["gtimeout", "timeout"]
      : ["timeout", "gtimeout"];
  let binary = null;
  for (let i = 0; i < names.length && !binary; i += 1) {
    binary = lookupExecutable(names[i], env);
  }
  if (!binary) return null;
  const wrapperArgs = [
    `--kill-after=${SIFT_WARMUP_KILL_GRACE_SEC}`,
    String(timeoutSec),
  ];
  return { binary, wrapperArgs, timeoutSec };
}
/**
 * Resolve which Sift binary to run.
 *
 * A non-blank `SIFT_PATH` is returned trimmed without any existence check.
 * Otherwise PATH is searched for the platform binary name, then for plain
 * `sift` when the platform name differs.
 *
 * @param {Record<string, string | undefined>} [env] - Environment to read.
 * @returns {string | null} Binary path or name, or null when none is found.
 */
export function resolveSiftBinary(env = process.env) {
  const override = env.SIFT_PATH?.trim();
  if (override) return override;
  const platformMatch = lookupExecutable(SIFT_BINARY_NAME, env);
  if (platformMatch !== null && platformMatch !== undefined) {
    return platformMatch;
  }
  if (SIFT_BINARY_NAME === "sift") return null;
  return lookupExecutable("sift", env);
}
2026-05-05 13:29:28 +02:00
/**
 * Read the health probe timeout from `SF_SIFT_HEALTH_TIMEOUT_MS`.
 *
 * @param {Record<string, string | undefined>} env - Environment to read.
 * @returns {number} The parsed positive integer, or the default when the
 *   variable is unset, blank, or does not parse to a positive number.
 */
function resolveSiftHealthTimeoutMs(env) {
  const configured = env.SF_SIFT_HEALTH_TIMEOUT_MS?.trim();
  if (configured) {
    const ms = Number.parseInt(configured, 10);
    if (Number.isFinite(ms) && ms > 0) return ms;
  }
  return DEFAULT_SIFT_HEALTH_TIMEOUT_MS;
}
/**
 * Pick a narrow scope for the health probe.
 *
 * @param {string} projectRoot - Repository root to look inside.
 * @returns {string} The first of `src`, `packages`, `tests` that exists under
 *   the root, or "." when none exists.
 */
function resolveSiftHealthProbePath(projectRoot) {
  const preferredScopes = ["src", "packages", "tests"];
  const firstPresent = preferredScopes.find((name) =>
    existsSync(join(projectRoot, name)),
  );
  return firstPresent ?? ".";
}
/**
 * Run (or recall) a bounded, scoped `sift search` to check Sift works here.
 *
 * Outcomes, including failures and the "disabled" short-circuit, are stored
 * in SIFT_HEALTH_CACHE. The key is built from the root, the binary, and the
 * relevant environment variables.
 *
 * @param {string} projectRoot - Repository root.
 * @param {string} binaryPath - Sift binary to execute.
 * @param {Record<string, string | undefined>} env - Environment for the probe.
 * @returns {object} Probe outcome with `ok`, `probePath`, `timeoutMs`,
 *   `searchCache`, `tmpDir`, `reason`, and for real runs `status`, `signal`
 *   and `stderr`.
 */
function runSiftHealthProbe(projectRoot, binaryPath, env) {
  const root = normalizeProjectRoot(projectRoot);
  const timeoutMs = resolveSiftHealthTimeoutMs(env);
  const probePath = resolveSiftHealthProbePath(root);
  const cacheKey = [
    root,
    binaryPath,
    env.SIFT_PATH ?? "",
    env.SF_SIFT_HEALTH_TIMEOUT_MS ?? "",
    env.SF_SIFT_HEALTHCHECK_DISABLE ?? "",
  ].join("\0");
  if (SIFT_HEALTH_CACHE.has(cacheKey)) return SIFT_HEALTH_CACHE.get(cacheKey);

  const { searchCache, tmpDir } = ensureSiftRuntimeDirs(root);
  const remember = (outcome) => {
    SIFT_HEALTH_CACHE.set(cacheKey, outcome);
    return outcome;
  };

  if (env.SF_SIFT_HEALTHCHECK_DISABLE === "1") {
    return remember({
      ok: true,
      probePath,
      timeoutMs,
      searchCache,
      tmpDir,
      reason: "sift health probe disabled",
    });
  }

  const probeArgs = [
    "search",
    "--json",
    "--strategy",
    "bm25",
    "--limit",
    "1",
    "--retriever-timeout-ms",
    String(Math.min(timeoutMs, 1_000)),
    probePath,
    "function",
  ];
  const result = spawnSync(binaryPath, probeArgs, {
    cwd: root,
    env: buildSiftEnv(root, env),
    encoding: "utf-8",
    maxBuffer: 1024 * 1024,
    timeout: timeoutMs,
  });

  const ok = result.status === 0;
  // Any terminating signal is reported as a timeout, alongside ETIMEDOUT.
  const describeOutcome = () => {
    if (ok) return `sift scoped health probe passed for ${probePath}`;
    if (result.error?.code === "ETIMEDOUT" || result.signal) {
      return `sift scoped health probe timed out after ${timeoutMs}ms for ${probePath}`;
    }
    if (result.error) {
      return `sift scoped health probe failed: ${result.error.message}`;
    }
    const detail = String(result.stderr || "").trim();
    return detail
      ? `sift scoped health probe failed: ${detail.slice(0, 300)}`
      : `sift scoped health probe exited ${result.status ?? "unknown"}`;
  };

  return remember({
    ok,
    probePath,
    timeoutMs,
    searchCache,
    tmpDir,
    status: result.status,
    signal: result.signal,
    stderr: result.stderr,
    reason: describeOutcome(),
  });
}
/**
 * Collect regular files under `root`, depth-first, stopping at `maxFiles`.
 *
 * Directories that cannot be read are skipped. Entries that are neither
 * directories nor regular files are ignored.
 *
 * @param {string} root - Directory to walk.
 * @param {number} [maxFiles=32] - Maximum number of paths to return.
 * @returns {string[]} File paths in traversal order.
 */
function listFilesCapped(root, maxFiles = 32) {
  const collected = [];
  const isFull = () => collected.length >= maxFiles;

  function walk(directory) {
    if (isFull()) return;
    let children;
    try {
      children = readdirSync(directory, { withFileTypes: true });
    } catch {
      return;
    }
    for (const child of children) {
      if (isFull()) return;
      const childPath = join(directory, child.name);
      if (child.isDirectory()) {
        walk(childPath);
      } else if (child.isFile()) {
        collected.push(childPath);
      }
    }
  }

  walk(root);
  return collected;
}
/**
 * Scan up to 16 repo-local Sift manifest files for pollution patterns.
 *
 * At most one sample is recorded per manifest (the first pattern that
 * matches), and scanning stops once five samples are collected.
 *
 * @param {string} projectRoot - Repository root.
 * @returns {{ inspected: boolean, polluted: boolean, samples: Array<{ label: string, sample: string }> }}
 *   `inspected` reflects whether the manifest directory exists.
 */
function inspectSiftCache(projectRoot) {
  const { searchCache } = resolveSiftWarmupRuntimeDirs(projectRoot);
  const manifestRoot = join(searchCache, "artifacts", "manifests");
  const samples = [];
  for (const manifestPath of listFilesCapped(manifestRoot, 16)) {
    let contents;
    try {
      contents = readFileSync(manifestPath).toString("utf-8");
    } catch {
      // Unreadable manifest: move on to the next one.
      continue;
    }
    for (const { label, pattern } of SIFT_CACHE_POLLUTION_PATTERNS) {
      const match = contents.match(pattern);
      if (!match) continue;
      const hitAt = match.index ?? 0;
      // Keep a window around the hit and squash non-printable runs to spaces.
      const sample = contents
        .slice(Math.max(0, hitAt - 80), Math.min(contents.length, hitAt + 160))
        .replace(/[^\x20-\x7E]+/g, " ")
        .trim();
      samples.push({ label, sample });
      break;
    }
    if (samples.length >= 5) break;
  }
  const inspected = existsSync(manifestRoot);
  return { inspected, polluted: samples.length > 0, samples };
}
2026-05-06 06:22:09 +02:00
/**
 * Summarize the repo-local Sift artifact directory.
 *
 * Samples at most 512 files, so `artifactCount` and `cacheBytes` are lower
 * bounds whenever `artifactCountCapped` is true.
 *
 * @param {string} projectRoot - Repository root.
 * @returns {{ artifactCount: number, artifactCountCapped: boolean, artifactSampleLimit: number, latestArtifactAt: string | null, cacheBytes: number }}
 */
function inspectSiftWarmupArtifacts(projectRoot) {
  const artifactSampleLimit = 512;
  const { searchCache } = resolveSiftWarmupRuntimeDirs(projectRoot);
  const sampledFiles = listFilesCapped(
    join(searchCache, "artifacts"),
    artifactSampleLimit,
  );
  let cacheBytes = 0;
  let latestArtifactAt = null;
  sampledFiles.forEach((file) => {
    try {
      const { size, mtimeMs } = statSync(file);
      cacheBytes += size;
      const modifiedAt = new Date(mtimeMs).toISOString();
      // ISO-8601 UTC strings order the same way as the instants they encode.
      if (!latestArtifactAt || modifiedAt > latestArtifactAt) {
        latestArtifactAt = modifiedAt;
      }
    } catch {
      // Best-effort observability only; marker reconciliation must not fail
      // because a cache file changed while we were inspecting it.
    }
  });
  return {
    artifactCount: sampledFiles.length,
    artifactCountCapped: sampledFiles.length >= artifactSampleLimit,
    artifactSampleLimit,
    latestArtifactAt,
    cacheBytes,
  };
}
/**
 * Rewrite a warmup marker into a terminal state.
 *
 * The status becomes "completed" when at least one artifact file exists and
 * "stale" otherwise. The artifact summary is stored on the marker too.
 *
 * @param {string} projectRoot - Repository root.
 * @param {string} markerPath - Marker file to overwrite.
 * @param {object} parsed - Marker contents to carry over.
 * @param {string} reason - Stored as `terminalReason`.
 * @returns {object | null} The written marker plus `markerPath`, or null when
 *   the write fails.
 */
function finalizeSiftWarmupMarker(projectRoot, markerPath, parsed, reason) {
  const {
    artifactCount,
    artifactCountCapped,
    artifactSampleLimit,
    latestArtifactAt,
    cacheBytes,
  } = inspectSiftWarmupArtifacts(projectRoot);
  const reconciled = {
    ...parsed,
    schemaVersion: 3,
    status: artifactCount > 0 ? "completed" : "stale",
    finishedAt: new Date().toISOString(),
    terminalReason: reason,
    artifactCount,
    artifactCountCapped,
    artifactSampleLimit,
    latestArtifactAt,
    cacheBytes,
  };
  try {
    const serialized = `${JSON.stringify(reconciled, null, 2)}\n`;
    writeFileSync(markerPath, serialized, "utf-8");
  } catch {
    return null;
  }
  return { ...reconciled, markerPath };
}
2026-05-05 13:29:28 +02:00
/**
 * Detect whether Sift is usable for this repository.
 *
 * Checks, in order: the disabled preference, binary resolution, an explicit
 * `SIFT_PATH` that does not resolve, a running warmup, the health probe, and
 * cache pollution.
 *
 * @param {string} projectRoot - Repository root.
 * @param {{ indexer_backend?: string } | undefined} prefs - Codebase preferences.
 * @param {Record<string, string | undefined>} [env] - Environment to read.
 * @returns {object} Detection record with `backend: "sift"`, a `status` of
 *   "disabled", "missing", "warming", "degraded" or "configured", a `reason`,
 *   and the paths relevant to that status.
 */
export function detectSift(projectRoot, prefs, env = process.env) {
  const describe = (status, details) => ({
    backend: "sift",
    status,
    ...details,
  });

  if (prefs?.indexer_backend === "none") {
    return describe("disabled", {
      reason: "codebase.indexer_backend is none",
    });
  }

  const explicit = env.SIFT_PATH?.trim();
  const binaryPath = resolveSiftBinary(env) ?? undefined;
  if (!binaryPath) {
    return describe("missing", {
      reason:
        "sift binary not found on PATH; set SIFT_PATH or install rupurt/sift.",
    });
  }
  if (explicit && !commandExists(explicit, env)) {
    return describe("missing", {
      command: explicit,
      binaryPath: explicit,
      reason: "SIFT_PATH is set but does not resolve to an executable file.",
    });
  }

  // Shared prefix for every remaining reason string.
  const origin = explicit
    ? "sift binary resolved from SIFT_PATH"
    : "sift binary found on PATH";

  const warmup = readSiftWarmupMarker(projectRoot);
  if (warmup?.status === "warming") {
    const { searchCache, tmpDir } = ensureSiftRuntimeDirs(projectRoot);
    return describe("warming", {
      command: binaryPath,
      binaryPath,
      searchCache,
      tmpDir,
      probePath: warmup.scope ?? ".",
      reason: `${origin}; repo-local Sift index warmup is still running`,
      markerPath: warmup.markerPath,
    });
  }

  const health = runSiftHealthProbe(projectRoot, binaryPath, env);
  const located = {
    command: binaryPath,
    binaryPath,
    searchCache: health.searchCache,
    tmpDir: health.tmpDir,
    probePath: health.probePath,
  };
  if (!health.ok) {
    return describe("degraded", {
      ...located,
      reason: `${origin} but ${health.reason}`,
    });
  }

  const cacheInspection = inspectSiftCache(projectRoot);
  if (cacheInspection.polluted) {
    return describe("degraded", {
      ...located,
      cacheInspection,
      reason: `${origin} but repo-local Sift cache contains ignored/generated paths`,
    });
  }

  return describe("configured", {
    ...located,
    cacheInspection,
    reason: `${origin}; ${health.reason}`,
  });
}
/**
 * Decide whether an existing warmup marker makes a new warmup unnecessary.
 *
 * @param {string} markerPath - Marker file to inspect.
 * @param {number} now - Current time in epoch milliseconds.
 * @param {number} ttlMs - Maximum marker age, judged by file mtime.
 * @returns {boolean} False when the marker is missing, unreadable, too old,
 *   a "warming" v3 marker whose pid is gone, or in an unrecognized shape.
 */
function isFreshMarker(markerPath, now, ttlMs) {
  try {
    const ageMs = now - statSync(markerPath).mtimeMs;
    if (ageMs >= ttlMs) return false;

    const marker = JSON.parse(readFileSync(markerPath, "utf-8"));
    if (marker.schemaVersion !== 3) {
      // Legacy v2 markers count only when the second-to-last argument is ".".
      return (
        marker.schemaVersion === 2 &&
        Array.isArray(marker.args) &&
        marker.args.at(-2) === "."
      );
    }

    const orphanedWarmup =
      marker.status === "warming" && marker.pid && !isProcessAlive(marker.pid);
    if (orphanedWarmup) return false;
    return typeof marker.scope === "string" && marker.scope.length > 0;
  } catch {
    return false;
  }
}
2026-05-05 13:29:28 +02:00
/**
 * Read the warmup marker and return it only while a warmup is still running.
 *
 * A "warming" v3 marker is moved to a terminal state, and null is returned,
 * when its pid is no longer alive, its `startedAt` is unparseable, or its
 * hard-timeout window has elapsed.
 *
 * @param {string} projectRoot - Repository root.
 * @returns {object | null} Marker contents plus `markerPath`, or null when no
 *   live warmup is recorded or the marker cannot be read.
 */
function readSiftWarmupMarker(projectRoot) {
  const markerPath = join(
    projectRoot,
    ".sf",
    "runtime",
    "sift-index-warmup.json",
  );
  try {
    if (!existsSync(markerPath)) return null;
    const parsed = JSON.parse(readFileSync(markerPath, "utf-8"));
    if (parsed.schemaVersion !== 3) return null;
    if (parsed.status !== "warming") return null;
    if (parsed.pid && !isProcessAlive(parsed.pid)) {
      finalizeSiftWarmupMarker(
        projectRoot,
        markerPath,
        parsed,
        `warmup pid ${parsed.pid} is no longer running`,
      );
      return null;
    }
    const started = Date.parse(parsed.startedAt);
    if (!Number.isFinite(started)) {
      finalizeSiftWarmupMarker(
        projectRoot,
        markerPath,
        parsed,
        "warmup marker has invalid startedAt",
      );
      return null;
    }
    // A hand-edited or corrupted hardTimeoutSec (for example "abc") converts
    // to NaN. That would make expiresAt NaN and the marker would never
    // expire, so fall back to the default window instead.
    const recordedTimeoutSec = Number(
      parsed.hardTimeoutSec ?? DEFAULT_SIFT_WARMUP_HARD_TIMEOUT_SEC,
    );
    const hardTimeoutSec = Number.isFinite(recordedTimeoutSec)
      ? recordedTimeoutSec
      : DEFAULT_SIFT_WARMUP_HARD_TIMEOUT_SEC;
    const expiresAt =
      started +
      Math.max(60, hardTimeoutSec + SIFT_WARMUP_KILL_GRACE_SEC) * 1000;
    if (Date.now() > expiresAt) {
      finalizeSiftWarmupMarker(
        projectRoot,
        markerPath,
        parsed,
        "warmup marker exceeded hard timeout window",
      );
      return null;
    }
    return { ...parsed, markerPath };
  } catch {
    // Unreadable or malformed marker: report no live warmup.
    return null;
  }
}
/**
 * Check whether a process with the given pid currently exists.
 *
 * @param {number | string} pid - Process id; numeric strings are accepted.
 * @returns {boolean} True when the process exists, even if this process may
 *   not signal it. False for dead pids and for values that are not positive
 *   integers.
 */
function isProcessAlive(pid) {
  const numericPid = Number(pid);
  // Zero and negative pids address process groups rather than one process,
  // so they must not be probed.
  if (!Number.isInteger(numericPid) || numericPid <= 0) return false;
  try {
    // Signal 0 performs the existence and permission checks without sending.
    process.kill(numericPid, 0);
    return true;
  } catch (err) {
    // EPERM means the process exists but belongs to another user.
    return err?.code === "EPERM";
  }
}
2026-05-04 23:27:20 +02:00
/**
 * Start a detached Sift warmup search for this repository when one is needed.
 *
 * Skips when the effective backend is not Sift, a warmup is already running,
 * or a fresh marker exists (unless `options.force`). Otherwise it writes a
 * "warming" marker, spawns the search, wrapped in `timeout`/`gtimeout` when a
 * hard cap applies and a wrapper is on PATH, and records the child's pid.
 *
 * @param {string} projectRoot - Repository root; also the child's cwd.
 * @param {{ indexer_backend?: string } | undefined} prefs - Codebase preferences.
 * @param {object} [options] - Optional `env`, `now`, `ttlMs`, `force`,
 *   `scope`, `limit`, `retrieverTimeoutMs`, `query`, `hardTimeoutSec`, and
 *   `spawnFn` (a replacement for `child_process.spawn`).
 * @returns {object} `{ status, reason, ... }` where status is "skipped",
 *   "unavailable", "started" or "error".
 */
export function ensureSiftIndexWarmup(projectRoot, prefs, options = {}) {
  const env = options.env ?? process.env;
  const backendName = resolveEffectiveCodebaseIndexerBackendName(
    projectRoot,
    prefs,
    env,
  );
  if (backendName !== "sift") {
    return {
      status: "skipped",
      reason: `effective codebase indexer is ${backendName}`,
    };
  }
  // Detection here only needs the binary and marker state, so the health
  // probe is disabled for this call.
  const detection = detectSift(projectRoot, prefs, {
    ...env,
    SF_SIFT_HEALTHCHECK_DISABLE: "1",
  });
  if (detection.status === "warming") {
    return {
      status: "skipped",
      reason: "sift index warmup is already running",
      markerPath: detection.markerPath,
    };
  }
  if (
    !["configured", "degraded"].includes(detection.status) ||
    !detection.binaryPath
  ) {
    return {
      status: "unavailable",
      reason: detection.reason,
    };
  }
  const markerPath = join(
    projectRoot,
    ".sf",
    "runtime",
    "sift-index-warmup.json",
  );
  const now = options.now ?? Date.now();
  const ttlMs = options.ttlMs ?? DEFAULT_SIFT_WARMUP_TTL_MS;
  if (!options.force && isFreshMarker(markerPath, now, ttlMs)) {
    return {
      status: "skipped",
      reason: "recent sift warmup marker exists",
      markerPath,
    };
  }
  const scope = resolveSiftSearchScope(projectRoot, options.scope ?? ".");
  // ── Vector retriever hang workaround ─────────────────────────────────────
  // When the embedding model (sentence-transformers/all-MiniLM-L6-v2) hangs
  // during inference, page-index-hybrid with vector retriever stalls forever.
  // Restrict retrievers to bm25+phrase and disable ML reranking so warmup
  // completes without the vector path (#vector-hang-fix).
  const siftArgs = [
    "search",
    "--json",
    "--strategy",
    "page-index-hybrid",
    "--limit",
    String(options.limit ?? DEFAULT_SIFT_WARMUP_LIMIT),
    "--retriever-timeout-ms",
    String(
      options.retrieverTimeoutMs ?? DEFAULT_SIFT_WARMUP_RETRIEVER_TIMEOUT_MS,
    ),
    "--retrievers",
    "bm25,phrase",
    "--reranking",
    "none",
    scope,
    options.query ?? DEFAULT_SIFT_WARMUP_QUERY,
  ];
  const hardTimeoutSec = resolveSiftWarmupHardTimeoutSec(
    env,
    options.hardTimeoutSec,
  );
  const wrapper =
    hardTimeoutSec !== null
      ? resolveSiftWarmupTimeoutWrapper(env, hardTimeoutSec)
      : null;
  const command = wrapper ? wrapper.binary : detection.binaryPath;
  const args = wrapper
    ? [...wrapper.wrapperArgs, detection.binaryPath, ...siftArgs]
    : siftArgs;
  const startedReason = wrapper
    ? `sift page-index-hybrid warmup started (hard cap ${wrapper.timeoutSec}s via ${wrapper.binary})`
    : hardTimeoutSec === null
      ? "sift page-index-hybrid warmup started (hard cap disabled)"
      : "sift page-index-hybrid warmup started (no timeout(1)/gtimeout on PATH; running unbounded)";
  try {
    const runtimeDirs = resolveSiftWarmupRuntimeDirs(projectRoot);
    ensureSiftRuntimeDirs(projectRoot);
    const childEnv = buildSiftEnv(projectRoot, env);
    const marker = {
      schemaVersion: 3,
      status: "warming",
      startedAt: new Date(now).toISOString(),
      command,
      cwd: projectRoot,
      args,
      scope,
      siftBinary: detection.binaryPath,
      hardTimeoutSec: wrapper?.timeoutSec ?? null,
      searchCache: runtimeDirs.searchCache,
      tmpDir: runtimeDirs.tmpDir,
    };
    // Written before spawning so concurrent callers see "warming" at once.
    writeFileSync(markerPath, `${JSON.stringify(marker, null, 2)}\n`, "utf-8");
    const child = (options.spawnFn ?? spawn)(command, args, {
      cwd: projectRoot,
      env: childEnv,
      stdio: "ignore",
      detached: true,
    });
    // Spawn failures such as ENOENT arrive asynchronously as an "error"
    // event. Without a listener that event becomes an uncaught exception in
    // the host process. Handle it and close out the marker so it does not
    // keep reporting "warming" for a process that never started. The guard
    // keeps injected spawnFn doubles without an `on` method working.
    if (typeof child.on === "function") {
      child.on("error", (err) => {
        finalizeSiftWarmupMarker(
          projectRoot,
          markerPath,
          marker,
          `warmup spawn failed: ${getErrorMessage(err)}`,
        );
      });
    }
    marker.pid = child.pid ?? null;
    writeFileSync(markerPath, `${JSON.stringify(marker, null, 2)}\n`, "utf-8");
    // Let the host process exit without waiting for the warmup.
    child.unref();
    return {
      status: "started",
      reason: startedReason,
      command,
      args,
      markerPath,
    };
  } catch (err) {
    return {
      status: "error",
      reason: getErrorMessage(err),
      command,
      args,
      markerPath,
    };
  }
}
/**
 * Build the agent-facing bullet lines describing Sift's state for this repo.
 *
 * @param {string} projectRoot - Repository root.
 * @param {{ indexer_backend?: string } | undefined} prefs - Codebase preferences.
 * @param {Record<string, string | undefined>} [env] - Environment to read.
 * @returns {string[]} Markdown bullet lines chosen by the detection status.
 */
function buildSiftContextLines(projectRoot, prefs, env = process.env) {
  const { status, binaryPath, searchCache, reason } = detectSift(
    projectRoot,
    prefs,
    env,
  );

  if (status === "disabled") {
    return [
      "- Codebase indexer: disabled by `codebase.indexer_backend: none`.",
    ];
  }

  // Shared by every state in which a binary was found.
  const cacheLine = `- Sift cache: project-scoped at \`${searchCache}\`; do not use a shared/global Sift search database for this repo.`;

  if (status === "configured" && binaryPath) {
    return [
      `- Sift: configured as local CLI \`${binaryPath}\`.`,
      cacheLine,
      "- Use Sift with explicit, narrow paths after quick `grep`/`find`/`ls` orientation; avoid root-scope searches unless status proves they are responsive.",
      "- Tool: `sift_search` exposes the full Sift CLI surface — prefer direct `bm25`, `path-hybrid`, or `page-index-hybrid` with a scoped `path`.",
      "- Tool: `codebase_search` is the platform-level wrapper — use it only with a scoped `scope` when possible.",
      "- Strategy guide: `page-index-hybrid` (strongest recall + structural reranking), " +
        "`path-hybrid` (filename/path-heavy), `bm25` (fast lexical-only), `vector` (semantic-only).",
      "- If Sift is slow, empty, or times out, continue with native `grep`/`find`/`ls`, `lsp`, scout, and `.sf/CODEBASE.md` only as fallback context.",
    ];
  }

  if (status === "warming" && binaryPath) {
    return [
      `- Sift: installed at \`${binaryPath}\`; repo-local index warmup is running.`,
      cacheLine,
      "- Use grep/find/ls and lsp for broad orientation while warmup runs. Use `.sf/CODEBASE.md` only as fallback context. Use narrow `sift_search` paths if needed; broad root-scope Sift may still be cold.",
    ];
  }

  if (status === "degraded" && binaryPath) {
    return [
      `- Sift: installed at \`${binaryPath}\` but degraded for this repo: ${reason}.`,
      cacheLine,
      "- Do not use broad Sift/codebase_search as the first exploration step. Prefer native `grep`/`find`/`ls`, lsp, and narrow `sift_search` only after reducing scope. Use `.sf/CODEBASE.md` only as fallback context.",
    ];
  }

  return [
    "- Sift: not available. This is optional; continue with native `grep`/`find`/`ls`, `lsp`, scout, and `.sf/CODEBASE.md` only as fallback context.",
    "- To enable later: install `rupurt/sift` on PATH or set `SIFT_PATH` to the sift binary.",
  ];
}
/**
 * Context lines for the "none" backend.
 *
 * @returns {string[]} A single bullet saying the indexer is disabled.
 */
function buildNoCodebaseIndexerContextLines() {
  const disabledNotice =
    "- Codebase indexer: disabled by `codebase.indexer_backend: none`; continue with native `grep`/`find`/`ls`, `lsp`, scout, and `.sf/CODEBASE.md` only as fallback context.";
  return [disabledNotice];
}
/**
 * Map codebase preferences to a backend name.
 *
 * @param {{ indexer_backend?: string } | null | undefined} prefs - Preferences.
 * @returns {"none" | "sift"} "none" only when `indexer_backend` is "none".
 */
export function resolveCodebaseIndexerBackendName(prefs) {
  const optedOut = prefs?.indexer_backend === "none";
  return optedOut ? "none" : "sift";
}
2026-05-05 14:27:03 +02:00
/**
 * Resolve the backend name that is actually in effect.
 *
 * The project root and environment are accepted but not consulted; the result
 * depends only on `prefs`.
 *
 * @param {string} _projectRoot - Unused.
 * @param {{ indexer_backend?: string } | null | undefined} prefs - Preferences.
 * @param {Record<string, string | undefined>} [_env] - Unused.
 * @returns {"none" | "sift"} "none" only when `indexer_backend` is "none".
 */
export function resolveEffectiveCodebaseIndexerBackendName(
  _projectRoot,
  prefs,
  _env = process.env,
) {
  const optedOut = prefs?.indexer_backend === "none";
  return optedOut ? "none" : "sift";
}
/**
 * Look up a backend descriptor by name or by preferences.
 *
 * @param {string | { indexer_backend?: string } | null | undefined} prefsOrName
 *   A backend name, or preferences from which the name is derived.
 * @returns {object} The registered backend, or the Sift backend when the
 *   lookup yields null or undefined.
 */
export function getCodebaseIndexerBackend(prefsOrName) {
  let name;
  if (typeof prefsOrName === "string") {
    name = prefsOrName;
  } else {
    name = resolveCodebaseIndexerBackendName(prefsOrName);
  }
  const registered = CODEBASE_INDEXER_BACKENDS[name];
  return registered ?? SIFT_CODEBASE_INDEXER_BACKEND;
}
/**
 * Run detection for whichever codebase indexer backend is in effect.
 *
 * @param {string} projectRoot - Repository root.
 * @param {{ indexer_backend?: string } | undefined} prefs - Codebase preferences.
 * @param {Record<string, string | undefined>} [env] - Environment to read.
 * @returns {object} The selected backend's detection record.
 */
export function detectCodebaseIndexer(projectRoot, prefs, env = process.env) {
  const backend = getCodebaseIndexerBackend(
    resolveEffectiveCodebaseIndexerBackendName(projectRoot, prefs, env),
  );
  return backend.detect(projectRoot, prefs, env);
}
/**
 * Render the status report of whichever backend is in effect.
 *
 * @param {string} projectRoot - Repository root.
 * @param {{ indexer_backend?: string } | undefined} prefs - Codebase preferences.
 * @param {Record<string, string | undefined>} [env] - Environment to read.
 * @returns {string} The selected backend's formatted status text.
 */
export function formatCodebaseIndexerStatus(
  projectRoot,
  prefs,
  env = process.env,
) {
  const backend = getCodebaseIndexerBackend(
    resolveEffectiveCodebaseIndexerBackendName(projectRoot, prefs, env),
  );
  return backend.formatStatus(projectRoot, prefs, env);
}
/**
 * Build the "[PROJECT CODE INTELLIGENCE]" block for agent context.
 *
 * @param {string} projectRoot - Repository root.
 * @param {{ indexer_backend?: string } | undefined} prefs - Codebase preferences.
 * @param {Record<string, string | undefined>} [env] - Environment to read.
 * @returns {string} Two leading newlines, then the header and guidance lines
 *   joined with newlines.
 */
export function buildCodeIntelligenceContextBlock(
  projectRoot,
  prefs,
  env = process.env,
) {
  const backend = getCodebaseIndexerBackend(
    resolveEffectiveCodebaseIndexerBackendName(projectRoot, prefs, env),
  );
  const header = [
    "[PROJECT CODE INTELLIGENCE]",
    "",
    "- Live code retrieval should use Sift when healthy. Use `.sf/CODEBASE.md` only as durable fallback context when Sift is unavailable, cold, degraded, or explicitly needed as a generated overview.",
  ];
  const block = [
    ...header,
    ...backend.buildContextLines(projectRoot, prefs, env),
  ];
  return `\n\n${block.join("\n")}`;
}
/**
 * Render a human-readable Sift status report.
 *
 * @param {string} projectRoot - Repository root.
 * @param {{ indexer_backend?: string } | undefined} prefs - Codebase preferences.
 * @param {Record<string, string | undefined>} [env] - Environment to read.
 * @returns {string} Multi-line report: status and reason, any known paths,
 *   cache integrity, an operational verdict, and usage notes.
 */
export function formatSiftStatus(projectRoot, prefs, env = process.env) {
  const detection = detectSift(projectRoot, prefs, env);
  const report = [
    "Sift Status",
    "",
    `Status: ${detection.status}`,
    `Reason: ${detection.reason}`,
  ];

  // Optional fields are printed only when detection supplied a value.
  const optionalFields = [
    ["Command", detection.command],
    ["Binary", detection.binaryPath],
    ["Search cache", detection.searchCache],
    ["Temp dir", detection.tmpDir],
    ["Health probe scope", detection.probePath],
    ["Warmup marker", detection.markerPath],
  ];
  for (const [label, value] of optionalFields) {
    if (value) report.push(`${label}: ${value}`);
  }

  const inspection = detection.cacheInspection;
  if (inspection?.polluted) {
    report.push(
      "Cache integrity: polluted - ignored/generated paths were found in repo-local Sift manifests.",
    );
    for (const sample of inspection.samples ?? []) {
      report.push(`Cache sample (${sample.label}): ${sample.sample}`);
    }
    report.push(
      "Action: remove .sf/runtime/sift/search-cache and warm Sift again from the repo root.",
    );
  } else if (inspection?.inspected) {
    report.push(
      "Cache integrity: ok - no ignored/generated path samples found in inspected manifests.",
    );
  }

  let operational =
    "Operational: no - install rupurt/sift on PATH or set SIFT_PATH.";
  if (detection.command) {
    if (detection.status === "configured") {
      const verdict = commandExists(detection.command, env)
        ? "yes - scoped health probe passed"
        : "no - configured command is missing";
      operational = `Operational: ${verdict}`;
    } else if (detection.status === "warming") {
      operational =
        "Operational: warming - binary exists and repo-local index warmup is running. Give Sift time on CPU before broad searches.";
    } else if (detection.status === "degraded") {
      operational =
        "Operational: degraded - binary exists, but the bounded scoped health probe failed. Use narrow paths or fallback search.";
    }
  }

  report.push(
    operational,
    "",
    "Sift is optional. SF falls back to native grep/find/ls, lsp, scout, and CODEBASE.md only as fallback context when it is unavailable.",
    'When configured, agents should use `sift search --json <path> "<query>"`; `page-index-hybrid` is the strongest direct-search preset and `path-hybrid` is best for path-heavy queries.',
    "SF runs Sift warmup with a project-scoped SIFT_SEARCH_CACHE under .sf/runtime/sift/ while leaving model cache shared.",
  );
  return report.join("\n");
}
/**
 * Status report for the "none" backend.
 *
 * @returns {string} Fixed multi-line text saying the indexer is disabled.
 */
function formatNoCodebaseIndexerStatus() {
  const report = [];
  report.push("Codebase Indexer Status", "");
  report.push("Status: disabled");
  report.push("Reason: codebase.indexer_backend is none");
  report.push("Operational: no - optional codebase indexer disabled.");
  report.push("");
  report.push(
    "SF will use native grep/find/ls, lsp, scout, and CODEBASE.md only as fallback context for codebase orientation.",
  );
  return report.join("\n");
}
/**
 * Backend descriptor for Sift: its name and label plus the detection,
 * status-formatting and context-building functions.
 */
export const SIFT_CODEBASE_INDEXER_BACKEND = {
  name: "sift",
  label: "Sift",
  // Detection, status report and agent context are all served by Sift.
  detect: detectSift,
  formatStatus: formatSiftStatus,
  buildContextLines: buildSiftContextLines,
};
/**
 * Backend descriptor used when the codebase indexer is switched off.
 * Detection always reports "disabled".
 */
export const NO_CODEBASE_INDEXER_BACKEND = {
  name: "none",
  label: "None",
  detect: () => {
    const disabled = {
      backend: "none",
      status: "disabled",
      reason: "codebase.indexer_backend is none",
    };
    return disabled;
  },
  formatStatus: formatNoCodebaseIndexerStatus,
  buildContextLines: buildNoCodebaseIndexerContextLines,
};
/** Registry of backend descriptors, keyed by backend name. */
export const CODEBASE_INDEXER_BACKENDS = {
  // Keys match the names produced by resolveCodebaseIndexerBackendName().
  sift: SIFT_CODEBASE_INDEXER_BACKEND,
  none: NO_CODEBASE_INDEXER_BACKEND,
};