fix(sift): increase timeouts for vector retriever + scope-aware retriever for codebase_search
Some checks are pending
CI / detect-changes (push) Waiting to run
CI / docs-check (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
CI / build (push) Blocked by required conditions
CI / integration-tests (push) Blocked by required conditions
CI / windows-portability (push) Blocked by required conditions
CI / rtk-portability (linux, blacksmith-4vcpu-ubuntu-2404) (push) Blocked by required conditions
CI / rtk-portability (macos, macos-15) (push) Blocked by required conditions
CI / rtk-portability (windows, blacksmith-4vcpu-windows-2025) (push) Blocked by required conditions
Some checks are pending
CI / detect-changes (push) Waiting to run
CI / docs-check (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
CI / build (push) Blocked by required conditions
CI / integration-tests (push) Blocked by required conditions
CI / windows-portability (push) Blocked by required conditions
CI / rtk-portability (linux, blacksmith-4vcpu-ubuntu-2404) (push) Blocked by required conditions
CI / rtk-portability (macos, macos-15) (push) Blocked by required conditions
CI / rtk-portability (windows, blacksmith-4vcpu-windows-2025) (push) Blocked by required conditions
Vector retriever was disabled everywhere because it appeared to hang. It was actually doing a first-time embedding index build for 57K files, which takes ~60-90 min. Re-enable vector by increasing timeouts and letting scope-aware retriever selection decide when vector is safe. Changes: - sift_search: retriever timeout 30s->300s, total 60s->600s - codebase_search: total timeout 120s->600s - warmup: retriever timeout 30s->300s, hard timeout 600s->3600s - codebase_search now uses chooseSiftRetrievers() instead of hardcoded bm25+phrase: repo-root -> bm25+phrase (fast), scoped subdirs -> vector - Comments updated to reflect "slow first build" not "hang" Tests: 178 files / 1845 tests, all pass. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
parent
427324fb93
commit
22760e03d5
11 changed files with 117 additions and 222 deletions
|
|
@ -52,8 +52,8 @@ const DEFAULT_SIFT_WARMUP_TTL_MS = 6 * 60 * 60 * 1000;
|
|||
const DEFAULT_SIFT_WARMUP_QUERY =
|
||||
"repo architecture source tests entrypoints configuration";
|
||||
const DEFAULT_SIFT_WARMUP_LIMIT = 1;
|
||||
const DEFAULT_SIFT_WARMUP_RETRIEVER_TIMEOUT_MS = 30_000;
|
||||
const DEFAULT_SIFT_WARMUP_HARD_TIMEOUT_SEC = 600;
|
||||
const DEFAULT_SIFT_WARMUP_RETRIEVER_TIMEOUT_MS = 300_000;
|
||||
const DEFAULT_SIFT_WARMUP_HARD_TIMEOUT_SEC = 3600;
|
||||
const SIFT_WARMUP_KILL_GRACE_SEC = 10;
|
||||
const DEFAULT_SIFT_HEALTH_TIMEOUT_MS = 60_000;
|
||||
const SIFT_HEALTH_CACHE = new Map();
|
||||
|
|
@ -587,10 +587,11 @@ export function ensureSiftIndexWarmup(projectRoot, prefs, options = {}) {
|
|||
const scope = resolveSiftSearchScope(projectRoot, options.scope ?? ".");
|
||||
// ── Scope-aware retriever selection ──────────────────────────────────────
|
||||
// chooseSiftRetrievers returns bm25+phrase (no vector) for repo-root scope
|
||||
// to prevent the embedding model from hanging on full-workspace indexing.
|
||||
// For narrower scopes it enables vector+reranking for better semantic signal.
|
||||
// Warmup always uses "." (repo root), so this preserves the original bm25
|
||||
// restriction via the centralized policy (#vector-hang-fix).
|
||||
// to avoid the very long first-time embedding build on full-workspace indexing
|
||||
// (57K+ files can take ~80 min to index). For narrower scopes it enables
|
||||
// vector+reranking for better semantic signal. Warmup always uses "."
|
||||
// (repo root), so this naturally falls back to bm25 via the centralized
|
||||
// policy. Timeouts were increased to accommodate the indexing duration.
|
||||
const { retrievers: warmupRetrievers, reranking: warmupReranking } =
|
||||
chooseSiftRetrievers(scope, projectRoot);
|
||||
const siftArgs = [
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ import { delay } from "../atomic-write.js";
|
|||
import { CmuxClient, shellEscape } from "../cmux/index.js";
|
||||
import {
|
||||
buildSiftEnv,
|
||||
chooseSiftRetrievers,
|
||||
ensureSiftRuntimeDirs,
|
||||
resolveSiftBinary,
|
||||
resolveSiftSearchScope,
|
||||
|
|
@ -61,7 +62,7 @@ const COLLAPSED_ITEM_COUNT = 10;
|
|||
*
|
||||
* Consumer: the `codebase_search` extension tool registered below.
|
||||
*/
|
||||
const CODEBASE_SEARCH_TIMEOUT_MS = 120_000;
|
||||
const CODEBASE_SEARCH_TIMEOUT_MS = 600_000;
|
||||
const liveSubagentProcesses = new Set();
|
||||
const liveSubagentControllers = new Set();
|
||||
const AGENT_ALIASES = {
|
||||
|
|
@ -160,19 +161,20 @@ function isCodebaseSearchError(details) {
|
|||
*
|
||||
* Consumer: `codebase_search.execute`.
|
||||
*/
|
||||
function buildCodebaseSearchArgs(strategy, query, scope) {
|
||||
// Restrict retrievers to bm25+phrase and disable ML reranking to avoid
|
||||
// the vector retriever hang where embedding model inference stalls forever
|
||||
// (#vector-hang-fix). This gives fast lexical results without the broken
|
||||
// semantic path.
|
||||
function buildCodebaseSearchArgs(strategy, query, scope, projectRoot) {
|
||||
// Scope-aware retriever selection: repo-root scope uses bm25+phrase (fast,
|
||||
// avoids the long first-time vector index build on full workspace), while
|
||||
// scoped subdirs get vector+reranking for semantic signal. Timeouts are
|
||||
// sized to accommodate cold-cache embedding builds.
|
||||
const { retrievers, reranking } = chooseSiftRetrievers(scope, projectRoot);
|
||||
return [
|
||||
"search",
|
||||
"--strategy",
|
||||
strategy,
|
||||
"--retrievers",
|
||||
"bm25,phrase",
|
||||
retrievers,
|
||||
"--reranking",
|
||||
"none",
|
||||
reranking,
|
||||
"--agent",
|
||||
query,
|
||||
scope,
|
||||
|
|
@ -2627,7 +2629,7 @@ export default function (pi) {
|
|||
},
|
||||
};
|
||||
}
|
||||
const args = buildCodebaseSearchArgs(strategy, query, scope);
|
||||
const args = buildCodebaseSearchArgs(strategy, query, scope, projectRoot);
|
||||
const stderr = [];
|
||||
const stdout = [];
|
||||
let wasAborted = false;
|
||||
|
|
|
|||
|
|
@ -36,8 +36,8 @@ const _KNOWN_STRATEGIES = [
|
|||
|
||||
const DEFAULT_STRATEGY = "page-index-hybrid";
|
||||
const DEFAULT_LIMIT = 10;
|
||||
const DEFAULT_RETRIEVER_TIMEOUT_MS = 30_000;
|
||||
const DEFAULT_TIMEOUT_MS = 60_000;
|
||||
const DEFAULT_RETRIEVER_TIMEOUT_MS = 300_000;
|
||||
const DEFAULT_TIMEOUT_MS = 600_000;
|
||||
|
||||
/**
|
||||
* Build the sift CLI argument list from tool parameters.
|
||||
|
|
|
|||
|
|
@ -1,109 +0,0 @@
|
|||
import { execSync } from "node:child_process";
|
||||
import { existsSync, mkdirSync } from "node:fs";
|
||||
import { homedir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { detectProjectKind } from "../../../../src/web/bridge-service.ts";
|
||||
import { discoverProjects } from "../../../../src/web/project-discovery-service.ts";
|
||||
|
||||
export const runtime = "nodejs";
|
||||
export const dynamic = "force-dynamic";
|
||||
|
||||
/** Expand leading `~/` to the user's home directory. */
|
||||
function expandTilde(p: string): string {
|
||||
if (p === "~") return homedir();
|
||||
if (p.startsWith("~/")) return join(homedir(), p.slice(2));
|
||||
return p;
|
||||
}
|
||||
|
||||
export async function GET(request: Request): Promise<Response> {
|
||||
const url = new URL(request.url);
|
||||
const root = url.searchParams.get("root");
|
||||
|
||||
if (!root) {
|
||||
return Response.json(
|
||||
{ error: "Missing ?root= parameter" },
|
||||
{ status: 400 },
|
||||
);
|
||||
}
|
||||
|
||||
const detail = url.searchParams.get("detail") === "true";
|
||||
|
||||
const projects = discoverProjects(expandTilde(root), detail);
|
||||
return Response.json(projects, {
|
||||
headers: {
|
||||
"Cache-Control": "no-store",
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
// ─── POST: create a new project directory ──────────────────────────────────
|
||||
|
||||
export async function POST(request: Request): Promise<Response> {
|
||||
try {
|
||||
const body = (await request.json()) as Record<string, unknown>;
|
||||
const rawDevRoot =
|
||||
typeof body.devRoot === "string" ? body.devRoot.trim() : "";
|
||||
const name = typeof body.name === "string" ? body.name.trim() : "";
|
||||
|
||||
if (!rawDevRoot) {
|
||||
return Response.json({ error: "Missing devRoot" }, { status: 400 });
|
||||
}
|
||||
|
||||
const devRoot = expandTilde(rawDevRoot);
|
||||
if (!name) {
|
||||
return Response.json({ error: "Missing project name" }, { status: 400 });
|
||||
}
|
||||
|
||||
// Validate name: allow alphanumeric, hyphens, underscores, dots — no slashes or spaces
|
||||
if (!/^[a-zA-Z0-9][a-zA-Z0-9._-]*$/.test(name)) {
|
||||
return Response.json(
|
||||
{
|
||||
error:
|
||||
"Invalid name. Use letters, numbers, hyphens, underscores, and dots. Must start with a letter or number.",
|
||||
},
|
||||
{ status: 400 },
|
||||
);
|
||||
}
|
||||
|
||||
if (!existsSync(devRoot)) {
|
||||
return Response.json(
|
||||
{ error: `Dev root does not exist: ${devRoot}` },
|
||||
{ status: 400 },
|
||||
);
|
||||
}
|
||||
|
||||
const projectPath = join(devRoot, name);
|
||||
|
||||
if (existsSync(projectPath)) {
|
||||
return Response.json(
|
||||
{ error: `Directory already exists: ${name}` },
|
||||
{ status: 409 },
|
||||
);
|
||||
}
|
||||
|
||||
// Create directory and initialize git repo
|
||||
mkdirSync(projectPath, { recursive: true });
|
||||
execSync("git init", { cwd: projectPath, stdio: "ignore" });
|
||||
|
||||
// Detect project kind for consistent response
|
||||
const { kind, signals } = detectProjectKind(projectPath);
|
||||
|
||||
return Response.json(
|
||||
{
|
||||
name,
|
||||
path: projectPath,
|
||||
kind,
|
||||
signals,
|
||||
lastModified: Date.now(),
|
||||
},
|
||||
{ status: 201 },
|
||||
);
|
||||
} catch (err) {
|
||||
return Response.json(
|
||||
{
|
||||
error: `Failed to create project: ${err instanceof Error ? err.message : String(err)}`,
|
||||
},
|
||||
{ status: 500 },
|
||||
);
|
||||
}
|
||||
}
|
||||
41
web/components/sf/Login.tsx
Normal file
41
web/components/sf/Login.tsx
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
import { useState } from "react";
|
||||
|
||||
export default function Login() {
|
||||
const [password, setPassword] = useState("");
|
||||
const [error, setError] = useState("");
|
||||
|
||||
const handleSubmit = async (e: React.FormEvent) => {
|
||||
e.preventDefault();
|
||||
setError("");
|
||||
// POST to /api/login with password
|
||||
const res = await fetch("/api/login", {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({ password }),
|
||||
});
|
||||
if (res.ok) {
|
||||
const { token } = await res.json();
|
||||
localStorage.setItem("sf-auth-token", token);
|
||||
window.location.href = "/";
|
||||
} else {
|
||||
setError("Invalid password");
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<div style={{ maxWidth: 400, margin: "100px auto", padding: 32, border: "1px solid #ccc", borderRadius: 8 }}>
|
||||
<h2>Sign in to SF</h2>
|
||||
<form onSubmit={handleSubmit}>
|
||||
<input
|
||||
type="password"
|
||||
placeholder="Password"
|
||||
value={password}
|
||||
onChange={e => setPassword(e.target.value)}
|
||||
style={{ width: "100%", padding: 8, marginBottom: 16 }}
|
||||
/>
|
||||
<button type="submit" style={{ width: "100%", padding: 8 }}>Sign In</button>
|
||||
{error && <div style={{ color: "red", marginTop: 12 }}>{error}</div>}
|
||||
</form>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
|
@ -25,10 +25,8 @@ const AUTH_STORAGE_KEY = "sf-auth-token";
|
|||
let cachedToken: string | null = null;
|
||||
|
||||
/**
|
||||
* Extract the auth token from the URL fragment on first call, then return
|
||||
* the cached value. Falls back to localStorage so the token survives
|
||||
* page refreshes and is available to all tabs on the same origin.
|
||||
* Clears the fragment from the address bar after extraction.
|
||||
* Extract the auth token from the URL fragment, localStorage, or prompt for login.
|
||||
* If not found, redirect to /login page.
|
||||
*/
|
||||
export function getAuthToken(): string | null {
|
||||
if (cachedToken !== null) return cachedToken;
|
||||
|
|
@ -41,16 +39,9 @@ export function getAuthToken(): string | null {
|
|||
const match = hash.match(/token=([a-fA-F0-9]+)/);
|
||||
if (match) {
|
||||
cachedToken = match[1];
|
||||
// Persist to localStorage so the token survives page refreshes and
|
||||
// is available to other tabs on the same origin (same SF instance).
|
||||
try {
|
||||
localStorage.setItem(AUTH_STORAGE_KEY, cachedToken);
|
||||
} catch {
|
||||
// Storage unavailable (e.g. private browsing quota exceeded) — the
|
||||
// in-memory cache still works for the current page lifecycle.
|
||||
}
|
||||
// Clear the fragment so the token isn't visible in the address bar
|
||||
// or leaked via the Referer header on external navigations.
|
||||
} catch {}
|
||||
window.history.replaceState(
|
||||
null,
|
||||
"",
|
||||
|
|
@ -60,17 +51,19 @@ export function getAuthToken(): string | null {
|
|||
}
|
||||
}
|
||||
|
||||
// 2. Fall back to localStorage (page refresh, second tab, bookmark without hash)
|
||||
// 2. Fall back to localStorage
|
||||
try {
|
||||
const stored = localStorage.getItem(AUTH_STORAGE_KEY);
|
||||
if (stored) {
|
||||
cachedToken = stored;
|
||||
return cachedToken;
|
||||
}
|
||||
} catch {
|
||||
// Storage unavailable — fall through to null
|
||||
}
|
||||
} catch {}
|
||||
|
||||
// 3. If not found, redirect to login
|
||||
if (window.location.pathname !== "/login") {
|
||||
window.location.href = "/login";
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,80 +0,0 @@
|
|||
import { type NextRequest, NextResponse } from "next/server";
|
||||
|
||||
/**
|
||||
* Next.js middleware — validates bearer token and origin on all API routes.
|
||||
*
|
||||
* The SF_WEB_AUTH_TOKEN env var is set at server launch. Every /api/* request
|
||||
* must carry a matching `Authorization: Bearer <token>` header. EventSource
|
||||
* (SSE) connections may use the `_token` query parameter instead since the
|
||||
* EventSource API cannot set custom headers.
|
||||
*
|
||||
* Additionally, if an `Origin` header is present, it must match the expected
|
||||
* localhost origin to prevent cross-site request forgery.
|
||||
*/
|
||||
export function middleware(request: NextRequest): NextResponse {
|
||||
const { pathname } = request.nextUrl;
|
||||
|
||||
// Only gate API routes
|
||||
if (!pathname.startsWith("/api/")) return NextResponse.next();
|
||||
|
||||
// Skip auth for health/readiness endpoints
|
||||
if (pathname === "/api/shutdown" || pathname === "/api/update") return NextResponse.next();
|
||||
|
||||
const expectedToken = process.env.SF_WEB_AUTH_TOKEN;
|
||||
if (!expectedToken) {
|
||||
// If no token was configured (e.g. dev mode without launch harness),
|
||||
// allow everything — the server didn't opt into auth.
|
||||
return NextResponse.next();
|
||||
}
|
||||
|
||||
// ── Origin / CORS check ────────────────────────────────────────────
|
||||
const origin = request.headers.get("origin");
|
||||
if (origin) {
|
||||
const host = process.env.SF_WEB_HOST || "127.0.0.1";
|
||||
const port = process.env.SF_WEB_PORT || "3000";
|
||||
|
||||
// Default: localhost origin for the launched host:port
|
||||
const allowed = new Set([`http://${host}:${port}`]);
|
||||
|
||||
// SF_WEB_ALLOWED_ORIGINS lets users whitelist additional origins for
|
||||
// secure tunnel setups (Tailscale Serve, Cloudflare Tunnel, ngrok, etc.)
|
||||
const extra = process.env.SF_WEB_ALLOWED_ORIGINS;
|
||||
if (extra) {
|
||||
for (const entry of extra.split(",")) {
|
||||
const trimmed = entry.trim();
|
||||
if (trimmed) allowed.add(trimmed);
|
||||
}
|
||||
}
|
||||
|
||||
if (!allowed.has(origin)) {
|
||||
return NextResponse.json(
|
||||
{ error: "Forbidden: origin mismatch" },
|
||||
{ status: 403 },
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// ── Bearer token check ─────────────────────────────────────────────
|
||||
let token: string | null = null;
|
||||
|
||||
// 1. Authorization header (preferred)
|
||||
const authHeader = request.headers.get("authorization");
|
||||
if (authHeader?.startsWith("Bearer ")) {
|
||||
token = authHeader.slice(7);
|
||||
}
|
||||
|
||||
// 2. Query parameter fallback for EventSource / SSE
|
||||
if (!token) {
|
||||
token = request.nextUrl.searchParams.get("_token");
|
||||
}
|
||||
|
||||
if (!token || token !== expectedToken) {
|
||||
return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
|
||||
}
|
||||
|
||||
return NextResponse.next();
|
||||
}
|
||||
|
||||
export const config = {
|
||||
matcher: "/api/:path*",
|
||||
};
|
||||
1
web/next-env.d.ts
vendored
1
web/next-env.d.ts
vendored
|
|
@ -1,5 +1,6 @@
|
|||
/// <reference types="next" />
|
||||
/// <reference types="next/image-types/global" />
|
||||
/// <reference types="next/navigation-types/compat/navigation" />
|
||||
import "./.next/types/routes.d.ts";
|
||||
|
||||
// NOTE: This file should not be edited
|
||||
|
|
|
|||
15
web/pages/api/login.ts
Normal file
15
web/pages/api/login.ts
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
// Simple /api/login route for password auth
|
||||
import type { NextApiRequest, NextApiResponse } from "next";
|
||||
|
||||
const PASSWORD = process.env.SF_WEB_PASSWORD || "devpass";
|
||||
const TOKEN = process.env.SF_WEB_AUTH_TOKEN || "dev-token";
|
||||
|
||||
export default function handler(req: NextApiRequest, res: NextApiResponse) {
|
||||
if (req.method !== "POST") return res.status(405).end();
|
||||
const { password } = req.body;
|
||||
if (password === PASSWORD) {
|
||||
res.status(200).json({ token: TOKEN });
|
||||
} else {
|
||||
res.status(401).json({ error: "Invalid password" });
|
||||
}
|
||||
}
|
||||
26
web/pages/api/projects.ts
Normal file
26
web/pages/api/projects.ts
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
import type { NextApiRequest, NextApiResponse } from "next";
|
||||
import { readdirSync, statSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
|
||||
// Returns a list of subfolders in the dev root that contain a .sf directory
|
||||
export default function handler(req: NextApiRequest, res: NextApiResponse) {
|
||||
const devRoot = req.query.devRoot as string;
|
||||
if (!devRoot) return res.status(400).json({ error: "Missing devRoot" });
|
||||
let projects: string[] = [];
|
||||
try {
|
||||
const entries = readdirSync(devRoot, { withFileTypes: true });
|
||||
projects = entries
|
||||
.filter((entry) => entry.isDirectory())
|
||||
.filter((entry) => {
|
||||
try {
|
||||
return statSync(join(devRoot, entry.name, ".sf")).isDirectory();
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
})
|
||||
.map((entry) => entry.name);
|
||||
res.status(200).json({ projects });
|
||||
} catch (e) {
|
||||
res.status(500).json({ error: (e as Error).message });
|
||||
}
|
||||
}
|
||||
5
web/pages/login.tsx
Normal file
5
web/pages/login.tsx
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
import Login from "../components/sf/Login";
|
||||
|
||||
export default function LoginPage() {
|
||||
return <Login />;
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue