fix(sf): speed up resource sync and expand backlog context

Mikael Hugo 2026-05-02 13:42:50 +02:00
parent 51aec5616f
commit 6a492079b9
2 changed files with 70 additions and 52 deletions

@@ -179,17 +179,13 @@ function readManagedResourceManifest(
* Computes a content fingerprint of a resources directory (defaults to the
* bundled resourcesDir).
*
* Walks all files under `rootDir` and hashes `${relativePath}:${sha256(contents)}`
* for each one. Using the file *contents*, not the size, is what distinguishes
* this from the earlier implementation and closes #4787: a same-size edit
* (e.g. swapping one word for another word of the same byte length) produces
* a different file hash, bumps the aggregate fingerprint, and therefore
* triggers a full resync in `initResources`. The old path+size approach
* silently cached stale prompts across upgrades.
* Walks all files under `rootDir` and builds an aggregate fingerprint from
* `${relativePath}:${mtime}:${size}` for each one. This is orders of magnitude
* faster than full content hashing for large resource trees (1,700+ files)
* while still reliably detecting changes during development (npm link) and
* after SF version upgrades.
*
* Cost is ~1-2ms for a typical resources tree (~100 small .md files),
* still negligible at startup. Files are read in full via `readFileSync`, but
* bundled prompts are tiny so this is fine.
* Cost is ~1-5ms even for large trees, still negligible at startup.
*
* Exported for unit tests and for callers that want to check a different
* directory (e.g. pre-install verification).
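The doc comment above describes the per-file entries but not how they are combined. Below is a minimal sketch of that aggregate step, assuming the entries are sorted and hashed with sha256; the exported function's real name and exact combination strategy are not shown in this diff, and `collectFileEntries` is the module helper from the next hunk.

```ts
import { createHash } from "node:crypto";

// Sketch only: assumes a sha256 over the sorted per-file entries; the real
// function may combine them differently.
export function computeResourcesFingerprintSketch(rootDir: string): string {
  const entries: string[] = [];
  collectFileEntries(rootDir, rootDir, entries);
  // Sort so the fingerprint does not depend on directory traversal order.
  entries.sort();
  return createHash("sha256").update(entries.join("\n")).digest("hex");
}
```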
@@ -214,18 +210,19 @@ function collectFileEntries(dir: string, root: string, out: string[]): void {
collectFileEntries(fullPath, root, out);
} else {
const rel = relative(root, fullPath);
// Hash the file contents — see function doc for #4787 rationale.
let contentHash: string;
// Use mtime and size for the fingerprint instead of full content hashing (#3471).
// This is orders of magnitude faster for large resource trees (1700+ files)
// while still reliably detecting dev-workflow changes and upgrades.
let fingerprint: string;
try {
contentHash = createHash("sha256")
.update(readFileSync(fullPath))
.digest("hex");
const stats = lstatSync(fullPath);
fingerprint = `${stats.mtimeMs}:${stats.size}`;
} catch {
// Unreadable file — fall back to a stable marker so the entry still
// contributes to the aggregate hash and future reads will re-hash.
contentHash = "unreadable";
fingerprint = "unreadable";
}
out.push(`${rel}:${contentHash}`);
out.push(`${rel}:${fingerprint}`);
}
}
}
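For context, a hedged sketch of how such a fingerprint typically gates the resync mentioned in the doc comment. `readManagedResourceManifest`, `syncResourceDir`, `writeManagedResourceManifest`, and `resourcesDir` are names taken from this diff; the manifest shape, the `fingerprint` field, and the destination path are assumptions.

```ts
import { join } from "node:path";

// Illustrative only: assumes the manifest carries a `fingerprint` string and
// that resources are synced into a subdirectory of agentDir.
function maybeResyncResources(agentDir: string): void {
  const current = computeResourcesFingerprintSketch(resourcesDir);
  const manifest = readManagedResourceManifest(agentDir);
  if (manifest?.fingerprint !== current) {
    // First run, a dev edit under npm link, or an SF upgrade: resync and
    // record the new fingerprint so the next launch can skip the copy.
    syncResourceDir(resourcesDir, join(agentDir, "resources"));
    writeManagedResourceManifest(agentDir);
  }
}
```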
@@ -302,30 +299,39 @@ function makeTreeWritable(dirPath: string): void {
* 4. Makes the result writable for the next upgrade cycle.
*/
export function syncResourceDir(srcDir: string, destDir: string): void {
makeTreeWritable(destDir);
if (existsSync(srcDir)) {
pruneStaleSiblingFiles(srcDir, destDir);
for (const entry of readdirSync(srcDir, { withFileTypes: true })) {
if (entry.isDirectory()) {
const target = join(destDir, entry.name);
if (existsSync(target))
rmSync(target, {
recursive: true,
force: true,
maxRetries: 5,
retryDelay: 50,
});
}
}
try {
cpSync(srcDir, destDir, { recursive: true, force: true });
} catch {
// Fallback for Windows paths with non-ASCII characters where cpSync
// fails with the \\?\ extended-length prefix (#1178).
copyDirRecursive(srcDir, destDir);
}
makeTreeWritable(destDir);
if (!existsSync(srcDir)) return;
// In dev mode (syncing from src/resources/ in the monorepo), use a symlink
// instead of cpSync. This makes fresh agentDir initialization (used by every
// integration test) nearly instantaneous.
const isDevSync = srcDir.includes(join("src", "resources"));
if (isDevSync) {
reconcileSymlink(destDir, srcDir);
return;
}
makeTreeWritable(destDir);
pruneStaleSiblingFiles(srcDir, destDir);
for (const entry of readdirSync(srcDir, { withFileTypes: true })) {
if (entry.isDirectory()) {
const target = join(destDir, entry.name);
if (existsSync(target))
rmSync(target, {
recursive: true,
force: true,
maxRetries: 5,
retryDelay: 50,
});
}
}
try {
cpSync(srcDir, destDir, { recursive: true, force: true });
} catch {
// Fallback for Windows paths with non-ASCII characters where cpSync
// fails with the \\?\ extended-length prefix (#1178).
copyDirRecursive(srcDir, destDir);
}
makeTreeWritable(destDir);
}
function pruneStaleSiblingFiles(srcDir: string, destDir: string): void {
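`reconcileSymlink` is called in the dev-mode branch above but is not shown in this diff. The following is a rough sketch of what such a reconciliation could look like, under the assumption that it replaces whatever sits at destDir with a directory symlink to srcDir; the actual implementation may differ.

```ts
import { lstatSync, readlinkSync, rmSync, symlinkSync } from "node:fs";

// Assumption-laden sketch, not the real reconcileSymlink from this module.
function reconcileSymlinkSketch(linkPath: string, targetDir: string): void {
  try {
    const stats = lstatSync(linkPath);
    if (stats.isSymbolicLink() && readlinkSync(linkPath) === targetDir) {
      return; // already linked to the right place, nothing to do
    }
    // Replace a stale symlink or a previously copied tree.
    rmSync(linkPath, { recursive: true, force: true });
  } catch {
    // linkPath does not exist yet; fall through and create the link.
  }
  // The "dir" type matters on Windows and is ignored elsewhere.
  symlinkSync(targetDir, linkPath, "dir");
}
```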
@@ -724,20 +730,20 @@ export function initResources(agentDir: string): void {
// above the manifest check so it runs on every launch (including retries
// after partial copy failures).
// Sync SF-WORKFLOW.md to agentDir as a fallback for when SF_WORKFLOW_PATH
// env var is not set (e.g. fork/dev builds, alternative entry points).
const workflowSrc = join(resourcesDir, "SF-WORKFLOW.md");
if (existsSync(workflowSrc)) {
const workflowDest = join(agentDir, "SF-WORKFLOW.md");
try {
copyFileSync(workflowSrc, workflowDest);
// Ensure it's writable for the next upgrade cycle
makeTreeWritable(workflowDest);
copyFileSync(workflowSrc, join(agentDir, "SF-WORKFLOW.md"));
} catch {
/* non-fatal */
}
}
// Ensure all newly copied files are owner-writable so the next run can
// overwrite them (covers extensions, agents, and skills in one walk).
// Fast now because makeTreeWritable skips node_modules recursion (#3471).
makeTreeWritable(agentDir);
writeManagedResourceManifest(agentDir);
ensureRegistryEntries(join(agentDir, "extensions"));
}
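The comment above refers to the #3471 change that makes `makeTreeWritable` skip node_modules. For illustration only, a sketch of a tree walk with that skip; the real helper's permission handling and error strategy are not shown in this diff, and the mode arithmetic here is an assumption.

```ts
import { chmodSync, readdirSync, statSync } from "node:fs";
import { join } from "node:path";

// Illustrative sketch; adding owner write via 0o200 is an assumption.
function makeTreeWritableSketch(dirPath: string): void {
  for (const entry of readdirSync(dirPath, { withFileTypes: true })) {
    if (entry.isDirectory() && entry.name === "node_modules") {
      continue; // the #3471 fast path: never recurse into node_modules
    }
    const fullPath = join(dirPath, entry.name);
    if (entry.isDirectory()) {
      makeTreeWritableSketch(fullPath);
    } else {
      chmodSync(fullPath, statSync(fullPath).mode | 0o200);
    }
  }
}
```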

@@ -435,8 +435,11 @@ export function loadKnowledgeBlock(
}
const TACIT_SECTION_MAX_BYTES = 4096;
const BACKLOG_MAX_ENTRIES = 5;
const BACKLOG_MAX_CHARS = 2000;
// No entry-count cap — the full backlog must flow into the work context. The
// only guard is character length: if the rendered block would exceed this
// budget, truncate from the lowest-priority tail (oldest medium/low first)
// until it fits. High/critical entries sort to the front, so they are the
// last to be cut.
const BACKLOG_MAX_CHARS = 8000;
function loadBacklogBlock(cwd: string): string {
const backlogPath = join(cwd, ".sf", "BACKLOG.md");
@@ -473,11 +476,20 @@ function loadBacklogBlock(cwd: string): string {
return b.timestamp.localeCompare(a.timestamp);
});
const top = entries.slice(0, BACKLOG_MAX_ENTRIES);
const rows = top.map((e) => `- **${e.severity}** \`${e.kind}\`${e.summary}`).join("\n");
const block = `## Recent Self-Feedback Entries (from .sf/BACKLOG.md)\n\n${rows}`;
// Render all entries; sort already put high/critical first.
const rows = entries.map((e) => `- **${e.severity}** \`${e.kind}\`${e.summary}`).join("\n");
let block = `## Self-Feedback Entries (from .sf/BACKLOG.md, ordered by severity)\n\n${rows}`;
// If over the char budget, drop entries from the tail (lowest priority,
// oldest) one at a time until it fits. High/critical entries are the last
// to be dropped because the severity sort keeps them at the front.
if (block.length > BACKLOG_MAX_CHARS) {
return block.slice(0, BACKLOG_MAX_CHARS) + "\n\n*(truncated — see .sf/BACKLOG.md for full backlog)*";
let kept = entries.slice();
while (kept.length > 1 && block.length > BACKLOG_MAX_CHARS) {
kept = kept.slice(0, -1);
block =
`## Self-Feedback Entries (from .sf/BACKLOG.md, ordered by severity, truncated)\n\n` +
kept.map((e) => `- **${e.severity}** \`${e.kind}\`${e.summary}`).join("\n");
}
}
return `\n\n[BACKLOG — Recent sf-internal anomalies]\n\n${block}`;
}
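A small worked example of the tail-drop loop above, with hypothetical entries and a deliberately tiny budget. The entry shape is inferred from the fields used in this file and is not the actual type.

```ts
// Hypothetical entries, already sorted by severity then recency as above.
type BacklogEntrySketch = { severity: string; kind: string; summary: string };

const sorted: BacklogEntrySketch[] = [
  { severity: "critical", kind: "crash", summary: "panic during resource sync" },
  { severity: "high", kind: "regression", summary: "stale prompts after upgrade" },
  { severity: "low", kind: "style", summary: "overly long backlog summaries" },
];

const render = (es: BacklogEntrySketch[]) =>
  es.map((e) => `- **${e.severity}** \`${e.kind}\`${e.summary}`).join("\n");

const budget = 120; // stand-in for BACKLOG_MAX_CHARS
let kept = sorted.slice();
let rows = render(kept);
while (kept.length > 1 && rows.length > budget) {
  kept = kept.slice(0, -1); // drop the lowest-severity, oldest entry first
  rows = render(kept);
}
// With this budget the "low" entry is dropped; "critical" and "high" survive.
```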