From 0c7aaafa002b4f68bb1c8847fa4bd3e1b053fb78 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Fri, 15 May 2026 17:36:15 +0200 Subject: [PATCH] feat(memory): enrich execute-task memory retrieval query MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously buildProjectMemoriesSection(`${sTitle} ${tTitle}`) sent two short strings to the cosine ranker — too sparse for re-ranking to do meaningful work against the static pool. buildMemoryRetrievalQuery() (new, exported for tests) enriches the query with: - slice.title + task.title (original signal) - slice.goal text, front 600 chars (the WHY of the slice — usually names the memory-relevant context the title can't fit) - top 20 changed files from git diff/status (the WHAT — what code is in play right now; lets cosine ranking promote memories whose content references those paths) Fail-open at each source: DB closed → no goal; not a git repo → no files; nullish title args don't poison the string. The call site never has to handle errors. Bounded so embedding token cost stays predictable: 600-char goal cap, 20-file cap. Empty inputs collapse to "" so the consumer's `if (!query.trim())` branch still picks the static fallback. Tests: 5 cases — titles always present, non-git directory safe, empty-input collapse, nullish-arg defensiveness, real git repo surfaces changed file paths. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/resources/extensions/sf/auto-prompts.js | 62 ++++++++++-- .../sf/tests/memory-retrieval-query.test.mjs | 97 +++++++++++++++++++ 2 files changed, 152 insertions(+), 7 deletions(-) create mode 100644 src/resources/extensions/sf/tests/memory-retrieval-query.test.mjs diff --git a/src/resources/extensions/sf/auto-prompts.js b/src/resources/extensions/sf/auto-prompts.js index f606d3466..9257ac635 100644 --- a/src/resources/extensions/sf/auto-prompts.js +++ b/src/resources/extensions/sf/auto-prompts.js @@ -1539,6 +1539,49 @@ async function buildProjectMemoriesSection(query, limit = 10) { return "## Project Memories\n(unavailable)"; } } + +/** + * Build a richer query string for getRelevantMemoriesRanked at the + * execute-task site. Cosine ranking on two short title strings is + * effectively keyword matching; injecting slice.goal text + current + * touched-file list materially improves the candidates that + * surface, especially for slices whose memory-relevant context is + * named in the goal but not in the title. + * + * Bounded so the embedding token cost stays predictable: + * - sliceGoal: ~600 chars (front of stored goal text) + * - touched files: top 20 paths from git status + diff + * + * Fail-open: every individual data source can fail (DB closed, not + * a git repo, etc.) and we still return the original title-based + * query so the call site doesn't have to handle errors. + * + * Exported for the dedicated unit test. + */ +export async function buildMemoryRetrievalQuery(basePath, mid, sid, sliceTitle, taskTitle) { + const parts = [String(sliceTitle ?? "").trim(), String(taskTitle ?? "").trim()]; + let sliceGoal = ""; + try { + const { isDbAvailable, getSlice } = await import("./sf-db.js"); + if (isDbAvailable()) { + sliceGoal = String(getSlice(mid, sid)?.goal ?? "").trim(); + } + } catch { + sliceGoal = ""; + } + if (sliceGoal) { + parts.push(sliceGoal.length > 600 ? sliceGoal.slice(0, 600) : sliceGoal); + } + try { + const { getChangedFilesWithContext } = await import("./diff-context.js"); + const changed = await getChangedFilesWithContext(basePath); + const paths = changed.slice(0, 20).map((c) => c.path); + if (paths.length > 0) parts.push(paths.join(" ")); + } catch { + // Not a git repo, or git unavailable — fine, just skip files. + } + return parts.filter((p) => p && p.length > 0).join("\n\n"); +} /** * Shared assembly for plan-slice and refine-slice prompts. Both builders need * the same inlined context (roadmap excerpt, slice context, research, decisions, @@ -1968,14 +2011,19 @@ export async function buildExecuteTaskPrompt( getGatesForTurn("execute-task"), { pending: new Set(etPending.map((g) => g.gate_id)), allowOmit: true }, ); - // Query-aware memory ranking: build a short query from the active task - // context so embeddings can promote semantically-relevant memories above - // the cold static-rank top. Falls back to pure static ranking when no - // gateway is configured or no embeddings exist yet — see + // Query-aware memory ranking: build a query from the active task + // context so embeddings can promote semantically-relevant memories + // above the cold static-rank top. Falls back to pure static ranking + // when no gateway is configured or no embeddings exist yet — see // getRelevantMemoriesRanked for the fallback chain. - const memoriesSection = await buildProjectMemoriesSection( - `${sTitle} ${tTitle}`, - ); + // + // The query is enriched beyond just titles: titles alone are too + // short for cosine ranking. We add the slice's stored goal text + // (the WHY) and the current touched-file list from git (the WHAT + // — what code is in play right now), bounded so we don't blow up + // the embedding tokens. + const memoryQuery = await buildMemoryRetrievalQuery(base, mid, sid, sTitle, tTitle); + const memoriesSection = await buildProjectMemoriesSection(memoryQuery); // SF ADR-011 P2: when the feature is enabled, teach the executor that it can // surface non-obvious choices via the `escalation` field on complete_task // rather than silently picking. Autonomous mode auto-accepts the recommendation diff --git a/src/resources/extensions/sf/tests/memory-retrieval-query.test.mjs b/src/resources/extensions/sf/tests/memory-retrieval-query.test.mjs new file mode 100644 index 000000000..9424c55d0 --- /dev/null +++ b/src/resources/extensions/sf/tests/memory-retrieval-query.test.mjs @@ -0,0 +1,97 @@ +/** + * Test buildMemoryRetrievalQuery — the memory retrieval query + * expansion at the execute-task site. + * + * Previously: getRelevantMemoriesRanked received only + * `slice.title + task.title`. Two short strings are too sparse for + * cosine ranking to do meaningful re-ranking against the static pool. + * + * Now: titles + slice.goal (front 600 chars) + touched-file paths + * (top 20 from git diff/status). This test pins: + * - titles are always present + * - slice goal is appended when DB has one (we stub by passing + * through to the live import; if DB is closed we fall back to + * titles-only without throwing) + * - non-git directories don't crash the path + * - bounded output: ~600 char cap on goal, 20-file cap + * + * The unit tests run against a tmp dir so we don't need an + * actual .sf/sf.db. + */ + +import assert from "node:assert/strict"; +import { mkdtempSync, rmSync, writeFileSync, mkdirSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, test } from "vitest"; +import { buildMemoryRetrievalQuery } from "../auto-prompts.js"; + +const tempDirs = []; + +afterEach(() => { + for (const d of tempDirs) rmSync(d, { recursive: true, force: true }); + tempDirs.length = 0; +}); + +function mkdtemp() { + const d = mkdtempSync(join(tmpdir(), "sf-memquery-")); + tempDirs.push(d); + return d; +} + +test("titles always appear in the query", async () => { + const dir = mkdtemp(); + const q = await buildMemoryRetrievalQuery(dir, "M01", "S01", "slice title", "task title"); + assert.match(q, /slice title/); + assert.match(q, /task title/); +}); + +test("non-git directory doesn't crash", async () => { + const dir = mkdtemp(); + const q = await buildMemoryRetrievalQuery(dir, "M01", "S01", "A", "B"); + // Just titles, no goal (no DB), no files (no git) — still safe. + assert.equal(typeof q, "string"); + assert.match(q, /A/); + assert.match(q, /B/); +}); + +test("missing or empty inputs collapse cleanly", async () => { + const dir = mkdtemp(); + const q = await buildMemoryRetrievalQuery(dir, "", "", "", ""); + // All-empty inputs → empty-string output (the consumer's `if + // (!query.trim())` branch picks the static fallback). + assert.equal(q.trim(), ""); +}); + +test("nullish title args don't poison the query", async () => { + const dir = mkdtemp(); + const q = await buildMemoryRetrievalQuery(dir, null, undefined, undefined, "real task"); + // Only the present non-empty string survives. + assert.match(q, /real task/); + assert.ok(!q.includes("undefined")); + assert.ok(!q.includes("null")); +}); + +test("git changed files added when repo is present", async () => { + const { execFileSync } = await import("node:child_process"); + const dir = mkdtemp(); + execFileSync("git", ["init", "--quiet"], { cwd: dir }); + execFileSync("git", ["config", "user.email", "t@e.x"], { cwd: dir }); + execFileSync("git", ["config", "user.name", "t"], { cwd: dir }); + // Two committed files plus one untracked file in the working tree. + writeFileSync(join(dir, "kept.txt"), "kept content"); + mkdirSync(join(dir, "sub")); + writeFileSync(join(dir, "sub", "deep.txt"), "deep content"); + execFileSync("git", ["add", "."], { cwd: dir }); + execFileSync( + "git", + ["commit", "--quiet", "--allow-empty-message", "-m", ""], + { cwd: dir }, + ); + writeFileSync(join(dir, "kept.txt"), "modified content"); + writeFileSync(join(dir, "new.txt"), "untracked"); + const q = await buildMemoryRetrievalQuery(dir, "M01", "S01", "title", "task"); + // At least one of the changed files surfaces in the query. + const sawAny = ["kept.txt", "new.txt"].some((p) => q.includes(p)); + assert.ok(sawAny, `expected changed files in query, got: ${q}`); +});