feat(memory): enrich execute-task memory retrieval query

Previously buildProjectMemoriesSection(`${sTitle} ${tTitle}`) sent two short strings to the cosine ranker — too sparse for re-ranking to do meaningful work against the static pool. buildMemoryRetrievalQuery() (new, exported for tests) enriches the query with: - slice.title + task.title (original signal) - slice.goal text, front 600 chars (the WHY of the slice — usually names the memory-relevant context the title can't fit) - top 20 changed files from git diff/status (the WHAT — what code is in play right now; lets cosine ranking promote memories whose content references those paths) Fail-open at each source: DB closed → no goal; not a git repo → no files; nullish title args don't poison the string. The call site never has to handle errors. Bounded so embedding token cost stays predictable: 600-char goal cap, 20-file cap. Empty inputs collapse to "" so the consumer's `if (!query.trim())` branch still picks the static fallback. Tests: 5 cases — titles always present, non-git directory safe, empty-input collapse, nullish-arg defensiveness, real git repo surfaces changed file paths. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 17:36:15 +02:00 · 2026-05-15 17:36:15 +02:00 · 0c7aaafa00
commit 0c7aaafa00
parent 362af3d6a4
2 changed files with 152 additions and 7 deletions
--- a/src/resources/extensions/sf/auto-prompts.js
+++ b/src/resources/extensions/sf/auto-prompts.js
@ -1539,6 +1539,49 @@ async function buildProjectMemoriesSection(query, limit = 10) {
 		return "## Project Memories\n(unavailable)";
 	}
 }
+
+/**
+ * Build a richer query string for getRelevantMemoriesRanked at the
+ * execute-task site. Cosine ranking on two short title strings is
+ * effectively keyword matching; injecting slice.goal text + current
+ * touched-file list materially improves the candidates that
+ * surface, especially for slices whose memory-relevant context is
+ * named in the goal but not in the title.
+ *
+ * Bounded so the embedding token cost stays predictable:
+ *   - sliceGoal: ~600 chars (front of stored goal text)
+ *   - touched files: top 20 paths from git status + diff
+ *
+ * Fail-open: every individual data source can fail (DB closed, not
+ * a git repo, etc.) and we still return the original title-based
+ * query so the call site doesn't have to handle errors.
+ *
+ * Exported for the dedicated unit test.
+ */
+export async function buildMemoryRetrievalQuery(basePath, mid, sid, sliceTitle, taskTitle) {
+	const parts = [String(sliceTitle ?? "").trim(), String(taskTitle ?? "").trim()];
+	let sliceGoal = "";
+	try {
+		const { isDbAvailable, getSlice } = await import("./sf-db.js");
+		if (isDbAvailable()) {
+			sliceGoal = String(getSlice(mid, sid)?.goal ?? "").trim();
+		}
+	} catch {
+		sliceGoal = "";
+	}
+	if (sliceGoal) {
+		parts.push(sliceGoal.length > 600 ? sliceGoal.slice(0, 600) : sliceGoal);
+	}
+	try {
+		const { getChangedFilesWithContext } = await import("./diff-context.js");
+		const changed = await getChangedFilesWithContext(basePath);
+		const paths = changed.slice(0, 20).map((c) => c.path);
+		if (paths.length > 0) parts.push(paths.join(" "));
+	} catch {
+		// Not a git repo, or git unavailable — fine, just skip files.
+	}
+	return parts.filter((p) => p && p.length > 0).join("\n\n");
+}
 /**
 * Shared assembly for plan-slice and refine-slice prompts. Both builders need
 * the same inlined context (roadmap excerpt, slice context, research, decisions,
@ -1968,14 +2011,19 @@ export async function buildExecuteTaskPrompt(
 		getGatesForTurn("execute-task"),
 		{ pending: new Set(etPending.map((g) => g.gate_id)), allowOmit: true },
 	);
-	// Query-aware memory ranking: build a short query from the active task
-	// context so embeddings can promote semantically-relevant memories above
-	// the cold static-rank top. Falls back to pure static ranking when no
-	// gateway is configured or no embeddings exist yet — see
+	// Query-aware memory ranking: build a query from the active task
+	// context so embeddings can promote semantically-relevant memories
+	// above the cold static-rank top. Falls back to pure static ranking
+	// when no gateway is configured or no embeddings exist yet — see
 	// getRelevantMemoriesRanked for the fallback chain.
-	const memoriesSection = await buildProjectMemoriesSection(
-		`${sTitle} ${tTitle}`,
-	);
+	//
+	// The query is enriched beyond just titles: titles alone are too
+	// short for cosine ranking. We add the slice's stored goal text
+	// (the WHY) and the current touched-file list from git (the WHAT
+	// — what code is in play right now), bounded so we don't blow up
+	// the embedding tokens.
+	const memoryQuery = await buildMemoryRetrievalQuery(base, mid, sid, sTitle, tTitle);
+	const memoriesSection = await buildProjectMemoriesSection(memoryQuery);
 	// SF ADR-011 P2: when the feature is enabled, teach the executor that it can
 	// surface non-obvious choices via the `escalation` field on complete_task
 	// rather than silently picking. Autonomous mode auto-accepts the recommendation
--- a/src/resources/extensions/sf/tests/memory-retrieval-query.test.mjs
+++ b/src/resources/extensions/sf/tests/memory-retrieval-query.test.mjs
@ -0,0 +1,97 @@
+/**
+ * Test buildMemoryRetrievalQuery — the memory retrieval query
+ * expansion at the execute-task site.
+ *
+ * Previously: getRelevantMemoriesRanked received only
+ * `slice.title + task.title`. Two short strings are too sparse for
+ * cosine ranking to do meaningful re-ranking against the static pool.
+ *
+ * Now: titles + slice.goal (front 600 chars) + touched-file paths
+ * (top 20 from git diff/status). This test pins:
+ *   - titles are always present
+ *   - slice goal is appended when DB has one (we stub by passing
+ *     through to the live import; if DB is closed we fall back to
+ *     titles-only without throwing)
+ *   - non-git directories don't crash the path
+ *   - bounded output: ~600 char cap on goal, 20-file cap
+ *
+ * The unit tests run against a tmp dir so we don't need an
+ * actual .sf/sf.db.
+ */
+
+import assert from "node:assert/strict";
+import { mkdtempSync, rmSync, writeFileSync, mkdirSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, test } from "vitest";
+import { buildMemoryRetrievalQuery } from "../auto-prompts.js";
+
+const tempDirs = [];
+
+afterEach(() => {
+	for (const d of tempDirs) rmSync(d, { recursive: true, force: true });
+	tempDirs.length = 0;
+});
+
+function mkdtemp() {
+	const d = mkdtempSync(join(tmpdir(), "sf-memquery-"));
+	tempDirs.push(d);
+	return d;
+}
+
+test("titles always appear in the query", async () => {
+	const dir = mkdtemp();
+	const q = await buildMemoryRetrievalQuery(dir, "M01", "S01", "slice title", "task title");
+	assert.match(q, /slice title/);
+	assert.match(q, /task title/);
+});
+
+test("non-git directory doesn't crash", async () => {
+	const dir = mkdtemp();
+	const q = await buildMemoryRetrievalQuery(dir, "M01", "S01", "A", "B");
+	// Just titles, no goal (no DB), no files (no git) — still safe.
+	assert.equal(typeof q, "string");
+	assert.match(q, /A/);
+	assert.match(q, /B/);
+});
+
+test("missing or empty inputs collapse cleanly", async () => {
+	const dir = mkdtemp();
+	const q = await buildMemoryRetrievalQuery(dir, "", "", "", "");
+	// All-empty inputs → empty-string output (the consumer's `if
+	// (!query.trim())` branch picks the static fallback).
+	assert.equal(q.trim(), "");
+});
+
+test("nullish title args don't poison the query", async () => {
+	const dir = mkdtemp();
+	const q = await buildMemoryRetrievalQuery(dir, null, undefined, undefined, "real task");
+	// Only the present non-empty string survives.
+	assert.match(q, /real task/);
+	assert.ok(!q.includes("undefined"));
+	assert.ok(!q.includes("null"));
+});
+
+test("git changed files added when repo is present", async () => {
+	const { execFileSync } = await import("node:child_process");
+	const dir = mkdtemp();
+	execFileSync("git", ["init", "--quiet"], { cwd: dir });
+	execFileSync("git", ["config", "user.email", "t@e.x"], { cwd: dir });
+	execFileSync("git", ["config", "user.name", "t"], { cwd: dir });
+	// Two committed files plus one untracked file in the working tree.
+	writeFileSync(join(dir, "kept.txt"), "kept content");
+	mkdirSync(join(dir, "sub"));
+	writeFileSync(join(dir, "sub", "deep.txt"), "deep content");
+	execFileSync("git", ["add", "."], { cwd: dir });
+	execFileSync(
+		"git",
+		["commit", "--quiet", "--allow-empty-message", "-m", ""],
+		{ cwd: dir },
+	);
+	writeFileSync(join(dir, "kept.txt"), "modified content");
+	writeFileSync(join(dir, "new.txt"), "untracked");
+	const q = await buildMemoryRetrievalQuery(dir, "M01", "S01", "title", "task");
+	// At least one of the changed files surfaces in the query.
+	const sawAny = ["kept.txt", "new.txt"].some((p) => q.includes(p));
+	assert.ok(sawAny, `expected changed files in query, got: ${q}`);
+});