feat(memory): enrich execute-task memory retrieval query

Previously buildProjectMemoriesSection(`${sTitle} ${tTitle}`) sent
two short strings to the cosine ranker — too sparse for re-ranking
to do meaningful work against the static pool.

buildMemoryRetrievalQuery() (new, exported for tests) enriches the
query with:

  - slice.title + task.title       (original signal)
  - slice.goal text, front 600 chars
                                   (the WHY of the slice — usually
                                   names the memory-relevant
                                   context the title can't fit)
  - top 20 changed files from
    git diff/status                (the WHAT — what code is in
                                   play right now; lets cosine
                                   ranking promote memories whose
                                   content references those paths)

Fail-open at each source: DB closed → no goal; not a git repo →
no files; nullish title args don't poison the string. The call
site never has to handle errors.

Bounded so embedding token cost stays predictable: 600-char goal
cap, 20-file cap. Empty inputs collapse to "" so the consumer's
`if (!query.trim())` branch still picks the static fallback.

Tests: 5 cases — titles always present, non-git directory safe,
empty-input collapse, nullish-arg defensiveness, real git repo
surfaces changed file paths.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-15 17:36:15 +02:00
parent 362af3d6a4
commit 0c7aaafa00
2 changed files with 152 additions and 7 deletions

View file

@ -1539,6 +1539,49 @@ async function buildProjectMemoriesSection(query, limit = 10) {
return "## Project Memories\n(unavailable)";
}
}
/**
* Build a richer query string for getRelevantMemoriesRanked at the
* execute-task site. Cosine ranking on two short title strings is
* effectively keyword matching; injecting slice.goal text + current
* touched-file list materially improves the candidates that
* surface, especially for slices whose memory-relevant context is
* named in the goal but not in the title.
*
* Bounded so the embedding token cost stays predictable:
* - sliceGoal: ~600 chars (front of stored goal text)
* - touched files: top 20 paths from git status + diff
*
* Fail-open: every individual data source can fail (DB closed, not
* a git repo, etc.) and we still return the original title-based
* query so the call site doesn't have to handle errors.
*
* Exported for the dedicated unit test.
*/
export async function buildMemoryRetrievalQuery(basePath, mid, sid, sliceTitle, taskTitle) {
const parts = [String(sliceTitle ?? "").trim(), String(taskTitle ?? "").trim()];
let sliceGoal = "";
try {
const { isDbAvailable, getSlice } = await import("./sf-db.js");
if (isDbAvailable()) {
sliceGoal = String(getSlice(mid, sid)?.goal ?? "").trim();
}
} catch {
sliceGoal = "";
}
if (sliceGoal) {
parts.push(sliceGoal.length > 600 ? sliceGoal.slice(0, 600) : sliceGoal);
}
try {
const { getChangedFilesWithContext } = await import("./diff-context.js");
const changed = await getChangedFilesWithContext(basePath);
const paths = changed.slice(0, 20).map((c) => c.path);
if (paths.length > 0) parts.push(paths.join(" "));
} catch {
// Not a git repo, or git unavailable — fine, just skip files.
}
return parts.filter((p) => p && p.length > 0).join("\n\n");
}
/**
* Shared assembly for plan-slice and refine-slice prompts. Both builders need
* the same inlined context (roadmap excerpt, slice context, research, decisions,
@ -1968,14 +2011,19 @@ export async function buildExecuteTaskPrompt(
getGatesForTurn("execute-task"),
{ pending: new Set(etPending.map((g) => g.gate_id)), allowOmit: true },
);
// Query-aware memory ranking: build a short query from the active task
// context so embeddings can promote semantically-relevant memories above
// the cold static-rank top. Falls back to pure static ranking when no
// gateway is configured or no embeddings exist yet — see
// Query-aware memory ranking: build a query from the active task
// context so embeddings can promote semantically-relevant memories
// above the cold static-rank top. Falls back to pure static ranking
// when no gateway is configured or no embeddings exist yet — see
// getRelevantMemoriesRanked for the fallback chain.
const memoriesSection = await buildProjectMemoriesSection(
`${sTitle} ${tTitle}`,
);
//
// The query is enriched beyond just titles: titles alone are too
// short for cosine ranking. We add the slice's stored goal text
// (the WHY) and the current touched-file list from git (the WHAT
// — what code is in play right now), bounded so we don't blow up
// the embedding tokens.
const memoryQuery = await buildMemoryRetrievalQuery(base, mid, sid, sTitle, tTitle);
const memoriesSection = await buildProjectMemoriesSection(memoryQuery);
// SF ADR-011 P2: when the feature is enabled, teach the executor that it can
// surface non-obvious choices via the `escalation` field on complete_task
// rather than silently picking. Autonomous mode auto-accepts the recommendation

View file

@ -0,0 +1,97 @@
/**
* Test buildMemoryRetrievalQuery the memory retrieval query
* expansion at the execute-task site.
*
* Previously: getRelevantMemoriesRanked received only
* `slice.title + task.title`. Two short strings are too sparse for
* cosine ranking to do meaningful re-ranking against the static pool.
*
* Now: titles + slice.goal (front 600 chars) + touched-file paths
* (top 20 from git diff/status). This test pins:
* - titles are always present
* - slice goal is appended when DB has one (we stub by passing
* through to the live import; if DB is closed we fall back to
* titles-only without throwing)
* - non-git directories don't crash the path
* - bounded output: ~600 char cap on goal, 20-file cap
*
* The unit tests run against a tmp dir so we don't need an
* actual .sf/sf.db.
*/
import assert from "node:assert/strict";
import { mkdtempSync, rmSync, writeFileSync, mkdirSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, test } from "vitest";
import { buildMemoryRetrievalQuery } from "../auto-prompts.js";
const tempDirs = [];
afterEach(() => {
for (const d of tempDirs) rmSync(d, { recursive: true, force: true });
tempDirs.length = 0;
});
function mkdtemp() {
const d = mkdtempSync(join(tmpdir(), "sf-memquery-"));
tempDirs.push(d);
return d;
}
test("titles always appear in the query", async () => {
const dir = mkdtemp();
const q = await buildMemoryRetrievalQuery(dir, "M01", "S01", "slice title", "task title");
assert.match(q, /slice title/);
assert.match(q, /task title/);
});
test("non-git directory doesn't crash", async () => {
const dir = mkdtemp();
const q = await buildMemoryRetrievalQuery(dir, "M01", "S01", "A", "B");
// Just titles, no goal (no DB), no files (no git) — still safe.
assert.equal(typeof q, "string");
assert.match(q, /A/);
assert.match(q, /B/);
});
test("missing or empty inputs collapse cleanly", async () => {
const dir = mkdtemp();
const q = await buildMemoryRetrievalQuery(dir, "", "", "", "");
// All-empty inputs → empty-string output (the consumer's `if
// (!query.trim())` branch picks the static fallback).
assert.equal(q.trim(), "");
});
test("nullish title args don't poison the query", async () => {
const dir = mkdtemp();
const q = await buildMemoryRetrievalQuery(dir, null, undefined, undefined, "real task");
// Only the present non-empty string survives.
assert.match(q, /real task/);
assert.ok(!q.includes("undefined"));
assert.ok(!q.includes("null"));
});
test("git changed files added when repo is present", async () => {
const { execFileSync } = await import("node:child_process");
const dir = mkdtemp();
execFileSync("git", ["init", "--quiet"], { cwd: dir });
execFileSync("git", ["config", "user.email", "t@e.x"], { cwd: dir });
execFileSync("git", ["config", "user.name", "t"], { cwd: dir });
// Two committed files plus one untracked file in the working tree.
writeFileSync(join(dir, "kept.txt"), "kept content");
mkdirSync(join(dir, "sub"));
writeFileSync(join(dir, "sub", "deep.txt"), "deep content");
execFileSync("git", ["add", "."], { cwd: dir });
execFileSync(
"git",
["commit", "--quiet", "--allow-empty-message", "-m", ""],
{ cwd: dir },
);
writeFileSync(join(dir, "kept.txt"), "modified content");
writeFileSync(join(dir, "new.txt"), "untracked");
const q = await buildMemoryRetrievalQuery(dir, "M01", "S01", "title", "task");
// At least one of the changed files surfaces in the query.
const sawAny = ["kept.txt", "new.txt"].some((p) => q.includes(p));
assert.ok(sawAny, `expected changed files in query, got: ${q}`);
});