fix(sf): formatMemoriesForPrompt rank-preserving mode + use it in execute-task

Real semantic bug: getRelevantMemoriesRanked returns memories in score-descending order (cosine + optional rerank), but formatMemoriesForPrompt then re-grouped them by CATEGORY_PRIORITY (gotcha=0 first, convention=1, ...). A high-relevance "convention" memory got buried under low-relevance "gotcha" entries purely because gotcha has higher category priority, so the agent never saw the most relevant items at the top.

formatMemoriesForPrompt gains a `preserveRankOrder` parameter (default false for backward compat). When true:
- Renders bullets in input order
- Tags each line with [category] so the agent can still tell gotchas from conventions

Wired into the auto-prompts.ts execute-task injection: when memoryQuery is non-empty (i.e. the query-aware ranker was used), pass true. Static-ranked input keeps the historical category-grouped layout.

Tests verify both modes side-by-side using identical input — the ordering flip is the load-bearing assertion.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 00:02:59 +02:00 · 2026-05-03 00:02:59 +02:00 · 72104aed1d
commit 72104aed1d
parent a3698b4e6c
3 changed files with 102 additions and 2 deletions
--- a/src/resources/extensions/sf/auto-prompts.ts
+++ b/src/resources/extensions/sf/auto-prompts.ts
@ -2488,11 +2488,16 @@ export async function buildExecuteTaskPrompt(
 	const memoryQuery = `${sTitle} ${tTitle}`.trim();
 	const memoriesSection = await (async () => {
 		try {
-			const memories = memoryQuery
+			const usingRanker = !!memoryQuery;
+			const memories = usingRanker
 				? await getRelevantMemoriesRanked(memoryQuery, 10)
 				: getActiveMemoriesRanked(10);
 			if (memories.length === 0) return "## Project Memories\n(none yet)";
-			return `## Project Memories\n${formatMemoriesForPrompt(memories)}`;
+			// preserveRankOrder=true when the input came from the query-aware
+			// ranker so semantic relevance dominates over CATEGORY_PRIORITY in
+			// the rendered list. Static-ranked input keeps the historical
+			// category-grouped layout.
+			return `## Project Memories\n${formatMemoriesForPrompt(memories, 2000, usingRanker)}`;
 		} catch {
 			return "## Project Memories\n(unavailable)";
 		}
--- a/src/resources/extensions/sf/memory-store.ts
+++ b/src/resources/extensions/sf/memory-store.ts
@ -531,10 +531,22 @@ export function applyMemoryActions(
 /**
 * Format memories as categorized markdown for system prompt injection.
 * Truncates to token budget (~4 chars per token).
+ *
+ * `preserveRankOrder` — when true, render in input order (no category
+ * grouping). Pass true when the input came from a query-aware ranker
+ * (`getRelevantMemoriesRanked`) so semantic relevance dominates over
+ * `CATEGORY_PRIORITY`. Default false keeps the historical category-
+ * grouped layout for callers that pass static-ranked input.
+ *
+ * Why the toggle exists: category grouping puts a high-relevance
+ * "convention" memory after a low-relevance "gotcha" entry purely
+ * because gotcha has higher CATEGORY_PRIORITY. With embedding-aware
+ * ranking, that drops the most relevant items below dim ones.
 */
 export function formatMemoriesForPrompt(
 	memories: Memory[],
 	tokenBudget = 2000,
+	preserveRankOrder = false,
 ): string {
 	if (memories.length === 0) return "";

@ -543,6 +555,19 @@ export function formatMemoriesForPrompt(
 	let output = header;
 	let remaining = charBudget - header.length;

+	if (preserveRankOrder) {
+		// Render in input order — caller already ranked semantically. Each
+		// bullet shows the category inline so the agent can still tell
+		// gotchas from conventions without losing rank order.
+		for (const item of memories) {
+			const bullet = `- [${item.category}] ${item.content}\n`;
+			if (remaining < bullet.length) break;
+			output += bullet;
+			remaining -= bullet.length;
+		}
+		return output.trimEnd();
+	}
+
 	// Group by category
 	const grouped = new Map<string, Memory[]>();
 	for (const m of memories) {
--- a/src/resources/extensions/sf/tests/memory-store.test.ts
+++ b/src/resources/extensions/sf/tests/memory-store.test.ts
@ -425,6 +425,76 @@ test("memory-store: formatMemoriesForPrompt", () => {
 		"empty memories should return empty string",
 	);

+	// preserveRankOrder=true: render in input order with inline category tag,
+	// no per-category headers. Verifies that high-CATEGORY_PRIORITY items
+	// (gotcha) don't get reordered above low-priority ones (architecture)
+	// when the caller has already ranked semantically.
+	closeDatabase();
+	openDatabase(":memory:");
+	const a = createMemory({
+		category: "architecture",
+		content: "high-relevance architecture",
+	});
+	const b = createMemory({
+		category: "gotcha",
+		content: "low-relevance gotcha",
+	});
+	assert.ok(a && b);
+	const ranked = [
+		{
+			seq: 1,
+			id: a!,
+			category: "architecture",
+			content: "high-relevance architecture",
+			confidence: 0.9,
+			source_unit_type: null,
+			source_unit_id: null,
+			created_at: "",
+			updated_at: "",
+			superseded_by: null,
+			hit_count: 0,
+		},
+		{
+			seq: 2,
+			id: b!,
+			category: "gotcha",
+			content: "low-relevance gotcha",
+			confidence: 0.5,
+			source_unit_type: null,
+			source_unit_id: null,
+			created_at: "",
+			updated_at: "",
+			superseded_by: null,
+			hit_count: 0,
+		},
+	];
+	const rankPreserved = formatMemoriesForPrompt(ranked, 2000, true);
+	const archIdx = rankPreserved.indexOf("high-relevance architecture");
+	const gotIdx = rankPreserved.indexOf("low-relevance gotcha");
+	assert.ok(archIdx !== -1 && gotIdx !== -1);
+	assert.ok(
+		archIdx < gotIdx,
+		"preserveRankOrder=true should keep input order even when gotcha has higher CATEGORY_PRIORITY",
+	);
+	assert.ok(
+		rankPreserved.includes("[architecture]"),
+		"preserveRankOrder mode should tag each line with [category]",
+	);
+	assert.ok(
+		!rankPreserved.includes("### Gotcha"),
+		"preserveRankOrder mode should not emit per-category headers",
+	);
+
+	// Default (preserveRankOrder=false) still groups by category — high-priority
+	// gotcha comes before architecture in the same input.
+	const grouped = formatMemoriesForPrompt(ranked);
+	const archIdxGrouped = grouped.indexOf("high-relevance architecture");
+	const gotIdxGrouped = grouped.indexOf("low-relevance gotcha");
+	assert.ok(
+		gotIdxGrouped < archIdxGrouped,
+		"default mode keeps category-priority grouping (gotcha first)",
+	);
+
 	// Test token budget truncation
 	closeDatabase();
 	openDatabase(":memory:");