// Source: singularity-forge/src/resources/extensions/sf/tests/memory-extractor.test.ts
// (repository viewer metadata: 401 lines, 13 KiB, TypeScript)

import assert from "node:assert/strict";
import { test, vi } from 'vitest';
import {
_resetExtractionState,
buildMemoryLLMCall,
parseMemoryResponse,
} from "../memory-extractor.ts";
import {
applyMemoryActions,
getActiveMemoriesRanked,
} from "../memory-store.ts";
import { closeDatabase, openDatabase } from "../sf-db.ts";
// ═══════════════════════════════════════════════════════════════════════════
// memory-extractor: parse valid JSON response
// ═══════════════════════════════════════════════════════════════════════════
test("memory-extractor: parse valid JSON", () => {
  // One well-formed entry for each action kind the parser is expected to accept.
  const payload = [
    {
      action: "CREATE",
      category: "gotcha",
      content: "esbuild drops binaries",
      confidence: 0.85,
    },
    { action: "REINFORCE", id: "MEM001" },
    { action: "UPDATE", id: "MEM002", content: "revised content" },
    { action: "SUPERSEDE", id: "MEM003", superseded_by: "MEM004" },
  ];

  const parsed = parseMemoryResponse(JSON.stringify(payload));
  assert.deepStrictEqual(parsed.length, 4, "should parse 4 actions");

  // Parsed actions are checked in the same order as the payload entries.
  const [created, reinforced, updated, superseded] = parsed;

  assert.deepStrictEqual(
    created.action,
    "CREATE",
    "first action should be CREATE",
  );
  // CREATE-only fields are read through `any`, as the original test did.
  const createdFields = created as any;
  assert.deepStrictEqual(createdFields.category, "gotcha", "CREATE category");
  assert.deepStrictEqual(createdFields.confidence, 0.85, "CREATE confidence");

  assert.deepStrictEqual(
    reinforced.action,
    "REINFORCE",
    "second action should be REINFORCE",
  );
  assert.deepStrictEqual(
    updated.action,
    "UPDATE",
    "third action should be UPDATE",
  );
  assert.deepStrictEqual(
    superseded.action,
    "SUPERSEDE",
    "fourth action should be SUPERSEDE",
  );
});
// ═══════════════════════════════════════════════════════════════════════════
// memory-extractor: parse fenced JSON response
// ═══════════════════════════════════════════════════════════════════════════
test("memory-extractor: parse fenced JSON", () => {
  // A JSON array wrapped in a Markdown ```json fence, assembled line by line.
  const fencedResponse = [
    "```json",
    "[",
    ' {"action": "CREATE", "category": "convention", "content": "test memory"}',
    "]",
    "```",
  ].join("\n");

  const parsed = parseMemoryResponse(fencedResponse);

  assert.deepStrictEqual(
    parsed.length,
    1,
    "should parse 1 action from fenced JSON",
  );
  const [onlyAction] = parsed;
  assert.deepStrictEqual(
    onlyAction.action,
    "CREATE",
    "action should be CREATE",
  );
});
// ═══════════════════════════════════════════════════════════════════════════
// memory-extractor: parse empty array response
// ═══════════════════════════════════════════════════════════════════════════
test("memory-extractor: parse empty array", () => {
  // An empty JSON array is valid input and must yield no actions.
  const { length: actionCount } = parseMemoryResponse("[]");
  assert.deepStrictEqual(
    actionCount,
    0,
    "empty array should parse to empty actions",
  );
});
// ═══════════════════════════════════════════════════════════════════════════
// memory-extractor: parse malformed response
// ═══════════════════════════════════════════════════════════════════════════
test("memory-extractor: malformed responses", () => {
  // Each pair is [raw response, assertion message]; every input must parse to [].
  const malformedCases: ReadonlyArray<readonly [string, string]> = [
    ["not json at all", "garbage text should return []"],
    ['{"action": "CREATE"}', "non-array should return []"],
    ["", "empty string should return []"],
    ["```\nbroken\n```", "fenced non-JSON should return []"],
  ];

  for (const [rawResponse, failureMessage] of malformedCases) {
    assert.deepStrictEqual(
      parseMemoryResponse(rawResponse),
      [],
      failureMessage,
    );
  }
});
// ═══════════════════════════════════════════════════════════════════════════
// memory-extractor: validation of required fields
// ═══════════════════════════════════════════════════════════════════════════
test("memory-extractor: field validation", () => {
  // Valid and invalid entries are interleaved; only the valid one of each
  // action kind is expected to come back from the parser.
  const candidates = [
    // Valid CREATE
    { action: "CREATE", category: "gotcha", content: "valid" },
    // Invalid CREATE — missing content
    { action: "CREATE", category: "gotcha" },
    // Invalid CREATE — missing category
    { action: "CREATE", content: "no category" },
    // Valid REINFORCE
    { action: "REINFORCE", id: "MEM001" },
    // Invalid REINFORCE — missing id
    { action: "REINFORCE" },
    // Valid UPDATE
    { action: "UPDATE", id: "MEM002", content: "new content" },
    // Invalid UPDATE — missing content
    { action: "UPDATE", id: "MEM002" },
    // Valid SUPERSEDE
    { action: "SUPERSEDE", id: "MEM001", superseded_by: "MEM002" },
    // Invalid SUPERSEDE — missing superseded_by
    { action: "SUPERSEDE", id: "MEM001" },
    // Unknown action
    { action: "DELETE", id: "MEM001" },
    // Null entry
    null,
  ];

  const accepted = parseMemoryResponse(JSON.stringify(candidates));
  assert.deepStrictEqual(
    accepted.length,
    4,
    "should only accept 4 valid actions",
  );

  // Expected surviving action kinds, in input order, with their messages.
  const expectedKinds: ReadonlyArray<readonly [string, string]> = [
    ["CREATE", "first valid is CREATE"],
    ["REINFORCE", "second valid is REINFORCE"],
    ["UPDATE", "third valid is UPDATE"],
    ["SUPERSEDE", "fourth valid is SUPERSEDE"],
  ];
  expectedKinds.forEach(([kind, failureMessage], position) => {
    assert.deepStrictEqual(accepted[position].action, kind, failureMessage);
  });
});
// ═══════════════════════════════════════════════════════════════════════════
// Integration: applyMemoryActions with mixed actions
// ═══════════════════════════════════════════════════════════════════════════
test("integration: mixed action lifecycle", () => {
  openDatabase(":memory:");
  // Close the database in `finally` so that a failing assertion below does
  // not leave the in-memory database open after this test.
  try {
    // Phase 1: Create initial memories
    applyMemoryActions(
      [
        {
          action: "CREATE",
          category: "gotcha",
          content: "npm run build needs tsc first",
          confidence: 0.7,
        },
        {
          action: "CREATE",
          category: "convention",
          content: "all DB queries use named params",
          confidence: 0.8,
        },
        {
          action: "CREATE",
          category: "architecture",
          content: "extensions loaded from two paths",
          confidence: 0.85,
        },
      ],
      "plan-slice",
      "M001/S01",
    );
    let active = getActiveMemoriesRanked(30);
    assert.deepStrictEqual(active.length, 3, "phase 1: 3 active memories");

    // Phase 2: Reinforce one, update another, create new
    applyMemoryActions(
      [
        { action: "REINFORCE", id: "MEM002" },
        {
          action: "UPDATE",
          id: "MEM001",
          content: "npm run build requires tsc --noEmit first",
        },
        {
          action: "CREATE",
          category: "pattern",
          content: "use INSERT OR IGNORE for idempotency",
          confidence: 0.75,
        },
      ],
      "execute-task",
      "M001/S01/T01",
    );
    active = getActiveMemoriesRanked(30);
    assert.deepStrictEqual(active.length, 4, "phase 2: 4 active memories");
    assert.deepStrictEqual(
      active.find((m) => m.id === "MEM001")?.content,
      "npm run build requires tsc --noEmit first",
      "MEM001 content should be updated",
    );
    assert.deepStrictEqual(
      active.find((m) => m.id === "MEM002")?.hit_count,
      1,
      "MEM002 should be reinforced",
    );

    // Phase 3: Supersede MEM001 with MEM005
    applyMemoryActions(
      [
        {
          action: "CREATE",
          category: "gotcha",
          content: "build script handles tsc automatically now",
          confidence: 0.9,
        },
        { action: "SUPERSEDE", id: "MEM001", superseded_by: "MEM005" },
      ],
      "execute-task",
      "M001/S01/T02",
    );
    active = getActiveMemoriesRanked(30);
    assert.deepStrictEqual(
      active.length,
      4,
      "phase 3: 4 active (1 superseded, 1 created)",
    );
    assert.ok(
      !active.find((m) => m.id === "MEM001"),
      "MEM001 should be superseded",
    );
    assert.ok(
      !!active.find((m) => m.id === "MEM005"),
      "MEM005 should be active",
    );

    // Verify ranking. Scores this test expects, highest first:
    //   MEM005: 0.9  * 1.0           = 0.9
    //   MEM002: 0.8  * (1 + 1 * 0.1) = 0.88  (one reinforcement hit)
    //   MEM003: 0.85 * 1.0           = 0.85
    //   MEM004: 0.75 * 1.0           = 0.75
    // Only the top two positions are asserted.
    assert.deepStrictEqual(
      active[0].id,
      "MEM005",
      "MEM005 should rank first (0.9)",
    );
    assert.deepStrictEqual(
      active[1].id,
      "MEM002",
      "MEM002 should rank second (0.88)",
    );
  } finally {
    closeDatabase();
  }
});
// ═══════════════════════════════════════════════════════════════════════════
// memory-extractor: _resetExtractionState
// ═══════════════════════════════════════════════════════════════════════════
test("memory-extractor: reset extraction state", () => {
  // Smoke test only: reaching the assertion means the reset call returned
  // without throwing.
  _resetExtractionState();
  const completedWithoutThrowing = true;
  assert.ok(completedWithoutThrowing, "_resetExtractionState should not throw");
});
// ═══════════════════════════════════════════════════════════════════════════
// memory-extractor: buildMemoryLLMCall resolves OAuth API key via modelRegistry
// Regression test for #2959 — OAuth users had broken memory extraction
// because streamSimpleAnthropic only checked env vars, not auth.json.
// ═══════════════════════════════════════════════════════════════════════════
// Replace the pi-ai module with a stub whose `completeSimple` resolves to a
// fixed text response.
vi.mock("@singularity-forge/pi-ai", () => {
  const completeSimple = vi.fn(async () => {
    return {
      content: [{ type: "text", text: "mocked memory extraction" }],
    };
  });
  return { completeSimple };
});
test("memory-extractor: buildMemoryLLMCall resolves API key from modelRegistry for OAuth users", async () => {
  const OAUTH_TOKEN = "sk-ant-oat-test-oauth-token-12345";

  // Flipped by the fake registry when the code under test asks for a key.
  let keyWasRequested = false;

  const fakeModel = {
    id: "claude-haiku-test",
    provider: "anthropic",
    api: "anthropic-messages",
    cost: { input: 0.25, output: 1.25 },
  };
  const modelRegistry = {
    getAvailable: () => [fakeModel],
    getApiKey: async (_model: any) => {
      keyWasRequested = true;
      return OAUTH_TOKEN;
    },
  };
  const ctx = { modelRegistry } as any;

  const llmCallFn = buildMemoryLLMCall(ctx);
  assert.ok(
    llmCallFn !== null,
    "buildMemoryLLMCall should return a function when models are available",
  );

  // The key is resolved lazily inside the returned function, so the call
  // has to be made before checking whether getApiKey ran.
  await llmCallFn!("system prompt", "user prompt");

  assert.ok(
    keyWasRequested,
    "buildMemoryLLMCall must call modelRegistry.getApiKey() to resolve OAuth tokens",
  );
});
test("memory-extractor: buildMemoryLLMCall returns null when no models available", () => {
  // A registry that reports no available models and never yields a key.
  const emptyRegistry = {
    getAvailable: () => [],
    getApiKey: async () => undefined,
  };
  const ctx = { modelRegistry: emptyRegistry } as any;

  assert.strictEqual(
    buildMemoryLLMCall(ctx),
    null,
    "should return null when no models available",
  );
});
test("memory-extractor: buildMemoryLLMCall prefers haiku model", async () => {
  // Id of the model most recently passed to getApiKey by the code under test.
  let keyRequestedForModelId: string | undefined;

  const haikuModel = {
    id: "claude-3-5-haiku-20241022",
    provider: "anthropic",
    api: "anthropic-messages",
    cost: { input: 0.25, output: 1.25 },
  };
  const sonnetModel = {
    id: "claude-sonnet-4-20250514",
    provider: "anthropic",
    api: "anthropic-messages",
    cost: { input: 3, output: 15 },
  };

  // Sonnet is listed first so that picking haiku cannot be a side effect of
  // list order.
  const modelRegistry = {
    getAvailable: () => [sonnetModel, haikuModel],
    getApiKey: async (model: any) => {
      keyRequestedForModelId = model.id;
      return "sk-ant-oat-test-token";
    },
  };
  const ctx = { modelRegistry } as any;

  const llmCallFn = buildMemoryLLMCall(ctx);
  assert.ok(llmCallFn !== null, "should return a function");

  // The key is resolved lazily, so the returned function has to be invoked
  // for getApiKey to run.
  await llmCallFn!("system prompt", "user prompt");

  assert.strictEqual(
    keyRequestedForModelId,
    "claude-3-5-haiku-20241022",
    "should resolve API key for haiku model, not sonnet",
  );
});