From 820f9aaf8efc52c62ebfd42370d706e5b9812aae Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Fri, 15 May 2026 18:06:33 +0200 Subject: [PATCH] fix(memory): classify extraction failures --- .../extensions/sf/memory-extractor.js | 184 ++++++++++++++++-- src/resources/extensions/sf/memory-store.js | 2 + .../extensions/sf/sf-db/sf-db-memory.js | 5 +- .../extensions/sf/sf-db/sf-db-schema.js | 28 ++- .../memory-extraction-lifecycle.test.mjs | 59 +++++- .../sf/tests/sf-db-migration.test.mjs | 7 +- 6 files changed, 265 insertions(+), 20 deletions(-) diff --git a/src/resources/extensions/sf/memory-extractor.js b/src/resources/extensions/sf/memory-extractor.js index 0522008d9..92c1d1652 100644 --- a/src/resources/extensions/sf/memory-extractor.js +++ b/src/resources/extensions/sf/memory-extractor.js @@ -224,14 +224,110 @@ export function truncateMiddle(text, maxChars, frontShare = 0.25) { return text.slice(0, frontLen) + marker + text.slice(text.length - backLen); } // ─── Response Parsing ─────────────────────────────────────────────────────── +class MemoryExtractionFailure extends Error { + constructor(message, failureClass, reason, cause) { + super(message); + this.name = "MemoryExtractionFailure"; + this.failureClass = failureClass; + this.reason = reason; + this.cause = cause; + } +} + +/** + * Classify memory extraction failures for retry and operator diagnostics. + * + * Purpose: keep transient provider failures, deterministic parse failures, + * filesystem faults, and apply/storage failures distinct in DB-backed memory + * closeout attempts. + * + * Consumer: extractMemoriesFromUnit when deciding whether a failed extraction + * should retry immediately or surface as an actionable diagnostic. + */ +export function classifyMemoryExtractionFailure(error) { + const inherited = error?.failureClass; + if (typeof inherited === "string" && inherited) return inherited; + const message = String(error?.message ?? error ?? "").toLowerCase(); + const code = String(error?.code ?? "").toLowerCase(); + if ( + code === "enoent" || + code === "eacces" || + code === "eperm" || + message.includes("no such file") || + message.includes("permission denied") + ) { + return "filesystem"; + } + if ( + message.includes("json") || + message.includes("parse") || + message.includes("invalid response") + ) { + return "parse"; + } + if ( + code === "etimedout" || + code === "econnreset" || + code === "econnrefused" || + message.includes("timeout") || + message.includes("timed out") || + message.includes("rate limit") || + message.includes("429") || + /\b5\d\d\b/.test(message) || + message.includes("overloaded") || + message.includes("service unavailable") || + message.includes("network") + ) { + return "transient"; + } + if ( + message.includes("unauthorized") || + message.includes("forbidden") || + message.includes("api key") || + message.includes("401") || + message.includes("403") + ) { + return "auth"; + } + if ( + message.includes("sqlite") || + message.includes("constraint") || + message.includes("database") + ) { + return "apply"; + } + return "unknown"; +} + /** * Parse the LLM response into memory actions. * Strips markdown fences, validates required fields. * Returns [] on any parse failure. */ export function parseMemoryResponse(raw) { + return parseMemoryResponseDetailed(raw).actions; +} + +/** + * Parse the LLM response into actions plus an explicit failure diagnostic. + * + * Purpose: distinguish a legitimate empty memory response from malformed JSON + * or unsupported action shapes so closeout diagnostics do not report parser + * failures as successful no-op extractions. + * + * Consumer: extractMemoriesFromUnit failure classification. + */ +export function parseMemoryResponseDetailed(raw) { try { // Strip markdown code fences + if (typeof raw !== "string" || raw.trim() === "") { + return { + actions: [], + failureClass: "parse", + reason: "empty-response", + error: "LLM returned an empty memory extraction response", + }; + } let cleaned = raw.trim(); if (cleaned.startsWith("```")) { cleaned = cleaned @@ -239,10 +335,21 @@ export function parseMemoryResponse(raw) { .replace(/\n?```\s*$/, ""); } const parsed = JSON.parse(cleaned); - if (!Array.isArray(parsed)) return []; + if (!Array.isArray(parsed)) { + return { + actions: [], + failureClass: "parse", + reason: "non-array-response", + error: "Memory extraction response must be a JSON array", + }; + } const actions = []; + let invalidItems = 0; for (const item of parsed) { - if (!item || typeof item !== "object" || !item.action) continue; + if (!item || typeof item !== "object" || !item.action) { + invalidItems++; + continue; + } switch (item.action) { case "add": if ( @@ -258,11 +365,15 @@ export function parseMemoryResponse(raw) { ? item.confidence : undefined, }); + } else { + invalidItems++; } break; case "prune": if (typeof item.id === "string") { actions.push({ action: "prune", id: item.id }); + } else { + invalidItems++; } break; // Silently drop any legacy uppercase action variants the model may emit @@ -270,12 +381,26 @@ export function parseMemoryResponse(raw) { // applyConsolidationActions anyway, but filtering here keeps the error // surface clean and avoids a throw on every legacy response. default: + invalidItems++; break; } } - return actions; - } catch { - return []; + if (parsed.length > 0 && actions.length === 0 && invalidItems > 0) { + return { + actions: [], + failureClass: "parse", + reason: "invalid-actions", + error: "Memory extraction response contained no valid actions", + }; + } + return { actions }; + } catch (error) { + return { + actions: [], + failureClass: "parse", + reason: "invalid-json", + error: error?.message ?? String(error), + }; } } // ─── Main Extraction Function ─────────────────────────────────────────────── @@ -290,7 +415,7 @@ export async function extractMemoriesFromUnit( llmCallFn, ) { const unitKey = `${unitType}/${unitId}`; - const recordAttempt = (status, reason, error) => + const recordAttempt = (status, reason, error, failureClass) => recordMemoryExtractionAttempt({ unitKey, unitType, @@ -298,6 +423,11 @@ export async function extractMemoriesFromUnit( activityFile, status, reason, + failureClass: + failureClass ?? + (status === "error" || error + ? classifyMemoryExtractionFailure(error) + : null), error: error?.message ?? error, }); // Mutex guard @@ -357,7 +487,7 @@ export async function extractMemoriesFromUnit( return; } } catch (error) { - recordAttempt("skipped", "stat-failed", error); + recordAttempt("skipped", "stat-failed", error, "filesystem"); debugLog("memory-extract", { phase: "skip", reason: "stat-failed", @@ -407,7 +537,15 @@ export async function extractMemoriesFromUnit( }); const response = await llmCallFn(EXTRACTION_SYSTEM, userPrompt); // Parse response - const actions = parseMemoryResponse(response); + const parsed = parseMemoryResponseDetailed(response); + if (parsed.failureClass) { + throw new MemoryExtractionFailure( + parsed.error ?? parsed.reason, + parsed.failureClass, + parsed.reason, + ); + } + const actions = parsed.actions; debugLog("memory-extract", { phase: "parsed", unitKey, @@ -432,28 +570,48 @@ export async function extractMemoriesFromUnit( ); debugLog("memory-extract", { phase: "done", unitKey }); } catch (err) { - recordAttempt("error", "extract-failed", err); + const failureClass = classifyMemoryExtractionFailure(err); + const reason = err?.reason ?? `${failureClass}-failure`; + recordAttempt("error", reason, err, failureClass); debugLog("memory-extract", { phase: "error", unitKey, + failureClass, error: err?.message || String(err), }); - // Retry once after a brief delay - if (userPrompt) { + // Retry once only for transient provider or network failures. Parser and + // apply/storage failures need a different input or code fix, so immediate + // retry just burns tokens while hiding the real class from diagnostics. + if (userPrompt && failureClass === "transient") { try { await delay(2000); const response2 = await llmCallFn(EXTRACTION_SYSTEM, userPrompt); - const actions2 = parseMemoryResponse(response2); + const parsed2 = parseMemoryResponseDetailed(response2); + if (parsed2.failureClass) { + throw new MemoryExtractionFailure( + parsed2.error ?? parsed2.reason, + parsed2.failureClass, + parsed2.reason, + ); + } + const actions2 = parsed2.actions; if (actions2.length > 0) applyConsolidationActions(actions2, unitType, unitId); markUnitProcessed(unitKey, activityFile); recordAttempt("processed", "retry-success"); debugLog("memory-extract", { phase: "retry-success", unitKey }); } catch (err2) { - recordAttempt("error", "retry-failed", err2); + const retryFailureClass = classifyMemoryExtractionFailure(err2); + recordAttempt( + "error", + err2?.reason ?? "retry-failed", + err2, + retryFailureClass, + ); debugLog("memory-extract", { phase: "retry-failed", unitKey, + failureClass: retryFailureClass, error: err2?.message || String(err2), }); // Non-fatal — memory extraction failure should never affect autonomous mode diff --git a/src/resources/extensions/sf/memory-store.js b/src/resources/extensions/sf/memory-store.js index 8129288cf..d0772154d 100644 --- a/src/resources/extensions/sf/memory-store.js +++ b/src/resources/extensions/sf/memory-store.js @@ -541,6 +541,7 @@ export function recordMemoryExtractionAttempt({ unitId, activityFile, status, + failureClass, reason, error, }) { @@ -552,6 +553,7 @@ export function recordMemoryExtractionAttempt({ unitId, activityFile, status, + failureClass, reason, error, createdAt: new Date().toISOString(), diff --git a/src/resources/extensions/sf/sf-db/sf-db-memory.js b/src/resources/extensions/sf/sf-db/sf-db-memory.js index 1a3ee2179..009127e7d 100644 --- a/src/resources/extensions/sf/sf-db/sf-db-memory.js +++ b/src/resources/extensions/sf/sf-db/sf-db-memory.js @@ -182,14 +182,15 @@ export function insertMemoryExtractionAttempt(args) { if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open"); currentDb .prepare(`INSERT INTO memory_extraction_attempts - (unit_key, unit_type, unit_id, activity_file, status, reason, error, created_at) - VALUES (:unit_key, :unit_type, :unit_id, :activity_file, :status, :reason, :error, :created_at)`) + (unit_key, unit_type, unit_id, activity_file, status, failure_class, reason, error, created_at) + VALUES (:unit_key, :unit_type, :unit_id, :activity_file, :status, :failure_class, :reason, :error, :created_at)`) .run({ ":unit_key": args.unitKey, ":unit_type": args.unitType ?? null, ":unit_id": args.unitId ?? null, ":activity_file": args.activityFile ?? null, ":status": args.status, + ":failure_class": args.failureClass ?? null, ":reason": args.reason ?? null, ":error": args.error ?? null, ":created_at": args.createdAt, diff --git a/src/resources/extensions/sf/sf-db/sf-db-schema.js b/src/resources/extensions/sf/sf-db/sf-db-schema.js index 873f14554..b569f5ee5 100644 --- a/src/resources/extensions/sf/sf-db/sf-db-schema.js +++ b/src/resources/extensions/sf/sf-db/sf-db-schema.js @@ -15,7 +15,7 @@ function defaultQueryTimeout(operation, fallbackValue) { } } -const SCHEMA_VERSION = 66; +const SCHEMA_VERSION = 69; function indexExists(db, name) { return !!db .prepare( @@ -850,6 +850,7 @@ export function initSchema(db, fileBacked, options = {}) { unit_id TEXT, activity_file TEXT, status TEXT NOT NULL, + failure_class TEXT, reason TEXT, error TEXT, created_at TEXT NOT NULL @@ -1649,6 +1650,7 @@ function migrateSchema(db, { currentPath, withQueryTimeout }) { unit_id TEXT, activity_file TEXT, status TEXT NOT NULL, + failure_class TEXT, reason TEXT, error TEXT, created_at TEXT NOT NULL @@ -3505,6 +3507,7 @@ function migrateSchema(db, { currentPath, withQueryTimeout }) { unit_id TEXT, activity_file TEXT, status TEXT NOT NULL, + failure_class TEXT, reason TEXT, error TEXT, created_at TEXT NOT NULL @@ -3549,6 +3552,29 @@ function migrateSchema(db, { currentPath, withQueryTimeout }) { }); if (ok) appliedVersion = 68; } + if (appliedVersion < 69) { + const ok = runMigrationStep("v69", () => { + // Schema v69: classify memory extraction failures. + // + // The attempt ledger already showed that extraction failed, but parse, + // transient provider, filesystem, and apply errors were indistinguishable. + // Autonomous retry policy depends on that distinction. + try { + db.exec( + "ALTER TABLE memory_extraction_attempts ADD COLUMN failure_class TEXT", + ); + } catch { + // Column may already exist on fresh DBs - idempotent. + } + db.prepare( + "INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)", + ).run({ + ":version": 69, + ":applied_at": new Date().toISOString(), + }); + }); + if (ok) appliedVersion = 69; + } // Post-migration assertion: ensure critical tables created by historical // migrations are actually present. If a prior migration claimed success but diff --git a/src/resources/extensions/sf/tests/memory-extraction-lifecycle.test.mjs b/src/resources/extensions/sf/tests/memory-extraction-lifecycle.test.mjs index 09fc6b401..55537cc93 100644 --- a/src/resources/extensions/sf/tests/memory-extraction-lifecycle.test.mjs +++ b/src/resources/extensions/sf/tests/memory-extraction-lifecycle.test.mjs @@ -9,7 +9,7 @@ import assert from "node:assert/strict"; import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; -import { afterEach, test } from "vitest"; +import { afterEach, test, vi } from "vitest"; import { _resetExtractionState, extractMemoriesFromUnit, @@ -120,6 +120,63 @@ test("extractMemoriesFromUnit_when_successful_records_processed_unit_and_attempt assert.match(memories[0].content, /stored in SQLite/); }); +test("extractMemoriesFromUnit_when_response_invalid_json_records_parse_failure", async () => { + const project = makeProject(); + const activityFile = writeActivity( + project, + "invalid-json.jsonl", + "Memory extraction should classify malformed model output. ".repeat(40), + ); + const llmCall = vi.fn(async () => "{not valid json"); + + await extractMemoriesFromUnit( + activityFile, + "execute-task", + "M001/S01/T03", + llmCall, + ); + + assert.equal(llmCall.mock.calls.length, 1); + const latestAttempt = getRecentMemoryExtractionAttempts(1)[0]; + assert.equal(latestAttempt.status, "error"); + assert.equal(latestAttempt.failure_class, "parse"); + assert.equal(latestAttempt.reason, "invalid-json"); + const processed = getDatabase() + .prepare("SELECT count(*) AS cnt FROM memory_processed_units") + .get(); + assert.equal(processed.cnt, 0); +}); + +test("extractMemoriesFromUnit_when_transient_failure_retries_once", async () => { + const project = makeProject(); + const activityFile = writeActivity( + project, + "transient.jsonl", + "Memory extraction should retry only transient provider failures. ".repeat( + 40, + ), + ); + const llmCall = vi + .fn() + .mockRejectedValueOnce(new Error("503 service unavailable")) + .mockResolvedValueOnce("[]"); + + await extractMemoriesFromUnit( + activityFile, + "execute-task", + "M001/S01/T04", + llmCall, + ); + + assert.equal(llmCall.mock.calls.length, 2); + const attempts = getRecentMemoryExtractionAttempts(5); + assert.equal(attempts[0].status, "processed"); + assert.equal(attempts[0].reason, "retry-success"); + const transientAttempt = attempts.find((row) => row.status === "error"); + assert.equal(transientAttempt.failure_class, "transient"); + assert.equal(transientAttempt.reason, "transient-failure"); +}); + test("formatMemoriesForPrompt_when_injected_increments_hit_count", () => { makeProject(); const id = createMemory({ diff --git a/src/resources/extensions/sf/tests/sf-db-migration.test.mjs b/src/resources/extensions/sf/tests/sf-db-migration.test.mjs index 7ce08a548..48fb7b5f0 100644 --- a/src/resources/extensions/sf/tests/sf-db-migration.test.mjs +++ b/src/resources/extensions/sf/tests/sf-db-migration.test.mjs @@ -273,7 +273,7 @@ test("openDatabase_migrates_v27_tasks_without_created_at_through_spec_backfill", const version = db .prepare("SELECT MAX(version) AS version FROM schema_version") .get(); - assert.equal(version.version, 68); + assert.equal(version.version, 69); // v61: intent_chapters table exists const chaptersTable = db .prepare( @@ -383,11 +383,11 @@ test("openDatabase_v52_db_heals_routing_history_and_auto_start_path_works", () = initRoutingHistory(dbPath); }, "initRoutingHistory should not throw on a v52 DB"); - // Schema should have migrated to v66 (current head) + // Schema should have migrated to the current head. const version = db .prepare("SELECT MAX(version) AS version FROM schema_version") .get(); - assert.equal(version.version, 68); + assert.equal(version.version, 69); }); test("openDatabase_when_fresh_db_supports_schedule_entries", () => { @@ -556,6 +556,7 @@ test("openDatabase_memory_extraction_attempts_table_exists", () => { "unit_id", "activity_file", "status", + "failure_class", "reason", "error", "created_at",