fix(memory): classify extraction failures

This commit is contained in:
Mikael Hugo 2026-05-15 18:06:33 +02:00
parent 7dfb5099c9
commit 820f9aaf8e
6 changed files with 265 additions and 20 deletions

View file

@ -224,14 +224,110 @@ export function truncateMiddle(text, maxChars, frontShare = 0.25) {
return text.slice(0, frontLen) + marker + text.slice(text.length - backLen);
}
// ─── Response Parsing ───────────────────────────────────────────────────────
class MemoryExtractionFailure extends Error {
constructor(message, failureClass, reason, cause) {
super(message);
this.name = "MemoryExtractionFailure";
this.failureClass = failureClass;
this.reason = reason;
this.cause = cause;
}
}
/**
* Classify memory extraction failures for retry and operator diagnostics.
*
* Purpose: keep transient provider failures, deterministic parse failures,
* filesystem faults, and apply/storage failures distinct in DB-backed memory
* closeout attempts.
*
* Consumer: extractMemoriesFromUnit when deciding whether a failed extraction
* should retry immediately or surface as an actionable diagnostic.
*/
export function classifyMemoryExtractionFailure(error) {
const inherited = error?.failureClass;
if (typeof inherited === "string" && inherited) return inherited;
const message = String(error?.message ?? error ?? "").toLowerCase();
const code = String(error?.code ?? "").toLowerCase();
if (
code === "enoent" ||
code === "eacces" ||
code === "eperm" ||
message.includes("no such file") ||
message.includes("permission denied")
) {
return "filesystem";
}
if (
message.includes("json") ||
message.includes("parse") ||
message.includes("invalid response")
) {
return "parse";
}
if (
code === "etimedout" ||
code === "econnreset" ||
code === "econnrefused" ||
message.includes("timeout") ||
message.includes("timed out") ||
message.includes("rate limit") ||
message.includes("429") ||
/\b5\d\d\b/.test(message) ||
message.includes("overloaded") ||
message.includes("service unavailable") ||
message.includes("network")
) {
return "transient";
}
if (
message.includes("unauthorized") ||
message.includes("forbidden") ||
message.includes("api key") ||
message.includes("401") ||
message.includes("403")
) {
return "auth";
}
if (
message.includes("sqlite") ||
message.includes("constraint") ||
message.includes("database")
) {
return "apply";
}
return "unknown";
}
/**
* Parse the LLM response into memory actions.
* Strips markdown fences, validates required fields.
* Returns [] on any parse failure.
*/
export function parseMemoryResponse(raw) {
return parseMemoryResponseDetailed(raw).actions;
}
/**
* Parse the LLM response into actions plus an explicit failure diagnostic.
*
* Purpose: distinguish a legitimate empty memory response from malformed JSON
* or unsupported action shapes so closeout diagnostics do not report parser
* failures as successful no-op extractions.
*
* Consumer: extractMemoriesFromUnit failure classification.
*/
export function parseMemoryResponseDetailed(raw) {
try {
// Strip markdown code fences
if (typeof raw !== "string" || raw.trim() === "") {
return {
actions: [],
failureClass: "parse",
reason: "empty-response",
error: "LLM returned an empty memory extraction response",
};
}
let cleaned = raw.trim();
if (cleaned.startsWith("```")) {
cleaned = cleaned
@ -239,10 +335,21 @@ export function parseMemoryResponse(raw) {
.replace(/\n?```\s*$/, "");
}
const parsed = JSON.parse(cleaned);
if (!Array.isArray(parsed)) return [];
if (!Array.isArray(parsed)) {
return {
actions: [],
failureClass: "parse",
reason: "non-array-response",
error: "Memory extraction response must be a JSON array",
};
}
const actions = [];
let invalidItems = 0;
for (const item of parsed) {
if (!item || typeof item !== "object" || !item.action) continue;
if (!item || typeof item !== "object" || !item.action) {
invalidItems++;
continue;
}
switch (item.action) {
case "add":
if (
@ -258,11 +365,15 @@ export function parseMemoryResponse(raw) {
? item.confidence
: undefined,
});
} else {
invalidItems++;
}
break;
case "prune":
if (typeof item.id === "string") {
actions.push({ action: "prune", id: item.id });
} else {
invalidItems++;
}
break;
// Silently drop any legacy uppercase action variants the model may emit
@ -270,12 +381,26 @@ export function parseMemoryResponse(raw) {
// applyConsolidationActions anyway, but filtering here keeps the error
// surface clean and avoids a throw on every legacy response.
default:
invalidItems++;
break;
}
}
return actions;
} catch {
return [];
if (parsed.length > 0 && actions.length === 0 && invalidItems > 0) {
return {
actions: [],
failureClass: "parse",
reason: "invalid-actions",
error: "Memory extraction response contained no valid actions",
};
}
return { actions };
} catch (error) {
return {
actions: [],
failureClass: "parse",
reason: "invalid-json",
error: error?.message ?? String(error),
};
}
}
// ─── Main Extraction Function ───────────────────────────────────────────────
@ -290,7 +415,7 @@ export async function extractMemoriesFromUnit(
llmCallFn,
) {
const unitKey = `${unitType}/${unitId}`;
const recordAttempt = (status, reason, error) =>
const recordAttempt = (status, reason, error, failureClass) =>
recordMemoryExtractionAttempt({
unitKey,
unitType,
@ -298,6 +423,11 @@ export async function extractMemoriesFromUnit(
activityFile,
status,
reason,
failureClass:
failureClass ??
(status === "error" || error
? classifyMemoryExtractionFailure(error)
: null),
error: error?.message ?? error,
});
// Mutex guard
@ -357,7 +487,7 @@ export async function extractMemoriesFromUnit(
return;
}
} catch (error) {
recordAttempt("skipped", "stat-failed", error);
recordAttempt("skipped", "stat-failed", error, "filesystem");
debugLog("memory-extract", {
phase: "skip",
reason: "stat-failed",
@ -407,7 +537,15 @@ export async function extractMemoriesFromUnit(
});
const response = await llmCallFn(EXTRACTION_SYSTEM, userPrompt);
// Parse response
const actions = parseMemoryResponse(response);
const parsed = parseMemoryResponseDetailed(response);
if (parsed.failureClass) {
throw new MemoryExtractionFailure(
parsed.error ?? parsed.reason,
parsed.failureClass,
parsed.reason,
);
}
const actions = parsed.actions;
debugLog("memory-extract", {
phase: "parsed",
unitKey,
@ -432,28 +570,48 @@ export async function extractMemoriesFromUnit(
);
debugLog("memory-extract", { phase: "done", unitKey });
} catch (err) {
recordAttempt("error", "extract-failed", err);
const failureClass = classifyMemoryExtractionFailure(err);
const reason = err?.reason ?? `${failureClass}-failure`;
recordAttempt("error", reason, err, failureClass);
debugLog("memory-extract", {
phase: "error",
unitKey,
failureClass,
error: err?.message || String(err),
});
// Retry once after a brief delay
if (userPrompt) {
// Retry once only for transient provider or network failures. Parser and
// apply/storage failures need a different input or code fix, so immediate
// retry just burns tokens while hiding the real class from diagnostics.
if (userPrompt && failureClass === "transient") {
try {
await delay(2000);
const response2 = await llmCallFn(EXTRACTION_SYSTEM, userPrompt);
const actions2 = parseMemoryResponse(response2);
const parsed2 = parseMemoryResponseDetailed(response2);
if (parsed2.failureClass) {
throw new MemoryExtractionFailure(
parsed2.error ?? parsed2.reason,
parsed2.failureClass,
parsed2.reason,
);
}
const actions2 = parsed2.actions;
if (actions2.length > 0)
applyConsolidationActions(actions2, unitType, unitId);
markUnitProcessed(unitKey, activityFile);
recordAttempt("processed", "retry-success");
debugLog("memory-extract", { phase: "retry-success", unitKey });
} catch (err2) {
recordAttempt("error", "retry-failed", err2);
const retryFailureClass = classifyMemoryExtractionFailure(err2);
recordAttempt(
"error",
err2?.reason ?? "retry-failed",
err2,
retryFailureClass,
);
debugLog("memory-extract", {
phase: "retry-failed",
unitKey,
failureClass: retryFailureClass,
error: err2?.message || String(err2),
});
// Non-fatal — memory extraction failure should never affect autonomous mode

View file

@ -541,6 +541,7 @@ export function recordMemoryExtractionAttempt({
unitId,
activityFile,
status,
failureClass,
reason,
error,
}) {
@ -552,6 +553,7 @@ export function recordMemoryExtractionAttempt({
unitId,
activityFile,
status,
failureClass,
reason,
error,
createdAt: new Date().toISOString(),

View file

@ -182,14 +182,15 @@ export function insertMemoryExtractionAttempt(args) {
if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
currentDb
.prepare(`INSERT INTO memory_extraction_attempts
(unit_key, unit_type, unit_id, activity_file, status, reason, error, created_at)
VALUES (:unit_key, :unit_type, :unit_id, :activity_file, :status, :reason, :error, :created_at)`)
(unit_key, unit_type, unit_id, activity_file, status, failure_class, reason, error, created_at)
VALUES (:unit_key, :unit_type, :unit_id, :activity_file, :status, :failure_class, :reason, :error, :created_at)`)
.run({
":unit_key": args.unitKey,
":unit_type": args.unitType ?? null,
":unit_id": args.unitId ?? null,
":activity_file": args.activityFile ?? null,
":status": args.status,
":failure_class": args.failureClass ?? null,
":reason": args.reason ?? null,
":error": args.error ?? null,
":created_at": args.createdAt,

View file

@ -15,7 +15,7 @@ function defaultQueryTimeout(operation, fallbackValue) {
}
}
const SCHEMA_VERSION = 66;
const SCHEMA_VERSION = 69;
function indexExists(db, name) {
return !!db
.prepare(
@ -850,6 +850,7 @@ export function initSchema(db, fileBacked, options = {}) {
unit_id TEXT,
activity_file TEXT,
status TEXT NOT NULL,
failure_class TEXT,
reason TEXT,
error TEXT,
created_at TEXT NOT NULL
@ -1649,6 +1650,7 @@ function migrateSchema(db, { currentPath, withQueryTimeout }) {
unit_id TEXT,
activity_file TEXT,
status TEXT NOT NULL,
failure_class TEXT,
reason TEXT,
error TEXT,
created_at TEXT NOT NULL
@ -3505,6 +3507,7 @@ function migrateSchema(db, { currentPath, withQueryTimeout }) {
unit_id TEXT,
activity_file TEXT,
status TEXT NOT NULL,
failure_class TEXT,
reason TEXT,
error TEXT,
created_at TEXT NOT NULL
@ -3549,6 +3552,29 @@ function migrateSchema(db, { currentPath, withQueryTimeout }) {
});
if (ok) appliedVersion = 68;
}
if (appliedVersion < 69) {
const ok = runMigrationStep("v69", () => {
// Schema v69: classify memory extraction failures.
//
// The attempt ledger already showed that extraction failed, but parse,
// transient provider, filesystem, and apply errors were indistinguishable.
// Autonomous retry policy depends on that distinction.
try {
db.exec(
"ALTER TABLE memory_extraction_attempts ADD COLUMN failure_class TEXT",
);
} catch {
// Column may already exist on fresh DBs - idempotent.
}
db.prepare(
"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
).run({
":version": 69,
":applied_at": new Date().toISOString(),
});
});
if (ok) appliedVersion = 69;
}
// Post-migration assertion: ensure critical tables created by historical
// migrations are actually present. If a prior migration claimed success but

View file

@ -9,7 +9,7 @@ import assert from "node:assert/strict";
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, test } from "vitest";
import { afterEach, test, vi } from "vitest";
import {
_resetExtractionState,
extractMemoriesFromUnit,
@ -120,6 +120,63 @@ test("extractMemoriesFromUnit_when_successful_records_processed_unit_and_attempt
assert.match(memories[0].content, /stored in SQLite/);
});
test("extractMemoriesFromUnit_when_response_invalid_json_records_parse_failure", async () => {
const project = makeProject();
const activityFile = writeActivity(
project,
"invalid-json.jsonl",
"Memory extraction should classify malformed model output. ".repeat(40),
);
const llmCall = vi.fn(async () => "{not valid json");
await extractMemoriesFromUnit(
activityFile,
"execute-task",
"M001/S01/T03",
llmCall,
);
assert.equal(llmCall.mock.calls.length, 1);
const latestAttempt = getRecentMemoryExtractionAttempts(1)[0];
assert.equal(latestAttempt.status, "error");
assert.equal(latestAttempt.failure_class, "parse");
assert.equal(latestAttempt.reason, "invalid-json");
const processed = getDatabase()
.prepare("SELECT count(*) AS cnt FROM memory_processed_units")
.get();
assert.equal(processed.cnt, 0);
});
test("extractMemoriesFromUnit_when_transient_failure_retries_once", async () => {
const project = makeProject();
const activityFile = writeActivity(
project,
"transient.jsonl",
"Memory extraction should retry only transient provider failures. ".repeat(
40,
),
);
const llmCall = vi
.fn()
.mockRejectedValueOnce(new Error("503 service unavailable"))
.mockResolvedValueOnce("[]");
await extractMemoriesFromUnit(
activityFile,
"execute-task",
"M001/S01/T04",
llmCall,
);
assert.equal(llmCall.mock.calls.length, 2);
const attempts = getRecentMemoryExtractionAttempts(5);
assert.equal(attempts[0].status, "processed");
assert.equal(attempts[0].reason, "retry-success");
const transientAttempt = attempts.find((row) => row.status === "error");
assert.equal(transientAttempt.failure_class, "transient");
assert.equal(transientAttempt.reason, "transient-failure");
});
test("formatMemoriesForPrompt_when_injected_increments_hit_count", () => {
makeProject();
const id = createMemory({

View file

@ -273,7 +273,7 @@ test("openDatabase_migrates_v27_tasks_without_created_at_through_spec_backfill",
const version = db
.prepare("SELECT MAX(version) AS version FROM schema_version")
.get();
assert.equal(version.version, 68);
assert.equal(version.version, 69);
// v61: intent_chapters table exists
const chaptersTable = db
.prepare(
@ -383,11 +383,11 @@ test("openDatabase_v52_db_heals_routing_history_and_auto_start_path_works", () =
initRoutingHistory(dbPath);
}, "initRoutingHistory should not throw on a v52 DB");
// Schema should have migrated to v66 (current head)
// Schema should have migrated to the current head.
const version = db
.prepare("SELECT MAX(version) AS version FROM schema_version")
.get();
assert.equal(version.version, 68);
assert.equal(version.version, 69);
});
test("openDatabase_when_fresh_db_supports_schedule_entries", () => {
@ -556,6 +556,7 @@ test("openDatabase_memory_extraction_attempts_table_exists", () => {
"unit_id",
"activity_file",
"status",
"failure_class",
"reason",
"error",
"created_at",