fix(memory): classify extraction failures
This commit is contained in:
parent
7dfb5099c9
commit
820f9aaf8e
6 changed files with 265 additions and 20 deletions
|
|
@ -224,14 +224,110 @@ export function truncateMiddle(text, maxChars, frontShare = 0.25) {
|
|||
return text.slice(0, frontLen) + marker + text.slice(text.length - backLen);
|
||||
}
|
||||
// ─── Response Parsing ───────────────────────────────────────────────────────
|
||||
/**
 * Error carrying an explicit memory-extraction failure classification.
 *
 * Thrown by extraction code paths (e.g. when parseMemoryResponseDetailed
 * reports a failure) so classifyMemoryExtractionFailure can honor the
 * classification via the `failureClass` own property instead of guessing
 * from the message text.
 *
 * @param {string} message - Human-readable description of the failure.
 * @param {string} failureClass - Classification bucket ("parse", "transient", ...).
 * @param {string} reason - Machine-readable reason code (e.g. "invalid-json").
 * @param {Error} [cause] - Underlying error, when wrapping another failure.
 */
class MemoryExtractionFailure extends Error {
  constructor(message, failureClass, reason, cause) {
    // Pass `cause` through the standard Error options bag so error-chain
    // aware tooling (Node >= 16.9) sees it; the own-property assignment
    // below is kept so existing readers of `this.cause` are unaffected.
    super(message, { cause });
    this.name = "MemoryExtractionFailure";
    this.failureClass = failureClass;
    this.reason = reason;
    this.cause = cause;
  }
}
|
||||
|
||||
/**
|
||||
* Classify memory extraction failures for retry and operator diagnostics.
|
||||
*
|
||||
* Purpose: keep transient provider failures, deterministic parse failures,
|
||||
* filesystem faults, and apply/storage failures distinct in DB-backed memory
|
||||
* closeout attempts.
|
||||
*
|
||||
* Consumer: extractMemoriesFromUnit when deciding whether a failed extraction
|
||||
* should retry immediately or surface as an actionable diagnostic.
|
||||
*/
|
||||
/**
 * Classify memory extraction failures for retry and operator diagnostics.
 *
 * Purpose: keep transient provider failures, deterministic parse failures,
 * filesystem faults, and apply/storage failures distinct in DB-backed memory
 * closeout attempts.
 *
 * Consumer: extractMemoriesFromUnit when deciding whether a failed extraction
 * should retry immediately or surface as an actionable diagnostic.
 *
 * Classification precedence: explicit failureClass > filesystem > parse >
 * transient > auth > apply > unknown.
 *
 * @param {unknown} error - Thrown value; may carry `failureClass`, `code`, `message`.
 * @returns {string} "filesystem" | "parse" | "transient" | "auth" | "apply" | "unknown".
 */
export function classifyMemoryExtractionFailure(error) {
  // Honor an explicit classification set by the thrower
  // (e.g. MemoryExtractionFailure) rather than re-deriving it.
  const inherited = error?.failureClass;
  if (typeof inherited === "string" && inherited) return inherited;
  const message = String(error?.message ?? error ?? "").toLowerCase();
  const code = String(error?.code ?? "").toLowerCase();
  if (
    code === "enoent" ||
    code === "eacces" ||
    code === "eperm" ||
    message.includes("no such file") ||
    message.includes("permission denied")
  ) {
    return "filesystem";
  }
  if (
    message.includes("json") ||
    message.includes("parse") ||
    message.includes("invalid response")
  ) {
    return "parse";
  }
  if (
    code === "etimedout" ||
    code === "econnreset" ||
    code === "econnrefused" ||
    message.includes("timeout") ||
    message.includes("timed out") ||
    message.includes("rate limit") ||
    // Word-bounded status-code matches so unrelated digit runs (durations
    // like "14290 ms", ports, byte counts) are not misread as HTTP statuses.
    /\b429\b/.test(message) ||
    /\b5\d\d\b/.test(message) ||
    message.includes("overloaded") ||
    message.includes("service unavailable") ||
    message.includes("network")
  ) {
    return "transient";
  }
  if (
    message.includes("unauthorized") ||
    message.includes("forbidden") ||
    message.includes("api key") ||
    /\b401\b/.test(message) ||
    /\b403\b/.test(message)
  ) {
    return "auth";
  }
  if (
    message.includes("sqlite") ||
    message.includes("constraint") ||
    message.includes("database")
  ) {
    return "apply";
  }
  return "unknown";
}
|
||||
|
||||
/**
|
||||
* Parse the LLM response into memory actions.
|
||||
* Strips markdown fences, validates required fields.
|
||||
* Returns [] on any parse failure.
|
||||
*/
|
||||
/**
 * Parse the LLM response into memory actions.
 *
 * Thin compatibility wrapper over parseMemoryResponseDetailed: strips
 * markdown fences, validates required fields, and returns [] on any parse
 * failure, discarding the failure diagnostic.
 *
 * @param {string} raw - Raw LLM response text.
 * @returns {Array<object>} Validated memory actions; empty on failure.
 */
export function parseMemoryResponse(raw) {
  const { actions } = parseMemoryResponseDetailed(raw);
  return actions;
}
|
||||
|
||||
/**
|
||||
* Parse the LLM response into actions plus an explicit failure diagnostic.
|
||||
*
|
||||
* Purpose: distinguish a legitimate empty memory response from malformed JSON
|
||||
* or unsupported action shapes so closeout diagnostics do not report parser
|
||||
* failures as successful no-op extractions.
|
||||
*
|
||||
* Consumer: extractMemoriesFromUnit failure classification.
|
||||
*/
|
||||
export function parseMemoryResponseDetailed(raw) {
|
||||
try {
|
||||
// Strip markdown code fences
|
||||
if (typeof raw !== "string" || raw.trim() === "") {
|
||||
return {
|
||||
actions: [],
|
||||
failureClass: "parse",
|
||||
reason: "empty-response",
|
||||
error: "LLM returned an empty memory extraction response",
|
||||
};
|
||||
}
|
||||
let cleaned = raw.trim();
|
||||
if (cleaned.startsWith("```")) {
|
||||
cleaned = cleaned
|
||||
|
|
@ -239,10 +335,21 @@ export function parseMemoryResponse(raw) {
|
|||
.replace(/\n?```\s*$/, "");
|
||||
}
|
||||
const parsed = JSON.parse(cleaned);
|
||||
if (!Array.isArray(parsed)) return [];
|
||||
if (!Array.isArray(parsed)) {
|
||||
return {
|
||||
actions: [],
|
||||
failureClass: "parse",
|
||||
reason: "non-array-response",
|
||||
error: "Memory extraction response must be a JSON array",
|
||||
};
|
||||
}
|
||||
const actions = [];
|
||||
let invalidItems = 0;
|
||||
for (const item of parsed) {
|
||||
if (!item || typeof item !== "object" || !item.action) continue;
|
||||
if (!item || typeof item !== "object" || !item.action) {
|
||||
invalidItems++;
|
||||
continue;
|
||||
}
|
||||
switch (item.action) {
|
||||
case "add":
|
||||
if (
|
||||
|
|
@ -258,11 +365,15 @@ export function parseMemoryResponse(raw) {
|
|||
? item.confidence
|
||||
: undefined,
|
||||
});
|
||||
} else {
|
||||
invalidItems++;
|
||||
}
|
||||
break;
|
||||
case "prune":
|
||||
if (typeof item.id === "string") {
|
||||
actions.push({ action: "prune", id: item.id });
|
||||
} else {
|
||||
invalidItems++;
|
||||
}
|
||||
break;
|
||||
// Silently drop any legacy uppercase action variants the model may emit
|
||||
|
|
@ -270,12 +381,26 @@ export function parseMemoryResponse(raw) {
|
|||
// applyConsolidationActions anyway, but filtering here keeps the error
|
||||
// surface clean and avoids a throw on every legacy response.
|
||||
default:
|
||||
invalidItems++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return actions;
|
||||
} catch {
|
||||
return [];
|
||||
if (parsed.length > 0 && actions.length === 0 && invalidItems > 0) {
|
||||
return {
|
||||
actions: [],
|
||||
failureClass: "parse",
|
||||
reason: "invalid-actions",
|
||||
error: "Memory extraction response contained no valid actions",
|
||||
};
|
||||
}
|
||||
return { actions };
|
||||
} catch (error) {
|
||||
return {
|
||||
actions: [],
|
||||
failureClass: "parse",
|
||||
reason: "invalid-json",
|
||||
error: error?.message ?? String(error),
|
||||
};
|
||||
}
|
||||
}
|
||||
// ─── Main Extraction Function ───────────────────────────────────────────────
|
||||
|
|
@ -290,7 +415,7 @@ export async function extractMemoriesFromUnit(
|
|||
llmCallFn,
|
||||
) {
|
||||
const unitKey = `${unitType}/${unitId}`;
|
||||
const recordAttempt = (status, reason, error) =>
|
||||
const recordAttempt = (status, reason, error, failureClass) =>
|
||||
recordMemoryExtractionAttempt({
|
||||
unitKey,
|
||||
unitType,
|
||||
|
|
@ -298,6 +423,11 @@ export async function extractMemoriesFromUnit(
|
|||
activityFile,
|
||||
status,
|
||||
reason,
|
||||
failureClass:
|
||||
failureClass ??
|
||||
(status === "error" || error
|
||||
? classifyMemoryExtractionFailure(error)
|
||||
: null),
|
||||
error: error?.message ?? error,
|
||||
});
|
||||
// Mutex guard
|
||||
|
|
@ -357,7 +487,7 @@ export async function extractMemoriesFromUnit(
|
|||
return;
|
||||
}
|
||||
} catch (error) {
|
||||
recordAttempt("skipped", "stat-failed", error);
|
||||
recordAttempt("skipped", "stat-failed", error, "filesystem");
|
||||
debugLog("memory-extract", {
|
||||
phase: "skip",
|
||||
reason: "stat-failed",
|
||||
|
|
@ -407,7 +537,15 @@ export async function extractMemoriesFromUnit(
|
|||
});
|
||||
const response = await llmCallFn(EXTRACTION_SYSTEM, userPrompt);
|
||||
// Parse response
|
||||
const actions = parseMemoryResponse(response);
|
||||
const parsed = parseMemoryResponseDetailed(response);
|
||||
if (parsed.failureClass) {
|
||||
throw new MemoryExtractionFailure(
|
||||
parsed.error ?? parsed.reason,
|
||||
parsed.failureClass,
|
||||
parsed.reason,
|
||||
);
|
||||
}
|
||||
const actions = parsed.actions;
|
||||
debugLog("memory-extract", {
|
||||
phase: "parsed",
|
||||
unitKey,
|
||||
|
|
@ -432,28 +570,48 @@ export async function extractMemoriesFromUnit(
|
|||
);
|
||||
debugLog("memory-extract", { phase: "done", unitKey });
|
||||
} catch (err) {
|
||||
recordAttempt("error", "extract-failed", err);
|
||||
const failureClass = classifyMemoryExtractionFailure(err);
|
||||
const reason = err?.reason ?? `${failureClass}-failure`;
|
||||
recordAttempt("error", reason, err, failureClass);
|
||||
debugLog("memory-extract", {
|
||||
phase: "error",
|
||||
unitKey,
|
||||
failureClass,
|
||||
error: err?.message || String(err),
|
||||
});
|
||||
// Retry once after a brief delay
|
||||
if (userPrompt) {
|
||||
// Retry once only for transient provider or network failures. Parser and
|
||||
// apply/storage failures need a different input or code fix, so immediate
|
||||
// retry just burns tokens while hiding the real class from diagnostics.
|
||||
if (userPrompt && failureClass === "transient") {
|
||||
try {
|
||||
await delay(2000);
|
||||
const response2 = await llmCallFn(EXTRACTION_SYSTEM, userPrompt);
|
||||
const actions2 = parseMemoryResponse(response2);
|
||||
const parsed2 = parseMemoryResponseDetailed(response2);
|
||||
if (parsed2.failureClass) {
|
||||
throw new MemoryExtractionFailure(
|
||||
parsed2.error ?? parsed2.reason,
|
||||
parsed2.failureClass,
|
||||
parsed2.reason,
|
||||
);
|
||||
}
|
||||
const actions2 = parsed2.actions;
|
||||
if (actions2.length > 0)
|
||||
applyConsolidationActions(actions2, unitType, unitId);
|
||||
markUnitProcessed(unitKey, activityFile);
|
||||
recordAttempt("processed", "retry-success");
|
||||
debugLog("memory-extract", { phase: "retry-success", unitKey });
|
||||
} catch (err2) {
|
||||
recordAttempt("error", "retry-failed", err2);
|
||||
const retryFailureClass = classifyMemoryExtractionFailure(err2);
|
||||
recordAttempt(
|
||||
"error",
|
||||
err2?.reason ?? "retry-failed",
|
||||
err2,
|
||||
retryFailureClass,
|
||||
);
|
||||
debugLog("memory-extract", {
|
||||
phase: "retry-failed",
|
||||
unitKey,
|
||||
failureClass: retryFailureClass,
|
||||
error: err2?.message || String(err2),
|
||||
});
|
||||
// Non-fatal — memory extraction failure should never affect autonomous mode
|
||||
|
|
|
|||
|
|
@ -541,6 +541,7 @@ export function recordMemoryExtractionAttempt({
|
|||
unitId,
|
||||
activityFile,
|
||||
status,
|
||||
failureClass,
|
||||
reason,
|
||||
error,
|
||||
}) {
|
||||
|
|
@ -552,6 +553,7 @@ export function recordMemoryExtractionAttempt({
|
|||
unitId,
|
||||
activityFile,
|
||||
status,
|
||||
failureClass,
|
||||
reason,
|
||||
error,
|
||||
createdAt: new Date().toISOString(),
|
||||
|
|
|
|||
|
|
@ -182,14 +182,15 @@ export function insertMemoryExtractionAttempt(args) {
|
|||
if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
|
||||
currentDb
|
||||
.prepare(`INSERT INTO memory_extraction_attempts
|
||||
(unit_key, unit_type, unit_id, activity_file, status, reason, error, created_at)
|
||||
VALUES (:unit_key, :unit_type, :unit_id, :activity_file, :status, :reason, :error, :created_at)`)
|
||||
(unit_key, unit_type, unit_id, activity_file, status, failure_class, reason, error, created_at)
|
||||
VALUES (:unit_key, :unit_type, :unit_id, :activity_file, :status, :failure_class, :reason, :error, :created_at)`)
|
||||
.run({
|
||||
":unit_key": args.unitKey,
|
||||
":unit_type": args.unitType ?? null,
|
||||
":unit_id": args.unitId ?? null,
|
||||
":activity_file": args.activityFile ?? null,
|
||||
":status": args.status,
|
||||
":failure_class": args.failureClass ?? null,
|
||||
":reason": args.reason ?? null,
|
||||
":error": args.error ?? null,
|
||||
":created_at": args.createdAt,
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ function defaultQueryTimeout(operation, fallbackValue) {
|
|||
}
|
||||
}
|
||||
|
||||
const SCHEMA_VERSION = 66;
|
||||
const SCHEMA_VERSION = 69;
|
||||
function indexExists(db, name) {
|
||||
return !!db
|
||||
.prepare(
|
||||
|
|
@ -850,6 +850,7 @@ export function initSchema(db, fileBacked, options = {}) {
|
|||
unit_id TEXT,
|
||||
activity_file TEXT,
|
||||
status TEXT NOT NULL,
|
||||
failure_class TEXT,
|
||||
reason TEXT,
|
||||
error TEXT,
|
||||
created_at TEXT NOT NULL
|
||||
|
|
@ -1649,6 +1650,7 @@ function migrateSchema(db, { currentPath, withQueryTimeout }) {
|
|||
unit_id TEXT,
|
||||
activity_file TEXT,
|
||||
status TEXT NOT NULL,
|
||||
failure_class TEXT,
|
||||
reason TEXT,
|
||||
error TEXT,
|
||||
created_at TEXT NOT NULL
|
||||
|
|
@ -3505,6 +3507,7 @@ function migrateSchema(db, { currentPath, withQueryTimeout }) {
|
|||
unit_id TEXT,
|
||||
activity_file TEXT,
|
||||
status TEXT NOT NULL,
|
||||
failure_class TEXT,
|
||||
reason TEXT,
|
||||
error TEXT,
|
||||
created_at TEXT NOT NULL
|
||||
|
|
@ -3549,6 +3552,29 @@ function migrateSchema(db, { currentPath, withQueryTimeout }) {
|
|||
});
|
||||
if (ok) appliedVersion = 68;
|
||||
}
|
||||
if (appliedVersion < 69) {
|
||||
const ok = runMigrationStep("v69", () => {
|
||||
// Schema v69: classify memory extraction failures.
|
||||
//
|
||||
// The attempt ledger already showed that extraction failed, but parse,
|
||||
// transient provider, filesystem, and apply errors were indistinguishable.
|
||||
// Autonomous retry policy depends on that distinction.
|
||||
try {
|
||||
db.exec(
|
||||
"ALTER TABLE memory_extraction_attempts ADD COLUMN failure_class TEXT",
|
||||
);
|
||||
} catch {
|
||||
// Column may already exist on fresh DBs - idempotent.
|
||||
}
|
||||
db.prepare(
|
||||
"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
|
||||
).run({
|
||||
":version": 69,
|
||||
":applied_at": new Date().toISOString(),
|
||||
});
|
||||
});
|
||||
if (ok) appliedVersion = 69;
|
||||
}
|
||||
|
||||
// Post-migration assertion: ensure critical tables created by historical
|
||||
// migrations are actually present. If a prior migration claimed success but
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ import assert from "node:assert/strict";
|
|||
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, test } from "vitest";
|
||||
import { afterEach, test, vi } from "vitest";
|
||||
import {
|
||||
_resetExtractionState,
|
||||
extractMemoriesFromUnit,
|
||||
|
|
@ -120,6 +120,63 @@ test("extractMemoriesFromUnit_when_successful_records_processed_unit_and_attempt
|
|||
assert.match(memories[0].content, /stored in SQLite/);
|
||||
});
|
||||
|
||||
// Malformed JSON from the model must be recorded as a classified "parse"
// failure, must NOT consume a retry (parse failures are deterministic),
// and must NOT mark the unit as processed.
test("extractMemoriesFromUnit_when_response_invalid_json_records_parse_failure", async () => {
  const project = makeProject();
  const activityFile = writeActivity(
    project,
    "invalid-json.jsonl",
    "Memory extraction should classify malformed model output. ".repeat(40),
  );
  // Deliberately broken JSON so the parser reports reason "invalid-json".
  const llmCall = vi.fn(async () => "{not valid json");

  await extractMemoriesFromUnit(
    activityFile,
    "execute-task",
    "M001/S01/T03",
    llmCall,
  );

  // Exactly one LLM call: no retry should be attempted for a parse failure.
  assert.equal(llmCall.mock.calls.length, 1);
  const latestAttempt = getRecentMemoryExtractionAttempts(1)[0];
  assert.equal(latestAttempt.status, "error");
  assert.equal(latestAttempt.failure_class, "parse");
  assert.equal(latestAttempt.reason, "invalid-json");
  // The unit stays unprocessed so a corrected extraction can run later.
  const processed = getDatabase()
    .prepare("SELECT count(*) AS cnt FROM memory_processed_units")
    .get();
  assert.equal(processed.cnt, 0);
});
|
||||
|
||||
// A transient provider failure (HTTP 503) should trigger exactly one retry.
// Both the original transient error and the retry success are recorded in
// the attempt ledger, newest first.
test("extractMemoriesFromUnit_when_transient_failure_retries_once", async () => {
  const project = makeProject();
  const activityFile = writeActivity(
    project,
    "transient.jsonl",
    "Memory extraction should retry only transient provider failures. ".repeat(
      40,
    ),
  );
  // First call fails with a 5xx-style message (classified "transient"),
  // second call returns a valid empty action list.
  const llmCall = vi
    .fn()
    .mockRejectedValueOnce(new Error("503 service unavailable"))
    .mockResolvedValueOnce("[]");

  await extractMemoriesFromUnit(
    activityFile,
    "execute-task",
    "M001/S01/T04",
    llmCall,
  );

  // Two calls total: the original attempt plus one retry.
  assert.equal(llmCall.mock.calls.length, 2);
  const attempts = getRecentMemoryExtractionAttempts(5);
  assert.equal(attempts[0].status, "processed");
  assert.equal(attempts[0].reason, "retry-success");
  const transientAttempt = attempts.find((row) => row.status === "error");
  assert.equal(transientAttempt.failure_class, "transient");
  assert.equal(transientAttempt.reason, "transient-failure");
});
||||
|
||||
test("formatMemoriesForPrompt_when_injected_increments_hit_count", () => {
|
||||
makeProject();
|
||||
const id = createMemory({
|
||||
|
|
|
|||
|
|
@ -273,7 +273,7 @@ test("openDatabase_migrates_v27_tasks_without_created_at_through_spec_backfill",
|
|||
const version = db
|
||||
.prepare("SELECT MAX(version) AS version FROM schema_version")
|
||||
.get();
|
||||
assert.equal(version.version, 68);
|
||||
assert.equal(version.version, 69);
|
||||
// v61: intent_chapters table exists
|
||||
const chaptersTable = db
|
||||
.prepare(
|
||||
|
|
@ -383,11 +383,11 @@ test("openDatabase_v52_db_heals_routing_history_and_auto_start_path_works", () =
|
|||
initRoutingHistory(dbPath);
|
||||
}, "initRoutingHistory should not throw on a v52 DB");
|
||||
|
||||
// Schema should have migrated to v66 (current head)
|
||||
// Schema should have migrated to the current head.
|
||||
const version = db
|
||||
.prepare("SELECT MAX(version) AS version FROM schema_version")
|
||||
.get();
|
||||
assert.equal(version.version, 68);
|
||||
assert.equal(version.version, 69);
|
||||
});
|
||||
|
||||
test("openDatabase_when_fresh_db_supports_schedule_entries", () => {
|
||||
|
|
@ -556,6 +556,7 @@ test("openDatabase_memory_extraction_attempts_table_exists", () => {
|
|||
"unit_id",
|
||||
"activity_file",
|
||||
"status",
|
||||
"failure_class",
|
||||
"reason",
|
||||
"error",
|
||||
"created_at",
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue