singularity-forge/src/resources/extensions/gsd/memory-extractor.ts
Tom Boucher d41ce60530 fix: resolve OAuth API key in buildMemoryLLMCall via modelRegistry (#2959) (#3233)
buildMemoryLLMCall called completeSimple without passing an API key,
which routed to streamSimpleAnthropic -> getEnvApiKey (env vars only).
OAuth users (Claude Max/Pro) store tokens in auth.json, so getEnvApiKey
returned undefined, the call threw, and memory extraction silently failed.

Now resolves the key eagerly via ctx.modelRegistry.getApiKey() which
checks auth.json through authStorage, matching how streamAnthropic and
the compaction orchestrator resolve credentials.

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-30 14:32:22 -06:00

360 lines
12 KiB
TypeScript

// GSD Memory Extractor — Background LLM extraction from activity logs
//
// After each unit completes, extracts durable knowledge from the session
// transcript and stores it as memory entries. One extraction at a time
// (mutex guard). Fire-and-forget — never blocks auto-mode.
import { readFileSync, statSync } from 'node:fs';
import type { ExtensionContext } from '@gsd/pi-coding-agent';
import type { Api, AssistantMessage, Model } from '@gsd/pi-ai';
import {
getActiveMemories,
isUnitProcessed,
markUnitProcessed,
applyMemoryActions,
decayStaleMemories,
} from './memory-store.js';
import type { MemoryAction } from './memory-store.js';
// ─── Types ──────────────────────────────────────────────────────────────────
/**
 * Function that sends one system prompt and one user prompt to a model and
 * resolves to the text of the reply.
 */
export type LLMCallFn = (system: string, user: string) => Promise<string>;
// ─── Concurrency Guard ──────────────────────────────────────────────────────
/** True while an extraction is running; set and cleared by extractMemoriesFromUnit. */
let _extracting = false;
/** `Date.now()` timestamp recorded when the most recent extraction started (0 = none yet). */
let _lastExtractionTime = 0;
/** Minimum gap between the starts of two extractions, in milliseconds. */
const MIN_EXTRACTION_INTERVAL_MS = 30_000;
// ─── Skip Conditions ────────────────────────────────────────────────────────
/** Unit types for which extractMemoriesFromUnit returns without doing anything. */
const SKIP_TYPES = new Set([
'complete-slice',
'rewrite-docs',
'triage-captures',
]);
/** Activity files smaller than this many bytes (per `statSync().size`) are ignored. */
const MIN_ACTIVITY_SIZE = 1024; // 1KB
// ─── Secret Redaction ───────────────────────────────────────────────────────
/**
 * Patterns for credential-looking substrings. Every match is replaced with
 * `[REDACTED]` by {@link redactSecrets}; patterns are applied in array order.
 */
const SECRET_PATTERNS = [
  // Keyword-style assignments: credential-ish name, separator, then 20+ token characters.
  /(?:sk|pk|api[_-]?key|token|secret|password|credential|auth)[_-]?\w*[\s:=]+['"]?[\w\-./+=]{20,}['"]?/gi,
  // AWS access key IDs.
  /AKIA[0-9A-Z]{16}/g,
  // GitHub tokens (ghp_, gho_, ghu_, ghs_, ghr_).
  /gh[pousr]_[A-Za-z0-9_]{36,}/g,
  // Stripe-style live/test keys (rk_, sk_, pk_).
  /[rsp]k_(?:live|test)_[A-Za-z0-9]{20,}/g,
  // JSON Web Tokens (three dot-separated base64url segments).
  /eyJ[A-Za-z0-9_-]{20,}\.eyJ[A-Za-z0-9_-]{20,}\.[A-Za-z0-9_-]+/g,
  // PEM private key blocks, including the key body.
  /-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----[\s\S]*?-----END (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----/g,
  // HTTP bearer tokens.
  /(?:Bearer\s+)[A-Za-z0-9\-._~+/]+=*/gi,
  // npm access tokens.
  /npm_[A-Za-z0-9]{36,}/g,
  // Anthropic API keys.
  /sk-ant-[A-Za-z0-9\-_]{20,}/g,
  // Prefixed "sk-<scope>-" keys whose body may contain '-' and '_'. The plain
  // "sk-" pattern below only accepts alphanumerics and would let these through.
  /\bsk-(?:proj|svcacct|admin)-[A-Za-z0-9_-]{20,}/g,
  // Generic "sk-" keys with a long alphanumeric body.
  /sk-[A-Za-z0-9]{40,}/g,
];

/**
 * Replace every substring that matches one of {@link SECRET_PATTERNS} with
 * the literal `[REDACTED]`.
 *
 * @param text - Text to sanitize.
 * @returns A copy of `text` with all matches replaced; unchanged if nothing matches.
 */
function redactSecrets(text: string): string {
  let result = text;
  for (const pattern of SECRET_PATTERNS) {
    // No manual lastIndex reset needed: String.prototype.replace restarts a
    // global regex from index 0 on every call.
    result = result.replace(pattern, '[REDACTED]');
  }
  return result;
}
// ─── Model Selection ────────────────────────────────────────────────────────
/**
 * Create the LLM call used for memory extraction.
 *
 * Picks the first available model whose id contains "haiku" (case-insensitive);
 * when there is none, picks the model with the lowest `cost.input`.
 *
 * @param ctx - Extension context whose `modelRegistry` supplies models and API keys.
 * @returns A function that sends a system + user prompt and resolves to the
 *   concatenated text parts of the reply, or `null` when no model is available
 *   or anything throws while setting up.
 */
export function buildMemoryLLMCall(ctx: ExtensionContext): LLMCallFn | null {
  try {
    const candidates = ctx.modelRegistry.getAvailable();
    if (!candidates || candidates.length === 0) return null;

    // Haiku first; otherwise the cheapest model by input cost (sorted on a copy).
    const haiku = candidates.find(candidate => candidate.id.toLowerCase().includes('haiku'));
    const picked =
      haiku || [...candidates].sort((left, right) => left.cost.input - right.cost.input)[0];
    if (!picked) return null;
    const selectedModel = picked as Model<Api>;

    // Resolve the API key through modelRegistry so OAuth tokens (auth.json) are used.
    // Otherwise streamSimpleAnthropic only consults env vars via getEnvApiKey, which
    // yields undefined for OAuth users (Claude Max / Claude Pro).
    // See: https://github.com/gsd-build/gsd-2/issues/2959
    // The lookup starts now; a failed lookup resolves to undefined instead of rejecting.
    const apiKeyLookup = ctx.modelRegistry.getApiKey(selectedModel).catch(() => undefined);

    const callModel = async (system: string, user: string): Promise<string> => {
      const { completeSimple } = await import('@gsd/pi-ai');
      const apiKey = await apiKeyLookup;
      const reply: AssistantMessage = await completeSimple(
        selectedModel,
        {
          systemPrompt: system,
          messages: [{ role: 'user', content: [{ type: 'text', text: user }], timestamp: Date.now() }],
        },
        {
          maxTokens: 2048,
          temperature: 0,
          // Only pass apiKey when one was actually resolved.
          ...(apiKey ? { apiKey } : {}),
        },
      );
      // Keep only the text parts of the reply and glue them together.
      return reply.content
        .flatMap(part => (part.type === 'text' ? [part.text] : []))
        .join('');
    };
    return callModel;
  } catch {
    return null;
  }
}
// ─── Extraction Prompts ─────────────────────────────────────────────────────
/**
 * System prompt passed to the LLM call in extractMemoriesFromUnit. It asks the
 * model to answer with a JSON array of CREATE / UPDATE / REINFORCE / SUPERSEDE
 * actions, which is the shape parseMemoryResponse validates.
 */
const EXTRACTION_SYSTEM = `You are a memory extraction agent for a software project. Analyze the session
transcript and identify durable knowledge worth remembering for future sessions.
Categories: architecture, convention, gotcha, preference, environment, pattern
Actions (return JSON array):
- CREATE: {"action": "CREATE", "category": "<cat>", "content": "<text>", "confidence": <0.6-0.95>}
- UPDATE: {"action": "UPDATE", "id": "<MEM###>", "content": "<revised text>"}
- REINFORCE: {"action": "REINFORCE", "id": "<MEM###>"}
- SUPERSEDE: {"action": "SUPERSEDE", "id": "<MEM###>", "superseded_by": "<MEM###>"}
Rules:
- Don't create memories for one-off bug fixes or temporary state
- Don't duplicate existing memories — use REINFORCE or UPDATE
- Keep content to 1-3 sentences
- Confidence: 0.6 tentative, 0.8 solid, 0.95 well-confirmed
- Prefer fewer high-quality memories over many low-quality ones
- Return empty array [] if nothing worth remembering
- NEVER include secrets, API keys, or passwords
Return ONLY a valid JSON array.`;
/**
 * Assemble the user prompt: a numbered list of the current memories (or
 * "(none yet)") followed by the transcript under a heading naming the unit.
 *
 * @param unitType - Unit type shown in the transcript heading.
 * @param unitId - Unit id shown in the transcript heading.
 * @param existingMemories - Memories to list, in the given order, numbered from 1.
 * @param transcript - Transcript text appended verbatim.
 * @returns The complete user prompt.
 */
function buildExtractionUserPrompt(
  unitType: string,
  unitId: string,
  existingMemories: { id: string; category: string; content: string }[],
  transcript: string,
): string {
  const numbered = existingMemories.map(
    (memory, index) => `${index + 1}. [${memory.id}] (${memory.category}) ${memory.content}`,
  );
  const memoryListing = numbered.length > 0 ? numbered.join('\n') : '(none yet)';

  // One array element per output line; the empty element yields the blank separator line.
  return [
    '## Current Active Memories',
    memoryListing,
    '',
    `## Session Transcript (${unitType}: ${unitId})`,
    `${transcript}`,
  ].join('\n');
}
// ─── Activity JSONL Parsing ─────────────────────────────────────────────────
/**
 * Extract assistant message text from activity JSONL.
 *
 * Every non-blank line is parsed as JSON; lines that fail to parse and entries
 * whose `role` is not `'assistant'` are skipped. Text is taken from `content`
 * when it is a string, or from the `text` of each `{ type: 'text' }` block when
 * `content` is an array. Empty or non-string texts and non-object blocks are
 * ignored. Collected pieces are joined with a blank line.
 *
 * @param raw - Raw JSONL contents.
 * @param maxChars - Character budget for the collected text (separators are
 *   not counted). The piece that would exceed it is cut to fit and collection stops.
 * @returns The joined transcript, or `''` when nothing was collected.
 */
function extractTranscriptFromActivity(raw: string, maxChars = 30_000): string {
  const parts: string[] = [];
  let totalChars = 0;

  const isObject = (value: unknown): value is Record<string, unknown> =>
    typeof value === 'object' && value !== null;

  // Adds one piece within the remaining budget. Returns false once the budget
  // is exhausted, which tells the caller to stop collecting.
  const append = (text: string): boolean => {
    if (text.length === 0) return true;
    const remaining = maxChars - totalChars;
    if (text.length > remaining) {
      // Never push an empty tail: it would only add a dangling separator.
      if (remaining > 0) parts.push(text.substring(0, remaining));
      return false;
    }
    parts.push(text);
    totalChars += text.length;
    return true;
  };

  for (const line of raw.split('\n')) {
    if (!line.trim()) continue;

    let entry: unknown;
    try {
      entry = JSON.parse(line);
    } catch {
      continue; // Skip malformed lines
    }
    if (!isObject(entry) || entry.role !== 'assistant') continue;

    const content = entry.content;
    if (Array.isArray(content)) {
      for (const block of content) {
        // Requiring a string keeps the character accounting numeric; a
        // malformed block is skipped without discarding its siblings.
        if (!isObject(block) || block.type !== 'text' || typeof block.text !== 'string') continue;
        if (!append(block.text)) return parts.join('\n\n');
      }
    } else if (typeof content === 'string') {
      if (!append(content)) return parts.join('\n\n');
    }
  }
  return parts.join('\n\n');
}
// ─── Response Parsing ───────────────────────────────────────────────────────
/**
 * Parse the LLM response into memory actions.
 *
 * Leading/trailing markdown code fences are stripped before parsing. If the
 * text still is not valid JSON (for example the model wrapped the array in
 * prose), the span from the first `[` to the last `]` is tried instead.
 * Items are kept only when their `action` is CREATE, UPDATE, REINFORCE or
 * SUPERSEDE and the fields required for that action have the right type;
 * everything else is dropped silently.
 *
 * @param raw - Raw text returned by the model.
 * @returns The validated actions; `[]` when nothing parses to a JSON array.
 */
export function parseMemoryResponse(raw: string): MemoryAction[] {
  try {
    // Strip markdown code fences
    let cleaned = raw.trim();
    if (cleaned.startsWith('```')) {
      cleaned = cleaned.replace(/^```(?:json)?\s*\n?/, '').replace(/\n?```\s*$/, '');
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(cleaned);
    } catch {
      // Fallback for replies with text around the array. Only reached when the
      // direct parse throws, so inputs that already parsed behave as before.
      const start = cleaned.indexOf('[');
      const end = cleaned.lastIndexOf(']');
      if (start === -1 || end <= start) return [];
      // A failure here propagates to the outer catch and yields [].
      parsed = JSON.parse(cleaned.slice(start, end + 1));
    }
    if (!Array.isArray(parsed)) return [];

    const actions: MemoryAction[] = [];
    for (const item of parsed) {
      if (!item || typeof item !== 'object' || !item.action) continue;
      switch (item.action) {
        case 'CREATE':
          if (typeof item.category === 'string' && typeof item.content === 'string') {
            actions.push({
              action: 'CREATE',
              category: item.category,
              content: item.content,
              confidence: typeof item.confidence === 'number' ? item.confidence : undefined,
            });
          }
          break;
        case 'UPDATE':
          if (typeof item.id === 'string' && typeof item.content === 'string') {
            actions.push({
              action: 'UPDATE',
              id: item.id,
              content: item.content,
              confidence: typeof item.confidence === 'number' ? item.confidence : undefined,
            });
          }
          break;
        case 'REINFORCE':
          if (typeof item.id === 'string') {
            actions.push({ action: 'REINFORCE', id: item.id });
          }
          break;
        case 'SUPERSEDE':
          if (typeof item.id === 'string' && typeof item.superseded_by === 'string') {
            actions.push({
              action: 'SUPERSEDE',
              id: item.id,
              superseded_by: item.superseded_by,
            });
          }
          break;
      }
    }
    return actions;
  } catch {
    return [];
  }
}
// ─── Main Extraction Function ───────────────────────────────────────────────
/**
 * Extract memories from a completed unit's activity log.
 *
 * Fire-and-forget: never throws, runs at most one extraction at a time, and
 * starts at most one extraction per MIN_EXTRACTION_INTERVAL_MS. Returns
 * without doing anything when the unit type is in SKIP_TYPES, the unit was
 * already processed, or the activity file is missing or smaller than
 * MIN_ACTIVITY_SIZE.
 *
 * @param activityFile - Path of the unit's activity JSONL file.
 * @param unitType - Unit type; combined with `unitId` into the processed-unit key.
 * @param unitId - Unit id.
 * @param llmCallFn - Function used to query the model.
 */
export async function extractMemoriesFromUnit(
  activityFile: string,
  unitType: string,
  unitId: string,
  llmCallFn: LLMCallFn,
): Promise<void> {
  // Prompt budget for the transcript; same value as the default cap of
  // extractTranscriptFromActivity.
  const maxTranscriptChars = 30_000;

  // Mutex guard
  if (_extracting) return;
  // Rate limit
  const now = Date.now();
  if (now - _lastExtractionTime < MIN_EXTRACTION_INTERVAL_MS) return;
  // Skip certain unit types
  if (SKIP_TYPES.has(unitType)) return;

  const unitKey = `${unitType}/${unitId}`;
  // Both pre-checks call into code that may throw (memory store, filesystem).
  // Keeping them inside the try upholds the "never throws" contract.
  try {
    if (isUnitProcessed(unitKey)) return;
    if (statSync(activityFile).size < MIN_ACTIVITY_SIZE) return;
  } catch {
    return;
  }

  _extracting = true;
  _lastExtractionTime = now;
  try {
    // Read and parse activity file
    const raw = readFileSync(activityFile, 'utf-8');
    // Collect the whole transcript first: redaction must see complete secrets.
    // Truncating before redacting can cut a key in half (e.g. a PEM block
    // without its END line) so that no pattern matches it any more.
    const fullTranscript = extractTranscriptFromActivity(raw, Number.POSITIVE_INFINITY);
    if (!fullTranscript.trim()) return;

    // Redact secrets, then apply the prompt budget
    const safeTranscript = redactSecrets(fullTranscript).slice(0, maxTranscriptChars);
    if (!safeTranscript.trim()) return;

    // Get current memories for context
    const activeMemories = getActiveMemories().map(m => ({
      id: m.id,
      category: m.category,
      content: m.content,
    }));

    // Build prompts
    const userPrompt = buildExtractionUserPrompt(unitType, unitId, activeMemories, safeTranscript);

    // Call LLM
    const response = await llmCallFn(EXTRACTION_SYSTEM, userPrompt);

    // Parse response
    const actions = parseMemoryResponse(response);

    // Apply actions
    if (actions.length > 0) {
      applyMemoryActions(actions, unitType, unitId);
    }

    // Decay stale memories periodically
    decayStaleMemories(20);

    // Mark unit as processed
    markUnitProcessed(unitKey, activityFile);
  } catch {
    // Non-fatal — memory extraction failure should never affect auto-mode
  } finally {
    _extracting = false;
  }
}
// ─── Testing Helpers ────────────────────────────────────────────────────────
/**
 * Testing helper: clears the in-progress flag and the last-extraction
 * timestamp so the next extraction is neither blocked nor rate-limited.
 */
export function _resetExtractionState(): void {
  [_extracting, _lastExtractionTime] = [false, 0];
}