diff --git a/packages/coding-agent/src/cli/list-models.test.ts b/packages/coding-agent/src/cli/list-models.test.ts index f35f50c04..b98493d8e 100644 --- a/packages/coding-agent/src/cli/list-models.test.ts +++ b/packages/coding-agent/src/cli/list-models.test.ts @@ -1,4 +1,7 @@ import assert from "node:assert/strict"; +import { writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; import type { Model } from "@singularity-forge/ai"; import { afterEach, beforeEach, describe, it } from "vitest"; import type { ModelRegistry } from "../core/model-registry.js"; @@ -17,6 +20,15 @@ const model = (provider: string, id: string): Model => ({ maxTokens: 16000, }); +/** Minimal no-op ModelRegistry stub used by discovery-cache tests */ +const staticRegistry = (staticModels: Model[] = []): ModelRegistry => + ({ + discoverModels: async () => [], + getAvailable: () => staticModels, + getDiscoveredModels: () => [], + isDiscovered: () => false, + }) as unknown as ModelRegistry; + let originalLog: typeof console.log; let output: string[]; @@ -86,3 +98,109 @@ describe("listModels", () => { assert.match(rendered, /MiniMax-M2\.7/); }); }); + +describe("listModels – discovery cache merge", () => { + /** Write a minimal discovery-cache.json to a temp file and return its path */ + function writeTempCache(entries: Record): string { + const path = join(tmpdir(), `discovery-cache-test-${Date.now()}-${Math.random().toString(36).slice(2)}.json`); + writeFileSync( + path, + JSON.stringify({ version: 1, entries }), + "utf-8", + ); + return path; + } + + it("includes opencode models from discovery cache when provider is absent from static catalog", async () => { + const cachePath = writeTempCache({ + opencode: { + models: [ + { id: "big-pickle", name: "big-pickle", provider: "opencode", api: "openai-completions", baseUrl: "https://opencode.ai/zen", input: ["text"] }, + { id: "gpt-5-nano", name: "gpt-5-nano", provider: "opencode", api: "openai-completions", baseUrl: "https://opencode.ai/zen", input: ["text"] }, + ], + }, + }); + + await listModels(staticRegistry(), { + discover: true, + _discoveryCacheFilePath: cachePath, + }); + + const rendered = output.join("\n"); + assert.match(rendered, /opencode/); + assert.match(rendered, /big-pickle/); + assert.match(rendered, /gpt-5-nano/); + }); + + it("dedupes opencode-go: 9 static + 15 in cache → 15 unique rows (static wins for overlap)", async () => { + // 9 static models + const staticModels = Array.from({ length: 9 }, (_, i) => + model("opencode-go", `static-model-${i}`), + ); + // Discovery cache has 15 models: 9 overlap with static, 6 unique + const cacheModels = [ + ...Array.from({ length: 9 }, (_, i) => ({ id: `static-model-${i}` })), + ...Array.from({ length: 6 }, (_, i) => ({ id: `live-only-${i}` })), + ]; + const cachePath = writeTempCache({ + "opencode-go": { models: cacheModels }, + }); + + await listModels(staticRegistry(staticModels), { + discover: true, + _discoveryCacheFilePath: cachePath, + }); + + const rendered = output.join("\n"); + // Static entries should be present + assert.match(rendered, /static-model-0/); + // Live-only entries should be present + assert.match(rendered, /live-only-0/); + // Total opencode-go rows = 15 (9 static + 6 new from cache) + const lines = rendered.split("\n").filter((l) => l.includes("opencode-go")); + assert.equal(lines.length, 15, `expected 15 opencode-go rows, got ${lines.length}`); + }); + + it("gracefully handles missing discovery cache file — falls back to static only", async () => { + const nonExistentPath = join(tmpdir(), `no-such-file-${Date.now()}.json`); + + await listModels(staticRegistry([model("anthropic", "claude-opus-4")]), { + discover: true, + _discoveryCacheFilePath: nonExistentPath, + }); + + const rendered = output.join("\n"); + assert.match(rendered, /claude-opus-4/); + }); + + it("gracefully handles malformed discovery cache JSON — falls back to static only", async () => { + const path = join(tmpdir(), `bad-cache-${Date.now()}.json`); + writeFileSync(path, "{ this is not valid json ::::", "utf-8"); + + await listModels(staticRegistry([model("anthropic", "claude-sonnet-4")]), { + discover: true, + _discoveryCacheFilePath: path, + }); + + const rendered = output.join("\n"); + assert.match(rendered, /claude-sonnet-4/); + }); + + it("synthesized cache entries are marked [discovered] in output", async () => { + const cachePath = writeTempCache({ + "kimi-coding": { + models: [{ id: "kimi-for-coding", provider: "kimi-coding", api: "openai-completions", input: ["text"] }], + }, + }); + + await listModels(staticRegistry(), { + discover: true, + _discoveryCacheFilePath: cachePath, + }); + + const rendered = output.join("\n"); + const kimiLine = rendered.split("\n").find((l) => l.includes("kimi-for-coding")); + assert.ok(kimiLine, "kimi-for-coding should appear in output"); + assert.match(kimiLine, /\[discovered\]/, "cache entry should be marked [discovered]"); + }); +}); diff --git a/packages/coding-agent/src/cli/list-models.ts b/packages/coding-agent/src/cli/list-models.ts index 8d056f7d7..2d2c39b54 100644 --- a/packages/coding-agent/src/cli/list-models.ts +++ b/packages/coding-agent/src/cli/list-models.ts @@ -2,11 +2,87 @@ * List available models with optional fuzzy search and discovery support */ +import { readFileSync } from "node:fs"; +import { homedir } from "node:os"; +import { join } from "node:path"; import type { Api, Model } from "@singularity-forge/ai"; import { fuzzyFilter } from "@singularity-forge/tui/fuzzy"; import { getDiscoverableProviders } from "../core/model-discovery.js"; import type { ModelRegistry } from "../core/model-registry.js"; +/** + * Read ~/.sf/agent/discovery-cache.json and return synthesized Model objects + * for entries not already present in `existingKeys` (set of "provider/id" strings). + * Entries from the cache are marked with `_fromDiscoveryCache: true` via the + * badge field in the row builder below. + * + * Gracefully returns [] on missing file, malformed JSON, or unexpected shape. + */ +export interface SynthesizedModel extends Model { + /** True when this model came from the discovery cache, not the static catalog */ + _fromDiscoveryCache: boolean; +} + +function readDiscoveryCacheModels( + existingKeys: Set, + cacheFilePath?: string, +): SynthesizedModel[] { + try { + const path = + cacheFilePath ?? join(homedir(), ".sf", "agent", "discovery-cache.json"); + const raw = readFileSync(path, "utf-8"); + const data = JSON.parse(raw) as { + version?: number; + entries?: Record< + string, + { + models?: Array<{ + id: string; + name?: string; + contextWindow?: number; + maxTokens?: number; + reasoning?: boolean; + input?: ("text" | "image")[]; + cost?: { + input: number; + output: number; + cacheRead: number; + cacheWrite: number; + }; + }>; + } + >; + }; + if (data.version !== 1 || typeof data.entries !== "object") return []; + + const synthesized: SynthesizedModel[] = []; + for (const [provider, entry] of Object.entries(data.entries)) { + if (!Array.isArray(entry?.models)) continue; + for (const m of entry.models) { + if (!m?.id) continue; + const key = `${provider}/${m.id}`; + if (existingKeys.has(key)) continue; + synthesized.push({ + provider, + id: m.id, + name: m.name ?? m.id, + api: "openai-completions", + baseUrl: "", + contextWindow: m.contextWindow ?? 0, + maxTokens: m.maxTokens ?? 0, + cost: m.cost ?? { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + reasoning: m.reasoning ?? false, + input: m.input ?? ["text"], + _fromDiscoveryCache: true, + }); + } + } + return synthesized; + } catch { + return []; + } +} + export interface ListModelsOptions { /** Include discovered models in output */ discover?: boolean; @@ -19,6 +95,11 @@ export interface ListModelsOptions { * preferences. */ modelFilter?: (model: Model) => boolean; + /** + * Override path to discovery-cache.json. Defaults to + * ~/.sf/agent/discovery-cache.json. Intended for testing only. + */ + _discoveryCacheFilePath?: string; } /** @@ -85,7 +166,7 @@ export async function listModels( // Live-listed providers must not fall back to stale static catalog rows once a // discovery pass was attempted. If the provider returns 401/429/empty, it // contributes zero rows to this diagnostic output. - const models = shouldDiscover + const baseModels = shouldDiscover ? [ ...modelRegistry .getAvailable() @@ -96,13 +177,30 @@ export async function listModels( ] : modelRegistry.getAvailable(); + // Merge entries from the discovery cache that aren't already in baseModels. + // This covers SF-managed providers (opencode, opencode-go, kimi-coding, …) + // whose SDK adapters have supportsDiscovery=false and are therefore skipped + // by modelRegistry.discoverModels(). The cache was populated by + // refreshSfManagedProviders() before this call (in src/cli.ts). + // Static catalog entries win over synthesized cache entries (dedup by + // "provider/id" — existing key = already in baseModels → skip). + const existingKeys = new Set(baseModels.map((m) => `${m.provider}/${m.id}`)); + const cacheModels = readDiscoveryCacheModels( + existingKeys, + options._discoveryCacheFilePath, + ); + const models: (Model | SynthesizedModel)[] = [ + ...baseModels, + ...cacheModels, + ]; + if (models.length === 0) { console.log("No models available. Set API keys in environment variables."); return; } // Apply fuzzy filter if search pattern provided - let filteredModels: Model[] = exactDiscoveryProvider + let filteredModels: (Model | SynthesizedModel)[] = exactDiscoveryProvider ? models.filter((m) => m.provider.toLowerCase() === exactDiscoveryProvider) : models; if (options.searchPattern && !exactDiscoveryProvider) { @@ -133,7 +231,9 @@ export async function listModels( // Calculate column widths const rows = filteredModels.map((m) => { - const isDiscovered = options.discover && modelRegistry.isDiscovered(m); + const isCacheEntry = "_fromDiscoveryCache" in m && m._fromDiscoveryCache; + const isDiscovered = + isCacheEntry || (options.discover && modelRegistry.isDiscovered(m)); return { provider: m.provider, model: m.id, diff --git a/src/headless-mark-state.ts b/src/headless-mark-state.ts new file mode 100644 index 000000000..863c51712 --- /dev/null +++ b/src/headless-mark-state.ts @@ -0,0 +1,214 @@ +/** + * Headless commands for marking SF state out-of-band: + * + * sf headless complete-slice / [--reason "..."] + * sf headless skip-slice / [--reason "..."] + * sf headless complete-milestone [--reason "..."] + * + * Why these exist: there are several legitimate cases where an + * operator needs to flip slice or milestone status without going + * through the autonomous flow: + * + * - Migration: pulling a planning doc into the DB-first format + * leaves stale placeholder slices that wedge the active-slice + * resolver. Without a CLI you'd drop to sqlite3 / python. + * - Recovery: an autonomous run dies between writing the + * filesystem evidence and flipping the DB row. + * - Recording out-of-band work: code shipped through a parallel + * channel (manual dev, paired session) and the slice needs to + * reflect that without re-running execute-slice. + * + * All three commands bypass the RPC child for the same reason + * `headless query` does — they're deterministic DB operations that + * shouldn't pay the cost of spinning up an agent. They are + * idempotent: a slice already in the target status returns ok + * with a note rather than failing. + * + * Refuses cross-milestone-id ambiguity. Refuses unknown ids + * outright instead of silently no-op'ing (which would be a + * dangerous-looking 0-row UPDATE). + */ + +import { existsSync } from "node:fs"; +import { join } from "node:path"; +import { createJiti } from "@mariozechner/jiti"; +import { resolveBundledSourceResource } from "./bundled-resource-path.js"; +import { getSfEnv } from "./env.js"; + +const jiti = createJiti(import.meta.filename, { + interopDefault: true, + debug: false, +}); + +const agentExtensionsDir = join(getSfEnv().agentDir, "extensions", "sf"); +const useAgentDir = existsSync(join(agentExtensionsDir, "state.js")); + +export interface MarkStateResult { + exitCode: number; +} + +interface MarkStateOptions { + command: "complete-slice" | "skip-slice" | "complete-milestone"; + args: string[]; + json: boolean; +} + +function sfExtensionPath(moduleName: string): string { + if (useAgentDir) return join(agentExtensionsDir, `${moduleName}.js`); + const tsPath = resolveBundledSourceResource( + import.meta.url, + "extensions", + "sf", + `${moduleName}.ts`, + ); + if (existsSync(tsPath)) return tsPath; + return resolveBundledSourceResource( + import.meta.url, + "extensions", + "sf", + `${moduleName}.js`, + ); +} + +function parseRef(ref: string): { milestoneId: string; sliceId: string | null } { + const trimmed = ref.trim(); + const slash = trimmed.indexOf("/"); + if (slash < 0) return { milestoneId: trimmed, sliceId: null }; + return { + milestoneId: trimmed.slice(0, slash), + sliceId: trimmed.slice(slash + 1), + }; +} + +function extractReason(args: string[]): string { + const idx = args.indexOf("--reason"); + if (idx >= 0 && idx + 1 < args.length) { + return args[idx + 1] ?? ""; + } + return ""; +} + +function emit( + out: NodeJS.WritableStream, + json: boolean, + payload: Record, + humanLine: string, +): void { + if (json) { + out.write(`${JSON.stringify(payload)}\n`); + } else { + out.write(`${humanLine}\n`); + } +} + +export async function handleMarkState( + basePath: string, + options: MarkStateOptions, +): Promise { + // First positional arg is the milestone/slice ref. The dispatcher in + // headless.ts has already filtered the flags out of args, but we still + // have to skip --reason and its value here. + const positional = options.args.filter((arg, i, all) => { + if (arg === "--reason") return false; + if (i > 0 && all[i - 1] === "--reason") return false; + return !arg.startsWith("--"); + }); + if (positional.length === 0) { + process.stderr.write( + `[headless] Error: ${options.command} requires a reference (e.g. M010/S03)\n`, + ); + return { exitCode: 2 }; + } + const reason = extractReason(options.args); + const ref = parseRef(positional[0]); + + const autoStartModule = (await jiti.import(sfExtensionPath("auto-start"), {})) as { + openProjectDbIfPresent: (basePath: string) => Promise; + }; + await autoStartModule.openProjectDbIfPresent(basePath); + + const slicesModule = (await jiti.import(sfExtensionPath("sf-db/sf-db-slices"), {})) as { + getSlice: (mid: string, sid: string) => { status: string } | null; + updateSliceStatus: (mid: string, sid: string, status: string, completedAt: string | null) => void; + setSliceSummaryMd?: (mid: string, sid: string, summaryMd: string, uatMd: string) => void; + }; + const milestonesModule = (await jiti.import(sfExtensionPath("sf-db/sf-db-milestones"), {})) as { + getMilestone: (id: string) => { status: string } | null; + updateMilestoneStatus: (id: string, status: string, completedAt: string | null) => void; + }; + + const now = new Date().toISOString(); + + if (options.command === "complete-milestone") { + if (ref.sliceId !== null) { + process.stderr.write( + `[headless] Error: complete-milestone takes a milestone id, not /\n`, + ); + return { exitCode: 2 }; + } + const m = milestonesModule.getMilestone(ref.milestoneId); + if (!m) { + process.stderr.write( + `[headless] Error: milestone ${ref.milestoneId} not found\n`, + ); + return { exitCode: 1 }; + } + if (m.status === "complete") { + emit(process.stdout, options.json, { + ok: true, + idempotent: true, + milestone_id: ref.milestoneId, + status: m.status, + }, `${ref.milestoneId} already complete`); + return { exitCode: 0 }; + } + milestonesModule.updateMilestoneStatus(ref.milestoneId, "complete", now); + emit(process.stdout, options.json, { + ok: true, + milestone_id: ref.milestoneId, + previous_status: m.status, + status: "complete", + completed_at: now, + reason, + }, `${ref.milestoneId}: ${m.status} → complete`); + return { exitCode: 0 }; + } + + // complete-slice / skip-slice + if (ref.sliceId === null) { + process.stderr.write( + `[headless] Error: ${options.command} requires / (e.g. M010/S03)\n`, + ); + return { exitCode: 2 }; + } + const slice = slicesModule.getSlice(ref.milestoneId, ref.sliceId); + if (!slice) { + process.stderr.write( + `[headless] Error: slice ${ref.milestoneId}/${ref.sliceId} not found\n`, + ); + return { exitCode: 1 }; + } + const targetStatus = + options.command === "complete-slice" ? "complete" : "skipped"; + if (slice.status === targetStatus) { + emit(process.stdout, options.json, { + ok: true, + idempotent: true, + milestone_id: ref.milestoneId, + slice_id: ref.sliceId, + status: slice.status, + }, `${ref.milestoneId}/${ref.sliceId} already ${targetStatus}`); + return { exitCode: 0 }; + } + slicesModule.updateSliceStatus(ref.milestoneId, ref.sliceId, targetStatus, now); + emit(process.stdout, options.json, { + ok: true, + milestone_id: ref.milestoneId, + slice_id: ref.sliceId, + previous_status: slice.status, + status: targetStatus, + completed_at: now, + reason, + }, `${ref.milestoneId}/${ref.sliceId}: ${slice.status} → ${targetStatus}`); + return { exitCode: 0 }; +} diff --git a/src/headless.ts b/src/headless.ts index a13b7069d..73fa6705f 100644 --- a/src/headless.ts +++ b/src/headless.ts @@ -816,6 +816,23 @@ async function runHeadlessOnce( return { exitCode: result.exitCode, interrupted: false, timedOut: false }; } + // Out-of-band slice/milestone status updates. Bypass the RPC child + // because they're deterministic single-row DB UPDATEs — no agent + // needed. See headless-mark-state.ts for the why. + if ( + options.command === "complete-slice" || + options.command === "skip-slice" || + options.command === "complete-milestone" + ) { + const { handleMarkState } = await import("./headless-mark-state.js"); + const result = await handleMarkState(process.cwd(), { + command: options.command, + args: options.commandArgs, + json: options.json, + }); + return { exitCode: result.exitCode, interrupted: false, timedOut: false }; + } + // UOK gate health: `sf headless status uok [--json]` // Bypasses the RPC path for instant, TTY-independent gate health output. if (options.command === "status" && options.commandArgs[0] === "uok") { diff --git a/src/help-text.ts b/src/help-text.ts index a24b32391..35e5fea44 100644 --- a/src/help-text.ts +++ b/src/help-text.ts @@ -226,6 +226,9 @@ const SUBCOMMAND_HELP: Record = { " usage Live LLM-provider usage snapshot (today: gemini-cli tier + per-model quota)", " reflect Assemble reflection corpus + render prompt for cross-corpus pattern analysis (--json for raw, --run to dispatch to gemini-cli, --model to override)", " triage Render canonical self-feedback triage prompt (--list/--json inspect, --run writes decisions, --apply runs triage-decider -> review-code)", + " complete-slice Mark a slice complete out-of-band: complete-slice / [--reason ]", + " skip-slice Mark a slice skipped out-of-band (placeholder/migration cleanup)", + " complete-milestone Mark a milestone complete out-of-band: complete-milestone [--reason ]", "", "new-milestone flags:", " --context Path to spec/PRD file (use '-' for stdin)", @@ -262,6 +265,9 @@ const SUBCOMMAND_HELP: Record = { " sf headless triage --apply Apply via triage-decider, then gate with review-code", " sf headless reflect Render reflection prompt for piping", " sf headless reflect --run Dispatch reflection + write report", + " sf headless complete-slice M010/S03 Flip M010/S03 to status=complete (idempotent)", + " sf headless skip-slice M003/S01 --reason \"migration placeholder\" Mark placeholder slice skipped", + " sf headless complete-milestone M010 Flip milestone to status=complete", "", "Exit codes: 0 = success, 1 = error/timeout, 10 = blocked, 11 = cancelled", ].join("\n"), diff --git a/src/resources/extensions/sf/bootstrap/system-context.js b/src/resources/extensions/sf/bootstrap/system-context.js index 89c197c88..890536ab4 100644 --- a/src/resources/extensions/sf/bootstrap/system-context.js +++ b/src/resources/extensions/sf/bootstrap/system-context.js @@ -1,13 +1,18 @@ -import { existsSync, readdirSync, readFileSync, statSync, unlinkSync } from "node:fs"; -import { sfHome } from '../sf-home.js'; +import { + existsSync, + readdirSync, + readFileSync, + statSync, + unlinkSync, +} from "node:fs"; import { join } from "node:path"; +import { toPosixPath } from "../../shared/mod.js"; +import { isCanAskUser } from "../auto.js"; +import { getActiveAutoWorktreeContext } from "../auto-worktree.js"; import { markCmuxPromptShown, shouldPromptToEnableCmux, } from "../cmux/index.js"; -import { toPosixPath } from "../../shared/mod.js"; -import { isCanAskUser } from "../auto.js"; -import { getActiveAutoWorktreeContext } from "../auto-worktree.js"; import { buildCodeIntelligenceContextBlock } from "../code-intelligence.js"; import { ensureCodebaseMapFresh, @@ -50,6 +55,7 @@ import { isDbAvailable, listSelfFeedbackEntries, } from "../sf-db.js"; +import { sfHome } from "../sf-home.js"; import { detectNewSkills, formatSkillsXml, @@ -391,13 +397,23 @@ const TACIT_SECTION_MAX_BYTES = 4096; // (#sf-moobj36p-ko6snt) const SELF_FEEDBACK_MAX_ENTRIES = 20; const SELF_FEEDBACK_MAX_CHARS = 4000; -function loadSelfFeedbackBlock(_cwd) { +/** + * Render unresolved self-feedback for startup context injection. + * + * Purpose: keep SF agents aware of current internal anomalies with stable entry + * IDs so they can inspect, triage, or resolve the exact feedback item instead + * of acting on an unaddressable summary. + * + * Consumer: buildBeforeAgentStartResult and system-context unit tests. + */ +export function loadSelfFeedbackBlock(_cwd) { let entries = []; if (isDbAvailable()) { const rows = listSelfFeedbackEntries(); entries = rows .filter((r) => !r["resolved_at"]) .map((r) => ({ + id: r["id"] ?? "", timestamp: r["ts"] ?? "", kind: r["kind"] ?? "", severity: r["severity"] ?? "low", @@ -426,7 +442,10 @@ function loadSelfFeedbackBlock(_cwd) { } // Render compact summaries — full evidence is not injected here const rows = kept - .map((e) => `- **${e.severity}** \`${e.kind}\` — ${e.summary}`) + .map( + (e) => + `- **${e.severity}** ${e.id ? `\`${e.id}\` ` : ""}\`${e.kind}\` — ${e.summary}`, + ) .join("\n"); let block = `## Self-Feedback Entries (ordered by severity, ${kept.length}/${entries.length} shown)\n\n${rows}`; // If still over char budget, drop from tail (lowest priority first) @@ -436,7 +455,10 @@ function loadSelfFeedbackBlock(_cwd) { block = `## Self-Feedback Entries (ordered by severity, truncated)\n\n` + kept - .map((e) => `- **${e.severity}** \`${e.kind}\` — ${e.summary}`) + .map( + (e) => + `- **${e.severity}** ${e.id ? `\`${e.id}\` ` : ""}\`${e.kind}\` — ${e.summary}`, + ) .join("\n"); } } diff --git a/src/resources/extensions/sf/memory-extractor.js b/src/resources/extensions/sf/memory-extractor.js index b44d589db..640f30f57 100644 --- a/src/resources/extensions/sf/memory-extractor.js +++ b/src/resources/extensions/sf/memory-extractor.js @@ -6,6 +6,7 @@ import { readFileSync, statSync } from "node:fs"; import { delay } from "./atomic-write.js"; +import { debugLog } from "./debug-logger.js"; import { applyConsolidationActions, decayStaleMemories, @@ -252,30 +253,50 @@ export async function extractMemoriesFromUnit( llmCallFn, ) { // Mutex guard - if (_extracting) return; + if (_extracting) { + debugLog("memory-extract", { phase: "skip", reason: "mutex-busy", unitKey: `${unitType}/${unitId}` }); + return; + } // Rate limit const now = Date.now(); - if (now - _lastExtractionTime < MIN_EXTRACTION_INTERVAL_MS) return; + if (now - _lastExtractionTime < MIN_EXTRACTION_INTERVAL_MS) { + debugLog("memory-extract", { phase: "skip", reason: "rate-limited", unitKey: `${unitType}/${unitId}`, lastExtraction: _lastExtractionTime }); + return; + } // Skip certain unit types - if (SKIP_TYPES.has(unitType)) return; + if (SKIP_TYPES.has(unitType)) { + debugLog("memory-extract", { phase: "skip", reason: "skip-type", unitType }); + return; + } const unitKey = `${unitType}/${unitId}`; // Already processed - if (isUnitProcessed(unitKey)) return; + if (isUnitProcessed(unitKey)) { + debugLog("memory-extract", { phase: "skip", reason: "already-processed", unitKey }); + return; + } // Check file size try { const stat = statSync(activityFile); - if (stat.size < MIN_ACTIVITY_SIZE) return; + if (stat.size < MIN_ACTIVITY_SIZE) { + debugLog("memory-extract", { phase: "skip", reason: "file-too-small", unitKey, size: stat.size, min: MIN_ACTIVITY_SIZE }); + return; + } } catch { + debugLog("memory-extract", { phase: "skip", reason: "stat-failed", unitKey, file: activityFile }); return; } _extracting = true; _lastExtractionTime = now; + debugLog("memory-extract", { phase: "start", unitKey, file: activityFile }); let userPrompt; try { // Read and parse activity file const raw = readFileSync(activityFile, "utf-8"); const transcript = extractTranscriptFromActivity(raw); - if (!transcript.trim()) return; + if (!transcript.trim()) { + debugLog("memory-extract", { phase: "skip", reason: "empty-transcript", unitKey }); + return; + } // Redact secrets const safeTranscript = redactSecrets(transcript); // Get current memories for context @@ -292,18 +313,23 @@ export async function extractMemoriesFromUnit( safeTranscript, ); // Call LLM + debugLog("memory-extract", { phase: "llm-call", unitKey, transcriptChars: safeTranscript.length }); const response = await llmCallFn(EXTRACTION_SYSTEM, userPrompt); // Parse response const actions = parseMemoryResponse(response); + debugLog("memory-extract", { phase: "parsed", unitKey, actions: actions.length }); // Apply actions (consolidation-path only — add/prune discipline enforced) if (actions.length > 0) { applyConsolidationActions(actions, unitType, unitId); + debugLog("memory-extract", { phase: "applied", unitKey, actions: actions.length }); } // Decay stale memories periodically decayStaleMemories(20); // Mark unit as processed markUnitProcessed(unitKey, activityFile); - } catch { + debugLog("memory-extract", { phase: "done", unitKey }); + } catch (err) { + debugLog("memory-extract", { phase: "error", unitKey, error: err?.message || String(err) }); // Retry once after a brief delay if (userPrompt) { try { @@ -312,7 +338,9 @@ export async function extractMemoriesFromUnit( const actions2 = parseMemoryResponse(response2); if (actions2.length > 0) applyConsolidationActions(actions2, unitType, unitId); markUnitProcessed(unitKey, activityFile); - } catch { + debugLog("memory-extract", { phase: "retry-success", unitKey }); + } catch (err2) { + debugLog("memory-extract", { phase: "retry-failed", unitKey, error: err2?.message || String(err2) }); // Non-fatal — memory extraction failure should never affect autonomous mode } } diff --git a/src/resources/extensions/sf/tests/auto-halt-self-feedback.test.mjs b/src/resources/extensions/sf/tests/auto-halt-self-feedback.test.mjs new file mode 100644 index 000000000..aa7e8e837 --- /dev/null +++ b/src/resources/extensions/sf/tests/auto-halt-self-feedback.test.mjs @@ -0,0 +1,117 @@ +import assert from "node:assert/strict"; +import { + existsSync, + mkdirSync, + readFileSync, + rmSync, + writeFileSync, +} from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, beforeEach, test, vi } from "vitest"; +import { HaltWatchdog } from "../auto/loop.js"; +import { installNotifyInterceptor } from "../bootstrap/notify-interceptor.js"; +import { + _resetNotificationStore, + initNotificationStore, +} from "../notification-store.js"; +import { + closeDatabase, + listSelfFeedbackEntries, + openDatabase, +} from "../sf-db.js"; + +let testDir; + +beforeEach(() => { + _resetNotificationStore(); + testDir = join(tmpdir(), `sf-halt-self-feedback-test-${Date.now()}`); + mkdirSync(testDir, { recursive: true }); + writeFileSync( + join(testDir, "package.json"), + JSON.stringify({ name: "singularity-forge" }), + ); + mkdirSync(join(testDir, ".sf"), { recursive: true }); + openDatabase(join(testDir, ".sf", "sf.db")); + initNotificationStore(testDir); +}); + +afterEach(() => { + rmSync(testDir, { recursive: true, force: true }); + _resetNotificationStore(); + closeDatabase(); + vi.useRealTimers(); +}); + +test("HaltWatchdog.check()_when_idle_exceeds_threshold_creates_self_feedback_db_entry_and_markdown", () => { + vi.useFakeTimers(); + const thresholdMs = 10; + const watchdog = new HaltWatchdog(testDir, thresholdMs); + const originalNotify = vi.fn(); + const mockCtx = { ui: { notify: originalNotify }, basePath: testDir }; + installNotifyInterceptor(mockCtx); + + watchdog.heartbeat(); + vi.advanceTimersByTime(thresholdMs + 5); + const result = watchdog.check(mockCtx, 42); + + assert.equal(result.stuck, true); + assert.equal(originalNotify.mock.calls.length, 1); + + const entries = listSelfFeedbackEntries(); + assert.equal(entries.length, 1); + assert.equal(entries[0].kind, "runaway-loop:idle-halt"); + assert.equal(entries[0].severity, "high"); + assert.equal(entries[0].blocking, true); + assert.match(entries[0].summary, /iteration 42/); + assert.match(entries[0].summary, /no heartbeat for/); + assert.match(entries[0].evidence, /threshold 10ms/); + + const markdownPath = join(testDir, ".sf", "SELF-FEEDBACK.md"); + assert.equal(existsSync(markdownPath), true); + const markdown = readFileSync(markdownPath, "utf-8"); + assert.match(markdown, /runaway-loop:idle-halt/); + assert.match(markdown, /iteration 42/); +}); + +test("HaltWatchdog.check()_deduplicates_within_same_idle_period", () => { + vi.useFakeTimers(); + const thresholdMs = 10; + const watchdog = new HaltWatchdog(testDir, thresholdMs); + const originalNotify = vi.fn(); + const mockCtx = { ui: { notify: originalNotify }, basePath: testDir }; + installNotifyInterceptor(mockCtx); + + watchdog.heartbeat(); + vi.advanceTimersByTime(thresholdMs + 5); + watchdog.check(mockCtx, 1); + watchdog.check(mockCtx, 2); + + assert.equal(listSelfFeedbackEntries().length, 1); +}); + +test("HaltWatchdog.check()_after_new_heartbeat_creates_second_entry", () => { + vi.useFakeTimers(); + const thresholdMs = 10; + const watchdog = new HaltWatchdog(testDir, thresholdMs); + const originalNotify = vi.fn(); + const mockCtx = { ui: { notify: originalNotify }, basePath: testDir }; + installNotifyInterceptor(mockCtx); + + watchdog.heartbeat(); + vi.advanceTimersByTime(thresholdMs + 5); + watchdog.check(mockCtx, 1); + + watchdog.heartbeat(); + vi.advanceTimersByTime(thresholdMs + 5); + watchdog.check(mockCtx, 2); + + const entries = listSelfFeedbackEntries(); + assert.equal(entries.length, 2); + assert.equal(entries[0].kind, "runaway-loop:idle-halt"); + assert.equal(entries[1].kind, "runaway-loop:idle-halt"); + + const markdownPath = join(testDir, ".sf", "SELF-FEEDBACK.md"); + const markdown = readFileSync(markdownPath, "utf-8"); + assert.match(markdown, /runaway-loop:idle-halt/); +}); diff --git a/src/resources/extensions/sf/tests/system-context-self-feedback.test.mjs b/src/resources/extensions/sf/tests/system-context-self-feedback.test.mjs new file mode 100644 index 000000000..50d1456d5 --- /dev/null +++ b/src/resources/extensions/sf/tests/system-context-self-feedback.test.mjs @@ -0,0 +1,30 @@ +import { describe, expect, test, vi } from "vitest"; + +vi.mock("../sf-db.js", () => ({ + getActiveMemories: vi.fn(() => []), + isDbAvailable: vi.fn(() => true), + listSelfFeedbackEntries: vi.fn(() => [ + { + id: "sf-dogfood-fanout-001", + ts: "2026-05-15T14:03:00.000Z", + kind: "agent-friction", + severity: "medium", + summary: + "Dogfood run did not expose whether fan-out was skipped intentionally.", + }, + ]), +})); + +const { loadSelfFeedbackBlock } = await import( + "../bootstrap/system-context.js" +); + +describe("loadSelfFeedbackBlock", () => { + test("includes_entry_ids_so_agents_can_act_on_feedback", () => { + const block = loadSelfFeedbackBlock("/repo"); + + expect(block).toContain("sf-dogfood-fanout-001"); + expect(block).toContain("agent-friction"); + expect(block).toContain("Dogfood run did not expose"); + }); +});