From a73ea845e7a78353048c78633738265f5ea019d7 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Wed, 6 May 2026 10:04:20 +0200 Subject: [PATCH] sf snapshot: uncommitted changes after 61m inactivity --- docs/adr/0075-uok-gate-architecture.md | 103 ++++ src/headless-query.ts | 15 +- src/resources/extensions/sf/auto-post-unit.js | 22 +- .../extensions/sf/commands-schedule.js | 44 +- src/resources/extensions/sf/sf-db.js | 328 ++++++++++++- .../sf/tests/commands-schedule.test.mjs | 62 +++ .../sf/tests/uok-gate-runner.test.mjs | 457 ++++++++++++++++++ .../extensions/sf/tests/uok-gates.test.mjs | 387 +++++++++++++++ .../sf/tests/uok-message-bus.test.mjs | 142 ++++++ .../sf/tests/uok-outcome-ledger.test.mjs | 117 +++++ .../sf/tests/uok-unit-runtime.test.mjs | 292 +++++++++++ .../extensions/sf/uok/chaos-monkey.js | 123 ++++- src/resources/extensions/sf/uok/contracts.js | 28 +- .../extensions/sf/uok/cost-guard-gate.js | 129 ++++- .../extensions/sf/uok/gate-runner.js | 181 ++++++- .../extensions/sf/uok/message-bus.js | 200 +++++++- .../extensions/sf/uok/metrics-exposition.js | 121 +++++ .../extensions/sf/uok/multi-package-gate.js | 285 +++++++++-- .../sf/uok/outcome-learning-gate.js | 168 ++++++- .../extensions/sf/uok/unit-runtime.js | 3 +- src/tests/schedule-headless-query.test.ts | 26 +- 21 files changed, 3111 insertions(+), 122 deletions(-) create mode 100644 docs/adr/0075-uok-gate-architecture.md create mode 100644 src/resources/extensions/sf/tests/uok-gate-runner.test.mjs create mode 100644 src/resources/extensions/sf/tests/uok-gates.test.mjs create mode 100644 src/resources/extensions/sf/tests/uok-message-bus.test.mjs create mode 100644 src/resources/extensions/sf/tests/uok-outcome-ledger.test.mjs create mode 100644 src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs create mode 100644 src/resources/extensions/sf/uok/metrics-exposition.js diff --git a/docs/adr/0075-uok-gate-architecture.md b/docs/adr/0075-uok-gate-architecture.md new file mode 100644 index 
000000000..b3fe691d8 --- /dev/null +++ b/docs/adr/0075-uok-gate-architecture.md @@ -0,0 +1,103 @@ +# ADR-0075: UOK Gate Architecture + +**Status:** Accepted +**Date:** 2026-05-06 +**Deciders:** UOK subsystem migration (M013 S04) + +## Context + +The Unit Orchestration Kernel (UOK) post-unit verification flow originally had a single ad-hoc gate: the Security Gate (secret scanning). As the autonomous loop matured, we needed a structured, extensible way to enforce policy, verify correctness, learn from outcomes, and stress-test durability — without bloating the kernel loop with inline conditionals. + +## Decision + +We adopt a **gate-runner pattern** with explicitly typed gates, a uniform execution contract, durable audit logging, and a configurable retry matrix. + +### Gate Contract + +Every gate implements: + +- `id: string` — unique identifier (e.g. `"cost-guard"`) +- `type: string` — `"security" | "policy" | "verification" | "learning" | "chaos"` +- `execute(ctx: UokContext, attempt: number): Promise<GateResult>` + +The `UokContext` carries traceable identifiers (`traceId`, `turnId`, `unitType`, `unitId`, `modelId`, `provider`) plus runtime telemetry (`tokenCount`, `costUsd`, `durationMs`). 
+ +The `GateResult` is a sealed union: + +- `outcome: "pass" | "fail" | "retry" | "manual-attention"` +- `failureClass: "policy" | "verification" | "execution" | "artifact" | "git" | "timeout" | "input" | "closeout" | "manual-attention" | "unknown"` +- `rationale: string` — human-readable explanation +- `findings?: string` — structured output (diffs, logs, cost breakdowns) +- `recommendation?: string` — actionable next step + +### Retry Matrix + +The `UokGateRunner` consults a per-failure-class retry ceiling: + +| failureClass | max retries | +|-------------|-------------| +| policy, input, manual-attention | 0 | +| execution, artifact, verification, git | 1 | +| timeout | 2 | +| unknown | 0 | + +Retries are persisted to the `gate_runs` SQLite table and emitted as audit events so operators can reconstruct the full retry chain. + +### Implemented Gates + +| Gate | Type | Purpose | Durable Store | +|------|------|---------|---------------| +| **SecurityGate** | security | Run `scripts/secret-scan.sh` against uncommitted changes | N/A (external script) | +| **CostGuardGate** | policy | Enforce per-unit and per-hour USD budgets; detect high-tier model burn | `llm_task_outcomes` (SQLite) + `model-cost-table.js` | +| **OutcomeLearningGate** | learning | Detect failure patterns by model, unit type, and escalation rate | `llm_task_outcomes` (SQLite) | +| **MultiPackageGate** | verification | Verify only affected workspace packages and downstream dependents | N/A (git + package.json) | +| **ChaosMonkey** | chaos | Inject latency, partial failures, disk stress, memory pressure | N/A (ephemeral) | + +### Durable Message Bus + +The `MessageBus` persists messages to `.sf/runtime/uok-messages.jsonl` with at-least-once delivery. Inbox read-state is stored per-agent in `.sf/runtime/uok-inbox-{agent}.json`. Messages are pruned by TTL (`retentionDays`, default 7) and inbox size is capped (`maxInboxSize`, default 1000). 
+ +### Chaos Engineering Safety + +`ChaosMonkey` is **opt-in only** (`active: false` by default). It injects recoverable faults only: + +- Latency delays (configurable max) +- Retryable thrown errors (`err.code = "CHAOS_INJECTED"`) +- Disk stress (temp files written then immediately deleted) +- Memory stress (buffers allocated then released) + +It **never** sends `SIGKILL` or mutates production state. + +## Consequences + +**Positive:** + +- Adding a new gate is a single file + registration line — no kernel loop changes. +- Every gate execution is auditable in SQLite and parity JSONL. +- Retry policy is data-driven, not hard-coded per gate. +- Cost and outcome learning are grounded in real ledger data, not heuristics. + +**Negative / Mitigated:** + +- Gate execution adds latency to the verification path. Mitigation: gates run in parallel where possible; timeout defaults are conservative (10s for git diff, 120s for typecheck). +- SQLite queries on the critical path could block. Mitigation: queries are simple indexed SELECTs; the DB is local and WAL-mode. +- ChaosMonkey in a CI environment could destabilize builds. Mitigation: it is explicitly opt-in and defaults to `active: false`. + +## Alternatives Considered + +1. **Inline conditionals in `auto-verification.js`** — rejected because it creates a monolithic, untestable verification block. +2. **Plugin system with dynamic `import()`** — rejected because ESM dynamic imports in an extension context add unnecessary complexity; static imports + a registry Map are sufficient. +3. **Separate microservices for cost/outcome learning** — rejected because the SF design principle keeps all state on disk in `.sf/`; adding network boundaries violates the single-writer invariant. + +## Testing Strategy + +Every gate has dedicated behavioral tests in `tests/uok-gates.test.mjs`: + +- **SecurityGate**: missing script, passing scan, failing scan. 
+- **CostGuardGate**: empty ledger (pass), unit budget exceeded (fail), hourly budget exceeded (fail), high-tier failure pattern (fail). +- **OutcomeLearningGate**: empty ledger (pass), unit failure rate high (fail), model failure rate high (fail), escalation pattern (fail). +- **ChaosMonkey**: inactive (no-op), latency injection, partial failure, disk stress, event clearing. + +`uok-message-bus.test.mjs` covers send/receive, broadcast, persistence across reconstruction, read-state persistence, compaction, conversation filtering, and max-size enforcement. + +`uok-unit-runtime.test.mjs` covers FSM transitions, terminal-status classification, retry budgets, synthetic-unit blocking, and record IO (write/read/clear/list). diff --git a/src/headless-query.ts b/src/headless-query.ts index 1125c5852..02fcf6b3d 100644 --- a/src/headless-query.ts +++ b/src/headless-query.ts @@ -177,6 +177,8 @@ export interface QuerySnapshot { }; uokDiagnostics?: any; schedule?: { + pending_count: number; + overdue_count: number; due: Array<{ id: string; kind: string; @@ -337,13 +339,22 @@ export async function buildQuerySnapshot( })); // Load schedule entries - let scheduleEntries: QuerySnapshot["schedule"] = { due: [], upcoming: [] }; + let scheduleEntries: QuerySnapshot["schedule"] = { + pending_count: 0, + overdue_count: 0, + due: [], + upcoming: [], + }; try { const { createScheduleStore } = await import( "./resources/extensions/sf/schedule/schedule-store.js" ); const store = createScheduleStore(basePath); const now = new Date(); + const all = store.readEntries("project"); + const pending = all.filter((e) => e.status === "pending"); + const nowMs = now.getTime(); + const overdue = pending.filter((e) => Date.parse(e.due_at) <= nowMs); const mapEntry = (e: { id: string; kind: string; @@ -358,6 +369,8 @@ export async function buildQuerySnapshot( payload: e.payload, }); scheduleEntries = { + pending_count: pending.length, + overdue_count: overdue.length, due: store.findDue("project", 
now).map(mapEntry), upcoming: store.findUpcoming("project", now, 7).map(mapEntry), }; diff --git a/src/resources/extensions/sf/auto-post-unit.js b/src/resources/extensions/sf/auto-post-unit.js index 6ca4b1c38..0f4ca46b6 100644 --- a/src/resources/extensions/sf/auto-post-unit.js +++ b/src/resources/extensions/sf/auto-post-unit.js @@ -89,8 +89,10 @@ import { writeTurnGitTransaction, } from "./uok/gitops.js"; import { + captureGitopsDiff, getParityCommitBlockReason, isParityCommitBlocked, + legacyGitopsDecision, } from "./uok/parity-diff-capture.js"; import { isAwaitingUserInput } from "./user-input-boundary.js"; import { writePreExecutionEvidence } from "./verification-evidence.js"; @@ -400,15 +402,27 @@ export async function postUnitPreVerification(pctx, opts) { const configuredTurnAction = uokFlags.gitops ? uokFlags.gitopsTurnAction : "commit"; + const traceId = s.currentTraceId ?? `turn:${unit.startedAt}`; + const turnId = + s.currentTurnId ?? `${unit.type}/${unit.id}/${unit.startedAt}`; + const parityGitAction = uokFlags.gitops + ? captureGitopsDiff({ + basePath: s.basePath, + sessionId: traceId, + turnId, + legacy: legacyGitopsDecision("commit", false), + uok: { + action: configuredTurnAction, + push: uokFlags.gitopsTurnPush, + }, + }).effectiveAction + : configuredTurnAction; const safeTurnGit = resolveParitySafeGitAction({ - action: configuredTurnAction, + action: parityGitAction, push: uokFlags.gitopsTurnPush, status: "ok", }); const turnAction = safeTurnGit.action; - const traceId = s.currentTraceId ?? `turn:${unit.startedAt}`; - const turnId = - s.currentTurnId ?? 
`${unit.type}/${unit.id}/${unit.startedAt}`; s.lastGitActionFailure = null; s.lastGitActionStatus = null; try { diff --git a/src/resources/extensions/sf/commands-schedule.js b/src/resources/extensions/sf/commands-schedule.js index f56ff141e..2ffb432c2 100644 --- a/src/resources/extensions/sf/commands-schedule.js +++ b/src/resources/extensions/sf/commands-schedule.js @@ -442,9 +442,21 @@ async function snoozeItem(args, ctx) { } async function runItem(args, ctx) { - const idPrefix = _joinPlain(_splitArgs(args)); + const parts = _splitArgs(args); + let idPrefix = ""; + let dryRun = false; + for (const part of parts) { + if (part === "--dry-run" || part === "--dry") { + dryRun = true; + continue; + } + if (!idPrefix) idPrefix = part; + } if (!idPrefix) { - ctx.ui.notify("Usage: /sf schedule run \u003cid\u003e", "warning"); + ctx.ui.notify( + "Usage: /sf schedule run [--dry-run] \u003cid\u003e", + "warning", + ); return; } const store = createScheduleStore(_basePath()); @@ -466,6 +478,26 @@ async function runItem(args, ctx) { break; } case "command": { + const command = payload.command; + if (dryRun) { + ctx.ui.notify( + JSON.stringify( + { + id: entry.id, + kind: entry.kind, + status: entry.status, + cwd: _basePath(), + command, + auto_dispatch: entry.auto_dispatch === true, + would_execute: typeof command === "string" && command.length > 0, + }, + null, + 2, + ), + "info", + ); + return; + } const result = executeProjectScheduleCommand(_basePath(), entry); if (!result.ok) { ctx.ui.notify(`Command failed: ${result.reason}`, "error"); @@ -477,6 +509,10 @@ async function runItem(args, ctx) { } case "prompt": { const title = payload.prompt || payload.message || entry.id; + if (dryRun) { + ctx.ui.notify(`Dry run prompt: ${title}`, "info"); + return; + } ctx.ui.notify(`Prompt: ${title}`, "info"); break; } @@ -500,7 +536,7 @@ async function runItem(args, ctx) { * * Consumer: commands dispatcher (dispatcher.js). 
* - * @param {string} args + * @param {string|string[]} args * @param {import("@singularity-forge/pi-coding-agent").ExtensionContext} ctx */ export async function handleSchedule(args, ctx) { @@ -529,7 +565,7 @@ export async function handleSchedule(args, ctx) { " done \u003cid\u003e\n" + " cancel \u003cid\u003e\n" + " snooze \u003cid\u003e --by \u003cduration\u003e\n" + - " run \u003cid\u003e", + " run [--dry-run] \u003cid\u003e", "info", ); return; diff --git a/src/resources/extensions/sf/sf-db.js b/src/resources/extensions/sf/sf-db.js index 8c32d0138..66773dedc 100644 --- a/src/resources/extensions/sf/sf-db.js +++ b/src/resources/extensions/sf/sf-db.js @@ -78,7 +78,7 @@ function openRawDb(path) { loadProvider(); return new DatabaseSync(path); } -const SCHEMA_VERSION = 27; +const SCHEMA_VERSION = 28; function indexExists(db, name) { return !!db .prepare( @@ -488,7 +488,19 @@ function initSchema(db, fileBacked) { attempt INTEGER NOT NULL DEFAULT 1, max_attempts INTEGER NOT NULL DEFAULT 1, retryable INTEGER NOT NULL DEFAULT 0, - evaluated_at TEXT NOT NULL DEFAULT '' + evaluated_at TEXT NOT NULL DEFAULT '', + duration_ms INTEGER DEFAULT NULL + ) + `); + db.exec(` + CREATE TABLE IF NOT EXISTS gate_circuit_breakers ( + gate_id TEXT PRIMARY KEY, + state TEXT NOT NULL DEFAULT 'closed', + failure_streak INTEGER NOT NULL DEFAULT 0, + last_failure_at TEXT DEFAULT NULL, + opened_at TEXT DEFAULT NULL, + half_open_attempts INTEGER NOT NULL DEFAULT 0, + updated_at TEXT NOT NULL DEFAULT '' ) `); db.exec(` @@ -1193,7 +1205,8 @@ function migrateSchema(db) { attempt INTEGER NOT NULL DEFAULT 1, max_attempts INTEGER NOT NULL DEFAULT 1, retryable INTEGER NOT NULL DEFAULT 0, - evaluated_at TEXT NOT NULL DEFAULT '' + evaluated_at TEXT NOT NULL DEFAULT '', + duration_ms INTEGER DEFAULT NULL ) `); db.exec(` @@ -1528,6 +1541,33 @@ function migrateSchema(db) { ":applied_at": new Date().toISOString(), }); } + if (currentVersion < 28) { + // UOK observability: gate execution latency + 
ensureColumn( + db, + "gate_runs", + "duration_ms", + "ALTER TABLE gate_runs ADD COLUMN duration_ms INTEGER DEFAULT NULL", + ); + // UOK circuit breaker state + db.exec(` + CREATE TABLE IF NOT EXISTS gate_circuit_breakers ( + gate_id TEXT PRIMARY KEY, + state TEXT NOT NULL DEFAULT 'closed', + failure_streak INTEGER NOT NULL DEFAULT 0, + last_failure_at TEXT DEFAULT NULL, + opened_at TEXT DEFAULT NULL, + half_open_attempts INTEGER NOT NULL DEFAULT 0, + updated_at TEXT NOT NULL DEFAULT '' + ) + `); + db.prepare( + "INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)", + ).run({ + ":version": 28, + ":applied_at": new Date().toISOString(), + }); + } db.exec("COMMIT"); } catch (err) { db.exec("ROLLBACK"); @@ -3415,10 +3455,10 @@ export function insertGateRun(entry) { currentDb .prepare(`INSERT INTO gate_runs ( trace_id, turn_id, gate_id, gate_type, unit_type, unit_id, milestone_id, slice_id, task_id, - outcome, failure_class, rationale, findings, attempt, max_attempts, retryable, evaluated_at + outcome, failure_class, rationale, findings, attempt, max_attempts, retryable, evaluated_at, duration_ms ) VALUES ( :trace_id, :turn_id, :gate_id, :gate_type, :unit_type, :unit_id, :milestone_id, :slice_id, :task_id, - :outcome, :failure_class, :rationale, :findings, :attempt, :max_attempts, :retryable, :evaluated_at + :outcome, :failure_class, :rationale, :findings, :attempt, :max_attempts, :retryable, :evaluated_at, :duration_ms )`) .run({ ":trace_id": entry.traceId, @@ -3438,6 +3478,7 @@ export function insertGateRun(entry) { ":max_attempts": entry.maxAttempts, ":retryable": entry.retryable ? 1 : 0, ":evaluated_at": entry.evaluatedAt, + ":duration_ms": entry.durationMs ?? 
null, }); } export function upsertTurnGitTransaction(entry) { @@ -3768,7 +3809,25 @@ export function getLlmTaskOutcomesByUnit(unitType, unitId, limit = 20) { try { return currentDb .prepare( - , + `SELECT + model_id, + provider, + unit_type, + unit_id, + succeeded, + retries, + escalated, + verification_passed, + blocker_discovered, + duration_ms, + tokens_total, + cost_usd, + recorded_at + FROM llm_task_outcomes + WHERE unit_type = :unit_type + AND unit_id = :unit_id + ORDER BY recorded_at DESC + LIMIT :limit`, ) .all({ ":unit_type": unitType, @@ -3792,7 +3851,24 @@ export function getLlmTaskOutcomesByModel(modelId, limit = 50) { try { return currentDb .prepare( - , + `SELECT + model_id, + provider, + unit_type, + unit_id, + succeeded, + retries, + escalated, + verification_passed, + blocker_discovered, + duration_ms, + tokens_total, + cost_usd, + recorded_at + FROM llm_task_outcomes + WHERE model_id = :model_id + ORDER BY recorded_at DESC + LIMIT :limit`, ) .all({ ":model_id": modelId, @@ -3812,11 +3888,28 @@ export function getLlmTaskOutcomesByModel(modelId, limit = 50) { */ export function getRecentLlmTaskOutcomes(hours = 24, limit = 100) { if (!currentDb) return []; - const cutoff = new Date(Date.now() - hours * 60 * 60 * 1000).toISOString(); + const cutoff = Date.now() - hours * 60 * 60 * 1000; try { return currentDb .prepare( - , + `SELECT + model_id, + provider, + unit_type, + unit_id, + succeeded, + retries, + escalated, + verification_passed, + blocker_discovered, + duration_ms, + tokens_total, + cost_usd, + recorded_at + FROM llm_task_outcomes + WHERE recorded_at >= :cutoff + ORDER BY recorded_at DESC + LIMIT :limit`, ) .all({ ":cutoff": cutoff, @@ -3835,17 +3928,40 @@ export function getRecentLlmTaskOutcomes(hours = 24, limit = 100) { */ export function getLlmTaskOutcomeStats(modelId, windowHours = 24) { if (!currentDb) { - return { total: 0, succeeded: 0, failed: 0, totalCostUsd: 0, totalTokens: 0, avgDurationMs: 0 }; + return { + total: 0, + 
succeeded: 0, + failed: 0, + totalCostUsd: 0, + totalTokens: 0, + avgDurationMs: 0, + }; } - const cutoff = new Date(Date.now() - windowHours * 60 * 60 * 1000).toISOString(); + const cutoff = Date.now() - windowHours * 60 * 60 * 1000; try { const row = currentDb .prepare( - , + `SELECT + COUNT(*) AS total, + COALESCE(SUM(CASE WHEN succeeded = 1 THEN 1 ELSE 0 END), 0) AS succeeded, + COALESCE(SUM(CASE WHEN succeeded = 0 THEN 1 ELSE 0 END), 0) AS failed, + COALESCE(SUM(cost_usd), 0) AS totalCostUsd, + COALESCE(SUM(tokens_total), 0) AS totalTokens, + COALESCE(AVG(duration_ms), 0) AS avgDurationMs + FROM llm_task_outcomes + WHERE model_id = :model_id + AND recorded_at >= :cutoff`, ) .get({ ":model_id": modelId, ":cutoff": cutoff }); if (!row) { - return { total: 0, succeeded: 0, failed: 0, totalCostUsd: 0, totalTokens: 0, avgDurationMs: 0 }; + return { + total: 0, + succeeded: 0, + failed: 0, + totalCostUsd: 0, + totalTokens: 0, + avgDurationMs: 0, + }; } return { total: row.total ?? 0, @@ -3856,7 +3972,14 @@ export function getLlmTaskOutcomeStats(modelId, windowHours = 24) { avgDurationMs: row.avgDurationMs ?? 
0, }; } catch { - return { total: 0, succeeded: 0, failed: 0, totalCostUsd: 0, totalTokens: 0, avgDurationMs: 0 }; + return { + total: 0, + succeeded: 0, + failed: 0, + totalCostUsd: 0, + totalTokens: 0, + avgDurationMs: 0, + }; } } /** @@ -3868,17 +3991,42 @@ export function getLlmTaskOutcomeStats(modelId, windowHours = 24) { */ export function getGateRunStats(gateId, windowHours = 24) { if (!currentDb) { - return { total: 0, pass: 0, fail: 0, retry: 0, manualAttention: 0, lastEvaluatedAt: null }; + return { + total: 0, + pass: 0, + fail: 0, + retry: 0, + manualAttention: 0, + lastEvaluatedAt: null, + }; } - const cutoff = new Date(Date.now() - windowHours * 60 * 60 * 1000).toISOString(); + const cutoff = new Date( + Date.now() - windowHours * 60 * 60 * 1000, + ).toISOString(); try { const row = currentDb .prepare( - , + `SELECT + COUNT(*) AS total, + COALESCE(SUM(CASE WHEN outcome = 'pass' THEN 1 ELSE 0 END), 0) AS pass, + COALESCE(SUM(CASE WHEN outcome = 'fail' THEN 1 ELSE 0 END), 0) AS fail, + COALESCE(SUM(CASE WHEN outcome = 'retry' THEN 1 ELSE 0 END), 0) AS retry, + COALESCE(SUM(CASE WHEN outcome = 'manual-attention' THEN 1 ELSE 0 END), 0) AS manualAttention, + MAX(evaluated_at) AS lastEvaluatedAt + FROM gate_runs + WHERE gate_id = :gate_id + AND evaluated_at >= :cutoff`, ) .get({ ":gate_id": gateId, ":cutoff": cutoff }); if (!row) { - return { total: 0, pass: 0, fail: 0, retry: 0, manualAttention: 0, lastEvaluatedAt: null }; + return { + total: 0, + pass: 0, + fail: 0, + retry: 0, + manualAttention: 0, + lastEvaluatedAt: null, + }; } return { total: row.total ?? 0, @@ -3889,7 +4037,149 @@ export function getGateRunStats(gateId, windowHours = 24) { lastEvaluatedAt: row.lastEvaluatedAt ?? 
null, }; } catch { - return { total: 0, pass: 0, fail: 0, retry: 0, manualAttention: 0, lastEvaluatedAt: null }; + return { + total: 0, + pass: 0, + fail: 0, + retry: 0, + manualAttention: 0, + lastEvaluatedAt: null, + }; + } +} + +/** + * Read the circuit breaker state for a specific gate. + * + * Returns { gateId, state, failureStreak, lastFailureAt, openedAt, halfOpenAttempts, updatedAt }. + * If no record exists, returns a default closed state. + * + * Consumer: uok/gate-runner.js before executing a gate. + */ +export function getGateCircuitBreaker(gateId) { + if (!currentDb) { + return { + gateId, + state: "closed", + failureStreak: 0, + lastFailureAt: null, + openedAt: null, + halfOpenAttempts: 0, + updatedAt: null, + }; + } + try { + const row = currentDb + .prepare( + `SELECT gate_id, state, failure_streak, last_failure_at, opened_at, half_open_attempts, updated_at + FROM gate_circuit_breakers + WHERE gate_id = :gate_id`, + ) + .get({ ":gate_id": gateId }); + if (!row) { + return { + gateId, + state: "closed", + failureStreak: 0, + lastFailureAt: null, + openedAt: null, + halfOpenAttempts: 0, + updatedAt: null, + }; + } + return { + gateId: row.gate_id, + state: row.state, + failureStreak: row.failure_streak ?? 0, + lastFailureAt: row.last_failure_at ?? null, + openedAt: row.opened_at ?? null, + halfOpenAttempts: row.half_open_attempts ?? 0, + updatedAt: row.updated_at ?? null, + }; + } catch { + return { + gateId, + state: "closed", + failureStreak: 0, + lastFailureAt: null, + openedAt: null, + halfOpenAttempts: 0, + updatedAt: null, + }; + } +} +/** + * Update the circuit breaker state for a specific gate. + * + * Consumer: uok/gate-runner.js after executing a gate. 
+ */ +export function updateGateCircuitBreaker(gateId, updates) { + if (!currentDb) return; + currentDb + .prepare( + `INSERT INTO gate_circuit_breakers ( + gate_id, state, failure_streak, last_failure_at, opened_at, half_open_attempts, updated_at + ) VALUES ( + :gate_id, :state, :failure_streak, :last_failure_at, :opened_at, :half_open_attempts, :updated_at + ) + ON CONFLICT(gate_id) DO UPDATE SET + state = excluded.state, + failure_streak = excluded.failure_streak, + last_failure_at = COALESCE(excluded.last_failure_at, gate_circuit_breakers.last_failure_at), + opened_at = COALESCE(excluded.opened_at, gate_circuit_breakers.opened_at), + half_open_attempts = excluded.half_open_attempts, + updated_at = excluded.updated_at`, + ) + .run({ + ":gate_id": gateId, + ":state": updates.state ?? "closed", + ":failure_streak": updates.failureStreak ?? 0, + ":last_failure_at": updates.lastFailureAt ?? null, + ":opened_at": updates.openedAt ?? null, + ":half_open_attempts": updates.halfOpenAttempts ?? 
0, + ":updated_at": new Date().toISOString(), + }); + // No return value: callers read breaker state back via getGateCircuitBreaker(). +} +export function getGateLatencyStats(gateId, windowHours = 24) { + if (!currentDb) { + return { total: 0, avgMs: 0, p50Ms: 0, p95Ms: 0, maxMs: 0 }; + } + const cutoff = new Date(Date.now() - windowHours * 60 * 60 * 1000).toISOString(); + try { + const row = currentDb + .prepare( + `SELECT + COUNT(*) AS total, + COALESCE(AVG(duration_ms), 0) AS avgMs, + COALESCE(MAX(duration_ms), 0) AS maxMs + FROM gate_runs + WHERE gate_id = :gate_id AND evaluated_at >= :cutoff`, + ) + .get({ ":gate_id": gateId, ":cutoff": cutoff }); + if (!row || row.total === 0) { + return { total: 0, avgMs: 0, p50Ms: 0, p95Ms: 0, maxMs: 0 }; + } + const durations = currentDb + .prepare( + `SELECT duration_ms + FROM gate_runs + WHERE gate_id = :gate_id AND evaluated_at >= :cutoff AND duration_ms IS NOT NULL + ORDER BY duration_ms`, + ) + .all({ ":gate_id": gateId, ":cutoff": cutoff }) + .map((r) => r.duration_ms); + const p50Ms = durations[Math.floor(durations.length * 0.5)] ?? 0; + const p95Ms = durations[Math.floor(durations.length * 0.95)] ?? 0; + return { + total: row.total ?? 0, + avgMs: Math.round(row.avgMs ?? 0), + p50Ms, + p95Ms, + maxMs: row.maxMs ??
0, + }; + } catch { + return { total: 0, avgMs: 0, p50Ms: 0, p95Ms: 0, maxMs: 0 }; } } function asStringOrNull(value) { diff --git a/src/resources/extensions/sf/tests/commands-schedule.test.mjs b/src/resources/extensions/sf/tests/commands-schedule.test.mjs index 5b50735dc..8390ba383 100644 --- a/src/resources/extensions/sf/tests/commands-schedule.test.mjs +++ b/src/resources/extensions/sf/tests/commands-schedule.test.mjs @@ -144,6 +144,35 @@ describe("handleSchedule", () => { assert.equal(ctx.notifications[0].type, "info"); assert.ok(ctx.notifications[0].msg.includes("No scheduled items")); }); + + it("list --json preserves the exact stored command payload", async () => { + const addCtx = mockCtx(); + await handleSchedule( + [ + "add", + "--in", + "1h", + "--kind", + "command", + "--auto-dispatch", + "--", + "node", + "-e", + "require('fs').writeFileSync('json-cron.txt','ok')", + ], + addCtx, + ); + const stored = createScheduleStore(testDir).readEntries("project")[0]; + + const listCtx = mockCtx(); + await handleSchedule("list --json", listCtx); + const listed = JSON.parse(listCtx.notifications[0].msg); + + assert.equal(listed.length, 1); + assert.equal(listed[0].kind, "command"); + assert.equal(listed[0].auto_dispatch, true); + assert.equal(listed[0].payload.command, stored.payload.command); + }); }); describe("done", () => { @@ -216,6 +245,39 @@ describe("handleSchedule", () => { assert.ok(ctx2.notifications.some((n) => n.msg.includes("Reminder:"))); assert.ok(ctx2.notifications.some((n) => n.type === "success")); }); + + it("dry-runs a command item without executing or marking done", async () => { + const ctx1 = mockCtx(); + await handleSchedule( + [ + "add", + "--in", + "0m", + "--kind", + "command", + "--auto-dispatch", + "--", + "node", + "-e", + "require('fs').writeFileSync('dry-run-cron.txt','bad')", + ], + ctx1, + ); + const id = ctx1.notifications[0].msg.match(/Scheduled: (\S+)/)?.[1]; + assert.ok(id); + + const ctx2 = mockCtx(); + await 
handleSchedule(["run", "--dry-run", id.slice(0, 8)], ctx2); + + const preview = JSON.parse(ctx2.notifications[0].msg); + assert.equal(preview.id, id); + assert.equal(preview.kind, "command"); + assert.equal(preview.would_execute, true); + assert.ok(preview.command.includes("dry-run-cron.txt")); + assert.throws(() => readFileSync(join(testDir, "dry-run-cron.txt"))); + const entries = createScheduleStore(testDir).readEntries("project"); + assert.equal(entries[0].status, "pending"); + }); }); describe("usage", () => { diff --git a/src/resources/extensions/sf/tests/uok-gate-runner.test.mjs b/src/resources/extensions/sf/tests/uok-gate-runner.test.mjs new file mode 100644 index 000000000..2911ef4ea --- /dev/null +++ b/src/resources/extensions/sf/tests/uok-gate-runner.test.mjs @@ -0,0 +1,457 @@ +import assert from "node:assert/strict"; +import { mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, test } from "vitest"; +import { + closeDatabase, + getGateCircuitBreaker, + getGateLatencyStats, + getGateRunStats, + openDatabase, + updateGateCircuitBreaker, +} from "../sf-db.js"; +import { validateGate } from "../uok/contracts.js"; +import { UokGateRunner } from "../uok/gate-runner.js"; + +const tmpRoots = []; + +afterEach(() => { + closeDatabase(); + for (const dir of tmpRoots.splice(0)) { + rmSync(dir, { recursive: true, force: true }); + } +}); + +function makeProject() { + const root = mkdtempSync(join(tmpdir(), "sf-uok-runner-")); + tmpRoots.push(root); + return root; +} + +function makeCtx(overrides = {}) { + return { + basePath: makeProject(), + traceId: "trace-1", + turnId: "turn-1", + unitType: "execute-task", + unitId: "M001/S01/T01", + milestoneId: "M001", + sliceId: "S01", + taskId: "T01", + ...overrides, + }; +} + +// ─── Registration ────────────────────────────────────────────────────────── + +test("runner_register_adds_gate", () => { + const runner = new UokGateRunner(); + const gate 
= { + id: "test-gate", + type: "policy", + execute: async () => ({ outcome: "pass", rationale: "ok" }), + }; + runner.register(gate); + assert.equal(runner.list().length, 1); + assert.equal(runner.list()[0].id, "test-gate"); +}); + +// ─── Unknown gate ────────────────────────────────────────────────────────── + +test("run_when_gate_not_registered_returns_manual_attention", async () => { + openDatabase(":memory:"); + const runner = new UokGateRunner(); + const result = await runner.run("missing-gate", makeCtx()); + assert.equal(result.outcome, "manual-attention"); + assert.equal(result.gateId, "missing-gate"); + assert.equal(result.failureClass, "unknown"); + assert.equal(result.retryable, false); +}); + +test("run_when_gate_not_registered_persists_to_db", async () => { + openDatabase(":memory:"); + const runner = new UokGateRunner(); + await runner.run("missing-gate", makeCtx()); + const stats = getGateRunStats("missing-gate", 24); + assert.equal(stats.total, 1); + assert.equal(stats.manualAttention, 1); +}); + +// ─── Pass ────────────────────────────────────────────────────────────────── + +test("run_when_gate_passes_returns_pass", async () => { + openDatabase(":memory:"); + const runner = new UokGateRunner(); + const gate = { + id: "pass-gate", + type: "verification", + execute: async () => ({ outcome: "pass", rationale: "all good" }), + }; + runner.register(gate); + const result = await runner.run("pass-gate", makeCtx()); + assert.equal(result.outcome, "pass"); + assert.equal(result.attempt, 1); + assert.equal(result.retryable, false); +}); + +test("run_when_gate_passes_persists_pass_to_db", async () => { + openDatabase(":memory:"); + const runner = new UokGateRunner(); + runner.register({ + id: "pass-gate", + type: "verification", + execute: async () => ({ outcome: "pass", rationale: "ok" }), + }); + await runner.run("pass-gate", makeCtx()); + const stats = getGateRunStats("pass-gate", 24); + assert.equal(stats.total, 1); + assert.equal(stats.pass, 1); +}); + 
+// ─── Fail without retry ──────────────────────────────────────────────────── + +test("run_when_policy_fail_returns_fail_no_retry", async () => { + openDatabase(":memory:"); + const runner = new UokGateRunner(); + runner.register({ + id: "policy-gate", + type: "policy", + execute: async () => ({ + outcome: "fail", + failureClass: "policy", + rationale: "blocked", + }), + }); + const result = await runner.run("policy-gate", makeCtx()); + assert.equal(result.outcome, "fail"); + assert.equal(result.attempt, 1); + assert.equal(result.retryable, false); +}); + +// ─── Fail with retry ─────────────────────────────────────────────────────── + +test("run_when_execution_fail_retries_once_then_fails", async () => { + openDatabase(":memory:"); + const runner = new UokGateRunner(); + let calls = 0; + runner.register({ + id: "exec-gate", + type: "verification", + execute: async () => { + calls++; + return { + outcome: "fail", + failureClass: "execution", + rationale: "transient error", + }; + }, + }); + const result = await runner.run("exec-gate", makeCtx()); + assert.equal(calls, 2); + assert.equal(result.outcome, "fail"); + assert.equal(result.attempt, 2); + assert.equal(result.retryable, false); +}); + +test("run_when_timeout_fail_retries_twice_then_fails", async () => { + openDatabase(":memory:"); + const runner = new UokGateRunner(); + let calls = 0; + runner.register({ + id: "timeout-gate", + type: "verification", + execute: async () => { + calls++; + return { + outcome: "fail", + failureClass: "timeout", + rationale: "timed out", + }; + }, + }); + const result = await runner.run("timeout-gate", makeCtx()); + assert.equal(calls, 3); + assert.equal(result.outcome, "fail"); + assert.equal(result.attempt, 3); + assert.equal(result.retryable, false); +}); + +// ─── Retry succeeds ──────────────────────────────────────────────────────── + +test("run_when_first_fail_then_pass_stops_early", async () => { + openDatabase(":memory:"); + const runner = new UokGateRunner(); + let 
calls = 0; + runner.register({ + id: "flaky-gate", + type: "verification", + execute: async () => { + calls++; + return calls === 1 + ? { outcome: "fail", failureClass: "execution", rationale: "retry me" } + : { outcome: "pass", rationale: "ok now" }; + }, + }); + const result = await runner.run("flaky-gate", makeCtx()); + assert.equal(calls, 2); + assert.equal(result.outcome, "pass"); + assert.equal(result.attempt, 2); +}); + +// ─── Exception isolation ─────────────────────────────────────────────────── + +test("run_when_gate_throws_returns_fail_with_unknown_class", async () => { + openDatabase(":memory:"); + const runner = new UokGateRunner(); + runner.register({ + id: "throw-gate", + type: "verification", + execute: async () => { + throw new Error("boom"); + }, + }); + const result = await runner.run("throw-gate", makeCtx()); + assert.equal(result.outcome, "fail"); + assert.equal(result.failureClass, "unknown"); + assert.ok(result.rationale.includes("boom")); +}); + +// ─── DB audit trail ──────────────────────────────────────────────────────── + +test("run_records_every_attempt_to_gate_runs", async () => { + openDatabase(":memory:"); + const runner = new UokGateRunner(); + let calls = 0; + runner.register({ + id: "audit-gate", + type: "verification", + execute: async () => { + calls++; + return { outcome: "fail", failureClass: "execution", rationale: `attempt ${calls}` }; + }, + }); + await runner.run("audit-gate", makeCtx()); + const stats = getGateRunStats("audit-gate", 24); + assert.equal(stats.total, 2); + assert.equal(stats.fail, 1); + assert.equal(stats.retry, 1); +}); + +// ─── Gate metadata ───────────────────────────────────────────────────────── + +test("run_result_includes_gate_id_type_and_timestamps", async () => { + openDatabase(":memory:"); + const runner = new UokGateRunner(); + runner.register({ + id: "meta-gate", + type: "policy", + execute: async () => ({ outcome: "pass", rationale: "ok" }), + }); + const result = await 
runner.run("meta-gate", makeCtx()); + assert.equal(result.gateId, "meta-gate"); + assert.equal(result.gateType, "policy"); + assert.ok(typeof result.evaluatedAt === "string"); + assert.ok(result.evaluatedAt.length > 0); +}); + +// ─── Latency tracking ────────────────────────────────────────────────────── + +test("run_records_duration_ms_in_gate_runs", async () => { + openDatabase(":memory:"); + const runner = new UokGateRunner(); + runner.register({ + id: "slow-gate", + type: "verification", + execute: async () => { + await new Promise((r) => setTimeout(r, 50)); + return { outcome: "pass", rationale: "ok" }; + }, + }); + await runner.run("slow-gate", makeCtx()); + const stats = getGateLatencyStats("slow-gate", 24); + assert.equal(stats.total, 1); + assert.ok(stats.avgMs >= 40, `expected avgMs >= 40, got ${stats.avgMs}`); +}); + +// ─── Circuit breaker ─────────────────────────────────────────────────────── + +test("circuitBreaker_when_passes_resets_failure_streak", async () => { + openDatabase(":memory:"); + const runner = new UokGateRunner(); + runner.register({ + id: "cb-pass", + type: "verification", + execute: async () => ({ outcome: "pass", rationale: "ok" }), + }); + updateGateCircuitBreaker("cb-pass", { state: "closed", failureStreak: 3 }); + await runner.run("cb-pass", makeCtx()); + const breaker = getGateCircuitBreaker("cb-pass"); + assert.equal(breaker.state, "closed"); + assert.equal(breaker.failureStreak, 0); +}); + +test("circuitBreaker_when_fails_incrementally_opens_after_threshold", async () => { + openDatabase(":memory:"); + const runner = new UokGateRunner(); + let calls = 0; + runner.register({ + id: "cb-fail", + type: "verification", + execute: async () => { + calls++; + return { outcome: "fail", failureClass: "execution", rationale: "nope" }; + }, + }); + // Threshold is 5; run 5 times to open the breaker + for (let i = 0; i < 5; i++) { + await runner.run("cb-fail", makeCtx()); + } + const breaker = getGateCircuitBreaker("cb-fail"); + 
assert.equal(breaker.state, "open"); + assert.equal(breaker.failureStreak, 5); + assert.ok(breaker.openedAt != null); +}); + +test("circuitBreaker_when_open_blocks_execution", async () => { + openDatabase(":memory:"); + const runner = new UokGateRunner(); + let calls = 0; + runner.register({ + id: "cb-block", + type: "verification", + execute: async () => { + calls++; + return { outcome: "pass", rationale: "ok" }; + }, + }); + updateGateCircuitBreaker("cb-block", { + state: "open", + failureStreak: 5, + openedAt: new Date().toISOString(), + }); + const result = await runner.run("cb-block", makeCtx()); + assert.equal(calls, 0); + assert.equal(result.outcome, "fail"); + assert.ok(result.rationale.includes("OPEN")); +}); + +test("circuitBreaker_when_open_and_cooled_down_transitions_to_half_open", async () => { + openDatabase(":memory:"); + const runner = new UokGateRunner(); + let calls = 0; + runner.register({ + id: "cb-cool", + type: "verification", + execute: async () => { + calls++; + return { outcome: "pass", rationale: "ok" }; + }, + }); + updateGateCircuitBreaker("cb-cool", { + state: "open", + failureStreak: 5, + openedAt: new Date(Date.now() - 120_000).toISOString(), + }); + const result = await runner.run("cb-cool", makeCtx()); + assert.equal(calls, 1); + assert.equal(result.outcome, "pass"); + const breaker = getGateCircuitBreaker("cb-cool"); + assert.equal(breaker.state, "closed"); +}); + +test("circuitBreaker_half_open_fail_reopens", async () => { + openDatabase(":memory:"); + const runner = new UokGateRunner(); + runner.register({ + id: "cb-reopen", + type: "verification", + execute: async () => ({ outcome: "fail", failureClass: "execution", rationale: "nope" }), + }); + updateGateCircuitBreaker("cb-reopen", { + state: "half-open", + failureStreak: 5, + halfOpenAttempts: 1, + }); + await runner.run("cb-reopen", makeCtx()); + const breaker = getGateCircuitBreaker("cb-reopen"); + assert.equal(breaker.state, "open"); +}); + 
+test("circuitBreaker_half_open_limit_exceeded_reopens", async () => { + openDatabase(":memory:"); + const runner = new UokGateRunner(); + let calls = 0; + runner.register({ + id: "cb-limit", + type: "verification", + execute: async () => { + calls++; + return { outcome: "pass", rationale: "ok" }; + }, + }); + updateGateCircuitBreaker("cb-limit", { + state: "half-open", + failureStreak: 5, + halfOpenAttempts: 3, + }); + const result = await runner.run("cb-limit", makeCtx()); + assert.equal(calls, 0); + assert.equal(result.outcome, "fail"); + assert.ok(result.rationale.includes("reopening")); +}); + +// ─── Contract validation ─────────────────────────────────────────────────── + +test("validateGate_when_valid_returns_true", () => { + const result = validateGate({ + id: "good", + type: "policy", + execute: async () => ({ outcome: "pass", rationale: "ok" }), + }); + assert.equal(result.valid, true); +}); + +test("validateGate_when_null_returns_false", () => { + const result = validateGate(null); + assert.equal(result.valid, false); + assert.ok(result.reason.includes("object")); +}); + +test("validateGate_when_missing_id_returns_false", () => { + const result = validateGate({ type: "policy", execute: async () => ({}) }); + assert.equal(result.valid, false); + assert.ok(result.reason.includes("id")); +}); + +test("validateGate_when_empty_id_returns_false", () => { + const result = validateGate({ id: "", type: "policy", execute: async () => ({}) }); + assert.equal(result.valid, false); + assert.ok(result.reason.includes("id")); +}); + +test("validateGate_when_missing_type_returns_false", () => { + const result = validateGate({ id: "x", execute: async () => ({}) }); + assert.equal(result.valid, false); + assert.ok(result.reason.includes("type")); +}); + +test("validateGate_when_missing_execute_returns_false", () => { + const result = validateGate({ id: "x", type: "policy" }); + assert.equal(result.valid, false); + assert.ok(result.reason.includes("execute")); +}); + 
+test("validateGate_when_execute_not_function_returns_false", () => { + const result = validateGate({ id: "x", type: "policy", execute: "not-a-fn" }); + assert.equal(result.valid, false); + assert.ok(result.reason.includes("execute")); +}); + +test("runner_register_when_invalid_gate_throws", () => { + const runner = new UokGateRunner(); + assert.throws(() => runner.register({ id: "", type: "policy", execute: async () => ({}) }), /id/); + assert.throws(() => runner.register({ id: "x", type: "", execute: async () => ({}) }), /type/); + assert.throws(() => runner.register({ id: "x", type: "policy" }), /execute/); + assert.throws(() => runner.register(null), /object/); +}); diff --git a/src/resources/extensions/sf/tests/uok-gates.test.mjs b/src/resources/extensions/sf/tests/uok-gates.test.mjs new file mode 100644 index 000000000..57f86ff31 --- /dev/null +++ b/src/resources/extensions/sf/tests/uok-gates.test.mjs @@ -0,0 +1,387 @@ +import assert from "node:assert/strict"; +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, test, vi } from "vitest"; +import { closeDatabase, insertLlmTaskOutcome, openDatabase } from "../sf-db.js"; +import { ChaosMonkey } from "../uok/chaos-monkey.js"; +import { CostGuardGate } from "../uok/cost-guard-gate.js"; +import { OutcomeLearningGate } from "../uok/outcome-learning-gate.js"; +import { SecurityGate } from "../uok/security-gate.js"; + +const tmpRoots = []; + +afterEach(() => { + closeDatabase(); + for (const dir of tmpRoots.splice(0)) { + rmSync(dir, { recursive: true, force: true }); + } + vi.restoreAllMocks(); +}); + +function makeProject() { + const root = mkdtempSync(join(tmpdir(), "sf-uok-gates-")); + tmpRoots.push(root); + return root; +} + +// ─── SecurityGate ────────────────────────────────────────────────────────── + +test("securityGate_when_script_missing_returns_pass_with_skip", async () => { + const gate = new 
SecurityGate(); + const result = await gate.execute({ basePath: makeProject() }, 1); + assert.equal(result.outcome, "pass"); + assert.ok(result.rationale.includes("skipped")); +}); + +test("securityGate_when_script_passes_returns_pass", async () => { + const root = makeProject(); + const scriptDir = join(root, "scripts"); + mkdirSync(scriptDir, { recursive: true }); + writeFileSync( + join(scriptDir, "secret-scan.sh"), + "#!/bin/bash\nexit 0\n", + "utf-8", + ); + const gate = new SecurityGate(); + const result = await gate.execute({ basePath: root }, 1); + assert.equal(result.outcome, "pass"); + assert.ok(result.rationale.includes("No secrets")); +}); + +test("securityGate_when_script_fails_returns_fail", async () => { + const root = makeProject(); + const scriptDir = join(root, "scripts"); + mkdirSync(scriptDir, { recursive: true }); + writeFileSync( + join(scriptDir, "secret-scan.sh"), + "#!/bin/bash\necho 'found AWS key'\nexit 1\n", + "utf-8", + ); + const gate = new SecurityGate(); + const result = await gate.execute({ basePath: root }, 1); + assert.equal(result.outcome, "fail"); + assert.equal(result.failureClass, "policy"); + assert.ok(result.rationale.includes("secrets")); +}); + +// ─── CostGuardGate ───────────────────────────────────────────────────────── + +test("costGuardGate_when_no_data_returns_pass", async () => { + openDatabase(":memory:"); + const gate = new CostGuardGate(); + const result = await gate.execute({ + modelId: "gpt-4o", + unitType: "execute-task", + unitId: "M001/S01/T01", + }); + assert.equal(result.outcome, "pass"); +}); + +test("costGuardGate_when_unit_cost_exceeded_returns_fail", async () => { + openDatabase(":memory:"); + const gate = new CostGuardGate({ maxUsdPerUnit: 1.0 }); + const _now = new Date().toISOString(); + for (let i = 0; i < 3; i++) { + insertLlmOutcome({ + modelId: "gpt-4o", + provider: "openai", + unitType: "execute-task", + unitId: "M001/S01/T01", + succeeded: 0, + cost_usd: 0.5, + recorded_at: new Date(Date.now() 
+ i * 1000).toISOString(), + }); + } + const result = await gate.execute({ + modelId: "gpt-4o", + unitType: "execute-task", + unitId: "M001/S01/T01", + }); + assert.equal(result.outcome, "fail"); + assert.ok(result.rationale.includes("$1.50")); +}); + +test("costGuardGate_when_hourly_cost_exceeded_returns_fail", async () => { + openDatabase(":memory:"); + const gate = new CostGuardGate({ maxUsdPerHour: 1.0 }); + const _now = new Date().toISOString(); + for (let i = 0; i < 3; i++) { + insertLlmOutcome({ + modelId: "gpt-4o", + provider: "openai", + unitType: "execute-task", + unitId: `M001/S01/T0${i}`, + succeeded: 1, + cost_usd: 0.5, + recorded_at: new Date(Date.now() + i * 1000).toISOString(), + }); + } + const result = await gate.execute({ + modelId: "gpt-4o", + unitType: "execute-task", + unitId: "M001/S01/T99", + }); + assert.equal(result.outcome, "fail"); + assert.ok(result.rationale.includes("1-hour")); +}); + +test("costGuardGate_when_highTier_fails_twice_returns_fail", async () => { + openDatabase(":memory:"); + const gate = new CostGuardGate({ highTierFailureThreshold: 2 }); + const _now = new Date().toISOString(); + insertLlmOutcome({ + modelId: "claude-opus-4-6", + provider: "anthropic", + unitType: "execute-task", + unitId: "M001/S01/T01", + succeeded: 0, + cost_usd: 2.0, + recorded_at: new Date(Date.now()).toISOString(), + }); + insertLlmOutcome({ + modelId: "claude-opus-4-6", + provider: "anthropic", + unitType: "execute-task", + unitId: "M001/S01/T01", + succeeded: 0, + cost_usd: 2.0, + recorded_at: new Date(Date.now() + 1000).toISOString(), + }); + const result = await gate.execute({ + modelId: "claude-opus-4-6", + unitType: "execute-task", + unitId: "M001/S01/T01", + }); + assert.equal(result.outcome, "fail"); + assert.ok(result.rationale.includes("high-tier")); +}); + +test("costGuardGate_when_suggesting_alternative_uses_known_cost_table_model", async () => { + openDatabase(":memory:"); + const gate = new CostGuardGate({ highTierFailureThreshold: 2 
}); + insertLlmOutcome({ + modelId: "claude-opus-4-6", + provider: "anthropic", + unitType: "execute-task", + unitId: "M001/S01/T01", + succeeded: 0, + cost_usd: 0.1, + recorded_at: Date.now(), + }); + insertLlmOutcome({ + modelId: "claude-opus-4-6", + provider: "anthropic", + unitType: "execute-task", + unitId: "M001/S01/T01", + succeeded: 0, + cost_usd: 0.1, + recorded_at: Date.now() + 1000, + }); + + const result = await gate.execute({ + modelId: "claude-opus-4-6", + unitType: "execute-task", + unitId: "M001/S01/T01", + }); + + assert.equal(result.outcome, "fail"); + assert.ok(result.recommendation.includes("claude-haiku-4-5")); + assert.ok(!result.recommendation.includes("claude-haiku-4-5-haiku")); +}); + +// ─── OutcomeLearningGate ─────────────────────────────────────────────────── + +test("outcomeLearningGate_when_no_data_returns_pass", async () => { + openDatabase(":memory:"); + const gate = new OutcomeLearningGate(); + const result = await gate.execute({ + modelId: "gpt-4o", + unitType: "execute-task", + unitId: "M001/S01/T01", + }); + assert.equal(result.outcome, "pass"); +}); + +test("outcomeLearningGate_when_unit_failure_rate_high_returns_fail", async () => { + openDatabase(":memory:"); + const gate = new OutcomeLearningGate({ + minSampleSize: 3, + failureRateThreshold: 0.6, + }); + const _now = new Date().toISOString(); + for (let i = 0; i < 3; i++) { + insertLlmOutcome({ + modelId: "gpt-4o", + provider: "openai", + unitType: "execute-task", + unitId: "M001/S01/T01", + succeeded: 0, + cost_usd: 0.1, + recorded_at: new Date(Date.now() + i * 1000).toISOString(), + }); + } + const result = await gate.execute({ + modelId: "gpt-4o", + unitType: "execute-task", + unitId: "M001/S01/T01", + }); + assert.equal(result.outcome, "fail"); + assert.ok(result.rationale.includes("pattern")); + assert.ok(result.findings.includes("100%")); +}); + +test("outcomeLearningGate_when_model_failure_rate_high_returns_fail", async () => { + openDatabase(":memory:"); + const gate 
= new OutcomeLearningGate({ + minSampleSize: 3, + failureRateThreshold: 0.6, + }); + const _now = new Date().toISOString(); + for (let i = 0; i < 4; i++) { + insertLlmOutcome({ + modelId: "gpt-4o", + provider: "openai", + unitType: "execute-task", + unitId: `M001/S01/T0${i}`, + succeeded: 0, + cost_usd: 0.1, + recorded_at: new Date(Date.now() + i * 1000).toISOString(), + }); + } + const result = await gate.execute({ + modelId: "gpt-4o", + unitType: "execute-task", + unitId: "M001/S01/T99", + }); + assert.equal(result.outcome, "fail"); + assert.ok(result.findings.includes("gpt-4o")); + assert.ok(result.findings.includes("100%")); +}); + +test("outcomeLearningGate_when_escalation_pattern_detected_returns_fail", async () => { + openDatabase(":memory:"); + const gate = new OutcomeLearningGate({ + minSampleSize: 1, + lookbackHours: 168, + }); + const _now = new Date().toISOString(); + for (let i = 0; i < 3; i++) { + insertLlmOutcome({ + modelId: "gpt-4o", + provider: "openai", + unitType: "execute-task", + unitId: `M001/S01/T0${i}`, + succeeded: 1, + escalated: 1, + cost_usd: 0.1, + recorded_at: new Date(Date.now() + i * 1000).toISOString(), + }); + } + const result = await gate.execute({ + modelId: "gpt-4o", + unitType: "execute-task", + unitId: "M001/S01/T99", + }); + assert.equal(result.outcome, "fail"); + assert.ok(result.findings.includes("escalated")); +}); + +// ─── ChaosMonkey ─────────────────────────────────────────────────────────── + +test("chaosMonkey_when_inactive_does_nothing", async () => { + const monkey = new ChaosMonkey({ active: false }); + await monkey.strike("test"); + assert.deepEqual(monkey.getInjectedEvents(), []); +}); + +test("chaosMonkey_when_active_may_inject_latency", async () => { + const monkey = new ChaosMonkey({ + active: true, + latencyProbability: 1.0, + maxLatencyMs: 50, + partialFailureProbability: 0, + diskStressProbability: 0, + memoryStressProbability: 0, + }); + const start = Date.now(); + await monkey.strike("test"); + const 
elapsed = Date.now() - start; + assert.ok(elapsed >= 30, `expected latency injection, elapsed ${elapsed}ms`); + const events = monkey.getInjectedEvents(); + assert.equal(events.length, 1); + assert.equal(events[0].type, "latency"); + assert.equal(events[0].phase, "test"); +}); + +test("chaosMonkey_when_active_may_throw_retryable_error", async () => { + const monkey = new ChaosMonkey({ + active: true, + latencyProbability: 0, + partialFailureProbability: 1.0, + diskStressProbability: 0, + memoryStressProbability: 0, + }); + try { + await monkey.strike("test"); + assert.fail("expected partial failure throw"); + } catch (err) { + assert.equal(err.code, "CHAOS_INJECTED"); + assert.equal(err.retryable, true); + } + const events = monkey.getInjectedEvents(); + assert.equal(events.length, 1); + assert.equal(events[0].type, "partial-failure"); +}); + +test("chaosMonkey_disk_stress_writes_and_cleans_up", async () => { + const monkey = new ChaosMonkey({ + active: true, + latencyProbability: 0, + partialFailureProbability: 0, + diskStressProbability: 1.0, + diskStressMb: 5, + memoryStressProbability: 0, + }); + await monkey.strike("test"); + const events = monkey.getInjectedEvents(); + assert.equal(events.length, 1); + assert.equal(events[0].type, "disk-stress"); + assert.ok(events[0].sizeMb >= 1 && events[0].sizeMb <= 5); +}); + +test("chaosMonkey_clearEvents_resets_history", async () => { + const monkey = new ChaosMonkey({ + active: true, + latencyProbability: 1.0, + maxLatencyMs: 10, + partialFailureProbability: 0, + diskStressProbability: 0, + memoryStressProbability: 0, + }); + await monkey.strike("test"); + assert.equal(monkey.getInjectedEvents().length, 1); + monkey.clearEvents(); + assert.equal(monkey.getInjectedEvents().length, 0); +}); + +// ─── Helpers ─────────────────────────────────────────────────────────────── + +function insertLlmOutcome(overrides) { + return insertLlmTaskOutcome({ + modelId: overrides.modelId ?? "gpt-4o", + provider: overrides.provider ?? 
"openai", + unitType: overrides.unitType ?? "execute-task", + unitId: overrides.unitId ?? "M001/S01/T01", + succeeded: overrides.succeeded ?? 1, + retries: overrides.retries ?? 0, + escalated: overrides.escalated ?? 0, + verification_passed: overrides.verification_passed ?? null, + blocker_discovered: overrides.blocker_discovered ?? 0, + duration_ms: overrides.duration_ms ?? 1000, + tokens_total: overrides.tokens_total ?? 1000, + cost_usd: overrides.cost_usd ?? 0.1, + recorded_at: overrides.recorded_at ?? new Date().toISOString(), + ...overrides, + }); +} diff --git a/src/resources/extensions/sf/tests/uok-message-bus.test.mjs b/src/resources/extensions/sf/tests/uok-message-bus.test.mjs new file mode 100644 index 000000000..a7eb58c93 --- /dev/null +++ b/src/resources/extensions/sf/tests/uok-message-bus.test.mjs @@ -0,0 +1,142 @@ +import assert from "node:assert/strict"; +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, test } from "vitest"; +import { AgentInbox, MessageBus } from "../uok/message-bus.js"; + +const tmpRoots = []; + +afterEach(() => { + for (const dir of tmpRoots.splice(0)) { + rmSync(dir, { recursive: true, force: true }); + } +}); + +function makeProject() { + const root = mkdtempSync(join(tmpdir(), "sf-uok-bus-")); + tmpRoots.push(root); + return root; +} + +// ─── MessageBus ──────────────────────────────────────────────────────────── + +test("messageBus_send_delivers_message_to_inbox", () => { + const root = makeProject(); + const bus = new MessageBus(root); + const id = bus.send("agent-a", "agent-b", "hello"); + assert.match(id, /^msg-[0-9a-f-]{36}$/); + + const inbox = bus.getInbox("agent-b"); + const messages = inbox.list(); + assert.equal(messages.length, 1); + assert.equal(messages[0].from, "agent-a"); + assert.equal(messages[0].body, "hello"); + assert.equal(messages[0].read, false); +}); + 
+test("messageBus_broadcast_delivers_to_all_recipients", () => { + const root = makeProject(); + const bus = new MessageBus(root); + const ids = bus.broadcast("agent-a", ["agent-b", "agent-c"], "alert"); + assert.equal(ids.length, 2); + assert.equal(bus.getInbox("agent-b").unreadCount, 1); + assert.equal(bus.getInbox("agent-c").unreadCount, 1); +}); + +test("messageBus_persists_across_reconstruction", () => { + const root = makeProject(); + const bus1 = new MessageBus(root); + bus1.send("agent-a", "agent-b", "persistent"); + + const bus2 = new MessageBus(root); + const inbox = bus2.getInbox("agent-b"); + const messages = inbox.list(); + assert.equal(messages.length, 1); + assert.equal(messages[0].body, "persistent"); +}); + +test("messageBus_markRead_updates_state_and_persists", () => { + const root = makeProject(); + const bus = new MessageBus(root); + bus.send("agent-a", "agent-b", "read-me"); + const inbox = bus.getInbox("agent-b"); + const msg = inbox.list()[0]; + assert.equal(inbox.unreadCount, 1); + + inbox.markRead(msg.id); + assert.equal(inbox.unreadCount, 0); + + // Reconstruct and verify read state persisted + const bus2 = new MessageBus(root); + const inbox2 = bus2.getInbox("agent-b"); + assert.equal(inbox2.list(true).length, 0); +}); + +test("messageBus_compact_removes_old_messages", () => { + const root = makeProject(); + const bus = new MessageBus(root, { retentionDays: 1 }); + // Send an old message by manually appending to JSONL + const path = join(root, ".sf", "runtime", "uok-messages.jsonl"); + mkdirSync(join(root, ".sf", "runtime"), { recursive: true }); + const oldMsg = { + id: "msg-old", + from: "agent-a", + to: "agent-b", + body: "old", + sentAt: new Date(Date.now() - 2 * 24 * 60 * 60 * 1000).toISOString(), + deliveredAt: new Date(Date.now() - 2 * 24 * 60 * 60 * 1000).toISOString(), + }; + writeFileSync(path, `${JSON.stringify(oldMsg)}\n`, "utf-8"); + + const result = bus.compact(); + assert.equal(result.after, 0); + assert.ok(result.before >= 
1); + + const inbox = bus.getInbox("agent-b"); + inbox.refresh(); + assert.equal(inbox.list().length, 0); +}); + +test("messageBus_getConversation_filters_by_pair", () => { + const root = makeProject(); + const bus = new MessageBus(root); + bus.send("agent-a", "agent-b", "ab1"); + bus.send("agent-b", "agent-a", "ba1"); + bus.send("agent-c", "agent-b", "cb1"); + + const conv = bus.getConversation("agent-a", "agent-b"); + assert.equal(conv.length, 2); + assert.ok(conv.every((m) => m.body !== "cb1")); +}); + +test("messageBus_inbox_enforces_maxSize", () => { + const root = makeProject(); + const bus = new MessageBus(root, { maxInboxSize: 3 }); + for (let i = 0; i < 5; i++) { + bus.send("agent-a", "agent-b", `msg-${i}`); + } + const inbox = bus.getInbox("agent-b"); + assert.equal(inbox.list().length, 3); + assert.equal(inbox.list()[0].body, "msg-2"); +}); + +// ─── AgentInbox ──────────────────────────────────────────────────────────── + +test("agentInbox_list_unreadOnly_filters_correctly", () => { + const root = makeProject(); + const inbox = new AgentInbox("agent-b", root); + inbox.receive({ id: "1", from: "a", body: "x" }); + inbox.receive({ id: "2", from: "a", body: "y" }); + inbox.markRead("1"); + + assert.equal(inbox.list().length, 2); + assert.equal(inbox.list(true).length, 1); + assert.equal(inbox.list(true)[0].id, "2"); +}); + +test("agentInbox_markRead_returns_false_for_unknown_id", () => { + const root = makeProject(); + const inbox = new AgentInbox("agent-b", root); + assert.equal(inbox.markRead("nope"), false); +}); diff --git a/src/resources/extensions/sf/tests/uok-outcome-ledger.test.mjs b/src/resources/extensions/sf/tests/uok-outcome-ledger.test.mjs new file mode 100644 index 000000000..2cfb1a654 --- /dev/null +++ b/src/resources/extensions/sf/tests/uok-outcome-ledger.test.mjs @@ -0,0 +1,117 @@ +import assert from "node:assert/strict"; +import { afterEach, test } from "vitest"; +import { + closeDatabase, + getGateRunStats, + getLlmTaskOutcomeStats, + 
getLlmTaskOutcomesByModel, + getLlmTaskOutcomesByUnit, + getRecentLlmTaskOutcomes, + insertGateRun, + insertLlmTaskOutcome, + openDatabase, +} from "../sf-db.js"; + +afterEach(() => { + closeDatabase(); +}); + +test("llm_task_outcome_queries_when_records_exist_return_ordered_contracts", () => { + openDatabase(":memory:"); + const now = Date.now(); + + insertLlmTaskOutcome({ + modelId: "provider/expensive", + provider: "provider", + unitType: "execute-task", + unitId: "M001/S01/T01", + succeeded: false, + retries: 1, + escalated: true, + verification_passed: false, + blocker_discovered: true, + duration_ms: 1200, + tokens_total: 4000, + cost_usd: 0.42, + recorded_at: now - 1_000, + }); + insertLlmTaskOutcome({ + modelId: "provider/cheap", + provider: "provider", + unitType: "execute-task", + unitId: "M001/S01/T01", + succeeded: true, + retries: 0, + escalated: false, + verification_passed: true, + blocker_discovered: false, + duration_ms: 800, + tokens_total: 1000, + cost_usd: 0.03, + recorded_at: now, + }); + + const byUnit = getLlmTaskOutcomesByUnit("execute-task", "M001/S01/T01", 10); + assert.equal(byUnit.length, 2); + assert.equal(byUnit[0].model_id, "provider/cheap"); + assert.equal(byUnit[1].model_id, "provider/expensive"); + + const byModel = getLlmTaskOutcomesByModel("provider/expensive", 10); + assert.equal(byModel.length, 1); + assert.equal(byModel[0].cost_usd, 0.42); + + const recent = getRecentLlmTaskOutcomes(1, 10); + assert.equal(recent.length, 2); + + const stats = getLlmTaskOutcomeStats("provider/expensive", 1); + assert.equal(stats.total, 1); + assert.equal(stats.failed, 1); + assert.equal(stats.succeeded, 0); + assert.equal(stats.totalCostUsd, 0.42); + assert.equal(stats.totalTokens, 4000); + assert.equal(stats.avgDurationMs, 1200); +}); + +test("gate_run_stats_when_gate_runs_exist_aggregates_by_gate_and_window", () => { + openDatabase(":memory:"); + const now = new Date().toISOString(); + + for (const outcome of ["pass", "fail", "retry", 
"manual-attention"]) { + insertGateRun({ + traceId: `trace-${outcome}`, + turnId: `turn-${outcome}`, + gateId: "cost-guard", + gateType: "policy", + outcome, + failureClass: outcome === "pass" ? "none" : "policy", + rationale: outcome, + findings: "", + attempt: 1, + maxAttempts: 1, + retryable: outcome === "retry", + evaluatedAt: now, + }); + } + insertGateRun({ + traceId: "trace-other", + turnId: "turn-other", + gateId: "other-gate", + gateType: "policy", + outcome: "fail", + failureClass: "policy", + rationale: "other", + findings: "", + attempt: 1, + maxAttempts: 1, + retryable: false, + evaluatedAt: now, + }); + + const stats = getGateRunStats("cost-guard", 1); + assert.equal(stats.total, 4); + assert.equal(stats.pass, 1); + assert.equal(stats.fail, 1); + assert.equal(stats.retry, 1); + assert.equal(stats.manualAttention, 1); + assert.equal(stats.lastEvaluatedAt, now); +}); diff --git a/src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs b/src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs new file mode 100644 index 000000000..a20ed1c18 --- /dev/null +++ b/src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs @@ -0,0 +1,292 @@ +import assert from "node:assert/strict"; +import { + mkdirSync, + mkdtempSync, + readdirSync, + rmSync, + writeFileSync, +} from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, test } from "vitest"; +import { + clearUnitRuntimeRecord, + decideUnitRuntimeDispatch, + getUnitRuntimeState, + isTerminalUnitRuntimeStatus, + listUnitRuntimeRecords, + readUnitRuntimeRecord, + UNIT_RUNTIME_STATUSES, + UNIT_RUNTIME_TERMINAL_STATUSES, + writeUnitRuntimeRecord, +} from "../uok/unit-runtime.js"; + +const tmpRoots = []; + +afterEach(() => { + for (const dir of tmpRoots.splice(0)) { + rmSync(dir, { recursive: true, force: true }); + } +}); + +function makeProject() { + const root = mkdtempSync(join(tmpdir(), "sf-uok-urt-")); + tmpRoots.push(root); + return root; +} + +// ─── 
Constants ───────────────────────────────────────────────────────────── + +test("unitRuntimeStatuses_is_exhaustive", () => { + assert.ok(UNIT_RUNTIME_STATUSES.includes("queued")); + assert.ok(UNIT_RUNTIME_STATUSES.includes("completed")); + assert.ok(UNIT_RUNTIME_STATUSES.includes("failed")); + assert.ok(UNIT_RUNTIME_STATUSES.includes("runaway-recovered")); +}); + +test("terminalStatuses_is_subset_of_all_statuses", () => { + for (const s of UNIT_RUNTIME_TERMINAL_STATUSES) { + assert.ok( + UNIT_RUNTIME_STATUSES.includes(s), + `${s} should be in UNIT_RUNTIME_STATUSES`, + ); + } +}); + +// ─── isTerminalUnitRuntimeStatus ─────────────────────────────────────────── + +test("isTerminal_returns_true_for_terminal_statuses", () => { + for (const s of UNIT_RUNTIME_TERMINAL_STATUSES) { + assert.equal( + isTerminalUnitRuntimeStatus(s), + true, + `${s} should be terminal`, + ); + } +}); + +test("isTerminal_returns_false_for_non_terminal_statuses", () => { + const nonTerminal = UNIT_RUNTIME_STATUSES.filter( + (s) => !UNIT_RUNTIME_TERMINAL_STATUSES.includes(s), + ); + for (const s of nonTerminal) { + assert.equal( + isTerminalUnitRuntimeStatus(s), + false, + `${s} should not be terminal`, + ); + } +}); + +// ─── getUnitRuntimeState ─────────────────────────────────────────────────── + +test("getUnitRuntimeState_normalizes_legacy_phase_only_record", () => { + const state = getUnitRuntimeState({ phase: "dispatched" }); + assert.equal(state.status, "running"); +}); + +test("getUnitRuntimeState_prefers_explicit_status_over_phase", () => { + const state = getUnitRuntimeState({ phase: "dispatched", status: "queued" }); + assert.equal(state.status, "queued"); +}); + +test("getUnitRuntimeState_maps_timeout_phase_to_stale", () => { + const state = getUnitRuntimeState({ phase: "timeout" }); + assert.equal(state.status, "stale"); +}); + +test("getUnitRuntimeState_maps_finalized_phase_to_completed", () => { + const state = getUnitRuntimeState({ phase: "finalized" }); + 
assert.equal(state.status, "completed"); +}); + +test("getUnitRuntimeState_computes_retryCount_from_recoveryAttempts", () => { + const state = getUnitRuntimeState({ recoveryAttempts: 3 }); + assert.equal(state.retryCount, 3); +}); + +// ─── decideUnitRuntimeDispatch ───────────────────────────────────────────── + +test("decide_when_no_record_returns_dispatch", () => { + const d = decideUnitRuntimeDispatch(null); + assert.equal(d.action, "dispatch"); + assert.equal(d.reasonCode, "no-runtime-record"); +}); + +test("decide_when_queued_returns_dispatch", () => { + const d = decideUnitRuntimeDispatch({ status: "queued" }); + assert.equal(d.action, "dispatch"); + assert.equal(d.reasonCode, "queued"); +}); + +test("decide_when_running_returns_skip", () => { + const d = decideUnitRuntimeDispatch({ status: "running" }); + assert.equal(d.action, "skip"); + assert.equal(d.reasonCode, "active-or-claimed"); +}); + +test("decide_when_completed_returns_notify", () => { + const d = decideUnitRuntimeDispatch({ status: "completed" }); + assert.equal(d.action, "notify"); + assert.equal(d.reasonCode, "terminal-ready-to-notify"); +}); + +test("decide_when_failed_with_budget_returns_retry", () => { + const d = decideUnitRuntimeDispatch({ + status: "failed", + retryCount: 0, + maxRetries: 2, + }); + assert.equal(d.action, "retry"); + assert.equal(d.reasonCode, "retry-budget-available"); +}); + +test("decide_when_failed_without_budget_returns_block", () => { + const d = decideUnitRuntimeDispatch({ + status: "failed", + retryCount: 2, + maxRetries: 1, + }); + assert.equal(d.action, "block"); + assert.equal(d.reasonCode, "retry-budget-exhausted"); +}); + +test("decide_when_stale_with_budget_returns_retry", () => { + const d = decideUnitRuntimeDispatch({ + status: "stale", + retryCount: 0, + maxRetries: 1, + }); + assert.equal(d.action, "retry"); +}); + +test("decide_when_blocked_returns_notify", () => { + const d = decideUnitRuntimeDispatch({ status: "blocked" }); + assert.equal(d.action, 
"notify"); +}); + +test("decide_when_cancelled_returns_notify", () => { + const d = decideUnitRuntimeDispatch({ status: "cancelled" }); + assert.equal(d.action, "notify"); +}); + +test("decide_when_synthetic_failed_returns_block", () => { + const d = decideUnitRuntimeDispatch({ + status: "failed", + unitType: "synthetic", + retryCount: 0, + maxRetries: 3, + }); + assert.equal(d.action, "block"); + assert.equal(d.reasonCode, "synthetic-reset-required"); +}); + +test("decide_when_notified_returns_skip", () => { + const d = decideUnitRuntimeDispatch({ status: "notified" }); + assert.equal(d.action, "skip"); + assert.equal(d.reasonCode, "notified"); +}); + +test("decide_when_notifiedAt_set_returns_skip", () => { + const d = decideUnitRuntimeDispatch({ + status: "completed", + notifiedAt: Date.now(), + }); + assert.equal(d.action, "skip"); + assert.equal(d.reasonCode, "already-notified"); +}); + +test("decide_retryBudgetRemaining_is_accurate", () => { + const d = decideUnitRuntimeDispatch({ + status: "failed", + retryCount: 1, + maxRetries: 3, + }); + assert.equal(d.retryBudgetRemaining, 2); +}); + +// ─── Record IO ───────────────────────────────────────────────────────────── + +test("writeUnitRuntimeRecord_creates_file", () => { + const root = makeProject(); + writeUnitRuntimeRecord(root, "execute-task", "M001/S01/T01", Date.now(), { + status: "running", + }); + const records = listUnitRuntimeRecords(root); + assert.equal(records.length, 1); + assert.equal(records[0].unitType, "execute-task"); + assert.equal(records[0].unitId, "M001/S01/T01"); + assert.equal(records[0].status, "running"); +}); + +test("readUnitRuntimeRecord_returns_cached_record", () => { + const root = makeProject(); + writeUnitRuntimeRecord(root, "execute-task", "M001/S01/T01", Date.now(), { + status: "running", + }); + const record = readUnitRuntimeRecord(root, "execute-task", "M001/S01/T01"); + assert.equal(record.status, "running"); +}); + +test("readUnitRuntimeRecord_returns_null_for_missing", 
() => { + const root = makeProject(); + const record = readUnitRuntimeRecord(root, "execute-task", "MISSING"); + assert.equal(record, null); +}); + +test("clearUnitRuntimeRecord_removes_file_and_cache", () => { + const root = makeProject(); + writeUnitRuntimeRecord(root, "execute-task", "M001/S01/T01", Date.now(), { + status: "running", + }); + clearUnitRuntimeRecord(root, "execute-task", "M001/S01/T01"); + const record = readUnitRuntimeRecord(root, "execute-task", "M001/S01/T01"); + assert.equal(record, null); + assert.equal(listUnitRuntimeRecords(root).length, 0); +}); + +test("writeUnitRuntimeRecord_merges_updates", () => { + const root = makeProject(); + const t = Date.now(); + writeUnitRuntimeRecord(root, "execute-task", "M001/S01/T01", t, { + status: "running", + progressCount: 1, + }); + writeUnitRuntimeRecord(root, "execute-task", "M001/S01/T01", t, { + status: "progress", + progressCount: 2, + }); + const record = readUnitRuntimeRecord(root, "execute-task", "M001/S01/T01"); + assert.equal(record.status, "progress"); + assert.equal(record.progressCount, 2); +}); + +test("writeUnitRuntimeRecord_sanitizes_path_characters", () => { + const root = makeProject(); + writeUnitRuntimeRecord(root, "exec/task", "M001/S01/T01", Date.now(), { + status: "running", + }); + const dir = join(root, ".sf", "runtime", "units"); + const files = readdirSync(dir); + assert.ok(files.every((f) => !f.includes("/"))); +}); + +// ─── listUnitRuntimeRecords ──────────────────────────────────────────────── + +test("listUnitRuntimeRecords_skips_malformed_files", () => { + const root = makeProject(); + const dir = join(root, ".sf", "runtime", "units"); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, "bad.json"), "not json", "utf-8"); + writeUnitRuntimeRecord(root, "execute-task", "M001/S01/T01", Date.now(), { + status: "running", + }); + const records = listUnitRuntimeRecords(root); + assert.equal(records.length, 1); +}); + 
+test("listUnitRuntimeRecords_returns_empty_when_dir_missing", () => { + const root = makeProject(); + const records = listUnitRuntimeRecords(root); + assert.deepEqual(records, []); +}); diff --git a/src/resources/extensions/sf/uok/chaos-monkey.js b/src/resources/extensions/sf/uok/chaos-monkey.js index b0cec8a6e..bea8eb193 100644 --- a/src/resources/extensions/sf/uok/chaos-monkey.js +++ b/src/resources/extensions/sf/uok/chaos-monkey.js @@ -1,23 +1,126 @@ /** - * UOK Chaos Monkey + * UOK Chaos Engineering Monkey * - * Designed to stress-test the kernel's durability and "Parity Heartbeat" recovery mechanisms. - * When enabled, it randomly injects fatal process signals during critical lifecycle phases. + * Purpose: safely stress-test the kernel's durability and parity-recovery + * mechanisms by injecting controlled, non-fatal faults during critical + * lifecycle phases. Never kills the process — faults are always recoverable. + * + * Consumer: UOK kernel loop adapter and turn observer hooks (opt-in only). */ + +import { mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +const DEFAULT_LATENCY_PROBABILITY = 0.05; +const DEFAULT_PARTIAL_FAILURE_PROBABILITY = 0.03; +const DEFAULT_DISK_STRESS_PROBABILITY = 0.01; +const DEFAULT_MEMORY_STRESS_PROBABILITY = 0.005; +const DEFAULT_MAX_LATENCY_MS = 5000; +const DEFAULT_DISK_STRESS_MB = 50; +const DEFAULT_MEMORY_STRESS_MB = 100; + +function randomDelay(ms) { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +function randomInRange(min, max) { + return min + Math.random() * (max - min); +} + export class ChaosMonkey { - constructor(probability = 0.05) { - this.probability = probability; - this.active = true; + constructor(options = {}) { + this.active = options.active ?? false; + this.latencyProbability = + options.latencyProbability ?? DEFAULT_LATENCY_PROBABILITY; + this.partialFailureProbability = + options.partialFailureProbability ?? 
DEFAULT_PARTIAL_FAILURE_PROBABILITY; + this.diskStressProbability = + options.diskStressProbability ?? DEFAULT_DISK_STRESS_PROBABILITY; + this.memoryStressProbability = + options.memoryStressProbability ?? DEFAULT_MEMORY_STRESS_PROBABILITY; + this.maxLatencyMs = options.maxLatencyMs ?? DEFAULT_MAX_LATENCY_MS; + this.diskStressMb = options.diskStressMb ?? DEFAULT_DISK_STRESS_MB; + this.memoryStressMb = options.memoryStressMb ?? DEFAULT_MEMORY_STRESS_MB; + this._injected = []; } - strike(phase) { + /** + * Inject a fault during the named lifecycle phase. + * Returns a promise that resolves after any injected latency. + * + * @param {string} phase — e.g. "turn-start", "dispatch", "verification", "gitops" + */ + async strike(phase) { if (!this.active) return; - if (Math.random() < this.probability) { + // 1. Latency injection + if (Math.random() < this.latencyProbability) { + const delay = Math.floor(randomInRange(100, this.maxLatencyMs)); console.error( - `\n[CHAOS MONKEY] Striking during UOK phase: ${phase}. Simulating catastrophic process failure...`, + `[CHAOS MONKEY] Injecting ${delay}ms latency during phase: ${phase}`, ); - process.kill(process.pid, "SIGKILL"); + this._injected.push({ type: "latency", phase, delay }); + await randomDelay(delay); + } + + // 2. Partial failure (non-fatal error throw) + if (Math.random() < this.partialFailureProbability) { + const errors = [ + "Simulated network timeout", + "Simulated EAGAIN retryable I/O error", + "Simulated provider rate-limit (429)", + "Simulated temporary filesystem unavailability", + ]; + const message = errors[Math.floor(Math.random() * errors.length)]; + console.error( + `[CHAOS MONKEY] Injecting partial failure during phase: ${phase}: ${message}`, + ); + this._injected.push({ type: "partial-failure", phase, message }); + const err = new Error(message); + err.code = "CHAOS_INJECTED"; + err.retryable = true; + throw err; + } + + // 3. 
Disk stress (write large temp file, then clean up) + if (Math.random() < this.diskStressProbability) { + const sizeMb = Math.floor(randomInRange(1, this.diskStressMb)); + console.error( + `[CHAOS MONKEY] Injecting disk stress: writing ${sizeMb}MB temp file during phase: ${phase}`, + ); + this._injected.push({ type: "disk-stress", phase, sizeMb }); + const tmpDir = mkdtempSync(join(tmpdir(), "uok-chaos-")); + const chunk = Buffer.alloc(1024 * 1024, "x"); + for (let i = 0; i < sizeMb; i++) { + writeFileSync(join(tmpDir, `stress-${i}.bin`), chunk); + } + // Immediate cleanup — we only wanted to stress, not leave garbage + rmSync(tmpDir, { recursive: true, force: true }); + } + + // 4. Memory stress (allocate large buffer, then free) + if (Math.random() < this.memoryStressProbability) { + const sizeMb = Math.floor(randomInRange(10, this.memoryStressMb)); + console.error( + `[CHAOS MONKEY] Injecting memory stress: allocating ${sizeMb}MB during phase: ${phase}`, + ); + this._injected.push({ type: "memory-stress", phase, sizeMb }); + // Allocate and immediately release + const buffers = []; + for (let i = 0; i < sizeMb; i++) { + buffers.push(Buffer.alloc(1024 * 1024)); + } + buffers.length = 0; + if (global.gc) global.gc(); } } + + getInjectedEvents() { + return [...this._injected]; + } + + clearEvents() { + this._injected = []; + } } diff --git a/src/resources/extensions/sf/uok/contracts.js b/src/resources/extensions/sf/uok/contracts.js index a5b3668e2..57534277e 100644 --- a/src/resources/extensions/sf/uok/contracts.js +++ b/src/resources/extensions/sf/uok/contracts.js @@ -90,4 +90,30 @@ * @property {number} [lookbackHours] — how far back to query (default 168) */ -export {}; +/** + * Validate that a value conforms to the Gate interface. + * + * Purpose: catch registration-time mistakes (missing id, non-function execute) + * instead of deferring the failure to the first run() call. + * + * Consumer: UokGateRunner.register and gate factory tests. 
+ * + * @param {unknown} value + * @returns {{ valid: true } | { valid: false; reason: string }} + */ +export function validateGate(value) { + if (value == null || typeof value !== "object") { + return { valid: false, reason: "Gate must be an object" }; + } + const gate = /** @type {Record} */ (value); + if (typeof gate.id !== "string" || gate.id.length === 0) { + return { valid: false, reason: "Gate.id must be a non-empty string" }; + } + if (typeof gate.type !== "string" || gate.type.length === 0) { + return { valid: false, reason: "Gate.type must be a non-empty string" }; + } + if (typeof gate.execute !== "function") { + return { valid: false, reason: "Gate.execute must be a function" }; + } + return { valid: true }; +} diff --git a/src/resources/extensions/sf/uok/cost-guard-gate.js b/src/resources/extensions/sf/uok/cost-guard-gate.js index c46757b02..739049cb2 100644 --- a/src/resources/extensions/sf/uok/cost-guard-gate.js +++ b/src/resources/extensions/sf/uok/cost-guard-gate.js @@ -1,36 +1,141 @@ /** * UOK Autonomous Cost-Guard Gate * - * Prevents "money burning" by detecting repeated failures with expensive models. - * If a task fails verification twice with a high-tier model, this gate - * forces a model downgrade or a "sanity check" with a different provider. + * Purpose: prevent runaway spend by enforcing cumulative cost budgets per unit + * and per rolling time window, using actual recorded costs from the LLM task + * outcome ledger and the bundled model cost table. + * + * Consumer: UOK gate-runner during post-unit verification. 
*/ + +import { BUNDLED_COST_TABLE, lookupModelCost } from "../model-cost-table.js"; +import { + getLlmTaskOutcomeStats, + getLlmTaskOutcomesByUnit, + getRecentLlmTaskOutcomes, +} from "../sf-db.js"; + +const DEFAULT_MAX_USD_PER_UNIT = 5.0; +const DEFAULT_MAX_USD_PER_HOUR = 20.0; +const DEFAULT_HIGH_TIER_FAILURE_THRESHOLD = 2; + +function isHighTierModel(modelId) { + const cost = lookupModelCost(modelId); + if (!cost) return false; + // Anything >= $0.005 per 1K input tokens is considered high-tier + return cost.inputPer1k >= 0.005; +} + +function sumCostUsd(outcomes) { + return outcomes.reduce((sum, o) => sum + (o.cost_usd ?? 0), 0); +} + +function countConsecutiveFailures(outcomes) { + let count = 0; + for (const o of outcomes) { + if (!o.succeeded) count++; + else break; + } + return count; +} + export class CostGuardGate { - constructor() { + constructor(options = {}) { this.id = "cost-guard"; this.type = "policy"; + this.maxUsdPerUnit = options.maxUsdPerUnit ?? DEFAULT_MAX_USD_PER_UNIT; + this.maxUsdPerHour = options.maxUsdPerHour ?? DEFAULT_MAX_USD_PER_HOUR; + this.highTierFailureThreshold = + options.highTierFailureThreshold ?? DEFAULT_HIGH_TIER_FAILURE_THRESHOLD; } /** * @param {import("./contracts.js").UokContext} ctx + * @param {number} _attempt */ - async execute(ctx) { - const retryCount = ctx.iteration || 0; - const currentModel = ctx.modelId || "unknown"; + async execute(ctx, _attempt) { + const modelId = ctx.modelId ?? "unknown"; + const unitType = ctx.unitType ?? "unknown"; + const unitId = ctx.unitId ?? "unknown"; - // If we've failed twice with a high-tier model (mock detection) - if (retryCount >= 2 && currentModel.includes("gpt-4")) { + // 1. 
Check cumulative spend for this specific unit + const unitOutcomes = getLlmTaskOutcomesByUnit(unitType, unitId, 20); + const unitCost = sumCostUsd(unitOutcomes); + if (unitCost >= this.maxUsdPerUnit) { return { outcome: "fail", failureClass: "policy", - rationale: `Cost-Guard blocked ${currentModel}: 2+ consecutive failures. Downgrading to optimize cost.`, - findings: "Recommended: Switch to Haiku or Flash for remediation.", + rationale: `Cost-Guard blocked: unit ${unitId} has consumed $${unitCost.toFixed(2)} USD (threshold $${this.maxUsdPerUnit.toFixed(2)}).`, + findings: `Recorded ${unitOutcomes.length} outcome(s) for this unit. Cumulative cost $${unitCost.toFixed(2)}.`, + recommendation: + "Review unit plan for redundancy, downgrade model, or split into smaller tasks.", }; } + // 2. Check rolling 1-hour window spend across all units + const recentOutcomes = getRecentLlmTaskOutcomes(1, 200); + const hourCost = sumCostUsd(recentOutcomes); + if (hourCost >= this.maxUsdPerHour) { + return { + outcome: "fail", + failureClass: "policy", + rationale: `Cost-Guard blocked: rolling 1-hour spend is $${hourCost.toFixed(2)} USD (threshold $${this.maxUsdPerHour.toFixed(2)}).`, + findings: `Recorded ${recentOutcomes.length} outcome(s) in the last hour. Cumulative cost $${hourCost.toFixed(2)}.`, + recommendation: + "Pause autonomous dispatch until hourly budget resets. Review dispatch frequency or model selection.", + }; + } + + // 3. 
Check high-tier model failure pattern + if (isHighTierModel(modelId)) { + const modelStats = getLlmTaskOutcomeStats(modelId, 24); + const recentUnitFailures = countConsecutiveFailures(unitOutcomes); + if ( + recentUnitFailures >= this.highTierFailureThreshold || + (modelStats.total >= this.highTierFailureThreshold && + modelStats.failed / modelStats.total >= 0.5) + ) { + const cheaper = this._suggestCheaperAlternative(modelId); + return { + outcome: "fail", + failureClass: "policy", + rationale: `Cost-Guard blocked high-tier model ${modelId}: ${recentUnitFailures} consecutive failure(s) for this unit, or ${modelStats.failed}/${modelStats.total} failures in 24h.`, + findings: `Unit cost $${unitCost.toFixed(2)}. 24h model failure rate ${modelStats.total > 0 ? ((modelStats.failed / modelStats.total) * 100).toFixed(0) : 0}%.`, + recommendation: cheaper + ? `Switch to ${cheaper.id} ($${cheaper.inputPer1k}/1K tokens) to reduce burn rate while debugging.` + : "Downgrade to a cheaper model tier before retrying.", + }; + } + } + return { outcome: "pass", - rationale: "Cost budget and model tier within safe limits.", + rationale: `Cost budget safe: unit $${unitCost.toFixed(2)}/$${this.maxUsdPerUnit.toFixed(2)}, hour $${hourCost.toFixed(2)}/$${this.maxUsdPerHour.toFixed(2)}.`, }; } + + _suggestCheaperAlternative(modelId) { + const current = lookupModelCost(modelId); + if (!current) return null; + const providerPrefix = modelId.includes("/") + ? modelId.split("/")[0].toLowerCase() + : undefined; + const bareId = modelId.includes("/") ? 
modelId.split("/").pop() : modelId; + const familyPrefix = bareId.split(/[-.]/)[0]?.toLowerCase(); + const candidates = BUNDLED_COST_TABLE.filter((candidate) => { + if (candidate.id === current.id) return false; + if (candidate.inputPer1k >= current.inputPer1k * 0.5) return false; + if ( + providerPrefix && + candidate.id.toLowerCase().startsWith(providerPrefix) + ) { + return true; + } + return ( + familyPrefix !== undefined && + candidate.id.toLowerCase().startsWith(familyPrefix) + ); + }).sort((a, b) => a.inputPer1k - b.inputPer1k); + return candidates[0] ?? null; + } } diff --git a/src/resources/extensions/sf/uok/gate-runner.js b/src/resources/extensions/sf/uok/gate-runner.js index 92aabbc99..a9056161a 100644 --- a/src/resources/extensions/sf/uok/gate-runner.js +++ b/src/resources/extensions/sf/uok/gate-runner.js @@ -1,5 +1,10 @@ -import { insertGateRun } from "../sf-db.js"; +import { + getGateCircuitBreaker, + insertGateRun, + updateGateCircuitBreaker, +} from "../sf-db.js"; import { buildAuditEnvelope, emitUokAuditEvent } from "./audit.js"; +import { validateGate } from "./contracts.js"; const RETRY_MATRIX = { none: 0, @@ -14,18 +19,109 @@ const RETRY_MATRIX = { "manual-attention": 0, unknown: 0, }; + +const CIRCUIT_BREAKER_FAILURE_THRESHOLD = 5; +const CIRCUIT_BREAKER_OPEN_DURATION_MS = 60_000; +const CIRCUIT_BREAKER_HALF_OPEN_MAX_ATTEMPTS = 3; + +function nowIso() { + return new Date().toISOString(); +} + export class UokGateRunner { registry = new Map(); register(gate) { + const validation = validateGate(gate); + if (!validation.valid) { + throw new Error(`UokGateRunner.register: ${validation.reason}`); + } this.registry.set(gate.id, gate); } list() { return Array.from(this.registry.values()); } + + _checkCircuitBreaker(gateId) { + const breaker = getGateCircuitBreaker(gateId); + if (breaker.state === "open") { + const openedAt = breaker.openedAt ? 
Date.parse(breaker.openedAt) : 0; + if (Date.now() - openedAt >= CIRCUIT_BREAKER_OPEN_DURATION_MS) { + // Transition to half-open automatically after cooldown + updateGateCircuitBreaker(gateId, { + state: "half-open", + failureStreak: breaker.failureStreak, + halfOpenAttempts: 0, + }); + return { blocked: false }; + } + return { + blocked: true, + reason: `Circuit breaker OPEN for ${gateId} (failure streak ${breaker.failureStreak}). Cooldown until ${new Date(openedAt + CIRCUIT_BREAKER_OPEN_DURATION_MS).toISOString()}.`, + }; + } + if (breaker.state === "half-open") { + if (breaker.halfOpenAttempts >= CIRCUIT_BREAKER_HALF_OPEN_MAX_ATTEMPTS) { + // Too many half-open attempts without success — go back to open + updateGateCircuitBreaker(gateId, { + state: "open", + failureStreak: breaker.failureStreak, + openedAt: nowIso(), + halfOpenAttempts: 0, + }); + return { + blocked: true, + reason: `Circuit breaker half-open limit exceeded for ${gateId}; reopening.`, + }; + } + } + return { blocked: false }; + } + + _updateCircuitBreaker(gateId, succeeded) { + const breaker = getGateCircuitBreaker(gateId); + if (succeeded) { + updateGateCircuitBreaker(gateId, { + state: "closed", + failureStreak: 0, + lastFailureAt: null, + openedAt: null, + halfOpenAttempts: 0, + }); + return; + } + const nextStreak = breaker.failureStreak + 1; + if (breaker.state === "half-open") { + updateGateCircuitBreaker(gateId, { + state: "open", + failureStreak: nextStreak, + lastFailureAt: nowIso(), + openedAt: nowIso(), + halfOpenAttempts: 0, + }); + return; + } + if (nextStreak >= CIRCUIT_BREAKER_FAILURE_THRESHOLD) { + updateGateCircuitBreaker(gateId, { + state: "open", + failureStreak: nextStreak, + lastFailureAt: nowIso(), + openedAt: nowIso(), + halfOpenAttempts: 0, + }); + return; + } + updateGateCircuitBreaker(gateId, { + state: "closed", + failureStreak: nextStreak, + lastFailureAt: nowIso(), + halfOpenAttempts: 0, + }); + } + async run(id, ctx) { const gate = this.registry.get(id); if (!gate) 
{ - const now = new Date().toISOString(); + const now = nowIso(); const unknownResult = { gateId: id, gateType: "unknown", @@ -55,6 +151,7 @@ export class UokGateRunner { maxAttempts: unknownResult.maxAttempts, retryable: unknownResult.retryable, evaluatedAt: unknownResult.evaluatedAt, + durationMs: 0, }); emitUokAuditEvent( ctx.basePath, @@ -76,13 +173,81 @@ export class UokGateRunner { ); return unknownResult; } + + // Circuit breaker check + const cbCheck = this._checkCircuitBreaker(id); + if (cbCheck.blocked) { + const now = nowIso(); + const cbResult = { + gateId: id, + gateType: gate.type, + outcome: "fail", + failureClass: "policy", + rationale: cbCheck.reason, + attempt: 1, + maxAttempts: 1, + retryable: false, + evaluatedAt: now, + }; + insertGateRun({ + traceId: ctx.traceId, + turnId: ctx.turnId, + gateId: cbResult.gateId, + gateType: cbResult.gateType, + unitType: ctx.unitType, + unitId: ctx.unitId, + milestoneId: ctx.milestoneId, + sliceId: ctx.sliceId, + taskId: ctx.taskId, + outcome: cbResult.outcome, + failureClass: cbResult.failureClass, + rationale: cbResult.rationale, + findings: cbResult.findings, + attempt: cbResult.attempt, + maxAttempts: cbResult.maxAttempts, + retryable: cbResult.retryable, + evaluatedAt: cbResult.evaluatedAt, + durationMs: 0, + }); + emitUokAuditEvent( + ctx.basePath, + buildAuditEnvelope({ + traceId: ctx.traceId, + turnId: ctx.turnId, + category: "gate", + type: "gate-run", + payload: { + gateId: cbResult.gateId, + gateType: cbResult.gateType, + outcome: cbResult.outcome, + failureClass: cbResult.failureClass, + attempt: cbResult.attempt, + maxAttempts: cbResult.maxAttempts, + retryable: cbResult.retryable, + }, + }), + ); + return cbResult; + } + + // Half-open tracking + const breaker = getGateCircuitBreaker(id); + if (breaker.state === "half-open") { + updateGateCircuitBreaker(id, { + state: "half-open", + failureStreak: breaker.failureStreak, + halfOpenAttempts: breaker.halfOpenAttempts + 1, + }); + } + let attempt = 0; 
let final = null; const maxAttemptsByFailureClass = RETRY_MATRIX; const maxAttemptsCeiling = Math.max(...Object.values(RETRY_MATRIX)) + 1; while (attempt < maxAttemptsCeiling) { attempt += 1; - const now = new Date().toISOString(); + const now = nowIso(); + const startedAt = Date.now(); let result; try { result = await gate.execute(ctx, attempt); @@ -94,6 +259,7 @@ export class UokGateRunner { rationale: message, }; } + const durationMs = Date.now() - startedAt; const failureClass = result.failureClass ?? (result.outcome === "pass" ? "none" : "unknown"); const retryBudget = maxAttemptsByFailureClass[failureClass] ?? 0; @@ -128,6 +294,7 @@ export class UokGateRunner { maxAttempts: final.maxAttempts, retryable: final.retryable, evaluatedAt: final.evaluatedAt, + durationMs, }); emitUokAuditEvent( ctx.basePath, @@ -144,11 +311,17 @@ export class UokGateRunner { attempt: final.attempt, maxAttempts: final.maxAttempts, retryable: final.retryable, + durationMs, }, }), ); if (!retryable) break; } + + // Update circuit breaker based on final outcome + const succeeded = final?.outcome === "pass"; + this._updateCircuitBreaker(id, succeeded); + return ( final ?? { gateId: gate.id, @@ -158,7 +331,7 @@ export class UokGateRunner { attempt: 1, maxAttempts: 1, retryable: false, - evaluatedAt: new Date().toISOString(), + evaluatedAt: nowIso(), } ); } diff --git a/src/resources/extensions/sf/uok/message-bus.js b/src/resources/extensions/sf/uok/message-bus.js index f0789a78d..5d648781d 100644 --- a/src/resources/extensions/sf/uok/message-bus.js +++ b/src/resources/extensions/sf/uok/message-bus.js @@ -1,62 +1,228 @@ /** - * UOK Message Bus & Agent Inbox + * UOK Durable Message Bus & Agent Inbox * - * Implements Letta-style inter-agent communication. - * Allows agents to send messages, check inboxes, and wait for replies - * across turn boundaries. + * Purpose: implement Letta-style inter-agent communication with at-least-once + * delivery guarantees via append-only JSONL. 
Messages survive process restarts + * and are retained with configurable TTL. + * + * Consumer: multi-agent orchestration, cross-turn coordination, and UOK kernel + * observer chains. */ +import { randomUUID } from "node:crypto"; +import { + appendFileSync, + existsSync, + mkdirSync, + readFileSync, + writeFileSync, +} from "node:fs"; +import { join } from "node:path"; +import { sfRoot } from "../paths.js"; + +const DEFAULT_RETENTION_DAYS = 7; +const DEFAULT_MAX_INBOX_SIZE = 1000; + +function messagesPath(basePath) { + return join(sfRoot(basePath), "runtime", "uok-messages.jsonl"); +} + +function inboxStatePath(basePath, agentId) { + const sanitized = agentId.replace(/[^a-zA-Z0-9_-]/g, "_"); + return join(sfRoot(basePath), "runtime", `uok-inbox-${sanitized}.json`); +} + +function loadMessages(basePath) { + const path = messagesPath(basePath); + if (!existsSync(path)) return []; + const lines = readFileSync(path, "utf-8").split(/\r?\n/).filter(Boolean); + const results = []; + for (const line of lines) { + try { + results.push(JSON.parse(line)); + } catch { + // Skip malformed lines + } + } + return results; +} + +function appendMessage(basePath, message) { + const path = messagesPath(basePath); + const dir = join(sfRoot(basePath), "runtime"); + mkdirSync(dir, { recursive: true }); + appendFileSync(path, `${JSON.stringify(message)}\n`, "utf-8"); +} + +function loadInboxState(basePath, agentId) { + const path = inboxStatePath(basePath, agentId); + if (!existsSync(path)) return { readIds: [] }; + try { + return JSON.parse(readFileSync(path, "utf-8")); + } catch { + return { readIds: [] }; + } +} + +function saveInboxState(basePath, agentId, state) { + const path = inboxStatePath(basePath, agentId); + const dir = join(sfRoot(basePath), "runtime"); + mkdirSync(dir, { recursive: true }); + writeFileSync(path, `${JSON.stringify(state)}\n`, "utf-8"); +} + +function pruneOldMessages(messages, retentionDays) { + const cutoff = Date.now() - retentionDays * 24 * 60 * 60 * 
1000; + return messages.filter((m) => { + const ts = m.sentAt ? Date.parse(m.sentAt) : 0; + return ts >= cutoff; + }); +} + export class AgentInbox { - constructor(agentId, basePath) { + constructor(agentId, basePath, options = {}) { this.agentId = agentId; this.basePath = basePath; - this.messages = []; + this.maxSize = options.maxInboxSize ?? DEFAULT_MAX_INBOX_SIZE; + this.retentionDays = options.retentionDays ?? DEFAULT_RETENTION_DAYS; + this._state = loadInboxState(basePath, agentId); + this._messages = this._hydrate(); + } + + _hydrate() { + const all = loadMessages(this.basePath); + const readIds = new Set(this._state.readIds ?? []); + const forMe = all + .filter((m) => m.to === this.agentId) + .map((m) => ({ + ...m, + read: readIds.has(m.id), + })); + return pruneOldMessages(forMe, this.retentionDays).slice(-this.maxSize); } receive(message) { - this.messages.push({ + const enriched = { ...message, receivedAt: new Date().toISOString(), read: false, - }); + }; + this._messages.push(enriched); + if (this._messages.length > this.maxSize) { + this._messages = this._messages.slice(-this.maxSize); + } + return enriched; } list(unreadOnly = false) { - return unreadOnly ? this.messages.filter((m) => !m.read) : this.messages; + return unreadOnly + ? 
this._messages.filter((m) => !m.read) + : [...this._messages]; } markRead(messageId) { - const msg = this.messages.find((m) => m.id === messageId); - if (msg) msg.read = true; + const msg = this._messages.find((m) => m.id === messageId); + if (msg) { + msg.read = true; + if (!this._state.readIds.includes(messageId)) { + this._state.readIds.push(messageId); + saveInboxState(this.basePath, this.agentId, this._state); + } + } + return !!msg; + } + + get unreadCount() { + return this._messages.filter((m) => !m.read).length; + } + + refresh() { + this._messages = this._hydrate(); } } export class MessageBus { - constructor(basePath) { + constructor(basePath, options = {}) { this.basePath = basePath; + this.retentionDays = options.retentionDays ?? DEFAULT_RETENTION_DAYS; + this.maxInboxSize = options.maxInboxSize ?? DEFAULT_MAX_INBOX_SIZE; this.inboxes = new Map(); + this._messages = loadMessages(basePath); } - getOrCreateInbox(agentId) { + _getOrCreateInbox(agentId) { if (!this.inboxes.has(agentId)) { - this.inboxes.set(agentId, new AgentInbox(agentId, this.basePath)); + this.inboxes.set( + agentId, + new AgentInbox(agentId, this.basePath, { + retentionDays: this.retentionDays, + maxInboxSize: this.maxInboxSize, + }), + ); } return this.inboxes.get(agentId); } send(from, to, body, metadata = {}) { const message = { - id: `msg-${Date.now()}-${Math.random().toString(36).slice(2, 7)}`, + id: `msg-${randomUUID()}`, from, to, body, metadata, sentAt: new Date().toISOString(), + deliveredAt: new Date().toISOString(), }; - const targetInbox = this.getOrCreateInbox(to); - targetInbox.receive(message); + appendMessage(this.basePath, message); + this._messages.push(message); + + const targetInbox = this._getOrCreateInbox(to); + const alreadyHas = targetInbox.list().some((m) => m.id === message.id); + if (!alreadyHas) { + targetInbox.receive(message); + } return message.id; } + + broadcast(from, recipients, body, metadata = {}) { + const ids = []; + for (const to of recipients) { + 
ids.push(this.send(from, to, body, metadata)); + } + return ids; + } + + getInbox(agentId) { + return this._getOrCreateInbox(agentId); + } + + getConversation(agentA, agentB) { + const all = loadMessages(this.basePath); + return pruneOldMessages( + all.filter( + (m) => + (m.from === agentA && m.to === agentB) || + (m.from === agentB && m.to === agentA), + ), + this.retentionDays, + ); + } + + compact() { + const path = messagesPath(this.basePath); + const all = loadMessages(this.basePath); + const pruned = pruneOldMessages(all, this.retentionDays); + const dir = join(sfRoot(this.basePath), "runtime"); + mkdirSync(dir, { recursive: true }); + writeFileSync( + path, + pruned.map((m) => JSON.stringify(m)).join("\n") + + (pruned.length ? "\n" : ""), + "utf-8", + ); + this._messages = pruned; + return { before: all.length, after: pruned.length }; + } } diff --git a/src/resources/extensions/sf/uok/metrics-exposition.js b/src/resources/extensions/sf/uok/metrics-exposition.js new file mode 100644 index 000000000..a9ec78d4c --- /dev/null +++ b/src/resources/extensions/sf/uok/metrics-exposition.js @@ -0,0 +1,121 @@ +/** + * UOK Prometheus-style Metrics Exposition + * + * Purpose: expose gate-run counters, circuit breaker states, and latency + * histograms in Prometheus text format so external collectors (or a local + * dashboard) can scrape them without needing SQLite access. + * + * Consumer: health widgets, /sf uok status, and external monitoring. 
/**
 * UOK Prometheus-style Metrics Exposition
 *
 * Purpose: expose gate-run counters, circuit breaker states, and latency
 * histograms in Prometheus text format so external collectors (or a local
 * dashboard) can scrape them without needing SQLite access.
 *
 * Consumer: health widgets, /sf uok status, and external monitoring.
 */

import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
import { join } from "node:path";
import { sfRoot } from "../paths.js";
import {
  getGateCircuitBreaker,
  getGateLatencyStats,
  getGateRunStats,
  isDbAvailable,
} from "../sf-db.js";

// Gates included in every scrape. NOTE(review): keep in sync with the gates
// registered in gate-runner — presumably these are the four registered ids.
const GATE_NAMES = [
  "security-guard",
  "cost-guard",
  "outcome-learning",
  "multi-package-healing",
];

/**
 * Escape a label value per the Prometheus text exposition format:
 * backslash, double-quote and newline must be backslash-escaped.
 * (Current gate ids/states contain none of these, but emitting unescaped
 * values would silently corrupt the exposition if they ever do.)
 */
function escapeLabelValue(value) {
  return String(value)
    .replace(/\\/g, "\\\\")
    .replace(/"/g, '\\"')
    .replace(/\n/g, "\\n");
}

/**
 * Format one sample line, e.g. `name{gate_id="cost-guard"} 42`.
 * Counters and gauges share the same sample syntax; only `# TYPE` differs.
 */
function fmtCounter(name, value, labels = {}) {
  const labelStr = Object.entries(labels)
    .map(([k, v]) => `${k}="${escapeLabelValue(v)}"`)
    .join(",");
  const suffix = labelStr ? `{${labelStr}}` : "";
  return `${name}${suffix} ${value}`;
}

// Alias kept for call-site readability: gauge samples look like counters.
function fmtGauge(name, value, labels = {}) {
  return fmtCounter(name, value, labels);
}

/**
 * Collect per-gate samples (24h run counters, latency stats, breaker state)
 * from sf-db. Caller must have checked isDbAvailable() first.
 */
function collectGateMetrics() {
  const lines = [];
  for (const gateId of GATE_NAMES) {
    const stats = getGateRunStats(gateId, 24);
    lines.push(fmtCounter("uok_gate_runs_total", stats.total, { gate_id: gateId }));
    lines.push(fmtCounter("uok_gate_runs_passed_total", stats.pass, { gate_id: gateId }));
    lines.push(fmtCounter("uok_gate_runs_failed_total", stats.fail, { gate_id: gateId }));
    lines.push(fmtCounter("uok_gate_runs_retry_total", stats.retry, { gate_id: gateId }));

    const latency = getGateLatencyStats(gateId, 24);
    lines.push(fmtGauge("uok_gate_latency_avg_ms", latency.avgMs, { gate_id: gateId }));
    lines.push(fmtGauge("uok_gate_latency_p50_ms", latency.p50Ms, { gate_id: gateId }));
    lines.push(fmtGauge("uok_gate_latency_p95_ms", latency.p95Ms, { gate_id: gateId }));
    lines.push(fmtGauge("uok_gate_latency_max_ms", latency.maxMs, { gate_id: gateId }));

    const breaker = getGateCircuitBreaker(gateId);
    // Encode the breaker state numerically (Prometheus-friendly) while also
    // attaching the human-readable state as a label for dashboards.
    const stateMap = { closed: 0, "half-open": 1, open: 2 };
    lines.push(
      fmtGauge("uok_gate_circuit_breaker_state", stateMap[breaker.state] ?? 0, {
        gate_id: gateId,
        state: breaker.state,
      }),
    );
    lines.push(
      fmtGauge("uok_gate_circuit_breaker_failure_streak", breaker.failureStreak, {
        gate_id: gateId,
      }),
    );
  }
  return lines;
}

/**
 * Build the full exposition document: HELP/TYPE headers always, samples only
 * when the SQLite DB is reachable (headers-only output is still valid to scrape).
 */
function buildMetricsText() {
  const lines = [
    "# HELP uok_gate_runs_total Total gate runs in the last 24h",
    "# TYPE uok_gate_runs_total counter",
    "# HELP uok_gate_runs_passed_total Passed gate runs in the last 24h",
    "# TYPE uok_gate_runs_passed_total counter",
    "# HELP uok_gate_runs_failed_total Failed gate runs in the last 24h",
    "# TYPE uok_gate_runs_failed_total counter",
    "# HELP uok_gate_runs_retry_total Retried gate runs in the last 24h",
    "# TYPE uok_gate_runs_retry_total counter",
    "# HELP uok_gate_latency_avg_ms Average gate execution latency in ms",
    "# TYPE uok_gate_latency_avg_ms gauge",
    "# HELP uok_gate_latency_p50_ms P50 gate execution latency in ms",
    "# TYPE uok_gate_latency_p50_ms gauge",
    "# HELP uok_gate_latency_p95_ms P95 gate execution latency in ms",
    "# TYPE uok_gate_latency_p95_ms gauge",
    "# HELP uok_gate_latency_max_ms Max gate execution latency in ms",
    "# TYPE uok_gate_latency_max_ms gauge",
    "# HELP uok_gate_circuit_breaker_state Circuit breaker state (0=closed, 1=half-open, 2=open)",
    "# TYPE uok_gate_circuit_breaker_state gauge",
    "# HELP uok_gate_circuit_breaker_failure_streak Consecutive failures since last pass",
    "# TYPE uok_gate_circuit_breaker_failure_streak gauge",
  ];
  if (isDbAvailable()) {
    lines.push(...collectGateMetrics());
  }
  // The Prometheus text format requires a trailing newline after the last line.
  return lines.join("\n") + "\n";
}

/** Absolute path of the scrape file under <sfRoot>/runtime. */
export function metricsPath(basePath) {
  return join(sfRoot(basePath), "runtime", "uok-metrics.prom");
}

/**
 * Write a fresh metrics snapshot to disk and return its path.
 * Creates the runtime directory if it does not exist.
 */
export function writeUokMetrics(basePath) {
  const path = metricsPath(basePath);
  const dir = join(sfRoot(basePath), "runtime");
  mkdirSync(dir, { recursive: true });
  writeFileSync(path, buildMetricsText(), "utf-8");
  return path;
}

/**
 * Read the last written metrics snapshot, or null if none exists or the read
 * fails.
 *
 * BUG FIX: this previously returned buildMetricsText() — i.e. rebuilt fresh
 * metrics — instead of reading the persisted file, so callers never saw the
 * written snapshot and the existsSync() guard was dead code.
 */
export function readUokMetrics(basePath) {
  const path = metricsPath(basePath);
  if (!existsSync(path)) return null;
  try {
    return readFileSync(path, "utf-8");
  } catch {
    return null;
  }
}
/**
 * UOK Multi-Package Healing Gate
 *
 * Purpose: detect when code changes impact monorepo packages, build a
 * workspace dependency graph, and run verification only on affected packages
 * and their downstream dependents. Prevents full-workspace typecheck bottlenecks.
 *
 * Consumer: UOK gate-runner during post-unit verification.
 */

import { execFile } from "node:child_process";
import { existsSync, readdirSync, readFileSync } from "node:fs";
import { join } from "node:path";

const DEFAULT_TIMEOUT_MS = 120_000;
const DEFAULT_ADDITIONAL_CHECKS = ["typecheck"];
// npm-script output (e.g. verbose typecheck logs) can exceed execFile's
// 1 MiB default buffer, which would surface as a spurious failure.
const MAX_BUFFER_BYTES = 10 * 1024 * 1024;

/**
 * Promisified execFile that attaches stdout/stderr to the rejection error.
 *
 * BUG FIX: the previous version layered a hand-rolled setTimeout over
 * execFile; the timer could reject with empty output and then the execFile
 * callback would fire a second settlement after kill(). execFile supports
 * `timeout` natively (SIGTERM on expiry, partial output preserved), so the
 * option is simply passed through.
 */
function execFilePromise(file, args, options = {}) {
  return new Promise((resolve, reject) => {
    execFile(
      file,
      args,
      { maxBuffer: MAX_BUFFER_BYTES, ...options },
      (error, stdout, stderr) => {
        if (error) {
          reject(Object.assign(error, { stdout, stderr }));
        } else {
          resolve({ stdout, stderr });
        }
      },
    );
  });
}

/** Parse a JSON file, returning null on absence or malformed content. */
function readJsonSafe(path) {
  if (!existsSync(path)) return null;
  try {
    return JSON.parse(readFileSync(path, "utf-8"));
  } catch {
    return null;
  }
}

/**
 * Minimal parser for the `packages:` list in pnpm-workspace.yaml.
 *
 * BUG FIX: the previous code fed the YAML file to JSON.parse (via
 * readJsonSafe), which always threw, so pnpm workspaces were silently never
 * discovered. We only need the `packages:` sequence, so a tiny line parser
 * suffices without adding a YAML dependency.
 */
function readPnpmWorkspacePatterns(basePath) {
  const path = join(basePath, "pnpm-workspace.yaml");
  if (!existsSync(path)) return [];
  try {
    const patterns = [];
    let inPackages = false;
    for (const raw of readFileSync(path, "utf-8").split("\n")) {
      const line = raw.trim();
      if (line.startsWith("packages:")) {
        inPackages = true;
        continue;
      }
      if (!inPackages) continue;
      if (line.startsWith("- ")) {
        // Strip optional surrounding quotes: - "packages/*"
        patterns.push(line.slice(2).trim().replace(/^['"]|['"]$/g, ""));
      } else if (line && !line.startsWith("#")) {
        inPackages = false; // next top-level key ends the sequence
      }
    }
    return patterns;
  } catch {
    return [];
  }
}

/** Normalize a package.json into the record shape used by the graph. */
function packageRecord(pkg, relPath) {
  return {
    name: pkg.name,
    path: relPath,
    dependencies: Object.keys(pkg.dependencies ?? {}),
    devDependencies: Object.keys(pkg.devDependencies ?? {}),
  };
}

/**
 * Expand one workspace pattern ("dir/*" glob or a literal directory) into
 * package records, de-duplicated by package name.
 *
 * Relative paths use "/" separators regardless of platform: they are later
 * compared against `git diff --name-only` output, which always uses "/"
 * (the old join()-built paths broke that comparison on Windows).
 */
function collectPackagesForPattern(basePath, pattern, byName) {
  const addIfValid = (pkg, relPath) => {
    // Nameless package.json files cannot participate in the graph; skip them.
    if (pkg && typeof pkg.name === "string" && !byName.has(pkg.name)) {
      byName.set(pkg.name, packageRecord(pkg, relPath));
    }
  };
  if (pattern.endsWith("/*")) {
    const dir = pattern.slice(0, -2);
    const baseDir = join(basePath, dir);
    if (!existsSync(baseDir)) return;
    for (const entry of readdirSync(baseDir, { withFileTypes: true })) {
      if (!entry.isDirectory()) continue;
      addIfValid(
        readJsonSafe(join(baseDir, entry.name, "package.json")),
        `${dir}/${entry.name}`,
      );
    }
  } else {
    addIfValid(readJsonSafe(join(basePath, pattern, "package.json")), pattern);
  }
}

/**
 * Discover workspace packages from npm `workspaces` (root package.json) and
 * pnpm-workspace.yaml. Returns [] when neither is present. Only the simple
 * "dir/*" glob form and literal directories are supported.
 */
function discoverWorkspacePackages(basePath) {
  const byName = new Map();
  const rootPkg = readJsonSafe(join(basePath, "package.json"));
  const npmPatterns = Array.isArray(rootPkg?.workspaces) ? rootPkg.workspaces : [];
  const pnpmPatterns = readPnpmWorkspacePatterns(basePath);
  for (const pattern of [...npmPatterns, ...pnpmPatterns]) {
    collectPackagesForPattern(basePath, pattern, byName);
  }
  return [...byName.values()];
}

/**
 * Build a reverse dependency graph: for each package name, the set of
 * workspace packages that depend on it (directly, via deps or devDeps).
 */
function buildDependencyGraph(packages) {
  const graph = new Map();
  const nameToPackage = new Map();
  for (const pkg of packages) {
    nameToPackage.set(pkg.name, pkg);
    graph.set(pkg.name, new Set());
  }
  for (const pkg of packages) {
    for (const dep of [...pkg.dependencies, ...pkg.devDependencies]) {
      // Only edges between workspace packages matter; external deps are ignored.
      if (nameToPackage.has(dep)) {
        graph.get(dep).add(pkg.name);
      }
    }
  }
  return { graph, nameToPackage };
}

/**
 * Map changed file paths (git-style, "/"-separated, repo-relative) to the
 * workspace packages whose directories contain them.
 */
function getAffectedPackages(changedFiles, packages) {
  const affected = new Set();
  for (const file of changedFiles) {
    for (const pkg of packages) {
      if (file === pkg.path || file.startsWith(pkg.path + "/")) {
        affected.add(pkg.name);
      }
    }
  }
  return affected;
}

/**
 * BFS over the reverse dependency graph: the affected packages plus every
 * transitive dependent that could regress.
 */
function getDownstreamDependents(graph, affected) {
  const result = new Set(affected);
  const queue = [...affected];
  while (queue.length > 0) {
    const current = queue.shift();
    for (const dependent of graph.get(current) ?? []) {
      if (!result.has(dependent)) {
        result.add(dependent);
        queue.push(dependent);
      }
    }
  }
  return result;
}

export class MultiPackageGate {
  id;
  type;
  timeoutMs;
  additionalChecks;

  /**
   * @param {{timeoutMs?: number, additionalChecks?: string[]}} [options]
   *   timeoutMs — per-check execFile timeout (default 120s);
   *   additionalChecks — npm script names to run per package (default ["typecheck"]).
   */
  constructor(options = {}) {
    this.id = "multi-package-healing";
    this.type = "verification";
    this.timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS;
    this.additionalChecks =
      options.additionalChecks ?? DEFAULT_ADDITIONAL_CHECKS;
  }

  /**
   * Run the gate: discover packages, diff changed files against HEAD, expand
   * to downstream dependents, and run each configured npm script per package.
   *
   * @param {import("./contracts.js").UokContext} ctx
   * @returns {Promise<{outcome: string, rationale: string, failureClass?: string, findings?: string}>}
   */
  async execute(ctx) {
    try {
      // 1. Discover workspace packages
      const packages = discoverWorkspacePackages(ctx.basePath);
      if (packages.length === 0) {
        return {
          outcome: "pass",
          rationale:
            "No workspace packages detected; skipping multi-package verification.",
        };
      }

      // 2. Find changed files (uncommitted working-tree diff vs HEAD)
      const diffOutput = (
        await execFilePromise("git", ["diff", "--name-only", "HEAD"], {
          cwd: ctx.basePath,
          stdio: ["ignore", "pipe", "pipe"],
          encoding: "utf-8",
          timeout: 10_000,
        })
      ).stdout;
      const changedFiles = diffOutput.split("\n").filter(Boolean);
      if (changedFiles.length === 0) {
        return {
          outcome: "pass",
          rationale:
            "No uncommitted changes; skipping multi-package verification.",
        };
      }

      // 3. Determine directly affected packages
      const affected = getAffectedPackages(changedFiles, packages);
      if (affected.size === 0) {
        return {
          outcome: "pass",
          rationale: "Changes do not impact any workspace packages.",
        };
      }

      // 4. Expand to downstream dependents via the dependency graph
      const { graph } = buildDependencyGraph(packages);
      const toVerify = getDownstreamDependents(graph, affected);

      // 5. Run targeted checks in each affected package
      const failures = [];
      for (const check of this.additionalChecks) {
        for (const pkgName of toVerify) {
          const pkg = packages.find((p) => p.name === pkgName);
          if (!pkg) continue;
          const pkgPath = join(ctx.basePath, pkg.path);
          if (!existsSync(join(pkgPath, "package.json"))) continue;

          try {
            // --if-present: packages lacking the script are skipped, not failed.
            await execFilePromise("npm", ["run", check, "--if-present"], {
              cwd: pkgPath,
              stdio: ["ignore", "pipe", "pipe"],
              encoding: "utf-8",
              timeout: this.timeoutMs,
            });
          } catch (err) {
            const output = `${err.stdout ?? ""}${err.stderr ?? ""}`.trim();
            failures.push({
              pkg: pkgName,
              check,
              output: output || String(err.message || err),
            });
          }
        }
      }

      if (failures.length > 0) {
        return {
          outcome: "fail",
          failureClass: "verification",
          rationale: `Multi-Package regression detected in ${failures.length} package/check combination(s).`,
          findings: failures
            .map((f) => `${f.pkg} (${f.check}): ${f.output.slice(0, 500)}`)
            .join("\n---\n"),
        };
      }

      return {
        outcome: "pass",
        rationale: `Verified ${toVerify.size} affected package(s) including downstream dependents: ${[...toVerify].join(", ")}.`,
      };
    } catch (err) {
      // Infrastructure failure (git unavailable, discovery error, …):
      // fail loudly rather than silently passing the gate.
      const output = `${err.stdout ?? ""}${err.stderr ?? ""}`.trim();
      return {
        outcome: "fail",
        failureClass: "verification",
        rationale: "Multi-Package verification infrastructure failed.",
        findings: output || String(err),
      };
    }
  }
}
/**
 * UOK Outcome Learning Gate
 *
 * Purpose: query the local LLM task outcome ledger to detect failure patterns
 * (model-specific, unit-type-specific, provider-specific) and autonomously
 * recommend model or strategy changes before dispatching the next unit.
 *
 * Consumer: UOK gate-runner during post-unit verification and pre-dispatch
 * advisory.
 */

import { compareModelCost } from "../model-cost-table.js";
import {
  getLlmTaskOutcomeStats,
  getLlmTaskOutcomesByUnit,
  getRecentLlmTaskOutcomes,
} from "../sf-db.js";

const DEFAULT_MIN_SAMPLE_SIZE = 3;
const DEFAULT_FAILURE_RATE_THRESHOLD = 0.6;
const DEFAULT_LOOKBACK_HOURS = 168; // 1 week
const DEFAULT_ESCALATION_THRESHOLD = 3;

/** Fraction of failed outcomes in a sample; 0 for an empty sample. */
function computeFailureRate(outcomes) {
  if (outcomes.length === 0) return 0;
  const failed = outcomes.filter((o) => !o.succeeded).length;
  return failed / outcomes.length;
}

/** Group outcome rows by their `model_id` column. */
function groupByModel(outcomes) {
  const groups = new Map();
  for (const o of outcomes) {
    const list = groups.get(o.model_id) ?? [];
    list.push(o);
    groups.set(o.model_id, list);
  }
  return groups;
}

/** Group outcome rows by their `unit_type` column. */
function groupByUnitType(outcomes) {
  const groups = new Map();
  for (const o of outcomes) {
    const list = groups.get(o.unit_type) ?? [];
    list.push(o);
    groups.set(o.unit_type, list);
  }
  return groups;
}

export class OutcomeLearningGate {
  id;
  type;
  minSampleSize;
  failureRateThreshold;
  lookbackHours;
  escalationThreshold;

  /**
   * @param {{minSampleSize?: number, failureRateThreshold?: number,
   *          lookbackHours?: number, escalationThreshold?: number}} [options]
   *   minSampleSize — minimum outcomes before a pattern is considered (default 3);
   *   failureRateThreshold — failure fraction that triggers a finding (default 0.6);
   *   lookbackHours — system-wide pattern window (default 168h);
   *   escalationThreshold — escalated-task count that flags systemic blocker
   *   risk (default 3; previously a hard-coded constant).
   */
  constructor(options = {}) {
    this.id = "outcome-learning";
    this.type = "learning";
    this.minSampleSize = options.minSampleSize ?? DEFAULT_MIN_SAMPLE_SIZE;
    this.failureRateThreshold =
      options.failureRateThreshold ?? DEFAULT_FAILURE_RATE_THRESHOLD;
    this.lookbackHours = options.lookbackHours ?? DEFAULT_LOOKBACK_HOURS;
    this.escalationThreshold =
      options.escalationThreshold ?? DEFAULT_ESCALATION_THRESHOLD;
  }

  /**
   * Evaluate recent outcome history for adverse patterns. Returns a fail
   * result (failureClass "policy") with findings and a recommendation when
   * any pattern crosses the configured thresholds; a pass result otherwise.
   *
   * @param {import("./contracts.js").UokContext} ctx
   * @param {number} _attempt
   */
  async execute(ctx, _attempt) {
    const unitType = ctx.unitType ?? "unknown";
    const unitId = ctx.unitId ?? "unknown";
    const modelId = ctx.modelId ?? "unknown";

    const findings = [];
    const recommendations = [];

    // 1. Unit-specific pattern (last 20 attempts of this exact unit)
    const unitOutcomes = getLlmTaskOutcomesByUnit(unitType, unitId, 20);
    this._checkUnitPattern(unitId, unitOutcomes, findings, recommendations);

    // 2. Model-specific pattern (rolling 24h window)
    const modelStats = getLlmTaskOutcomeStats(modelId, 24);
    this._checkModelPattern(modelId, modelStats, findings, recommendations);

    // 3. System-wide patterns and escalation pressure (lookback window)
    const recent = getRecentLlmTaskOutcomes(this.lookbackHours, 200);
    this._checkSystemPatterns(recent, findings);
    this._checkEscalations(recent, findings, recommendations);

    if (findings.length > 0) {
      return {
        outcome: "fail",
        failureClass: "policy",
        rationale: `Outcome Learning detected ${findings.length} pattern(s) suggesting degraded success probability.`,
        findings: findings.join("; "),
        recommendation:
          recommendations.join(" ") ||
          "Review recent failures before continuing.",
      };
    }

    return {
      outcome: "pass",
      rationale: `No adverse patterns detected. Unit ${unitOutcomes.length} sample(s), model ${modelStats.total} sample(s), system ${recent.length} sample(s).`,
    };
  }

  /** Flag a unit whose own failure rate exceeds the threshold. */
  _checkUnitPattern(unitId, unitOutcomes, findings, recommendations) {
    if (unitOutcomes.length < this.minSampleSize) return;
    const unitRate = computeFailureRate(unitOutcomes);
    if (unitRate < this.failureRateThreshold) return;
    findings.push(
      `Unit ${unitId} failure rate ${(unitRate * 100).toFixed(0)}% over ${unitOutcomes.length} attempt(s).`,
    );
    recommendations.push(
      "Consider splitting this unit into smaller tasks or changing approach.",
    );
  }

  /** Flag a model whose 24h failure rate exceeds the threshold. */
  _checkModelPattern(modelId, modelStats, findings, recommendations) {
    if (modelStats.total < this.minSampleSize) return;
    const modelRate = modelStats.failed / modelStats.total;
    if (modelRate < this.failureRateThreshold) return;
    findings.push(
      `Model ${modelId} failure rate ${(modelRate * 100).toFixed(0)}% over ${modelStats.total} attempt(s) in 24h.`,
    );
    const alternatives = this._findBetterModels(modelId);
    if (alternatives.length > 0) {
      recommendations.push(
        `Switch to a more reliable model: ${alternatives.slice(0, 3).join(", ")}.`,
      );
    } else {
      recommendations.push(
        "Model shows poor reliability; consider provider change or human review.",
      );
    }
  }

  /**
   * Flag any unit type or model whose failure rate over the lookback window
   * exceeds the threshold. Findings only — no per-group recommendation.
   */
  _checkSystemPatterns(recent, findings) {
    if (recent.length < this.minSampleSize) return;

    for (const [ut, outcomes] of groupByUnitType(recent)) {
      if (outcomes.length < this.minSampleSize) continue;
      const rate = computeFailureRate(outcomes);
      if (rate >= this.failureRateThreshold) {
        findings.push(
          `Unit type '${ut}' failure rate ${(rate * 100).toFixed(0)}% over ${outcomes.length} attempt(s) in last ${this.lookbackHours}h.`,
        );
      }
    }

    for (const [mid, outcomes] of groupByModel(recent)) {
      if (outcomes.length < this.minSampleSize) continue;
      const rate = computeFailureRate(outcomes);
      if (rate >= this.failureRateThreshold) {
        findings.push(
          `Model '${mid}' failure rate ${(rate * 100).toFixed(0)}% over ${outcomes.length} attempt(s) in last ${this.lookbackHours}h.`,
        );
      }
    }
  }

  /** Flag systemic blocker risk when escalations pile up in the window. */
  _checkEscalations(recent, findings, recommendations) {
    const escalatedCount = recent.filter((o) => o.escalated).length;
    if (escalatedCount < this.escalationThreshold) return;
    findings.push(
      `${escalatedCount} escalated task(s) in last ${this.lookbackHours}h — systemic blocker risk.`,
    );
    recommendations.push(
      "Review milestone plans for hidden dependencies or scope creep.",
    );
  }

  /**
   * Candidate replacement models cheaper than the current one.
   *
   * Simple heuristic: cheaper models are often simpler/faster and sometimes
   * more reliable for narrow tasks. NOTE(review): a fuller implementation
   * would rank by per-model outcome stats, not cost alone.
   */
  _findBetterModels(currentModelId) {
    const candidates = [
      "claude-haiku-4-5",
      "gemini-2.0-flash",
      "gpt-4o-mini",
      "gpt-4.1-mini",
      "deepseek-chat",
    ];
    return candidates
      .filter((c) => compareModelCost(c, currentModelId) < 0)
      .slice(0, 3);
  }
}
"").includes("parallel-research") ); } /** diff --git a/src/tests/schedule-headless-query.test.ts b/src/tests/schedule-headless-query.test.ts index 10641f690..6e5d5dbbf 100644 --- a/src/tests/schedule-headless-query.test.ts +++ b/src/tests/schedule-headless-query.test.ts @@ -22,12 +22,30 @@ test("headless-query QuerySnapshot type includes schedule field", () => { ); }); -test("headless-query schedule field has due and upcoming arrays", () => { +test("headless-query schedule field has pending_count and overdue_count", () => { const scheduleBlock = src.slice(src.indexOf("schedule?:")); - assert.ok(scheduleBlock.includes("due:"), "schedule should have due array"); assert.ok( - scheduleBlock.includes("upcoming:"), - "schedule should have upcoming array", + scheduleBlock.includes("pending_count:"), + "schedule should have pending_count", + ); + assert.ok( + scheduleBlock.includes("overdue_count:"), + "schedule should have overdue_count", + ); +}); + +test("headless-query buildQuerySnapshot computes pending_count and overdue_count", () => { + assert.ok( + src.includes("pending_count:"), + "buildQuerySnapshot should compute pending_count", + ); + assert.ok( + src.includes("overdue_count:"), + "buildQuerySnapshot should compute overdue_count", + ); + assert.ok( + src.includes("readEntries") && src.includes('status === "pending"'), + "buildQuerySnapshot should read entries and filter by pending status", ); });