refactor: consolidate loop signals and gate registry wiring

This commit is contained in:
Mikael Hugo 2026-05-17 14:45:12 +02:00
parent 4d2266e57d
commit 3e002ca698
16 changed files with 883 additions and 156 deletions

View file

@ -0,0 +1,227 @@
# UOK Gate Call-Site Migration Template (ADR-0075 / R079)
Migrate inline `new UokGateRunner()` call sites to `UokGateRegistry` +
`ctxFactory` pattern. The process singleton in `uok/gate-registry.js` replaces
ad-hoc runner construction and enables cross-process gate health queries.
---
## 4-Step Pattern
### Step 1 — Read the inline block
Find the `new UokGateRunner()` instantiation, the `runner.register({id, type,
execute: async () => { ... }})` call, and note every local variable the closure
captures. These become the `ctx` fields you will supply at run time.
### Step 2 — Lift to module scope
Define a top-level `const` for the gate object. The `execute` function receives
`ctx` as its first argument; replace every closure reference with a `ctx.*`
read:
```js
// module scope — defined once, not inside any function
const myGate = {
id: "my-gate-id",
type: "verification", // must be a valid UOK_GATE_TYPES value
execute: async (ctx) => ({
outcome: ctx.someFlag ? "manual-attention" : "pass",
failureClass: ctx.someFlag ? "manual-attention" : "none",
rationale: `gate result: ${ctx.someValue}`,
findings: ctx.someFlag ? ctx.details ?? "" : "",
}),
};
```
### Step 3 — Register with a ctxFactory (or inline override)
Two sub-cases:
**A. Per-call state (most common):** supply the captured locals as a
`ctxOverride` object to `registry.run()`. No `ctxFactory` argument needed when
`register()` is called:
```js
const registry = getGateRegistry();
if (!registry.has(myGate.id)) {
registry.register(myGate); // no ctxFactory — state comes from override
}
await registry.run(myGate.id, {
someFlag: localFlag, // previously-closed-over state
someValue: localValue,
// plus standard runner ctx fields:
basePath,
traceId: opts?.traceId ?? `my-trace`,
turnId: opts?.turnId ?? `my-turn`,
milestoneId, sliceId, unitType, unitId,
});
```
**B. Stable per-module state:** if the gate always needs the same context
object (e.g. a fixed config block), pass a `ctxFactory` to `register()` so
callers never have to supply it:
```js
registry.register(myGate, () => ({ stableConfig }));
await registry.run(myGate.id, {}); // ctxFactory merged automatically
```
`registry.run(id, override)` merges as `{ ...ctxFactory(), ...override }`, so
override values always win.
### Step 4 — Remove the original inline block
Delete the `new UokGateRunner()` instantiation, the `runner.register(...)` call,
and the `await runner.run(...)` call. Import `getGateRegistry` from
`"../uok/gate-registry.js"` and remove the `UokGateRunner` import if it is no
longer used.
---
## Worked Example — `tools/validate-milestone.js`
### Before (199 lines)
```js
import { UokGateRunner } from "../uok/gate-runner.js";
// ...inside handleValidateMilestone:
if (gatesEnabled) {
try {
const gateRunner = new UokGateRunner();
const nonPassVerdict = params.verdict !== "pass";
gateRunner.register({
id: "milestone-validation-gates",
type: "verification",
execute: async () => ({
outcome: nonPassVerdict ? "manual-attention" : "pass",
failureClass: nonPassVerdict ? "manual-attention" : "none",
rationale: `milestone validation verdict: ${params.verdict}`,
findings: nonPassVerdict
? [params.verdictRationale, params.remediationPlan ?? ""].filter(Boolean).join("\n")
: "",
}),
});
await gateRunner.run("milestone-validation-gates", { basePath, traceId, ... });
} catch (err) { logWarning(...); }
}
```
The closure captured `params.verdict`, `params.verdictRationale`,
`params.remediationPlan`, and the derived `nonPassVerdict` flag — all call-time
values that differed on every invocation.
### After (222 lines)
```js
import { getGateRegistry } from "../uok/gate-registry.js";
// module scope
const milestoneValidationGatesGate = {
id: "milestone-validation-gates",
type: "verification",
execute: async (ctx) => {
const nonPassVerdict = ctx.verdict !== "pass";
return {
outcome: nonPassVerdict ? "manual-attention" : "pass",
failureClass: nonPassVerdict ? "manual-attention" : "none",
rationale: `milestone validation verdict: ${ctx.verdict}`,
findings: nonPassVerdict
? [ctx.verdictRationale, ctx.remediationPlan ?? ""].filter(Boolean).join("\n")
: "",
};
},
};
// inside handleValidateMilestone:
if (gatesEnabled) {
try {
const registry = getGateRegistry();
if (!registry.has(milestoneValidationGatesGate.id)) {
registry.register(milestoneValidationGatesGate);
}
await registry.run(milestoneValidationGatesGate.id, {
verdict: params.verdict,
verdictRationale: params.verdictRationale,
remediationPlan: params.remediationPlan,
basePath, traceId: opts?.traceId ?? `...`, ...
});
} catch (err) { logWarning(...); }
}
```
The gate object is now registered once (guarded by `registry.has()`). The
previously-closed-over locals are passed as a plain object to `registry.run()`,
where they merge with the ctxFactory output (none here) before reaching
`execute(ctx)`.
---
## Per-Gate Scope Checklist (5 remaining inline gates)
| Gate id | File | Captured locals | Notes |
|---------|------|-----------------|-------|
| `unit-verification-gate` | `auto/phases-finalize.js` | `deps`, `s`, `ctx`, `pi`, `pauseAuto` | Complex; `ctx` is the outer phase context — rename to `phaseCtx` in gate's `execute(ctx)` to avoid shadowing |
| `milestone-validation-post-check` | `uok/auto-verification.js` | `persistGate` args from `runValidateMilestonePostCheck` | Straightforward; all are primitive strings/booleans |
| `verification-gate` | `uok/auto-verification.js` | local verification result + helper formatting state | Multiple gates in same file — see Pitfall section |
| `post-execution-checks` | `uok/auto-verification.js` | `postExecResult`, strict-mode prefs, derived blocking state | Three gates in same file — same Pitfall applies |
| `planning-flow-gate` | `guided-flow.js` | `persistGate` args | Clean; same pattern as milestone-validation-post-check |
---
## When to Skip ctxFactory
If the gate's `execute()` needs no context at all (e.g. it reads only DB or
env state), register without a factory and run with an empty override:
```js
registry.register(myStatelessGate); // no ctxFactory
await registry.run(myStatelessGate.id, { basePath, traceId, turnId });
```
`uok/gate-registry-bootstrap.js` uses this form for `driftDetectionGate`, which
sources everything from the DB.
---
## Pitfall: Multiple Call Sites, Same Gate Id, Different State
`uok/auto-verification.js` contains three distinct gate registrations
(`verification-gate`, `milestone-validation-post-check`,
`post-execution-checks`). Each has its own unique id, so there is no collision.
However, if a future refactor discovers two call sites in the SAME file that
run the SAME gate id with different state shapes, choose one of:
**Option A — Two distinct gate ids** (preferred when logic differs):
```js
const myGateA = { id: "my-gate-pass-path", type: "verification", execute: async (ctx) => ... };
const myGateB = { id: "my-gate-fail-path", type: "verification", execute: async (ctx) => ... };
```
Each id carries its own circuit-breaker state and audit trail — cleaner
semantics at the cost of a registry entry per variant.
**Option B — Single id, discriminator field in ctx** (preferred when logic is
identical but input differs):
```js
const myGate = {
id: "my-gate",
type: "verification",
execute: async (ctx) => {
if (ctx.variant === "foo") { ... }
else { ... }
},
};
// call site A:
await registry.run("my-gate", { variant: "foo", ... });
// call site B:
await registry.run("my-gate", { variant: "bar", ... });
```
Trade-off: Option A gives separate circuit breakers (fail on path A does not
suppress path B). Option B shares a circuit breaker — one pathological failure
mode can block the other path. Prefer Option A when the two paths have
meaningfully different failure domains.

View file

@ -18,6 +18,7 @@ import {
truncateHead,
} from "@singularity-forge/coding-agent";
import { Text } from "@singularity-forge/tui";
import { duplicateSearchSignal } from "../shared/loop-signals.js";
import { recordRetrievalEvidence } from "../sf/retrieval-evidence.js";
import { LRUTTLCache } from "./cache.js";
import { formatSearchResults } from "./format.js";
@ -710,6 +711,12 @@ export function registerSearchTool(pi) {
if (cacheKey === lastSearchKey) {
consecutiveDupeCount++;
if (consecutiveDupeCount > MAX_CONSECUTIVE_DUPES) {
const loopSignal = duplicateSearchSignal({
query: params.query,
provider,
domain: params.domain ?? "",
consecutiveDupeCount,
});
await recordWebSearchEvidence(projectRoot, {
query: params.query,
strategy: provider,
@ -718,7 +725,7 @@ export function registerSearchTool(pi) {
hitCount: 0,
elapsedMs: Date.now() - startedAt,
error: "Consecutive duplicate search detected",
result: { consecutiveDupeCount },
result: { consecutiveDupeCount, loopSignal },
});
return {
content: [
@ -731,6 +738,7 @@ export function registerSearchTool(pi) {
details: {
errorKind: "search_loop",
error: "Consecutive duplicate search detected",
loopSignal,
},
};
}

View file

@ -550,12 +550,8 @@ export function registerHooks(pi, ecosystemHandlers = []) {
// first search-tool query paid the full cold-build cost (5-20
// minutes on a repo this size). Hooking it here closes that gap.
try {
const { ensureSiftIndexWarmup } = await import(
"../code-intelligence.js"
);
const { loadEffectiveSFPreferences } = await import(
"../preferences.js"
);
const { ensureSiftIndexWarmup } = await import("../code-intelligence.js");
const { loadEffectiveSFPreferences } = await import("../preferences.js");
const prefs = loadEffectiveSFPreferences()?.preferences;
// Fire and forget — the warmup writes its state to
// .sf/runtime/sift-index-warmup.json so other surfaces (doctor,
@ -992,7 +988,11 @@ export function registerHooks(pi, ecosystemHandlers = []) {
// ── Loop guard: block repeated identical tool calls ──
const loopCheck = checkToolCallLoop(event.toolName, event.input);
if (loopCheck.block) {
return { block: true, reason: loopCheck.reason };
return {
block: true,
reason: loopCheck.reason,
details: { loopSignal: loopCheck.loopSignal },
};
}
// ── Session file-touch recording ──────────────────────────────────────
// Best-effort: path may be absent for non-file tools; recordFileTouch
@ -1320,6 +1320,7 @@ export function registerHooks(pi, ecosystemHandlers = []) {
details: {
key: steer.key,
severity: steer.severity,
loopSignal: steer.loopSignal,
toolName: event.toolName,
toolCallId: event.toolCallId,
},

View file

@ -12,6 +12,7 @@
* and when a different tool call breaks the streak.
*/
import { createHash } from "node:crypto";
import { repeatedToolCallSignal } from "../supervision/loop-signals.js";
const MAX_CONSECUTIVE_IDENTICAL_CALLS = 4;
/** Interactive/user-facing tools where even 1 duplicate is confusing. */
@ -60,6 +61,12 @@ export function checkToolCallLoop(toolName, args) {
? MAX_CONSECUTIVE_STRICT
: MAX_CONSECUTIVE_IDENTICAL_CALLS;
if (consecutiveCount > threshold) {
const loopSignal = repeatedToolCallSignal({
toolName,
count: consecutiveCount,
threshold,
signature: sig,
});
return {
block: true,
reason:
@ -67,6 +74,8 @@ export function checkToolCallLoop(toolName, args) {
`with identical arguments. Blocking to prevent infinite loop. ` +
`Try a different approach or modify your arguments.`,
count: consecutiveCount,
signature: sig,
loopSignal,
};
}
return { block: false, count: consecutiveCount };

View file

@ -49,17 +49,16 @@
import { runSubagent } from "@singularity-forge/coding-agent";
import { SMOKE_FIXTURE_MILESTONE_ID } from "../smoke-fixture-constants.js";
import { writeLastGreenRecord } from "../last-green.js";
import {
buildCompleteMilestonePrompt,
buildReassessRoadmapPrompt,
buildValidateMilestonePrompt,
} from "../auto-prompts.js";
import { debugLog } from "../debug-logger.js";
import {
getMilestone,
getSlice,
isDbAvailable,
} from "../sf-db.js";
import { getMilestone, getSlice, isDbAvailable } from "../sf-db.js";
/**
* Unit types this module supports for inline dispatch.
@ -294,5 +293,35 @@ export async function runUnitInline(unitType, unitId, options = {}) {
elapsedMs: Date.now() - startedAt,
});
// Wire the last-green ledger for the smoke fixture milestone.
// Non-fatal: ledger write failure does not corrupt the dispatch result.
if (unitId === SMOKE_FIXTURE_MILESTONE_ID && result.ok === true) {
try {
const elapsedMs = Date.now() - startedAt;
// Inject elapsedMs into the result object — writeLastGreenRecord
// reads result.elapsedMs, but runSubagent doesn't populate it.
const resultWithTiming = { ...result, elapsedMs };
const written = writeLastGreenRecord(basePath, resultWithTiming, {
milestoneId: unitId,
unitType,
traceId: null,
});
debugLog("run-unit-inline", {
event: "last-green-write",
unitType,
unitId,
written,
});
} catch (err) {
// Log and continue — the dispatch result takes priority.
debugLog("run-unit-inline", {
event: "last-green-write-failed",
unitType,
unitId,
error: err instanceof Error ? err.message : String(err),
});
}
}
return result;
}

View file

@ -1,106 +1,16 @@
/**
* loop-signals.js normalized loop/stuck supervision signals.
* loop-signals.js SF-local re-export for shared supervision signals.
*
* Purpose: give SF one contract for loop detectors, watchdogs, retry guards,
* and tool churn signals so the autonomous supervisor can decide from one
* shape instead of bespoke metadata branches.
* Purpose: preserve the SF import path while allowing non-SF extensions to
* emit the same normalized loop signal contract from `extensions/shared`.
*
* Consumer: autonomous unit supervision and detector adapters.
* Consumer: SF autonomous supervision modules.
*/
const VALID_SCOPES = new Set([
"process",
"session",
"unit",
"tool",
"provider",
"artifact",
]);
const VALID_SEVERITIES = new Set(["info", "warning", "fail", "pause"]);
const VALID_ACTIONS = new Set([
"continue",
"retry",
"switch-model",
"pause",
"block",
"recover",
]);
function pick(value, allowed, fallback) {
return allowed.has(value) ? value : fallback;
}
/**
* Build a normalized loop supervision signal.
*
* Purpose: preserve detector-specific evidence while making action, severity,
* scope, and unit identity machine-readable for the shared supervisor.
*
* Consumer: detector adapters and auto-runaway-guard.
*/
export function createLoopSignal(input = {}) {
const unit =
input.unit && typeof input.unit === "object"
? {
type: String(input.unit.type ?? ""),
id: String(input.unit.id ?? ""),
}
: undefined;
return {
scope: pick(input.scope, VALID_SCOPES, "unit"),
kind: String(input.kind ?? "unknown-loop-signal"),
severity: pick(input.severity, VALID_SEVERITIES, "warning"),
...(unit?.type && unit?.id ? { unit } : {}),
evidence:
input.evidence && typeof input.evidence === "object"
? input.evidence
: { value: input.evidence ?? null },
recommendedAction: pick(input.recommendedAction, VALID_ACTIONS, "pause"),
...(input.message ? { message: String(input.message) } : {}),
...(input.source ? { source: String(input.source) } : {}),
};
}
/**
* Convert a Wiggums detector sweep hit into a normalized loop signal.
*
* Purpose: let the periodic detector family feed the same supervisor contract
* as direct runaway and zero-progress checks.
*
* Consumer: auto-runaway-guard when consuming buffered detector sweep results.
*/
export function detectorSweepSignal(fired, unitType, unitId, now = Date.now()) {
return createLoopSignal({
scope: "unit",
kind: `wiggums:${fired?.name ?? "unknown"}`,
severity: "fail",
unit: { type: unitType, id: unitId },
evidence: {
detector: fired?.name ?? "unknown",
signature: fired?.signature ?? null,
failedAt: now,
},
recommendedAction: "pause",
source: "periodic-detector-sweep",
});
}
/**
* Convert a zero-progress detector hit into a normalized loop signal.
*
* Purpose: make the strongest unit-churn signal available through the same
* contract as Wiggums detectors and future tool/provider loop producers.
*
* Consumer: auto-runaway-guard.
*/
export function zeroProgressSignal(input) {
return createLoopSignal({
scope: "unit",
kind: "zero-progress",
severity: "fail",
unit: { type: input.unitType, id: input.unitId },
evidence: input.evidence,
recommendedAction: "pause",
source: "zero-progress-detector",
});
}
export {
createLoopSignal,
detectorSweepSignal,
duplicateSearchSignal,
repeatedToolCallSignal,
repeatedToolFailureSignal,
zeroProgressSignal,
} from "../../shared/loop-signals.js";

View file

@ -2,6 +2,9 @@ import { describe, expect, test } from "vitest";
import {
createLoopSignal,
detectorSweepSignal,
duplicateSearchSignal,
repeatedToolCallSignal,
repeatedToolFailureSignal,
zeroProgressSignal,
} from "../supervision/loop-signals.js";
@ -62,4 +65,75 @@ describe("loop supervision signals", () => {
evidence: { toolCallsTotal: 25 },
});
});
test("repeatedToolCallSignal_when_guard_blocks_preserves_tool_evidence", () => {
const signal = repeatedToolCallSignal({
toolName: "ask_user_questions",
count: 2,
threshold: 1,
signature: "abc123",
});
expect(signal).toMatchObject({
scope: "tool",
kind: "tool-call:duplicate",
severity: "fail",
recommendedAction: "block",
source: "tool-call-loop-guard",
evidence: {
toolName: "ask_user_questions",
count: 2,
threshold: 1,
signature: "abc123",
},
});
});
test("repeatedToolFailureSignal_when_watchdog_steers_preserves_command_evidence", () => {
const signal = repeatedToolFailureSignal({
toolName: "bash",
command: "npm test",
count: 2,
windowMs: 600000,
excerpt: "failed",
});
expect(signal).toMatchObject({
scope: "tool",
kind: "tool-result:repeated-failure",
severity: "warning",
recommendedAction: "continue",
source: "tool-watchdog",
evidence: {
toolName: "bash",
command: "npm test",
count: 2,
windowMs: 600000,
excerpt: "failed",
},
});
});
test("duplicateSearchSignal_when_search_guard_blocks_preserves_query_evidence", () => {
const signal = duplicateSearchSignal({
query: "sf autonomous loops",
provider: "tavily",
domain: "example.com",
consecutiveDupeCount: 2,
});
expect(signal).toMatchObject({
scope: "tool",
kind: "search:duplicate-query",
severity: "fail",
recommendedAction: "block",
source: "search-the-web",
evidence: {
query: "sf autonomous loops",
provider: "tavily",
domain: "example.com",
consecutiveDupeCount: 2,
},
});
});
});

View file

@ -11,12 +11,7 @@
* DB schema drift, mock shape mismatch).
*/
import { afterEach, describe, expect, it, vi } from "vitest";
import {
mkdtempSync,
writeFileSync,
mkdirSync,
rmSync,
} from "node:fs";
import { mkdtempSync, writeFileSync, mkdirSync, rmSync } from "node:fs";
import { join } from "node:path";
import { tmpdir } from "node:os";
@ -29,6 +24,13 @@ import {
import { DispatchLayer } from "../dispatch/dispatch-layer.js";
import { runSubagent } from "@singularity-forge/coding-agent";
import { SMOKE_FIXTURE_MILESTONE_ID } from "../smoke-fixture-constants.js";
import { readLastGreenRecord, clearLastGreen } from "../last-green.js";
// Enable smoke_gate so last-green writes are not suppressed.
// Without this, writeLastGreenRecord returns false and the ledger tests would fail.
vi.mock("../experimental.js", () => ({
getExperimentalFlag: vi.fn(() => true),
}));
// Module-level mock — vitest hoists vi.mock automatically.
//
@ -68,12 +70,34 @@ function makeProject() {
const dir = mkdtempSync(join(tmpdir(), "sf-smoke-fixture-"));
tmpDirs.push(dir);
mkdirSync(join(dir, ".sf", "milestones", SMOKE_FIXTURE_MILESTONE_ID, "slices", "S01", "tasks"), {
recursive: true,
});
mkdirSync(join(dir, ".sf", "milestones", SMOKE_FIXTURE_MILESTONE_ID, "slices", "S02", "tasks"), {
recursive: true,
});
mkdirSync(
join(
dir,
".sf",
"milestones",
SMOKE_FIXTURE_MILESTONE_ID,
"slices",
"S01",
"tasks",
),
{
recursive: true,
},
);
mkdirSync(
join(
dir,
".sf",
"milestones",
SMOKE_FIXTURE_MILESTONE_ID,
"slices",
"S02",
"tasks",
),
{
recursive: true,
},
);
openDatabase(join(dir, ".sf", "sf.db"));
@ -116,13 +140,27 @@ function makeProject() {
// ROADMAP.md must exist — buildCompleteMilestonePrompt falls back to it
// when the DB slice query returns nothing.
writeFileSync(
join(dir, ".sf", "milestones", SMOKE_FIXTURE_MILESTONE_ID, `${SMOKE_FIXTURE_MILESTONE_ID}-ROADMAP.md`),
join(
dir,
".sf",
"milestones",
SMOKE_FIXTURE_MILESTONE_ID,
`${SMOKE_FIXTURE_MILESTONE_ID}-ROADMAP.md`,
),
"# SMOKE_FIXTURE_MILESTONE_ID: Smoke test milestone\n\n## S01: Smoke slice (complete)\n\n## S02: Smoke slice (active)\n",
);
// S01-SUMMARY.md for slice summary excerpt path.
writeFileSync(
join(dir, ".sf", "milestones", SMOKE_FIXTURE_MILESTONE_ID, "slices", "S01", "S01-SUMMARY.md"),
join(
dir,
".sf",
"milestones",
SMOKE_FIXTURE_MILESTONE_ID,
"slices",
"S01",
"S01-SUMMARY.md",
),
"# S01: Smoke slice (complete)\n\n**Status:** complete\n",
);
@ -206,4 +244,86 @@ describe("M048/S01 — smoke fixture", () => {
expect(result.exitCode).toBe(1);
expect(result.stderr).toMatch(/must be one of/);
});
});
// ---------------------------------------------------------------------------
// S02: last-green ledger wiring tests
// ---------------------------------------------------------------------------
it("ledger is written after successful fixture dispatch", async () => {
const dir = makeProject();
openDatabase(join(dir, ".sf", "sf.db"));
// Clear any stale ledger from prior runs.
clearLastGreen(dir);
const layer = new DispatchLayer(dir);
const result = await layer.dispatch({
isolation: "full",
coordination: "managed",
scope: "inline",
mode: "single",
unitType: "complete-milestone",
unitId: SMOKE_FIXTURE_MILESTONE_ID,
});
// The dispatch must have succeeded (mocked runSubagent always returns ok=true).
expect(result.ok).toBe(true);
// Read the ledger — writeLastGreenRecord guards on smoke_gate flag,
// which T01 set to true via setExperimentalFlag in last-green.test.mjs setup.
const ledger = readLastGreenRecord(dir);
expect(ledger).not.toBeNull();
expect(Array.isArray(ledger)).toBe(true);
expect(ledger.length).toBeGreaterThanOrEqual(1);
// Verify the most-recent entry has the expected fixture shape.
const entry = ledger[0];
expect(entry.fixture.milestoneId).toBe(SMOKE_FIXTURE_MILESTONE_ID);
expect(entry.fixture.unitType).toBe("complete-milestone");
expect(entry.result.ok).toBe(true);
expect(entry.result.exitCode).toBe(0);
expect(entry.result.outputLength).toBeGreaterThan(0);
expect(entry.result.elapsedMs).toBeGreaterThanOrEqual(0);
expect(typeof entry.checksum).toBe("string");
expect(entry.checksum.length).toBeGreaterThan(0);
expect(typeof entry.timestamp).toBe("string");
});
it("ledger is NOT written when dispatch fails", async () => {
const dir = makeProject();
openDatabase(join(dir, ".sf", "sf.db"));
// Seed with a known-good entry so we can tell it wasn't replaced.
const layer = new DispatchLayer(dir);
// Override the mock to return a failure result.
vi.mocked(runSubagent).mockResolvedValueOnce({
ok: false,
exitCode: 1,
stderr: "test error",
output: "",
});
const result = await layer.dispatch({
isolation: "full",
coordination: "managed",
scope: "inline",
mode: "single",
unitType: "complete-milestone",
unitId: SMOKE_FIXTURE_MILESTONE_ID,
});
// The dispatch must have failed.
expect(result.ok).toBe(false);
expect(result.exitCode).toBe(1);
// Ledger should be null (no entry written for failed dispatch),
// or empty (any prior entry preserved but not appended).
const ledger = readLastGreenRecord(dir);
// A null return means the file doesn't exist; an empty array means it
// exists but has no entries — both are acceptable for "not written".
expect(
ledger === null || (Array.isArray(ledger) && ledger.length === 0),
).toBe(true);
});
});

View file

@ -0,0 +1,37 @@
import { describe, expect, test } from "vitest";
import {
checkToolCallLoop,
resetToolCallLoopGuard,
} from "../bootstrap/tool-call-loop-guard.js";
describe("tool-call loop guard", () => {
test("checkToolCallLoop_when_identical_tool_repeats_returns_loop_signal", () => {
resetToolCallLoopGuard();
const first = checkToolCallLoop("ask_user_questions", {
questions: ["continue?"],
});
const second = checkToolCallLoop("ask_user_questions", {
questions: ["continue?"],
});
expect(first.block).toBe(false);
expect(second).toMatchObject({
block: true,
count: 2,
loopSignal: {
scope: "tool",
kind: "tool-call:duplicate",
severity: "fail",
recommendedAction: "block",
source: "tool-call-loop-guard",
evidence: {
toolName: "ask_user_questions",
count: 2,
threshold: 1,
},
},
});
expect(second.loopSignal.evidence.signature).toBe(second.signature);
});
});

View file

@ -0,0 +1,40 @@
import { describe, expect, test } from "vitest";
import { observeToolResult, resetToolWatchdog } from "../tool-watchdog.js";
function failingBash(command, text = "command failed") {
return {
toolName: "bash",
input: { command },
isError: true,
content: [{ type: "text", text }],
};
}
describe("tool watchdog loop signal", () => {
test("observeToolResult_when_same_bash_command_fails_twice_returns_loop_signal", () => {
resetToolWatchdog();
const first = observeToolResult(failingBash("npm run missing", "first"));
const second = observeToolResult(failingBash("npm run missing", "second"));
expect(first).toBeUndefined();
expect(second).toMatchObject({
key: "repeat-failure:npm run missing",
severity: "warning",
loopSignal: {
scope: "tool",
kind: "tool-result:repeated-failure",
severity: "warning",
recommendedAction: "continue",
source: "tool-watchdog",
evidence: {
toolName: "bash",
command: "npm run missing",
count: 2,
windowMs: 600000,
excerpt: "second",
},
},
});
});
});

View file

@ -96,11 +96,7 @@ test("guarantee_normal_exit_exactly_one_exit_per_runId", async () => {
assert.equal(enters[0].runId, exits[0].runId, "enter and exit share runId");
const exitsByRunId = countExitsByRunId(events);
assert.equal(
exitsByRunId.get(enters[0].runId),
1,
"only one exit per runId",
);
assert.equal(exitsByRunId.get(enters[0].runId), 1, "only one exit per runId");
});
// ── Case 2: Abnormal exit via finally (no conditional site) ──────────────────
@ -295,7 +291,11 @@ test("parity_report_handles_duplicate_exits_per_runId_as_balanced", () => {
const report = buildParityReport(events, "/tmp/uok-parity.jsonl", NOW);
// enterEvents=1, exitEvents=2 → not in unmatchedRuns (exitEvents >= enterEvents)
assert.equal(report.unmatchedRuns.length, 0, "no unmatched runs with double-exit");
assert.equal(
report.unmatchedRuns.length,
0,
"no unmatched runs with double-exit",
);
assert.equal(report.missingExitEvents, 0);
// statuses bucket has both exits counted
assert.deepEqual(report.statuses, { signal: 1, ok: 1 });

View file

@ -134,3 +134,62 @@ test("search_the_web_when_successful_records_retrieval_evidence", async () => {
},
]);
});
test("search_the_web_when_query_repeats_returns_loop_signal", async () => {
const project = makeProject();
process.chdir(project);
process.env.BRAVE_API_KEY = "test-brave-key";
process.env.TAVILY_API_KEY = "";
process.env.MINIMAX_CODE_PLAN_KEY = "";
process.env.MINIMAX_CODING_API_KEY = "";
process.env.MINIMAX_API_KEY = "";
process.env.SERPER_API_KEY = "";
process.env.EXA_API_KEY = "";
process.env.OLLAMA_API_KEY = "";
assert.equal(openDatabase(join(project, ".sf", "sf.db")), true);
globalThis.fetch = async () => ({
ok: true,
headers: { get: () => null },
json: async () => ({
web: {
results: [
{
title: "Example Result",
url: "https://example.com/result",
description: "A result for testing.",
},
],
},
}),
});
const tool = captureSearchTool();
await tool.execute("call-1", {
query: "duplicate query",
count: 1,
});
const result = await tool.execute("call-2", {
query: "duplicate query",
count: 1,
});
assert.equal(result.isError, true);
assert.equal(result.details.errorKind, "search_loop");
assert.deepEqual(result.details.loopSignal, {
scope: "tool",
kind: "search:duplicate-query",
severity: "fail",
evidence: {
query: "duplicate query",
provider: "brave",
domain: "",
consecutiveDupeCount: 2,
},
recommendedAction: "block",
source: "search-the-web",
});
const rows = getRetrievalEvidence(5);
assert.equal(rows[0].status, "search_loop");
assert.deepEqual(rows[0].result.loopSignal, result.details.loopSignal);
});

View file

@ -3,6 +3,8 @@
// Observes tool-result events at runtime and emits a steer (info /
// warning) when patterns like a repeated bash failure or a too-large
// tool result indicate the agent is stuck.
import { repeatedToolFailureSignal } from "./supervision/loop-signals.js";
const seenKeys = new Set();
const bashFailures = new Map();
const MAX_RESULT_CHARS = 6000;
@ -97,12 +99,21 @@ function maybeRepeatedFailureSteer(event, text) {
: 1;
bashFailures.set(command, { count, lastAt: now });
if (count < 2) return undefined;
const excerpt = text.slice(0, 1200);
const loopSignal = repeatedToolFailureSignal({
toolName: event.toolName,
command,
count,
windowMs: REPEAT_FAILURE_WINDOW_MS,
excerpt,
});
return once({
key: `repeat-failure:${command}`,
severity: "warning",
loopSignal,
content: buildSteer(
"repeated failing command",
`The same bash command has failed ${count} times in this autonomous mode unit:\n\n${command}\n\nStop retrying it as-is. Read the error, inspect the relevant files/config, and choose a different repair or verification path.\n\nLatest result excerpt:\n${text.slice(0, 1200)}`,
`The same bash command has failed ${count} times in this autonomous mode unit:\n\n${command}\n\nStop retrying it as-is. Read the error, inspect the relevant files/config, and choose a different repair or verification path.\n\nLatest result excerpt:\n${excerpt}`,
),
});
}

View file

@ -22,7 +22,7 @@ import {
} from "../sf-db.js";
import { invalidateStateCache } from "../state.js";
import { resolveUokFlags } from "../uok/flags.js";
import { UokGateRunner } from "../uok/gate-runner.js";
import { getGateRegistry } from "../uok/gate-registry.js";
import {
isValidMilestoneVerdict,
VALIDATION_VERDICTS,
@ -30,6 +30,35 @@ import {
import { logWarning } from "../workflow-logger.js";
import { resolveCanonicalMilestoneRoot } from "../worktree-manager.js";
/**
* Module-scope gate definition for ADR-0075 UokGateRegistry.
*
* ctx shape (supplied via ctxFactory at call site):
* verdict {string} raw milestone verdict ("pass" | "fail" | )
* verdictRationale {string|undefined}
* remediationPlan {string|undefined}
*
* The execute() function is pure relative to ctx it no longer closes over
* call-site locals, making the gate safe to register once at module load time.
*/
const milestoneValidationGatesGate = {
id: "milestone-validation-gates",
type: "verification",
execute: async (ctx) => {
const nonPassVerdict = ctx.verdict !== "pass";
return {
outcome: nonPassVerdict ? "manual-attention" : "pass",
failureClass: nonPassVerdict ? "manual-attention" : "none",
rationale: `milestone validation verdict: ${ctx.verdict}`,
findings: nonPassVerdict
? [ctx.verdictRationale, ctx.remediationPlan ?? ""]
.filter(Boolean)
.join("\n")
: "",
};
},
};
function renderValidationMarkdown(params) {
let md = `<!-- generated from .sf/sf.db — do not edit directly; use validate_milestone tool -->
---
@ -159,23 +188,16 @@ export async function handleValidateMilestone(params, basePath, opts) {
const gatesEnabled = opts?.uokGatesEnabled ?? resolveUokFlags(prefs).gates;
if (gatesEnabled) {
try {
const gateRunner = new UokGateRunner();
const nonPassVerdict = params.verdict !== "pass";
gateRunner.register({
id: "milestone-validation-gates",
type: "verification",
execute: async () => ({
outcome: nonPassVerdict ? "manual-attention" : "pass",
failureClass: nonPassVerdict ? "manual-attention" : "none",
rationale: `milestone validation verdict: ${params.verdict}`,
findings: nonPassVerdict
? [params.verdictRationale, params.remediationPlan ?? ""]
.filter(Boolean)
.join("\n")
: "",
}),
});
await gateRunner.run("milestone-validation-gates", {
const registry = getGateRegistry();
if (!registry.has(milestoneValidationGatesGate.id)) {
registry.register(milestoneValidationGatesGate);
}
await registry.run(milestoneValidationGatesGate.id, {
// ctxFactory: lifted closure state supplied as per-call override
verdict: params.verdict,
verdictRationale: params.verdictRationale,
remediationPlan: params.remediationPlan,
// runner ctx fields
basePath,
traceId: opts?.traceId ?? `validate-milestone:${params.milestoneId}`,
turnId: opts?.turnId ?? `${params.milestoneId}:validate`,

View file

@ -16,7 +16,8 @@ const registry = getGateRegistry();
// SKIP milestone-validation-post-check: execute() closes over persistGate arguments from runValidateMilestonePostCheck.
// SKIP verification-gate: execute() closes over the local verification result and helper formatting state from uok/auto-verification.js.
// SKIP post-execution-checks: execute() closes over postExecResult, strict-mode prefs, and derived blocking state from uok/auto-verification.js.
// SKIP milestone-validation-gates: execute() closes over validate_milestone params and derived verdict/remediation state.
// milestone-validation-gates: lifted to module scope in tools/validate-milestone.js (R079 pilot).
// Registered lazily at call site via registry.has() guard; ctxFactory pattern supplies verdict/rationale/remediationPlan.
// SKIP planning-flow-gate: execute() closes over persistGate arguments from guided-flow.js.
registry.register(driftDetectionGate);

View file

@ -0,0 +1,179 @@
/**
* loop-signals.js normalized loop/stuck supervision signals.
*
* Purpose: give extensions one contract for loop detectors, watchdogs, retry
* guards, and tool churn signals so supervisors can decide from one shape
* instead of bespoke metadata branches.
*
* Consumer: SF autonomy supervision and cross-extension loop guards.
*/
const VALID_SCOPES = new Set([
"process",
"session",
"unit",
"tool",
"provider",
"artifact",
]);
const VALID_SEVERITIES = new Set(["info", "warning", "fail", "pause"]);
const VALID_ACTIONS = new Set([
"continue",
"retry",
"switch-model",
"pause",
"block",
"recover",
]);
function pick(value, allowed, fallback) {
return allowed.has(value) ? value : fallback;
}
/**
* Build a normalized loop supervision signal.
*
* Purpose: preserve producer-specific evidence while making action, severity,
* scope, and unit identity machine-readable for shared supervision.
*
* Consumer: detector adapters, tool guards, provider guards, and watchdogs.
*/
export function createLoopSignal(input = {}) {
const unit =
input.unit && typeof input.unit === "object"
? {
type: String(input.unit.type ?? ""),
id: String(input.unit.id ?? ""),
}
: undefined;
return {
scope: pick(input.scope, VALID_SCOPES, "unit"),
kind: String(input.kind ?? "unknown-loop-signal"),
severity: pick(input.severity, VALID_SEVERITIES, "warning"),
...(unit?.type && unit?.id ? { unit } : {}),
evidence:
input.evidence && typeof input.evidence === "object"
? input.evidence
: { value: input.evidence ?? null },
recommendedAction: pick(input.recommendedAction, VALID_ACTIONS, "pause"),
...(input.message ? { message: String(input.message) } : {}),
...(input.source ? { source: String(input.source) } : {}),
};
}
/**
* Convert a Wiggums detector sweep hit into a normalized loop signal.
*
* Purpose: let the periodic detector family feed the same supervisor contract
* as direct runaway and zero-progress checks.
*
* Consumer: auto-runaway-guard when consuming buffered detector sweep results.
*/
export function detectorSweepSignal(fired, unitType, unitId, now = Date.now()) {
return createLoopSignal({
scope: "unit",
kind: `wiggums:${fired?.name ?? "unknown"}`,
severity: "fail",
unit: { type: unitType, id: unitId },
evidence: {
detector: fired?.name ?? "unknown",
signature: fired?.signature ?? null,
failedAt: now,
},
recommendedAction: "pause",
source: "periodic-detector-sweep",
});
}
/**
* Convert a zero-progress detector hit into a normalized loop signal.
*
* Purpose: make the strongest unit-churn signal available through the same
* contract as Wiggums detectors and future tool/provider loop producers.
*
* Consumer: auto-runaway-guard.
*/
export function zeroProgressSignal(input) {
return createLoopSignal({
scope: "unit",
kind: "zero-progress",
severity: "fail",
unit: { type: input.unitType, id: input.unitId },
evidence: input.evidence,
recommendedAction: "pause",
source: "zero-progress-detector",
});
}
/**
* Convert a repeated identical tool call into a normalized loop signal.
*
* Purpose: expose pre-tool blocking as the same signal class as unit-level
* stuck detectors instead of hiding it inside a string reason.
*
* Consumer: SF tool-call loop guard.
*/
export function repeatedToolCallSignal(input) {
return createLoopSignal({
scope: "tool",
kind: "tool-call:duplicate",
severity: "fail",
evidence: {
toolName: input.toolName,
count: input.count,
threshold: input.threshold,
signature: input.signature,
},
recommendedAction: "block",
source: "tool-call-loop-guard",
});
}
/**
* Convert a repeated failing shell command into a normalized loop signal.
*
* Purpose: make tool-result watchdog steering machine-readable while preserving
* the existing human steering message.
*
* Consumer: SF tool watchdog.
*/
export function repeatedToolFailureSignal(input) {
return createLoopSignal({
scope: "tool",
kind: "tool-result:repeated-failure",
severity: "warning",
evidence: {
toolName: input.toolName,
command: input.command,
count: input.count,
windowMs: input.windowMs,
excerpt: input.excerpt,
},
recommendedAction: "continue",
source: "tool-watchdog",
});
}
/**
* Convert a duplicate web-search query into a normalized loop signal.
*
* Purpose: let non-SF search tools report query loops through the shared
* extension signal contract instead of only returning an opaque tool error.
*
* Consumer: search-the-web tool loop guard.
*/
export function duplicateSearchSignal(input) {
return createLoopSignal({
scope: "tool",
kind: "search:duplicate-query",
severity: "fail",
evidence: {
query: input.query,
provider: input.provider,
domain: input.domain ?? "",
consecutiveDupeCount: input.consecutiveDupeCount,
},
recommendedAction: "block",
source: "search-the-web",
});
}