refactor: consolidate loop signals and gate registry wiring
This commit is contained in:
parent
4d2266e57d
commit
3e002ca698
16 changed files with 883 additions and 156 deletions
227
docs/dev/drafts/uok-gate-call-site-migration-template.md
Normal file
227
docs/dev/drafts/uok-gate-call-site-migration-template.md
Normal file
|
|
@ -0,0 +1,227 @@
|
|||
# UOK Gate Call-Site Migration Template (ADR-0075 / R079)
|
||||
|
||||
Migrate inline `new UokGateRunner()` call sites to `UokGateRegistry` +
|
||||
`ctxFactory` pattern. The process singleton in `uok/gate-registry.js` replaces
|
||||
ad-hoc runner construction and enables cross-process gate health queries.
|
||||
|
||||
---
|
||||
|
||||
## 4-Step Pattern
|
||||
|
||||
### Step 1 — Read the inline block
|
||||
|
||||
Find the `new UokGateRunner()` instantiation, the `runner.register({id, type,
|
||||
execute: async () => { ... }})` call, and note every local variable the closure
|
||||
captures. These become the `ctx` fields you will supply at run time.
|
||||
|
||||
### Step 2 — Lift to module scope
|
||||
|
||||
Define a top-level `const` for the gate object. The `execute` function receives
|
||||
`ctx` as its first argument; replace every closure reference with a `ctx.*`
|
||||
read:
|
||||
|
||||
```js
|
||||
// module scope — defined once, not inside any function
|
||||
const myGate = {
|
||||
id: "my-gate-id",
|
||||
type: "verification", // must be a valid UOK_GATE_TYPES value
|
||||
execute: async (ctx) => ({
|
||||
outcome: ctx.someFlag ? "manual-attention" : "pass",
|
||||
failureClass: ctx.someFlag ? "manual-attention" : "none",
|
||||
rationale: `gate result: ${ctx.someValue}`,
|
||||
findings: ctx.someFlag ? ctx.details ?? "" : "",
|
||||
}),
|
||||
};
|
||||
```
|
||||
|
||||
### Step 3 — Register with a ctxFactory (or inline override)
|
||||
|
||||
Two sub-cases:
|
||||
|
||||
**A. Per-call state (most common):** supply the captured locals as a
|
||||
`ctxOverride` object to `registry.run()`. No `ctxFactory` argument needed when
|
||||
`register()` is called:
|
||||
|
||||
```js
|
||||
const registry = getGateRegistry();
|
||||
if (!registry.has(myGate.id)) {
|
||||
registry.register(myGate); // no ctxFactory — state comes from override
|
||||
}
|
||||
await registry.run(myGate.id, {
|
||||
someFlag: localFlag, // previously-closed-over state
|
||||
someValue: localValue,
|
||||
// plus standard runner ctx fields:
|
||||
basePath,
|
||||
traceId: opts?.traceId ?? `my-trace`,
|
||||
turnId: opts?.turnId ?? `my-turn`,
|
||||
milestoneId, sliceId, unitType, unitId,
|
||||
});
|
||||
```
|
||||
|
||||
**B. Stable per-module state:** if the gate always needs the same context
|
||||
object (e.g. a fixed config block), pass a `ctxFactory` to `register()` so
|
||||
callers never have to supply it:
|
||||
|
||||
```js
|
||||
registry.register(myGate, () => ({ stableConfig }));
|
||||
await registry.run(myGate.id, {}); // ctxFactory merged automatically
|
||||
```
|
||||
|
||||
`registry.run(id, override)` merges as `{ ...ctxFactory(), ...override }`, so
|
||||
override values always win.
|
||||
|
||||
### Step 4 — Remove the original inline block
|
||||
|
||||
Delete the `new UokGateRunner()` instantiation, the `runner.register(...)` call,
|
||||
and the `await runner.run(...)` call. Import `getGateRegistry` from
|
||||
`"../uok/gate-registry.js"` and remove the `UokGateRunner` import if it is no
|
||||
longer used.
|
||||
|
||||
---
|
||||
|
||||
## Worked Example — `tools/validate-milestone.js`
|
||||
|
||||
### Before (199 lines)
|
||||
|
||||
```js
|
||||
import { UokGateRunner } from "../uok/gate-runner.js";
|
||||
// ...inside handleValidateMilestone:
|
||||
if (gatesEnabled) {
|
||||
try {
|
||||
const gateRunner = new UokGateRunner();
|
||||
const nonPassVerdict = params.verdict !== "pass";
|
||||
gateRunner.register({
|
||||
id: "milestone-validation-gates",
|
||||
type: "verification",
|
||||
execute: async () => ({
|
||||
outcome: nonPassVerdict ? "manual-attention" : "pass",
|
||||
failureClass: nonPassVerdict ? "manual-attention" : "none",
|
||||
rationale: `milestone validation verdict: ${params.verdict}`,
|
||||
findings: nonPassVerdict
|
||||
? [params.verdictRationale, params.remediationPlan ?? ""].filter(Boolean).join("\n")
|
||||
: "",
|
||||
}),
|
||||
});
|
||||
await gateRunner.run("milestone-validation-gates", { basePath, traceId, ... });
|
||||
} catch (err) { logWarning(...); }
|
||||
}
|
||||
```
|
||||
|
||||
The closure captured `params.verdict`, `params.verdictRationale`,
|
||||
`params.remediationPlan`, and the derived `nonPassVerdict` flag — all call-time
|
||||
values that differed on every invocation.
|
||||
|
||||
### After (222 lines)
|
||||
|
||||
```js
|
||||
import { getGateRegistry } from "../uok/gate-registry.js";
|
||||
|
||||
// module scope
|
||||
const milestoneValidationGatesGate = {
|
||||
id: "milestone-validation-gates",
|
||||
type: "verification",
|
||||
execute: async (ctx) => {
|
||||
const nonPassVerdict = ctx.verdict !== "pass";
|
||||
return {
|
||||
outcome: nonPassVerdict ? "manual-attention" : "pass",
|
||||
failureClass: nonPassVerdict ? "manual-attention" : "none",
|
||||
rationale: `milestone validation verdict: ${ctx.verdict}`,
|
||||
findings: nonPassVerdict
|
||||
? [ctx.verdictRationale, ctx.remediationPlan ?? ""].filter(Boolean).join("\n")
|
||||
: "",
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
// inside handleValidateMilestone:
|
||||
if (gatesEnabled) {
|
||||
try {
|
||||
const registry = getGateRegistry();
|
||||
if (!registry.has(milestoneValidationGatesGate.id)) {
|
||||
registry.register(milestoneValidationGatesGate);
|
||||
}
|
||||
await registry.run(milestoneValidationGatesGate.id, {
|
||||
verdict: params.verdict,
|
||||
verdictRationale: params.verdictRationale,
|
||||
remediationPlan: params.remediationPlan,
|
||||
basePath, traceId: opts?.traceId ?? `...`, ...
|
||||
});
|
||||
} catch (err) { logWarning(...); }
|
||||
}
|
||||
```
|
||||
|
||||
The gate object is now registered once (guarded by `registry.has()`). The
|
||||
previously-closed-over locals are passed as a plain object to `registry.run()`,
|
||||
where they merge with the ctxFactory output (none here) before reaching
|
||||
`execute(ctx)`.
|
||||
|
||||
---
|
||||
|
||||
## Per-Gate Scope Checklist (5 remaining inline gates)
|
||||
|
||||
| Gate id | File | Captured locals | Notes |
|
||||
|---------|------|-----------------|-------|
|
||||
| `unit-verification-gate` | `auto/phases-finalize.js` | `deps`, `s`, `ctx`, `pi`, `pauseAuto` | Complex; `ctx` is the outer phase context — rename to `phaseCtx` in gate's `execute(ctx)` to avoid shadowing |
|
||||
| `milestone-validation-post-check` | `uok/auto-verification.js` | `persistGate` args from `runValidateMilestonePostCheck` | Straightforward; all are primitive strings/booleans |
|
||||
| `verification-gate` | `uok/auto-verification.js` | local verification result + helper formatting state | Multiple gates in same file — see Pitfall section |
|
||||
| `post-execution-checks` | `uok/auto-verification.js` | `postExecResult`, strict-mode prefs, derived blocking state | Three gates in same file — same Pitfall applies |
|
||||
| `planning-flow-gate` | `guided-flow.js` | `persistGate` args | Clean; same pattern as milestone-validation-post-check |
|
||||
|
||||
---
|
||||
|
||||
## When to Skip ctxFactory
|
||||
|
||||
If the gate's `execute()` needs no context at all (e.g. it reads only DB or
|
||||
env state), register without a factory and run with an empty override:
|
||||
|
||||
```js
|
||||
registry.register(myStatelessGate); // no ctxFactory
|
||||
await registry.run(myStatelessGate.id, { basePath, traceId, turnId });
|
||||
```
|
||||
|
||||
`uok/gate-registry-bootstrap.js` uses this form for `driftDetectionGate`, which
|
||||
sources everything from the DB.
|
||||
|
||||
---
|
||||
|
||||
## Pitfall: Multiple Call Sites, Same Gate Id, Different State
|
||||
|
||||
`uok/auto-verification.js` contains three distinct gate registrations
|
||||
(`verification-gate`, `milestone-validation-post-check`,
|
||||
`post-execution-checks`). Each has its own unique id, so there is no collision.
|
||||
|
||||
However, if a future refactor discovers two call sites in the SAME file that
|
||||
run the SAME gate id with different state shapes, choose one of:
|
||||
|
||||
**Option A — Two distinct gate ids** (preferred when logic differs):
|
||||
|
||||
```js
|
||||
const myGateA = { id: "my-gate-pass-path", type: "verification", execute: async (ctx) => ... };
|
||||
const myGateB = { id: "my-gate-fail-path", type: "verification", execute: async (ctx) => ... };
|
||||
```
|
||||
|
||||
Each id carries its own circuit-breaker state and audit trail — cleaner
|
||||
semantics at the cost of a registry entry per variant.
|
||||
|
||||
**Option B — Single id, discriminator field in ctx** (preferred when logic is
|
||||
identical but input differs):
|
||||
|
||||
```js
|
||||
const myGate = {
|
||||
id: "my-gate",
|
||||
type: "verification",
|
||||
execute: async (ctx) => {
|
||||
if (ctx.variant === "foo") { ... }
|
||||
else { ... }
|
||||
},
|
||||
};
|
||||
// call site A:
|
||||
await registry.run("my-gate", { variant: "foo", ... });
|
||||
// call site B:
|
||||
await registry.run("my-gate", { variant: "bar", ... });
|
||||
```
|
||||
|
||||
Trade-off: Option A gives separate circuit breakers (fail on path A does not
|
||||
suppress path B). Option B shares a circuit breaker — one pathological failure
|
||||
mode can block the other path. Prefer Option A when the two paths have
|
||||
meaningfully different failure domains.
|
||||
|
|
@ -18,6 +18,7 @@ import {
|
|||
truncateHead,
|
||||
} from "@singularity-forge/coding-agent";
|
||||
import { Text } from "@singularity-forge/tui";
|
||||
import { duplicateSearchSignal } from "../shared/loop-signals.js";
|
||||
import { recordRetrievalEvidence } from "../sf/retrieval-evidence.js";
|
||||
import { LRUTTLCache } from "./cache.js";
|
||||
import { formatSearchResults } from "./format.js";
|
||||
|
|
@ -710,6 +711,12 @@ export function registerSearchTool(pi) {
|
|||
if (cacheKey === lastSearchKey) {
|
||||
consecutiveDupeCount++;
|
||||
if (consecutiveDupeCount > MAX_CONSECUTIVE_DUPES) {
|
||||
const loopSignal = duplicateSearchSignal({
|
||||
query: params.query,
|
||||
provider,
|
||||
domain: params.domain ?? "",
|
||||
consecutiveDupeCount,
|
||||
});
|
||||
await recordWebSearchEvidence(projectRoot, {
|
||||
query: params.query,
|
||||
strategy: provider,
|
||||
|
|
@ -718,7 +725,7 @@ export function registerSearchTool(pi) {
|
|||
hitCount: 0,
|
||||
elapsedMs: Date.now() - startedAt,
|
||||
error: "Consecutive duplicate search detected",
|
||||
result: { consecutiveDupeCount },
|
||||
result: { consecutiveDupeCount, loopSignal },
|
||||
});
|
||||
return {
|
||||
content: [
|
||||
|
|
@ -731,6 +738,7 @@ export function registerSearchTool(pi) {
|
|||
details: {
|
||||
errorKind: "search_loop",
|
||||
error: "Consecutive duplicate search detected",
|
||||
loopSignal,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -550,12 +550,8 @@ export function registerHooks(pi, ecosystemHandlers = []) {
|
|||
// first search-tool query paid the full cold-build cost (5-20
|
||||
// minutes on a repo this size). Hooking it here closes that gap.
|
||||
try {
|
||||
const { ensureSiftIndexWarmup } = await import(
|
||||
"../code-intelligence.js"
|
||||
);
|
||||
const { loadEffectiveSFPreferences } = await import(
|
||||
"../preferences.js"
|
||||
);
|
||||
const { ensureSiftIndexWarmup } = await import("../code-intelligence.js");
|
||||
const { loadEffectiveSFPreferences } = await import("../preferences.js");
|
||||
const prefs = loadEffectiveSFPreferences()?.preferences;
|
||||
// Fire and forget — the warmup writes its state to
|
||||
// .sf/runtime/sift-index-warmup.json so other surfaces (doctor,
|
||||
|
|
@ -992,7 +988,11 @@ export function registerHooks(pi, ecosystemHandlers = []) {
|
|||
// ── Loop guard: block repeated identical tool calls ──
|
||||
const loopCheck = checkToolCallLoop(event.toolName, event.input);
|
||||
if (loopCheck.block) {
|
||||
return { block: true, reason: loopCheck.reason };
|
||||
return {
|
||||
block: true,
|
||||
reason: loopCheck.reason,
|
||||
details: { loopSignal: loopCheck.loopSignal },
|
||||
};
|
||||
}
|
||||
// ── Session file-touch recording ──────────────────────────────────────
|
||||
// Best-effort: path may be absent for non-file tools; recordFileTouch
|
||||
|
|
@ -1320,6 +1320,7 @@ export function registerHooks(pi, ecosystemHandlers = []) {
|
|||
details: {
|
||||
key: steer.key,
|
||||
severity: steer.severity,
|
||||
loopSignal: steer.loopSignal,
|
||||
toolName: event.toolName,
|
||||
toolCallId: event.toolCallId,
|
||||
},
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@
|
|||
* and when a different tool call breaks the streak.
|
||||
*/
|
||||
import { createHash } from "node:crypto";
|
||||
import { repeatedToolCallSignal } from "../supervision/loop-signals.js";
|
||||
|
||||
const MAX_CONSECUTIVE_IDENTICAL_CALLS = 4;
|
||||
/** Interactive/user-facing tools where even 1 duplicate is confusing. */
|
||||
|
|
@ -60,6 +61,12 @@ export function checkToolCallLoop(toolName, args) {
|
|||
? MAX_CONSECUTIVE_STRICT
|
||||
: MAX_CONSECUTIVE_IDENTICAL_CALLS;
|
||||
if (consecutiveCount > threshold) {
|
||||
const loopSignal = repeatedToolCallSignal({
|
||||
toolName,
|
||||
count: consecutiveCount,
|
||||
threshold,
|
||||
signature: sig,
|
||||
});
|
||||
return {
|
||||
block: true,
|
||||
reason:
|
||||
|
|
@ -67,6 +74,8 @@ export function checkToolCallLoop(toolName, args) {
|
|||
`with identical arguments. Blocking to prevent infinite loop. ` +
|
||||
`Try a different approach or modify your arguments.`,
|
||||
count: consecutiveCount,
|
||||
signature: sig,
|
||||
loopSignal,
|
||||
};
|
||||
}
|
||||
return { block: false, count: consecutiveCount };
|
||||
|
|
|
|||
|
|
@ -49,17 +49,16 @@
|
|||
|
||||
import { runSubagent } from "@singularity-forge/coding-agent";
|
||||
|
||||
import { SMOKE_FIXTURE_MILESTONE_ID } from "../smoke-fixture-constants.js";
|
||||
import { writeLastGreenRecord } from "../last-green.js";
|
||||
|
||||
import {
|
||||
buildCompleteMilestonePrompt,
|
||||
buildReassessRoadmapPrompt,
|
||||
buildValidateMilestonePrompt,
|
||||
} from "../auto-prompts.js";
|
||||
import { debugLog } from "../debug-logger.js";
|
||||
import {
|
||||
getMilestone,
|
||||
getSlice,
|
||||
isDbAvailable,
|
||||
} from "../sf-db.js";
|
||||
import { getMilestone, getSlice, isDbAvailable } from "../sf-db.js";
|
||||
|
||||
/**
|
||||
* Unit types this module supports for inline dispatch.
|
||||
|
|
@ -294,5 +293,35 @@ export async function runUnitInline(unitType, unitId, options = {}) {
|
|||
elapsedMs: Date.now() - startedAt,
|
||||
});
|
||||
|
||||
// Wire the last-green ledger for the smoke fixture milestone.
|
||||
// Non-fatal: ledger write failure does not corrupt the dispatch result.
|
||||
if (unitId === SMOKE_FIXTURE_MILESTONE_ID && result.ok === true) {
|
||||
try {
|
||||
const elapsedMs = Date.now() - startedAt;
|
||||
// Inject elapsedMs into the result object — writeLastGreenRecord
|
||||
// reads result.elapsedMs, but runSubagent doesn't populate it.
|
||||
const resultWithTiming = { ...result, elapsedMs };
|
||||
const written = writeLastGreenRecord(basePath, resultWithTiming, {
|
||||
milestoneId: unitId,
|
||||
unitType,
|
||||
traceId: null,
|
||||
});
|
||||
debugLog("run-unit-inline", {
|
||||
event: "last-green-write",
|
||||
unitType,
|
||||
unitId,
|
||||
written,
|
||||
});
|
||||
} catch (err) {
|
||||
// Log and continue — the dispatch result takes priority.
|
||||
debugLog("run-unit-inline", {
|
||||
event: "last-green-write-failed",
|
||||
unitType,
|
||||
unitId,
|
||||
error: err instanceof Error ? err.message : String(err),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,106 +1,16 @@
|
|||
/**
|
||||
* loop-signals.js — normalized loop/stuck supervision signals.
|
||||
* loop-signals.js — SF-local re-export for shared supervision signals.
|
||||
*
|
||||
* Purpose: give SF one contract for loop detectors, watchdogs, retry guards,
|
||||
* and tool churn signals so the autonomous supervisor can decide from one
|
||||
* shape instead of bespoke metadata branches.
|
||||
* Purpose: preserve the SF import path while allowing non-SF extensions to
|
||||
* emit the same normalized loop signal contract from `extensions/shared`.
|
||||
*
|
||||
* Consumer: autonomous unit supervision and detector adapters.
|
||||
* Consumer: SF autonomous supervision modules.
|
||||
*/
|
||||
|
||||
const VALID_SCOPES = new Set([
|
||||
"process",
|
||||
"session",
|
||||
"unit",
|
||||
"tool",
|
||||
"provider",
|
||||
"artifact",
|
||||
]);
|
||||
const VALID_SEVERITIES = new Set(["info", "warning", "fail", "pause"]);
|
||||
const VALID_ACTIONS = new Set([
|
||||
"continue",
|
||||
"retry",
|
||||
"switch-model",
|
||||
"pause",
|
||||
"block",
|
||||
"recover",
|
||||
]);
|
||||
|
||||
function pick(value, allowed, fallback) {
|
||||
return allowed.has(value) ? value : fallback;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a normalized loop supervision signal.
|
||||
*
|
||||
* Purpose: preserve detector-specific evidence while making action, severity,
|
||||
* scope, and unit identity machine-readable for the shared supervisor.
|
||||
*
|
||||
* Consumer: detector adapters and auto-runaway-guard.
|
||||
*/
|
||||
export function createLoopSignal(input = {}) {
|
||||
const unit =
|
||||
input.unit && typeof input.unit === "object"
|
||||
? {
|
||||
type: String(input.unit.type ?? ""),
|
||||
id: String(input.unit.id ?? ""),
|
||||
}
|
||||
: undefined;
|
||||
return {
|
||||
scope: pick(input.scope, VALID_SCOPES, "unit"),
|
||||
kind: String(input.kind ?? "unknown-loop-signal"),
|
||||
severity: pick(input.severity, VALID_SEVERITIES, "warning"),
|
||||
...(unit?.type && unit?.id ? { unit } : {}),
|
||||
evidence:
|
||||
input.evidence && typeof input.evidence === "object"
|
||||
? input.evidence
|
||||
: { value: input.evidence ?? null },
|
||||
recommendedAction: pick(input.recommendedAction, VALID_ACTIONS, "pause"),
|
||||
...(input.message ? { message: String(input.message) } : {}),
|
||||
...(input.source ? { source: String(input.source) } : {}),
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a Wiggums detector sweep hit into a normalized loop signal.
|
||||
*
|
||||
* Purpose: let the periodic detector family feed the same supervisor contract
|
||||
* as direct runaway and zero-progress checks.
|
||||
*
|
||||
* Consumer: auto-runaway-guard when consuming buffered detector sweep results.
|
||||
*/
|
||||
export function detectorSweepSignal(fired, unitType, unitId, now = Date.now()) {
|
||||
return createLoopSignal({
|
||||
scope: "unit",
|
||||
kind: `wiggums:${fired?.name ?? "unknown"}`,
|
||||
severity: "fail",
|
||||
unit: { type: unitType, id: unitId },
|
||||
evidence: {
|
||||
detector: fired?.name ?? "unknown",
|
||||
signature: fired?.signature ?? null,
|
||||
failedAt: now,
|
||||
},
|
||||
recommendedAction: "pause",
|
||||
source: "periodic-detector-sweep",
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a zero-progress detector hit into a normalized loop signal.
|
||||
*
|
||||
* Purpose: make the strongest unit-churn signal available through the same
|
||||
* contract as Wiggums detectors and future tool/provider loop producers.
|
||||
*
|
||||
* Consumer: auto-runaway-guard.
|
||||
*/
|
||||
export function zeroProgressSignal(input) {
|
||||
return createLoopSignal({
|
||||
scope: "unit",
|
||||
kind: "zero-progress",
|
||||
severity: "fail",
|
||||
unit: { type: input.unitType, id: input.unitId },
|
||||
evidence: input.evidence,
|
||||
recommendedAction: "pause",
|
||||
source: "zero-progress-detector",
|
||||
});
|
||||
}
|
||||
export {
|
||||
createLoopSignal,
|
||||
detectorSweepSignal,
|
||||
duplicateSearchSignal,
|
||||
repeatedToolCallSignal,
|
||||
repeatedToolFailureSignal,
|
||||
zeroProgressSignal,
|
||||
} from "../../shared/loop-signals.js";
|
||||
|
|
|
|||
|
|
@ -2,6 +2,9 @@ import { describe, expect, test } from "vitest";
|
|||
import {
|
||||
createLoopSignal,
|
||||
detectorSweepSignal,
|
||||
duplicateSearchSignal,
|
||||
repeatedToolCallSignal,
|
||||
repeatedToolFailureSignal,
|
||||
zeroProgressSignal,
|
||||
} from "../supervision/loop-signals.js";
|
||||
|
||||
|
|
@ -62,4 +65,75 @@ describe("loop supervision signals", () => {
|
|||
evidence: { toolCallsTotal: 25 },
|
||||
});
|
||||
});
|
||||
|
||||
test("repeatedToolCallSignal_when_guard_blocks_preserves_tool_evidence", () => {
|
||||
const signal = repeatedToolCallSignal({
|
||||
toolName: "ask_user_questions",
|
||||
count: 2,
|
||||
threshold: 1,
|
||||
signature: "abc123",
|
||||
});
|
||||
|
||||
expect(signal).toMatchObject({
|
||||
scope: "tool",
|
||||
kind: "tool-call:duplicate",
|
||||
severity: "fail",
|
||||
recommendedAction: "block",
|
||||
source: "tool-call-loop-guard",
|
||||
evidence: {
|
||||
toolName: "ask_user_questions",
|
||||
count: 2,
|
||||
threshold: 1,
|
||||
signature: "abc123",
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
test("repeatedToolFailureSignal_when_watchdog_steers_preserves_command_evidence", () => {
|
||||
const signal = repeatedToolFailureSignal({
|
||||
toolName: "bash",
|
||||
command: "npm test",
|
||||
count: 2,
|
||||
windowMs: 600000,
|
||||
excerpt: "failed",
|
||||
});
|
||||
|
||||
expect(signal).toMatchObject({
|
||||
scope: "tool",
|
||||
kind: "tool-result:repeated-failure",
|
||||
severity: "warning",
|
||||
recommendedAction: "continue",
|
||||
source: "tool-watchdog",
|
||||
evidence: {
|
||||
toolName: "bash",
|
||||
command: "npm test",
|
||||
count: 2,
|
||||
windowMs: 600000,
|
||||
excerpt: "failed",
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
test("duplicateSearchSignal_when_search_guard_blocks_preserves_query_evidence", () => {
|
||||
const signal = duplicateSearchSignal({
|
||||
query: "sf autonomous loops",
|
||||
provider: "tavily",
|
||||
domain: "example.com",
|
||||
consecutiveDupeCount: 2,
|
||||
});
|
||||
|
||||
expect(signal).toMatchObject({
|
||||
scope: "tool",
|
||||
kind: "search:duplicate-query",
|
||||
severity: "fail",
|
||||
recommendedAction: "block",
|
||||
source: "search-the-web",
|
||||
evidence: {
|
||||
query: "sf autonomous loops",
|
||||
provider: "tavily",
|
||||
domain: "example.com",
|
||||
consecutiveDupeCount: 2,
|
||||
},
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -11,12 +11,7 @@
|
|||
* DB schema drift, mock shape mismatch).
|
||||
*/
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import {
|
||||
mkdtempSync,
|
||||
writeFileSync,
|
||||
mkdirSync,
|
||||
rmSync,
|
||||
} from "node:fs";
|
||||
import { mkdtempSync, writeFileSync, mkdirSync, rmSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
|
||||
|
|
@ -29,6 +24,13 @@ import {
|
|||
import { DispatchLayer } from "../dispatch/dispatch-layer.js";
|
||||
import { runSubagent } from "@singularity-forge/coding-agent";
|
||||
import { SMOKE_FIXTURE_MILESTONE_ID } from "../smoke-fixture-constants.js";
|
||||
import { readLastGreenRecord, clearLastGreen } from "../last-green.js";
|
||||
|
||||
// Enable smoke_gate so last-green writes are not suppressed.
|
||||
// Without this, writeLastGreenRecord returns false and the ledger tests would fail.
|
||||
vi.mock("../experimental.js", () => ({
|
||||
getExperimentalFlag: vi.fn(() => true),
|
||||
}));
|
||||
|
||||
// Module-level mock — vitest hoists vi.mock automatically.
|
||||
//
|
||||
|
|
@ -68,12 +70,34 @@ function makeProject() {
|
|||
const dir = mkdtempSync(join(tmpdir(), "sf-smoke-fixture-"));
|
||||
tmpDirs.push(dir);
|
||||
|
||||
mkdirSync(join(dir, ".sf", "milestones", SMOKE_FIXTURE_MILESTONE_ID, "slices", "S01", "tasks"), {
|
||||
recursive: true,
|
||||
});
|
||||
mkdirSync(join(dir, ".sf", "milestones", SMOKE_FIXTURE_MILESTONE_ID, "slices", "S02", "tasks"), {
|
||||
recursive: true,
|
||||
});
|
||||
mkdirSync(
|
||||
join(
|
||||
dir,
|
||||
".sf",
|
||||
"milestones",
|
||||
SMOKE_FIXTURE_MILESTONE_ID,
|
||||
"slices",
|
||||
"S01",
|
||||
"tasks",
|
||||
),
|
||||
{
|
||||
recursive: true,
|
||||
},
|
||||
);
|
||||
mkdirSync(
|
||||
join(
|
||||
dir,
|
||||
".sf",
|
||||
"milestones",
|
||||
SMOKE_FIXTURE_MILESTONE_ID,
|
||||
"slices",
|
||||
"S02",
|
||||
"tasks",
|
||||
),
|
||||
{
|
||||
recursive: true,
|
||||
},
|
||||
);
|
||||
|
||||
openDatabase(join(dir, ".sf", "sf.db"));
|
||||
|
||||
|
|
@ -116,13 +140,27 @@ function makeProject() {
|
|||
// ROADMAP.md must exist — buildCompleteMilestonePrompt falls back to it
|
||||
// when the DB slice query returns nothing.
|
||||
writeFileSync(
|
||||
join(dir, ".sf", "milestones", SMOKE_FIXTURE_MILESTONE_ID, `${SMOKE_FIXTURE_MILESTONE_ID}-ROADMAP.md`),
|
||||
join(
|
||||
dir,
|
||||
".sf",
|
||||
"milestones",
|
||||
SMOKE_FIXTURE_MILESTONE_ID,
|
||||
`${SMOKE_FIXTURE_MILESTONE_ID}-ROADMAP.md`,
|
||||
),
|
||||
"# SMOKE_FIXTURE_MILESTONE_ID: Smoke test milestone\n\n## S01: Smoke slice (complete)\n\n## S02: Smoke slice (active)\n",
|
||||
);
|
||||
|
||||
// S01-SUMMARY.md for slice summary excerpt path.
|
||||
writeFileSync(
|
||||
join(dir, ".sf", "milestones", SMOKE_FIXTURE_MILESTONE_ID, "slices", "S01", "S01-SUMMARY.md"),
|
||||
join(
|
||||
dir,
|
||||
".sf",
|
||||
"milestones",
|
||||
SMOKE_FIXTURE_MILESTONE_ID,
|
||||
"slices",
|
||||
"S01",
|
||||
"S01-SUMMARY.md",
|
||||
),
|
||||
"# S01: Smoke slice (complete)\n\n**Status:** complete\n",
|
||||
);
|
||||
|
||||
|
|
@ -206,4 +244,86 @@ describe("M048/S01 — smoke fixture", () => {
|
|||
expect(result.exitCode).toBe(1);
|
||||
expect(result.stderr).toMatch(/must be one of/);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// S02: last-green ledger wiring tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
it("ledger is written after successful fixture dispatch", async () => {
|
||||
const dir = makeProject();
|
||||
openDatabase(join(dir, ".sf", "sf.db"));
|
||||
|
||||
// Clear any stale ledger from prior runs.
|
||||
clearLastGreen(dir);
|
||||
|
||||
const layer = new DispatchLayer(dir);
|
||||
const result = await layer.dispatch({
|
||||
isolation: "full",
|
||||
coordination: "managed",
|
||||
scope: "inline",
|
||||
mode: "single",
|
||||
unitType: "complete-milestone",
|
||||
unitId: SMOKE_FIXTURE_MILESTONE_ID,
|
||||
});
|
||||
|
||||
// The dispatch must have succeeded (mocked runSubagent always returns ok=true).
|
||||
expect(result.ok).toBe(true);
|
||||
|
||||
// Read the ledger — writeLastGreenRecord guards on smoke_gate flag,
|
||||
// which T01 set to true via setExperimentalFlag in last-green.test.mjs setup.
|
||||
const ledger = readLastGreenRecord(dir);
|
||||
expect(ledger).not.toBeNull();
|
||||
expect(Array.isArray(ledger)).toBe(true);
|
||||
expect(ledger.length).toBeGreaterThanOrEqual(1);
|
||||
|
||||
// Verify the most-recent entry has the expected fixture shape.
|
||||
const entry = ledger[0];
|
||||
expect(entry.fixture.milestoneId).toBe(SMOKE_FIXTURE_MILESTONE_ID);
|
||||
expect(entry.fixture.unitType).toBe("complete-milestone");
|
||||
expect(entry.result.ok).toBe(true);
|
||||
expect(entry.result.exitCode).toBe(0);
|
||||
expect(entry.result.outputLength).toBeGreaterThan(0);
|
||||
expect(entry.result.elapsedMs).toBeGreaterThanOrEqual(0);
|
||||
expect(typeof entry.checksum).toBe("string");
|
||||
expect(entry.checksum.length).toBeGreaterThan(0);
|
||||
expect(typeof entry.timestamp).toBe("string");
|
||||
});
|
||||
|
||||
it("ledger is NOT written when dispatch fails", async () => {
|
||||
const dir = makeProject();
|
||||
openDatabase(join(dir, ".sf", "sf.db"));
|
||||
|
||||
// Seed with a known-good entry so we can tell it wasn't replaced.
|
||||
const layer = new DispatchLayer(dir);
|
||||
|
||||
// Override the mock to return a failure result.
|
||||
vi.mocked(runSubagent).mockResolvedValueOnce({
|
||||
ok: false,
|
||||
exitCode: 1,
|
||||
stderr: "test error",
|
||||
output: "",
|
||||
});
|
||||
|
||||
const result = await layer.dispatch({
|
||||
isolation: "full",
|
||||
coordination: "managed",
|
||||
scope: "inline",
|
||||
mode: "single",
|
||||
unitType: "complete-milestone",
|
||||
unitId: SMOKE_FIXTURE_MILESTONE_ID,
|
||||
});
|
||||
|
||||
// The dispatch must have failed.
|
||||
expect(result.ok).toBe(false);
|
||||
expect(result.exitCode).toBe(1);
|
||||
|
||||
// Ledger should be null (no entry written for failed dispatch),
|
||||
// or empty (any prior entry preserved but not appended).
|
||||
const ledger = readLastGreenRecord(dir);
|
||||
// A null return means the file doesn't exist; an empty array means it
|
||||
// exists but has no entries — both are acceptable for "not written".
|
||||
expect(
|
||||
ledger === null || (Array.isArray(ledger) && ledger.length === 0),
|
||||
).toBe(true);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -0,0 +1,37 @@
|
|||
import { describe, expect, test } from "vitest";
|
||||
import {
|
||||
checkToolCallLoop,
|
||||
resetToolCallLoopGuard,
|
||||
} from "../bootstrap/tool-call-loop-guard.js";
|
||||
|
||||
describe("tool-call loop guard", () => {
|
||||
test("checkToolCallLoop_when_identical_tool_repeats_returns_loop_signal", () => {
|
||||
resetToolCallLoopGuard();
|
||||
|
||||
const first = checkToolCallLoop("ask_user_questions", {
|
||||
questions: ["continue?"],
|
||||
});
|
||||
const second = checkToolCallLoop("ask_user_questions", {
|
||||
questions: ["continue?"],
|
||||
});
|
||||
|
||||
expect(first.block).toBe(false);
|
||||
expect(second).toMatchObject({
|
||||
block: true,
|
||||
count: 2,
|
||||
loopSignal: {
|
||||
scope: "tool",
|
||||
kind: "tool-call:duplicate",
|
||||
severity: "fail",
|
||||
recommendedAction: "block",
|
||||
source: "tool-call-loop-guard",
|
||||
evidence: {
|
||||
toolName: "ask_user_questions",
|
||||
count: 2,
|
||||
threshold: 1,
|
||||
},
|
||||
},
|
||||
});
|
||||
expect(second.loopSignal.evidence.signature).toBe(second.signature);
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
import { describe, expect, test } from "vitest";
|
||||
import { observeToolResult, resetToolWatchdog } from "../tool-watchdog.js";
|
||||
|
||||
function failingBash(command, text = "command failed") {
|
||||
return {
|
||||
toolName: "bash",
|
||||
input: { command },
|
||||
isError: true,
|
||||
content: [{ type: "text", text }],
|
||||
};
|
||||
}
|
||||
|
||||
describe("tool watchdog loop signal", () => {
|
||||
test("observeToolResult_when_same_bash_command_fails_twice_returns_loop_signal", () => {
|
||||
resetToolWatchdog();
|
||||
|
||||
const first = observeToolResult(failingBash("npm run missing", "first"));
|
||||
const second = observeToolResult(failingBash("npm run missing", "second"));
|
||||
|
||||
expect(first).toBeUndefined();
|
||||
expect(second).toMatchObject({
|
||||
key: "repeat-failure:npm run missing",
|
||||
severity: "warning",
|
||||
loopSignal: {
|
||||
scope: "tool",
|
||||
kind: "tool-result:repeated-failure",
|
||||
severity: "warning",
|
||||
recommendedAction: "continue",
|
||||
source: "tool-watchdog",
|
||||
evidence: {
|
||||
toolName: "bash",
|
||||
command: "npm run missing",
|
||||
count: 2,
|
||||
windowMs: 600000,
|
||||
excerpt: "second",
|
||||
},
|
||||
},
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
@ -96,11 +96,7 @@ test("guarantee_normal_exit_exactly_one_exit_per_runId", async () => {
|
|||
assert.equal(enters[0].runId, exits[0].runId, "enter and exit share runId");
|
||||
|
||||
const exitsByRunId = countExitsByRunId(events);
|
||||
assert.equal(
|
||||
exitsByRunId.get(enters[0].runId),
|
||||
1,
|
||||
"only one exit per runId",
|
||||
);
|
||||
assert.equal(exitsByRunId.get(enters[0].runId), 1, "only one exit per runId");
|
||||
});
|
||||
|
||||
// ── Case 2: Abnormal exit via finally (no conditional site) ──────────────────
|
||||
|
|
@ -295,7 +291,11 @@ test("parity_report_handles_duplicate_exits_per_runId_as_balanced", () => {
|
|||
const report = buildParityReport(events, "/tmp/uok-parity.jsonl", NOW);
|
||||
|
||||
// enterEvents=1, exitEvents=2 → not in unmatchedRuns (exitEvents >= enterEvents)
|
||||
assert.equal(report.unmatchedRuns.length, 0, "no unmatched runs with double-exit");
|
||||
assert.equal(
|
||||
report.unmatchedRuns.length,
|
||||
0,
|
||||
"no unmatched runs with double-exit",
|
||||
);
|
||||
assert.equal(report.missingExitEvents, 0);
|
||||
// statuses bucket has both exits counted
|
||||
assert.deepEqual(report.statuses, { signal: 1, ok: 1 });
|
||||
|
|
|
|||
|
|
@ -134,3 +134,62 @@ test("search_the_web_when_successful_records_retrieval_evidence", async () => {
|
|||
},
|
||||
]);
|
||||
});
|
||||
|
||||
test("search_the_web_when_query_repeats_returns_loop_signal", async () => {
|
||||
const project = makeProject();
|
||||
process.chdir(project);
|
||||
process.env.BRAVE_API_KEY = "test-brave-key";
|
||||
process.env.TAVILY_API_KEY = "";
|
||||
process.env.MINIMAX_CODE_PLAN_KEY = "";
|
||||
process.env.MINIMAX_CODING_API_KEY = "";
|
||||
process.env.MINIMAX_API_KEY = "";
|
||||
process.env.SERPER_API_KEY = "";
|
||||
process.env.EXA_API_KEY = "";
|
||||
process.env.OLLAMA_API_KEY = "";
|
||||
assert.equal(openDatabase(join(project, ".sf", "sf.db")), true);
|
||||
globalThis.fetch = async () => ({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
json: async () => ({
|
||||
web: {
|
||||
results: [
|
||||
{
|
||||
title: "Example Result",
|
||||
url: "https://example.com/result",
|
||||
description: "A result for testing.",
|
||||
},
|
||||
],
|
||||
},
|
||||
}),
|
||||
});
|
||||
|
||||
const tool = captureSearchTool();
|
||||
await tool.execute("call-1", {
|
||||
query: "duplicate query",
|
||||
count: 1,
|
||||
});
|
||||
const result = await tool.execute("call-2", {
|
||||
query: "duplicate query",
|
||||
count: 1,
|
||||
});
|
||||
|
||||
assert.equal(result.isError, true);
|
||||
assert.equal(result.details.errorKind, "search_loop");
|
||||
assert.deepEqual(result.details.loopSignal, {
|
||||
scope: "tool",
|
||||
kind: "search:duplicate-query",
|
||||
severity: "fail",
|
||||
evidence: {
|
||||
query: "duplicate query",
|
||||
provider: "brave",
|
||||
domain: "",
|
||||
consecutiveDupeCount: 2,
|
||||
},
|
||||
recommendedAction: "block",
|
||||
source: "search-the-web",
|
||||
});
|
||||
|
||||
const rows = getRetrievalEvidence(5);
|
||||
assert.equal(rows[0].status, "search_loop");
|
||||
assert.deepEqual(rows[0].result.loopSignal, result.details.loopSignal);
|
||||
});
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@
|
|||
// Observes tool-result events at runtime and emits a steer (info /
|
||||
// warning) when patterns like a repeated bash failure or a too-large
|
||||
// tool result indicate the agent is stuck.
|
||||
import { repeatedToolFailureSignal } from "./supervision/loop-signals.js";
|
||||
|
||||
const seenKeys = new Set();
|
||||
const bashFailures = new Map();
|
||||
const MAX_RESULT_CHARS = 6000;
|
||||
|
|
@ -97,12 +99,21 @@ function maybeRepeatedFailureSteer(event, text) {
|
|||
: 1;
|
||||
bashFailures.set(command, { count, lastAt: now });
|
||||
if (count < 2) return undefined;
|
||||
const excerpt = text.slice(0, 1200);
|
||||
const loopSignal = repeatedToolFailureSignal({
|
||||
toolName: event.toolName,
|
||||
command,
|
||||
count,
|
||||
windowMs: REPEAT_FAILURE_WINDOW_MS,
|
||||
excerpt,
|
||||
});
|
||||
return once({
|
||||
key: `repeat-failure:${command}`,
|
||||
severity: "warning",
|
||||
loopSignal,
|
||||
content: buildSteer(
|
||||
"repeated failing command",
|
||||
`The same bash command has failed ${count} times in this autonomous mode unit:\n\n${command}\n\nStop retrying it as-is. Read the error, inspect the relevant files/config, and choose a different repair or verification path.\n\nLatest result excerpt:\n${text.slice(0, 1200)}`,
|
||||
`The same bash command has failed ${count} times in this autonomous mode unit:\n\n${command}\n\nStop retrying it as-is. Read the error, inspect the relevant files/config, and choose a different repair or verification path.\n\nLatest result excerpt:\n${excerpt}`,
|
||||
),
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ import {
|
|||
} from "../sf-db.js";
|
||||
import { invalidateStateCache } from "../state.js";
|
||||
import { resolveUokFlags } from "../uok/flags.js";
|
||||
import { UokGateRunner } from "../uok/gate-runner.js";
|
||||
import { getGateRegistry } from "../uok/gate-registry.js";
|
||||
import {
|
||||
isValidMilestoneVerdict,
|
||||
VALIDATION_VERDICTS,
|
||||
|
|
@ -30,6 +30,35 @@ import {
|
|||
import { logWarning } from "../workflow-logger.js";
|
||||
import { resolveCanonicalMilestoneRoot } from "../worktree-manager.js";
|
||||
|
||||
/**
|
||||
* Module-scope gate definition for ADR-0075 UokGateRegistry.
|
||||
*
|
||||
* ctx shape (supplied via ctxFactory at call site):
|
||||
* verdict {string} — raw milestone verdict ("pass" | "fail" | …)
|
||||
* verdictRationale {string|undefined}
|
||||
* remediationPlan {string|undefined}
|
||||
*
|
||||
* The execute() function is pure relative to ctx — it no longer closes over
|
||||
* call-site locals, making the gate safe to register once at module load time.
|
||||
*/
|
||||
const milestoneValidationGatesGate = {
|
||||
id: "milestone-validation-gates",
|
||||
type: "verification",
|
||||
execute: async (ctx) => {
|
||||
const nonPassVerdict = ctx.verdict !== "pass";
|
||||
return {
|
||||
outcome: nonPassVerdict ? "manual-attention" : "pass",
|
||||
failureClass: nonPassVerdict ? "manual-attention" : "none",
|
||||
rationale: `milestone validation verdict: ${ctx.verdict}`,
|
||||
findings: nonPassVerdict
|
||||
? [ctx.verdictRationale, ctx.remediationPlan ?? ""]
|
||||
.filter(Boolean)
|
||||
.join("\n")
|
||||
: "",
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
function renderValidationMarkdown(params) {
|
||||
let md = `<!-- generated from .sf/sf.db — do not edit directly; use validate_milestone tool -->
|
||||
---
|
||||
|
|
@ -159,23 +188,16 @@ export async function handleValidateMilestone(params, basePath, opts) {
|
|||
const gatesEnabled = opts?.uokGatesEnabled ?? resolveUokFlags(prefs).gates;
|
||||
if (gatesEnabled) {
|
||||
try {
|
||||
const gateRunner = new UokGateRunner();
|
||||
const nonPassVerdict = params.verdict !== "pass";
|
||||
gateRunner.register({
|
||||
id: "milestone-validation-gates",
|
||||
type: "verification",
|
||||
execute: async () => ({
|
||||
outcome: nonPassVerdict ? "manual-attention" : "pass",
|
||||
failureClass: nonPassVerdict ? "manual-attention" : "none",
|
||||
rationale: `milestone validation verdict: ${params.verdict}`,
|
||||
findings: nonPassVerdict
|
||||
? [params.verdictRationale, params.remediationPlan ?? ""]
|
||||
.filter(Boolean)
|
||||
.join("\n")
|
||||
: "",
|
||||
}),
|
||||
});
|
||||
await gateRunner.run("milestone-validation-gates", {
|
||||
const registry = getGateRegistry();
|
||||
if (!registry.has(milestoneValidationGatesGate.id)) {
|
||||
registry.register(milestoneValidationGatesGate);
|
||||
}
|
||||
await registry.run(milestoneValidationGatesGate.id, {
|
||||
// ctxFactory: lifted closure state supplied as per-call override
|
||||
verdict: params.verdict,
|
||||
verdictRationale: params.verdictRationale,
|
||||
remediationPlan: params.remediationPlan,
|
||||
// runner ctx fields
|
||||
basePath,
|
||||
traceId: opts?.traceId ?? `validate-milestone:${params.milestoneId}`,
|
||||
turnId: opts?.turnId ?? `${params.milestoneId}:validate`,
|
||||
|
|
|
|||
|
|
@ -16,7 +16,8 @@ const registry = getGateRegistry();
|
|||
// SKIP milestone-validation-post-check: execute() closes over persistGate arguments from runValidateMilestonePostCheck.
|
||||
// SKIP verification-gate: execute() closes over the local verification result and helper formatting state from uok/auto-verification.js.
|
||||
// SKIP post-execution-checks: execute() closes over postExecResult, strict-mode prefs, and derived blocking state from uok/auto-verification.js.
|
||||
// SKIP milestone-validation-gates: execute() closes over validate_milestone params and derived verdict/remediation state.
|
||||
// milestone-validation-gates: lifted to module scope in tools/validate-milestone.js (R079 pilot).
|
||||
// Registered lazily at call site via registry.has() guard; ctxFactory pattern supplies verdict/rationale/remediationPlan.
|
||||
// SKIP planning-flow-gate: execute() closes over persistGate arguments from guided-flow.js.
|
||||
registry.register(driftDetectionGate);
|
||||
|
||||
|
|
|
|||
179
src/resources/extensions/shared/loop-signals.js
Normal file
179
src/resources/extensions/shared/loop-signals.js
Normal file
|
|
@ -0,0 +1,179 @@
|
|||
/**
|
||||
* loop-signals.js — normalized loop/stuck supervision signals.
|
||||
*
|
||||
* Purpose: give extensions one contract for loop detectors, watchdogs, retry
|
||||
* guards, and tool churn signals so supervisors can decide from one shape
|
||||
* instead of bespoke metadata branches.
|
||||
*
|
||||
* Consumer: SF autonomy supervision and cross-extension loop guards.
|
||||
*/
|
||||
|
||||
const VALID_SCOPES = new Set([
|
||||
"process",
|
||||
"session",
|
||||
"unit",
|
||||
"tool",
|
||||
"provider",
|
||||
"artifact",
|
||||
]);
|
||||
const VALID_SEVERITIES = new Set(["info", "warning", "fail", "pause"]);
|
||||
const VALID_ACTIONS = new Set([
|
||||
"continue",
|
||||
"retry",
|
||||
"switch-model",
|
||||
"pause",
|
||||
"block",
|
||||
"recover",
|
||||
]);
|
||||
|
||||
function pick(value, allowed, fallback) {
|
||||
return allowed.has(value) ? value : fallback;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a normalized loop supervision signal.
|
||||
*
|
||||
* Purpose: preserve producer-specific evidence while making action, severity,
|
||||
* scope, and unit identity machine-readable for shared supervision.
|
||||
*
|
||||
* Consumer: detector adapters, tool guards, provider guards, and watchdogs.
|
||||
*/
|
||||
export function createLoopSignal(input = {}) {
|
||||
const unit =
|
||||
input.unit && typeof input.unit === "object"
|
||||
? {
|
||||
type: String(input.unit.type ?? ""),
|
||||
id: String(input.unit.id ?? ""),
|
||||
}
|
||||
: undefined;
|
||||
return {
|
||||
scope: pick(input.scope, VALID_SCOPES, "unit"),
|
||||
kind: String(input.kind ?? "unknown-loop-signal"),
|
||||
severity: pick(input.severity, VALID_SEVERITIES, "warning"),
|
||||
...(unit?.type && unit?.id ? { unit } : {}),
|
||||
evidence:
|
||||
input.evidence && typeof input.evidence === "object"
|
||||
? input.evidence
|
||||
: { value: input.evidence ?? null },
|
||||
recommendedAction: pick(input.recommendedAction, VALID_ACTIONS, "pause"),
|
||||
...(input.message ? { message: String(input.message) } : {}),
|
||||
...(input.source ? { source: String(input.source) } : {}),
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a Wiggums detector sweep hit into a normalized loop signal.
|
||||
*
|
||||
* Purpose: let the periodic detector family feed the same supervisor contract
|
||||
* as direct runaway and zero-progress checks.
|
||||
*
|
||||
* Consumer: auto-runaway-guard when consuming buffered detector sweep results.
|
||||
*/
|
||||
export function detectorSweepSignal(fired, unitType, unitId, now = Date.now()) {
|
||||
return createLoopSignal({
|
||||
scope: "unit",
|
||||
kind: `wiggums:${fired?.name ?? "unknown"}`,
|
||||
severity: "fail",
|
||||
unit: { type: unitType, id: unitId },
|
||||
evidence: {
|
||||
detector: fired?.name ?? "unknown",
|
||||
signature: fired?.signature ?? null,
|
||||
failedAt: now,
|
||||
},
|
||||
recommendedAction: "pause",
|
||||
source: "periodic-detector-sweep",
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a zero-progress detector hit into a normalized loop signal.
|
||||
*
|
||||
* Purpose: make the strongest unit-churn signal available through the same
|
||||
* contract as Wiggums detectors and future tool/provider loop producers.
|
||||
*
|
||||
* Consumer: auto-runaway-guard.
|
||||
*/
|
||||
export function zeroProgressSignal(input) {
|
||||
return createLoopSignal({
|
||||
scope: "unit",
|
||||
kind: "zero-progress",
|
||||
severity: "fail",
|
||||
unit: { type: input.unitType, id: input.unitId },
|
||||
evidence: input.evidence,
|
||||
recommendedAction: "pause",
|
||||
source: "zero-progress-detector",
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a repeated identical tool call into a normalized loop signal.
|
||||
*
|
||||
* Purpose: expose pre-tool blocking as the same signal class as unit-level
|
||||
* stuck detectors instead of hiding it inside a string reason.
|
||||
*
|
||||
* Consumer: SF tool-call loop guard.
|
||||
*/
|
||||
export function repeatedToolCallSignal(input) {
|
||||
return createLoopSignal({
|
||||
scope: "tool",
|
||||
kind: "tool-call:duplicate",
|
||||
severity: "fail",
|
||||
evidence: {
|
||||
toolName: input.toolName,
|
||||
count: input.count,
|
||||
threshold: input.threshold,
|
||||
signature: input.signature,
|
||||
},
|
||||
recommendedAction: "block",
|
||||
source: "tool-call-loop-guard",
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a repeated failing shell command into a normalized loop signal.
|
||||
*
|
||||
* Purpose: make tool-result watchdog steering machine-readable while preserving
|
||||
* the existing human steering message.
|
||||
*
|
||||
* Consumer: SF tool watchdog.
|
||||
*/
|
||||
export function repeatedToolFailureSignal(input) {
|
||||
return createLoopSignal({
|
||||
scope: "tool",
|
||||
kind: "tool-result:repeated-failure",
|
||||
severity: "warning",
|
||||
evidence: {
|
||||
toolName: input.toolName,
|
||||
command: input.command,
|
||||
count: input.count,
|
||||
windowMs: input.windowMs,
|
||||
excerpt: input.excerpt,
|
||||
},
|
||||
recommendedAction: "continue",
|
||||
source: "tool-watchdog",
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a duplicate web-search query into a normalized loop signal.
|
||||
*
|
||||
* Purpose: let non-SF search tools report query loops through the shared
|
||||
* extension signal contract instead of only returning an opaque tool error.
|
||||
*
|
||||
* Consumer: search-the-web tool loop guard.
|
||||
*/
|
||||
export function duplicateSearchSignal(input) {
|
||||
return createLoopSignal({
|
||||
scope: "tool",
|
||||
kind: "search:duplicate-query",
|
||||
severity: "fail",
|
||||
evidence: {
|
||||
query: input.query,
|
||||
provider: input.provider,
|
||||
domain: input.domain ?? "",
|
||||
consecutiveDupeCount: input.consecutiveDupeCount,
|
||||
},
|
||||
recommendedAction: "block",
|
||||
source: "search-the-web",
|
||||
});
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue