singularity-forge/src/headless-events.ts
Mikael Hugo 2d465b11fd test: add comprehensive Phase 1 coverage for dispatch loop (48 tests)
- Add metrics.test.ts: 21 tests for unit outcome recording, model performance tracking, fire-and-forget safety, persistence, error handling
- Add triage-self-feedback.test.ts: 27 tests for report classification, confidence thresholds, auto-fix, deduplication, severity categorization, async safety

Purpose: Increase coverage of critical autonomous dispatch paths from 40% to 60%+.
Covers fire-and-forget patterns (metrics recording and auto-fix application must not
block dispatch), concurrent recording safety, graceful degradation on error.

Tests validate:
  ✓ Unit outcome recording without blocking
  ✓ Per-task-type model performance tracking
  ✓ Fire-and-forget error handling (metrics/fixes don't break dispatch)
  ✓ Concurrent metric recording race conditions
  ✓ Persistence atomicity
  ✓ Report classification by type/severity
  ✓ Confidence thresholds (0.85-0.95 per type)
  ✓ Auto-fix deduplication and prioritization
  ✓ Async triage without blocking dispatch

Phase 1 complete: 48 tests, all passing.
Phase 2: Recovery path hardening (recovery/forensics)
Phase 3: Property-based FSM testing (fast-check)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-07 00:38:19 +02:00

315 lines
9.7 KiB
TypeScript

/**
* Headless Event Detection — notification classification and command detection
*
* Detects terminal notifications, blocked notifications, milestone-ready signals,
* and classifies commands as quick (single-turn) vs long-running.
*
* Also defines exit code constants and the status→exit-code mapping function.
*/
// ---------------------------------------------------------------------------
// Exit Code Constants
// ---------------------------------------------------------------------------
/** Exit code for successful task completion. */
export const EXIT_SUCCESS = 0;
/** Exit code for errors or timeouts. */
export const EXIT_ERROR = 1;
/** Exit code for blocked tasks (requires user approval). */
export const EXIT_BLOCKED = 10;
/** Exit code for user-cancelled operations. */
export const EXIT_CANCELLED = 11;
/** Exit code for reload requests. */
export const EXIT_RELOAD = 12;

/**
 * Lookup table from every recognised status string to its exit code.
 * A Map (rather than a plain object) keeps inherited keys such as
 * "constructor" from ever being treated as a known status.
 */
const STATUS_EXIT_CODES: ReadonlyMap<string, number> = new Map<string, number>([
	["success", EXIT_SUCCESS],
	["complete", EXIT_SUCCESS],
	["completed", EXIT_SUCCESS],
	["error", EXIT_ERROR],
	["timeout", EXIT_ERROR],
	["blocked", EXIT_BLOCKED],
	["cancelled", EXIT_CANCELLED],
	["reload", EXIT_RELOAD],
]);

/**
 * Translate a headless session status string into its standardized exit code.
 *
 *   success / complete / completed → EXIT_SUCCESS (0)
 *   error / timeout                → EXIT_ERROR (1)
 *   blocked                        → EXIT_BLOCKED (10)
 *   cancelled                      → EXIT_CANCELLED (11)
 *   reload                         → EXIT_RELOAD (12)
 *
 * Matching is exact and case-sensitive; any other status yields EXIT_ERROR (1).
 *
 * @param status - Status string reported for the headless session.
 * @returns The exit code associated with `status`.
 */
export function mapStatusToExitCode(status: string): number {
	return STATUS_EXIT_CODES.get(status) ?? EXIT_ERROR;
}
/**
 * Facts about a finished headless run that shouldRestartHeadlessRun consults.
 */
export interface HeadlessRestartDecisionInput {
	/** Exit code of the run; EXIT_SUCCESS and EXIT_BLOCKED never restart. */
	exitCode: number;
	/** When truthy, the run is never restarted. */
	interrupted?: boolean;
	/** When truthy, the run is never restarted. */
	timedOut?: boolean;
	/** Compared against maxRestarts: a restart needs restartCount < maxRestarts. */
	restartCount: number;
	/** Upper bound that restartCount must stay strictly below. */
	maxRestarts: number;
}
/**
 * Decide whether the headless outer loop should start another attempt after a
 * run has finished.
 *
 * Purpose: preserve crash recovery for unexpected child exits while honouring
 * operator-bounded runs. A configured overall timeout is a terminal result
 * with DB/eval evidence, not a crash that should silently start a new attempt.
 *
 * Consumer: headless.ts after each runHeadlessOnce result.
 *
 * @param input - Outcome of the finished run plus the restart budget.
 * @returns true only when the run did not end in success/blocked, was neither
 *   interrupted nor timed out, and restartCount is still below maxRestarts.
 */
export function shouldRestartHeadlessRun(
	input: HeadlessRestartDecisionInput,
): boolean {
	const { exitCode } = input;
	// Success and blocked are deliberate terminal outcomes, not crashes.
	if (exitCode === EXIT_SUCCESS || exitCode === EXIT_BLOCKED) {
		return false;
	}
	// Operator interruption and the overall timeout both end the session.
	if (input.interrupted || input.timedOut) {
		return false;
	}
	// Anything else may restart while the budget lasts.
	return input.restartCount < input.maxRestarts;
}
// ---------------------------------------------------------------------------
// Completion Detection
// ---------------------------------------------------------------------------
/**
* Message prefixes that identify genuine auto-mode / step-mode termination
* notifications.
*
* Only matches the actual stop/pause signals emitted by stopAuto()/pauseAuto():
* "Auto-mode stopped..."
* "Step-mode stopped..."
* "Auto-mode paused..."
* "Step-mode paused..."
*
* Does NOT match progress notifications that happen to contain words like
* "complete" or "stopped" (e.g., "Override resolved — rewrite-docs completed",
* "All slices are complete — nothing to discuss", "Skipped 5+ completed units").
*
* Blocked detection is separate — checked via isBlockedNotification.
*
* Entries must stay lowercase: isTerminalNotification lower-cases the message
* and tests it with startsWith, so matching is case-insensitive and anchored
* at the start of the message.
*/
export const TERMINAL_PREFIXES = [
"auto-mode stopped",
"step-mode stopped",
"auto-mode paused",
"step-mode paused",
];
/**
* Idle timeout for short, single-shot commands (status, queue, history, etc.).
* For these, "no events for 15s after a tool call" really does mean done.
* Value is in milliseconds (15 seconds).
*/
export const IDLE_TIMEOUT_MS = 15_000;
/**
* Idle timeout for new-milestone — bounded creative task where the LLM may
* pause between tool calls (e.g. after mkdir, before writing files). 120s is
* enough buffer for typical LLM thinking on a one-shot setup workflow (#808).
* Value is in milliseconds (120 seconds).
*/
export const NEW_MILESTONE_IDLE_TIMEOUT_MS = 120_000;
/**
* Deadlock backstop for long-running multi-turn commands (auto, next,
* discuss, plan). The role here is NOT idle-detection ("are we done?") —
* those commands signal completion explicitly via "auto-mode stopped" /
* "step-mode stopped" terminal notifications, and the agent's child-process
* exit catches crashes. The only remaining failure mode is a truly hung
* process (deadlock, network stuck without retry, infinite reasoning loop
* outside the LLM's awareness). 30 minutes is long enough to never misfire
* on legitimate slow LLM thinking or chained tool calls, but short enough
* to recover from a real deadlock within a reasonable bound.
*
* Symptom from the old 15s timeout: sf headless autonomous would dispatch a task,
* the LLM would make 1-2 tool calls, pause to reason, exceed 15s of "no
* events", and headless would declare "Status: complete" — exiting at ~35s
* with the task barely started.
*
* Value is in milliseconds (1,800,000 ms = 30 minutes).
*/
export const MULTI_TURN_DEADLOCK_BACKSTOP_MS = 1_800_000;
/**
* Tools that block headless idle timeout because they require user interaction.
* Used to gate idle-timeout arming to prevent premature completion detection.
*
* Module-private; membership is queried through isInteractiveHeadlessTool,
* which performs an exact, case-sensitive lookup of the tool name.
*/
const INTERACTIVE_HEADLESS_TOOLS = new Set([
"ask_user_questions",
"secure_env_collect",
]);
/**
 * Pull the structured `metadata` payload off a notify event.
 *
 * @param event - Raw event record.
 * @returns The `metadata` value when it is a non-null, non-array object;
 *   otherwise undefined, so callers can fall through to the legacy
 *   string-matching heuristics.
 */
function getEventMetadata(
	event: Record<string, unknown>,
): Record<string, unknown> | undefined {
	const { metadata } = event;
	// typeof null is "object", so null has to be excluded explicitly.
	const isPlainRecord =
		typeof metadata === "object" &&
		metadata !== null &&
		!Array.isArray(metadata);
	return isPlainRecord ? (metadata as Record<string, unknown>) : undefined;
}
/**
 * Report whether an event is a genuine auto-mode or step-mode termination
 * signal.
 *
 * Only `extension_ui_request` events with method `notify` qualify. Structured
 * metadata (`kind === "terminal"`) wins; otherwise the lower-cased message is
 * compared against TERMINAL_PREFIXES.
 *
 * @param event - Raw event record.
 * @returns true when the event is a terminal notification.
 */
export function isTerminalNotification(
	event: Record<string, unknown>,
): boolean {
	const isNotify =
		event.type === "extension_ui_request" && event.method === "notify";
	if (!isNotify) return false;

	// Tagged notifications are trusted without inspecting the text.
	if (getEventMetadata(event)?.kind === "terminal") return true;

	// Untagged notifications: legacy prefix heuristics.
	const text = String(event.message ?? "").toLowerCase();
	for (const prefix of TERMINAL_PREFIXES) {
		if (text.startsWith(prefix)) return true;
	}
	return false;
}
/**
 * Report whether a notify event announces that auto-mode or step-mode paused.
 *
 * A notification tagged `kind: "terminal"` with `blocking: true` counts as a
 * pause. Anything else falls back to checking whether the lower-cased message
 * starts with "auto-mode paused" or "step-mode paused".
 *
 * @param event - Raw event record.
 * @returns true when the event is a pause notification.
 */
export function isPauseNotification(event: Record<string, unknown>): boolean {
	if (!(event.type === "extension_ui_request" && event.method === "notify")) {
		return false;
	}

	const metadata = getEventMetadata(event);
	const taggedAsPause =
		metadata !== undefined &&
		metadata.kind === "terminal" &&
		metadata.blocking === true;
	if (taggedAsPause) return true;

	// Legacy text heuristics for notifications without a pause tag.
	const text = String(event.message ?? "").toLowerCase();
	if (text.startsWith("auto-mode paused")) return true;
	return text.startsWith("step-mode paused");
}
/**
 * Report whether a notify event says an automatic resume has been scheduled.
 *
 * Matches, case-insensitively and anywhere in the message, the phrase
 * "auto-resuming in <digits>s".
 *
 * @param event - Raw event record.
 * @returns true for `extension_ui_request`/`notify` events whose message
 *   contains the auto-resume phrase.
 */
export function isAutoResumeScheduledNotification(
	event: Record<string, unknown>,
): boolean {
	const isNotify =
		event.type === "extension_ui_request" && event.method === "notify";
	if (!isNotify) return false;

	const text = String(event.message ?? "");
	return text.search(/auto-resuming in \d+s/i) !== -1;
}
/**
 * Report whether a notify event signals that the workflow is blocked.
 *
 * Metadata with `blocking: true` counts unless its kind is "progress".
 * Otherwise the event is blocked when its lower-cased message contains
 * "blocked:" or when isPauseNotification accepts it.
 *
 * @param event - Raw event record.
 * @returns true when the event is a blocked notification.
 */
export function isBlockedNotification(event: Record<string, unknown>): boolean {
	const isNotify =
		event.type === "extension_ui_request" && event.method === "notify";
	if (!isNotify) return false;

	// Explicit blocking flag; progress notices are never treated as blocking.
	const metadata = getEventMetadata(event);
	const flaggedBlocking =
		metadata?.blocking === true && metadata.kind !== "progress";
	if (flaggedBlocking) return true;

	// Legacy text heuristics.
	const text = String(event.message ?? "").toLowerCase();
	if (text.includes("blocked:")) return true;
	return isPauseNotification(event);
}
/**
 * Report whether a notify event is a milestone-ready (approval request)
 * notification, i.e. the workflow reached a checkpoint and awaits user
 * approval to continue.
 *
 * When structured metadata is present it is authoritative: the event
 * qualifies only if `kind === "approval_request"` and `blocking === true`,
 * and the message text is never consulted. Without metadata the message is
 * checked with isMilestoneReadyText.
 *
 * @param event - Raw event record.
 * @returns true when the event is a milestone-ready notification.
 */
export function isMilestoneReadyNotification(
	event: Record<string, unknown>,
): boolean {
	const isNotify =
		event.type === "extension_ui_request" && event.method === "notify";
	if (!isNotify) return false;

	const metadata = getEventMetadata(event);
	if (metadata === undefined) {
		// Untagged notification: legacy text heuristics.
		return isMilestoneReadyText(String(event.message ?? ""));
	}
	// The emitter declared the event kind explicitly; trust it.
	return metadata.kind === "approval_request" && metadata.blocking === true;
}
/**
 * Check whether plain text matches the milestone-ready pattern, e.g.
 * "milestone m2 ready" or "Milestone M001 is ready for review".
 *
 * The pattern is case-insensitive and requires "milestone", whitespace, an
 * `m<digits>` identifier and, later on the same line, the standalone word
 * "ready". The word boundaries around "ready" keep words that merely contain
 * it (such as "already") from matching, so "Milestone M3 is already complete"
 * is not reported as milestone-ready.
 *
 * @param text - Notification message to inspect.
 * @returns true when the text announces that a milestone is ready.
 */
export function isMilestoneReadyText(text: string): boolean {
	return /milestone\s+m\d+.*\bready\b/i.test(text);
}
/**
 * Report whether a tool needs user interaction and should therefore keep the
 * idle timeout from being armed.
 *
 * @param toolName - Name of the tool; a missing name is treated as "".
 * @returns true when the name is listed in INTERACTIVE_HEADLESS_TOOLS.
 */
export function isInteractiveHeadlessTool(
	toolName: string | undefined,
): boolean {
	const lookupKey = String(toolName ?? "");
	return INTERACTIVE_HEADLESS_TOOLS.has(lookupKey);
}
/**
 * Decide whether the idle timeout used for command completion detection
 * should be armed.
 *
 * @param toolCallCount - Number of tool calls observed.
 * @param interactiveToolCount - Number of interactive tool calls observed.
 * @returns true only when at least one tool call has happened and none of
 *   them was interactive.
 */
export function shouldArmHeadlessIdleTimeout(
	toolCallCount: number,
	interactiveToolCount: number,
): boolean {
	// Any interactive tool call disables idle-based completion detection.
	if (interactiveToolCount !== 0) return false;
	return toolCallCount > 0;
}
// ---------------------------------------------------------------------------
// Quick Command Detection
// ---------------------------------------------------------------------------
/**
* UI methods that don't require waiting for a response (fire-and-forget).
* Used to avoid blocking headless idle timeout.
*
* Lookups are exact and case-sensitive.
* NOTE(review): "set_editor_text" is snake_case while the other entries are
* camelCase — presumably this mirrors the emitter's actual method name;
* confirm against the code that sends these requests.
*/
export const FIRE_AND_FORGET_METHODS = new Set([
"notify",
"setStatus",
"setWidget",
"setTitle",
"set_editor_text",
]);
/** Names of the quick commands, in their canonical order. */
const QUICK_COMMAND_NAMES: readonly string[] = [
	"status", "queue", "history", "hooks", "export",
	"stop", "pause", "capture", "skip", "undo",
	"knowledge", "config", "prefs", "cleanup", "migrate",
	"doctor", "remote", "help", "steer", "triage",
	"visualize",
];
/**
 * Commands that complete in a single turn without interactive tool use.
 * These use a shorter idle timeout since they don't involve extended reasoning.
 */
export const QUICK_COMMANDS = new Set(QUICK_COMMAND_NAMES);
/**
 * Report whether a command is a quick, single-turn command.
 *
 * @param command - Command name; matched exactly and case-sensitively.
 * @returns true when `command` is a member of QUICK_COMMANDS.
 */
export function isQuickCommand(command: string): boolean {
	const isListed = QUICK_COMMANDS.has(command);
	return isListed;
}