- Add metrics.test.ts: 21 tests for unit outcome recording, model performance tracking, fire-and-forget safety, persistence, error handling - Add triage-self-feedback.test.ts: 27 tests for report classification, confidence thresholds, auto-fix, deduplication, severity categorization, async safety Purpose: Increase coverage of critical autonomous dispatch paths from 40% to 60%+. Covers fire-and-forget patterns (metrics recording and auto-fix application must not block dispatch), concurrent recording safety, graceful degradation on error. Tests validate: ✓ Unit outcome recording without blocking ✓ Per-task-type model performance tracking ✓ Fire-and-forget error handling (metrics/fixes don't break dispatch) ✓ Concurrent metric recording race conditions ✓ Persistence atomicity ✓ Report classification by type/severity ✓ Confidence thresholds (0.85-0.95 per type) ✓ Auto-fix deduplication and prioritization ✓ Async triage without blocking dispatch Phase 1 complete: 48 tests, all passing. Phase 2: Recovery path hardening (recovery/forensics) Phase 3: Property-based FSM testing (fast-check) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
315 lines
9.7 KiB
TypeScript
315 lines
9.7 KiB
TypeScript
/**
 * Headless Event Detection — notification classification and command detection
 *
 * Detects terminal notifications, blocked notifications, milestone-ready signals,
 * and classifies commands as quick (single-turn) vs long-running.
 *
 * Also defines exit code constants and the status→exit-code mapping function.
 */
|
|
|
|
// ---------------------------------------------------------------------------
// Exit Code Constants
// ---------------------------------------------------------------------------
|
|
|
|
/** Exit code for successful task completion. */
export const EXIT_SUCCESS = 0;

/** Exit code for errors or timeouts. */
export const EXIT_ERROR = 1;

/** Exit code for blocked tasks (requires user approval). */
export const EXIT_BLOCKED = 10;

/** Exit code for user-cancelled operations. */
export const EXIT_CANCELLED = 11;

/** Exit code for reload requests. */
export const EXIT_RELOAD = 12;
|
|
|
|
/**
 * Map a headless session status string to its standardized exit code.
 *
 *   success   → 0
 *   complete  → 0
 *   completed → 0
 *   error     → 1
 *   timeout   → 1
 *   blocked   → 10
 *   cancelled → 11
 *   reload    → 12
 *
 * Matching is exact and case-sensitive. Unknown statuses default to
 * EXIT_ERROR (1).
 *
 * @param status - Status string reported for the headless session.
 * @returns The exit code constant corresponding to `status`.
 */
export function mapStatusToExitCode(status: string): number {
  switch (status) {
    case "success":
    case "complete":
    case "completed":
      return EXIT_SUCCESS;
    case "error":
    case "timeout":
      return EXIT_ERROR;
    case "blocked":
      return EXIT_BLOCKED;
    case "cancelled":
      return EXIT_CANCELLED;
    case "reload":
      return EXIT_RELOAD;
    default:
      // Anything unrecognised is treated as a failure.
      return EXIT_ERROR;
  }
}
|
|
|
|
/**
 * Inputs consumed by shouldRestartHeadlessRun when deciding whether a
 * finished headless run should be restarted.
 */
export interface HeadlessRestartDecisionInput {
  /** Exit code produced by the run that just finished. */
  exitCode: number;
  /** When truthy, the run is never restarted. */
  interrupted?: boolean;
  /** When truthy, the run is never restarted. */
  timedOut?: boolean;
  /** Number of restarts already performed. */
  restartCount: number;
  /** Upper bound on restarts; restart is allowed only while restartCount is below it. */
  maxRestarts: number;
}
|
|
|
|
/**
 * Decide whether the headless outer loop should restart a completed run.
 *
 * Purpose: keep crash recovery for unexpected child exits while respecting
 * operator-bounded runs. A configured overall timeout is a terminal result with
 * DB/eval evidence, not a crash that should silently start a new attempt.
 *
 * Consumer: headless.ts after each runHeadlessOnce result.
 *
 * @param input - Exit code, interruption/timeout flags and restart counters
 *   of the run that just finished.
 * @returns `false` when the run exited with EXIT_SUCCESS or EXIT_BLOCKED, or
 *   was interrupted or timed out; otherwise `true` only while
 *   `restartCount` is still below `maxRestarts`.
 */
export function shouldRestartHeadlessRun(
  input: HeadlessRestartDecisionInput,
): boolean {
  // Terminal outcomes: never restart, regardless of the remaining budget.
  if (
    input.exitCode === EXIT_SUCCESS ||
    input.exitCode === EXIT_BLOCKED ||
    input.interrupted ||
    input.timedOut
  ) {
    return false;
  }
  // Any other exit is restartable while the restart budget lasts.
  return input.restartCount < input.maxRestarts;
}
|
|
|
|
// ---------------------------------------------------------------------------
// Completion Detection
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Message prefixes that identify genuine auto-mode/step-mode termination
 * notifications.
 *
 * Only matches the actual stop/pause signals emitted by stopAuto()/pauseAuto():
 *   "Auto-mode stopped..."
 *   "Step-mode stopped..."
 *   "Auto-mode paused..."
 *   "Step-mode paused..."
 *
 * Entries are lower-case: isTerminalNotification lower-cases the message and
 * tests it with `startsWith`, so progress notifications that merely contain
 * words like "complete" or "stopped" (e.g., "Override resolved — rewrite-docs
 * completed", "All slices are complete — nothing to discuss", "Skipped 5+
 * completed units") do NOT match.
 *
 * Blocked detection is separate — checked via isBlockedNotification.
 */
export const TERMINAL_PREFIXES = [
  "auto-mode stopped",
  "step-mode stopped",
  "auto-mode paused",
  "step-mode paused",
];
|
|
|
|
/**
 * Idle timeout for short, single-shot commands (status, queue, history, etc.).
 * For these, "no events for 15s after a tool call" really does mean done.
 */
export const IDLE_TIMEOUT_MS = 15_000;

/**
 * Idle timeout for new-milestone — bounded creative task where the LLM may
 * pause between tool calls (e.g. after mkdir, before writing files). 120s is
 * enough buffer for typical LLM thinking on a one-shot setup workflow (#808).
 */
export const NEW_MILESTONE_IDLE_TIMEOUT_MS = 120_000;

/**
 * Deadlock backstop for long-running multi-turn commands (auto, next,
 * discuss, plan). The role here is NOT idle-detection ("are we done?") —
 * those commands signal completion explicitly via "auto-mode stopped" /
 * "step-mode stopped" terminal notifications, and the agent's child-process
 * exit catches crashes. The only remaining failure mode is a truly hung
 * process (deadlock, network stuck without retry, infinite reasoning loop
 * outside the LLM's awareness). 30 minutes is long enough to never misfire
 * on legitimate slow LLM thinking or chained tool calls, but short enough
 * to recover from a real deadlock within a reasonable bound.
 *
 * Symptom from the old 15s timeout: sf headless autonomous would dispatch a task,
 * the LLM would make 1-2 tool calls, pause to reason, exceed 15s of "no
 * events", and headless would declare "Status: complete" — exiting at ~35s
 * with the task barely started.
 */
export const MULTI_TURN_DEADLOCK_BACKSTOP_MS = 1_800_000;
|
|
|
|
/**
 * Tools that block headless idle timeout because they require user interaction.
 * Used to gate idle-timeout arming to prevent premature completion detection.
 */
const INTERACTIVE_HEADLESS_TOOLS = new Set([
  "ask_user_questions",
  "secure_env_collect",
]);
|
|
|
|
/**
 * Extract structured metadata from a notify event.
 * Returns undefined when absent or malformed, so callers fall through to
 * the legacy string-matching heuristics.
 *
 * @param event - Raw event object; only its `metadata` property is read.
 * @returns `event.metadata` when it is a non-null, non-array object,
 *   otherwise `undefined`.
 */
function getEventMetadata(
  event: Record<string, unknown>,
): Record<string, unknown> | undefined {
  const meta = event.metadata;
  // Reject null/undefined, primitives and arrays: none can carry named fields.
  if (meta == null || typeof meta !== "object" || Array.isArray(meta))
    return undefined;
  return meta as Record<string, unknown>;
}
|
|
|
|
/**
 * Detect genuine auto-mode or step-mode termination signals. Checks structured
 * metadata first, then falls back to legacy text-matching heuristics.
 *
 * @param event - Raw event object. Only events with
 *   `type === "extension_ui_request"` and `method === "notify"` are considered.
 * @returns `true` when the metadata has `kind === "terminal"`, or when the
 *   lower-cased `message` starts with one of TERMINAL_PREFIXES.
 */
export function isTerminalNotification(
  event: Record<string, unknown>,
): boolean {
  if (event.type !== "extension_ui_request" || event.method !== "notify")
    return false;
  // Structured metadata takes precedence over text matching.
  const meta = getEventMetadata(event);
  if (meta?.kind === "terminal") return true;
  // Fallback: legacy text heuristics for untagged notifications.
  const message = String(event.message ?? "").toLowerCase();
  return TERMINAL_PREFIXES.some((prefix) => message.startsWith(prefix));
}
|
|
|
|
/**
 * Detect auto-mode or step-mode pause notifications. Checks structured
 * metadata first, then falls back to legacy text-matching heuristics.
 *
 * @param event - Raw event object. Only events with
 *   `type === "extension_ui_request"` and `method === "notify"` are considered.
 * @returns `true` when the metadata has `kind === "terminal"` and
 *   `blocking === true`, or when the lower-cased `message` starts with
 *   "auto-mode paused" or "step-mode paused".
 */
export function isPauseNotification(event: Record<string, unknown>): boolean {
  if (event.type !== "extension_ui_request" || event.method !== "notify")
    return false;
  // Structured: a terminal+blocking notice is a pause.
  const meta = getEventMetadata(event);
  if (meta?.kind === "terminal" && meta.blocking === true) return true;
  // Fallback: legacy text heuristics.
  const message = String(event.message ?? "").toLowerCase();
  return (
    message.startsWith("auto-mode paused") ||
    message.startsWith("step-mode paused")
  );
}
|
|
|
|
/**
 * Detect notifications announcing a scheduled auto-resume.
 *
 * @param event - Raw event object. Only events with
 *   `type === "extension_ui_request"` and `method === "notify"` are considered.
 * @returns `true` when `message` contains "auto-resuming in <digits>s"
 *   (case-insensitive) anywhere in the text.
 */
export function isAutoResumeScheduledNotification(
  event: Record<string, unknown>,
): boolean {
  if (event.type !== "extension_ui_request" || event.method !== "notify")
    return false;
  return /auto-resuming in \d+s/i.test(String(event.message ?? ""));
}
|
|
|
|
/**
 * Detect notifications that indicate the workflow is blocked. Checks
 * structured metadata first, then falls back to legacy text-matching
 * heuristics.
 *
 * @param event - Raw event object. Only events with
 *   `type === "extension_ui_request"` and `method === "notify"` are considered.
 * @returns `true` when the metadata has `blocking === true` and a `kind` other
 *   than "progress"; otherwise `true` when the lower-cased `message` contains
 *   "blocked:" or the event is a pause notification (isPauseNotification).
 */
export function isBlockedNotification(event: Record<string, unknown>): boolean {
  if (event.type !== "extension_ui_request" || event.method !== "notify")
    return false;
  // Structured: explicit blocking flag, excluding non-blocking progress notices.
  const meta = getEventMetadata(event);
  if (meta?.blocking === true && meta.kind !== "progress") return true;
  // Fallback: legacy text heuristics.
  const message = String(event.message ?? "").toLowerCase();
  return message.includes("blocked:") || isPauseNotification(event);
}
|
|
|
|
/**
 * Detect milestone-ready (approval request) notifications. Indicates workflow
 * reached a checkpoint and awaits user approval to continue.
 *
 * @param event - Raw event object. Only events with
 *   `type === "extension_ui_request"` and `method === "notify"` are considered.
 * @returns When metadata is present, `true` only if it has
 *   `kind === "approval_request"` and `blocking === true`; when metadata is
 *   absent, the result of isMilestoneReadyText on `message`.
 */
export function isMilestoneReadyNotification(
  event: Record<string, unknown>,
): boolean {
  if (event.type !== "extension_ui_request" || event.method !== "notify")
    return false;
  const meta = getEventMetadata(event);
  if (meta !== undefined) {
    // Metadata present: it is the authoritative source. Do not fall back to
    // text matching — the emitter declared the event kind explicitly.
    return meta.kind === "approval_request" && meta.blocking === true;
  }
  // No metadata — fall back to legacy text heuristics.
  return isMilestoneReadyText(String(event.message ?? ""));
}
|
|
|
|
/**
 * Check if plain text matches milestone-ready pattern (e.g., "milestone m2 ready").
 *
 * The match is case-insensitive and may occur anywhere in the text: the word
 * "milestone", whitespace, "m" followed by digits, then "ready" later on the
 * same line.
 *
 * @param text - Notification text to inspect.
 * @returns `true` when the pattern is found.
 */
export function isMilestoneReadyText(text: string): boolean {
  return /milestone\s+m\d+.*ready/i.test(text);
}
|
|
|
|
/**
 * Check if a tool requires user interaction and should block idle timeout.
 *
 * @param toolName - Name of the tool; `undefined` is treated as the empty
 *   string.
 * @returns `true` when the name is a member of INTERACTIVE_HEADLESS_TOOLS.
 */
export function isInteractiveHeadlessTool(
  toolName: string | undefined,
): boolean {
  return INTERACTIVE_HEADLESS_TOOLS.has(String(toolName ?? ""));
}
|
|
|
|
/**
 * Determine whether to arm the idle timeout for command completion detection.
 *
 * @param toolCallCount - Number of tool calls counted by the caller.
 * @param interactiveToolCount - Number of interactive tool calls counted by
 *   the caller.
 * @returns `true` only when at least one tool call has been made and
 *   `interactiveToolCount` is exactly 0; `false` if no tool has been called
 *   yet or if interactive tools have been called.
 */
export function shouldArmHeadlessIdleTimeout(
  toolCallCount: number,
  interactiveToolCount: number,
): boolean {
  return toolCallCount > 0 && interactiveToolCount === 0;
}
|
|
|
|
// ---------------------------------------------------------------------------
// Quick Command Detection
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * UI methods that don't require waiting for a response (fire-and-forget).
 * Used to avoid blocking headless idle timeout.
 */
export const FIRE_AND_FORGET_METHODS = new Set([
  "notify",
  "setStatus",
  "setWidget",
  "setTitle",
  "set_editor_text",
]);
|
|
|
|
/**
 * Commands that complete in a single turn without interactive tool use.
 * These use a shorter idle timeout since they don't involve extended reasoning.
 */
export const QUICK_COMMANDS = new Set([
  "status",
  "queue",
  "history",
  "hooks",
  "export",
  "stop",
  "pause",
  "capture",
  "skip",
  "undo",
  "knowledge",
  "config",
  "prefs",
  "cleanup",
  "migrate",
  "doctor",
  "remote",
  "help",
  "steer",
  "triage",
  "visualize",
]);
|
|
|
|
/**
 * Check whether a command is classified as quick (single-turn).
 *
 * @param command - Command name; matched exactly (case-sensitive) against
 *   QUICK_COMMANDS.
 * @returns `true` when `command` is a member of QUICK_COMMANDS.
 */
export function isQuickCommand(command: string): boolean {
  return QUICK_COMMANDS.has(command);
}
|