singularity-forge/src/headless-triage.ts

/**
 * headless-triage.ts — `sf headless triage`
 *
 * Purpose: operator-driven entry point for self-feedback triage. Lists open
 * forge-local self-feedback entries (sorted by impact ↓ effort ↑ ts ↑) and
 * either:
 *   - emits the canonical triage prompt (default): the same one the
 *     bootstrap session_start drain queues as a followUp, but rendered
 *     synchronously to stdout so operators can pipe it into any model
 *     (`sf headless triage | sf headless -p -`, or any external assistant)
 *     without depending on the autonomous-loop turn semantics that
 *     swallow the followUp when no other unit is dispatchable.
 *   - --list: human-readable candidate digest, no prompt — for scanning
 *     the queue at a glance.
 *   - --json: structured candidate list for tooling.
 *
 * Why this command exists (sf-mp4rxkwb-l4baga): the inline-fix worker
 * currently delivers via `triggerTurn:true, deliverAs:"followUp"`. When
 * autonomous mode bails at milestone validation before any turn runs,
 * the followUp never lands and the queue stays unprocessed. This command
 * gives operators a deterministic path to drain the queue today, ahead of
 * the larger refactor that promotes triage to a real SF unit type.
 *
 * Consumer: headless.ts when command === "triage".
 */

import { randomUUID } from "node:crypto";
import { existsSync } from "node:fs";
import { join } from "node:path";
import { createJiti } from "@mariozechner/jiti";
import { runSubagent } from "@singularity-forge/coding-agent";
import { parse as parseYaml } from "yaml";
import { resolveBundledSourceResource } from "./bundled-resource-path.js";
import { getSfEnv } from "./env.js";

// Shared jiti loader for this module. The helpers below call `jiti.import`
// to load SF extension modules at call time, from whatever path
// sfExtensionPath resolves (which may be a .ts source or a .js file).
const jiti = createJiti(import.meta.filename, {
	interopDefault: true,
	debug: false,
});

/**
 * Resolve the on-disk path of an SF extension module.
 *
 * Resolution order:
 *   1. `<agentDir>/extensions/sf/<moduleName>.js`, when that directory
 *      contains a `state.js` file.
 *   2. The `.ts` path returned by resolveBundledSourceResource, when that
 *      file exists.
 *   3. The `.js` path returned by resolveBundledSourceResource (returned
 *      without an existence check).
 *
 * @param moduleName Module path relative to the `extensions/sf` directory,
 *   without a file extension (e.g. "subagent/agents").
 * @returns The path to hand to the module loader.
 */
function sfExtensionPath(moduleName: string): string {
	const installedDir = join(getSfEnv().agentDir, "extensions", "sf");
	if (existsSync(join(installedDir, "state.js"))) {
		return join(installedDir, `${moduleName}.js`);
	}

	const bundledPath = (extension: "ts" | "js"): string =>
		resolveBundledSourceResource(
			import.meta.url,
			"extensions",
			"sf",
			`${moduleName}.${extension}`,
		);

	const sourcePath = bundledPath("ts");
	return existsSync(sourcePath) ? sourcePath : bundledPath("js");
}

/**
 * Options for the `sf headless triage` command.
 *
 * NOTE(review): the handler that consumes these is not visible in this part
 * of the file, so the per-field notes below are inferred from the file
 * header and from runTriageApply — confirm against the handler.
 */
export interface HandleTriageOptions {
	// --json: structured candidate list for tooling.
	json?: boolean;
	// --list: human-readable candidate digest, no prompt.
	list?: boolean;
	// Presumably an upper bound on the number of candidates — TODO confirm.
	max?: number;
	// --run: referenced in runTriageApply's refusal message as the way to
	// get a reviewable decision artifact — TODO confirm exact behavior.
	run?: boolean;
	// --apply: the mode in which the ledger may be mutated (see runTriageApply).
	apply?: boolean;
	// Presumably a model override forwarded to the agents — TODO confirm.
	model?: string;
	// Custom agent runner. runTriageApply refuses one unless
	// allowUntrustedRunner is also set.
	agentRunner?: AgentRunner;
}

/**
 * Result of the `sf headless triage` handler.
 */
export interface HandleTriageResult {
	// Presumably used as the process exit status by the caller — TODO confirm.
	exitCode: number;
}

/**
 * One open self-feedback entry offered for triage.
 *
 * NOTE(review): no code in this part of the file reads these fields; the
 * file header says candidates are sorted by impact ↓ effort ↑ ts ↑, which
 * presumably corresponds to impactScore, effortEstimate and ts — confirm.
 */
interface TriageCandidate {
	id: string;
	kind: string;
	severity: string;
	summary: string;
	ts: string;
	impactScore?: number;
	effortEstimate?: number;
}

/**
 * Agent definition as returned by the dynamically loaded `discoverAgents`
 * helper. Only the fields this file relies on are declared here.
 */
interface AgentConfig {
	// Lookup key; runTriageApply searches for "triage-decider" and "review-code".
	name: string;
	// Used by defaultAgentRunner when the caller passes no model override.
	model?: string;
	// Used by defaultAgentRunner when the caller passes no tool list.
	tools?: string[];
	// Raw system prompt; defaultAgentRunner falls back to it when
	// composeAgentPrompt is missing or returns nothing.
	systemPrompt: string;
	// Not read in the visible part of this file — presumably consumed by
	// composeAgentPrompt; TODO confirm.
	promptParts?: string[];
	// runTriageApply requires this to equal "builtin" for both agents.
	source?: string;
	// Not read in the visible part of this file.
	filePath?: string;
}

/**
 * Outcome of a single agent run, as produced by an AgentRunner.
 */
interface AgentRunResult {
	// False makes runTriageApply stop and report a failure for that phase.
	ok: boolean;
	// The agent's text output (the decider's plan, or the reviewer's verdict).
	output: string;
	// Used as the error message when the run failed; defaultAgentRunner
	// replaces it with a timeout explanation when exitCode is 124.
	stderr?: string;
	// Exit code reported by the runner; defaultAgentRunner treats 124 as a timeout.
	exitCode?: number;
}

/**
 * Function that executes one agent against a task. `options` lets the
 * caller override the agent's own tool list and model and set the working
 * directory. defaultAgentRunner is the production implementation; tests may
 * inject another via runTriageApply's agentRunner option.
 */
type AgentRunner = (
	agent: AgentConfig,
	task: string,
	options?: { tools?: string[]; model?: string; cwd?: string },
) => Promise<AgentRunResult>;

/**
 * One entry of the triage-decider's decision plan.
 *
 * Triage-decider's output contract is a YAML fenced block with key
 * `decisions:`. The plan parser turns each item of that list into this
 * shape; snake_case YAML keys (`evidence_kind`, `proposed_approach`,
 * `requirement_id`) map onto the camelCase fields below. When no plan is
 * present or the YAML fails to load the parser yields a null plan, and
 * runTriageApply treats null as "do not apply" (safe default: when in
 * doubt, never mutate).
 *
 * Why a structured plan instead of letting the decider call resolve_issue
 * directly: codex review 2026-05-14 flagged that the original sequential
 * design (decider → review-code) let the decider mutate state during its
 * own turn, before review-code ever saw the decisions. The parser pulls
 * the proposed actions out of the decider's text so they can be reviewed
 * BEFORE any resolve_issue call.
 */
export interface TriageDecision {
	// Ledger entry id the decision applies to (trimmed, non-empty).
	id: string;
	// "close" and "promote" are applied after review; "fix" is an operator handoff.
	outcome: "fix" | "promote" | "close";
	// Optional evidence kind; the apply phase supplies a default when absent.
	evidenceKind?: string;
	// Required by the parser for close and promote.
	reason?: string;
	// Required by the parser for fix.
	proposedApproach?: string;
	// Required by the parser for promote.
	requirementId?: string;
}

/**
 * Result of parseTriagePlanStrict. Exactly one side is populated: a
 * successful parse returns the plan with `error: null`; any rejection
 * returns `plan: null` with a human-readable explanation in `error`.
 */
export interface ParseTriagePlanResult {
	plan: TriageDecision[] | null;
	error: string | null;
}

// Phrase the decider output must contain; parseTriagePlanStrict treats
// output without it as incomplete and rejects the plan.
const COMPLETION_MARKER = "Self-feedback triage complete";

/**
 * Parse a decider output into a strict decision plan, or return a
 * structured error explaining what's wrong. Codex review 2026-05-14
 * follow-up: refuse the whole plan if ANY item is malformed instead of
 * silently dropping items — partial-trust on a triage plan is worse
 * than no apply at all.
 *
 * Validates:
 *   1. The completion marker is present (signals the decider finished).
 *   2. A single fenced ```yaml block with key `decisions:` exists.
 *   3. Every item has `id` (non-empty string, unique within the plan) and
 *      `outcome` ∈ {fix, promote, close}.
 *   4. Outcome-specific required fields:
 *        - close   → reason (non-empty).
 *        - promote → reason + requirement_id (non-empty strings).
 *        - fix     → proposed_approach (non-empty).
 *      For close and promote, evidence_kind may be omitted (the apply
 *      phase supplies a default), but if it is provided it must be a
 *      non-empty string — a blank or non-string value rejects the plan
 *      rather than being silently replaced by the default.
 *   5. If `expectedIds` is supplied (the candidate set the decider was
 *      shown), every decision id must be in that set and every expected
 *      id must have exactly one decision — no extras, no missing.
 *
 * Field names are accepted in snake_case or camelCase; when both are
 * strings the snake_case value wins.
 *
 * @param text Raw text output of the triage-decider agent.
 * @param expectedIds Ledger ids the decider was shown. The coverage check
 *   is skipped when this is omitted or empty.
 * @returns `{ plan, error: null }` on success, `{ plan: null, error }` on
 *   the first problem found.
 */
export function parseTriagePlanStrict(
	text: string,
	expectedIds?: string[],
): ParseTriagePlanResult {
	// Every rejection has the same shape; only the explanation differs.
	const reject = (error: string): ParseTriagePlanResult => ({
		plan: null,
		error,
	});
	// Return the first key whose value is a string, trimmed. A string under
	// an earlier key wins even when it is blank, matching the snake_case-
	// before-camelCase precedence of the plan contract.
	const readTrimmed = (
		item: Record<string, unknown>,
		...keys: string[]
	): string => {
		for (const key of keys) {
			const value = item[key];
			if (typeof value === "string") return value.trim();
		}
		return "";
	};
	const evidenceKeys = ["evidence_kind", "evidenceKind"] as const;

	if (typeof text !== "string" || text.length === 0) {
		return reject("decider output was empty");
	}
	if (!text.includes(COMPLETION_MARKER)) {
		return reject(
			`decider output is missing the completion marker "${COMPLETION_MARKER}" — treating as incomplete`,
		);
	}
	const fenceMatches = Array.from(
		text.matchAll(/```ya?ml\s*\n([\s\S]*?)\n```/gi),
	);
	if (fenceMatches.length === 0) {
		return reject(
			"decider output has no fenced yaml block with the decision plan",
		);
	}
	if (fenceMatches.length > 1) {
		return reject(
			`decider output has ${fenceMatches.length} fenced yaml blocks — the contract is exactly one`,
		);
	}
	const yamlBody = fenceMatches[0][1];
	let parsed: unknown;
	try {
		parsed = parseYaml(yamlBody);
	} catch (err) {
		return reject(
			`decision plan failed to parse as yaml: ${
				err instanceof Error ? err.message : String(err)
			}`,
		);
	}
	const root = parsed as Record<string, unknown> | null;
	const decisions = root?.decisions;
	if (!Array.isArray(decisions)) {
		return reject("decision plan must have a top-level `decisions:` array");
	}
	if (decisions.length === 0) {
		return reject("decision plan has zero decisions — nothing to apply");
	}

	const out: TriageDecision[] = [];
	const seenIds = new Set<string>();
	for (let i = 0; i < decisions.length; i++) {
		const raw = decisions[i];
		if (!raw || typeof raw !== "object" || Array.isArray(raw)) {
			return reject(`decisions[${i}] is not an object`);
		}
		const item = raw as Record<string, unknown>;
		const id = readTrimmed(item, "id");
		if (!id) {
			return reject(`decisions[${i}] is missing a non-empty id`);
		}
		if (seenIds.has(id)) {
			return reject(
				`decisions[${i}] duplicates id "${id}" — each ledger entry must appear exactly once`,
			);
		}
		seenIds.add(id);
		const outcome = item.outcome;
		if (outcome !== "fix" && outcome !== "promote" && outcome !== "close") {
			return reject(
				`decisions[${i}] (id=${id}) has invalid outcome "${outcome}" — expected fix|promote|close`,
			);
		}

		const decision: TriageDecision = { id, outcome };
		const evidenceKind = readTrimmed(item, ...evidenceKeys);
		if (evidenceKind) decision.evidenceKind = evidenceKind;
		const reason = readTrimmed(item, "reason");
		if (reason) decision.reason = reason;
		const proposedApproach = readTrimmed(
			item,
			"proposed_approach",
			"proposedApproach",
		);
		if (proposedApproach) decision.proposedApproach = proposedApproach;
		const requirementId = readTrimmed(item, "requirement_id", "requirementId");
		if (requirementId) decision.requirementId = requirementId;

		// Outcome-specific required fields.
		if (outcome === "close" && !decision.reason) {
			return reject(
				`decisions[${i}] (id=${id}, outcome=close) is missing required field reason`,
			);
		}
		if (outcome === "promote") {
			if (!decision.reason) {
				return reject(
					`decisions[${i}] (id=${id}, outcome=promote) is missing required field reason`,
				);
			}
			if (!decision.requirementId) {
				return reject(
					`decisions[${i}] (id=${id}, outcome=promote) is missing required field requirement_id`,
				);
			}
		}
		if (outcome === "fix" && !decision.proposedApproach) {
			return reject(
				`decisions[${i}] (id=${id}, outcome=fix) is missing required field proposed_approach`,
			);
		}

		// evidence_kind is optional for the mutating outcomes, but a value
		// that is present and unusable (blank string, number, list, …) must
		// not be silently swapped for the apply-time default. An explicit
		// YAML null counts as "omitted".
		if (outcome !== "fix") {
			for (const key of evidenceKeys) {
				const value = item[key];
				if (value === undefined || value === null) continue;
				if (typeof value !== "string" || value.trim().length === 0) {
					return reject(
						`decisions[${i}] (id=${id}, outcome=${outcome}) has an invalid ${key} — when provided it must be a non-empty string`,
					);
				}
			}
		}
		out.push(decision);
	}

	// Expected-id coverage check: if the caller knows which entries the
	// decider was shown, ensure the plan covers them exactly.
	if (Array.isArray(expectedIds) && expectedIds.length > 0) {
		const expected = new Set(expectedIds);
		for (const decision of out) {
			if (!expected.has(decision.id)) {
				return reject(
					`decision id "${decision.id}" is not in the candidate set the decider was shown — possible hallucination`,
				);
			}
		}
		for (const id of expected) {
			if (!seenIds.has(id)) {
				return reject(
					`candidate id "${id}" has no decision in the plan — incomplete coverage`,
				);
			}
		}
	}

	return { plan: out, error: null };
}

/**
 * Backwards-compatible, plan-only entry point kept for existing tests and
 * test fixtures. It runs the same validation as parseTriagePlanStrict
 * (without an expected-id set) and discards the error explanation, so a
 * rejected plan is simply `null`.
 *
 * Production callers should use parseTriagePlanStrict.
 *
 * @param text Raw text output of the triage-decider agent.
 * @returns The parsed decisions, or null when the plan was rejected.
 */
export function parseTriagePlan(text: string): TriageDecision[] | null {
	const { plan } = parseTriagePlanStrict(text);
	return plan;
}

/**
 * Per-agent timeout in milliseconds, resolved once at module load.
 *
 * Defaults to 8 minutes: enough for a real LLM reasoning pass plus tool
 * calls, yet short enough that a hung gemini OAuth or stalled provider
 * cannot lock the whole triage flow indefinitely. Operators override it
 * with the SF_TRIAGE_AGENT_TIMEOUT_MS env var; a value that does not parse
 * to a positive integer is ignored in favor of the default.
 *
 * History: the earlier version had no timeout at all — `defaultAgentRunner`
 * waited forever on `proc.on("close")`, so a single hung subagent dispatch
 * blocked the orchestrator until manual kill (observed 2026-05-14:
 * 33-minute-stuck triage --apply caused by an unresponsive provider).
 * The watchdog is now enforced inside `runSubagent` (SDK helper).
 */
const DEFAULT_AGENT_TIMEOUT_MS = ((): number => {
	const fallbackMs = 8 * 60 * 1000;
	const configured = process.env.SF_TRIAGE_AGENT_TIMEOUT_MS;
	if (configured === undefined) return fallbackMs;
	const parsedMs = Number.parseInt(configured, 10);
	return Number.isFinite(parsedMs) && parsedMs > 0 ? parsedMs : fallbackMs;
})();

/**
 * Production AgentRunner: composes the agent's system prompt, runs the
 * agent through the SDK's `runSubagent` with the module-wide timeout, and
 * maps the result to an AgentRunResult.
 *
 * @param agent Agent definition to run.
 * @param task Task text; it is appended to the system prompt under a
 *   "## Task Input" heading and also passed to runSubagent as the task.
 * @param options Optional overrides for tools, model and working
 *   directory. Each falls back to the agent's own value (tools, model) or
 *   to `process.cwd()`.
 * @returns The run outcome. When the exit code is 124 the stderr field is
 *   replaced with a timeout explanation naming SF_TRIAGE_AGENT_TIMEOUT_MS.
 */
async function defaultAgentRunner(
	agent: AgentConfig,
	task: string,
	options: { tools?: string[]; model?: string; cwd?: string } = {},
): Promise<AgentRunResult> {
	const cwd = options.cwd ?? process.cwd();
	const tools = options.tools ?? agent.tools;

	// Compose the system prompt via the prompt-parts registry. Dynamic
	// import because src/resources/ is excluded from the root tsconfig
	// (extensions get their own build). If the module can't be loaded, or
	// doesn't export composeAgentPrompt, fall back to the agent's raw
	// systemPrompt — degrades gracefully instead of failing the run.
	let promptPartsModule: {
		composeAgentPrompt?: (
			agent: AgentConfig,
			context: { cwd: string; surface: string; tools?: string[] },
		) => string;
	} = {};
	try {
		promptPartsModule = (await jiti.import(
			sfExtensionPath("subagent/prompt-parts"),
		)) as typeof promptPartsModule;
	} catch {
		// Only the module load is guarded: an error thrown by
		// composeAgentPrompt itself still propagates to the caller.
		promptPartsModule = {};
	}
	const composed =
		promptPartsModule.composeAgentPrompt?.(agent, {
			cwd,
			surface: "headless",
			tools,
		}) ?? agent.systemPrompt;
	const appendedPrompt = `${composed}\n\n## Task Input\n\n${task}`;
	const result = await runSubagent(
		{
			systemPrompt: appendedPrompt,
			model: options.model ?? agent.model,
			tools,
			cwd,
			name: agent.name,
		},
		task,
		{ timeoutMs: DEFAULT_AGENT_TIMEOUT_MS },
	);
	return {
		ok: result.ok,
		output: result.output,
		stderr:
			result.exitCode === 124
				? `${agent.name} timed out after ${DEFAULT_AGENT_TIMEOUT_MS}ms (configure SF_TRIAGE_AGENT_TIMEOUT_MS to extend)`
				: (result.stderr ?? ""),
		exitCode: result.exitCode,
	};
}

/**
 * Write one journal event for a triage --apply flow.
 *
 * Best-effort by design: if the journal module cannot be loaded, does not
 * export `emitJournalEvent`, or the write throws, the error is swallowed
 * and the promise still resolves.
 *
 * @param cwd Base path handed to the journal writer.
 * @param flowId Id shared by all events of one apply run.
 * @param seq Sequence number of this event within the flow.
 * @param eventType Event name (e.g. "triage-apply-start").
 * @param data Extra payload stored with the event.
 */
async function emitTriageApplyJournal(
	cwd: string,
	flowId: string,
	seq: number,
	eventType: string,
	data: Record<string, unknown> = {},
): Promise<void> {
	type JournalModule = {
		emitJournalEvent?: (
			basePath: string,
			entry: Record<string, unknown>,
		) => void;
	};
	try {
		const journal = (await jiti.import(
			sfExtensionPath("journal"),
		)) as JournalModule;
		if (journal.emitJournalEvent == null) return;
		const entry: Record<string, unknown> = {
			ts: new Date().toISOString(),
			flowId,
			seq,
			eventType,
			data,
		};
		journal.emitJournalEvent(cwd, entry);
	} catch {
		// Journal is best-effort; the apply result remains authoritative.
	}
}

/**
 * Outcome of one runTriageApply invocation.
 */
export interface RunTriageApplyResult {
	// True only when the review agreed and no approved mutation was rejected.
	ok: boolean;
	// True only when review-code agreed and the apply phase was reached.
	agreed: boolean;
	// Human-readable explanation; set whenever ok is false.
	error?: string;
	// Raw triage-decider output, once the decider has run.
	deciderOutput?: string;
	// Raw review-code output, once the reviewer has run.
	reviewOutput?: string;
	// Ids reported as resolved by the apply phase; empty on every early exit.
	resolvedIds: string[];
	// Approved close/promote decisions that failed to apply (apply phase only).
	rejectedIds?: string[];
	// "fix" decisions handed to the operator (apply phase only).
	pendingFixIds?: string[];
	// Per-run id of the form `triage-apply-<uuid>`; also used as the trace id.
	flowId: string;
}

/**
 * Run the full `triage --apply` flow: decide → review → apply.
 *
 * Steps, in order:
 *   1. Trust checks — `triage-decider` and `review-code` must both be
 *      discovered with source "builtin", and a custom agentRunner is only
 *      accepted together with allowUntrustedRunner.
 *   2. triage-decider runs with a restricted tool list and must produce a
 *      plan that parseTriagePlanStrict accepts.
 *   3. review-code reviews that plan; without a "review-code: agree" line
 *      in its output the run stops and nothing is mutated.
 *   4. applyTriagePlan applies the approved plan.
 *
 * Each step records its verdict as a `gate_run` trace event. If a gate
 * event cannot be written the run stops with ok=false. Journal events are
 * best-effort and never change the result.
 *
 * @param cwd Project directory: base path for journal and trace writes,
 *   agent discovery, and the agents' working directory.
 * @param prompt Triage prompt handed to the decider and quoted to the reviewer.
 * @param options See the inline notes on each field.
 * @returns A result whose `ok` is true only when the review agreed and
 *   every approved close/promote decision was applied.
 */
export async function runTriageApply(
	cwd: string,
	prompt: string,
	options: {
		model?: string;
		agentRunner?: AgentRunner;
		candidateCount?: number;
		// Expected ledger ids the decider was shown. When supplied, the
		// strict plan parser refuses any plan that adds new ids or omits
		// expected ones.
		expectedIds?: string[];
		// Test escape hatch. Production callers MUST NOT set this. Required
		// to pass a custom agentRunner because an arbitrary runner could
		// side-channel-mutate the ledger despite the read-only tool override
		// the orchestrator enforces (codex review 2026-05-14 follow-up).
		allowUntrustedRunner?: boolean;
	} = {},
): Promise<RunTriageApplyResult> {
	const flowId = `triage-apply-${randomUUID()}`;
	let seq = 0;
	const emit = (eventType: string, data: Record<string, unknown> = {}) =>
		emitTriageApplyJournal(cwd, flowId, seq++, eventType, data);

	// Slice 3a of "Make UOK the SF Control Plane": every triage --apply run
	// emits gate_run trace events (trusted-agent-source-gate,
	// triage-plan-validation-gate, triage-apply-review-gate and
	// triage-apply-mutation-gate) with canonical UOK run context. A run
	// that stops early only emits the gates it reached. status uok reads
	// surface/runControl/... from these events and classifies coverage as
	// "ok"/"stale"/"incomplete".
	//
	// Dynamic imports because src/resources is excluded from the root
	// tsconfig (extensions have their own build). buildUokRunContext and
	// appendTraceEventRequired live there; loaded once per run.
	const runContextModule = (await jiti.import(
		sfExtensionPath("uok/run-context"),
	)) as {
		buildUokRunContext: (opts: Record<string, unknown>) => {
			surface: string;
			runControl: string;
			permissionProfile: string;
			traceId: string;
			parentTrace?: string;
		} | null;
	};
	const traceWriterModule = (await jiti.import(
		sfExtensionPath("uok/trace-writer"),
	)) as {
		appendTraceEvent?: (
			basePath: string,
			traceId: string,
			event: Record<string, unknown>,
		) => void;
		appendTraceEventRequired?: (
			basePath: string,
			traceId: string,
			event: Record<string, unknown>,
		) => void;
		readTraceEvents?: (
			basePath: string,
			type: string,
			windowHours?: number,
		) => Array<Record<string, unknown>>;
	};

	// surface: "headless"        - runTriageApply is always operator-invoked
	//                              via sf headless triage --apply.
	// runControl: "supervised"   - the operator launched this command; it's
	//                              not an autonomous-loop self-initiation.
	// permissionProfile: "high"  - --apply mutates the ledger, so the run
	//                              must have write permission.
	// traceId: flowId             - already a UUID-stamped per-run id.
	const uokContext = runContextModule.buildUokRunContext({
		surface: "headless",
		runControl: "supervised",
		permissionProfile: "high",
		traceId: flowId,
	});

	// Write one gate_run trace event. Returns null on success, or the Error
	// describing why the event could not be recorded.
	const emitTriageGate = (
		gateId: string,
		outcome: "pass" | "fail" | "manual-attention",
		rationale: string,
		extra: Record<string, unknown> = {},
	): Error | null => {
		if (!uokContext) {
			return new Error("buildUokRunContext returned null for triage --apply");
		}
		const event = {
			type: "gate_run",
			traceId: uokContext.traceId,
			turnId: `triage-apply:${gateId}`,
			gateId,
			gateType: "quality-gate",
			outcome,
			failureClass:
				outcome === "fail"
					? "policy"
					: outcome === "manual-attention"
						? "manual-attention"
						: "none",
			rationale,
			attempt: 1,
			maxAttempts: 1,
			retryable: false,
			evaluatedAt: new Date().toISOString(),
			durationMs: 0,
			// Canonical UOK run context. status uok reads these from
			// trace events (slice 3a addition) so the gate
			// classifies as "ok" without needing a quality_gates parent
			// FK row to exist.
			surface: uokContext.surface,
			runControl: uokContext.runControl,
			permissionProfile: uokContext.permissionProfile,
			...extra,
		};
		try {
			if (typeof traceWriterModule.appendTraceEventRequired === "function") {
				traceWriterModule.appendTraceEventRequired(cwd, flowId, event);
			} else if (typeof traceWriterModule.appendTraceEvent === "function") {
				// Legacy writer: append, then read back to confirm the event
				// landed. The probe matches on traceId/turnId/gateId only, so
				// it is only meaningful while each gate is emitted at most
				// once per run.
				traceWriterModule.appendTraceEvent(cwd, flowId, event);
				const persisted = traceWriterModule
					.readTraceEvents?.(cwd, "gate_run", 24 * 30)
					.some(
						(ev) =>
							ev.traceId === event.traceId &&
							ev.turnId === event.turnId &&
							ev.gateId === event.gateId,
					);
				if (!persisted) {
					return new Error(
						`legacy trace writer did not persist ${gateId} gate event`,
					);
				}
			} else {
				return new Error("trace writer does not expose appendTraceEvent");
			}
			return null;
		} catch (err) {
			return err instanceof Error ? err : new Error(String(err));
		}
	};

	// Gate emission is mandatory: when it fails, journal the failure and
	// hand back the result the caller should return immediately. Returns
	// null when the gate event was recorded.
	const emitRequiredTriageGate = async (
		gateId: string,
		outcome: "pass" | "fail" | "manual-attention",
		rationale: string,
		extra: Record<string, unknown> = {},
	): Promise<RunTriageApplyResult | null> => {
		const err = emitTriageGate(gateId, outcome, rationale, extra);
		if (!err) return null;
		await emit("triage-apply-failed", {
			reason: "uok-gate-emission-failed",
			gateId,
			error: err.message,
		});
		return {
			ok: false,
			agreed: false,
			error: `UOK gate emission failed for ${gateId}: ${err.message}`,
			resolvedIds: [],
			flowId,
		};
	};

	await emit("triage-apply-start", {
		candidateCount: options.candidateCount ?? null,
	});
	const agentsModule = (await jiti.import(
		sfExtensionPath("subagent/agents"),
	)) as {
		discoverAgents?: (cwd: string, scope: string) => { agents: AgentConfig[] };
	};
	const agents = agentsModule.discoverAgents?.(cwd, "both").agents ?? [];
	const triageDecider = agents.find((agent) => agent.name === "triage-decider");
	const reviewCode = agents.find((agent) => agent.name === "review-code");
	if (!triageDecider || !reviewCode) {
		const missing = [
			triageDecider ? null : "triage-decider",
			reviewCode ? null : "review-code",
		]
			.filter(Boolean)
			.join(", ");
		// Missing agents is a trusted-source-gate failure cause too — the
		// gate's contract is "both built-ins exist AND have source=builtin".
		// Emit it so operators see the failure in status uok, not just in
		// the triage-apply journal.
		const gateFailure = await emitRequiredTriageGate(
			"trusted-agent-source-gate",
			"fail",
			`required built-in agent(s) not discovered: ${missing}`,
			{ missing },
		);
		if (gateFailure) return gateFailure;
		await emit("triage-apply-failed", { reason: "missing-agent", missing });
		return {
			ok: false,
			agreed: false,
			error: `Missing built-in agent(s): ${missing}`,
			resolvedIds: [],
			flowId,
		};
	}
	// Trusted-source guard (codex review 2026-05-14): when --apply will
	// mutate the ledger, BOTH agents must be SF-shipped built-ins. A
	// project-level override could silently disable review-code's
	// independence. Operators can still customize behavior for inspect
	// workflows, but --apply uses only the shipped review contract.
	if (triageDecider.source !== "builtin" || reviewCode.source !== "builtin") {
		const rationale = `non-builtin agents (triage-decider=${triageDecider.source}, review-code=${reviewCode.source})`;
		const gateFailure = await emitRequiredTriageGate(
			"trusted-agent-source-gate",
			"fail",
			rationale,
			{
				triageDeciderSource: triageDecider.source,
				reviewCodeSource: reviewCode.source,
			},
		);
		if (gateFailure) return gateFailure;
		await emit("triage-apply-failed", {
			reason: "untrusted-agent-source",
			triageDeciderSource: triageDecider.source,
			reviewCodeSource: reviewCode.source,
		});
		return {
			ok: false,
			agreed: false,
			error: `Refusing to --apply with non-builtin agents (triage-decider=${triageDecider.source}, review-code=${reviewCode.source}). Use \`sf headless triage --run\` for a reviewable decision artifact, or remove the project/user override.`,
			resolvedIds: [],
			flowId,
		};
	}

	// Custom-runner guard (codex review follow-up): an injected agentRunner
	// can side-channel-mutate the ledger despite the read-only tool override.
	// Only allow it when allowUntrustedRunner is explicitly set (test path).
	if (options.agentRunner && !options.allowUntrustedRunner) {
		// Same trust contract as missing-agent / non-builtin source: the
		// run cannot guarantee built-in behavior, so it's a failure of the
		// trusted-agent-source-gate, surfaced through status uok.
		const gateFailure = await emitRequiredTriageGate(
			"trusted-agent-source-gate",
			"fail",
			"runTriageApply: custom agentRunner injected without allowUntrustedRunner; production callers cannot bypass the built-in agent contract",
		);
		if (gateFailure) return gateFailure;
		await emit("triage-apply-failed", { reason: "untrusted-runner" });
		return {
			ok: false,
			agreed: false,
			error:
				"runTriageApply: a custom agentRunner was supplied without allowUntrustedRunner. Production callers cannot inject a runner — only tests can, via the explicit allowUntrustedRunner option.",
			resolvedIds: [],
			flowId,
		};
	}

	// Every trust check has passed, so record the gate verdict now. The
	// "pass" is emitted only here — after the custom-runner guard — so a
	// single run never records both "pass" and "fail" for
	// trusted-agent-source-gate.
	const trustGateFailure = await emitRequiredTriageGate(
		"trusted-agent-source-gate",
		"pass",
		"both triage-decider and review-code are SF-shipped built-ins",
	);
	if (trustGateFailure) return trustGateFailure;
	const runner = options.agentRunner ?? defaultAgentRunner;

	// Phase 1: triage-decider runs in PLAN-ONLY mode. Drop resolve_issue
	// from its tool list (the YAML already drops it, but this is defense-
	// in-depth in case a project override resurrects it). The decider
	// emits a YAML decision plan; we parse it post-hoc.
	const decider = await runner(triageDecider, prompt, {
		model: options.model,
		cwd,
		tools: ["view", "grep", "glob", "git_log"],
	});
	await emit("triage-apply-decider-finished", {
		ok: decider.ok,
		exitCode: decider.exitCode ?? null,
	});
	if (!decider.ok) {
		await emit("triage-apply-failed", { reason: "decider-failed" });
		return {
			ok: false,
			agreed: false,
			error: decider.stderr || "triage-decider failed",
			deciderOutput: decider.output,
			resolvedIds: [],
			flowId,
		};
	}

	// Parse the structured plan in strict mode. Refuses on any malformed
	// item, missing completion marker, multiple yaml blocks, or mismatch
	// against the expected candidate set (when supplied). Partial trust on
	// a triage plan is worse than no apply at all.
	const parseResult = parseTriagePlanStrict(
		decider.output,
		options.expectedIds,
	);
	if (!parseResult.plan) {
		const gateFailure = await emitRequiredTriageGate(
			"triage-plan-validation-gate",
			"fail",
			parseResult.error ?? "decider produced an unparseable plan",
			{ parseError: parseResult.error ?? null },
		);
		if (gateFailure) return gateFailure;
		await emit("triage-apply-failed", {
			reason: "no-plan",
			parseError: parseResult.error,
		});
		return {
			ok: false,
			agreed: false,
			error: `triage-decider plan rejected: ${
				parseResult.error ?? "unknown parse error"
			}`,
			deciderOutput: decider.output,
			resolvedIds: [],
			flowId,
		};
	}
	const plan = parseResult.plan;
	const validationGateFailure = await emitRequiredTriageGate(
		"triage-plan-validation-gate",
		"pass",
		`decider plan parsed cleanly: ${plan.length} decisions`,
		{ decisionCount: plan.length },
	);
	if (validationGateFailure) return validationGateFailure;
	await emit("triage-apply-plan-parsed", {
		decisionCount: plan.length,
		outcomes: plan.reduce<Record<string, number>>((acc, d) => {
			acc[d.outcome] = (acc[d.outcome] ?? 0) + 1;
			return acc;
		}, {}),
	});

	// Phase 2: review-code reviews the plan with read-only tools. The
	// review task explicitly hands the plan as the artifact under
	// scrutiny — the reviewer's job is to spot bad calls before they land.
	const reviewTask = [
		"Review this self-feedback triage decision PLAN. The plan has NOT yet been applied — your verdict gates whether any resolve_issue mutation runs.",
		'Return "review-code: agree" only if every decision in the plan is safe and coherent against the current code/ledger state.',
		"On disagreement, name each concerning decision explicitly so the operator (or a follow-up apply pass) can pull just those entries out and proceed with the rest.",
		"",
		"## Original triage prompt (the ledger entries the decider saw)",
		prompt,
		"",
		"## triage-decider output (includes the plan as a fenced yaml block)",
		decider.output,
	].join("\n");
	const review = await runner(reviewCode, reviewTask, {
		model: options.model,
		cwd,
		tools: ["view", "grep", "glob", "git_log", "query_journal"],
	});
	// Agreement requires a line that starts with "review-code: agree".
	const agreed = /^review-code:\s*agree\b/im.test(review.output.trim());
	await emit(
		agreed
			? "triage-apply-review-code-agree"
			: "triage-apply-review-code-disagree",
		{
			ok: review.ok,
			exitCode: review.exitCode ?? null,
		},
	);
	if (!review.ok) {
		const gateFailure = await emitRequiredTriageGate(
			"triage-apply-review-gate",
			"manual-attention",
			"review-code subagent failed to complete; review pending operator",
			{ exitCode: review.exitCode ?? null },
		);
		if (gateFailure) return gateFailure;
		await emit("triage-apply-failed", { reason: "review-code-failed" });
		return {
			ok: false,
			agreed: false,
			error: review.stderr || "review-code failed",
			deciderOutput: decider.output,
			reviewOutput: review.output,
			resolvedIds: [],
			flowId,
		};
	}
	if (!agreed) {
		// Disagreement is a clean pause, not a failure. The plan and the
		// review are both persisted in the decision report; the operator
		// can read both and act.
		const gateFailure = await emitRequiredTriageGate(
			"triage-apply-review-gate",
			"fail",
			"review-code disagreed with the proposed plan; no mutations applied",
		);
		if (gateFailure) return gateFailure;
		return {
			ok: false,
			agreed: false,
			error: "review-code disagreed — pausing for operator review",
			deciderOutput: decider.output,
			reviewOutput: review.output,
			resolvedIds: [],
			flowId,
		};
	}
	const reviewGateFailure = await emitRequiredTriageGate(
		"triage-apply-review-gate",
		"pass",
		"review-code agreed with the proposed plan; apply phase proceeds",
	);
	if (reviewGateFailure) return reviewGateFailure;

	// Phase 3: apply the plan. We (this runner) call markResolved for
	// each close/promote decision; fix decisions get surfaced for the
	// operator but never auto-mutate. Mutations happen ONCE, post-review,
	// and the resolvedIds we return reflect actual ledger state.
	const applyResult = await applyTriagePlan(cwd, plan, emit);
	// Per-decision failure surfacing (codex review follow-up): if any
	// approved close/promote failed to apply, runTriageApply reports
	// ok=false. Operator sees both partial success (resolvedIds) and
	// partial failure (rejectedIds) so they can investigate.
	const approvedMutationCount = plan.filter((d) => d.outcome !== "fix").length;
	const hasFailures = applyResult.rejectedIds.length > 0;

	// triage-apply-mutation-gate (codex review 2026-05-14 follow-up on
	// slice 3a): observability for the post-review apply phase. pass when
	// every approved mutation landed; fail when any rejected; manual-
	// attention when there were no approved mutations to apply (all
	// decisions were "fix" handoffs, which require operator action).
	const mutationOutcome: "pass" | "fail" | "manual-attention" =
		approvedMutationCount === 0
			? "manual-attention"
			: hasFailures
				? "fail"
				: "pass";
	const mutationRationale =
		approvedMutationCount === 0
			? `no approved mutations: ${applyResult.pendingFixIds.length} fix decisions await operator`
			: hasFailures
				? `${applyResult.rejectedIds.length} of ${approvedMutationCount} approved mutations failed: ${applyResult.rejectedIds.join(", ")}`
				: `${applyResult.resolvedIds.length} of ${approvedMutationCount} approved mutations applied cleanly`;
	const mutationGateFailure = await emitRequiredTriageGate(
		"triage-apply-mutation-gate",
		mutationOutcome,
		mutationRationale,
		{
			resolvedCount: applyResult.resolvedIds.length,
			rejectedCount: applyResult.rejectedIds.length,
			pendingFixCount: applyResult.pendingFixIds.length,
		},
	);
	if (mutationGateFailure) return mutationGateFailure;

	return {
		ok: !hasFailures,
		agreed: true,
		error: hasFailures
			? `${applyResult.rejectedIds.length} of ${approvedMutationCount} approved mutations failed: ${applyResult.rejectedIds.join(", ")}`
			: undefined,
		deciderOutput: decider.output,
		reviewOutput: review.output,
		resolvedIds: applyResult.resolvedIds,
		rejectedIds: applyResult.rejectedIds,
		pendingFixIds: applyResult.pendingFixIds,
		flowId,
	};
}

/**
 * Per-decision outcome of the apply phase; every decision id in the plan
 * ends up in exactly one of the three lists.
 */
interface ApplyTriagePlanResult {
	// Close/promote decisions for which markResolved returned true.
	resolvedIds: string[];
	// Close/promote decisions that could not be applied.
	rejectedIds: string[];
	// "fix" decisions, which are never auto-applied and await the operator.
	pendingFixIds: string[];
}

/**
 * Apply an approved decision plan. Calls markResolved (via the SF
 * extension's self-feedback writer, which runs the existing writer-
 * layer constraints — accepted evidence kinds, commit-exists check for
 * agent-fix, etc.) for each close/promote decision. Fix decisions are
 * not auto-applied; they require operator implementation work.
 *
 * Returns three lists: resolvedIds (actually mutated), rejectedIds
 * (writer-layer refused OR markResolved threw OR the writer module could
 * not be loaded), pendingFixIds (fix decisions surfaced for operator
 * handoff). runTriageApply uses the rejectedIds count to decide ok=true
 * vs ok=false.
 *
 * A failure to import the self-feedback module is NOT propagated as an
 * exception: it is reported through `emit` and every close/promote
 * decision is returned as rejected, exactly like a module that loads but
 * lacks a markResolved export.
 *
 * @param cwd  Project base path, forwarded to markResolved as `basePath`.
 * @param plan Approved decisions to apply, processed in order.
 * @param emit Event sink; awaited once per decision outcome.
 * @returns The ids partitioned into resolved / rejected / pending-fix.
 */
async function applyTriagePlan(
	cwd: string,
	plan: TriageDecision[],
	emit: (eventType: string, data?: Record<string, unknown>) => Promise<void>,
): Promise<ApplyTriagePlanResult> {
	const resolvedIds: string[] = [];
	const rejectedIds: string[] = [];
	const pendingFixIds: string[] = [];

	// Load the writer module defensively: a throwing import must degrade to
	// "nothing can be mutated", not abort the whole apply run.
	let imported: unknown;
	let loadError: string | undefined;
	try {
		imported = await jiti.import(sfExtensionPath("self-feedback"));
	} catch (err) {
		loadError = err instanceof Error ? err.message : String(err);
	}
	const sfModule = imported as
		| {
				markResolved?: (
					entryId: string,
					resolution: Record<string, unknown>,
					basePath?: string,
				) => boolean;
		  }
		| null
		| undefined;

	if (!sfModule || typeof sfModule.markResolved !== "function") {
		await emit("triage-apply-mutation-failed", {
			reason: "markResolved-unavailable",
			...(loadError !== undefined ? { error: loadError } : {}),
		});
		// Every approved close/promote becomes a rejection — can't mutate
		// anything if the writer module isn't loadable.
		for (const decision of plan) {
			if (decision.outcome === "fix") pendingFixIds.push(decision.id);
			else rejectedIds.push(decision.id);
		}
		return { resolvedIds, rejectedIds, pendingFixIds };
	}

	for (const decision of plan) {
		if (decision.outcome === "fix") {
			// Fix decisions are operator handoffs — surface in the report
			// (via the caller's deciderOutput / decision plan), don't mutate.
			pendingFixIds.push(decision.id);
			await emit("triage-apply-fix-pending-operator", { id: decision.id });
			continue;
		}
		// An explicit evidenceKind on the decision wins; otherwise derive a
		// default from the outcome.
		const evidenceKind =
			decision.evidenceKind ??
			(decision.outcome === "promote"
				? "promoted-to-requirement"
				: "human-clear");
		const evidence: Record<string, unknown> = { kind: evidenceKind };
		if (decision.outcome === "promote" && decision.requirementId) {
			evidence.requirementId = decision.requirementId;
		}
		const reason = decision.reason ?? "";
		try {
			// Called as a method on the module object so any `this` binding
			// the export relies on is preserved.
			const ok = sfModule.markResolved(decision.id, { reason, evidence }, cwd);
			if (ok) {
				resolvedIds.push(decision.id);
				await emit("triage-apply-resolved", {
					id: decision.id,
					outcome: decision.outcome,
					evidenceKind,
				});
			} else {
				rejectedIds.push(decision.id);
				await emit("triage-apply-mutation-rejected", {
					id: decision.id,
					outcome: decision.outcome,
					evidenceKind,
					note: "writer layer refused the resolution",
				});
			}
		} catch (err) {
			rejectedIds.push(decision.id);
			await emit("triage-apply-mutation-failed", {
				id: decision.id,
				error: err instanceof Error ? err.message : String(err),
			});
		}
	}
	return { resolvedIds, rejectedIds, pendingFixIds };
}

/**
 * Render the triage queue or canonical triage prompt to stdout, or — when
 * `options.run` / `options.apply` is set — dispatch the prompt and report
 * the outcome.
 *
 * Flow:
 *   1. Best-effort open of the project DB (failure is only a stderr warning).
 *   2. Load the self-feedback-drain module and select candidates, optionally
 *      truncated to `options.max`.
 *   3. No candidates → print an empty digest/notice and exit 0.
 *   4. No action flag → `json` prints the candidate digest as JSON, `list`
 *      prints the human digest, otherwise the rendered prompt is printed.
 *   5. `apply` → run runTriageApply and report resolved / rejected /
 *      pending-fix ids. Takes precedence over `run`.
 *   6. `run` → dispatch via the drain module's runTriage and persist the
 *      decision text via writeTriageDecisionReport.
 *
 * Never throws for module-load, candidate-selection, prompt-render,
 * dispatch or report-persistence failures — those are reported on
 * stderr/stdout and turned into a non-zero exit code.
 *
 * @param cwd     Project base path used for DB open, candidate selection
 *                and report persistence.
 * @param options Mode/format flags; defaults to the "print prompt" mode.
 * @returns An object carrying the exit code (0 success, 1 failure).
 */
export async function handleTriage(
	cwd: string,
	options: HandleTriageOptions = {},
): Promise<HandleTriageResult> {
	// Local on purpose: avoids colliding with any file-level helper name.
	const describeError = (err: unknown): string =>
		err instanceof Error ? err.message : String(err);

	// Open the project DB before reading. The one-shot bypass path doesn't
	// run the full SF agent bootstrap, so DB-open isn't done for us.
	try {
		const autoStartModule = (await jiti.import(
			sfExtensionPath("auto-start"),
			{},
		)) as { openProjectDbIfPresent?: (cwd: string) => Promise<unknown> };
		if (typeof autoStartModule.openProjectDbIfPresent === "function") {
			await autoStartModule.openProjectDbIfPresent(cwd);
		}
	} catch (err) {
		process.stderr.write(
			`[triage] DB pre-open warning: ${describeError(err)}\n`,
		);
	}

	let drainModule: {
		selectInlineFixCandidates: (basePath: string) => TriageCandidate[];
		buildInlineFixPrompt: (entries: TriageCandidate[]) => string;
		runTriage: (
			prompt: string,
			options?: { model?: string; timeoutMs?: number },
		) => Promise<{
			ok: boolean;
			content?: string;
			error?: string;
			cleanFinish?: boolean;
			provider?: string;
			modelId?: string;
		}>;
		writeTriageDecisionReport: (
			basePath: string,
			content: string,
		) => string | null;
		rankTriageModelsViaRouter: (candidates?: string[]) => Promise<string[]>;
	};
	try {
		drainModule = (await jiti.import(
			sfExtensionPath("self-feedback-drain"),
		)) as typeof drainModule;
	} catch (err) {
		process.stderr.write(
			`[triage] failed to load self-feedback-drain module: ${describeError(err)}\n`,
		);
		return { exitCode: 1 };
	}

	let candidates: TriageCandidate[];
	try {
		candidates = drainModule.selectInlineFixCandidates(cwd);
	} catch (err) {
		process.stderr.write(
			`[triage] candidate selection failed: ${describeError(err)}\n`,
		);
		return { exitCode: 1 };
	}

	// A non-positive or non-numeric max means "no limit".
	if (typeof options.max === "number" && options.max > 0) {
		candidates = candidates.slice(0, options.max);
	}

	if (candidates.length === 0) {
		if (options.json) {
			// `count` is included so the empty digest has the same shape as
			// the non-empty one below.
			process.stdout.write(
				`${JSON.stringify({ ok: true, count: 0, candidates: [] })}\n`,
			);
		} else {
			process.stdout.write("No open self-feedback candidates to triage.\n");
		}
		return { exitCode: 0 };
	}

	// --run/--apply take precedence over --json/--list because they describe the
	// ACTION, not the output format. With --run/--apply, --json controls whether
	// the result is JSON vs. human text. Without an action, --json emits
	// the candidate digest as JSON (the inspect path).
	if (!options.run && !options.apply) {
		if (options.json) {
			process.stdout.write(
				`${JSON.stringify({
					ok: true,
					count: candidates.length,
					candidates: candidates.map((c) => ({
						id: c.id,
						kind: c.kind,
						severity: c.severity,
						summary: c.summary,
						ts: c.ts,
						impact: c.impactScore ?? null,
						effort: c.effortEstimate ?? null,
					})),
				})}\n`,
			);
			return { exitCode: 0 };
		}

		if (options.list) {
			process.stdout.write(
				`${candidates.length} candidate${candidates.length === 1 ? "" : "s"} (priority: impact↓ effort↑ ts↑)\n\n`,
			);
			for (const c of candidates) {
				const impact = c.impactScore != null ? `i${c.impactScore}` : "i?";
				const effort = c.effortEstimate != null ? `e${c.effortEstimate}` : "e?";
				process.stdout.write(
					`  [${c.severity}] ${impact} ${effort}  ${c.id}  ${c.kind}\n`,
				);
				process.stdout.write(`    ${c.summary}\n`);
			}
			return { exitCode: 0 };
		}
	}

	// Render the canonical triage prompt (used by both the default
	// pipe-to-model output and the --run dispatch path below).
	let prompt: string;
	try {
		prompt = drainModule.buildInlineFixPrompt(candidates);
	} catch (err) {
		process.stderr.write(
			`[triage] prompt render failed: ${describeError(err)}\n`,
		);
		return { exitCode: 1 };
	}

	if (!options.run && !options.apply) {
		process.stdout.write(`${prompt}\n`);
		return { exitCode: 0 };
	}

	if (options.apply) {
		// Pre-resolve a model via the router when no --model was supplied and
		// no custom runner is injected. Without this, `defaultAgentRunner`
		// would spawn `sf -p` with no `--model` flag, and that path hangs
		// indefinitely during the subprocess's own model-selection step
		// (see sf-mp5tuvdx-ibyk9b). The watchdog still backs this up.
		let resolvedModel = options.model;
		if (!resolvedModel && !options.agentRunner) {
			try {
				const ranked = await drainModule.rankTriageModelsViaRouter();
				resolvedModel = ranked[0];
			} catch (err) {
				process.stderr.write(
					`[triage] router pre-resolution failed; falling back to subprocess default: ${describeError(err)}\n`,
				);
			}
		}
		process.stderr.write(
			`[triage] applying via triage-decider -> review-code${
				resolvedModel ? ` (model: ${resolvedModel})` : ""
			} (this can take a few minutes)…\n`,
		);

		// Guarded so an unexpected rejection from the apply pipeline becomes
		// a reported failure instead of escaping this handler.
		let result: Awaited<ReturnType<typeof runTriageApply>>;
		try {
			result = await runTriageApply(cwd, prompt, {
				model: resolvedModel,
				agentRunner: options.agentRunner,
				candidateCount: candidates.length,
				expectedIds: candidates.map((candidate) => candidate.id),
			});
		} catch (err) {
			const msg = describeError(err);
			if (options.json) {
				process.stdout.write(
					`${JSON.stringify({ ok: false, error: msg })}\n`,
				);
			} else {
				process.stderr.write(`[triage] apply failed: ${msg}\n`);
			}
			return { exitCode: 1 };
		}

		// Tolerate result shapes that omit these lists.
		const rejectedIds = result.rejectedIds ?? [];
		const pendingFixIds = result.pendingFixIds ?? [];
		const payload = {
			ok: result.ok,
			agreed: result.agreed,
			error: result.error,
			flowId: result.flowId,
			resolvedIds: result.resolvedIds,
			rejectedIds,
			pendingFixIds,
			deciderOutput: result.deciderOutput,
			reviewOutput: result.reviewOutput,
		};
		if (options.json) {
			process.stdout.write(`${JSON.stringify(payload)}\n`);
		} else if (result.ok) {
			process.stdout.write(
				`Triage apply complete: review-code agreed (${result.resolvedIds.length} resolved)\n`,
			);
			if (result.resolvedIds.length > 0) {
				process.stdout.write(`Resolved: ${result.resolvedIds.join(", ")}\n`);
			}
			// Fix decisions are never auto-applied; make the outstanding
			// operator work visible rather than reporting only "0 resolved".
			if (pendingFixIds.length > 0) {
				process.stdout.write(
					`Pending operator fixes: ${pendingFixIds.join(", ")}\n`,
				);
			}
		} else {
			process.stdout.write(`[triage] apply blocked: ${result.error}\n`);
			if (result.reviewOutput) process.stdout.write(`${result.reviewOutput}\n`);
		}
		return { exitCode: result.ok ? 0 : 1 };
	}

	// --run: dispatch the prompt via @singularity-forge/ai completeSimple,
	// capture the decision text, persist to .sf/triage/decisions/<ts>.md.
	// Same shape as `sf headless reflect --run`. The model's output is a
	// decision matrix — applying the decisions (resolve_issue calls, code
	// edits) is operator-driven; a tool-enabled variant is follow-up work.
	process.stderr.write(
		"[triage] dispatching to model (this can take a few minutes)…\n",
	);
	// A rejected dispatch is folded into the regular `ok: false` path below.
	let result: Awaited<ReturnType<typeof drainModule.runTriage>>;
	try {
		result = await drainModule.runTriage(prompt, { model: options.model });
	} catch (err) {
		result = { ok: false, error: describeError(err) };
	}
	if (!result.ok) {
		const payload = {
			ok: false,
			error: result.error ?? "unknown triage error",
			provider: result.provider,
			modelId: result.modelId,
		};
		process.stdout.write(
			options.json
				? `${JSON.stringify(payload)}\n`
				: `[triage] failed: ${payload.error}\n`,
		);
		return { exitCode: 1 };
	}

	const content = result.content ?? "";
	// The writer may return null (declared `string | null`) or throw; both are
	// treated as "nothing was persisted" so the model output is not lost.
	let reportPath: string | null;
	try {
		reportPath = drainModule.writeTriageDecisionReport(cwd, content);
	} catch (err) {
		process.stderr.write(
			`[triage] decision report write failed: ${describeError(err)}\n`,
		);
		reportPath = null;
	}
	const payload = {
		ok: true,
		reportPath,
		cleanFinish: result.cleanFinish === true,
		provider: result.provider,
		modelId: result.modelId,
		// Only carried inline when there is no report file to read it from.
		...(reportPath === null ? { content } : {}),
	};
	if (options.json) {
		process.stdout.write(`${JSON.stringify(payload)}\n`);
	} else {
		if (reportPath !== null) {
			process.stdout.write(`Triage decisions written to: ${reportPath}\n`);
		} else {
			process.stderr.write(
				"[triage] WARNING: decision report was not persisted; printing decisions to stdout instead\n",
			);
			process.stdout.write(`${content}\n`);
		}
		if (!result.cleanFinish) {
			process.stderr.write(
				'[triage] WARNING: report did not include "Self-feedback triage complete" terminator — output may be truncated\n',
			);
		}
	}
	return { exitCode: 0 };
}