fix(learning): add save_knowledge to manifest, failure_mode to aggregator SELECT + index
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
parent
e50321b62b
commit
2dea73398d
33 changed files with 9967 additions and 185 deletions
25
packages/pi-agent-core/package.json
Normal file
25
packages/pi-agent-core/package.json
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
{
|
||||
"name": "@singularity-forge/pi-agent-core",
|
||||
"version": "2.75.3",
|
||||
"description": "SF database abstraction layer and agent-core primitives (TypeScript)",
|
||||
"type": "module",
|
||||
"main": "./dist/index.js",
|
||||
"types": "./dist/index.d.ts",
|
||||
"exports": {
|
||||
".": {
|
||||
"types": "./dist/index.d.ts",
|
||||
"import": "./dist/index.js"
|
||||
},
|
||||
"./db/sf-db": {
|
||||
"types": "./dist/db/sf-db.d.ts",
|
||||
"import": "./dist/db/sf-db.js"
|
||||
}
|
||||
},
|
||||
"scripts": {
|
||||
"build": "tsc -p tsconfig.json"
|
||||
},
|
||||
"dependencies": {},
|
||||
"engines": {
|
||||
"node": ">=26.1.0"
|
||||
}
|
||||
}
|
||||
26
packages/pi-agent-core/src/db/errors.ts
Normal file
26
packages/pi-agent-core/src/db/errors.ts
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
/**
|
||||
* SF Error Types — Typed error hierarchy for diagnostics and crash recovery.
|
||||
*
|
||||
* All SF-specific errors extend SFError, which carries a stable `code`
|
||||
* string suitable for programmatic matching. Error codes are defined as
|
||||
* constants so callers can switch on them without string-matching.
|
||||
*/
|
||||
|
||||
// ─── Error Codes ──────────────────────────────────────────────────────────────
|
||||
export const SF_STALE_STATE = "SF_STALE_STATE";
|
||||
export const SF_LOCK_HELD = "SF_LOCK_HELD";
|
||||
export const SF_ARTIFACT_MISSING = "SF_ARTIFACT_MISSING";
|
||||
export const SF_GIT_ERROR = "SF_GIT_ERROR";
|
||||
export const SF_MERGE_CONFLICT = "SF_MERGE_CONFLICT";
|
||||
export const SF_PARSE_ERROR = "SF_PARSE_ERROR";
|
||||
export const SF_IO_ERROR = "SF_IO_ERROR";
|
||||
|
||||
// ─── Base Error ───────────────────────────────────────────────────────────────
|
||||
export class SFError extends Error {
|
||||
code: string;
|
||||
constructor(code: string, message: string, options?: ErrorOptions) {
|
||||
super(message, options);
|
||||
this.name = "SFError";
|
||||
this.code = code;
|
||||
}
|
||||
}
|
||||
221
packages/pi-agent-core/src/db/gate-registry.ts
Normal file
221
packages/pi-agent-core/src/db/gate-registry.ts
Normal file
|
|
@ -0,0 +1,221 @@
|
|||
/**
 * SF Gate Registry — single source of truth for quality-gate ownership.
 *
 * Each gate declares which workflow turn owns it, the scope at which it is
 * persisted in the `quality_gates` table, and the question/guidance text used
 * in the prompt that turn sends.
 */
import { SF_PARSE_ERROR, SFError } from "./errors.js";

// Q3–Q8 are slice/task-level quality questions; MV01–MV04 are milestone
// validation checks. Every id listed here must have a GATE_REGISTRY entry.
export type GateId = "Q3" | "Q4" | "Q5" | "Q6" | "Q7" | "Q8" | "MV01" | "MV02" | "MV03" | "MV04";

export interface GateDefinition {
  id: GateId;
  // Row scope used when persisting this gate in the `quality_gates` table.
  scope: "slice" | "task" | "milestone";
  // Workflow turn responsible for evaluating this gate.
  ownerTurn: string;
  // The question injected into the owning turn's prompt.
  question: string;
  // Newline-joined bullet guidance rendered under the question.
  guidance: string;
  // Heading used for this gate's section in the generated prompt.
  promptSection: string;
  // Minimum word count required for an 'omitted' rationale; 0 disables the check.
  minOmissionWords: number;
}

export const GATE_REGISTRY: Record<GateId, GateDefinition> = {
  Q3: {
    id: "Q3",
    scope: "slice",
    ownerTurn: "gate-evaluate",
    question: "How can this be exploited?",
    guidance: [
      "Identify abuse scenarios: parameter tampering, replay attacks, privilege escalation.",
      "Map data exposure risks: PII, tokens, secrets accessible through this slice.",
      "Define input trust boundaries: untrusted user input reaching DB, API, or filesystem.",
      "If none apply, return verdict 'omitted' with rationale explaining why.",
    ].join("\n"),
    promptSection: "Abuse Surface",
    minOmissionWords: 20,
  },
  Q4: {
    id: "Q4",
    scope: "slice",
    ownerTurn: "gate-evaluate",
    question: "What existing promises does this break?",
    guidance: [
      "List which existing requirements (R001, R003, etc.) are touched by this slice.",
      "Identify what must be re-tested after shipping.",
      "Flag decisions that should be revisited given the new scope.",
      "If no existing requirements are affected, return verdict 'omitted'.",
    ].join("\n"),
    promptSection: "Broken Promises",
    minOmissionWords: 0,
  },
  Q5: {
    id: "Q5",
    scope: "task",
    ownerTurn: "execute-task",
    question: "What breaks when dependencies fail?",
    guidance: [
      "Enumerate the task's external dependencies (APIs, filesystem, network, subprocesses).",
      "Describe the failure path for each: timeout, malformed response, connection loss.",
      "Verify the implementation handles each failure or explicitly bubbles the error.",
      "Return verdict 'omitted' only if the task has no external dependencies.",
    ].join("\n"),
    promptSection: "Failure Modes",
    minOmissionWords: 15,
  },
  Q6: {
    id: "Q6",
    scope: "task",
    ownerTurn: "execute-task",
    question: "What is the 10x load breakpoint?",
    guidance: [
      "Identify the resource that saturates first at 10x the expected load.",
      "Describe the protection applied (pool sizing, rate limiting, pagination, caching).",
      "Return verdict 'omitted' if the task has no runtime load dimension.",
    ].join("\n"),
    promptSection: "Load Profile",
    minOmissionWords: 10,
  },
  Q7: {
    id: "Q7",
    scope: "task",
    ownerTurn: "execute-task",
    question: "What negative tests protect this task?",
    guidance: [
      "List malformed inputs, error paths, and boundary conditions the tests cover.",
      "Point to the specific test files or cases that assert each negative scenario.",
      "Return verdict 'omitted' only if the task has no meaningful negative surface.",
    ].join("\n"),
    promptSection: "Negative Tests",
    minOmissionWords: 15,
  },
  Q8: {
    id: "Q8",
    scope: "slice",
    ownerTurn: "complete-slice",
    question: "How will ops know this slice is healthy or broken?",
    guidance: [
      "Describe the health signal (metric, log line, dashboard) that proves the slice works.",
      "Describe the failure signal that triggers an alert or paging.",
      "Document the recovery procedure and any monitoring gaps.",
      "Return verdict 'omitted' only for slices with no runtime behavior at all.",
    ].join("\n"),
    promptSection: "Operational Readiness",
    minOmissionWords: 0,
  },
  MV01: {
    id: "MV01",
    scope: "milestone",
    ownerTurn: "validate-milestone",
    question: "Is every success criterion in the milestone roadmap satisfied?",
    guidance: [
      "Walk the success-criteria checklist from the milestone roadmap.",
      "For each criterion, point to the slice / assessment / verification evidence that proves it.",
      "Return verdict 'flag' if any criterion is unmet or unverifiable.",
    ].join("\n"),
    promptSection: "Success Criteria Checklist",
    minOmissionWords: 0,
  },
  MV02: {
    id: "MV02",
    scope: "milestone",
    ownerTurn: "validate-milestone",
    question: "Does every slice have a SUMMARY.md and a passing assessment?",
    guidance: [
      "Confirm every slice listed in the roadmap has a SUMMARY.md.",
      "Confirm each slice has an ASSESSMENT verdict of 'pass' (or justified 'omitted').",
      "Flag missing artifacts and slices with outstanding follow-ups or known limitations.",
    ].join("\n"),
    promptSection: "Slice Delivery Audit",
    minOmissionWords: 0,
  },
  MV03: {
    id: "MV03",
    scope: "milestone",
    ownerTurn: "validate-milestone",
    question: "Do the slices integrate end-to-end?",
    guidance: [
      "Trace at least one cross-slice flow proving the pieces compose.",
      "Flag gaps where two slices were built in isolation with no integration evidence.",
    ].join("\n"),
    promptSection: "Cross-Slice Integration",
    minOmissionWords: 0,
  },
  MV04: {
    id: "MV04",
    scope: "milestone",
    ownerTurn: "validate-milestone",
    question: "Are all touched requirements covered and still coherent?",
    guidance: [
      "For each requirement advanced, validated, surfaced, or invalidated across the milestone's slices, confirm the milestone-level evidence matches.",
      "Flag requirements that slices claim to advance but no artifact proves.",
    ].join("\n"),
    promptSection: "Requirement Coverage",
    minOmissionWords: 0,
  },
};
|
||||
|
||||
const ORDERED_GATES = Object.values(GATE_REGISTRY);
|
||||
|
||||
export function getGatesForTurn(turn: string): GateDefinition[] {
|
||||
return ORDERED_GATES.filter((g) => g.ownerTurn === turn);
|
||||
}
|
||||
|
||||
export function getGateIdsForTurn(turn: string): Set<string> {
|
||||
return new Set(getGatesForTurn(turn).map((g) => g.id));
|
||||
}
|
||||
|
||||
export function getGateDefinition(id: string): GateDefinition | undefined {
|
||||
return GATE_REGISTRY[id as GateId];
|
||||
}
|
||||
|
||||
export function getOwnerTurn(id: string): string {
|
||||
const def = GATE_REGISTRY[id as GateId];
|
||||
if (!def) {
|
||||
throw new SFError(SF_PARSE_ERROR, `gate-registry: unknown gate id "${id}"`);
|
||||
}
|
||||
return def.ownerTurn;
|
||||
}
|
||||
|
||||
export interface PendingGateRow {
|
||||
gate_id: string;
|
||||
[key: string]: unknown;
|
||||
}
|
||||
|
||||
export function assertGateCoverage(
|
||||
pending: PendingGateRow[],
|
||||
turn: string,
|
||||
options: { requireAll?: boolean } = {},
|
||||
): void {
|
||||
const requireAll = options.requireAll ?? true;
|
||||
const expected = getGateIdsForTurn(turn);
|
||||
const pendingIds = new Set(pending.map((g) => g.gate_id));
|
||||
const unknown: string[] = [];
|
||||
for (const id of pendingIds) {
|
||||
const def = getGateDefinition(id);
|
||||
if (!def) {
|
||||
unknown.push(id);
|
||||
continue;
|
||||
}
|
||||
if (def.ownerTurn !== turn) {
|
||||
unknown.push(`${id} (owned by ${def.ownerTurn}, not ${turn})`);
|
||||
}
|
||||
}
|
||||
if (unknown.length > 0) {
|
||||
throw new SFError(
|
||||
SF_PARSE_ERROR,
|
||||
`assertGateCoverage: turn "${turn}" received pending gates it does not own: ${unknown.join(", ")}`,
|
||||
);
|
||||
}
|
||||
if (requireAll) {
|
||||
const missing: string[] = [];
|
||||
for (const id of expected) {
|
||||
if (!pendingIds.has(id)) missing.push(id);
|
||||
}
|
||||
if (missing.length > 0) {
|
||||
throw new SFError(
|
||||
SF_PARSE_ERROR,
|
||||
`assertGateCoverage: turn "${turn}" is missing required gates: ${missing.join(", ")}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
5
packages/pi-agent-core/src/db/index.ts
Normal file
5
packages/pi-agent-core/src/db/index.ts
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
export * from "./sf-db.js";
|
||||
export * from "./errors.js";
|
||||
export * from "./gate-registry.js";
|
||||
export * from "./task-frontmatter.js";
|
||||
export * from "./workflow-logger.js";
|
||||
8937
packages/pi-agent-core/src/db/sf-db.ts
Normal file
8937
packages/pi-agent-core/src/db/sf-db.ts
Normal file
File diff suppressed because it is too large
Load diff
481
packages/pi-agent-core/src/db/task-frontmatter.ts
Normal file
481
packages/pi-agent-core/src/db/task-frontmatter.ts
Normal file
|
|
@ -0,0 +1,481 @@
|
|||
/**
|
||||
* Task Frontmatter - schema-backed task metadata
|
||||
*
|
||||
* Purpose: add structured fields to task records for risk assessment,
|
||||
* mutation scope declaration, verification requirements, plan approval, and
|
||||
* task lifecycle status while keeping scheduler status as a separate view field.
|
||||
*
|
||||
* Consumer: plan-v2 task creation, UOK gate runner, parallel orchestrator,
|
||||
* sf-db row mapping, and task state machine.
|
||||
*/
|
||||
|
||||
export const RISK_LEVELS = ["none", "low", "medium", "high", "critical"] as const;
|
||||
export type RiskLevel = (typeof RISK_LEVELS)[number];
|
||||
|
||||
export const MUTATION_SCOPES = [
|
||||
"none",
|
||||
"docs-only",
|
||||
"config",
|
||||
"test-only",
|
||||
"isolated",
|
||||
"bounded",
|
||||
"cross-cutting",
|
||||
"systemic",
|
||||
] as const;
|
||||
export type MutationScope = (typeof MUTATION_SCOPES)[number];
|
||||
|
||||
export const VERIFICATION_TYPES = [
|
||||
"none",
|
||||
"self-check",
|
||||
"review",
|
||||
"test",
|
||||
"integration",
|
||||
"manual-qa",
|
||||
] as const;
|
||||
export type VerificationType = (typeof VERIFICATION_TYPES)[number];
|
||||
|
||||
export const PLAN_APPROVAL_STATES = [
|
||||
"not-required",
|
||||
"pending",
|
||||
"approved",
|
||||
"rejected",
|
||||
"auto-approved",
|
||||
] as const;
|
||||
export type PlanApprovalState = (typeof PLAN_APPROVAL_STATES)[number];
|
||||
|
||||
export const TASK_STATUSES = [
|
||||
"todo",
|
||||
"running",
|
||||
"verifying",
|
||||
"reviewing",
|
||||
"done",
|
||||
"blocked",
|
||||
"paused",
|
||||
"failed",
|
||||
"cancelled",
|
||||
"retrying",
|
||||
] as const;
|
||||
export type TaskStatus = (typeof TASK_STATUSES)[number];
|
||||
|
||||
export const SCHEDULER_STATUSES = [
|
||||
"queued",
|
||||
"due",
|
||||
"claimed",
|
||||
"dispatched",
|
||||
"consumed",
|
||||
"expired",
|
||||
] as const;
|
||||
export type SchedulerStatus = (typeof SCHEDULER_STATUSES)[number];
|
||||
|
||||
export interface TaskFrontmatter {
|
||||
risk: RiskLevel;
|
||||
mutationScope: MutationScope;
|
||||
verification: VerificationType;
|
||||
planApproval: PlanApprovalState;
|
||||
taskStatus: TaskStatus;
|
||||
schedulerStatus: SchedulerStatus;
|
||||
estimatedEffort: number | null;
|
||||
keyFiles: string[];
|
||||
dependencies: string[];
|
||||
blocksParallel: boolean;
|
||||
requiresUserInput: boolean;
|
||||
autoRetry: boolean;
|
||||
maxRetries: number;
|
||||
}
|
||||
|
||||
const TASK_STATUS_ALIASES: Record<string, string> = {
|
||||
complete: "done",
|
||||
completed: "done",
|
||||
in_progress: "running",
|
||||
"manual-attention": "reviewing",
|
||||
manual_attention: "reviewing",
|
||||
pending: "todo",
|
||||
review: "reviewing",
|
||||
};
|
||||
|
||||
const SCHEDULER_STATUS_ALIASES: Record<string, string> = {
|
||||
completed: "consumed",
|
||||
done: "consumed",
|
||||
pending: "queued",
|
||||
};
|
||||
|
||||
export const DEFAULT_TASK_FRONTMATTER: TaskFrontmatter = {
|
||||
risk: "low",
|
||||
mutationScope: "isolated",
|
||||
verification: "self-check",
|
||||
planApproval: "not-required",
|
||||
taskStatus: "todo",
|
||||
schedulerStatus: "queued",
|
||||
estimatedEffort: null,
|
||||
keyFiles: [],
|
||||
dependencies: [],
|
||||
blocksParallel: false,
|
||||
requiresUserInput: false,
|
||||
autoRetry: true,
|
||||
maxRetries: 2,
|
||||
};
|
||||
|
||||
export function normalizeTaskStatus(value: unknown): string | null {
|
||||
if (typeof value !== "string" || value.trim() === "") return "todo";
|
||||
const status = value.trim().toLowerCase();
|
||||
if ((TASK_STATUSES as readonly string[]).includes(status)) return status;
|
||||
return TASK_STATUS_ALIASES[status] ?? null;
|
||||
}
|
||||
|
||||
export function normalizeSchedulerStatus(value: unknown): string | null {
|
||||
if (typeof value !== "string" || value.trim() === "") return "queued";
|
||||
const status = value.trim().toLowerCase();
|
||||
if ((SCHEDULER_STATUSES as readonly string[]).includes(status)) return status;
|
||||
return SCHEDULER_STATUS_ALIASES[status] ?? null;
|
||||
}
|
||||
|
||||
function normalizeArray(value: unknown): string[] {
|
||||
if (Array.isArray(value)) return value.filter((v) => typeof v === "string");
|
||||
if (typeof value !== "string" || value.trim() === "") return [];
|
||||
try {
|
||||
const parsed = JSON.parse(value);
|
||||
if (Array.isArray(parsed))
|
||||
return parsed.filter((v) => typeof v === "string");
|
||||
return [];
|
||||
} catch {
|
||||
return value
|
||||
.split(",")
|
||||
.map((v) => v.trim())
|
||||
.filter(Boolean);
|
||||
}
|
||||
}
|
||||
|
||||
function normalizeBoolean(value: unknown): boolean {
|
||||
if (value === true || value === 1) return true;
|
||||
if (value === false || value === 0 || value == null) return false;
|
||||
if (typeof value === "string") {
|
||||
const normalized = value.trim().toLowerCase();
|
||||
if (["1", "true", "yes", "y"].includes(normalized)) return true;
|
||||
if (["0", "false", "no", "n", ""].includes(normalized)) return false;
|
||||
}
|
||||
return Boolean(value);
|
||||
}
|
||||
|
||||
function validateChoice(
|
||||
field: string,
|
||||
value: unknown,
|
||||
allowed: readonly string[],
|
||||
normalized: Record<string, unknown>,
|
||||
errors: string[],
|
||||
): void {
|
||||
if (value === undefined || value === null || value === "") return;
|
||||
if (typeof value === "string" && allowed.includes(value)) {
|
||||
normalized[field] = value;
|
||||
return;
|
||||
}
|
||||
errors.push(
|
||||
`Invalid ${field} "${String(value)}". Must be one of: ${allowed.join(", ")}`,
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Loosely-typed input accepted by validateTaskFrontmatter. Every field is
 * `unknown` because values may arrive from DB rows, JSON blobs, or user
 * input; validation narrows them into a TaskFrontmatter.
 */
export interface FrontmatterInput {
  risk?: unknown;
  mutationScope?: unknown;
  verification?: unknown;
  planApproval?: unknown;
  taskStatus?: unknown;
  schedulerStatus?: unknown;
  estimatedEffort?: unknown;
  keyFiles?: unknown;
  dependencies?: unknown;
  blocksParallel?: unknown;
  requiresUserInput?: unknown;
  autoRetry?: unknown;
  maxRetries?: unknown;
  // Extra keys are tolerated and ignored by validation.
  [key: string]: unknown;
}

/** Outcome of frontmatter validation. */
export interface ValidationResult {
  // True exactly when `errors` is empty.
  valid: boolean;
  // Human-readable messages, one per rejected field.
  errors: string[];
  // Defaults merged with every accepted input field.
  normalized: TaskFrontmatter;
}
|
||||
|
||||
export function validateTaskFrontmatter(frontmatter: FrontmatterInput = {}): ValidationResult {
|
||||
const errors: string[] = [];
|
||||
const normalized: Record<string, unknown> = {
|
||||
...DEFAULT_TASK_FRONTMATTER,
|
||||
keyFiles: [],
|
||||
dependencies: [],
|
||||
};
|
||||
|
||||
validateChoice("risk", frontmatter.risk, RISK_LEVELS, normalized, errors);
|
||||
validateChoice(
|
||||
"mutationScope",
|
||||
frontmatter.mutationScope,
|
||||
MUTATION_SCOPES,
|
||||
normalized,
|
||||
errors,
|
||||
);
|
||||
validateChoice(
|
||||
"verification",
|
||||
frontmatter.verification,
|
||||
VERIFICATION_TYPES,
|
||||
normalized,
|
||||
errors,
|
||||
);
|
||||
validateChoice(
|
||||
"planApproval",
|
||||
frontmatter.planApproval,
|
||||
PLAN_APPROVAL_STATES,
|
||||
normalized,
|
||||
errors,
|
||||
);
|
||||
|
||||
if (frontmatter.taskStatus !== undefined) {
|
||||
const status = normalizeTaskStatus(frontmatter.taskStatus);
|
||||
if (status) {
|
||||
normalized.taskStatus = status;
|
||||
} else {
|
||||
errors.push(
|
||||
`Invalid taskStatus "${String(frontmatter.taskStatus)}". Must be one of: ${TASK_STATUSES.join(", ")}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if (frontmatter.schedulerStatus !== undefined) {
|
||||
const status = normalizeSchedulerStatus(frontmatter.schedulerStatus);
|
||||
if (status) {
|
||||
normalized.schedulerStatus = status;
|
||||
} else {
|
||||
errors.push(
|
||||
`Invalid schedulerStatus "${String(frontmatter.schedulerStatus)}". Must be one of: ${SCHEDULER_STATUSES.join(", ")}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if (frontmatter.estimatedEffort !== undefined) {
|
||||
const effort = Number(frontmatter.estimatedEffort);
|
||||
if (!Number.isNaN(effort) && effort >= 0) {
|
||||
normalized.estimatedEffort = effort;
|
||||
} else if (frontmatter.estimatedEffort !== null) {
|
||||
errors.push(
|
||||
`Invalid estimatedEffort "${String(frontmatter.estimatedEffort)}". Must be a non-negative number or null.`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if (frontmatter.keyFiles !== undefined) {
|
||||
normalized.keyFiles = normalizeArray(frontmatter.keyFiles);
|
||||
}
|
||||
if (frontmatter.dependencies !== undefined) {
|
||||
normalized.dependencies = normalizeArray(frontmatter.dependencies);
|
||||
}
|
||||
|
||||
for (const field of ["blocksParallel", "requiresUserInput", "autoRetry"]) {
|
||||
if (frontmatter[field] !== undefined) {
|
||||
normalized[field] = normalizeBoolean(frontmatter[field]);
|
||||
}
|
||||
}
|
||||
|
||||
if (frontmatter.maxRetries !== undefined) {
|
||||
const retries = Number(frontmatter.maxRetries);
|
||||
if (Number.isInteger(retries) && retries >= 0 && retries <= 10) {
|
||||
normalized.maxRetries = retries;
|
||||
} else {
|
||||
errors.push(
|
||||
`Invalid maxRetries "${String(frontmatter.maxRetries)}". Must be an integer 0-10.`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
valid: errors.length === 0,
|
||||
errors,
|
||||
normalized: normalized as unknown as TaskFrontmatter,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Loosely-typed task row as read from the DB or built in memory.
 * Most fields exist in both snake_case (DB column) and camelCase (JS object)
 * spellings; taskFrontmatterFromRecord() resolves the aliases in priority
 * order (snake_case first).
 */
export interface TaskRecord {
  risk?: unknown;
  // mutation scope (column / camelCase)
  mutation_scope?: unknown;
  mutationScope?: unknown;
  // verification (column / camelCase / legacy short name)
  verification_type?: unknown;
  verificationType?: unknown;
  verification?: unknown;
  // plan approval (column / camelCase)
  plan_approval?: unknown;
  planApproval?: unknown;
  // task status (column / camelCase / legacy generic `status`)
  task_status?: unknown;
  taskStatus?: unknown;
  status?: unknown;
  // scheduler status (column / camelCase)
  scheduler_status?: unknown;
  schedulerStatus?: unknown;
  // estimated effort (column / camelCase)
  estimated_effort?: unknown;
  estimatedEffort?: unknown;
  // key files (frontmatter columns, legacy `files`, and camelCase variants)
  frontmatter_key_files?: unknown;
  frontmatterKeyFiles?: unknown;
  files?: unknown;
  key_files?: unknown;
  keyFiles?: unknown;
  // dependencies (canonical plus legacy spellings)
  dependencies?: unknown;
  depends_on?: unknown;
  dependsOn?: unknown;
  depends?: unknown;
  // boolean flags (column / camelCase)
  blocks_parallel?: unknown;
  blocksParallel?: unknown;
  requires_user_input?: unknown;
  requiresUserInput?: unknown;
  auto_retry?: unknown;
  autoRetry?: unknown;
  // retry budget (column / camelCase)
  max_retries?: unknown;
  maxRetries?: unknown;
  // Pre-computed frontmatter when the record came from buildTaskRecord().
  frontmatter?: TaskFrontmatter;
  // Extra columns are tolerated.
  [key: string]: unknown;
}
|
||||
|
||||
export function taskFrontmatterFromRecord(
|
||||
task: TaskRecord = {},
|
||||
overrides: Partial<FrontmatterInput> = {},
|
||||
): ValidationResult {
|
||||
const rawFrontmatter: FrontmatterInput = {
|
||||
risk: task.risk,
|
||||
mutationScope: task.mutation_scope ?? task.mutationScope,
|
||||
verification:
|
||||
task.verification_type ?? task.verificationType ?? task.verification,
|
||||
planApproval: task.plan_approval ?? task.planApproval,
|
||||
taskStatus: task.task_status ?? task.taskStatus ?? task.status,
|
||||
schedulerStatus: task.scheduler_status ?? task.schedulerStatus,
|
||||
estimatedEffort: task.estimated_effort ?? task.estimatedEffort,
|
||||
keyFiles:
|
||||
task.frontmatter_key_files ??
|
||||
task.frontmatterKeyFiles ??
|
||||
task.files ??
|
||||
task.key_files ??
|
||||
task.keyFiles ??
|
||||
[],
|
||||
dependencies:
|
||||
task.dependencies ??
|
||||
task.depends_on ??
|
||||
task.dependsOn ??
|
||||
task.depends ??
|
||||
[],
|
||||
blocksParallel: task.blocks_parallel ?? task.blocksParallel,
|
||||
requiresUserInput: task.requires_user_input ?? task.requiresUserInput,
|
||||
autoRetry: task.auto_retry ?? task.autoRetry,
|
||||
maxRetries: task.max_retries ?? task.maxRetries,
|
||||
...overrides,
|
||||
};
|
||||
|
||||
return validateTaskFrontmatter(rawFrontmatter);
|
||||
}
|
||||
|
||||
export interface BuiltTaskRecord extends TaskRecord {
|
||||
frontmatter: TaskFrontmatter;
|
||||
frontmatterValid: boolean;
|
||||
frontmatterErrors: string[];
|
||||
}
|
||||
|
||||
export function buildTaskRecord(
|
||||
task: TaskRecord = {},
|
||||
overrides: Partial<FrontmatterInput> = {},
|
||||
): BuiltTaskRecord {
|
||||
const validation = taskFrontmatterFromRecord(task, overrides);
|
||||
return {
|
||||
...task,
|
||||
frontmatter: validation.normalized,
|
||||
frontmatterValid: validation.valid,
|
||||
frontmatterErrors: validation.errors,
|
||||
};
|
||||
}
|
||||
|
||||
export function withTaskFrontmatter(
|
||||
task: TaskRecord = {},
|
||||
overrides: Partial<FrontmatterInput> = {},
|
||||
): BuiltTaskRecord {
|
||||
return buildTaskRecord(task, overrides);
|
||||
}
|
||||
|
||||
export interface ParallelCheckResult {
|
||||
canParallel: boolean;
|
||||
reason?: string;
|
||||
}
|
||||
|
||||
export function canRunInParallel(
|
||||
taskA: TaskRecord,
|
||||
taskB: TaskRecord,
|
||||
): ParallelCheckResult {
|
||||
if (
|
||||
!taskA ||
|
||||
!taskB ||
|
||||
typeof taskA !== "object" ||
|
||||
typeof taskB !== "object"
|
||||
) {
|
||||
return { canParallel: false, reason: "Invalid task input" };
|
||||
}
|
||||
const fmA = taskA.frontmatter ?? buildTaskRecord(taskA).frontmatter;
|
||||
const fmB = taskB.frontmatter ?? buildTaskRecord(taskB).frontmatter;
|
||||
|
||||
if (fmA.blocksParallel || fmB.blocksParallel) {
|
||||
return {
|
||||
canParallel: false,
|
||||
reason: "One or both tasks block parallel execution",
|
||||
};
|
||||
}
|
||||
|
||||
if (fmA.mutationScope === "systemic" || fmB.mutationScope === "systemic") {
|
||||
return {
|
||||
canParallel: false,
|
||||
reason: "One or both tasks have systemic mutation scope",
|
||||
};
|
||||
}
|
||||
|
||||
const highRisk = ["high", "critical"];
|
||||
if (highRisk.includes(fmA.risk) && highRisk.includes(fmB.risk)) {
|
||||
return { canParallel: false, reason: "Both tasks are high/critical risk" };
|
||||
}
|
||||
|
||||
if (fmA.keyFiles.length > 0 && fmB.keyFiles.length > 0) {
|
||||
const filesB = new Set(fmB.keyFiles);
|
||||
const overlap = fmA.keyFiles.filter((file) => filesB.has(file));
|
||||
if (overlap.length > 0) {
|
||||
return {
|
||||
canParallel: false,
|
||||
reason: `File overlap: ${overlap.join(", ")}`,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return { canParallel: true };
|
||||
}
|
||||
|
||||
export function canTasksRunInParallel(
|
||||
taskA: TaskRecord,
|
||||
taskB: TaskRecord,
|
||||
): ParallelCheckResult {
|
||||
return canRunInParallel(taskA, taskB);
|
||||
}
|
||||
|
||||
export function computeTaskPriority(task: TaskRecord): number {
|
||||
const fm = task.frontmatter ?? buildTaskRecord(task).frontmatter;
|
||||
let score = 50;
|
||||
|
||||
const riskScores: Record<string, number> = { none: 0, low: 5, medium: 15, high: 30, critical: 50 };
|
||||
score += riskScores[fm.risk] ?? 0;
|
||||
|
||||
const scopeScores: Record<string, number> = {
|
||||
none: 0,
|
||||
"docs-only": 2,
|
||||
config: 5,
|
||||
"test-only": 3,
|
||||
isolated: 5,
|
||||
bounded: 10,
|
||||
"cross-cutting": 25,
|
||||
systemic: 40,
|
||||
};
|
||||
score += scopeScores[fm.mutationScope] ?? 0;
|
||||
|
||||
if (fm.blocksParallel) score += 20;
|
||||
if (fm.requiresUserInput) score += 10;
|
||||
if (fm.planApproval === "pending") score += 10;
|
||||
|
||||
return Math.min(100, score);
|
||||
}
|
||||
|
||||
export function scoreTaskFrontmatterPriority(task: TaskRecord): number {
|
||||
return computeTaskPriority(task);
|
||||
}
|
||||
76
packages/pi-agent-core/src/db/uok/trace-writer.ts
Normal file
76
packages/pi-agent-core/src/db/uok/trace-writer.ts
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
/**
|
||||
* Trace event reader for .sf/traces/ directory.
|
||||
*
|
||||
* Purpose: read typed trace events from JSONL files for gate statistics
|
||||
* and performance analysis. Uses a minimal sfRoot implementation (fast path:
|
||||
* basePath/.sf) to avoid pulling in the full paths.js dependency chain.
|
||||
*
|
||||
* Consumer: sf-db.ts gate statistics functions (getGateStats, etc.)
|
||||
*/
|
||||
import {
|
||||
existsSync,
|
||||
readdirSync,
|
||||
readFileSync,
|
||||
statSync,
|
||||
} from "node:fs";
|
||||
import { join } from "node:path";
|
||||
|
||||
function sfRoot(basePath: string): string {
|
||||
return join(basePath, ".sf");
|
||||
}
|
||||
|
||||
function tracesDir(basePath: string): string {
|
||||
return join(sfRoot(basePath), "traces");
|
||||
}
|
||||
|
||||
export function appendTraceEvent(
|
||||
basePath: string,
|
||||
traceId: string,
|
||||
event: Record<string, unknown>,
|
||||
): void {
|
||||
if (!basePath || !traceId) return;
|
||||
// No-op in pi-agent-core — writes are handled by the sf extension.
|
||||
void event;
|
||||
}
|
||||
|
||||
export function readTraceEvents(
|
||||
basePath: string,
|
||||
type: string,
|
||||
windowHours = 24,
|
||||
): Record<string, unknown>[] {
|
||||
const dir = tracesDir(basePath);
|
||||
if (!existsSync(dir)) return [];
|
||||
const cutoff = Date.now() - windowHours * 60 * 60 * 1000;
|
||||
const results: Record<string, unknown>[] = [];
|
||||
let files: string[];
|
||||
try {
|
||||
files = readdirSync(dir).filter(
|
||||
(f) => f.endsWith(".jsonl") && f !== "latest",
|
||||
);
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
for (const file of files) {
|
||||
try {
|
||||
const filePath = join(dir, file);
|
||||
if (statSync(filePath).mtimeMs < cutoff) continue;
|
||||
const lines = readFileSync(filePath, "utf-8")
|
||||
.split("\n")
|
||||
.filter(Boolean);
|
||||
for (const line of lines) {
|
||||
try {
|
||||
const ev = JSON.parse(line) as Record<string, unknown>;
|
||||
if (!type || ev["type"] === type) {
|
||||
const tsMs = ev["ts"] ? new Date(ev["ts"] as string).getTime() : 0;
|
||||
if (!ev["ts"] || tsMs >= cutoff) results.push(ev);
|
||||
}
|
||||
} catch {
|
||||
/* skip malformed lines */
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
/* skip unreadable files */
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
94
packages/pi-agent-core/src/db/workflow-logger.ts
Normal file
94
packages/pi-agent-core/src/db/workflow-logger.ts
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
/**
|
||||
* SF Workflow Logger — minimal adapter for pi-agent-core.
|
||||
*
|
||||
* Purpose: provide logWarning/logError without pulling in the full
|
||||
* workflow-logger dependency chain (file-lock, notification-store, paths).
|
||||
* The sf extension's own workflow-logger handles the full audit/notification
|
||||
* pipeline; this module is used only by sf-db.ts in pi-agent-core context.
|
||||
*
|
||||
* Consumer: sf-db.ts in pi-agent-core for operational warnings and errors.
|
||||
*/
|
||||
|
||||
export interface LogEntry {
|
||||
ts: string;
|
||||
severity: "warn" | "error";
|
||||
component: string;
|
||||
message: string;
|
||||
context?: Record<string, string>;
|
||||
}
|
||||
|
||||
const MAX_BUFFER = 100;
|
||||
let _buffer: LogEntry[] = [];
|
||||
let _stderrEnabled = true;
|
||||
|
||||
export function setStderrLoggingEnabled(enabled: boolean): boolean {
|
||||
const previous = _stderrEnabled;
|
||||
_stderrEnabled = enabled;
|
||||
return previous;
|
||||
}
|
||||
|
||||
export function logWarning(
|
||||
component: string,
|
||||
message: string,
|
||||
context?: Record<string, string>,
|
||||
): void {
|
||||
_push("warn", component, message, context);
|
||||
}
|
||||
|
||||
export function logError(
|
||||
component: string,
|
||||
message: string,
|
||||
context?: Record<string, string>,
|
||||
): void {
|
||||
_push("error", component, message, context);
|
||||
}
|
||||
|
||||
export function drainLogs(): LogEntry[] {
|
||||
const entries = _buffer;
|
||||
_buffer = [];
|
||||
return entries;
|
||||
}
|
||||
|
||||
export function peekLogs(): LogEntry[] {
|
||||
return _buffer;
|
||||
}
|
||||
|
||||
export function hasErrors(): boolean {
|
||||
return _buffer.some((e) => e.severity === "error");
|
||||
}
|
||||
|
||||
export function hasWarnings(): boolean {
|
||||
return _buffer.some((e) => e.severity === "warn");
|
||||
}
|
||||
|
||||
export function hasAnyIssues(): boolean {
|
||||
return _buffer.length > 0;
|
||||
}
|
||||
|
||||
export function _resetLogs(): void {
|
||||
_buffer = [];
|
||||
}
|
||||
|
||||
function _push(
|
||||
severity: "warn" | "error",
|
||||
component: string,
|
||||
message: string,
|
||||
context?: Record<string, string>,
|
||||
): void {
|
||||
const entry: LogEntry = {
|
||||
ts: new Date().toISOString(),
|
||||
severity,
|
||||
component,
|
||||
message,
|
||||
...(context ? { context } : {}),
|
||||
};
|
||||
const prefix = severity === "error" ? "ERROR" : "WARN";
|
||||
const ctxStr = context ? ` ${JSON.stringify(context)}` : "";
|
||||
if (_stderrEnabled) {
|
||||
process.stderr.write(`[sf:${component}] ${prefix}: ${message}${ctxStr}\n`);
|
||||
}
|
||||
_buffer.push(entry);
|
||||
if (_buffer.length > MAX_BUFFER) {
|
||||
_buffer.shift();
|
||||
}
|
||||
}
|
||||
1
packages/pi-agent-core/src/index.ts
Normal file
1
packages/pi-agent-core/src/index.ts
Normal file
|
|
@ -0,0 +1 @@
|
|||
export * from "./db/index.js";
|
||||
38
packages/pi-agent-core/tsconfig.json
Normal file
38
packages/pi-agent-core/tsconfig.json
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2024",
|
||||
"module": "Node16",
|
||||
"lib": [
|
||||
"ES2024"
|
||||
],
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"incremental": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"declaration": true,
|
||||
"declarationMap": true,
|
||||
"sourceMap": true,
|
||||
"inlineSources": true,
|
||||
"inlineSourceMap": false,
|
||||
"moduleResolution": "Node16",
|
||||
"resolveJsonModule": true,
|
||||
"allowImportingTsExtensions": false,
|
||||
"useDefineForClassFields": false,
|
||||
"types": [
|
||||
"node"
|
||||
],
|
||||
"outDir": "./dist",
|
||||
"rootDir": "./src"
|
||||
},
|
||||
"include": [
|
||||
"src/**/*.ts"
|
||||
],
|
||||
"exclude": [
|
||||
"node_modules",
|
||||
"dist",
|
||||
"**/*.d.ts",
|
||||
"src/**/*.d.ts",
|
||||
"src/**/*.test.ts"
|
||||
]
|
||||
}
|
||||
|
|
@ -3,7 +3,7 @@
|
|||
* context injection, process discovery, footer widget, and periodic maintenance.
|
||||
*/
|
||||
import { truncateToWidth, visibleWidth } from "@singularity-forge/tui";
|
||||
import { formatTokenCount } from "../shared/format-utils.js";
|
||||
import { formatTokenCount } from "@singularity-forge/coding-agent";
|
||||
import {
|
||||
cleanupAll,
|
||||
cleanupSessionProcesses,
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
import { Type } from "@sinclair/typebox";
|
||||
import { StringEnum } from "@singularity-forge/ai";
|
||||
import { Text } from "@singularity-forge/tui";
|
||||
import { toPosixPath } from "../shared/path-display.js";
|
||||
import { toPosixPath } from "@singularity-forge/coding-agent";
|
||||
import { queryShellEnv, runOnSession, sendAndWait } from "./interaction.js";
|
||||
import {
|
||||
formatDigestText,
|
||||
|
|
|
|||
|
|
@ -1,40 +1,20 @@
|
|||
/**
|
||||
* Native Anthropic web search hook logic.
|
||||
* Native Anthropic web search extension hooks.
|
||||
*
|
||||
* Extracted from index.ts so it can be unit-tested without importing
|
||||
* the heavy tool-registration modules.
|
||||
*
|
||||
* The core injection logic (before_provider_request) now lives in:
|
||||
* The injection logic (before_provider_request) lives in the native provider middleware:
|
||||
* packages/coding-agent/src/core/providers/web-search-middleware.ts
|
||||
*
|
||||
* This file exports the constants and functions needed by the extension and by tests,
|
||||
* and delegates before_provider_request to the native middleware singleton so that
|
||||
* (a) tests exercise the same code path as production and (b) PREFERENCES.md-based
|
||||
* search_provider overrides are respected via setPreferBraveResolver.
|
||||
* This file owns only the extension-layer concerns: model_select diagnostics,
|
||||
* active-tool management, session reset, and PREFERENCES.md-aware provider resolution.
|
||||
*/
|
||||
import {
|
||||
CUSTOM_SEARCH_TOOL_NAMES,
|
||||
MAX_NATIVE_SEARCHES_PER_SESSION,
|
||||
setPreferBraveResolver,
|
||||
stripThinkingFromHistory,
|
||||
webSearchMiddleware,
|
||||
} from "@singularity-forge/coding-agent";
|
||||
import { resolveSearchProviderFromPreferences } from "../sf/preferences.js";
|
||||
/** Tool names for the Brave-backed custom search tools */
|
||||
export const BRAVE_TOOL_NAMES = ["search-the-web", "search_and_read"];
|
||||
/** All custom search tool names that should be disabled when native search is active */
|
||||
export { CUSTOM_SEARCH_TOOL_NAMES, MAX_NATIVE_SEARCHES_PER_SESSION, stripThinkingFromHistory };
|
||||
/**
|
||||
* Returns true when the provider supports native Anthropic web_search injection.
|
||||
*
|
||||
* Purpose: github-copilot, minimax, and kimi use Claude-compatible wire format
|
||||
* but do NOT support the web_search tool — injecting it causes a 400 error.
|
||||
* The `claude-` model-name prefix heuristic is too broad (those providers also
|
||||
* use claude-* names). Only the explicit "anthropic" provider tag is trusted.
|
||||
*/
|
||||
export function supportsNativeWebSearch(provider) {
|
||||
return provider === "anthropic";
|
||||
}
|
||||
/** When true, skip native web search injection and keep Brave/custom tools active on Anthropic. */
|
||||
export function preferBraveSearch() {
|
||||
// PREFERENCES.md takes priority over env var
|
||||
|
|
@ -57,14 +37,13 @@ export function preferBraveSearch() {
|
|||
);
|
||||
}
|
||||
/**
|
||||
* Register model_select, before_provider_request, and session_start hooks
|
||||
* for native Anthropic web search injection.
|
||||
* Register model_select and session_start hooks for native Anthropic web search.
|
||||
*
|
||||
* Returns the isAnthropicProvider getter for testing.
|
||||
* before_provider_request injection runs natively in sdk.ts via webSearchMiddleware —
|
||||
* nothing is registered here for that event.
|
||||
*/
|
||||
export function registerNativeSearchHooks(pi) {
|
||||
// null = unknown (model_select not yet fired); true/false = provider is/isn't Anthropic.
|
||||
let isAnthropicProvider = null;
|
||||
let isAnthropicProvider = false;
|
||||
// Register the PREFERENCES.md-aware resolver so the native middleware (shared
|
||||
// singleton in web-search-middleware.ts) respects search_provider overrides.
|
||||
// Called here so each test invocation resets the resolver to the current context.
|
||||
|
|
@ -127,26 +106,8 @@ export function registerNativeSearchHooks(pi) {
|
|||
);
|
||||
}
|
||||
});
|
||||
// before_provider_request is now handled natively by WebSearchMiddleware in sdk.ts.
|
||||
// This handler delegates to the same singleton so that:
|
||||
// (a) existing tests continue to exercise the injection logic end-to-end, and
|
||||
// (b) the double-injection guard (tools.some(web_search_20250305)) is a no-op
|
||||
// in production where sdk.ts already ran the middleware first.
|
||||
//
|
||||
// When event.model is absent but model_select has already run (isAnthropicProvider
|
||||
// is not null), synthesize a provider hint from the cached state so the middleware
|
||||
// does not fall back to the model-name heuristic and wrongly inject into Copilot
|
||||
// claude-* requests (#copilot-false-positive).
|
||||
pi.on("before_provider_request", (event) => {
|
||||
let modelHint = event.model;
|
||||
if (!modelHint && isAnthropicProvider !== null) {
|
||||
modelHint = { provider: isAnthropicProvider ? "anthropic" : "not-anthropic" };
|
||||
}
|
||||
return webSearchMiddleware.applyToPayload(event.payload, modelHint);
|
||||
});
|
||||
pi.on("session_start", async (_event, _ctx) => {
|
||||
// Reset the shared middleware session budget (#1309).
|
||||
webSearchMiddleware.resetSession();
|
||||
});
|
||||
return { getIsAnthropic: () => isAnthropicProvider };
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@
|
|||
import { execFileSync } from "node:child_process";
|
||||
import { createHash } from "node:crypto";
|
||||
import { existsSync, lstatSync, readdirSync, readFileSync } from "node:fs";
|
||||
import { formatTokenCount } from "../shared/format-utils.js";
|
||||
import { formatTokenCount } from "@singularity-forge/coding-agent";
|
||||
export const DEFAULT_RUNAWAY_TOOL_CALL_WARNING = 60;
|
||||
export const DEFAULT_RUNAWAY_TOKEN_WARNING = 1_000_000;
|
||||
export const DEFAULT_RUNAWAY_ELAPSED_MINUTES = 20;
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
*/
|
||||
import { mkdirSync, writeFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { formatDuration } from "../shared/format-utils.js";
|
||||
import { formatDuration } from "@singularity-forge/coding-agent";
|
||||
import {
|
||||
aggregateByModel,
|
||||
formatCost,
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
*/
|
||||
import { execFileSync } from "node:child_process";
|
||||
import { existsSync, readdirSync, readFileSync } from "node:fs";
|
||||
import { formatDuration } from "../shared/format-utils.js";
|
||||
import { formatDuration } from "@singularity-forge/coding-agent";
|
||||
import {
|
||||
aggregateByModel,
|
||||
formatCost,
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@
|
|||
*
|
||||
* Design: Linear-inspired — restrained palette, geometric status, no emoji.
|
||||
*/
|
||||
import { formatDateShort, formatDuration } from "../shared/format-utils.js";
|
||||
import { formatDateShort, formatDuration } from "@singularity-forge/coding-agent";
|
||||
import { formatCost, formatTokenCount } from "./metrics.js";
|
||||
export function generateHtmlReport(data, opts) {
|
||||
const generated = new Date().toISOString();
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
import { execFile } from "node:child_process";
|
||||
import { mkdirSync, writeFileSync } from "node:fs";
|
||||
import { basename, join } from "node:path";
|
||||
import { fileLink, formatDuration } from "../shared/format-utils.js";
|
||||
import { fileLink, formatDuration } from "@singularity-forge/coding-agent";
|
||||
import { getErrorMessage } from "./error-utils.js";
|
||||
import {
|
||||
aggregateByModel,
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@
|
|||
"resume_agent",
|
||||
"run_command",
|
||||
"save_decision",
|
||||
"save_knowledge",
|
||||
"save_requirement",
|
||||
"save_summary",
|
||||
"search_evidence",
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ import {
|
|||
} from "node:fs";
|
||||
import { homedir } from "node:os";
|
||||
import { join, relative } from "node:path";
|
||||
import { formatDuration } from "../shared/format-utils.js";
|
||||
import { formatDuration } from "@singularity-forge/coding-agent";
|
||||
import { showNextAction } from "../shared/tui.js";
|
||||
import { atomicWriteSync } from "./atomic-write.js";
|
||||
import { isAutoActive } from "./auto.js";
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
import {
|
||||
formatDuration,
|
||||
truncateWithEllipsis,
|
||||
} from "../shared/format-utils.js";
|
||||
} from "@singularity-forge/coding-agent";
|
||||
import { padRight } from "../shared/layout-utils.js";
|
||||
import {
|
||||
aggregateByModel,
|
||||
|
|
|
|||
|
|
@ -294,6 +294,7 @@ export function recentOutcomes(db, opts = {}) {
|
|||
duration_ms,
|
||||
tokens_total,
|
||||
cost_usd,
|
||||
failure_mode,
|
||||
recorded_at
|
||||
FROM llm_task_outcomes
|
||||
${where}
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ import { isAuditEnvelopeEnabled } from "./uok/audit-toggle.js";
|
|||
// Re-export from shared — import directly from format-utils to avoid pulling
|
||||
// in the full barrel (mod.js → ui.js → @singularity-forge/tui) which breaks when loaded
|
||||
// outside jiti's alias resolution (e.g. dynamic import in auto-loop reports).
|
||||
export { formatTokenCount } from "../shared/format-utils.js";
|
||||
export { formatTokenCount } from "@singularity-forge/coding-agent";
|
||||
|
||||
// ─── Learning Integration ─────────────────────────────────────────────────────
|
||||
function formatAggregateModelIdentity(modelId) {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
// Old .planning format per-file parsers
|
||||
// Pure functions that take file content (string) and return typed data.
|
||||
// Zero Pi dependencies — uses only exported helpers from files.ts.
|
||||
import { normalizeStringArray } from "../../shared/format-utils.js";
|
||||
import { normalizeStringArray } from "@singularity-forge/coding-agent";
|
||||
import {
|
||||
extractBoldField,
|
||||
parseFrontmatterMap,
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
* Accepts a raw SFPreferences object and returns a sanitized copy
|
||||
* together with any errors and warnings.
|
||||
*/
|
||||
import { normalizeStringArray } from "../shared/format-utils.js";
|
||||
import { normalizeStringArray } from "@singularity-forge/coding-agent";
|
||||
import { VALID_BRANCH_NAME } from "./git-constants.js";
|
||||
import {
|
||||
CURRENT_PREFERENCES_SCHEMA_VERSION,
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ import { existsSync, readFileSync } from "node:fs";
|
|||
import { homedir } from "node:os";
|
||||
import { dirname, join, resolve } from "node:path";
|
||||
import { parse as parseYaml } from "yaml";
|
||||
import { normalizeStringArray } from "../shared/format-utils.js";
|
||||
import { normalizeStringArray } from "@singularity-forge/coding-agent";
|
||||
import { sfRoot } from "./paths.js";
|
||||
import {
|
||||
_initPrefsLoader,
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@
|
|||
*/
|
||||
import { existsSync, mkdirSync, readFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { formatDateShort, formatDuration } from "../shared/format-utils.js";
|
||||
import { formatDateShort, formatDuration } from "@singularity-forge/coding-agent";
|
||||
import { atomicWriteSync } from "./atomic-write.js";
|
||||
import { formatCost, formatTokenCount } from "./metrics.js";
|
||||
import { sfRoot } from "./paths.js";
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@
|
|||
*/
|
||||
import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
|
||||
import { basename, join } from "node:path";
|
||||
import { truncateWithEllipsis } from "../shared/format-utils.js";
|
||||
import { truncateWithEllipsis } from "@singularity-forge/coding-agent";
|
||||
import { MAX_JSONL_BYTES, parseJSONL } from "./jsonl-utils.js";
|
||||
import {
|
||||
nativeDiffStat,
|
||||
|
|
|
|||
|
|
@ -3222,6 +3222,7 @@ function migrateSchema(db) {
|
|||
"failure_mode",
|
||||
"ALTER TABLE llm_task_outcomes ADD COLUMN failure_mode TEXT DEFAULT NULL",
|
||||
);
|
||||
db.exec("CREATE INDEX IF NOT EXISTS idx_llm_task_outcomes_failure_mode ON llm_task_outcomes(model_id, failure_mode, recorded_at DESC)");
|
||||
db.prepare(
|
||||
"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
|
||||
).run({
|
||||
|
|
|
|||
|
|
@ -1,20 +0,0 @@
|
|||
/**
|
||||
* Compatibility shim — re-exports pure formatting utilities from the
|
||||
* canonical implementation in @singularity-forge/coding-agent.
|
||||
*
|
||||
* All 13 importers of this module continue to work without any import changes.
|
||||
* The implementations now live in packages/coding-agent/src/utils/format.ts.
|
||||
*
|
||||
* ANSI-aware layout helpers (padRight, joinColumns, centerLine, fitColumns)
|
||||
* still live in layout-utils.js (depend on @singularity-forge/tui).
|
||||
*/
|
||||
export {
|
||||
fileLink,
|
||||
formatDateShort,
|
||||
formatDuration,
|
||||
formatTokenCount,
|
||||
normalizeStringArray,
|
||||
sparkline,
|
||||
stripAnsi,
|
||||
truncateWithEllipsis,
|
||||
} from "@singularity-forge/coding-agent";
|
||||
|
|
@ -8,7 +8,8 @@ export {
|
|||
sparkline,
|
||||
stripAnsi,
|
||||
truncateWithEllipsis,
|
||||
} from "./format-utils.js";
|
||||
toPosixPath,
|
||||
} from "@singularity-forge/coding-agent";
|
||||
export { parseFrontmatterMap, splitFrontmatter } from "./frontmatter.js";
|
||||
export {
|
||||
centerLine,
|
||||
|
|
@ -16,7 +17,6 @@ export {
|
|||
joinColumns,
|
||||
padRight,
|
||||
} from "./layout-utils.js";
|
||||
export { toPosixPath } from "./path-display.js";
|
||||
export { maskEditorLine, sanitizeError } from "./sanitize.js";
|
||||
export { shortcutDesc } from "./terminal.js";
|
||||
export { GLYPH, INDENT, STATUS_COLOR, STATUS_GLYPH } from "./ui.js";
|
||||
|
|
|
|||
|
|
@ -1,12 +0,0 @@
|
|||
/**
|
||||
* Compatibility shim — re-exports toPosixPath from the canonical
|
||||
* implementation in @singularity-forge/coding-agent.
|
||||
*
|
||||
* All importers of this module continue to work without any import changes.
|
||||
* The implementation now lives in packages/coding-agent/src/utils/path-display.ts.
|
||||
*
|
||||
* Use ONLY for paths entering text the LLM or shell sees.
|
||||
* Filesystem operations (fs.readFile, path.join, spawn cwd) handle native
|
||||
* separators correctly and should NOT be normalized.
|
||||
*/
|
||||
export { toPosixPath } from "@singularity-forge/coding-agent";
|
||||
|
|
@ -1,12 +1,14 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { afterEach, test } from "vitest";
|
||||
import {
|
||||
BRAVE_TOOL_NAMES,
|
||||
CUSTOM_SEARCH_TOOL_NAMES,
|
||||
MAX_NATIVE_SEARCHES_PER_SESSION,
|
||||
type NativeSearchPI,
|
||||
registerNativeSearchHooks,
|
||||
stripThinkingFromHistory,
|
||||
webSearchMiddleware,
|
||||
} from "@singularity-forge/coding-agent";
|
||||
import {
|
||||
BRAVE_TOOL_NAMES,
|
||||
registerNativeSearchHooks,
|
||||
} from "../resources/extensions/search-the-web/native-search.ts";
|
||||
import {
|
||||
getMiniMaxSearchApiKey,
|
||||
|
|
@ -46,11 +48,14 @@ function createMockPI() {
|
|||
},
|
||||
};
|
||||
|
||||
const pi: NativeSearchPI & {
|
||||
const pi: {
|
||||
handlers: MockHandler[];
|
||||
notifications: typeof notifications;
|
||||
mockCtx: typeof mockCtx;
|
||||
fire(event: string, eventData: any, ctx?: any): Promise<any>;
|
||||
on(event: string, handler: (...args: any[]) => any): void;
|
||||
getActiveTools(): string[];
|
||||
setActiveTools(tools: string[]): void;
|
||||
} = {
|
||||
handlers,
|
||||
notifications,
|
||||
|
|
@ -81,27 +86,20 @@ function createMockPI() {
|
|||
|
||||
// ─── Tests ──────────────────────────────────────────────────────────────────
|
||||
|
||||
test("before_provider_request injects web_search for claude models", async () => {
|
||||
const pi = createMockPI();
|
||||
registerNativeSearchHooks(pi);
|
||||
// ─── webSearchMiddleware.applyToPayload tests ────────────────────────────────
|
||||
// before_provider_request injection runs natively in sdk.ts; tests call the
|
||||
// middleware directly instead of routing through the extension hook.
|
||||
|
||||
// Confirm Anthropic provider via model_select before request
|
||||
await pi.fire("model_select", {
|
||||
type: "model_select",
|
||||
model: { provider: "anthropic", name: "claude-sonnet-4-6" },
|
||||
previousModel: undefined,
|
||||
source: "set",
|
||||
});
|
||||
test("applyToPayload injects web_search for Anthropic provider", async () => {
|
||||
const pi = createMockPI();
|
||||
registerNativeSearchHooks(pi); // resets session counter
|
||||
|
||||
const payload: Record<string, unknown> = {
|
||||
model: "claude-sonnet-4-6-20250514",
|
||||
tools: [{ name: "bash", type: "custom" }],
|
||||
};
|
||||
|
||||
const result = await pi.fire("before_provider_request", {
|
||||
type: "before_provider_request",
|
||||
payload,
|
||||
});
|
||||
const result = webSearchMiddleware.applyToPayload(payload, { provider: "anthropic" });
|
||||
|
||||
const tools = (result as any)?.tools ?? payload.tools;
|
||||
const nativeTool = (tools as any[]).find(
|
||||
|
|
@ -120,11 +118,10 @@ test("before_provider_request injects web_search for claude models", async () =>
|
|||
);
|
||||
});
|
||||
|
||||
test("before_provider_request injects web_search for claude models even without model_select", async () => {
|
||||
test("applyToPayload injects web_search based on claude model name heuristic", async () => {
|
||||
const pi = createMockPI();
|
||||
registerNativeSearchHooks(pi);
|
||||
|
||||
// NO model_select fired — simulates session restore where modelsAreEqual suppresses the event
|
||||
const payload: Record<string, unknown> = {
|
||||
model: "claude-opus-4-6",
|
||||
tools: [
|
||||
|
|
@ -134,10 +131,7 @@ test("before_provider_request injects web_search for claude models even without
|
|||
],
|
||||
};
|
||||
|
||||
const result = await pi.fire("before_provider_request", {
|
||||
type: "before_provider_request",
|
||||
payload,
|
||||
});
|
||||
const result = webSearchMiddleware.applyToPayload(payload);
|
||||
|
||||
const tools = ((result as any)?.tools ?? payload.tools) as any[];
|
||||
const names = tools.map((t: any) => t.name ?? t.type);
|
||||
|
|
@ -151,7 +145,7 @@ test("before_provider_request injects web_search for claude models even without
|
|||
assert.ok(names.includes("bash"), "Should keep non-search tools");
|
||||
});
|
||||
|
||||
test("before_provider_request does NOT inject for non-claude models", async () => {
|
||||
test("applyToPayload does NOT inject for non-claude model names", async () => {
|
||||
const pi = createMockPI();
|
||||
registerNativeSearchHooks(pi);
|
||||
|
||||
|
|
@ -160,37 +154,23 @@ test("before_provider_request does NOT inject for non-claude models", async () =
|
|||
tools: [{ name: "bash", type: "custom" }],
|
||||
};
|
||||
|
||||
const result = await pi.fire("before_provider_request", {
|
||||
type: "before_provider_request",
|
||||
payload,
|
||||
});
|
||||
const result = webSearchMiddleware.applyToPayload(payload);
|
||||
|
||||
assert.equal(result, undefined, "Should not modify non-claude payload");
|
||||
const tools = payload.tools as any[];
|
||||
assert.equal(tools.length, 1, "Should not add tools to non-claude payload");
|
||||
});
|
||||
|
||||
test("before_provider_request does NOT inject for claude model on non-Anthropic provider", async () => {
|
||||
test("applyToPayload does NOT inject for claude model when provider is non-Anthropic", async () => {
|
||||
const pi = createMockPI();
|
||||
registerNativeSearchHooks(pi);
|
||||
|
||||
// GitHub Copilot (or Bedrock, etc.) serving a claude model
|
||||
await pi.fire("model_select", {
|
||||
type: "model_select",
|
||||
model: { provider: "copilot", name: "claude-sonnet-4-6" },
|
||||
previousModel: undefined,
|
||||
source: "set",
|
||||
});
|
||||
|
||||
const payload: Record<string, unknown> = {
|
||||
model: "claude-sonnet-4-6-20250514",
|
||||
tools: [{ name: "bash", type: "custom" }],
|
||||
};
|
||||
|
||||
const result = await pi.fire("before_provider_request", {
|
||||
type: "before_provider_request",
|
||||
payload,
|
||||
});
|
||||
const result = webSearchMiddleware.applyToPayload(payload, { provider: "copilot" });
|
||||
|
||||
assert.equal(
|
||||
result,
|
||||
|
|
@ -209,29 +189,23 @@ test("before_provider_request does NOT inject for claude model on non-Anthropic
|
|||
);
|
||||
});
|
||||
|
||||
// ─── Issue #444 regression: Copilot claude-* model without model_select ──────
|
||||
// ─── Issue #444 regression: Copilot claude-* model ───────────────────────────
|
||||
|
||||
test("before_provider_request does NOT inject when event.model indicates non-Anthropic provider (no model_select)", async () => {
|
||||
test("applyToPayload does NOT inject when provider is github-copilot", async () => {
|
||||
const pi = createMockPI();
|
||||
registerNativeSearchHooks(pi);
|
||||
|
||||
// NO model_select fired — simulates a new session where model was set before
|
||||
// extensions were bound. The event.model field from the SDK reveals the true provider.
|
||||
const payload: Record<string, unknown> = {
|
||||
model: "claude-sonnet-4-6-20250514",
|
||||
tools: [{ name: "bash", type: "custom" }],
|
||||
};
|
||||
|
||||
const result = await pi.fire("before_provider_request", {
|
||||
type: "before_provider_request",
|
||||
payload,
|
||||
model: { provider: "github-copilot", id: "claude-sonnet-4-6" },
|
||||
});
|
||||
const result = webSearchMiddleware.applyToPayload(payload, { provider: "github-copilot" });
|
||||
|
||||
assert.equal(
|
||||
result,
|
||||
undefined,
|
||||
"Should not modify payload when event.model says non-Anthropic",
|
||||
"Should not modify payload when provider is Copilot",
|
||||
);
|
||||
const tools = payload.tools as any[];
|
||||
assert.equal(
|
||||
|
|
@ -245,74 +219,49 @@ test("before_provider_request does NOT inject when event.model indicates non-Ant
|
|||
);
|
||||
});
|
||||
|
||||
test("before_provider_request DOES inject when event.model indicates Anthropic provider (no model_select)", async () => {
|
||||
test("applyToPayload DOES inject when provider is anthropic", async () => {
|
||||
const pi = createMockPI();
|
||||
registerNativeSearchHooks(pi);
|
||||
|
||||
// NO model_select fired, but event.model confirms Anthropic provider
|
||||
const payload: Record<string, unknown> = {
|
||||
model: "claude-sonnet-4-6-20250514",
|
||||
tools: [{ name: "bash", type: "custom" }],
|
||||
};
|
||||
|
||||
const result = await pi.fire("before_provider_request", {
|
||||
type: "before_provider_request",
|
||||
payload,
|
||||
model: { provider: "anthropic", id: "claude-sonnet-4-6" },
|
||||
});
|
||||
const result = webSearchMiddleware.applyToPayload(payload, { provider: "anthropic" });
|
||||
|
||||
const tools = ((result as any)?.tools ?? payload.tools) as any[];
|
||||
assert.ok(
|
||||
tools.some((t: any) => t.type === "web_search_20250305"),
|
||||
"Should inject web_search when event.model confirms Anthropic",
|
||||
"Should inject web_search when provider is anthropic",
|
||||
);
|
||||
});
|
||||
|
||||
test("before_provider_request does not double-inject", async () => {
|
||||
test("applyToPayload does not double-inject", async () => {
|
||||
const pi = createMockPI();
|
||||
registerNativeSearchHooks(pi);
|
||||
|
||||
await pi.fire("model_select", {
|
||||
type: "model_select",
|
||||
model: { provider: "anthropic", name: "claude-opus-4-6" },
|
||||
previousModel: undefined,
|
||||
source: "set",
|
||||
});
|
||||
|
||||
const payload: Record<string, unknown> = {
|
||||
model: "claude-opus-4-6-20250514",
|
||||
tools: [{ type: "web_search_20250305", name: "web_search" }],
|
||||
};
|
||||
|
||||
const result = await pi.fire("before_provider_request", {
|
||||
type: "before_provider_request",
|
||||
payload,
|
||||
});
|
||||
const result = webSearchMiddleware.applyToPayload(payload, { provider: "anthropic" });
|
||||
|
||||
assert.equal(result, undefined, "Should not modify when already injected");
|
||||
const tools = payload.tools as any[];
|
||||
assert.equal(tools.length, 1, "Should not duplicate web_search tool");
|
||||
});
|
||||
|
||||
test("before_provider_request creates tools array if missing", async () => {
|
||||
test("applyToPayload creates tools array if missing", async () => {
|
||||
const pi = createMockPI();
|
||||
registerNativeSearchHooks(pi);
|
||||
|
||||
await pi.fire("model_select", {
|
||||
type: "model_select",
|
||||
model: { provider: "anthropic", name: "claude-haiku-4-5" },
|
||||
previousModel: undefined,
|
||||
source: "set",
|
||||
});
|
||||
|
||||
const payload: Record<string, unknown> = {
|
||||
model: "claude-haiku-4-5-20251001",
|
||||
};
|
||||
|
||||
const result = await pi.fire("before_provider_request", {
|
||||
type: "before_provider_request",
|
||||
payload,
|
||||
});
|
||||
const result = webSearchMiddleware.applyToPayload(payload, { provider: "anthropic" });
|
||||
|
||||
const tools = (result as any)?.tools ?? payload.tools;
|
||||
assert.ok(Array.isArray(tools), "Should create tools array");
|
||||
|
|
@ -325,14 +274,11 @@ test("before_provider_request creates tools array if missing", async () => {
|
|||
);
|
||||
});
|
||||
|
||||
test("before_provider_request skips when payload is falsy", async () => {
|
||||
test("applyToPayload skips when payload is falsy", async () => {
|
||||
const pi = createMockPI();
|
||||
registerNativeSearchHooks(pi);
|
||||
|
||||
const result = await pi.fire("before_provider_request", {
|
||||
type: "before_provider_request",
|
||||
payload: null,
|
||||
});
|
||||
const result = webSearchMiddleware.applyToPayload(null);
|
||||
|
||||
assert.equal(result, undefined, "Should return undefined for null payload");
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue