feat(workflow): add product-audit (slim port)

Milestone-end workflow that compares declared product intent (VISION.md,
RUNBOOKS.md, etc.) against actual code/test/deploy/docs evidence and
emits structured gaps with severity. Soft gate — adds follow-up slices
but doesn't hard-block merge.

Slim port (4 new files + 1 registration) — extracts only the audit
feature itself, not bunker's parallel rewrite of dispatch/prompts/
benchmark-selector that came with it in commit 2aa785475.

Created:
- prompts/product-audit.md         — prompt verbatim, gsd_*→sf_* and .gsd→.sf
- tools/product-audit-tool.ts      — slim file-write implementation,
                                     atomicWriteAsync to .sf/active/{mid}/
                                     PRODUCT-AUDIT.{json,md}; no DB deps
- bootstrap/product-audit-tool.ts  — pi-coding-agent tool registration,
                                     TypeBox schema for sf_product_audit
- workflow-templates/product-audit.md — workflow template

Modified:
- bootstrap/register-extension.ts  — 2 lines: import + add to nonCriticalRegistrations
- workflow-templates/registry.json — registry entry
- package.json — version 2.75.0 → 2.75.1

Verdict logic (no-gaps | gaps-found | contract-underspecified) is the
load-bearing innovation: contract-underspecified forces the auditor to
flag unverifiable docs as a real gap rather than rubber-stamping
no-gaps when the product contract is silent.
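
A downstream consumer of the audit might branch on the verdict roughly like this. This is a hypothetical sketch only — the real dispatch hookup is explicitly out of scope for this commit, and the `followUpSlices` helper is invented for illustration:

```typescript
// Hypothetical sketch of downstream verdict handling; the actual
// dispatch wiring is a follow-up and may differ.
type Verdict = "no-gaps" | "gaps-found" | "contract-underspecified";

interface Gap {
  severity: "critical" | "high" | "medium" | "low";
  suggestedSlice: { title: string };
}

// Only critical/high gaps become follow-up slices; the audit never
// blocks completion regardless of verdict (soft gate).
function followUpSlices(verdict: Verdict, gaps: Gap[]): string[] {
  if (verdict === "no-gaps") return [];
  return gaps
    .filter((g) => g.severity === "critical" || g.severity === "high")
    .map((g) => g.suggestedSlice.title);
}
```

Note that `contract-underspecified` flows through the same path as `gaps-found`: its mandatory high-severity gap yields a slice that clarifies the contract.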

Out of scope: phase enum changes, dispatch hookup. Wire-up to the phase
machine is a follow-up; the prompt + tool + template stand alone.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Author: Mikael Hugo
Date: 2026-04-29 13:55:23 +02:00
Parent: 2eebeccb93
Commit: a8cf2cd941
7 changed files with 549 additions and 1 deletion

package.json
@@ -1,6 +1,6 @@
 {
   "name": "singularity-forge",
-  "version": "2.75.0",
+  "version": "2.75.1",
   "description": "Singularity Forge runtime core",
   "license": "MIT",
   "repository": {


bootstrap/product-audit-tool.ts
@@ -0,0 +1,102 @@
// SF — Product Audit tool registration
//
// Exposes `sf_product_audit` to the LLM. The tool persists a structured
// product-completeness audit (verdict + gaps) to
// `.sf/active/{milestoneId}/PRODUCT-AUDIT.{json,md}`.

import { Type } from "@sinclair/typebox";

import type { ExtensionAPI } from "@singularity-forge/pi-coding-agent";

import {
  handleProductAudit,
  type ProductAuditParams,
} from "../tools/product-audit-tool.js";

export function registerProductAuditTool(pi: ExtensionAPI): void {
  pi.registerTool({
    name: "sf_product_audit",
    label: "Product Audit",
    description:
      "Persist a milestone-end product-completeness audit. Compares declared " +
      "product intent against actual code/test/deploy/docs evidence and writes " +
      "structured gaps to .sf/active/{milestoneId}/PRODUCT-AUDIT.{json,md}. " +
      "Soft gate — does not hard-block milestone completion.",
    promptSnippet:
      "Save a milestone product-audit (verdict + gaps with severity and suggested follow-up slices) to .sf/active/{milestoneId}/PRODUCT-AUDIT.{json,md}",
    promptGuidelines: [
      "Call exactly once per milestone audit run.",
      "verdict=no-gaps requires positive evidence for every material capability.",
      "verdict=gaps-found includes at least one gap; critical/high gaps will be turned into follow-up slices downstream.",
      "verdict=contract-underspecified means product docs are too vague to validate; emit one high-severity gap that clarifies the contract.",
      "Every gap must cite concrete sourceDocs (repo-relative paths) and concrete foundEvidence/missingEvidence — no vague TODOs.",
    ],
    parameters: Type.Object({
      milestoneId: Type.String({
        description: "Milestone ID this audit belongs to (e.g. M001)",
      }),
      verdict: Type.Union(
        [
          Type.Literal("no-gaps"),
          Type.Literal("gaps-found"),
          Type.Literal("contract-underspecified"),
        ],
        { description: "Overall audit verdict" },
      ),
      summary: Type.String({
        description: "Short evidence-based summary of the audit",
      }),
      gaps: Type.Array(
        Type.Object({
          capability: Type.String(),
          expectedEvidence: Type.Array(Type.String()),
          foundEvidence: Type.Array(Type.String()),
          missingEvidence: Type.Array(Type.String()),
          severity: Type.Union([
            Type.Literal("critical"),
            Type.Literal("high"),
            Type.Literal("medium"),
            Type.Literal("low"),
          ]),
          suggestedSlice: Type.Object({
            title: Type.String(),
            demo: Type.String(),
            risk: Type.String(),
            depends: Type.Array(Type.String()),
          }),
          confidence: Type.Number({ minimum: 0, maximum: 1 }),
          sourceDocs: Type.Array(Type.String()),
        }),
      ),
    }),
    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
      const result = await handleProductAudit(
        params as ProductAuditParams,
        process.cwd(),
      );
      if ("error" in result) {
        return {
          content: [
            {
              type: "text" as const,
              text: `Error: ${result.error}`,
            },
          ],
          details: { operation: "sf_product_audit", error: result.error },
          isError: true,
        };
      }
      return {
        content: [
          {
            type: "text" as const,
            text:
              `Product audit ${result.milestoneId} saved — verdict=${result.verdict}, ` +
              `gaps=${result.gapCount} (actionable=${result.actionableGapCount}). ` +
              `Wrote ${result.markdownPath} and ${result.jsonPath}.`,
          },
        ],
        details: { operation: "sf_product_audit", ...result },
      };
    },
  });
}
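
For reference, a minimal arguments object satisfying this schema might look like the following. All values (milestone ID, file paths, capability names) are invented for illustration:

```typescript
// Illustrative sf_product_audit arguments; every value here is made up.
const exampleParams = {
  milestoneId: "M001",
  verdict: "gaps-found",
  summary: "Declared health-check endpoint lacks test and runbook evidence.",
  gaps: [
    {
      capability: "Operational health-check endpoint",
      expectedEvidence: ["GET /healthz handler", "runbook entry"],
      foundEvidence: ["src/server.ts exports a healthz route"],
      missingEvidence: ["test covering /healthz", "docs/RUNBOOKS.md entry"],
      severity: "high",
      suggestedSlice: {
        title: "Add /healthz test and runbook entry",
        demo: "After this: /healthz is tested and documented.",
        risk: "low",
        depends: [],
      },
      confidence: 0.7,
      sourceDocs: ["VISION.md"],
    },
  ],
};
```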


bootstrap/register-extension.ts
@@ -15,6 +15,7 @@ import { registerDynamicTools } from "./dynamic-tools.js";
 import { registerExecTools } from "./exec-tools.js";
 import { registerJournalTools } from "./journal-tools.js";
 import { registerMemoryTools } from "./memory-tools.js";
+import { registerProductAuditTool } from "./product-audit-tool.js";
 import { registerQueryTools } from "./query-tools.js";
 import { registerHooks } from "./register-hooks.js";
 import { registerShortcuts } from "./register-shortcuts.js";
@@ -100,6 +101,7 @@ export function registerSfExtension(pi: ExtensionAPI): void {
     ["db-tools", () => registerDbTools(pi)],
     ["exec-tools", () => registerExecTools(pi)],
     ["memory-tools", () => registerMemoryTools(pi)],
+    ["product-audit-tool", () => registerProductAuditTool(pi)],
     ["journal-tools", () => registerJournalTools(pi)],
     ["query-tools", () => registerQueryTools(pi)],
     ["shortcuts", () => registerShortcuts(pi)],


prompts/product-audit.md
@@ -0,0 +1,73 @@
# Product Completeness Audit

Working directory: `{{workingDirectory}}`
Milestone: `{{milestoneId}} — {{milestoneTitle}}`
Audit output: `{{productAuditPath}}`

{{skillActivation}}

{{inlinedContext}}

## Mission

Compare the repository's declared finished-product intent against actual evidence from code, tests, docs, deployment artifacts, runbooks, and `.sf` milestone artifacts.

Use repo docs as the product contract. Do not hardcode assumptions from another project. If docs are vague, report that as a product-contract gap only when it blocks confident validation.

## Codebase Inspection

You must inspect the codebase enough to confirm whether declared product capabilities exist. Prefer semantic or structured search when available:

- Start with MCP discovery: call `mcp_servers`, then `mcp_discover` for configured repo-intelligence servers. In our repositories, use Serena first when present for symbol search, references, and cross-file codebase mapping. If Serena is not configured or a call fails, state that and continue with AST/text search.
- Use AST-aware search (`ast-grep`, repository semantic tools, or equivalent) for implementation patterns, exported APIs, route handlers, CLI commands, service definitions, config keys, and test coverage.
- Use text search (`rg`) for docs, deployment scripts, runbooks, CI workflows, build targets, and evidence strings.
- Use available repo-intelligence MCP tools such as Serena, DeepWiki, Context7, package-intelligence, or project-specific servers when configured, especially for cross-file symbol tracing and architecture questions.
- Do not rely only on preloaded docs. If a required capability is declared, look for concrete implementation, tests, config, deploy, and operational evidence.
- Do not edit source files in this audit. The only write path is the `sf_product_audit` tool.

## Evidence Rules

- Cite every product expectation with repo-relative `sourceDocs` paths.
- `foundEvidence` must name concrete files, symbols, tests, commands, artifacts, or documented operational procedures.
- `missingEvidence` must be concrete and checkable. Do not write vague TODOs.
- A `no-gaps` verdict is valid only when positive evidence exists for every material product expectation discovered.
- Critical/high gaps are for capabilities required by the declared finished product and missing enough evidence to trust completion.
- Medium/low gaps remain advisory unless the declared finished product requires them.
- Safety warnings are advisory here. This audit does not hard-block completion; it adds follow-up slices for real critical/high product gaps.

## Required Output

Call `sf_product_audit` exactly once with:

```json
{
  "milestoneId": "{{milestoneId}}",
  "verdict": "no-gaps | gaps-found | contract-underspecified",
  "summary": "short evidence-based summary",
  "gaps": [
    {
      "capability": "capability name",
      "expectedEvidence": ["required evidence from product contract"],
      "foundEvidence": ["positive evidence found, with paths"],
      "missingEvidence": ["missing concrete evidence"],
      "severity": "critical | high | medium | low",
      "suggestedSlice": {
        "title": "Implement/verify the missing product capability",
        "demo": "After this: concrete product-readiness proof exists.",
        "risk": "high",
        "depends": []
      },
      "confidence": 0.8,
      "sourceDocs": ["VISION.md", "docs/RUNBOOKS.md"]
    }
  ]
}
```

For no material gaps, pass `gaps: []` and explain the positive evidence in `summary`.

If the finished-product contract is underspecified and that blocks validation, add one high-severity gap whose suggested slice clarifies the product contract.

After the tool call, respond with:

`Product audit {{milestoneId}} complete — <verdict>.`


tools/product-audit-tool.ts
@@ -0,0 +1,311 @@
// SF — Product Completeness Audit tool
//
// Slim implementation of the milestone-end product-audit workflow phase.
// The tool name is `sf_product_audit`. It validates a structured audit
// payload (verdict + gaps) and writes the result to:
//   .sf/active/{milestoneId}/PRODUCT-AUDIT.json
//   .sf/active/{milestoneId}/PRODUCT-AUDIT.md
//
// This is a soft gate — it does not hard-block milestone completion.
// Follow-up slice scheduling for actionable gaps lives outside this tool.

import { join } from "node:path";

import { atomicWriteAsync } from "../atomic-write.js";
import { isNonEmptyString } from "../validation.js";

export const PRODUCT_GAP_SEVERITIES = [
  "critical",
  "high",
  "medium",
  "low",
] as const;

export const PRODUCT_AUDIT_VERDICTS = [
  "no-gaps",
  "gaps-found",
  "contract-underspecified",
] as const;

export type ProductGapSeverity = (typeof PRODUCT_GAP_SEVERITIES)[number];
export type ProductAuditVerdict = (typeof PRODUCT_AUDIT_VERDICTS)[number];

export interface SuggestedProductSlice {
  title: string;
  demo: string;
  risk: string;
  depends: string[];
}

export interface ProductAuditGap {
  capability: string;
  expectedEvidence: string[];
  foundEvidence: string[];
  missingEvidence: string[];
  severity: ProductGapSeverity;
  suggestedSlice: SuggestedProductSlice;
  confidence: number;
  sourceDocs: string[];
}

export interface ProductAuditParams {
  milestoneId: string;
  verdict: ProductAuditVerdict;
  summary: string;
  gaps: ProductAuditGap[];
}

export interface ProductAuditResult {
  milestoneId: string;
  verdict: ProductAuditVerdict;
  gapCount: number;
  actionableGapCount: number;
  jsonPath: string;
  markdownPath: string;
}

function validateStringArray(value: unknown, fieldName: string): string[] {
  if (!Array.isArray(value)) {
    throw new Error(`${fieldName} must be an array`);
  }
  for (let i = 0; i < value.length; i++) {
    if (typeof value[i] !== "string") {
      throw new Error(`${fieldName}[${i}] must be a string`);
    }
  }
  return value as string[];
}

function validateSuggestedSlice(
  value: unknown,
  fieldName: string,
): SuggestedProductSlice {
  if (!value || typeof value !== "object") {
    throw new Error(`${fieldName} must be an object`);
  }
  const slice = value as Partial<SuggestedProductSlice>;
  if (!isNonEmptyString(slice.title)) {
    throw new Error(`${fieldName}.title is required`);
  }
  if (!isNonEmptyString(slice.demo)) {
    throw new Error(`${fieldName}.demo is required`);
  }
  if (!isNonEmptyString(slice.risk)) {
    throw new Error(`${fieldName}.risk is required`);
  }
  const depends = validateStringArray(
    slice.depends ?? [],
    `${fieldName}.depends`,
  );
  return {
    title: slice.title,
    demo: slice.demo,
    risk: slice.risk,
    depends,
  };
}

export function validateProductAuditParams(
  params: ProductAuditParams,
): ProductAuditParams {
  if (!params || typeof params !== "object") {
    throw new Error("params must be an object");
  }
  if (!isNonEmptyString(params.milestoneId)) {
    throw new Error("milestoneId is required");
  }
  if (
    !PRODUCT_AUDIT_VERDICTS.includes(params.verdict as ProductAuditVerdict)
  ) {
    throw new Error(
      `verdict must be one of: ${PRODUCT_AUDIT_VERDICTS.join(", ")}`,
    );
  }
  if (!isNonEmptyString(params.summary)) {
    throw new Error("summary is required");
  }
  if (!Array.isArray(params.gaps)) {
    throw new Error("gaps must be an array");
  }
  const gaps: ProductAuditGap[] = [];
  for (let i = 0; i < params.gaps.length; i++) {
    const gap = params.gaps[i];
    if (!gap || typeof gap !== "object") {
      throw new Error(`gaps[${i}] must be an object`);
    }
    if (!isNonEmptyString(gap.capability)) {
      throw new Error(`gaps[${i}].capability is required`);
    }
    const expectedEvidence = validateStringArray(
      gap.expectedEvidence,
      `gaps[${i}].expectedEvidence`,
    );
    const foundEvidence = validateStringArray(
      gap.foundEvidence,
      `gaps[${i}].foundEvidence`,
    );
    const missingEvidence = validateStringArray(
      gap.missingEvidence,
      `gaps[${i}].missingEvidence`,
    );
    if (
      !PRODUCT_GAP_SEVERITIES.includes(gap.severity as ProductGapSeverity)
    ) {
      throw new Error(
        `gaps[${i}].severity must be one of: ${PRODUCT_GAP_SEVERITIES.join(", ")}`,
      );
    }
    const suggestedSlice = validateSuggestedSlice(
      gap.suggestedSlice,
      `gaps[${i}].suggestedSlice`,
    );
    if (
      typeof gap.confidence !== "number" ||
      !Number.isFinite(gap.confidence) ||
      gap.confidence < 0 ||
      gap.confidence > 1
    ) {
      throw new Error(`gaps[${i}].confidence must be a number from 0 to 1`);
    }
    const sourceDocs = validateStringArray(
      gap.sourceDocs,
      `gaps[${i}].sourceDocs`,
    );
    gaps.push({
      capability: gap.capability,
      expectedEvidence,
      foundEvidence,
      missingEvidence,
      severity: gap.severity,
      suggestedSlice,
      confidence: gap.confidence,
      sourceDocs,
    });
  }
  return {
    milestoneId: params.milestoneId,
    verdict: params.verdict,
    summary: params.summary,
    gaps,
  };
}

function renderEvidenceList(values: string[]): string {
  return values.length > 0
    ? values.map((value) => `- ${value}`).join("\n")
    : "- None";
}

function renderAuditMarkdown(
  params: ProductAuditParams,
  actionableGaps: ProductAuditGap[],
): string {
  const gapTable =
    params.gaps.length > 0
      ? params.gaps
          .map(
            (gap) =>
              `| ${gap.capability} | ${gap.severity} | ${gap.confidence.toFixed(2)} | ${gap.sourceDocs.join(", ") || "None"} |`,
          )
          .join("\n")
      : "| None | n/a | n/a | n/a |";
  const gapDetails =
    params.gaps.length > 0
      ? params.gaps
          .map((gap, index) =>
            [
              `## Gap ${index + 1}: ${gap.capability}`,
              "",
              `- Severity: ${gap.severity}`,
              `- Confidence: ${gap.confidence.toFixed(2)}`,
              `- Suggested slice: ${gap.suggestedSlice.title}`,
              `- Source docs: ${gap.sourceDocs.join(", ") || "None"}`,
              "",
              "### Expected Evidence",
              renderEvidenceList(gap.expectedEvidence),
              "",
              "### Found Evidence",
              renderEvidenceList(gap.foundEvidence),
              "",
              "### Missing Evidence",
              renderEvidenceList(gap.missingEvidence),
            ].join("\n"),
          )
          .join("\n\n")
      : "## Gaps\n\nNo material product gaps were found with positive evidence.";
  return [
    "---",
    `verdict: ${params.verdict}`,
    `gap_count: ${params.gaps.length}`,
    `actionable_gap_count: ${actionableGaps.length}`,
    `created_at: ${new Date().toISOString()}`,
    "---",
    "",
    `# Product Audit: ${params.milestoneId}`,
    "",
    "## Summary",
    "",
    params.summary.trim(),
    "",
    "## Gap Index",
    "",
    "| Capability | Severity | Confidence | Source docs |",
    "|---|---:|---:|---|",
    gapTable,
    "",
    gapDetails,
    "",
  ].join("\n");
}

export async function handleProductAudit(
  rawParams: ProductAuditParams,
  basePath: string,
): Promise<ProductAuditResult | { error: string }> {
  let params: ProductAuditParams;
  try {
    params = validateProductAuditParams(rawParams);
  } catch (err) {
    return { error: `validation failed: ${(err as Error).message}` };
  }
  const actionableGaps = params.gaps.filter(
    (gap) => gap.severity === "critical" || gap.severity === "high",
  );
  const auditDir = join(basePath, ".sf", "active", params.milestoneId);
  const jsonPath = join(auditDir, "PRODUCT-AUDIT.json");
  const markdownPath = join(auditDir, "PRODUCT-AUDIT.md");
  const jsonPayload = {
    milestoneId: params.milestoneId,
    verdict: params.verdict,
    summary: params.summary,
    gapCount: params.gaps.length,
    actionableGapCount: actionableGaps.length,
    createdAt: new Date().toISOString(),
    gaps: params.gaps,
  };
  try {
    await atomicWriteAsync(
      jsonPath,
      `${JSON.stringify(jsonPayload, null, 2)}\n`,
    );
    await atomicWriteAsync(
      markdownPath,
      renderAuditMarkdown(params, actionableGaps),
    );
  } catch (err) {
    return { error: `audit save failed: ${(err as Error).message}` };
  }
  return {
    milestoneId: params.milestoneId,
    verdict: params.verdict,
    gapCount: params.gaps.length,
    actionableGapCount: actionableGaps.length,
    jsonPath,
    markdownPath,
  };
}
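
Assuming the implementation above, the derived result fields can be sketched in isolation (POSIX separators used here for simplicity; `deriveResult` is an illustrative helper, not part of the tool):

```typescript
// Self-contained sketch of the result fields handleProductAudit derives;
// the real implementation also validates the payload and writes atomically.
function deriveResult(milestoneId: string, severities: string[]) {
  // Actionable gaps are exactly the critical/high ones.
  const actionable = severities.filter(
    (s) => s === "critical" || s === "high",
  ).length;
  const dir = [".sf", "active", milestoneId].join("/");
  return {
    gapCount: severities.length,
    actionableGapCount: actionable,
    jsonPath: `${dir}/PRODUCT-AUDIT.json`,
    markdownPath: `${dir}/PRODUCT-AUDIT.md`,
  };
}
```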


workflow-templates/product-audit.md
@@ -0,0 +1,44 @@
# Product Audit Workflow

<template_meta>
name: product-audit
version: 1
requires_project: true
artifact_dir: .sf/active/
</template_meta>

<purpose>
End-of-milestone soft gate that compares declared product intent (VISION.md,
RUNBOOKS.md, PRD-style docs) against actual code/test/deploy/docs evidence.
Outputs structured gaps with severity (critical/high/medium/low). Does NOT
hard-block milestone completion — actionable gaps become follow-up slices.
</purpose>

<phases>
1. audit — Inspect code/tests/docs, score gaps, call sf_product_audit
</phases>

<process>

## Phase 1: Audit

**Goal:** Produce a structured PRODUCT-AUDIT artifact for the active milestone.

1. Load the audit prompt at `prompts/product-audit.md`.
2. Treat repo docs (VISION.md, README.md, docs/RUNBOOKS.md, milestone artifacts)
   as the product contract. Do not invent expectations.
3. Inspect the codebase using semantic search (Serena/MCP), AST tools, and `rg`
   to confirm declared capabilities have concrete evidence (code, tests,
   deployment artifacts, runbooks).
4. Score each gap with severity, confidence, and a suggested follow-up slice.
5. Call `sf_product_audit` exactly once with the structured payload. The tool
   writes:
   - `.sf/active/{milestoneId}/PRODUCT-AUDIT.json` (machine-readable)
   - `.sf/active/{milestoneId}/PRODUCT-AUDIT.md` (human-readable)

**Verdicts:**

- `no-gaps` — every material capability has positive evidence
- `gaps-found` — one or more material gaps, captured with suggested slices
- `contract-underspecified` — product docs are too vague to validate against

</process>


workflow-templates/registry.json
@@ -146,6 +146,22 @@
       "artifact_dir": ".sf/workflows/upgrades/",
       "estimated_complexity": "medium",
       "requires_project": false
-    }
+    },
+    "product-audit": {
+      "name": "Product Audit",
+      "description": "End-of-milestone soft gate comparing declared product intent against code/test/deploy/docs evidence; emits structured gaps and suggested follow-up slices",
+      "file": "product-audit.md",
+      "phases": ["audit"],
+      "triggers": [
+        "product audit",
+        "completeness audit",
+        "milestone audit",
+        "product readiness",
+        "product gap"
+      ],
+      "artifact_dir": ".sf/active/",
+      "estimated_complexity": "low",
+      "requires_project": true
+    }
   }
 }