feat(deploy): vision-to-production pipeline — deploy/smoke/release/rollback/challenge

- sf-db.js: ensureDeployTables() adds deploy_runs, smoke_results, release_records, rollback_runs (schema v51); migration block follows sleeptime v50
- preferences.js: deploy block merged (target, command, url, auto_release, release_type, publish_channel, adversarial_review)
- auto-prompts.js: buildDeployPrompt, buildSmokeProductionPrompt, buildReleasePrompt, buildRollbackPrompt, buildChallengePrompt
- auto-dispatch.js: 5 new rules — completing-milestone→challenge, completing-milestone→release, release-done→deploy, deploy-done→smoke-production, smoke-failed→rollback
- prompts/: deploy.md, smoke-production.md, release.md, rollback.md, challenge.md
- sf-db-migration test: bump expected schema version 49→51

The autonomous loop can now carry a milestone from complete-milestone all the way to a live, smoke-verified, tagged release. Each stage is gated by prefs (auto_release, deploy.target, deploy.url) so projects opt in per stage. Challenge (adversarial review) runs before release when adversarial_review is set.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-09 15:25:47 +02:00 · 2026-05-09 15:25:47 +02:00 · 3b249c4144
commit 3b249c4144
parent d09c8282d0
10 changed files with 823 additions and 1 deletions
--- a/src/resources/extensions/sf/auto-dispatch.js
+++ b/src/resources/extensions/sf/auto-dispatch.js
@ -17,6 +17,11 @@ import {
 	buildDiscussMilestonePrompt,
 	buildDiscussProjectPrompt,
 	buildDiscussRequirementsPrompt,
+	buildDeployPrompt,
+	buildSmokeProductionPrompt,
+	buildReleasePrompt,
+	buildRollbackPrompt,
+	buildChallengePrompt,
 	buildExecuteTaskPrompt,
 	buildGateEvaluatePrompt,
 	buildParallelResearchSlicesPrompt,
@ -1699,6 +1704,186 @@ export const DISPATCH_RULES = [
 			};
 		},
 	},
+	{
+		name: "completing-milestone → challenge",
+		match: async ({ state, mid, midTitle, basePath, prefs }) => {
+			if (state.phase !== "completing-milestone") return null;
+			if (!prefs?.deploy?.adversarial_review) return null;
+			// Only trigger if no challenge assessment exists for this milestone yet
+			try {
+				const { getDatabase } = await import("./sf-db.js");
+				const db = getDatabase(basePath);
+				const row = db
+					.prepare(
+						"SELECT id FROM assessments WHERE milestone_id = ? AND artifact_type = 'CHALLENGE' LIMIT 1",
+					)
+					.get(mid);
+				if (row) return null;
+			} catch {
+				return null;
+			}
+			return {
+				action: "dispatch",
+				unitType: "challenge",
+				unitId: `challenge-${mid}`,
+				prompt: await buildChallengePrompt(
+					mid,
+					midTitle,
+					"milestone",
+					prefs?.deploy?.adversarial_mode ?? "red-team",
+					basePath,
+				),
+			};
+		},
+	},
+	{
+		name: "completing-milestone → release",
+		match: async ({ state, mid, midTitle, basePath, prefs }) => {
+			if (state.phase !== "completing-milestone") return null;
+			if (!prefs?.deploy?.auto_release) return null;
+			// Only if no release record exists for this milestone yet
+			let hasRelease = false;
+			try {
+				const { getDatabase } = await import("./sf-db.js");
+				const db = getDatabase(basePath);
+				const row = db
+					.prepare(
+						"SELECT id FROM release_records WHERE milestone_id = ? LIMIT 1",
+					)
+					.get(mid);
+				hasRelease = !!row;
+			} catch {
+				// DB unavailable — skip this rule
+				return null;
+			}
+			if (hasRelease) return null;
+			return {
+				action: "dispatch",
+				unitType: "release",
+				unitId: `release-${mid}`,
+				prompt: await buildReleasePrompt(mid, midTitle, basePath),
+			};
+		},
+	},
+	{
+		name: "release-done → deploy",
+		match: async ({ state, mid, midTitle, basePath, prefs }) => {
+			if (state.phase !== "completing-milestone") return null;
+			if (!prefs?.deploy?.target || !prefs?.deploy?.command) return null;
+			// Only trigger if a release record exists but no deploy run exists
+			let hasRelease = false;
+			let hasDeployRun = false;
+			try {
+				const { getDatabase } = await import("./sf-db.js");
+				const db = getDatabase(basePath);
+				const rr = db
+					.prepare(
+						"SELECT id FROM release_records WHERE milestone_id = ? LIMIT 1",
+					)
+					.get(mid);
+				hasRelease = !!rr;
+				const dr = db
+					.prepare(
+						"SELECT id FROM deploy_runs WHERE milestone_id = ? AND status != 'rolled-back' LIMIT 1",
+					)
+					.get(mid);
+				hasDeployRun = !!dr;
+			} catch {
+				return null;
+			}
+			if (!hasRelease || hasDeployRun) return null;
+			return {
+				action: "dispatch",
+				unitType: "deploy",
+				unitId: `deploy-${mid}`,
+				prompt: await buildDeployPrompt(mid, midTitle, basePath),
+			};
+		},
+	},
+	{
+		name: "deploy-done → smoke-production",
+		match: async ({ state, mid, midTitle, basePath, prefs }) => {
+			if (state.phase !== "completing-milestone") return null;
+			if (!prefs?.deploy?.url) return null;
+			let deployRunId = null;
+			let hasSmokeResult = false;
+			try {
+				const { getDatabase } = await import("./sf-db.js");
+				const db = getDatabase(basePath);
+				const dr = db
+					.prepare(
+						"SELECT id FROM deploy_runs WHERE milestone_id = ? AND status = 'success' ORDER BY created_at DESC LIMIT 1",
+					)
+					.get(mid);
+				if (!dr) return null;
+				deployRunId = dr.id;
+				const sr = db
+					.prepare(
+						"SELECT id FROM smoke_results WHERE deploy_run_id = ? LIMIT 1",
+					)
+					.get(deployRunId);
+				hasSmokeResult = !!sr;
+			} catch {
+				return null;
+			}
+			if (!deployRunId || hasSmokeResult) return null;
+			return {
+				action: "dispatch",
+				unitType: "smoke-production",
+				unitId: `smoke-${mid}`,
+				prompt: await buildSmokeProductionPrompt(
+					mid,
+					midTitle,
+					deployRunId,
+					basePath,
+				),
+			};
+		},
+	},
+	{
+		name: "smoke-failed → rollback",
+		match: async ({ state, mid, midTitle, basePath, prefs }) => {
+			if (state.phase !== "completing-milestone") return null;
+			if (!prefs?.deploy?.target) return null;
+			let deployRunId = null;
+			let failReason = "Smoke check failed";
+			try {
+				const { getDatabase } = await import("./sf-db.js");
+				const db = getDatabase(basePath);
+				const sr = db
+					.prepare(
+						"SELECT sr.deploy_run_id, sr.verdict FROM smoke_results sr " +
+							"WHERE sr.milestone_id = ? AND sr.verdict = 'FAIL' " +
+							"ORDER BY sr.created_at DESC LIMIT 1",
+					)
+					.get(mid);
+				if (!sr) return null;
+				deployRunId = sr.deploy_run_id;
+				// Only trigger if there is no rollback run yet for this deploy
+				const rr = db
+					.prepare(
+						"SELECT id FROM rollback_runs WHERE deploy_run_id = ? LIMIT 1",
+					)
+					.get(deployRunId);
+				if (rr) return null;
+			} catch {
+				return null;
+			}
+			if (!deployRunId) return null;
+			return {
+				action: "dispatch",
+				unitType: "rollback",
+				unitId: `rollback-${mid}`,
+				prompt: await buildRollbackPrompt(
+					mid,
+					midTitle,
+					deployRunId,
+					failReason,
+					basePath,
+				),
+			};
+		},
+	},
 	{
 		name: "complete → stop",
 		match: async ({ state }) => {
--- a/src/resources/extensions/sf/auto-prompts.js
+++ b/src/resources/extensions/sf/auto-prompts.js
@ -3021,3 +3021,233 @@ export async function buildRewriteDocsPrompt(
 		overridesPath: relSfRootFile("OVERRIDES"),
 	});
 }
+
+/**
+ * Build the deploy prompt for a completed milestone.
+ *
+ * Purpose: executes the project's configured deploy command, records the
+ * outcome to deploy_runs in sf.db, and surfaces the deployed URL so the
+ * smoke unit can verify it.
+ *
+ * Consumer: auto-dispatch.js `deploy` unit type, triggered after release-done
+ * when prefs.deploy.target is set.
+ */
+export async function buildDeployPrompt(mid, midTitle, base) {
+	const prefs = loadEffectiveSFPreferences();
+	const deploy = prefs?.preferences?.deploy ?? {};
+	const resolveArtifact = async (key) => {
+		switch (key) {
+			case "project":
+				return await inlineProjectFromDb(base);
+			default:
+				return null;
+		}
+	};
+	const composed = await composeInlinedContext("deploy", resolveArtifact);
+	const inlinedContext = capPreamble(
+		`## Inlined Context (preloaded — do not re-read these files)\n\n${composed}`,
+	);
+	return loadPrompt("deploy", {
+		workingDirectory: base,
+		milestoneId: mid,
+		milestoneTitle: midTitle,
+		deployTarget: deploy.target ?? "custom",
+		deployCommand: deploy.command ?? "echo 'No deploy command configured'",
+		deployedUrl: deploy.url ?? "",
+		inlinedContext,
+		skillActivation: buildSkillActivationBlock({
+			base,
+			milestoneId: mid,
+			extraContext: [inlinedContext],
+			unitType: "deploy",
+		}),
+	});
+}
+
+/**
+ * Build the smoke-production prompt for a deployed milestone.
+ *
+ * Purpose: exercises the live production URL after deploy to verify the
+ * deployment is correct; writes smoke_results to sf.db; blocks on failure
+ * so rollback can run.
+ *
+ * Consumer: auto-dispatch.js `smoke-production` unit type, triggered after
+ * deploy-done when prefs.deploy.url is set.
+ */
+export async function buildSmokeProductionPrompt(
+	mid,
+	midTitle,
+	deployRunId,
+	base,
+) {
+	const prefs = loadEffectiveSFPreferences();
+	const deploy = prefs?.preferences?.deploy ?? {};
+	const resolveArtifact = async (key) => {
+		switch (key) {
+			case "project":
+				return await inlineProjectFromDb(base);
+			default:
+				return null;
+		}
+	};
+	const composed = await composeInlinedContext(
+		"smoke-production",
+		resolveArtifact,
+	);
+	const inlinedContext = capPreamble(
+		`## Inlined Context (preloaded — do not re-read these files)\n\n${composed}`,
+	);
+	return loadPrompt("smoke-production", {
+		workingDirectory: base,
+		milestoneId: mid,
+		milestoneTitle: midTitle,
+		deployRunId,
+		deployedUrl: deploy.url ?? "",
+		versionPath: deploy.version_path ?? "",
+		releaseVersion: "",
+		verdict: "PENDING",
+		inlinedContext,
+		skillActivation: buildSkillActivationBlock({
+			base,
+			milestoneId: mid,
+			extraContext: [inlinedContext],
+			unitType: "smoke-production",
+		}),
+	});
+}
+
+/**
+ * Build the release prompt for a completed, validated milestone.
+ *
+ * Purpose: bumps semver, writes CHANGELOG, commits, tags, and optionally
+ * publishes (npm/docker/github-release); records to release_records in sf.db.
+ *
+ * Consumer: auto-dispatch.js `release` unit type, triggered after
+ * completing-milestone when prefs.deploy.auto_release is truthy.
+ */
+export async function buildReleasePrompt(mid, midTitle, base) {
+	const prefs = loadEffectiveSFPreferences();
+	const deploy = prefs?.preferences?.deploy ?? {};
+	const resolveArtifact = async (key) => {
+		switch (key) {
+			case "project":
+				return await inlineProjectFromDb(base);
+			default:
+				return null;
+		}
+	};
+	const composed = await composeInlinedContext("release", resolveArtifact);
+	const inlinedContext = capPreamble(
+		`## Inlined Context (preloaded — do not re-read these files)\n\n${composed}`,
+	);
+	return loadPrompt("release", {
+		workingDirectory: base,
+		milestoneId: mid,
+		milestoneTitle: midTitle,
+		currentVersion: deploy.current_version ?? "0.0.0",
+		releaseType: deploy.release_type ?? "patch",
+		publishChannel: deploy.publish_channel ?? "none",
+		today: new Date().toISOString().slice(0, 10),
+		inlinedContext,
+		skillActivation: buildSkillActivationBlock({
+			base,
+			milestoneId: mid,
+			extraContext: [inlinedContext],
+			unitType: "release",
+		}),
+	});
+}
+
+/**
+ * Build the rollback prompt for a failed smoke-production check.
+ *
+ * Purpose: reverts the failed deployment to the last known-good state;
+ * records to rollback_runs and updates the deploy_runs status in sf.db.
+ *
+ * Consumer: auto-dispatch.js `rollback` unit type, triggered after
+ * smoke-failed when prefs.deploy.target is set.
+ */
+export async function buildRollbackPrompt(
+	mid,
+	midTitle,
+	deployRunId,
+	rollbackReason,
+	base,
+) {
+	const prefs = loadEffectiveSFPreferences();
+	const deploy = prefs?.preferences?.deploy ?? {};
+	const resolveArtifact = async (key) => {
+		switch (key) {
+			case "project":
+				return await inlineProjectFromDb(base);
+			default:
+				return null;
+		}
+	};
+	const composed = await composeInlinedContext("rollback", resolveArtifact);
+	const inlinedContext = capPreamble(
+		`## Inlined Context (preloaded — do not re-read these files)\n\n${composed}`,
+	);
+	return loadPrompt("rollback", {
+		workingDirectory: base,
+		milestoneId: mid,
+		milestoneTitle: midTitle,
+		deployRunId,
+		deployTarget: deploy.target ?? "custom",
+		deployedUrl: deploy.url ?? "",
+		appName: deploy.app_name ?? mid,
+		rollbackReason,
+		inlinedContext,
+		skillActivation: buildSkillActivationBlock({
+			base,
+			milestoneId: mid,
+			extraContext: [inlinedContext],
+			unitType: "rollback",
+		}),
+	});
+}
+
+/**
+ * Build the challenge (adversarial review) prompt for a milestone or slice.
+ *
+ * Purpose: activates the adversary agent to red-team, assumption-audit, or
+ * failure-inject against the named target; blocks the pipeline if critical
+ * findings require remediation.
+ *
+ * Consumer: auto-dispatch.js `challenge` unit type, dispatched by the
+ * adversary role in the swarm after validation passes.
+ */
+export async function buildChallengePrompt(
+	mid,
+	midTitle,
+	challengeTarget,
+	challengeMode,
+	base,
+) {
+	const resolveArtifact = async (key) => {
+		switch (key) {
+			case "project":
+				return await inlineProjectFromDb(base);
+			default:
+				return null;
+		}
+	};
+	const composed = await composeInlinedContext("challenge", resolveArtifact);
+	const inlinedContext = capPreamble(
+		`## Inlined Context (preloaded — do not re-read these files)\n\n${composed}`,
+	);
+	return loadPrompt("challenge", {
+		workingDirectory: base,
+		milestoneId: mid,
+		milestoneTitle: midTitle,
+		challengeTarget: challengeTarget ?? "milestone",
+		challengeMode: challengeMode ?? "red-team",
+		inlinedContext,
+		skillActivation: buildSkillActivationBlock({
+			base,
+			milestoneId: mid,
+			extraContext: [inlinedContext],
+			unitType: "challenge",
+		}),
+	});
+}
--- a/src/resources/extensions/sf/preferences.js
+++ b/src/resources/extensions/sf/preferences.js
@ -602,6 +602,11 @@ function mergePreferences(base, override) {
 		subscription: override.subscription ?? base.subscription,
 		allow_flat_rate_providers:
 			override.allow_flat_rate_providers ?? base.allow_flat_rate_providers,
+		// ── Production delivery ──
+		deploy:
+			base.deploy || override.deploy
+				? { ...(base.deploy ?? {}), ...(override.deploy ?? {}) }
+				: undefined,
 	};
 }
 function mergeStringLists(base, override) {
--- a/src/resources/extensions/sf/prompts/challenge.md
+++ b/src/resources/extensions/sf/prompts/challenge.md
@ -0,0 +1,73 @@
+You are executing SF autonomous mode.
+
+## UNIT: Challenge (Adversarial Review) — {{milestoneId}}
+
+## Working Directory
+
+Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory.
+
+{{inlinedContext}}
+
+{{skillActivation}}
+
+---
+
+## Challenge Instructions
+
+**Milestone:** `{{milestoneId}}` — {{milestoneTitle}}
+**Target:** `{{challengeTarget}}` (milestone / slice / component / architecture / security / assumptions)
+**Challenge mode:** `{{challengeMode}}` (red-team / assumption-audit / failure-injection / adversarial-spec)
+
+You are the adversary agent. Your job is NOT to complete work — it is to rigorously attack the correctness, safety, and assumptions of what has been built or planned. You represent the failure modes the product team has not considered.
+
+### Challenge rules
+
+1. **Be ruthless but specific.** Every finding must include: what breaks, under what condition, and what the impact is.
+2. **No false positives for diplomacy.** If something is fine, say so. Do not manufacture findings to seem thorough.
+3. **Evidence-first.** For each finding, provide a concrete reproduction path: a command, a code path, an input, or a condition.
+4. **Prioritise by blast radius.** Data loss, security, and correctness > performance > UX > style.
+
+### What to challenge
+
+Based on `{{challengeMode}}`:
+
+- **red-team** — attempt to break the system: inject invalid inputs, exceed rate limits, trigger edge cases, exploit assumptions in the implementation. Try to cause data loss, auth bypass, or incorrect output.
+- **assumption-audit** — enumerate every assumption in the design docs, milestone spec, and slice plans. For each, state: what happens if the assumption is wrong? What is the evidence it is valid?
+- **failure-injection** — simulate dependency failures: DB unavailable, API timeout, disk full, OOM, concurrent writes. Does the system degrade gracefully or crash?
+- **adversarial-spec** — re-read the spec as an adversarial user. Find underspecified behaviour, ambiguous edge cases, missing error states, and spec contradictions.
+
+### Output format
+
+For each finding:
+
+```
+## Finding [N]: <title>
+
+**Severity:** critical / high / medium / low
+**Mode:** red-team / assumption-audit / failure-injection / adversarial-spec
+**Condition:** <what must be true for this to trigger>
+**Impact:** <what breaks and how badly>
+**Evidence:** <command, code path, or reproduction steps>
+**Recommendation:** <minimal fix or acknowledgement that the risk is accepted>
+```
+
+After all findings, provide an **Overall Verdict**:
+- `PASS` — no significant findings; the system is robust enough to proceed.
+- `NEEDS-REMEDIATION` — one or more critical/high findings must be addressed before deploy.
+- `ADVISORY` — findings are low severity; proceed with awareness.
+
+Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `artifact_type: "CHALLENGE"`, and the full challenge report as content.
+
+### Report sf-internal observations
+
+If you observe sf-the-tool friction during this unit, file via `sf_self_report`.
+
+When done, say: "Challenge {{milestoneId}} complete — verdict: <verdict>."
+
+---
+
+**After completing this step, output exactly one of these markers:**
+
+- `<turn_status>complete</turn_status>` if verdict is PASS or ADVISORY
+- `<turn_status>blocked</turn_status>` if verdict is NEEDS-REMEDIATION
+- `<turn_status>giving_up</turn_status>` if the target is too underspecified to challenge meaningfully
--- a/src/resources/extensions/sf/prompts/deploy.md
+++ b/src/resources/extensions/sf/prompts/deploy.md
@ -0,0 +1,63 @@
+You are executing SF autonomous mode.
+
+## UNIT: Deploy — {{milestoneId}}
+
+## Working Directory
+
+Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory.
+
+{{inlinedContext}}
+
+{{skillActivation}}
+
+---
+
+## Deploy Instructions
+
+**Milestone:** `{{milestoneId}}` — {{milestoneTitle}}
+**Deploy target:** `{{deployTarget}}`
+**Deploy command:** `{{deployCommand}}`
+**Expected live URL:** `{{deployedUrl}}`
+
+You are the deployment agent. Your job is to ship the completed, validated milestone to production.
+
+### Pre-flight checks (run before deploying)
+
+1. Verify the release record exists for this milestone (`release_records` in sf.db or `{{releaseVersion}}` tag in git).
+2. Verify no un-merged worktree conflicts (`git status` clean).
+3. Verify the deploy command exists and is executable.
+4. If a `.env.deploy` or environment variable list is specified in `preferences.yaml` under `deploy.env_check`, verify those vars are set.
+
+### Deploy execution
+
+Run `{{deployCommand}}` and capture all stdout/stderr.
+
+Record the deploy run to the database:
+- `INSERT INTO deploy_runs (id, milestone_id, target, command, status, exit_code, output, deployed_url, created_at, finished_at)`
+- Use a UUID for `id`.
+- Set `status = 'success'` if exit code is 0, `status = 'failed'` otherwise.
+
+### Post-deploy
+
+If the deploy succeeded:
+- Write `deployed_url` to the deploy_runs row.
+- Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `artifact_type: "DEPLOY"`, and a brief deploy report as content.
+
+If the deploy failed:
+- Set `status = 'failed'` in deploy_runs.
+- Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `artifact_type: "DEPLOY"`, and the failure output as content.
+- Output `<turn_status>blocked</turn_status>` — do NOT attempt to fix the deploy failure inline; it requires a repair unit.
+
+### Report sf-internal observations
+
+If you observe sf-the-tool friction during this unit, file via `sf_self_report`.
+
+When done, say: "Deploy {{milestoneId}} complete."
+
+---
+
+**After completing this step, output exactly one of these markers:**
+
+- `<turn_status>complete</turn_status>` if deploy succeeded
+- `<turn_status>blocked</turn_status>` if deploy failed or pre-flight failed
+- `<turn_status>giving_up</turn_status>` if the deploy environment is fundamentally broken
--- a/src/resources/extensions/sf/prompts/release.md
+++ b/src/resources/extensions/sf/prompts/release.md
@ -0,0 +1,61 @@
+You are executing SF autonomous mode.
+
+## UNIT: Release — {{milestoneId}}
+
+## Working Directory
+
+Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory.
+
+{{inlinedContext}}
+
+{{skillActivation}}
+
+---
+
+## Release Instructions
+
+**Milestone:** `{{milestoneId}}` — {{milestoneTitle}}
+**Current version:** `{{currentVersion}}`
+**Release type:** `{{releaseType}}` (major / minor / patch)
+**Publish channel:** `{{publishChannel}}` (npm / docker / github-release / none)
+
+You are the release agent. Your job is to version, tag, changelog, and optionally publish the completed milestone.
+
+### Steps
+
+1. **Determine new version** — apply `{{releaseType}}` bump to `{{currentVersion}}`. Use semver. If `package.json` exists, read the current version from it.
+
+2. **Update version files** — update `package.json` (and any `packages/*/package.json` if this is a monorepo) with the new version. Update `version.txt` or `VERSION` if present.
+
+3. **Generate changelog entry** — summarise the milestone's completed slices into a CHANGELOG.md entry under `## [{{newVersion}}] - {{today}}`. Use the slice SUMMARY files as source material. Be concise: one bullet per slice.
+
+4. **Commit the release** — `git add -A && git commit -m "chore(release): {{newVersion}}"`.
+
+5. **Tag** — `git tag -a v{{newVersion}} -m "Release {{newVersion}} — {{milestoneTitle}}"`.
+
+6. **Publish** (if `{{publishChannel}}` is not `none`):
+   - `npm`: run `npm publish --access public` (or `npm publish` for private).
+   - `docker`: run the `deploy.publish_command` from preferences.
+   - `github-release`: create a GitHub release via `gh release create v{{newVersion}} --title "{{newVersion}}" --notes "$(cat CHANGELOG.md | head -50)"`.
+
+7. **Record to DB** — INSERT into `release_records (id, milestone_id, version, prev_version, changelog_entry, git_tag, published, created_at)`.
+
+8. **Call `sf_summary_save`** with `milestone_id: {{milestoneId}}`, `artifact_type: "RELEASE"`, and the changelog entry as content.
+
+### On failure
+
+If publish fails (network error, auth error), set `published = 0` in release_records; the commit and tag remain valid. Output `<turn_status>blocked</turn_status>` with a clear reason.
+
+### Report sf-internal observations
+
+If you observe sf-the-tool friction during this unit, file via `sf_self_report`.
+
+When done, say: "Release {{newVersion}} complete."
+
+---
+
+**After completing this step, output exactly one of these markers:**
+
+- `<turn_status>complete</turn_status>` if release succeeded (publish optional)
+- `<turn_status>blocked</turn_status>` if a required step failed
+- `<turn_status>giving_up</turn_status>` if version state is corrupted and cannot be resolved
--- a/src/resources/extensions/sf/prompts/rollback.md
+++ b/src/resources/extensions/sf/prompts/rollback.md
@ -0,0 +1,61 @@
+You are executing SF autonomous mode.
+
+## UNIT: Rollback — {{milestoneId}}
+
+## Working Directory
+
+Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory.
+
+{{inlinedContext}}
+
+{{skillActivation}}
+
+---
+
+## Rollback Instructions
+
+**Milestone:** `{{milestoneId}}` — {{milestoneTitle}}
+**Failed deploy run:** `{{deployRunId}}`
+**Deploy target:** `{{deployTarget}}`
+**Rollback reason:** `{{rollbackReason}}`
+
+You are the rollback agent. A smoke test failed after deployment. Your job is to revert the production environment to the last known-good state.
+
+### Rollback steps
+
+1. **Identify the rollback command** from `preferences.yaml` under `deploy.rollback_command`. If not set, derive it:
+   - Fly.io: `fly releases rollback --app {{appName}}`
+   - Docker/Kubernetes: re-deploy the previous image tag
+   - npm: no rollback (record only)
+   - Vercel: `vercel rollback`
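+   - Custom: there is no default to derive — `deploy.rollback_command` must be set. If it is missing, do not guess a command; record the rollback as failed and report that human intervention is required.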
+
+2. **Execute the rollback** — run the command and capture output.
+
+3. **Verify rollback** — re-run the health check against `{{deployedUrl}}`. Confirm the previous version is live.
+
+4. **Record to DB** — INSERT into `rollback_runs (id, deploy_run_id, milestone_id, reason, status, output, created_at, finished_at)`.
+   - Set `status = 'success'` if the health check passes post-rollback.
+   - Set `status = 'failed'` if the environment is still broken.
+
+5. **Update the deploy run** — UPDATE `deploy_runs SET status = 'rolled-back' WHERE id = '{{deployRunId}}'`.
+
+6. **Call `sf_summary_save`** with `milestone_id: {{milestoneId}}`, `artifact_type: "ROLLBACK"`, and the rollback report as content.
+
+### After rollback
+
+Output `<turn_status>blocked</turn_status>` — the milestone requires a repair unit to address the smoke failure before re-attempting deploy. Do NOT attempt to fix the underlying bug inline.
+
+### Report sf-internal observations
+
+If you observe sf-the-tool friction during this unit, file via `sf_self_report`.
+
+When done, say: "Rollback {{milestoneId}} complete."
+
+---
+
+**After completing this step, output exactly one of these markers:**
+
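+- `<turn_status>complete</turn_status>` — do not use for this unit; a rollback that succeeded still ends with `blocked` (see "After rollback" above) so that a repair unit runs before any re-deploy
+- `<turn_status>blocked</turn_status>` after every rollback attempt, whether the environment is stable again or still broken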
+- `<turn_status>giving_up</turn_status>` if the production environment is unrecoverable without human intervention
--- a/src/resources/extensions/sf/prompts/smoke-production.md
+++ b/src/resources/extensions/sf/prompts/smoke-production.md
@ -0,0 +1,67 @@
+You are executing SF autonomous mode.
+
+## UNIT: Smoke Test Production — {{milestoneId}}
+
+## Working Directory
+
+Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory.
+
+{{inlinedContext}}
+
+{{skillActivation}}
+
+---
+
+## Smoke Test Instructions
+
+**Milestone:** `{{milestoneId}}` — {{milestoneTitle}}
+**Live URL:** `{{deployedUrl}}`
+**Deploy run ID:** `{{deployRunId}}`
+
+You are the production smoke tester. Your job is to verify the live deployment is healthy and correct — not to run unit tests, but to exercise the real running service.
+
+### Smoke checks to run
+
+For each check below, record: the check description, the tool/command used, the actual response observed, and PASS / FAIL.
+
+1. **Health endpoint** — `GET {{deployedUrl}}/health` (or `/`, or the configured `deploy.health_path`). Expect HTTP 200.
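+2. **Version check** — if `deploy.version_path` is set, `GET {{deployedUrl}}/{{versionPath}}` and verify the version matches `{{releaseVersion}}` (when that value is empty, compare against the `version` column of this milestone's most recent `release_records` row).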
+3. **Critical path** — exercise the minimum viable user flow defined in `deploy.smoke_checks` from preferences, or the UAT ASSESSMENT for this milestone's first slice.
+4. **Error rate** — if observability is configured (`deploy.metrics_url`), check error rate is < 1%.
+
+Use `curl`, browser tools, or the `bash` tool to execute checks. Capture raw HTTP responses as evidence.
+
+### Record results
+
+INSERT a row into `smoke_results`:
+- `id`: UUID
+- `deploy_run_id`: `{{deployRunId}}`
+- `milestone_id`: `{{milestoneId}}`
+- `url`: `{{deployedUrl}}`
+- `status`: `'pass'` or `'fail'`
+- `verdict`: `'PASS'` or `'FAIL'`
+- `checks_json`: JSON array of `{ check, result, evidence }` objects
+- `created_at` / `finished_at`
+
+Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `artifact_type: "SMOKE"`, and the full smoke result markdown.
+
+### On failure
+
+If any critical check fails:
+- Set `verdict = 'FAIL'` in smoke_results.
+- Output `<turn_status>blocked</turn_status>`.
+- The autonomous loop will trigger a rollback unit next.
+
+### Report sf-internal observations
+
+If you observe sf-the-tool friction during this unit, file via `sf_self_report`.
+
+When done, say: "Smoke {{milestoneId}} complete — verdict: <PASS or FAIL>."
+
+---
+
+**After completing this step, output exactly one of these markers:**
+
+- `<turn_status>complete</turn_status>` if all smoke checks passed
+- `<turn_status>blocked</turn_status>` if any critical check failed
+- `<turn_status>giving_up</turn_status>` if the production environment is unreachable
--- a/src/resources/extensions/sf/sf-db.js
+++ b/src/resources/extensions/sf/sf-db.js
@ -577,6 +577,70 @@ function ensureUokMessageTables(db) {
 		"CREATE INDEX IF NOT EXISTS idx_uok_messages_sent ON uok_messages(sent_at DESC)",
 	);
 }
+function ensureDeployTables(db) {
+	db.exec(`
+      CREATE TABLE IF NOT EXISTS deploy_runs (
+        id TEXT PRIMARY KEY,
+        milestone_id TEXT NOT NULL,
+        target TEXT NOT NULL,
+        command TEXT NOT NULL,
+        status TEXT NOT NULL DEFAULT 'pending',
+        exit_code INTEGER DEFAULT NULL,
+        output TEXT DEFAULT NULL,
+        deployed_url TEXT DEFAULT NULL,
+        created_at TEXT NOT NULL,
+        finished_at TEXT DEFAULT NULL
+      )
+    `);
+	db.exec(`
+      CREATE TABLE IF NOT EXISTS smoke_results (
+        id TEXT PRIMARY KEY,
+        deploy_run_id TEXT NOT NULL,
+        milestone_id TEXT NOT NULL,
+        url TEXT NOT NULL,
+        status TEXT NOT NULL DEFAULT 'pending',
+        verdict TEXT DEFAULT NULL,
+        checks_json TEXT NOT NULL DEFAULT '[]',
+        created_at TEXT NOT NULL,
+        finished_at TEXT DEFAULT NULL,
+        FOREIGN KEY (deploy_run_id) REFERENCES deploy_runs(id) ON DELETE CASCADE
+      )
+    `);
+	db.exec(`
+      CREATE TABLE IF NOT EXISTS release_records (
+        id TEXT PRIMARY KEY,
+        milestone_id TEXT NOT NULL,
+        version TEXT NOT NULL,
+        prev_version TEXT DEFAULT NULL,
+        changelog_entry TEXT DEFAULT NULL,
+        git_tag TEXT DEFAULT NULL,
+        published INTEGER NOT NULL DEFAULT 0,
+        created_at TEXT NOT NULL
+      )
+    `);
+	db.exec(`
+      CREATE TABLE IF NOT EXISTS rollback_runs (
+        id TEXT PRIMARY KEY,
+        deploy_run_id TEXT NOT NULL,
+        milestone_id TEXT NOT NULL,
+        reason TEXT NOT NULL,
+        status TEXT NOT NULL DEFAULT 'pending',
+        output TEXT DEFAULT NULL,
+        created_at TEXT NOT NULL,
+        finished_at TEXT DEFAULT NULL,
+        FOREIGN KEY (deploy_run_id) REFERENCES deploy_runs(id) ON DELETE CASCADE
+      )
+    `);
+	db.exec(
+		"CREATE INDEX IF NOT EXISTS idx_deploy_runs_milestone ON deploy_runs(milestone_id, created_at DESC)",
+	);
+	db.exec(
+		"CREATE INDEX IF NOT EXISTS idx_smoke_results_deploy ON smoke_results(deploy_run_id)",
+	);
+	db.exec(
+		"CREATE INDEX IF NOT EXISTS idx_release_records_milestone ON release_records(milestone_id, created_at DESC)",
+	);
+}
 function ensureSleeptimeQueueTable(db) {
 	db.exec(`
      CREATE TABLE IF NOT EXISTS sleeptime_consolidation_queue (
@ -1307,6 +1371,7 @@ function initSchema(db, fileBacked) {
 		ensureSessionTables(db);
 		ensureSessionSnapshotTable(db);
 		ensureUokMessageTables(db);
+		ensureDeployTables(db);
 		ensureSleeptimeQueueTable(db);
 		ensureSpecSchemaTables(db);
 		ensureTaskFrontmatterColumns(db);
@ -2933,6 +2998,18 @@ function migrateSchema(db) {
 				":applied_at": new Date().toISOString(),
 			});
 		}
+		if (currentVersion < 51) {
+			// Add deploy/smoke/release/rollback tables — closes the vision→production loop.
+			// deploy_runs tracks each deployment attempt; smoke_results tracks live verification;
+			// release_records tracks version bumps and publishes; rollback_runs tracks reversions.
+			ensureDeployTables(db);
+			db.prepare(
+				"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
+			).run({
+				":version": 51,
+				":applied_at": new Date().toISOString(),
+			});
+		}
 		db.exec("COMMIT");
 	} catch (err) {
 		db.exec("ROLLBACK");
--- a/src/resources/extensions/sf/tests/sf-db-migration.test.mjs
+++ b/src/resources/extensions/sf/tests/sf-db-migration.test.mjs
@ -223,7 +223,7 @@ test("openDatabase_migrates_v27_tasks_without_created_at_through_spec_backfill",
 	const version = db
 		.prepare("SELECT MAX(version) AS version FROM schema_version")
 		.get();
-	assert.equal(version.version, 49);
+	assert.equal(version.version, 51);
 	const taskSpec = db
 		.prepare(
 			"SELECT milestone_id, slice_id, task_id, verify FROM task_specs WHERE task_id = 'T01'",