diff --git a/src/resources/extensions/sf/auto-dispatch.js b/src/resources/extensions/sf/auto-dispatch.js
index 1de723054..e517a7a1b 100644
--- a/src/resources/extensions/sf/auto-dispatch.js
+++ b/src/resources/extensions/sf/auto-dispatch.js
@@ -17,6 +17,11 @@ import {
buildDiscussMilestonePrompt,
buildDiscussProjectPrompt,
buildDiscussRequirementsPrompt,
+ buildDeployPrompt,
+ buildSmokeProductionPrompt,
+ buildReleasePrompt,
+ buildRollbackPrompt,
+ buildChallengePrompt,
buildExecuteTaskPrompt,
buildGateEvaluatePrompt,
buildParallelResearchSlicesPrompt,
@@ -1699,6 +1704,186 @@ export const DISPATCH_RULES = [
};
},
},
+ {
+ name: "completing-milestone → challenge",
+ match: async ({ state, mid, midTitle, basePath, prefs }) => {
+ if (state.phase !== "completing-milestone") return null;
+ if (!prefs?.deploy?.adversarial_review) return null;
+ // Only trigger if no challenge assessment exists for this milestone yet
+ try {
+ const { getDatabase } = await import("./sf-db.js");
+ const db = getDatabase(basePath);
+ const row = db
+ .prepare(
+ "SELECT id FROM assessments WHERE milestone_id = ? AND artifact_type = 'CHALLENGE' LIMIT 1",
+ )
+ .get(mid);
+ if (row) return null;
+ } catch {
+ return null;
+ }
+ return {
+ action: "dispatch",
+ unitType: "challenge",
+ unitId: `challenge-${mid}`,
+ prompt: await buildChallengePrompt(
+ mid,
+ midTitle,
+ "milestone",
+ prefs?.deploy?.adversarial_mode ?? "red-team",
+ basePath,
+ ),
+ };
+ },
+ },
+ {
+ name: "completing-milestone → release",
+ match: async ({ state, mid, midTitle, basePath, prefs }) => {
+ if (state.phase !== "completing-milestone") return null;
+ if (!prefs?.deploy?.auto_release) return null;
+ // Only if no release record exists for this milestone yet
+ let hasRelease = false;
+ try {
+ const { getDatabase } = await import("./sf-db.js");
+ const db = getDatabase(basePath);
+ const row = db
+ .prepare(
+ "SELECT id FROM release_records WHERE milestone_id = ? LIMIT 1",
+ )
+ .get(mid);
+ hasRelease = !!row;
+ } catch {
+ // DB unavailable — skip this rule
+ return null;
+ }
+ if (hasRelease) return null;
+ return {
+ action: "dispatch",
+ unitType: "release",
+ unitId: `release-${mid}`,
+ prompt: await buildReleasePrompt(mid, midTitle, basePath),
+ };
+ },
+ },
+ {
+ name: "release-done → deploy",
+ match: async ({ state, mid, midTitle, basePath, prefs }) => {
+ if (state.phase !== "completing-milestone") return null;
+ if (!prefs?.deploy?.target || !prefs?.deploy?.command) return null;
+ // Only trigger if a release record exists but no deploy run exists
+ let hasRelease = false;
+ let hasDeployRun = false;
+ try {
+ const { getDatabase } = await import("./sf-db.js");
+ const db = getDatabase(basePath);
+ const rr = db
+ .prepare(
+ "SELECT id FROM release_records WHERE milestone_id = ? LIMIT 1",
+ )
+ .get(mid);
+ hasRelease = !!rr;
+ const dr = db
+ .prepare(
+ "SELECT id FROM deploy_runs WHERE milestone_id = ? AND status != 'rolled-back' LIMIT 1",
+ )
+ .get(mid);
+ hasDeployRun = !!dr;
+ } catch {
+ return null;
+ }
+ if (!hasRelease || hasDeployRun) return null;
+ return {
+ action: "dispatch",
+ unitType: "deploy",
+ unitId: `deploy-${mid}`,
+ prompt: await buildDeployPrompt(mid, midTitle, basePath),
+ };
+ },
+ },
+ {
+ name: "deploy-done → smoke-production",
+ match: async ({ state, mid, midTitle, basePath, prefs }) => {
+ if (state.phase !== "completing-milestone") return null;
+ if (!prefs?.deploy?.url) return null;
+ let deployRunId = null;
+ let hasSmokeResult = false;
+ try {
+ const { getDatabase } = await import("./sf-db.js");
+ const db = getDatabase(basePath);
+ const dr = db
+ .prepare(
+ "SELECT id FROM deploy_runs WHERE milestone_id = ? AND status = 'success' ORDER BY created_at DESC LIMIT 1",
+ )
+ .get(mid);
+ if (!dr) return null;
+ deployRunId = dr.id;
+ const sr = db
+ .prepare(
+ "SELECT id FROM smoke_results WHERE deploy_run_id = ? LIMIT 1",
+ )
+ .get(deployRunId);
+ hasSmokeResult = !!sr;
+ } catch {
+ return null;
+ }
+ if (!deployRunId || hasSmokeResult) return null;
+ return {
+ action: "dispatch",
+ unitType: "smoke-production",
+ unitId: `smoke-${mid}`,
+ prompt: await buildSmokeProductionPrompt(
+ mid,
+ midTitle,
+ deployRunId,
+ basePath,
+ ),
+ };
+ },
+ },
+ {
+ name: "smoke-failed → rollback",
+ match: async ({ state, mid, midTitle, basePath, prefs }) => {
+ if (state.phase !== "completing-milestone") return null;
+ if (!prefs?.deploy?.target) return null;
+ let deployRunId = null;
+ let failReason = "Smoke check failed";
+ try {
+ const { getDatabase } = await import("./sf-db.js");
+ const db = getDatabase(basePath);
+ const sr = db
+ .prepare(
+ "SELECT sr.deploy_run_id, sr.verdict FROM smoke_results sr " +
+ "WHERE sr.milestone_id = ? AND sr.verdict = 'FAIL' " +
+ "ORDER BY sr.created_at DESC LIMIT 1",
+ )
+ .get(mid);
+ if (!sr) return null;
+ deployRunId = sr.deploy_run_id;
+ // Only trigger if there is no rollback run yet for this deploy
+ const rr = db
+ .prepare(
+ "SELECT id FROM rollback_runs WHERE deploy_run_id = ? LIMIT 1",
+ )
+ .get(deployRunId);
+ if (rr) return null;
+ } catch {
+ return null;
+ }
+ if (!deployRunId) return null;
+ return {
+ action: "dispatch",
+ unitType: "rollback",
+ unitId: `rollback-${mid}`,
+ prompt: await buildRollbackPrompt(
+ mid,
+ midTitle,
+ deployRunId,
+ failReason,
+ basePath,
+ ),
+ };
+ },
+ },
{
name: "complete → stop",
match: async ({ state }) => {
diff --git a/src/resources/extensions/sf/auto-prompts.js b/src/resources/extensions/sf/auto-prompts.js
index 42ce6963b..664818b29 100644
--- a/src/resources/extensions/sf/auto-prompts.js
+++ b/src/resources/extensions/sf/auto-prompts.js
@@ -3021,3 +3021,233 @@ export async function buildRewriteDocsPrompt(
overridesPath: relSfRootFile("OVERRIDES"),
});
}
+
+/**
+ * Build the deploy prompt for a completed milestone.
+ *
+ * Purpose: executes the project's configured deploy command, records the
+ * outcome to deploy_runs in sf.db, and surfaces the deployed URL so the
+ * smoke unit can verify it.
+ *
+ * Consumer: auto-dispatch.js `deploy` unit type, triggered after release-done
+ * when prefs.deploy.target is set.
+ */
+export async function buildDeployPrompt(mid, midTitle, base) {
+ const prefs = loadEffectiveSFPreferences();
+ const deploy = prefs?.preferences?.deploy ?? {};
+ const resolveArtifact = async (key) => {
+ switch (key) {
+ case "project":
+ return await inlineProjectFromDb(base);
+ default:
+ return null;
+ }
+ };
+ const composed = await composeInlinedContext("deploy", resolveArtifact);
+ const inlinedContext = capPreamble(
+ `## Inlined Context (preloaded — do not re-read these files)\n\n${composed}`,
+ );
+ return loadPrompt("deploy", {
+ workingDirectory: base,
+ milestoneId: mid,
+ milestoneTitle: midTitle,
+ deployTarget: deploy.target ?? "custom",
+ deployCommand: deploy.command ?? "echo 'No deploy command configured'",
+ deployedUrl: deploy.url ?? "",
+ inlinedContext,
+ skillActivation: buildSkillActivationBlock({
+ base,
+ milestoneId: mid,
+ extraContext: [inlinedContext],
+ unitType: "deploy",
+ }),
+ });
+}
+
+/**
+ * Build the smoke-production prompt for a deployed milestone.
+ *
+ * Purpose: exercises the live production URL after deploy to verify the
+ * deployment is correct; writes smoke_results to sf.db; blocks on failure
+ * so rollback can run.
+ *
+ * Consumer: auto-dispatch.js `smoke-production` unit type, triggered after
+ * deploy-done when prefs.deploy.url is set.
+ */
+export async function buildSmokeProductionPrompt(
+ mid,
+ midTitle,
+ deployRunId,
+ base,
+) {
+ const prefs = loadEffectiveSFPreferences();
+ const deploy = prefs?.preferences?.deploy ?? {};
+ const resolveArtifact = async (key) => {
+ switch (key) {
+ case "project":
+ return await inlineProjectFromDb(base);
+ default:
+ return null;
+ }
+ };
+ const composed = await composeInlinedContext(
+ "smoke-production",
+ resolveArtifact,
+ );
+ const inlinedContext = capPreamble(
+ `## Inlined Context (preloaded — do not re-read these files)\n\n${composed}`,
+ );
+ return loadPrompt("smoke-production", {
+ workingDirectory: base,
+ milestoneId: mid,
+ milestoneTitle: midTitle,
+ deployRunId,
+ deployedUrl: deploy.url ?? "",
+ versionPath: deploy.version_path ?? "",
+ releaseVersion: "",
+ verdict: "PENDING",
+ inlinedContext,
+ skillActivation: buildSkillActivationBlock({
+ base,
+ milestoneId: mid,
+ extraContext: [inlinedContext],
+ unitType: "smoke-production",
+ }),
+ });
+}
+
+/**
+ * Build the release prompt for a completed, validated milestone.
+ *
+ * Purpose: bumps semver, writes CHANGELOG, commits, tags, and optionally
+ * publishes (npm/docker/github-release); records to release_records in sf.db.
+ *
+ * Consumer: auto-dispatch.js `release` unit type, triggered after
+ * completing-milestone when prefs.deploy.auto_release is truthy.
+ */
+export async function buildReleasePrompt(mid, midTitle, base) {
+ const prefs = loadEffectiveSFPreferences();
+ const deploy = prefs?.preferences?.deploy ?? {};
+ const resolveArtifact = async (key) => {
+ switch (key) {
+ case "project":
+ return await inlineProjectFromDb(base);
+ default:
+ return null;
+ }
+ };
+ const composed = await composeInlinedContext("release", resolveArtifact);
+ const inlinedContext = capPreamble(
+ `## Inlined Context (preloaded — do not re-read these files)\n\n${composed}`,
+ );
+ return loadPrompt("release", {
+ workingDirectory: base,
+ milestoneId: mid,
+ milestoneTitle: midTitle,
+ currentVersion: deploy.current_version ?? "0.0.0",
+ releaseType: deploy.release_type ?? "patch",
+ publishChannel: deploy.publish_channel ?? "none",
+ today: new Date().toISOString().slice(0, 10),
+ inlinedContext,
+ skillActivation: buildSkillActivationBlock({
+ base,
+ milestoneId: mid,
+ extraContext: [inlinedContext],
+ unitType: "release",
+ }),
+ });
+}
+
+/**
+ * Build the rollback prompt for a failed smoke-production check.
+ *
+ * Purpose: reverts the failed deployment to the last known-good state;
+ * records to rollback_runs and updates the deploy_runs status in sf.db.
+ *
+ * Consumer: auto-dispatch.js `rollback` unit type, triggered after
+ * smoke-failed when prefs.deploy.target is set.
+ */
+export async function buildRollbackPrompt(
+ mid,
+ midTitle,
+ deployRunId,
+ rollbackReason,
+ base,
+) {
+ const prefs = loadEffectiveSFPreferences();
+ const deploy = prefs?.preferences?.deploy ?? {};
+ const resolveArtifact = async (key) => {
+ switch (key) {
+ case "project":
+ return await inlineProjectFromDb(base);
+ default:
+ return null;
+ }
+ };
+ const composed = await composeInlinedContext("rollback", resolveArtifact);
+ const inlinedContext = capPreamble(
+ `## Inlined Context (preloaded — do not re-read these files)\n\n${composed}`,
+ );
+ return loadPrompt("rollback", {
+ workingDirectory: base,
+ milestoneId: mid,
+ milestoneTitle: midTitle,
+ deployRunId,
+ deployTarget: deploy.target ?? "custom",
+ deployedUrl: deploy.url ?? "",
+ appName: deploy.app_name ?? mid,
+ rollbackReason,
+ inlinedContext,
+ skillActivation: buildSkillActivationBlock({
+ base,
+ milestoneId: mid,
+ extraContext: [inlinedContext],
+ unitType: "rollback",
+ }),
+ });
+}
+
+/**
+ * Build the challenge (adversarial review) prompt for a milestone or slice.
+ *
+ * Purpose: activates the adversary agent to red-team, assumption-audit, or
+ * failure-inject against the named target; blocks the pipeline if critical
+ * findings require remediation.
+ *
+ * Consumer: auto-dispatch.js `challenge` unit type, dispatched by the
+ * adversary role in the swarm after validation passes.
+ */
+export async function buildChallengePrompt(
+ mid,
+ midTitle,
+ challengeTarget,
+ challengeMode,
+ base,
+) {
+ const resolveArtifact = async (key) => {
+ switch (key) {
+ case "project":
+ return await inlineProjectFromDb(base);
+ default:
+ return null;
+ }
+ };
+ const composed = await composeInlinedContext("challenge", resolveArtifact);
+ const inlinedContext = capPreamble(
+ `## Inlined Context (preloaded — do not re-read these files)\n\n${composed}`,
+ );
+ return loadPrompt("challenge", {
+ workingDirectory: base,
+ milestoneId: mid,
+ milestoneTitle: midTitle,
+ challengeTarget: challengeTarget ?? "milestone",
+ challengeMode: challengeMode ?? "red-team",
+ inlinedContext,
+ skillActivation: buildSkillActivationBlock({
+ base,
+ milestoneId: mid,
+ extraContext: [inlinedContext],
+ unitType: "challenge",
+ }),
+ });
+}
diff --git a/src/resources/extensions/sf/preferences.js b/src/resources/extensions/sf/preferences.js
index d649360f3..27ae7b5dd 100644
--- a/src/resources/extensions/sf/preferences.js
+++ b/src/resources/extensions/sf/preferences.js
@@ -602,6 +602,11 @@ function mergePreferences(base, override) {
subscription: override.subscription ?? base.subscription,
allow_flat_rate_providers:
override.allow_flat_rate_providers ?? base.allow_flat_rate_providers,
+ // ── Production delivery ──
+ deploy:
+ base.deploy || override.deploy
+ ? { ...(base.deploy ?? {}), ...(override.deploy ?? {}) }
+ : undefined,
};
}
function mergeStringLists(base, override) {
diff --git a/src/resources/extensions/sf/prompts/challenge.md b/src/resources/extensions/sf/prompts/challenge.md
new file mode 100644
index 000000000..7f84d7934
--- /dev/null
+++ b/src/resources/extensions/sf/prompts/challenge.md
@@ -0,0 +1,73 @@
+You are executing SF autonomous mode.
+
+## UNIT: Challenge (Adversarial Review) — {{milestoneId}}
+
+## Working Directory
+
+Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory.
+
+{{inlinedContext}}
+
+{{skillActivation}}
+
+---
+
+## Challenge Instructions
+
+**Milestone:** `{{milestoneId}}` — {{milestoneTitle}}
+**Target:** `{{challengeTarget}}` (slice / component / architecture / security / assumptions)
+**Challenge mode:** `{{challengeMode}}` (red-team / assumption-audit / failure-injection / adversarial-spec)
+
+You are the adversary agent. Your job is NOT to complete work — it is to rigorously attack the correctness, safety, and assumptions of what has been built or planned. You represent the failure modes the product team has not considered.
+
+### Challenge rules
+
+1. **Be ruthless but specific.** Every finding must include: what breaks, under what condition, and what the impact is.
+2. **No false positives for diplomacy.** If something is fine, say so. Do not manufacture findings to seem thorough.
+3. **Evidence-first.** For each finding, provide a concrete reproduction path: a command, a code path, an input, or a condition.
+4. **Prioritise by blast radius.** Data loss, security, and correctness > performance > UX > style.
+
+### What to challenge
+
+Based on `{{challengeMode}}`:
+
+- **red-team** — attempt to break the system: inject invalid inputs, exceed rate limits, trigger edge cases, exploit assumptions in the implementation. Try to cause data loss, auth bypass, or incorrect output.
+- **assumption-audit** — enumerate every assumption in the design docs, milestone spec, and slice plans. For each, state: what happens if the assumption is wrong? What is the evidence it is valid?
+- **failure-injection** — simulate dependency failures: DB unavailable, API timeout, disk full, OOM, concurrent writes. Does the system degrade gracefully or crash?
+- **adversarial-spec** — re-read the spec as an adversarial user. Find underspecified behaviour, ambiguous edge cases, missing error states, and spec contradictions.
+
+### Output format
+
+For each finding:
+
+```
+## Finding [N]:
+
+**Severity:** critical / high / medium / low
+**Mode:** red-team / assumption / failure / spec
+**Condition:**
+**Impact:**
+**Evidence:**
+**Recommendation:**
+```
+
+After all findings, provide an **Overall Verdict**:
+- `PASS` — no significant findings; the system is robust enough to proceed.
+- `NEEDS-REMEDIATION` — one or more critical/high findings must be addressed before deploy.
+- `ADVISORY` — findings are low severity; proceed with awareness.
+
+Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `artifact_type: "CHALLENGE"`, and the full challenge report as content.
+
+### Report sf-internal observations
+
+If you observe sf-the-tool friction during this unit, file via `sf_self_report`.
+
+When done, say: "Challenge {{milestoneId}} complete — verdict: ."
+
+---
+
+**After completing this step, output exactly one of these markers:**
+
+- `complete` if verdict is PASS or ADVISORY
+- `blocked` if verdict is NEEDS-REMEDIATION
+- `giving_up` if the target is too underspecified to challenge meaningfully
diff --git a/src/resources/extensions/sf/prompts/deploy.md b/src/resources/extensions/sf/prompts/deploy.md
new file mode 100644
index 000000000..2d8257d82
--- /dev/null
+++ b/src/resources/extensions/sf/prompts/deploy.md
@@ -0,0 +1,63 @@
+You are executing SF autonomous mode.
+
+## UNIT: Deploy — {{milestoneId}}
+
+## Working Directory
+
+Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory.
+
+{{inlinedContext}}
+
+{{skillActivation}}
+
+---
+
+## Deploy Instructions
+
+**Milestone:** `{{milestoneId}}` — {{milestoneTitle}}
+**Deploy target:** `{{deployTarget}}`
+**Deploy command:** `{{deployCommand}}`
+**Expected live URL:** `{{deployedUrl}}`
+
+You are the deployment agent. Your job is to ship the completed, validated milestone to production.
+
+### Pre-flight checks (run before deploying)
+
+1. Verify the release record exists for this milestone (`release_records` in sf.db or `{{releaseVersion}}` tag in git).
+2. Verify no un-merged worktree conflicts (`git status` clean).
+3. Verify the deploy command exists and is executable.
+4. If a `.env.deploy` or environment variable list is specified in `preferences.yaml` under `deploy.env_check`, verify those vars are set.
+
+### Deploy execution
+
+Run `{{deployCommand}}` and capture all stdout/stderr.
+
+Record the deploy run to the database:
+- `INSERT INTO deploy_runs (id, milestone_id, target, command, status, exit_code, output, deployed_url, created_at, finished_at)`
+- Use a UUID for `id`.
+- Set `status = 'success'` if exit code is 0, `status = 'failed'` otherwise.
+
+### Post-deploy
+
+If the deploy succeeded:
+- Write `deployed_url` to the deploy_runs row.
+- Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `artifact_type: "DEPLOY"`, and a brief deploy report as content.
+
+If the deploy failed:
+- Set `status = 'failed'` in deploy_runs.
+- Call `sf_summary_save` with the failure output as content.
+- Output `blocked` — do NOT attempt to fix the deploy failure inline; it requires a repair unit.
+
+### Report sf-internal observations
+
+If you observe sf-the-tool friction during this unit, file via `sf_self_report`.
+
+When done, say: "Deploy {{milestoneId}} complete."
+
+---
+
+**After completing this step, output exactly one of these markers:**
+
+- `complete` if deploy succeeded
+- `blocked` if deploy failed or pre-flight failed
+- `giving_up` if the deploy environment is fundamentally broken
diff --git a/src/resources/extensions/sf/prompts/release.md b/src/resources/extensions/sf/prompts/release.md
new file mode 100644
index 000000000..85f9331cd
--- /dev/null
+++ b/src/resources/extensions/sf/prompts/release.md
@@ -0,0 +1,61 @@
+You are executing SF autonomous mode.
+
+## UNIT: Release — {{milestoneId}}
+
+## Working Directory
+
+Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory.
+
+{{inlinedContext}}
+
+{{skillActivation}}
+
+---
+
+## Release Instructions
+
+**Milestone:** `{{milestoneId}}` — {{milestoneTitle}}
+**Current version:** `{{currentVersion}}`
+**Release type:** `{{releaseType}}` (major / minor / patch)
+**Publish channel:** `{{publishChannel}}` (npm / docker / github-release / none)
+
+You are the release agent. Your job is to version, tag, changelog, and optionally publish the completed milestone.
+
+### Steps
+
+1. **Determine new version** — apply `{{releaseType}}` bump to `{{currentVersion}}`. Use semver. If `package.json` exists, read the current version from it.
+
+2. **Update version files** — update `package.json` (and any `packages/*/package.json` if this is a monorepo) with the new version. Update `version.txt` or `VERSION` if present.
+
+3. **Generate changelog entry** — summarise the milestone's completed slices into a CHANGELOG.md entry under `## [{{newVersion}}] - {{today}}`. Use the slice SUMMARY files as source material. Be concise: one bullet per slice.
+
+4. **Commit the release** — `git add -A && git commit -m "chore(release): {{newVersion}}"`.
+
+5. **Tag** — `git tag -a v{{newVersion}} -m "Release {{newVersion}} — {{milestoneTitle}}"`.
+
+6. **Publish** (if `{{publishChannel}}` is not `none`):
+ - `npm`: run `npm publish --access public` (or `npm publish` for private).
+ - `docker`: run the `deploy.publish_command` from preferences.
+ - `github-release`: create a GitHub release via `gh release create v{{newVersion}} --title "{{newVersion}}" --notes "$(cat CHANGELOG.md | head -50)"`.
+
+7. **Record to DB** — INSERT into `release_records (id, milestone_id, version, prev_version, changelog_entry, git_tag, published, created_at)`.
+
+8. **Call `sf_summary_save`** with `milestone_id: {{milestoneId}}`, `artifact_type: "RELEASE"`, and the changelog entry as content.
+
+### On failure
+
+If publish fails (network error, auth error), set `published = 0` in release_records, commit and tag are still valid. Output `blocked` with a clear reason.
+
+### Report sf-internal observations
+
+If you observe sf-the-tool friction during this unit, file via `sf_self_report`.
+
+When done, say: "Release {{newVersion}} complete."
+
+---
+
+**After completing this step, output exactly one of these markers:**
+
+- `complete` if release succeeded (publish optional)
+- `blocked` if a required step failed
+- `giving_up` if version state is corrupted and cannot be resolved
diff --git a/src/resources/extensions/sf/prompts/rollback.md b/src/resources/extensions/sf/prompts/rollback.md
new file mode 100644
index 000000000..41c070cdf
--- /dev/null
+++ b/src/resources/extensions/sf/prompts/rollback.md
@@ -0,0 +1,61 @@
+You are executing SF autonomous mode.
+
+## UNIT: Rollback — {{milestoneId}}
+
+## Working Directory
+
+Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory.
+
+{{inlinedContext}}
+
+{{skillActivation}}
+
+---
+
+## Rollback Instructions
+
+**Milestone:** `{{milestoneId}}` — {{milestoneTitle}}
+**Failed deploy run:** `{{deployRunId}}`
+**Deploy target:** `{{deployTarget}}`
+**Rollback reason:** `{{rollbackReason}}`
+
+You are the rollback agent. A smoke test failed after deployment. Your job is to revert the production environment to the last known-good state.
+
+### Rollback steps
+
+1. **Identify the rollback command** from `preferences.yaml` under `deploy.rollback_command`. If not set, derive it:
+ - Fly.io: `fly releases rollback --app {{appName}}`
+ - Docker/Kubernetes: re-deploy the previous image tag
+ - npm: no rollback (record only)
+ - Vercel: `vercel rollback`
+ - Custom: run `deploy.rollback_command`
+
+2. **Execute the rollback** — run the command and capture output.
+
+3. **Verify rollback** — re-run the health check against `{{deployedUrl}}`. Confirm the previous version is live.
+
+4. **Record to DB** — INSERT into `rollback_runs (id, deploy_run_id, milestone_id, reason, status, output, created_at, finished_at)`.
+ - Set `status = 'success'` if the health check passes post-rollback.
+ - Set `status = 'failed'` if the environment is still broken.
+
+5. **Update the deploy run** — UPDATE `deploy_runs SET status = 'rolled-back' WHERE id = '{{deployRunId}}'`.
+
+6. **Call `sf_summary_save`** with `milestone_id: {{milestoneId}}`, `artifact_type: "ROLLBACK"`, and the rollback report as content.
+
+### After rollback
+
+Output `blocked` — the milestone requires a repair unit to address the smoke failure before re-attempting deploy. Do NOT attempt to fix the underlying bug inline.
+
+### Report sf-internal observations
+
+If you observe sf-the-tool friction during this unit, file via `sf_self_report`.
+
+When done, say: "Rollback {{milestoneId}} complete."
+
+---
+
+**After completing this step, output exactly one of these markers:**
+
+- `complete` if rollback succeeded and environment is stable
+- `blocked` if rollback failed or environment is still broken
+- `giving_up` if the production environment is unrecoverable without human intervention
diff --git a/src/resources/extensions/sf/prompts/smoke-production.md b/src/resources/extensions/sf/prompts/smoke-production.md
new file mode 100644
index 000000000..fd955e180
--- /dev/null
+++ b/src/resources/extensions/sf/prompts/smoke-production.md
@@ -0,0 +1,67 @@
+You are executing SF autonomous mode.
+
+## UNIT: Smoke Test Production — {{milestoneId}}
+
+## Working Directory
+
+Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory.
+
+{{inlinedContext}}
+
+{{skillActivation}}
+
+---
+
+## Smoke Test Instructions
+
+**Milestone:** `{{milestoneId}}` — {{milestoneTitle}}
+**Live URL:** `{{deployedUrl}}`
+**Deploy run ID:** `{{deployRunId}}`
+
+You are the production smoke tester. Your job is to verify the live deployment is healthy and correct — not to run unit tests, but to exercise the real running service.
+
+### Smoke checks to run
+
+For each check below, record: the check description, the tool/command used, the actual response observed, and PASS / FAIL.
+
+1. **Health endpoint** — `GET {{deployedUrl}}/health` (or `/`, or the configured `deploy.health_path`). Expect HTTP 200.
+2. **Version check** — if `deploy.version_path` is set, `GET {{deployedUrl}}/{{versionPath}}` and verify the version matches `{{releaseVersion}}`.
+3. **Critical path** — exercise the minimum viable user flow defined in `deploy.smoke_checks` from preferences, or the UAT ASSESSMENT for this milestone's first slice.
+4. **Error rate** — if observability is configured (`deploy.metrics_url`), check error rate is < 1%.
+
+Use `curl`, browser tools, or the `bash` tool to execute checks. Capture raw HTTP responses as evidence.
+
+### Record results
+
+INSERT a row into `smoke_results`:
+- `id`: UUID
+- `deploy_run_id`: `{{deployRunId}}`
+- `milestone_id`: `{{milestoneId}}`
+- `url`: `{{deployedUrl}}`
+- `status`: `'pass'` or `'fail'`
+- `verdict`: `'PASS'` or `'FAIL'`
+- `checks_json`: JSON array of `{ check, result, evidence }` objects
+- `created_at` / `finished_at`
+
+Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `artifact_type: "SMOKE"`, and the full smoke result markdown.
+
+### On failure
+
+If any critical check fails:
+- Set `verdict = 'FAIL'` in smoke_results.
+- Output `blocked`.
+- The autonomous loop will trigger a rollback unit next.
+
+### Report sf-internal observations
+
+If you observe sf-the-tool friction during this unit, file via `sf_self_report`.
+
+When done, say: "Smoke {{milestoneId}} complete — verdict: {{verdict}}."
+
+---
+
+**After completing this step, output exactly one of these markers:**
+
+- `complete` if all smoke checks passed
+- `blocked` if any critical check failed
+- `giving_up` if the production environment is unreachable
diff --git a/src/resources/extensions/sf/sf-db.js b/src/resources/extensions/sf/sf-db.js
index 3ef625684..d47e51df5 100644
--- a/src/resources/extensions/sf/sf-db.js
+++ b/src/resources/extensions/sf/sf-db.js
@@ -577,6 +577,70 @@ function ensureUokMessageTables(db) {
"CREATE INDEX IF NOT EXISTS idx_uok_messages_sent ON uok_messages(sent_at DESC)",
);
}
+function ensureDeployTables(db) {
+ db.exec(`
+ CREATE TABLE IF NOT EXISTS deploy_runs (
+ id TEXT PRIMARY KEY,
+ milestone_id TEXT NOT NULL,
+ target TEXT NOT NULL,
+ command TEXT NOT NULL,
+ status TEXT NOT NULL DEFAULT 'pending',
+ exit_code INTEGER DEFAULT NULL,
+ output TEXT DEFAULT NULL,
+ deployed_url TEXT DEFAULT NULL,
+ created_at TEXT NOT NULL,
+ finished_at TEXT DEFAULT NULL
+ )
+ `);
+ db.exec(`
+ CREATE TABLE IF NOT EXISTS smoke_results (
+ id TEXT PRIMARY KEY,
+ deploy_run_id TEXT NOT NULL,
+ milestone_id TEXT NOT NULL,
+ url TEXT NOT NULL,
+ status TEXT NOT NULL DEFAULT 'pending',
+ verdict TEXT DEFAULT NULL,
+ checks_json TEXT NOT NULL DEFAULT '[]',
+ created_at TEXT NOT NULL,
+ finished_at TEXT DEFAULT NULL,
+ FOREIGN KEY (deploy_run_id) REFERENCES deploy_runs(id) ON DELETE CASCADE
+ )
+ `);
+ db.exec(`
+ CREATE TABLE IF NOT EXISTS release_records (
+ id TEXT PRIMARY KEY,
+ milestone_id TEXT NOT NULL,
+ version TEXT NOT NULL,
+ prev_version TEXT DEFAULT NULL,
+ changelog_entry TEXT DEFAULT NULL,
+ git_tag TEXT DEFAULT NULL,
+ published INTEGER NOT NULL DEFAULT 0,
+ created_at TEXT NOT NULL
+ )
+ `);
+ db.exec(`
+ CREATE TABLE IF NOT EXISTS rollback_runs (
+ id TEXT PRIMARY KEY,
+ deploy_run_id TEXT NOT NULL,
+ milestone_id TEXT NOT NULL,
+ reason TEXT NOT NULL,
+ status TEXT NOT NULL DEFAULT 'pending',
+ output TEXT DEFAULT NULL,
+ created_at TEXT NOT NULL,
+ finished_at TEXT DEFAULT NULL,
+ FOREIGN KEY (deploy_run_id) REFERENCES deploy_runs(id) ON DELETE CASCADE
+ )
+ `);
+ db.exec(
+ "CREATE INDEX IF NOT EXISTS idx_deploy_runs_milestone ON deploy_runs(milestone_id, created_at DESC)",
+ );
+ db.exec(
+ "CREATE INDEX IF NOT EXISTS idx_smoke_results_deploy ON smoke_results(deploy_run_id)",
+ );
+ db.exec(
+ "CREATE INDEX IF NOT EXISTS idx_release_records_milestone ON release_records(milestone_id, created_at DESC)",
+ );
+}
function ensureSleeptimeQueueTable(db) {
db.exec(`
CREATE TABLE IF NOT EXISTS sleeptime_consolidation_queue (
@@ -1307,6 +1371,7 @@ function initSchema(db, fileBacked) {
ensureSessionTables(db);
ensureSessionSnapshotTable(db);
ensureUokMessageTables(db);
+ ensureDeployTables(db);
ensureSleeptimeQueueTable(db);
ensureSpecSchemaTables(db);
ensureTaskFrontmatterColumns(db);
@@ -2933,6 +2998,18 @@ function migrateSchema(db) {
":applied_at": new Date().toISOString(),
});
}
+ if (currentVersion < 51) {
+ // Add deploy/smoke/release/rollback tables — closes the vision→production loop.
+ // deploy_runs tracks each deployment attempt; smoke_results tracks live verification;
+ // release_records tracks version bumps and publishes; rollback_runs tracks reversions.
+ ensureDeployTables(db);
+ db.prepare(
+ "INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
+ ).run({
+ ":version": 51,
+ ":applied_at": new Date().toISOString(),
+ });
+ }
db.exec("COMMIT");
} catch (err) {
db.exec("ROLLBACK");
diff --git a/src/resources/extensions/sf/tests/sf-db-migration.test.mjs b/src/resources/extensions/sf/tests/sf-db-migration.test.mjs
index 1fd7ce610..19af0c72d 100644
--- a/src/resources/extensions/sf/tests/sf-db-migration.test.mjs
+++ b/src/resources/extensions/sf/tests/sf-db-migration.test.mjs
@@ -223,7 +223,7 @@ test("openDatabase_migrates_v27_tasks_without_created_at_through_spec_backfill",
const version = db
.prepare("SELECT MAX(version) AS version FROM schema_version")
.get();
- assert.equal(version.version, 49);
+ assert.equal(version.version, 51);
const taskSpec = db
.prepare(
"SELECT milestone_id, slice_id, task_id, verify FROM task_specs WHERE task_id = 'T01'",