feat(deploy): vision-to-production pipeline — deploy/smoke/release/rollback/challenge

- sf-db.js: ensureDeployTables() adds deploy_runs, smoke_results, release_records,
  rollback_runs (schema v51); migration block follows sleeptime v50
- preferences.js: deploy block merged (target, command, url, auto_release,
  release_type, publish_channel, adversarial_review)
- auto-prompts.js: buildDeployPrompt, buildSmokeProductionPrompt,
  buildReleasePrompt, buildRollbackPrompt, buildChallengePrompt
- auto-dispatch.js: 5 new rules — completing-milestone→challenge,
  completing-milestone→release, release-done→deploy,
  deploy-done→smoke-production, smoke-failed→rollback
- prompts/: deploy.md, smoke-production.md, release.md, rollback.md, challenge.md
- sf-db-migration test: bump expected schema version 49→51

The autonomous loop can now carry a milestone from complete-milestone all the
way to a live, smoke-verified, tagged release. Each stage is gated by prefs
(auto_release, deploy.target, deploy.url) so projects opt in per stage.
Challenge (adversarial review) runs before release when adversarial_review is set.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Mikael Hugo 2026-05-09 15:25:47 +02:00
parent d09c8282d0
commit 3b249c4144
10 changed files with 823 additions and 1 deletions

View file

@ -17,6 +17,11 @@ import {
buildDiscussMilestonePrompt,
buildDiscussProjectPrompt,
buildDiscussRequirementsPrompt,
buildDeployPrompt,
buildSmokeProductionPrompt,
buildReleasePrompt,
buildRollbackPrompt,
buildChallengePrompt,
buildExecuteTaskPrompt,
buildGateEvaluatePrompt,
buildParallelResearchSlicesPrompt,
@ -1699,6 +1704,186 @@ export const DISPATCH_RULES = [
};
},
},
{
name: "completing-milestone → challenge",
match: async ({ state, mid, midTitle, basePath, prefs }) => {
if (state.phase !== "completing-milestone") return null;
if (!prefs?.deploy?.adversarial_review) return null;
// Only trigger if no challenge assessment exists for this milestone yet
try {
const { getDatabase } = await import("./sf-db.js");
const db = getDatabase(basePath);
const row = db
.prepare(
"SELECT id FROM assessments WHERE milestone_id = ? AND artifact_type = 'CHALLENGE' LIMIT 1",
)
.get(mid);
if (row) return null;
} catch {
return null;
}
return {
action: "dispatch",
unitType: "challenge",
unitId: `challenge-${mid}`,
prompt: await buildChallengePrompt(
mid,
midTitle,
"milestone",
prefs?.deploy?.adversarial_mode ?? "red-team",
basePath,
),
};
},
},
{
name: "completing-milestone → release",
match: async ({ state, mid, midTitle, basePath, prefs }) => {
if (state.phase !== "completing-milestone") return null;
if (!prefs?.deploy?.auto_release) return null;
// Only if no release record exists for this milestone yet
let hasRelease = false;
try {
const { getDatabase } = await import("./sf-db.js");
const db = getDatabase(basePath);
const row = db
.prepare(
"SELECT id FROM release_records WHERE milestone_id = ? LIMIT 1",
)
.get(mid);
hasRelease = !!row;
} catch {
// DB unavailable — skip this rule
return null;
}
if (hasRelease) return null;
return {
action: "dispatch",
unitType: "release",
unitId: `release-${mid}`,
prompt: await buildReleasePrompt(mid, midTitle, basePath),
};
},
},
{
name: "release-done → deploy",
match: async ({ state, mid, midTitle, basePath, prefs }) => {
if (state.phase !== "completing-milestone") return null;
if (!prefs?.deploy?.target || !prefs?.deploy?.command) return null;
// Only trigger if a release record exists but no deploy run exists
let hasRelease = false;
let hasDeployRun = false;
try {
const { getDatabase } = await import("./sf-db.js");
const db = getDatabase(basePath);
const rr = db
.prepare(
"SELECT id FROM release_records WHERE milestone_id = ? LIMIT 1",
)
.get(mid);
hasRelease = !!rr;
const dr = db
.prepare(
"SELECT id FROM deploy_runs WHERE milestone_id = ? AND status != 'rolled-back' LIMIT 1",
)
.get(mid);
hasDeployRun = !!dr;
} catch {
return null;
}
if (!hasRelease || hasDeployRun) return null;
return {
action: "dispatch",
unitType: "deploy",
unitId: `deploy-${mid}`,
prompt: await buildDeployPrompt(mid, midTitle, basePath),
};
},
},
{
name: "deploy-done → smoke-production",
match: async ({ state, mid, midTitle, basePath, prefs }) => {
if (state.phase !== "completing-milestone") return null;
if (!prefs?.deploy?.url) return null;
let deployRunId = null;
let hasSmokeResult = false;
try {
const { getDatabase } = await import("./sf-db.js");
const db = getDatabase(basePath);
const dr = db
.prepare(
"SELECT id FROM deploy_runs WHERE milestone_id = ? AND status = 'success' ORDER BY created_at DESC LIMIT 1",
)
.get(mid);
if (!dr) return null;
deployRunId = dr.id;
const sr = db
.prepare(
"SELECT id FROM smoke_results WHERE deploy_run_id = ? LIMIT 1",
)
.get(deployRunId);
hasSmokeResult = !!sr;
} catch {
return null;
}
if (!deployRunId || hasSmokeResult) return null;
return {
action: "dispatch",
unitType: "smoke-production",
unitId: `smoke-${mid}`,
prompt: await buildSmokeProductionPrompt(
mid,
midTitle,
deployRunId,
basePath,
),
};
},
},
{
name: "smoke-failed → rollback",
match: async ({ state, mid, midTitle, basePath, prefs }) => {
if (state.phase !== "completing-milestone") return null;
if (!prefs?.deploy?.target) return null;
let deployRunId = null;
let failReason = "Smoke check failed";
try {
const { getDatabase } = await import("./sf-db.js");
const db = getDatabase(basePath);
const sr = db
.prepare(
"SELECT sr.deploy_run_id, sr.verdict FROM smoke_results sr " +
"WHERE sr.milestone_id = ? AND sr.verdict = 'FAIL' " +
"ORDER BY sr.created_at DESC LIMIT 1",
)
.get(mid);
if (!sr) return null;
deployRunId = sr.deploy_run_id;
// Only trigger if there is no rollback run yet for this deploy
const rr = db
.prepare(
"SELECT id FROM rollback_runs WHERE deploy_run_id = ? LIMIT 1",
)
.get(deployRunId);
if (rr) return null;
} catch {
return null;
}
if (!deployRunId) return null;
return {
action: "dispatch",
unitType: "rollback",
unitId: `rollback-${mid}`,
prompt: await buildRollbackPrompt(
mid,
midTitle,
deployRunId,
failReason,
basePath,
),
};
},
},
{
name: "complete → stop",
match: async ({ state }) => {

View file

@ -3021,3 +3021,233 @@ export async function buildRewriteDocsPrompt(
overridesPath: relSfRootFile("OVERRIDES"),
});
}
/**
 * Build the deploy prompt for a completed milestone.
 *
 * Purpose: renders the `deploy` template, which instructs the agent to run the
 * project's configured deploy command, record the outcome to deploy_runs in
 * sf.db, and surface the deployed URL so the smoke unit can verify it.
 *
 * Consumer: auto-dispatch.js `deploy` unit type, triggered by the
 * "release-done → deploy" rule when both prefs.deploy.target and
 * prefs.deploy.command are set.
 *
 * @param {string} mid - Milestone id.
 * @param {string} midTitle - Milestone title.
 * @param {string} base - Project working directory.
 * @returns {Promise<string>} The rendered prompt (whatever `loadPrompt` returns).
 */
export async function buildDeployPrompt(mid, midTitle, base) {
  const prefs = loadEffectiveSFPreferences();
  const deploy = prefs?.preferences?.deploy ?? {};

  // The deploy template's pre-flight step references `{{releaseVersion}}`, so
  // the variable must always be supplied. Resolve it from the newest release
  // record for this milestone; the lookup is best-effort and falls back to
  // "unknown" so prompt building never fails because of it.
  // NOTE(review): assumes sf-db.js sits beside this module, as it does for
  // auto-dispatch.js — confirm.
  let releaseVersion = "unknown";
  try {
    const { getDatabase } = await import("./sf-db.js");
    const release = getDatabase(base)
      .prepare(
        "SELECT version, git_tag FROM release_records WHERE milestone_id = ? ORDER BY created_at DESC LIMIT 1",
      )
      .get(mid);
    releaseVersion = release?.git_tag ?? release?.version ?? releaseVersion;
  } catch {
    // DB unavailable — keep the fallback value.
  }

  // Only the "project" artifact is inlined for this unit.
  const resolveArtifact = async (key) => {
    switch (key) {
      case "project":
        return await inlineProjectFromDb(base);
      default:
        return null;
    }
  };
  const composed = await composeInlinedContext("deploy", resolveArtifact);
  const inlinedContext = capPreamble(
    `## Inlined Context (preloaded — do not re-read these files)\n\n${composed}`,
  );
  return loadPrompt("deploy", {
    workingDirectory: base,
    milestoneId: mid,
    milestoneTitle: midTitle,
    deployTarget: deploy.target ?? "custom",
    deployCommand: deploy.command ?? "echo 'No deploy command configured'",
    deployedUrl: deploy.url ?? "",
    releaseVersion,
    inlinedContext,
    skillActivation: buildSkillActivationBlock({
      base,
      milestoneId: mid,
      extraContext: [inlinedContext],
      unitType: "deploy",
    }),
  });
}
/**
 * Build the smoke-production prompt for a deployed milestone.
 *
 * Purpose: renders the `smoke-production` template, which instructs the agent
 * to exercise the live production URL after deploy, write smoke_results to
 * sf.db, and report `blocked` on failure so rollback can run.
 *
 * Consumer: auto-dispatch.js `smoke-production` unit type, triggered by the
 * "deploy-done → smoke-production" rule when prefs.deploy.url is set.
 *
 * @param {string} mid - Milestone id.
 * @param {string} midTitle - Milestone title.
 * @param {string} deployRunId - Id of the deploy_runs row being verified.
 * @param {string} base - Project working directory.
 * @returns {Promise<string>} The rendered prompt (whatever `loadPrompt` returns).
 */
export async function buildSmokeProductionPrompt(
  mid,
  midTitle,
  deployRunId,
  base,
) {
  const prefs = loadEffectiveSFPreferences();
  const deploy = prefs?.preferences?.deploy ?? {};

  // The template's version check compares the live version against
  // `{{releaseVersion}}`; an empty value gives the agent nothing to compare.
  // Resolve it from the newest release record for this milestone. The lookup
  // is best-effort: on any failure the previous empty default is kept.
  // NOTE(review): assumes sf-db.js sits beside this module, as it does for
  // auto-dispatch.js — confirm.
  let releaseVersion = "";
  try {
    const { getDatabase } = await import("./sf-db.js");
    const release = getDatabase(base)
      .prepare(
        "SELECT version FROM release_records WHERE milestone_id = ? ORDER BY created_at DESC LIMIT 1",
      )
      .get(mid);
    releaseVersion = release?.version ?? releaseVersion;
  } catch {
    // DB unavailable — keep the empty default.
  }

  // Only the "project" artifact is inlined for this unit.
  const resolveArtifact = async (key) => {
    switch (key) {
      case "project":
        return await inlineProjectFromDb(base);
      default:
        return null;
    }
  };
  const composed = await composeInlinedContext(
    "smoke-production",
    resolveArtifact,
  );
  const inlinedContext = capPreamble(
    `## Inlined Context (preloaded — do not re-read these files)\n\n${composed}`,
  );
  return loadPrompt("smoke-production", {
    workingDirectory: base,
    milestoneId: mid,
    milestoneTitle: midTitle,
    deployRunId,
    deployedUrl: deploy.url ?? "",
    versionPath: deploy.version_path ?? "",
    releaseVersion,
    // The verdict is not known at prompt-build time. The template echoes this
    // value in its closing "say:" line, so pass a fill-in placeholder instead
    // of a literal verdict the agent would repeat verbatim.
    verdict: "<PASS|FAIL>",
    inlinedContext,
    skillActivation: buildSkillActivationBlock({
      base,
      milestoneId: mid,
      extraContext: [inlinedContext],
      unitType: "smoke-production",
    }),
  });
}
/**
 * Build the release prompt for a completed, validated milestone.
 *
 * Purpose: renders the `release` template, which instructs the agent to bump
 * semver, write CHANGELOG, commit, tag, and optionally publish
 * (npm/docker/github-release), then record to release_records in sf.db.
 *
 * Consumer: auto-dispatch.js `release` unit type, triggered by the
 * "completing-milestone → release" rule when prefs.deploy.auto_release is
 * truthy.
 *
 * @param {string} mid - Milestone id.
 * @param {string} midTitle - Milestone title.
 * @param {string} base - Project working directory.
 * @returns {Promise<string>} The rendered prompt (whatever `loadPrompt` returns).
 */
export async function buildReleasePrompt(mid, midTitle, base) {
  const prefs = loadEffectiveSFPreferences();
  const deploy = prefs?.preferences?.deploy ?? {};
  // Only the "project" artifact is inlined for this unit.
  const resolveArtifact = async (key) => {
    switch (key) {
      case "project":
        return await inlineProjectFromDb(base);
      default:
        return null;
    }
  };
  const composed = await composeInlinedContext("release", resolveArtifact);
  const inlinedContext = capPreamble(
    `## Inlined Context (preloaded — do not re-read these files)\n\n${composed}`,
  );
  return loadPrompt("release", {
    workingDirectory: base,
    milestoneId: mid,
    milestoneTitle: midTitle,
    currentVersion: deploy.current_version ?? "0.0.0",
    releaseType: deploy.release_type ?? "patch",
    publishChannel: deploy.publish_channel ?? "none",
    // The release template uses `{{newVersion}}` throughout, but the agent
    // determines the new version itself in step 1 (possibly from package.json),
    // so it cannot be computed reliably here. Supply an explicit fill-in
    // placeholder so the template variable is always defined.
    newVersion: "<new-version>",
    // UTC calendar date (YYYY-MM-DD) used for the changelog heading.
    today: new Date().toISOString().slice(0, 10),
    inlinedContext,
    skillActivation: buildSkillActivationBlock({
      base,
      milestoneId: mid,
      extraContext: [inlinedContext],
      unitType: "release",
    }),
  });
}
/**
 * Render the `rollback` prompt used after a production smoke check has failed.
 *
 * Purpose: the rendered template directs the agent to revert the failed
 * deployment to the last known-good state, insert a rollback_runs row, and
 * mark the deploy_runs row as rolled back in sf.db.
 *
 * Consumer: auto-dispatch.js `rollback` unit type, dispatched by the
 * "smoke-failed → rollback" rule when prefs.deploy.target is set.
 *
 * @param {string} mid - Milestone id.
 * @param {string} midTitle - Milestone title.
 * @param {string} deployRunId - Id of the deploy run being rolled back.
 * @param {string} rollbackReason - Reason text shown in the prompt.
 * @param {string} base - Project working directory.
 * @returns {Promise<string>} The rendered prompt (whatever `loadPrompt` returns).
 */
export async function buildRollbackPrompt(
  mid,
  midTitle,
  deployRunId,
  rollbackReason,
  base,
) {
  const deployPrefs = loadEffectiveSFPreferences()?.preferences?.deploy ?? {};

  // "project" is the only artifact key this unit inlines; others resolve to null.
  const resolveArtifact = async (key) =>
    key === "project" ? await inlineProjectFromDb(base) : null;

  const contextBody = await composeInlinedContext("rollback", resolveArtifact);
  const inlinedContext = capPreamble(
    `## Inlined Context (preloaded — do not re-read these files)\n\n${contextBody}`,
  );

  const templateVars = {
    workingDirectory: base,
    milestoneId: mid,
    milestoneTitle: midTitle,
    deployRunId,
    deployTarget: deployPrefs.target ?? "custom",
    deployedUrl: deployPrefs.url ?? "",
    // Falls back to the milestone id when no app name is configured.
    appName: deployPrefs.app_name ?? mid,
    rollbackReason,
    inlinedContext,
    skillActivation: buildSkillActivationBlock({
      base,
      milestoneId: mid,
      extraContext: [inlinedContext],
      unitType: "rollback",
    }),
  };
  return loadPrompt("rollback", templateVars);
}
/**
 * Render the `challenge` (adversarial review) prompt for a milestone.
 *
 * Purpose: the rendered template puts the agent in the adversary role
 * (red-team, assumption-audit, failure-injection or adversarial-spec) against
 * the named target and asks it to report `blocked` when the verdict is
 * NEEDS-REMEDIATION.
 *
 * Consumer: auto-dispatch.js `challenge` unit type; the
 * "completing-milestone → challenge" rule calls it with target "milestone"
 * and the mode taken from prefs.deploy.adversarial_mode.
 *
 * @param {string} mid - Milestone id.
 * @param {string} midTitle - Milestone title.
 * @param {string} [challengeTarget] - What to attack; defaults to "milestone".
 * @param {string} [challengeMode] - Challenge mode; defaults to "red-team".
 * @param {string} base - Project working directory.
 * @returns {Promise<string>} The rendered prompt (whatever `loadPrompt` returns).
 */
export async function buildChallengePrompt(
  mid,
  midTitle,
  challengeTarget,
  challengeMode,
  base,
) {
  // "project" is the only artifact key this unit inlines; others resolve to null.
  const resolveArtifact = async (key) =>
    key === "project" ? await inlineProjectFromDb(base) : null;

  const contextBody = await composeInlinedContext("challenge", resolveArtifact);
  const inlinedContext = capPreamble(
    `## Inlined Context (preloaded — do not re-read these files)\n\n${contextBody}`,
  );

  const templateVars = {
    workingDirectory: base,
    milestoneId: mid,
    milestoneTitle: midTitle,
    challengeTarget: challengeTarget ?? "milestone",
    challengeMode: challengeMode ?? "red-team",
    inlinedContext,
    skillActivation: buildSkillActivationBlock({
      base,
      milestoneId: mid,
      extraContext: [inlinedContext],
      unitType: "challenge",
    }),
  };
  return loadPrompt("challenge", templateVars);
}

View file

@ -602,6 +602,11 @@ function mergePreferences(base, override) {
subscription: override.subscription ?? base.subscription,
allow_flat_rate_providers:
override.allow_flat_rate_providers ?? base.allow_flat_rate_providers,
// ── Production delivery ──
deploy:
base.deploy || override.deploy
? { ...(base.deploy ?? {}), ...(override.deploy ?? {}) }
: undefined,
};
}
function mergeStringLists(base, override) {

View file

@ -0,0 +1,73 @@
You are executing SF autonomous mode.
## UNIT: Challenge (Adversarial Review) — {{milestoneId}}
## Working Directory
Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory.
{{inlinedContext}}
{{skillActivation}}
---
## Challenge Instructions
**Milestone:** `{{milestoneId}}` — {{milestoneTitle}}
**Target:** `{{challengeTarget}}` (slice / component / architecture / security / assumptions)
**Challenge mode:** `{{challengeMode}}` (red-team / assumption-audit / failure-injection / adversarial-spec)
You are the adversary agent. Your job is NOT to complete work — it is to rigorously attack the correctness, safety, and assumptions of what has been built or planned. You represent the failure modes the product team has not considered.
### Challenge rules
1. **Be ruthless but specific.** Every finding must include: what breaks, under what condition, and what the impact is.
2. **No false positives for diplomacy.** If something is fine, say so. Do not manufacture findings to seem thorough.
3. **Evidence-first.** For each finding, provide a concrete reproduction path: a command, a code path, an input, or a condition.
4. **Prioritise by blast radius.** Data loss, security, and correctness > performance > UX > style.
### What to challenge
Based on `{{challengeMode}}`:
- **red-team** — attempt to break the system: inject invalid inputs, exceed rate limits, trigger edge cases, exploit assumptions in the implementation. Try to cause data loss, auth bypass, or incorrect output.
- **assumption-audit** — enumerate every assumption in the design docs, milestone spec, and slice plans. For each, state: what happens if the assumption is wrong? What is the evidence it is valid?
- **failure-injection** — simulate dependency failures: DB unavailable, API timeout, disk full, OOM, concurrent writes. Does the system degrade gracefully or crash?
- **adversarial-spec** — re-read the spec as an adversarial user. Find underspecified behaviour, ambiguous edge cases, missing error states, and spec contradictions.
### Output format
For each finding:
```
## Finding [N]: <title>
**Severity:** critical / high / medium / low
**Mode:** red-team / assumption / failure / spec
**Condition:** <what must be true for this to trigger>
**Impact:** <what breaks and how badly>
**Evidence:** <command, code path, or reproduction steps>
**Recommendation:** <minimal fix or acknowledgement that the risk is accepted>
```
After all findings, provide an **Overall Verdict**:
- `PASS` — no significant findings; the system is robust enough to proceed.
- `NEEDS-REMEDIATION` — one or more critical/high findings must be addressed before deploy.
- `ADVISORY` — findings are low severity; proceed with awareness.
Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `artifact_type: "CHALLENGE"`, and the full challenge report as content.
### Report sf-internal observations
If you observe sf-the-tool friction during this unit, file via `sf_self_report`.
When done, say: "Challenge {{milestoneId}} complete — verdict: <verdict>."
---
**After completing this step, output exactly one of these markers:**
- `<turn_status>complete</turn_status>` if verdict is PASS or ADVISORY
- `<turn_status>blocked</turn_status>` if verdict is NEEDS-REMEDIATION
- `<turn_status>giving_up</turn_status>` if the target is too underspecified to challenge meaningfully

View file

@ -0,0 +1,63 @@
You are executing SF autonomous mode.
## UNIT: Deploy — {{milestoneId}}
## Working Directory
Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory.
{{inlinedContext}}
{{skillActivation}}
---
## Deploy Instructions
**Milestone:** `{{milestoneId}}` — {{milestoneTitle}}
**Deploy target:** `{{deployTarget}}`
**Deploy command:** `{{deployCommand}}`
**Expected live URL:** `{{deployedUrl}}`
You are the deployment agent. Your job is to ship the completed, validated milestone to production.
### Pre-flight checks (run before deploying)
1. Verify the release record exists for this milestone (`release_records` in sf.db or `{{releaseVersion}}` tag in git).
2. Verify no un-merged worktree conflicts (`git status` clean).
3. Verify the deploy command exists and is executable.
4. If a `.env.deploy` or environment variable list is specified in `preferences.yaml` under `deploy.env_check`, verify those vars are set.
### Deploy execution
Run `{{deployCommand}}` and capture all stdout/stderr.
Record the deploy run to the database:
- `INSERT INTO deploy_runs (id, milestone_id, target, command, status, exit_code, output, deployed_url, created_at, finished_at)`
- Use a UUID for `id`.
- Set `status = 'success'` if exit code is 0, `status = 'failed'` otherwise.
### Post-deploy
If the deploy succeeded:
- Write `deployed_url` to the deploy_runs row.
- Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `artifact_type: "DEPLOY"`, and a brief deploy report as content.
If the deploy failed:
- Set `status = 'failed'` in deploy_runs.
- Call `sf_summary_save` with the failure output as content.
- Output `<turn_status>blocked</turn_status>` — do NOT attempt to fix the deploy failure inline; it requires a repair unit.
### Report sf-internal observations
If you observe sf-the-tool friction during this unit, file via `sf_self_report`.
When done, say: "Deploy {{milestoneId}} complete."
---
**After completing this step, output exactly one of these markers:**
- `<turn_status>complete</turn_status>` if deploy succeeded
- `<turn_status>blocked</turn_status>` if deploy failed or pre-flight failed
- `<turn_status>giving_up</turn_status>` if the deploy environment is fundamentally broken

View file

@ -0,0 +1,61 @@
You are executing SF autonomous mode.
## UNIT: Release — {{milestoneId}}
## Working Directory
Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory.
{{inlinedContext}}
{{skillActivation}}
---
## Release Instructions
**Milestone:** `{{milestoneId}}` — {{milestoneTitle}}
**Current version:** `{{currentVersion}}`
**Release type:** `{{releaseType}}` (major / minor / patch)
**Publish channel:** `{{publishChannel}}` (npm / docker / github-release / none)
You are the release agent. Your job is to version, tag, changelog, and optionally publish the completed milestone.
### Steps
1. **Determine new version** — apply `{{releaseType}}` bump to `{{currentVersion}}`. Use semver. If `package.json` exists, read the current version from it.
2. **Update version files** — update `package.json` (and any `packages/*/package.json` if this is a monorepo) with the new version. Update `version.txt` or `VERSION` if present.
3. **Generate changelog entry** — summarise the milestone's completed slices into a CHANGELOG.md entry under `## [{{newVersion}}] - {{today}}`. Use the slice SUMMARY files as source material. Be concise: one bullet per slice.
4. **Commit the release** — `git add -A && git commit -m "chore(release): {{newVersion}}"`.
5. **Tag** — `git tag -a v{{newVersion}} -m "Release {{newVersion}} — {{milestoneTitle}}"`.
6. **Publish** (if `{{publishChannel}}` is not `none`):
- `npm`: run `npm publish --access public` (or `npm publish` for private).
- `docker`: run the `deploy.publish_command` from preferences.
- `github-release`: create a GitHub release via `gh release create v{{newVersion}} --title "{{newVersion}}" --notes "$(cat CHANGELOG.md | head -50)"`.
7. **Record to DB** — INSERT into `release_records (id, milestone_id, version, prev_version, changelog_entry, git_tag, published, created_at)`.
8. **Call `sf_summary_save`** with `milestone_id: {{milestoneId}}`, `artifact_type: "RELEASE"`, and the changelog entry as content.
### On failure
If publish fails (network error, auth error), set `published = 0` in release_records; the commit and tag remain valid and must not be reverted. Output `<turn_status>blocked</turn_status>` with a clear reason.
### Report sf-internal observations
If you observe sf-the-tool friction during this unit, file via `sf_self_report`.
When done, say: "Release {{newVersion}} complete."
---
**After completing this step, output exactly one of these markers:**
- `<turn_status>complete</turn_status>` if release succeeded (publish optional)
- `<turn_status>blocked</turn_status>` if a required step failed
- `<turn_status>giving_up</turn_status>` if version state is corrupted and cannot be resolved

View file

@ -0,0 +1,61 @@
You are executing SF autonomous mode.
## UNIT: Rollback — {{milestoneId}}
## Working Directory
Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory.
{{inlinedContext}}
{{skillActivation}}
---
## Rollback Instructions
**Milestone:** `{{milestoneId}}` — {{milestoneTitle}}
**Failed deploy run:** `{{deployRunId}}`
**Deploy target:** `{{deployTarget}}`
**Rollback reason:** `{{rollbackReason}}`
You are the rollback agent. A smoke test failed after deployment. Your job is to revert the production environment to the last known-good state.
### Rollback steps
1. **Identify the rollback command** from `preferences.yaml` under `deploy.rollback_command`. If not set, derive it:
- Fly.io: `fly releases rollback --app {{appName}}`
- Docker/Kubernetes: re-deploy the previous image tag
- npm: no rollback (record only)
- Vercel: `vercel rollback`
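- Custom: no default can be derived — `deploy.rollback_command` must be set. If it is missing, do not guess a command; report the rollback as failed.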
2. **Execute the rollback** — run the command and capture output.
3. **Verify rollback** — re-run the health check against `{{deployedUrl}}`. Confirm the previous version is live.
4. **Record to DB** — INSERT into `rollback_runs (id, deploy_run_id, milestone_id, reason, status, output, created_at, finished_at)`.
- Set `status = 'success'` if the health check passes post-rollback.
- Set `status = 'failed'` if the environment is still broken.
5. **Update the deploy run** — UPDATE `deploy_runs SET status = 'rolled-back' WHERE id = '{{deployRunId}}'`.
6. **Call `sf_summary_save`** with `milestone_id: {{milestoneId}}`, `artifact_type: "ROLLBACK"`, and the rollback report as content.
### After rollback
Output `<turn_status>blocked</turn_status>` — the milestone requires a repair unit to address the smoke failure before re-attempting deploy. Do NOT attempt to fix the underlying bug inline.
### Report sf-internal observations
If you observe sf-the-tool friction during this unit, file via `sf_self_report`.
When done, say: "Rollback {{milestoneId}} complete."
---
**After completing this step, output exactly one of these markers:**
- `<turn_status>complete</turn_status>` if rollback succeeded and environment is stable
- `<turn_status>blocked</turn_status>` if rollback failed or environment is still broken
- `<turn_status>giving_up</turn_status>` if the production environment is unrecoverable without human intervention

View file

@ -0,0 +1,67 @@
You are executing SF autonomous mode.
## UNIT: Smoke Test Production — {{milestoneId}}
## Working Directory
Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory.
{{inlinedContext}}
{{skillActivation}}
---
## Smoke Test Instructions
**Milestone:** `{{milestoneId}}` — {{milestoneTitle}}
**Live URL:** `{{deployedUrl}}`
**Deploy run ID:** `{{deployRunId}}`
You are the production smoke tester. Your job is to verify the live deployment is healthy and correct — not to run unit tests, but to exercise the real running service.
### Smoke checks to run
For each check below, record: the check description, the tool/command used, the actual response observed, and PASS / FAIL.
1. **Health endpoint** — `GET {{deployedUrl}}/health` (or `/`, or the configured `deploy.health_path`). Expect HTTP 200.
2. **Version check** — if `deploy.version_path` is set, `GET {{deployedUrl}}/{{versionPath}}` and verify the version matches `{{releaseVersion}}`.
3. **Critical path** — exercise the minimum viable user flow defined in `deploy.smoke_checks` from preferences, or the UAT ASSESSMENT for this milestone's first slice.
4. **Error rate** — if observability is configured (`deploy.metrics_url`), check error rate is < 1%.
Use `curl`, browser tools, or the `bash` tool to execute checks. Capture raw HTTP responses as evidence.
### Record results
INSERT a row into `smoke_results`:
- `id`: UUID
- `deploy_run_id`: `{{deployRunId}}`
- `milestone_id`: `{{milestoneId}}`
- `url`: `{{deployedUrl}}`
- `status`: `'pass'` or `'fail'`
- `verdict`: `'PASS'` or `'FAIL'`
- `checks_json`: JSON array of `{ check, result, evidence }` objects
- `created_at` / `finished_at`
Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `artifact_type: "SMOKE"`, and the full smoke result markdown.
### On failure
If any critical check fails:
- Set `verdict = 'FAIL'` in smoke_results.
- Output `<turn_status>blocked</turn_status>`.
- The autonomous loop will trigger a rollback unit next.
### Report sf-internal observations
If you observe sf-the-tool friction during this unit, file via `sf_self_report`.
When done, say: "Smoke {{milestoneId}} complete — verdict: {{verdict}}."
---
**After completing this step, output exactly one of these markers:**
- `<turn_status>complete</turn_status>` if all smoke checks passed
- `<turn_status>blocked</turn_status>` if any critical check failed
- `<turn_status>giving_up</turn_status>` if the production environment is unreachable

View file

@ -577,6 +577,70 @@ function ensureUokMessageTables(db) {
"CREATE INDEX IF NOT EXISTS idx_uok_messages_sent ON uok_messages(sent_at DESC)",
);
}
/**
 * Create the production-delivery tables and their indexes if they are missing.
 *
 * Tables: deploy_runs, smoke_results, release_records, rollback_runs.
 * smoke_results and rollback_runs reference deploy_runs(id) with
 * ON DELETE CASCADE. Every statement uses IF NOT EXISTS, so the function can
 * be called repeatedly.
 *
 * @param {{ exec: (sql: string) => unknown }} db - Database handle exposing `exec`.
 */
function ensureDeployTables(db) {
  db.exec(`
    CREATE TABLE IF NOT EXISTS deploy_runs (
      id TEXT PRIMARY KEY,
      milestone_id TEXT NOT NULL,
      target TEXT NOT NULL,
      command TEXT NOT NULL,
      status TEXT NOT NULL DEFAULT 'pending',
      exit_code INTEGER DEFAULT NULL,
      output TEXT DEFAULT NULL,
      deployed_url TEXT DEFAULT NULL,
      created_at TEXT NOT NULL,
      finished_at TEXT DEFAULT NULL
    )
  `);
  db.exec(`
    CREATE TABLE IF NOT EXISTS smoke_results (
      id TEXT PRIMARY KEY,
      deploy_run_id TEXT NOT NULL,
      milestone_id TEXT NOT NULL,
      url TEXT NOT NULL,
      status TEXT NOT NULL DEFAULT 'pending',
      verdict TEXT DEFAULT NULL,
      checks_json TEXT NOT NULL DEFAULT '[]',
      created_at TEXT NOT NULL,
      finished_at TEXT DEFAULT NULL,
      FOREIGN KEY (deploy_run_id) REFERENCES deploy_runs(id) ON DELETE CASCADE
    )
  `);
  db.exec(`
    CREATE TABLE IF NOT EXISTS release_records (
      id TEXT PRIMARY KEY,
      milestone_id TEXT NOT NULL,
      version TEXT NOT NULL,
      prev_version TEXT DEFAULT NULL,
      changelog_entry TEXT DEFAULT NULL,
      git_tag TEXT DEFAULT NULL,
      published INTEGER NOT NULL DEFAULT 0,
      created_at TEXT NOT NULL
    )
  `);
  db.exec(`
    CREATE TABLE IF NOT EXISTS rollback_runs (
      id TEXT PRIMARY KEY,
      deploy_run_id TEXT NOT NULL,
      milestone_id TEXT NOT NULL,
      reason TEXT NOT NULL,
      status TEXT NOT NULL DEFAULT 'pending',
      output TEXT DEFAULT NULL,
      created_at TEXT NOT NULL,
      finished_at TEXT DEFAULT NULL,
      FOREIGN KEY (deploy_run_id) REFERENCES deploy_runs(id) ON DELETE CASCADE
    )
  `);
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_deploy_runs_milestone ON deploy_runs(milestone_id, created_at DESC)",
  );
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_smoke_results_deploy ON smoke_results(deploy_run_id)",
  );
  // Serves the "latest FAIL smoke result for a milestone" lookup.
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_smoke_results_milestone ON smoke_results(milestone_id, created_at DESC)",
  );
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_release_records_milestone ON release_records(milestone_id, created_at DESC)",
  );
  // Serves the "has this deploy run already been rolled back?" lookup.
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_rollback_runs_deploy ON rollback_runs(deploy_run_id)",
  );
}
function ensureSleeptimeQueueTable(db) {
db.exec(`
CREATE TABLE IF NOT EXISTS sleeptime_consolidation_queue (
@ -1307,6 +1371,7 @@ function initSchema(db, fileBacked) {
ensureSessionTables(db);
ensureSessionSnapshotTable(db);
ensureUokMessageTables(db);
ensureDeployTables(db);
ensureSleeptimeQueueTable(db);
ensureSpecSchemaTables(db);
ensureTaskFrontmatterColumns(db);
@ -2933,6 +2998,18 @@ function migrateSchema(db) {
":applied_at": new Date().toISOString(),
});
}
if (currentVersion < 51) {
// Add deploy/smoke/release/rollback tables — closes the vision→production loop.
// deploy_runs tracks each deployment attempt; smoke_results tracks live verification;
// release_records tracks version bumps and publishes; rollback_runs tracks reversions.
ensureDeployTables(db);
db.prepare(
"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
).run({
":version": 51,
":applied_at": new Date().toISOString(),
});
}
db.exec("COMMIT");
} catch (err) {
db.exec("ROLLBACK");

View file

@ -223,7 +223,7 @@ test("openDatabase_migrates_v27_tasks_without_created_at_through_spec_backfill",
const version = db
.prepare("SELECT MAX(version) AS version FROM schema_version")
.get();
assert.equal(version.version, 49);
assert.equal(version.version, 51);
const taskSpec = db
.prepare(
"SELECT milestone_id, slice_id, task_id, verify FROM task_specs WHERE task_id = 'T01'",