From 3b249c41449482496afe00c4a444ead1f082f05c Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Sat, 9 May 2026 15:25:47 +0200 Subject: [PATCH] =?UTF-8?q?feat(deploy):=20vision-to-production=20pipeline?= =?UTF-8?q?=20=E2=80=94=20deploy/smoke/release/rollback/challenge?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - sf-db.js: ensureDeployTables() adds deploy_runs, smoke_results, release_records, rollback_runs (schema v51); migration block follows sleeptime v50 - preferences.js: deploy block merged (target, command, url, auto_release, release_type, publish_channel, adversarial_review) - auto-prompts.js: buildDeployPrompt, buildSmokeProductionPrompt, buildReleasePrompt, buildRollbackPrompt, buildChallengePrompt - auto-dispatch.js: 5 new rules — completing-milestone→challenge, completing-milestone→release, release-done→deploy, deploy-done→smoke-production, smoke-failed→rollback - prompts/: deploy.md, smoke-production.md, release.md, rollback.md, challenge.md - sf-db-migration test: bump expected schema version 49→51 The autonomous loop can now carry a milestone from complete-milestone all the way to a live, smoke-verified, tagged release. Each stage is gated by prefs (auto_release, deploy.target, deploy.url) so projects opt in per stage. Challenge (adversarial review) runs before release when adversarial_review is set. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/resources/extensions/sf/auto-dispatch.js  | 187 ++++++++++++++
 src/resources/extensions/sf/auto-prompts.js   | 255 ++++++++++++++++++
 src/resources/extensions/sf/preferences.js    |   5 +
 .../extensions/sf/prompts/challenge.md        |  73 ++++++
 src/resources/extensions/sf/prompts/deploy.md |  63 +++++
 .../extensions/sf/prompts/release.md          |  61 +++++
 .../extensions/sf/prompts/rollback.md         |  61 +++++
 .../extensions/sf/prompts/smoke-production.md |  67 +++++
 src/resources/extensions/sf/sf-db.js          |  77 ++++++
 .../sf/tests/sf-db-migration.test.mjs         |   2 +-
 10 files changed, 850 insertions(+), 1 deletion(-)
 create mode 100644 src/resources/extensions/sf/prompts/challenge.md
 create mode 100644 src/resources/extensions/sf/prompts/deploy.md
 create mode 100644 src/resources/extensions/sf/prompts/release.md
 create mode 100644 src/resources/extensions/sf/prompts/rollback.md
 create mode 100644 src/resources/extensions/sf/prompts/smoke-production.md

diff --git a/src/resources/extensions/sf/auto-dispatch.js b/src/resources/extensions/sf/auto-dispatch.js
index 1de723054..e517a7a1b 100644
--- a/src/resources/extensions/sf/auto-dispatch.js
+++ b/src/resources/extensions/sf/auto-dispatch.js
@@ -17,6 +17,11 @@ import {
 	buildDiscussMilestonePrompt,
 	buildDiscussProjectPrompt,
 	buildDiscussRequirementsPrompt,
+	buildDeployPrompt,
+	buildSmokeProductionPrompt,
+	buildReleasePrompt,
+	buildRollbackPrompt,
+	buildChallengePrompt,
 	buildExecuteTaskPrompt,
 	buildGateEvaluatePrompt,
 	buildParallelResearchSlicesPrompt,
@@ -1699,6 +1704,188 @@ export const DISPATCH_RULES = [
 			};
 		},
 	},
+	{
+		name: "completing-milestone → challenge",
+		match: async ({ state, mid, midTitle, basePath, prefs }) => {
+			if (state.phase !== "completing-milestone") return null;
+			if (!prefs?.deploy?.adversarial_review) return null;
+			// Only trigger if no challenge assessment exists for this milestone yet
+			try {
+				const { getDatabase } = await import("./sf-db.js");
+				const db = getDatabase(basePath);
+				const row = db
+					.prepare(
+						"SELECT id FROM assessments WHERE milestone_id = ? AND artifact_type = 'CHALLENGE' LIMIT 1",
+					)
+					.get(mid);
+				if (row) return null;
+			} catch {
+				return null;
+			}
+			return {
+				action: "dispatch",
+				unitType: "challenge",
+				unitId: `challenge-${mid}`,
+				prompt: await buildChallengePrompt(
+					mid,
+					midTitle,
+					"milestone",
+					prefs?.deploy?.adversarial_mode ?? "red-team",
+					basePath,
+				),
+			};
+		},
+	},
+	{
+		name: "completing-milestone → release",
+		match: async ({ state, mid, midTitle, basePath, prefs }) => {
+			if (state.phase !== "completing-milestone") return null;
+			if (!prefs?.deploy?.auto_release) return null;
+			// Only if no release record exists for this milestone yet
+			let hasRelease = false;
+			try {
+				const { getDatabase } = await import("./sf-db.js");
+				const db = getDatabase(basePath);
+				const row = db
+					.prepare(
+						"SELECT id FROM release_records WHERE milestone_id = ? LIMIT 1",
+					)
+					.get(mid);
+				hasRelease = !!row;
+			} catch {
+				// DB unavailable — skip this rule
+				return null;
+			}
+			if (hasRelease) return null;
+			return {
+				action: "dispatch",
+				unitType: "release",
+				unitId: `release-${mid}`,
+				prompt: await buildReleasePrompt(mid, midTitle, basePath),
+			};
+		},
+	},
+	{
+		name: "release-done → deploy",
+		match: async ({ state, mid, midTitle, basePath, prefs }) => {
+			if (state.phase !== "completing-milestone") return null;
+			if (!prefs?.deploy?.target || !prefs?.deploy?.command) return null;
+			// Only trigger if a release record exists but no deploy run exists
+			let hasRelease = false;
+			let hasDeployRun = false;
+			try {
+				const { getDatabase } = await import("./sf-db.js");
+				const db = getDatabase(basePath);
+				const rr = db
+					.prepare(
+						"SELECT id FROM release_records WHERE milestone_id = ? LIMIT 1",
+					)
+					.get(mid);
+				hasRelease = !!rr;
+				const dr = db
+					.prepare(
+						"SELECT id FROM deploy_runs WHERE milestone_id = ? AND status != 'rolled-back' LIMIT 1",
+					)
+					.get(mid);
+				hasDeployRun = !!dr;
+			} catch {
+				return null;
+			}
+			if (!hasRelease || hasDeployRun) return null;
+			return {
+				action: "dispatch",
+				unitType: "deploy",
+				unitId: `deploy-${mid}`,
+				prompt: await buildDeployPrompt(mid, midTitle, basePath),
+			};
+		},
+	},
+	{
+		name: "deploy-done → smoke-production",
+		match: async ({ state, mid, midTitle, basePath, prefs }) => {
+			if (state.phase !== "completing-milestone") return null;
+			if (!prefs?.deploy?.url) return null;
+			let deployRunId = null;
+			let hasSmokeResult = false;
+			try {
+				const { getDatabase } = await import("./sf-db.js");
+				const db = getDatabase(basePath);
+				const dr = db
+					.prepare(
+						"SELECT id FROM deploy_runs WHERE milestone_id = ? AND status = 'success' ORDER BY created_at DESC LIMIT 1",
+					)
+					.get(mid);
+				if (!dr) return null;
+				deployRunId = dr.id;
+				const sr = db
+					.prepare(
+						"SELECT id FROM smoke_results WHERE deploy_run_id = ? LIMIT 1",
+					)
+					.get(deployRunId);
+				hasSmokeResult = !!sr;
+			} catch {
+				return null;
+			}
+			if (!deployRunId || hasSmokeResult) return null;
+			return {
+				action: "dispatch",
+				unitType: "smoke-production",
+				unitId: `smoke-${mid}`,
+				prompt: await buildSmokeProductionPrompt(
+					mid,
+					midTitle,
+					deployRunId,
+					basePath,
+				),
+			};
+		},
+	},
+	{
+		name: "smoke-failed → rollback",
+		match: async ({ state, mid, midTitle, basePath, prefs }) => {
+			if (state.phase !== "completing-milestone") return null;
+			if (!prefs?.deploy?.target) return null;
+			let deployRunId = null;
+			const failReason = "Smoke check failed";
+			try {
+				const { getDatabase } = await import("./sf-db.js");
+				const db = getDatabase(basePath);
+				// Judge only the newest smoke result for the milestone: an older FAIL
+				// that a later run has superseded must not roll back a healthy deploy.
+				const sr = db
+					.prepare(
+						"SELECT sr.deploy_run_id, sr.verdict FROM smoke_results sr " +
+							"WHERE sr.milestone_id = ? " +
+							"ORDER BY sr.created_at DESC LIMIT 1",
+					)
+					.get(mid);
+				if (!sr || sr.verdict !== "FAIL") return null;
+				deployRunId = sr.deploy_run_id;
+				// Only trigger if there is no rollback run yet for this deploy
+				const rr = db
+					.prepare(
+						"SELECT id FROM rollback_runs WHERE deploy_run_id = ? LIMIT 1",
+					)
+					.get(deployRunId);
+				if (rr) return null;
+			} catch {
+				return null;
+			}
+			if (!deployRunId) return null;
+			return {
+				action: "dispatch",
+				unitType: "rollback",
+				unitId: `rollback-${mid}`,
+				prompt: await buildRollbackPrompt(
+					mid,
+					midTitle,
+					deployRunId,
+					failReason,
+					basePath,
+				),
+			};
+		},
+	},
 	{
 		name: "complete → stop",
 		match: async ({ state }) => {
diff --git a/src/resources/extensions/sf/auto-prompts.js b/src/resources/extensions/sf/auto-prompts.js
index 42ce6963b..664818b29 100644
--- a/src/resources/extensions/sf/auto-prompts.js
+++ b/src/resources/extensions/sf/auto-prompts.js
@@ -3021,3 +3021,258 @@ export async function buildRewriteDocsPrompt(
 		overridesPath: relSfRootFile("OVERRIDES"),
 	});
 }
+
+/**
+ * Look up the version of the newest release recorded for a milestone.
+ *
+ * Purpose: deploy.md and smoke-production.md interpolate `releaseVersion`;
+ * resolving it from release_records lets pre-flight and the version check
+ * compare against the real release instead of an empty string.
+ *
+ * Best-effort by design: returns "" when no release row exists or sf.db
+ * cannot be read, so prompt building never fails on this lookup.
+ */
+async function readLatestReleaseVersion(mid, base) {
+	try {
+		const { getDatabase } = await import("./sf-db.js");
+		const row = getDatabase(base)
+			.prepare(
+				"SELECT version FROM release_records WHERE milestone_id = ? ORDER BY created_at DESC LIMIT 1",
+			)
+			.get(mid);
+		return row?.version ?? "";
+	} catch {
+		return "";
+	}
+}
+
+/**
+ * Build the deploy prompt for a completed milestone.
+ *
+ * Purpose: executes the project's configured deploy command, records the
+ * outcome to deploy_runs in sf.db, and surfaces the deployed URL so the
+ * smoke unit can verify it.
+ *
+ * Consumer: auto-dispatch.js `deploy` unit type, triggered after release-done
+ * when prefs.deploy.target and prefs.deploy.command are both set.
+ */
+export async function buildDeployPrompt(mid, midTitle, base) {
+	const prefs = loadEffectiveSFPreferences();
+	const deploy = prefs?.preferences?.deploy ?? {};
+	const resolveArtifact = async (key) => {
+		switch (key) {
+			case "project":
+				return await inlineProjectFromDb(base);
+			default:
+				return null;
+		}
+	};
+	const composed = await composeInlinedContext("deploy", resolveArtifact);
+	const inlinedContext = capPreamble(
+		`## Inlined Context (preloaded — do not re-read these files)\n\n${composed}`,
+	);
+	return loadPrompt("deploy", {
+		workingDirectory: base,
+		milestoneId: mid,
+		milestoneTitle: midTitle,
+		deployTarget: deploy.target ?? "custom",
+		deployCommand: deploy.command ?? "echo 'No deploy command configured'",
+		deployedUrl: deploy.url ?? "",
+		releaseVersion: await readLatestReleaseVersion(mid, base),
+		inlinedContext,
+		skillActivation: buildSkillActivationBlock({
+			base,
+			milestoneId: mid,
+			extraContext: [inlinedContext],
+			unitType: "deploy",
+		}),
+	});
+}
+
+/**
+ * Build the smoke-production prompt for a deployed milestone.
+ *
+ * Purpose: exercises the live production URL after deploy to verify the
+ * deployment is correct; writes smoke_results to sf.db; blocks on failure
+ * so rollback can run.
+ *
+ * Consumer: auto-dispatch.js `smoke-production` unit type, triggered after
+ * deploy-done when prefs.deploy.url is set.
+ */
+export async function buildSmokeProductionPrompt(
+	mid,
+	midTitle,
+	deployRunId,
+	base,
+) {
+	const prefs = loadEffectiveSFPreferences();
+	const deploy = prefs?.preferences?.deploy ?? {};
+	const resolveArtifact = async (key) => {
+		switch (key) {
+			case "project":
+				return await inlineProjectFromDb(base);
+			default:
+				return null;
+		}
+	};
+	const composed = await composeInlinedContext(
+		"smoke-production",
+		resolveArtifact,
+	);
+	const inlinedContext = capPreamble(
+		`## Inlined Context (preloaded — do not re-read these files)\n\n${composed}`,
+	);
+	return loadPrompt("smoke-production", {
+		workingDirectory: base,
+		milestoneId: mid,
+		milestoneTitle: midTitle,
+		deployRunId,
+		deployedUrl: deploy.url ?? "",
+		versionPath: deploy.version_path ?? "",
+		releaseVersion: await readLatestReleaseVersion(mid, base),
+		verdict: "PENDING",
+		inlinedContext,
+		skillActivation: buildSkillActivationBlock({
+			base,
+			milestoneId: mid,
+			extraContext: [inlinedContext],
+			unitType: "smoke-production",
+		}),
+	});
+}
+
+/**
+ * Build the release prompt for a completed, validated milestone.
+ *
+ * Purpose: bumps semver, writes CHANGELOG, commits, tags, and optionally
+ * publishes (npm/docker/github-release); records to release_records in sf.db.
+ *
+ * Consumer: auto-dispatch.js `release` unit type, triggered after
+ * completing-milestone when prefs.deploy.auto_release is truthy.
+ */
+export async function buildReleasePrompt(mid, midTitle, base) {
+	const prefs = loadEffectiveSFPreferences();
+	const deploy = prefs?.preferences?.deploy ?? {};
+	const resolveArtifact = async (key) => {
+		switch (key) {
+			case "project":
+				return await inlineProjectFromDb(base);
+			default:
+				return null;
+		}
+	};
+	const composed = await composeInlinedContext("release", resolveArtifact);
+	const inlinedContext = capPreamble(
+		`## Inlined Context (preloaded — do not re-read these files)\n\n${composed}`,
+	);
+	return loadPrompt("release", {
+		workingDirectory: base,
+		milestoneId: mid,
+		milestoneTitle: midTitle,
+		currentVersion: deploy.current_version ?? "0.0.0",
+		releaseType: deploy.release_type ?? "patch",
+		publishChannel: deploy.publish_channel ?? "none",
+		today: new Date().toISOString().slice(0, 10),
+		inlinedContext,
+		skillActivation: buildSkillActivationBlock({
+			base,
+			milestoneId: mid,
+			extraContext: [inlinedContext],
+			unitType: "release",
+		}),
+	});
+}
+
+/**
+ * Build the rollback prompt for a failed smoke-production check.
+ *
+ * Purpose: reverts the failed deployment to the last known-good state;
+ * records to rollback_runs and updates the deploy_runs status in sf.db.
+ *
+ * Consumer: auto-dispatch.js `rollback` unit type, triggered after
+ * smoke-failed when prefs.deploy.target is set.
+ */
+export async function buildRollbackPrompt(
+	mid,
+	midTitle,
+	deployRunId,
+	rollbackReason,
+	base,
+) {
+	const prefs = loadEffectiveSFPreferences();
+	const deploy = prefs?.preferences?.deploy ?? {};
+	const resolveArtifact = async (key) => {
+		switch (key) {
+			case "project":
+				return await inlineProjectFromDb(base);
+			default:
+				return null;
+		}
+	};
+	const composed = await composeInlinedContext("rollback", resolveArtifact);
+	const inlinedContext = capPreamble(
+		`## Inlined Context (preloaded — do not re-read these files)\n\n${composed}`,
+	);
+	return loadPrompt("rollback", {
+		workingDirectory: base,
+		milestoneId: mid,
+		milestoneTitle: midTitle,
+		deployRunId,
+		deployTarget: deploy.target ?? "custom",
+		deployedUrl: deploy.url ?? "",
+		appName: deploy.app_name ?? mid,
+		rollbackReason,
+		inlinedContext,
+		skillActivation: buildSkillActivationBlock({
+			base,
+			milestoneId: mid,
+			extraContext: [inlinedContext],
+			unitType: "rollback",
+		}),
+	});
+}
+
+/**
+ * Build the challenge (adversarial review) prompt for a milestone or slice.
+ *
+ * Purpose: activates the adversary agent to red-team, assumption-audit, or
+ * failure-inject against the named target; blocks the pipeline if critical
+ * findings require remediation.
+ *
+ * Consumer: auto-dispatch.js `challenge` unit type, dispatched while
+ * completing a milestone when prefs.deploy.adversarial_review is truthy.
+ */
+export async function buildChallengePrompt(
+	mid,
+	midTitle,
+	challengeTarget,
+	challengeMode,
+	base,
+) {
+	const resolveArtifact = async (key) => {
+		switch (key) {
+			case "project":
+				return await inlineProjectFromDb(base);
+			default:
+				return null;
+		}
+	};
+	const composed = await composeInlinedContext("challenge", resolveArtifact);
+	const inlinedContext = capPreamble(
+		`## Inlined Context (preloaded — do not re-read these files)\n\n${composed}`,
+	);
+	return loadPrompt("challenge", {
+		workingDirectory: base,
+		milestoneId: mid,
+		milestoneTitle: midTitle,
+		challengeTarget: challengeTarget ?? "milestone",
+		challengeMode: challengeMode ?? "red-team",
+		inlinedContext,
+		skillActivation: buildSkillActivationBlock({
+			base,
+			milestoneId: mid,
+			extraContext: [inlinedContext],
+			unitType: "challenge",
+		}),
+	});
+}
diff --git a/src/resources/extensions/sf/preferences.js b/src/resources/extensions/sf/preferences.js
index d649360f3..27ae7b5dd 100644
--- a/src/resources/extensions/sf/preferences.js
+++ b/src/resources/extensions/sf/preferences.js
@@ -602,6 +602,11 @@ function mergePreferences(base, override) {
 		subscription: override.subscription ?? base.subscription,
 		allow_flat_rate_providers:
 			override.allow_flat_rate_providers ?? base.allow_flat_rate_providers,
+		// ── Production delivery ──
+		deploy:
+			base.deploy || override.deploy
+				? { ...(base.deploy ?? {}), ...(override.deploy ?? {}) }
+				: undefined,
 	};
 }
 function mergeStringLists(base, override) {
diff --git a/src/resources/extensions/sf/prompts/challenge.md b/src/resources/extensions/sf/prompts/challenge.md
new file mode 100644
index 000000000..7f84d7934
--- /dev/null
+++ b/src/resources/extensions/sf/prompts/challenge.md
@@ -0,0 +1,73 @@
+You are executing SF autonomous mode.
+
+## UNIT: Challenge (Adversarial Review) — {{milestoneId}}
+
+## Working Directory
+
+Your working directory is `{{workingDirectory}}`. 
All file reads, writes, and shell commands MUST operate relative to this directory. + +{{inlinedContext}} + +{{skillActivation}} + +--- + +## Challenge Instructions + +**Milestone:** `{{milestoneId}}` — {{milestoneTitle}} +**Target:** `{{challengeTarget}}` (slice / component / architecture / security / assumptions) +**Challenge mode:** `{{challengeMode}}` (red-team / assumption-audit / failure-injection / adversarial-spec) + +You are the adversary agent. Your job is NOT to complete work — it is to rigorously attack the correctness, safety, and assumptions of what has been built or planned. You represent the failure modes the product team has not considered. + +### Challenge rules + +1. **Be ruthless but specific.** Every finding must include: what breaks, under what condition, and what the impact is. +2. **No false positives for diplomacy.** If something is fine, say so. Do not manufacture findings to seem thorough. +3. **Evidence-first.** For each finding, provide a concrete reproduction path: a command, a code path, an input, or a condition. +4. **Prioritise by blast radius.** Data loss, security, and correctness > performance > UX > style. + +### What to challenge + +Based on `{{challengeMode}}`: + +- **red-team** — attempt to break the system: inject invalid inputs, exceed rate limits, trigger edge cases, exploit assumptions in the implementation. Try to cause data loss, auth bypass, or incorrect output. +- **assumption-audit** — enumerate every assumption in the design docs, milestone spec, and slice plans. For each, state: what happens if the assumption is wrong? What is the evidence it is valid? +- **failure-injection** — simulate dependency failures: DB unavailable, API timeout, disk full, OOM, concurrent writes. Does the system degrade gracefully or crash? +- **adversarial-spec** — re-read the spec as an adversarial user. Find underspecified behaviour, ambiguous edge cases, missing error states, and spec contradictions. 
+ +### Output format + +For each finding: + +``` +## Finding [N]: + +**Severity:** critical / high / medium / low +**Mode:** red-team / assumption / failure / spec +**Condition:** <what must be true for this to trigger> +**Impact:** <what breaks and how badly> +**Evidence:** <command, code path, or reproduction steps> +**Recommendation:** <minimal fix or acknowledgement that the risk is accepted> +``` + +After all findings, provide an **Overall Verdict**: +- `PASS` — no significant findings; the system is robust enough to proceed. +- `NEEDS-REMEDIATION` — one or more critical/high findings must be addressed before deploy. +- `ADVISORY` — findings are low severity; proceed with awareness. + +Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `artifact_type: "CHALLENGE"`, and the full challenge report as content. + +### Report sf-internal observations + +If you observe sf-the-tool friction during this unit, file via `sf_self_report`. + +When done, say: "Challenge {{milestoneId}} complete — verdict: <verdict>." + +--- + +**After completing this step, output exactly one of these markers:** + +- `<turn_status>complete</turn_status>` if verdict is PASS or ADVISORY +- `<turn_status>blocked</turn_status>` if verdict is NEEDS-REMEDIATION +- `<turn_status>giving_up</turn_status>` if the target is too underspecified to challenge meaningfully diff --git a/src/resources/extensions/sf/prompts/deploy.md b/src/resources/extensions/sf/prompts/deploy.md new file mode 100644 index 000000000..2d8257d82 --- /dev/null +++ b/src/resources/extensions/sf/prompts/deploy.md @@ -0,0 +1,63 @@ +You are executing SF autonomous mode. + +## UNIT: Deploy — {{milestoneId}} + +## Working Directory + +Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory. 
+ +{{inlinedContext}} + +{{skillActivation}} + +--- + +## Deploy Instructions + +**Milestone:** `{{milestoneId}}` — {{milestoneTitle}} +**Deploy target:** `{{deployTarget}}` +**Deploy command:** `{{deployCommand}}` +**Expected live URL:** `{{deployedUrl}}` + +You are the deployment agent. Your job is to ship the completed, validated milestone to production. + +### Pre-flight checks (run before deploying) + +1. Verify the release record exists for this milestone (`release_records` in sf.db or `{{releaseVersion}}` tag in git). +2. Verify no un-merged worktree conflicts (`git status` clean). +3. Verify the deploy command exists and is executable. +4. If a `.env.deploy` or environment variable list is specified in `preferences.yaml` under `deploy.env_check`, verify those vars are set. + +### Deploy execution + +Run `{{deployCommand}}` and capture all stdout/stderr. + +Record the deploy run to the database: +- `INSERT INTO deploy_runs (id, milestone_id, target, command, status, exit_code, output, deployed_url, created_at, finished_at)` +- Use a UUID for `id`. +- Set `status = 'success'` if exit code is 0, `status = 'failed'` otherwise. + +### Post-deploy + +If the deploy succeeded: +- Write `deployed_url` to the deploy_runs row. +- Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `artifact_type: "DEPLOY"`, and a brief deploy report as content. + +If the deploy failed: +- Set `status = 'failed'` in deploy_runs. +- Call `sf_summary_save` with the failure output as content. +- Output `<turn_status>blocked</turn_status>` — do NOT attempt to fix the deploy failure inline; it requires a repair unit. + +### Report sf-internal observations + +If you observe sf-the-tool friction during this unit, file via `sf_self_report`. + +When done, say: "Deploy {{milestoneId}} complete." 
 + +--- + +**After completing this step, output exactly one of these markers:** + +- `<turn_status>complete</turn_status>` if deploy succeeded +- `<turn_status>blocked</turn_status>` if deploy failed or pre-flight failed +- `<turn_status>giving_up</turn_status>` if the deploy environment is fundamentally broken diff --git a/src/resources/extensions/sf/prompts/release.md b/src/resources/extensions/sf/prompts/release.md new file mode 100644 index 000000000..85f9331cd --- /dev/null +++ b/src/resources/extensions/sf/prompts/release.md @@ -0,0 +1,61 @@ +You are executing SF autonomous mode. + +## UNIT: Release — {{milestoneId}} + +## Working Directory + +Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory. + +{{inlinedContext}} + +{{skillActivation}} + +--- + +## Release Instructions + +**Milestone:** `{{milestoneId}}` — {{milestoneTitle}} +**Current version:** `{{currentVersion}}` +**Release type:** `{{releaseType}}` (major / minor / patch) +**Publish channel:** `{{publishChannel}}` (npm / docker / github-release / none) + +You are the release agent. Your job is to version, tag, changelog, and optionally publish the completed milestone. + +### Steps + +1. **Determine new version** — apply `{{releaseType}}` bump to `{{currentVersion}}`; the result is referred to below as `<newVersion>`. Use semver. If `package.json` exists, read the current version from it. + +2. **Update version files** — update `package.json` (and any `packages/*/package.json` if this is a monorepo) with the new version. Update `version.txt` or `VERSION` if present. + +3. **Generate changelog entry** — summarise the milestone's completed slices into a CHANGELOG.md entry under `## [<newVersion>] - {{today}}`. Use the slice SUMMARY files as source material. Be concise: one bullet per slice. + +4. **Commit the release** — `git add -A && git commit -m "chore(release): <newVersion>"`. + +5. 
**Tag** — `git tag -a v<newVersion> -m "Release <newVersion> — {{milestoneTitle}}"`. + +6. **Publish** (if `{{publishChannel}}` is not `none`): + - `npm`: run `npm publish --access public` (or `npm publish` for private). + - `docker`: run the `deploy.publish_command` from preferences. + - `github-release`: create a GitHub release via `gh release create v<newVersion> --title "<newVersion>" --notes "$(cat CHANGELOG.md | head -50)"`. + +7. **Record to DB** — INSERT into `release_records (id, milestone_id, version, prev_version, changelog_entry, git_tag, published, created_at)`. + +8. **Call `sf_summary_save`** with `milestone_id: {{milestoneId}}`, `artifact_type: "RELEASE"`, and the changelog entry as content. + +### On failure + +If publish fails (network error, auth error), set `published = 0` in release_records; the commit and tag are still valid. Output `<turn_status>blocked</turn_status>` with a clear reason. + +### Report sf-internal observations + +If you observe sf-the-tool friction during this unit, file via `sf_self_report`. + +When done, say: "Release <newVersion> complete." + +--- + +**After completing this step, output exactly one of these markers:** + +- `<turn_status>complete</turn_status>` if release succeeded (publish optional) +- `<turn_status>blocked</turn_status>` if a required step failed +- `<turn_status>giving_up</turn_status>` if version state is corrupted and cannot be resolved diff --git a/src/resources/extensions/sf/prompts/rollback.md b/src/resources/extensions/sf/prompts/rollback.md new file mode 100644 index 000000000..41c070cdf --- /dev/null +++ b/src/resources/extensions/sf/prompts/rollback.md @@ -0,0 +1,61 @@ +You are executing SF autonomous mode. + +## UNIT: Rollback — {{milestoneId}} + +## Working Directory + +Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory. 
+ +{{inlinedContext}} + +{{skillActivation}} + +--- + +## Rollback Instructions + +**Milestone:** `{{milestoneId}}` — {{milestoneTitle}} +**Failed deploy run:** `{{deployRunId}}` +**Deploy target:** `{{deployTarget}}` +**Rollback reason:** `{{rollbackReason}}` + +You are the rollback agent. A smoke test failed after deployment. Your job is to revert the production environment to the last known-good state. + +### Rollback steps + +1. **Identify the rollback command** from `preferences.yaml` under `deploy.rollback_command`. If not set, derive it: + - Fly.io: `fly releases rollback --app {{appName}}` + - Docker/Kubernetes: re-deploy the previous image tag + - npm: no rollback (record only) + - Vercel: `vercel rollback` + - Custom: run `deploy.rollback_command` + +2. **Execute the rollback** — run the command and capture output. + +3. **Verify rollback** — re-run the health check against `{{deployedUrl}}`. Confirm the previous version is live. + +4. **Record to DB** — INSERT into `rollback_runs (id, deploy_run_id, milestone_id, reason, status, output, created_at, finished_at)`. + - Set `status = 'success'` if the health check passes post-rollback. + - Set `status = 'failed'` if the environment is still broken. + +5. **Update the deploy run** — UPDATE `deploy_runs SET status = 'rolled-back' WHERE id = '{{deployRunId}}'`. + +6. **Call `sf_summary_save`** with `milestone_id: {{milestoneId}}`, `artifact_type: "ROLLBACK"`, and the rollback report as content. + +### After rollback + +Output `<turn_status>blocked</turn_status>` — the milestone requires a repair unit to address the smoke failure before re-attempting deploy. Do NOT attempt to fix the underlying bug inline. + +### Report sf-internal observations + +If you observe sf-the-tool friction during this unit, file via `sf_self_report`. + +When done, say: "Rollback {{milestoneId}} complete." 
 + +--- + +**After completing this step, output exactly one of these markers:** + +- `<turn_status>blocked</turn_status>` if rollback succeeded and environment is stable (a repair unit must still address the smoke failure before deploy is re-attempted) +- `<turn_status>blocked</turn_status>` if rollback failed or environment is still broken +- `<turn_status>giving_up</turn_status>` if the production environment is unrecoverable without human intervention diff --git a/src/resources/extensions/sf/prompts/smoke-production.md b/src/resources/extensions/sf/prompts/smoke-production.md new file mode 100644 index 000000000..fd955e180 --- /dev/null +++ b/src/resources/extensions/sf/prompts/smoke-production.md @@ -0,0 +1,67 @@ +You are executing SF autonomous mode. + +## UNIT: Smoke Test Production — {{milestoneId}} + +## Working Directory + +Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory. + +{{inlinedContext}} + +{{skillActivation}} + +--- + +## Smoke Test Instructions + +**Milestone:** `{{milestoneId}}` — {{milestoneTitle}} +**Live URL:** `{{deployedUrl}}` +**Deploy run ID:** `{{deployRunId}}` + +You are the production smoke tester. Your job is to verify the live deployment is healthy and correct — not to run unit tests, but to exercise the real running service. + +### Smoke checks to run + +For each check below, record: the check description, the tool/command used, the actual response observed, and PASS / FAIL. + +1. **Health endpoint** — `GET {{deployedUrl}}/health` (or `/`, or the configured `deploy.health_path`). Expect HTTP 200. +2. **Version check** — if `deploy.version_path` is set, `GET {{deployedUrl}}/{{versionPath}}` and verify the version matches `{{releaseVersion}}`. +3. **Critical path** — exercise the minimum viable user flow defined in `deploy.smoke_checks` from preferences, or the UAT ASSESSMENT for this milestone's first slice. +4. **Error rate** — if observability is configured (`deploy.metrics_url`), check error rate is < 1%. 
+
+Use `curl`, browser tools, or the `bash` tool to execute checks. Capture raw HTTP responses as evidence.
+
+### Record results
+
+INSERT a row into `smoke_results`:
+- `id`: UUID
+- `deploy_run_id`: `{{deployRunId}}`
+- `milestone_id`: `{{milestoneId}}`
+- `url`: `{{deployedUrl}}`
+- `status`: `'pass'` or `'fail'`
+- `verdict`: `'PASS'` or `'FAIL'`
+- `checks_json`: JSON array of `{ check, result, evidence }` objects
+- `created_at` / `finished_at`
+
+Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `artifact_type: "SMOKE"`, and the full smoke result markdown.
+
+### On failure
+
+If any critical check fails:
+- Set `verdict = 'FAIL'` in smoke_results.
+- Output `<turn_status>blocked</turn_status>`.
+- The autonomous loop will trigger a rollback unit next.
+
+### Report sf-internal observations
+
+If you observe sf-the-tool friction during this unit, file via `sf_self_report`.
+
+When done, say: "Smoke {{milestoneId}} complete — verdict: <verdict>."
+
+---
+
+**After completing this step, output exactly one of these markers:**
+
+- `<turn_status>complete</turn_status>` if all smoke checks passed
+- `<turn_status>blocked</turn_status>` if any critical check failed
+- `<turn_status>giving_up</turn_status>` if the production environment is unreachable
diff --git a/src/resources/extensions/sf/sf-db.js b/src/resources/extensions/sf/sf-db.js
index 3ef625684..d47e51df5 100644
--- a/src/resources/extensions/sf/sf-db.js
+++ b/src/resources/extensions/sf/sf-db.js
@@ -577,6 +577,85 @@ function ensureUokMessageTables(db) {
 		"CREATE INDEX IF NOT EXISTS idx_uok_messages_sent ON uok_messages(sent_at DESC)",
 	);
 }
+/**
+ * Create the production-delivery tables and their indexes if missing.
+ *
+ * Tables: deploy_runs, smoke_results, release_records, rollback_runs.
+ * Every statement uses IF NOT EXISTS, so calling this from both initSchema
+ * and the v51 migration is safe.
+ */
+function ensureDeployTables(db) {
+	db.exec(`
+		CREATE TABLE IF NOT EXISTS deploy_runs (
+			id TEXT PRIMARY KEY,
+			milestone_id TEXT NOT NULL,
+			target TEXT NOT NULL,
+			command TEXT NOT NULL,
+			status TEXT NOT NULL DEFAULT 'pending',
+			exit_code INTEGER DEFAULT NULL,
+			output TEXT DEFAULT NULL,
+			deployed_url TEXT DEFAULT NULL,
+			created_at TEXT NOT NULL,
+			finished_at TEXT DEFAULT NULL
+		)
+	`);
+	db.exec(`
+		CREATE TABLE IF NOT EXISTS smoke_results (
+			id TEXT PRIMARY KEY,
+			deploy_run_id TEXT NOT NULL,
+			milestone_id TEXT NOT NULL,
+			url TEXT NOT NULL,
+			status TEXT NOT NULL DEFAULT 'pending',
+			verdict TEXT DEFAULT NULL,
+			checks_json TEXT NOT NULL DEFAULT '[]',
+			created_at TEXT NOT NULL,
+			finished_at TEXT DEFAULT NULL,
+			FOREIGN KEY (deploy_run_id) REFERENCES deploy_runs(id) ON DELETE CASCADE
+		)
+	`);
+	db.exec(`
+		CREATE TABLE IF NOT EXISTS release_records (
+			id TEXT PRIMARY KEY,
+			milestone_id TEXT NOT NULL,
+			version TEXT NOT NULL,
+			prev_version TEXT DEFAULT NULL,
+			changelog_entry TEXT DEFAULT NULL,
+			git_tag TEXT DEFAULT NULL,
+			published INTEGER NOT NULL DEFAULT 0,
+			created_at TEXT NOT NULL
+		)
+	`);
+	db.exec(`
+		CREATE TABLE IF NOT EXISTS rollback_runs (
+			id TEXT PRIMARY KEY,
+			deploy_run_id TEXT NOT NULL,
+			milestone_id TEXT NOT NULL,
+			reason TEXT NOT NULL,
+			status TEXT NOT NULL DEFAULT 'pending',
+			output TEXT DEFAULT NULL,
+			created_at TEXT NOT NULL,
+			finished_at TEXT DEFAULT NULL,
+			FOREIGN KEY (deploy_run_id) REFERENCES deploy_runs(id) ON DELETE CASCADE
+		)
+	`);
+	db.exec(
+		"CREATE INDEX IF NOT EXISTS idx_deploy_runs_milestone ON deploy_runs(milestone_id, created_at DESC)",
+	);
+	db.exec(
+		"CREATE INDEX IF NOT EXISTS idx_smoke_results_deploy ON smoke_results(deploy_run_id)",
+	);
+	db.exec(
+		"CREATE INDEX IF NOT EXISTS idx_release_records_milestone ON release_records(milestone_id, created_at DESC)",
+	);
+	// The dispatch rules look up rollback_runs by deploy_run_id and
+	// smoke_results by milestone_id ordered by created_at; index both.
+	db.exec(
+		"CREATE INDEX IF NOT EXISTS idx_rollback_runs_deploy ON rollback_runs(deploy_run_id)",
+	);
+	db.exec(
+		"CREATE INDEX IF NOT EXISTS idx_smoke_results_milestone ON smoke_results(milestone_id, created_at DESC)",
+	);
+}
 function ensureSleeptimeQueueTable(db) {
 	db.exec(`
     CREATE TABLE IF NOT EXISTS sleeptime_consolidation_queue (
@@ -1307,6 +1386,7 @@ function initSchema(db, fileBacked) {
 	ensureSessionTables(db);
 	ensureSessionSnapshotTable(db);
 	ensureUokMessageTables(db);
+	ensureDeployTables(db);
 	ensureSleeptimeQueueTable(db);
 	ensureSpecSchemaTables(db);
 	ensureTaskFrontmatterColumns(db);
@@ -2933,6 +3013,18 @@ function migrateSchema(db) {
 				":applied_at": new Date().toISOString(),
 			});
 		}
+		if (currentVersion < 51) {
+			// Add deploy/smoke/release/rollback tables — closes the vision→production loop.
+			// deploy_runs tracks each deployment attempt; smoke_results tracks live verification;
+			// release_records tracks version bumps and publishes; rollback_runs tracks reversions.
+			ensureDeployTables(db);
+			db.prepare(
+				"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
+			).run({
+				":version": 51,
+				":applied_at": new Date().toISOString(),
+			});
+		}
 		db.exec("COMMIT");
 	} catch (err) {
 		db.exec("ROLLBACK");
diff --git a/src/resources/extensions/sf/tests/sf-db-migration.test.mjs b/src/resources/extensions/sf/tests/sf-db-migration.test.mjs
index 1fd7ce610..19af0c72d 100644
--- a/src/resources/extensions/sf/tests/sf-db-migration.test.mjs
+++ b/src/resources/extensions/sf/tests/sf-db-migration.test.mjs
@@ -223,7 +223,7 @@ test("openDatabase_migrates_v27_tasks_without_created_at_through_spec_backfill",
 	const version = db
 		.prepare("SELECT MAX(version) AS version FROM schema_version")
 		.get();
-	assert.equal(version.version, 49);
+	assert.equal(version.version, 51);
 	const taskSpec = db
 		.prepare(
 			"SELECT milestone_id, slice_id, task_id, verify FROM task_specs WHERE task_id = 'T01'",