feat(deploy): vision-to-production pipeline — deploy/smoke/release/rollback/challenge
- sf-db.js: ensureDeployTables() adds deploy_runs, smoke_results, release_records, rollback_runs (schema v51); migration block follows sleeptime v50 - preferences.js: deploy block merged (target, command, url, auto_release, release_type, publish_channel, adversarial_review) - auto-prompts.js: buildDeployPrompt, buildSmokeProductionPrompt, buildReleasePrompt, buildRollbackPrompt, buildChallengePrompt - auto-dispatch.js: 5 new rules — completing-milestone→challenge, completing-milestone→release, release-done→deploy, deploy-done→smoke-production, smoke-failed→rollback - prompts/: deploy.md, smoke-production.md, release.md, rollback.md, challenge.md - sf-db-migration test: bump expected schema version 49→51 The autonomous loop can now carry a milestone from complete-milestone all the way to a live, smoke-verified, tagged release. Each stage is gated by prefs (auto_release, deploy.target, deploy.url) so projects opt in per stage. Challenge (adversarial review) runs before release when adversarial_review is set. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
parent
d09c8282d0
commit
3b249c4144
10 changed files with 823 additions and 1 deletions
|
|
@ -17,6 +17,11 @@ import {
|
|||
buildDiscussMilestonePrompt,
|
||||
buildDiscussProjectPrompt,
|
||||
buildDiscussRequirementsPrompt,
|
||||
buildDeployPrompt,
|
||||
buildSmokeProductionPrompt,
|
||||
buildReleasePrompt,
|
||||
buildRollbackPrompt,
|
||||
buildChallengePrompt,
|
||||
buildExecuteTaskPrompt,
|
||||
buildGateEvaluatePrompt,
|
||||
buildParallelResearchSlicesPrompt,
|
||||
|
|
@ -1699,6 +1704,186 @@ export const DISPATCH_RULES = [
|
|||
};
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "completing-milestone → challenge",
|
||||
match: async ({ state, mid, midTitle, basePath, prefs }) => {
|
||||
if (state.phase !== "completing-milestone") return null;
|
||||
if (!prefs?.deploy?.adversarial_review) return null;
|
||||
// Only trigger if no challenge assessment exists for this milestone yet
|
||||
try {
|
||||
const { getDatabase } = await import("./sf-db.js");
|
||||
const db = getDatabase(basePath);
|
||||
const row = db
|
||||
.prepare(
|
||||
"SELECT id FROM assessments WHERE milestone_id = ? AND artifact_type = 'CHALLENGE' LIMIT 1",
|
||||
)
|
||||
.get(mid);
|
||||
if (row) return null;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
return {
|
||||
action: "dispatch",
|
||||
unitType: "challenge",
|
||||
unitId: `challenge-${mid}`,
|
||||
prompt: await buildChallengePrompt(
|
||||
mid,
|
||||
midTitle,
|
||||
"milestone",
|
||||
prefs?.deploy?.adversarial_mode ?? "red-team",
|
||||
basePath,
|
||||
),
|
||||
};
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "completing-milestone → release",
|
||||
match: async ({ state, mid, midTitle, basePath, prefs }) => {
|
||||
if (state.phase !== "completing-milestone") return null;
|
||||
if (!prefs?.deploy?.auto_release) return null;
|
||||
// Only if no release record exists for this milestone yet
|
||||
let hasRelease = false;
|
||||
try {
|
||||
const { getDatabase } = await import("./sf-db.js");
|
||||
const db = getDatabase(basePath);
|
||||
const row = db
|
||||
.prepare(
|
||||
"SELECT id FROM release_records WHERE milestone_id = ? LIMIT 1",
|
||||
)
|
||||
.get(mid);
|
||||
hasRelease = !!row;
|
||||
} catch {
|
||||
// DB unavailable — skip this rule
|
||||
return null;
|
||||
}
|
||||
if (hasRelease) return null;
|
||||
return {
|
||||
action: "dispatch",
|
||||
unitType: "release",
|
||||
unitId: `release-${mid}`,
|
||||
prompt: await buildReleasePrompt(mid, midTitle, basePath),
|
||||
};
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "release-done → deploy",
|
||||
match: async ({ state, mid, midTitle, basePath, prefs }) => {
|
||||
if (state.phase !== "completing-milestone") return null;
|
||||
if (!prefs?.deploy?.target || !prefs?.deploy?.command) return null;
|
||||
// Only trigger if a release record exists but no deploy run exists
|
||||
let hasRelease = false;
|
||||
let hasDeployRun = false;
|
||||
try {
|
||||
const { getDatabase } = await import("./sf-db.js");
|
||||
const db = getDatabase(basePath);
|
||||
const rr = db
|
||||
.prepare(
|
||||
"SELECT id FROM release_records WHERE milestone_id = ? LIMIT 1",
|
||||
)
|
||||
.get(mid);
|
||||
hasRelease = !!rr;
|
||||
const dr = db
|
||||
.prepare(
|
||||
"SELECT id FROM deploy_runs WHERE milestone_id = ? AND status != 'rolled-back' LIMIT 1",
|
||||
)
|
||||
.get(mid);
|
||||
hasDeployRun = !!dr;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
if (!hasRelease || hasDeployRun) return null;
|
||||
return {
|
||||
action: "dispatch",
|
||||
unitType: "deploy",
|
||||
unitId: `deploy-${mid}`,
|
||||
prompt: await buildDeployPrompt(mid, midTitle, basePath),
|
||||
};
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "deploy-done → smoke-production",
|
||||
match: async ({ state, mid, midTitle, basePath, prefs }) => {
|
||||
if (state.phase !== "completing-milestone") return null;
|
||||
if (!prefs?.deploy?.url) return null;
|
||||
let deployRunId = null;
|
||||
let hasSmokeResult = false;
|
||||
try {
|
||||
const { getDatabase } = await import("./sf-db.js");
|
||||
const db = getDatabase(basePath);
|
||||
const dr = db
|
||||
.prepare(
|
||||
"SELECT id FROM deploy_runs WHERE milestone_id = ? AND status = 'success' ORDER BY created_at DESC LIMIT 1",
|
||||
)
|
||||
.get(mid);
|
||||
if (!dr) return null;
|
||||
deployRunId = dr.id;
|
||||
const sr = db
|
||||
.prepare(
|
||||
"SELECT id FROM smoke_results WHERE deploy_run_id = ? LIMIT 1",
|
||||
)
|
||||
.get(deployRunId);
|
||||
hasSmokeResult = !!sr;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
if (!deployRunId || hasSmokeResult) return null;
|
||||
return {
|
||||
action: "dispatch",
|
||||
unitType: "smoke-production",
|
||||
unitId: `smoke-${mid}`,
|
||||
prompt: await buildSmokeProductionPrompt(
|
||||
mid,
|
||||
midTitle,
|
||||
deployRunId,
|
||||
basePath,
|
||||
),
|
||||
};
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "smoke-failed → rollback",
|
||||
match: async ({ state, mid, midTitle, basePath, prefs }) => {
|
||||
if (state.phase !== "completing-milestone") return null;
|
||||
if (!prefs?.deploy?.target) return null;
|
||||
let deployRunId = null;
|
||||
let failReason = "Smoke check failed";
|
||||
try {
|
||||
const { getDatabase } = await import("./sf-db.js");
|
||||
const db = getDatabase(basePath);
|
||||
const sr = db
|
||||
.prepare(
|
||||
"SELECT sr.deploy_run_id, sr.verdict FROM smoke_results sr " +
|
||||
"WHERE sr.milestone_id = ? AND sr.verdict = 'FAIL' " +
|
||||
"ORDER BY sr.created_at DESC LIMIT 1",
|
||||
)
|
||||
.get(mid);
|
||||
if (!sr) return null;
|
||||
deployRunId = sr.deploy_run_id;
|
||||
// Only trigger if there is no rollback run yet for this deploy
|
||||
const rr = db
|
||||
.prepare(
|
||||
"SELECT id FROM rollback_runs WHERE deploy_run_id = ? LIMIT 1",
|
||||
)
|
||||
.get(deployRunId);
|
||||
if (rr) return null;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
if (!deployRunId) return null;
|
||||
return {
|
||||
action: "dispatch",
|
||||
unitType: "rollback",
|
||||
unitId: `rollback-${mid}`,
|
||||
prompt: await buildRollbackPrompt(
|
||||
mid,
|
||||
midTitle,
|
||||
deployRunId,
|
||||
failReason,
|
||||
basePath,
|
||||
),
|
||||
};
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "complete → stop",
|
||||
match: async ({ state }) => {
|
||||
|
|
|
|||
|
|
@ -3021,3 +3021,233 @@ export async function buildRewriteDocsPrompt(
|
|||
overridesPath: relSfRootFile("OVERRIDES"),
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Assemble the prompt for the `deploy` unit of a milestone.
 *
 * Reads the `deploy` block of the effective SF preferences and feeds its
 * target, command and URL into the "deploy" prompt template, together with
 * the inlined project context and the skill-activation block. Unset values
 * fall back to "custom", a no-op `echo` command and an empty URL.
 *
 * Consumer: auto-dispatch.js `deploy` unit type ("release-done → deploy"
 * rule), which requires both prefs.deploy.target and prefs.deploy.command.
 *
 * @param mid - Milestone id, passed to the template as `milestoneId`.
 * @param midTitle - Milestone title, passed as `milestoneTitle`.
 * @param base - Project base path; used as the working directory and for
 *   loading the inlined project context.
 * @returns The result of `loadPrompt("deploy", …)`.
 */
export async function buildDeployPrompt(mid, midTitle, base) {
  const effective = loadEffectiveSFPreferences();
  const deploy = effective?.preferences?.deploy ?? {};

  // Only the "project" artifact is resolvable for this unit.
  const resolveArtifact = async (key) => {
    if (key !== "project") return null;
    return await inlineProjectFromDb(base);
  };

  const projectContext = await composeInlinedContext("deploy", resolveArtifact);
  const inlinedContext = capPreamble(
    `## Inlined Context (preloaded — do not re-read these files)\n\n${projectContext}`,
  );

  const templateVars = {
    workingDirectory: base,
    milestoneId: mid,
    milestoneTitle: midTitle,
    deployTarget: deploy.target ?? "custom",
    deployCommand: deploy.command ?? "echo 'No deploy command configured'",
    deployedUrl: deploy.url ?? "",
    inlinedContext,
    skillActivation: buildSkillActivationBlock({
      base,
      milestoneId: mid,
      extraContext: [inlinedContext],
      unitType: "deploy",
    }),
  };
  return loadPrompt("deploy", templateVars);
}
|
||||
|
||||
/**
 * Build the smoke-production prompt for a deployed milestone.
 *
 * Purpose: renders the "smoke-production" template, which instructs the agent
 * to exercise the live URL after deploy and record the outcome to
 * smoke_results in sf.db.
 *
 * The template asks the agent to compare the live version against
 * `releaseVersion`. That value is looked up (best-effort) from the newest
 * `release_records` row for the milestone; when no row exists or the database
 * cannot be read it stays an empty string.
 *
 * Consumer: auto-dispatch.js `smoke-production` unit type, triggered after
 * deploy-done when prefs.deploy.url is set.
 *
 * @param mid - Milestone id, passed to the template as `milestoneId`.
 * @param midTitle - Milestone title, passed as `milestoneTitle`.
 * @param deployRunId - Id of the deploy_runs row being verified.
 * @param base - Project base path; used as the working directory, for the
 *   inlined project context, and for the release-version lookup.
 * @returns The result of `loadPrompt("smoke-production", …)`.
 */
export async function buildSmokeProductionPrompt(
  mid,
  midTitle,
  deployRunId,
  base,
) {
  const prefs = loadEffectiveSFPreferences();
  const deploy = prefs?.preferences?.deploy ?? {};

  // Best-effort: resolve the version recorded by the release unit so the
  // template's version check has something to compare against.
  // NOTE(review): assumes sf-db.js is a sibling module exposing
  // getDatabase(basePath), as auto-dispatch.js uses it — confirm.
  let releaseVersion = "";
  try {
    const { getDatabase } = await import("./sf-db.js");
    const row = getDatabase(base)
      .prepare(
        "SELECT version FROM release_records WHERE milestone_id = ? ORDER BY created_at DESC LIMIT 1",
      )
      .get(mid);
    if (typeof row?.version === "string") {
      releaseVersion = row.version;
    }
  } catch {
    // DB unavailable or table missing — keep the empty placeholder so prompt
    // construction never fails because of this optional lookup.
  }

  // Only the "project" artifact is resolvable for this unit.
  const resolveArtifact = async (key) => {
    switch (key) {
      case "project":
        return await inlineProjectFromDb(base);
      default:
        return null;
    }
  };
  const composed = await composeInlinedContext(
    "smoke-production",
    resolveArtifact,
  );
  const inlinedContext = capPreamble(
    `## Inlined Context (preloaded — do not re-read these files)\n\n${composed}`,
  );
  return loadPrompt("smoke-production", {
    workingDirectory: base,
    milestoneId: mid,
    milestoneTitle: midTitle,
    deployRunId,
    deployedUrl: deploy.url ?? "",
    versionPath: deploy.version_path ?? "",
    releaseVersion,
    // Placeholder value for the template's `verdict` variable; the real
    // verdict is only known after the agent has run the checks.
    verdict: "PENDING",
    inlinedContext,
    skillActivation: buildSkillActivationBlock({
      base,
      milestoneId: mid,
      extraContext: [inlinedContext],
      unitType: "smoke-production",
    }),
  });
}
|
||||
|
||||
/**
 * Assemble the prompt for the `release` unit of a milestone.
 *
 * Feeds the "release" template with the release settings from the `deploy`
 * block of the effective SF preferences (current version, release type,
 * publish channel), today's date in YYYY-MM-DD form (UTC, taken from
 * `toISOString()`), the inlined project context and the skill-activation
 * block. Unset values fall back to "0.0.0", "patch" and "none".
 *
 * Consumer: auto-dispatch.js `release` unit type ("completing-milestone →
 * release" rule), gated on prefs.deploy.auto_release.
 *
 * @param mid - Milestone id, passed to the template as `milestoneId`.
 * @param midTitle - Milestone title, passed as `milestoneTitle`.
 * @param base - Project base path; used as the working directory and for
 *   loading the inlined project context.
 * @returns The result of `loadPrompt("release", …)`.
 */
export async function buildReleasePrompt(mid, midTitle, base) {
  const effective = loadEffectiveSFPreferences();
  const deploy = effective?.preferences?.deploy ?? {};

  // Only the "project" artifact is resolvable for this unit.
  const resolveArtifact = async (key) => {
    if (key !== "project") return null;
    return await inlineProjectFromDb(base);
  };

  const projectContext = await composeInlinedContext(
    "release",
    resolveArtifact,
  );
  const inlinedContext = capPreamble(
    `## Inlined Context (preloaded — do not re-read these files)\n\n${projectContext}`,
  );

  const templateVars = {
    workingDirectory: base,
    milestoneId: mid,
    milestoneTitle: midTitle,
    currentVersion: deploy.current_version ?? "0.0.0",
    releaseType: deploy.release_type ?? "patch",
    publishChannel: deploy.publish_channel ?? "none",
    // First ten characters of the ISO timestamp, i.e. the date part.
    today: new Date().toISOString().slice(0, 10),
    inlinedContext,
    skillActivation: buildSkillActivationBlock({
      base,
      milestoneId: mid,
      extraContext: [inlinedContext],
      unitType: "release",
    }),
  };
  return loadPrompt("release", templateVars);
}
|
||||
|
||||
/**
 * Assemble the prompt for the `rollback` unit of a milestone.
 *
 * Feeds the "rollback" template with the failed deploy run id, the rollback
 * reason, and the target, URL and app name from the `deploy` block of the
 * effective SF preferences, plus the inlined project context and the
 * skill-activation block. Unset values fall back to "custom", an empty URL,
 * and the milestone id as the app name.
 *
 * Consumer: auto-dispatch.js `rollback` unit type ("smoke-failed → rollback"
 * rule), gated on prefs.deploy.target.
 *
 * @param mid - Milestone id, passed to the template as `milestoneId`.
 * @param midTitle - Milestone title, passed as `milestoneTitle`.
 * @param deployRunId - Id of the deploy_runs row to roll back.
 * @param rollbackReason - Reason text shown to the rollback agent.
 * @param base - Project base path; used as the working directory and for
 *   loading the inlined project context.
 * @returns The result of `loadPrompt("rollback", …)`.
 */
export async function buildRollbackPrompt(
  mid,
  midTitle,
  deployRunId,
  rollbackReason,
  base,
) {
  const effective = loadEffectiveSFPreferences();
  const deploy = effective?.preferences?.deploy ?? {};

  // Only the "project" artifact is resolvable for this unit.
  const resolveArtifact = async (key) => {
    if (key !== "project") return null;
    return await inlineProjectFromDb(base);
  };

  const projectContext = await composeInlinedContext(
    "rollback",
    resolveArtifact,
  );
  const inlinedContext = capPreamble(
    `## Inlined Context (preloaded — do not re-read these files)\n\n${projectContext}`,
  );

  const templateVars = {
    workingDirectory: base,
    milestoneId: mid,
    milestoneTitle: midTitle,
    deployRunId,
    deployTarget: deploy.target ?? "custom",
    deployedUrl: deploy.url ?? "",
    appName: deploy.app_name ?? mid,
    rollbackReason,
    inlinedContext,
    skillActivation: buildSkillActivationBlock({
      base,
      milestoneId: mid,
      extraContext: [inlinedContext],
      unitType: "rollback",
    }),
  };
  return loadPrompt("rollback", templateVars);
}
|
||||
|
||||
/**
 * Assemble the prompt for the `challenge` (adversarial review) unit.
 *
 * Feeds the "challenge" template with the milestone identity, the challenge
 * target and mode, the inlined project context and the skill-activation
 * block. A nullish target defaults to "milestone" and a nullish mode to
 * "red-team".
 *
 * Consumer: auto-dispatch.js `challenge` unit type ("completing-milestone →
 * challenge" rule), gated on prefs.deploy.adversarial_review.
 *
 * @param mid - Milestone id, passed to the template as `milestoneId`.
 * @param midTitle - Milestone title, passed as `milestoneTitle`.
 * @param challengeTarget - What to attack; the dispatcher passes "milestone".
 * @param challengeMode - Review mode, e.g. "red-team".
 * @param base - Project base path; used as the working directory and for
 *   loading the inlined project context.
 * @returns The result of `loadPrompt("challenge", …)`.
 */
export async function buildChallengePrompt(
  mid,
  midTitle,
  challengeTarget,
  challengeMode,
  base,
) {
  // Only the "project" artifact is resolvable for this unit.
  const resolveArtifact = async (key) => {
    if (key !== "project") return null;
    return await inlineProjectFromDb(base);
  };

  const projectContext = await composeInlinedContext(
    "challenge",
    resolveArtifact,
  );
  const inlinedContext = capPreamble(
    `## Inlined Context (preloaded — do not re-read these files)\n\n${projectContext}`,
  );

  const templateVars = {
    workingDirectory: base,
    milestoneId: mid,
    milestoneTitle: midTitle,
    challengeTarget: challengeTarget ?? "milestone",
    challengeMode: challengeMode ?? "red-team",
    inlinedContext,
    skillActivation: buildSkillActivationBlock({
      base,
      milestoneId: mid,
      extraContext: [inlinedContext],
      unitType: "challenge",
    }),
  };
  return loadPrompt("challenge", templateVars);
}
|
||||
|
|
|
|||
|
|
@ -602,6 +602,11 @@ function mergePreferences(base, override) {
|
|||
subscription: override.subscription ?? base.subscription,
|
||||
allow_flat_rate_providers:
|
||||
override.allow_flat_rate_providers ?? base.allow_flat_rate_providers,
|
||||
// ── Production delivery ──
|
||||
deploy:
|
||||
base.deploy || override.deploy
|
||||
? { ...(base.deploy ?? {}), ...(override.deploy ?? {}) }
|
||||
: undefined,
|
||||
};
|
||||
}
|
||||
function mergeStringLists(base, override) {
|
||||
|
|
|
|||
73
src/resources/extensions/sf/prompts/challenge.md
Normal file
73
src/resources/extensions/sf/prompts/challenge.md
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
You are executing SF autonomous mode.
|
||||
|
||||
## UNIT: Challenge (Adversarial Review) — {{milestoneId}}
|
||||
|
||||
## Working Directory
|
||||
|
||||
Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory.
|
||||
|
||||
{{inlinedContext}}
|
||||
|
||||
{{skillActivation}}
|
||||
|
||||
---
|
||||
|
||||
## Challenge Instructions
|
||||
|
||||
**Milestone:** `{{milestoneId}}` — {{milestoneTitle}}
|
||||
**Target:** `{{challengeTarget}}` (milestone / slice / component / architecture / security / assumptions)
|
||||
**Challenge mode:** `{{challengeMode}}` (red-team / assumption-audit / failure-injection / adversarial-spec)
|
||||
|
||||
You are the adversary agent. Your job is NOT to complete work — it is to rigorously attack the correctness, safety, and assumptions of what has been built or planned. You represent the failure modes the product team has not considered.
|
||||
|
||||
### Challenge rules
|
||||
|
||||
1. **Be ruthless but specific.** Every finding must include: what breaks, under what condition, and what the impact is.
|
||||
2. **No false positives for diplomacy.** If something is fine, say so. Do not manufacture findings to seem thorough.
|
||||
3. **Evidence-first.** For each finding, provide a concrete reproduction path: a command, a code path, an input, or a condition.
|
||||
4. **Prioritise by blast radius.** Data loss, security, and correctness > performance > UX > style.
|
||||
|
||||
### What to challenge
|
||||
|
||||
Based on `{{challengeMode}}`:
|
||||
|
||||
- **red-team** — attempt to break the system: inject invalid inputs, exceed rate limits, trigger edge cases, exploit assumptions in the implementation. Try to cause data loss, auth bypass, or incorrect output.
|
||||
- **assumption-audit** — enumerate every assumption in the design docs, milestone spec, and slice plans. For each, state: what happens if the assumption is wrong? What is the evidence it is valid?
|
||||
- **failure-injection** — simulate dependency failures: DB unavailable, API timeout, disk full, OOM, concurrent writes. Does the system degrade gracefully or crash?
|
||||
- **adversarial-spec** — re-read the spec as an adversarial user. Find underspecified behaviour, ambiguous edge cases, missing error states, and spec contradictions.
|
||||
|
||||
### Output format
|
||||
|
||||
For each finding:
|
||||
|
||||
```
|
||||
## Finding [N]: <title>
|
||||
|
||||
**Severity:** critical / high / medium / low
|
||||
**Mode:** red-team / assumption-audit / failure-injection / adversarial-spec
|
||||
**Condition:** <what must be true for this to trigger>
|
||||
**Impact:** <what breaks and how badly>
|
||||
**Evidence:** <command, code path, or reproduction steps>
|
||||
**Recommendation:** <minimal fix or acknowledgement that the risk is accepted>
|
||||
```
|
||||
|
||||
After all findings, provide an **Overall Verdict**:
|
||||
- `PASS` — no significant findings; the system is robust enough to proceed.
|
||||
- `NEEDS-REMEDIATION` — one or more critical/high findings must be addressed before deploy.
|
||||
- `ADVISORY` — findings are low severity; proceed with awareness.
|
||||
|
||||
Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `artifact_type: "CHALLENGE"`, and the full challenge report as content.
|
||||
|
||||
### Report sf-internal observations
|
||||
|
||||
If you observe sf-the-tool friction during this unit, file via `sf_self_report`.
|
||||
|
||||
When done, say: "Challenge {{milestoneId}} complete — verdict: <verdict>."
|
||||
|
||||
---
|
||||
|
||||
**After completing this step, output exactly one of these markers:**
|
||||
|
||||
- `<turn_status>complete</turn_status>` if verdict is PASS or ADVISORY
|
||||
- `<turn_status>blocked</turn_status>` if verdict is NEEDS-REMEDIATION
|
||||
- `<turn_status>giving_up</turn_status>` if the target is too underspecified to challenge meaningfully
|
||||
63
src/resources/extensions/sf/prompts/deploy.md
Normal file
63
src/resources/extensions/sf/prompts/deploy.md
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
You are executing SF autonomous mode.
|
||||
|
||||
## UNIT: Deploy — {{milestoneId}}
|
||||
|
||||
## Working Directory
|
||||
|
||||
Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory.
|
||||
|
||||
{{inlinedContext}}
|
||||
|
||||
{{skillActivation}}
|
||||
|
||||
---
|
||||
|
||||
## Deploy Instructions
|
||||
|
||||
**Milestone:** `{{milestoneId}}` — {{milestoneTitle}}
|
||||
**Deploy target:** `{{deployTarget}}`
|
||||
**Deploy command:** `{{deployCommand}}`
|
||||
**Expected live URL:** `{{deployedUrl}}`
|
||||
|
||||
You are the deployment agent. Your job is to ship the completed, validated milestone to production.
|
||||
|
||||
### Pre-flight checks (run before deploying)
|
||||
|
||||
1. Verify the release record exists for this milestone (a row in `release_records` in sf.db, or the matching `v<version>` release tag in git).
|
||||
2. Verify no un-merged worktree conflicts (`git status` clean).
|
||||
3. Verify the deploy command exists and is executable.
|
||||
4. If a `.env.deploy` or environment variable list is specified in `preferences.yaml` under `deploy.env_check`, verify those vars are set.
|
||||
|
||||
### Deploy execution
|
||||
|
||||
Run `{{deployCommand}}` and capture all stdout/stderr.
|
||||
|
||||
Record the deploy run to the database:
|
||||
- `INSERT INTO deploy_runs (id, milestone_id, target, command, status, exit_code, output, deployed_url, created_at, finished_at)`
|
||||
- Use a UUID for `id`.
|
||||
- Set `status = 'success'` if exit code is 0, `status = 'failed'` otherwise.
|
||||
|
||||
### Post-deploy
|
||||
|
||||
If the deploy succeeded:
|
||||
- Write `deployed_url` to the deploy_runs row.
|
||||
- Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `artifact_type: "DEPLOY"`, and a brief deploy report as content.
|
||||
|
||||
If the deploy failed:
|
||||
- Set `status = 'failed'` in deploy_runs.
|
||||
- Call `sf_summary_save` with the failure output as content.
|
||||
- Output `<turn_status>blocked</turn_status>` — do NOT attempt to fix the deploy failure inline; it requires a repair unit.
|
||||
|
||||
### Report sf-internal observations
|
||||
|
||||
If you observe sf-the-tool friction during this unit, file via `sf_self_report`.
|
||||
|
||||
When done, say: "Deploy {{milestoneId}} complete."
|
||||
|
||||
---
|
||||
|
||||
**After completing this step, output exactly one of these markers:**
|
||||
|
||||
- `<turn_status>complete</turn_status>` if deploy succeeded
|
||||
- `<turn_status>blocked</turn_status>` if deploy failed or pre-flight failed
|
||||
- `<turn_status>giving_up</turn_status>` if the deploy environment is fundamentally broken
|
||||
61
src/resources/extensions/sf/prompts/release.md
Normal file
61
src/resources/extensions/sf/prompts/release.md
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
You are executing SF autonomous mode.
|
||||
|
||||
## UNIT: Release — {{milestoneId}}
|
||||
|
||||
## Working Directory
|
||||
|
||||
Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory.
|
||||
|
||||
{{inlinedContext}}
|
||||
|
||||
{{skillActivation}}
|
||||
|
||||
---
|
||||
|
||||
## Release Instructions
|
||||
|
||||
**Milestone:** `{{milestoneId}}` — {{milestoneTitle}}
|
||||
**Current version:** `{{currentVersion}}`
|
||||
**Release type:** `{{releaseType}}` (major / minor / patch)
|
||||
**Publish channel:** `{{publishChannel}}` (npm / docker / github-release / none)
|
||||
|
||||
You are the release agent. Your job is to version, tag, changelog, and optionally publish the completed milestone.
|
||||
|
||||
### Steps
|
||||
|
||||
1. **Determine new version** — apply `{{releaseType}}` bump to `{{currentVersion}}`. Use semver. If `package.json` exists, read the current version from it.
|
||||
|
||||
2. **Update version files** — update `package.json` (and any `packages/*/package.json` if this is a monorepo) with the new version. Update `version.txt` or `VERSION` if present.
|
||||
|
||||
3. **Generate changelog entry** — summarise the milestone's completed slices into a CHANGELOG.md entry under `## [<newVersion>] - {{today}}`, where `<newVersion>` is the version you determined in step 1. Use the slice SUMMARY files as source material. Be concise: one bullet per slice.
|
||||
|
||||
4. **Commit the release** — `git add -A && git commit -m "chore(release): <newVersion>"` (substitute the version determined in step 1 for `<newVersion>`).
|
||||
|
||||
5. **Tag** — `git tag -a v<newVersion> -m "Release <newVersion> — {{milestoneTitle}}"` (substitute the version determined in step 1 for `<newVersion>`).
|
||||
|
||||
6. **Publish** (if `{{publishChannel}}` is not `none`):
|
||||
- `npm`: run `npm publish --access public` (or `npm publish` for private).
|
||||
- `docker`: run the `deploy.publish_command` from preferences.
|
||||
- `github-release`: create a GitHub release via `gh release create v<newVersion> --title "<newVersion>" --notes "$(cat CHANGELOG.md | head -50)"` (substitute the version determined in step 1 for `<newVersion>`).
|
||||
|
||||
7. **Record to DB** — INSERT into `release_records (id, milestone_id, version, prev_version, changelog_entry, git_tag, published, created_at)`.
|
||||
|
||||
8. **Call `sf_summary_save`** with `milestone_id: {{milestoneId}}`, `artifact_type: "RELEASE"`, and the changelog entry as content.
|
||||
|
||||
### On failure
|
||||
|
||||
If publish fails (network error, auth error), set `published = 0` in release_records; the commit and tag remain valid. Output `<turn_status>blocked</turn_status>` with a clear reason.
|
||||
|
||||
### Report sf-internal observations
|
||||
|
||||
If you observe sf-the-tool friction during this unit, file via `sf_self_report`.
|
||||
|
||||
When done, say: "Release <newVersion> complete." (using the version you released).
|
||||
|
||||
---
|
||||
|
||||
**After completing this step, output exactly one of these markers:**
|
||||
|
||||
- `<turn_status>complete</turn_status>` if release succeeded (publish optional)
|
||||
- `<turn_status>blocked</turn_status>` if a required step failed
|
||||
- `<turn_status>giving_up</turn_status>` if version state is corrupted and cannot be resolved
|
||||
61
src/resources/extensions/sf/prompts/rollback.md
Normal file
61
src/resources/extensions/sf/prompts/rollback.md
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
You are executing SF autonomous mode.
|
||||
|
||||
## UNIT: Rollback — {{milestoneId}}
|
||||
|
||||
## Working Directory
|
||||
|
||||
Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory.
|
||||
|
||||
{{inlinedContext}}
|
||||
|
||||
{{skillActivation}}
|
||||
|
||||
---
|
||||
|
||||
## Rollback Instructions
|
||||
|
||||
**Milestone:** `{{milestoneId}}` — {{milestoneTitle}}
|
||||
**Failed deploy run:** `{{deployRunId}}`
|
||||
**Deploy target:** `{{deployTarget}}`
|
||||
**Rollback reason:** `{{rollbackReason}}`
|
||||
|
||||
You are the rollback agent. A smoke test failed after deployment. Your job is to revert the production environment to the last known-good state.
|
||||
|
||||
### Rollback steps
|
||||
|
||||
1. **Identify the rollback command** from `preferences.yaml` under `deploy.rollback_command`. If not set, derive it:
|
||||
- Fly.io: `fly releases rollback --app {{appName}}`
|
||||
- Docker/Kubernetes: re-deploy the previous image tag
|
||||
- npm: no rollback (record only)
|
||||
- Vercel: `vercel rollback`
|
||||
- Custom: there is no derivable default — `deploy.rollback_command` must be configured in preferences.
|
||||
|
||||
2. **Execute the rollback** — run the command and capture output.
|
||||
|
||||
3. **Verify rollback** — re-run the health check against `{{deployedUrl}}`. Confirm the previous version is live.
|
||||
|
||||
4. **Record to DB** — INSERT into `rollback_runs (id, deploy_run_id, milestone_id, reason, status, output, created_at, finished_at)`.
|
||||
- Set `status = 'success'` if the health check passes post-rollback.
|
||||
- Set `status = 'failed'` if the environment is still broken.
|
||||
|
||||
5. **Update the deploy run** — UPDATE `deploy_runs SET status = 'rolled-back' WHERE id = '{{deployRunId}}'`.
|
||||
|
||||
6. **Call `sf_summary_save`** with `milestone_id: {{milestoneId}}`, `artifact_type: "ROLLBACK"`, and the rollback report as content.
|
||||
|
||||
### After rollback
|
||||
|
||||
Output `<turn_status>blocked</turn_status>` — the milestone requires a repair unit to address the smoke failure before re-attempting deploy. Do NOT attempt to fix the underlying bug inline.
|
||||
|
||||
### Report sf-internal observations
|
||||
|
||||
If you observe sf-the-tool friction during this unit, file via `sf_self_report`.
|
||||
|
||||
When done, say: "Rollback {{milestoneId}} complete."
|
||||
|
||||
---
|
||||
|
||||
**After completing this step, output exactly one of these markers:**
|
||||
|
||||
- `<turn_status>complete</turn_status>` if rollback succeeded and environment is stable
|
||||
- `<turn_status>blocked</turn_status>` if rollback failed or environment is still broken
|
||||
- `<turn_status>giving_up</turn_status>` if the production environment is unrecoverable without human intervention
|
||||
67
src/resources/extensions/sf/prompts/smoke-production.md
Normal file
67
src/resources/extensions/sf/prompts/smoke-production.md
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
You are executing SF autonomous mode.
|
||||
|
||||
## UNIT: Smoke Test Production — {{milestoneId}}
|
||||
|
||||
## Working Directory
|
||||
|
||||
Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory.
|
||||
|
||||
{{inlinedContext}}
|
||||
|
||||
{{skillActivation}}
|
||||
|
||||
---
|
||||
|
||||
## Smoke Test Instructions
|
||||
|
||||
**Milestone:** `{{milestoneId}}` — {{milestoneTitle}}
|
||||
**Live URL:** `{{deployedUrl}}`
|
||||
**Deploy run ID:** `{{deployRunId}}`
|
||||
|
||||
You are the production smoke tester. Your job is to verify the live deployment is healthy and correct — not to run unit tests, but to exercise the real running service.
|
||||
|
||||
### Smoke checks to run
|
||||
|
||||
For each check below, record: the check description, the tool/command used, the actual response observed, and PASS / FAIL.
|
||||
|
||||
1. **Health endpoint** — `GET {{deployedUrl}}/health` (or `/`, or the configured `deploy.health_path`). Expect HTTP 200.
|
||||
2. **Version check** — if `deploy.version_path` is set, `GET {{deployedUrl}}/{{versionPath}}` and verify the version matches `{{releaseVersion}}`.
|
||||
3. **Critical path** — exercise the minimum viable user flow defined in `deploy.smoke_checks` from preferences, or the UAT ASSESSMENT for this milestone's first slice.
|
||||
4. **Error rate** — if observability is configured (`deploy.metrics_url`), check error rate is < 1%.
|
||||
|
||||
Use `curl`, browser tools, or the `bash` tool to execute checks. Capture raw HTTP responses as evidence.
|
||||
|
||||
### Record results
|
||||
|
||||
INSERT a row into `smoke_results`:
|
||||
- `id`: UUID
|
||||
- `deploy_run_id`: `{{deployRunId}}`
|
||||
- `milestone_id`: `{{milestoneId}}`
|
||||
- `url`: `{{deployedUrl}}`
|
||||
- `status`: `'pass'` or `'fail'`
|
||||
- `verdict`: `'PASS'` or `'FAIL'`
|
||||
- `checks_json`: JSON array of `{ check, result, evidence }` objects
|
||||
- `created_at` / `finished_at`
|
||||
|
||||
Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `artifact_type: "SMOKE"`, and the full smoke result markdown.
|
||||
|
||||
### On failure
|
||||
|
||||
If any critical check fails:
|
||||
- Set `verdict = 'FAIL'` in smoke_results.
|
||||
- Output `<turn_status>blocked</turn_status>`.
|
||||
- The autonomous loop will trigger a rollback unit next.
|
||||
|
||||
### Report sf-internal observations
|
||||
|
||||
If you observe sf-the-tool friction during this unit, file via `sf_self_report`.
|
||||
|
||||
When done, say: "Smoke {{milestoneId}} complete — verdict: <PASS or FAIL>."
|
||||
|
||||
---
|
||||
|
||||
**After completing this step, output exactly one of these markers:**
|
||||
|
||||
- `<turn_status>complete</turn_status>` if all smoke checks passed
|
||||
- `<turn_status>blocked</turn_status>` if any critical check failed
|
||||
- `<turn_status>giving_up</turn_status>` if the production environment is unreachable
|
||||
|
|
@ -577,6 +577,70 @@ function ensureUokMessageTables(db) {
|
|||
"CREATE INDEX IF NOT EXISTS idx_uok_messages_sent ON uok_messages(sent_at DESC)",
|
||||
);
|
||||
}
|
||||
/**
 * Create the deploy-pipeline tables and their indexes if they are missing.
 *
 * Tables created:
 *  - deploy_runs:     one row per deployment attempt
 *  - smoke_results:   one row per live verification of a deploy run
 *  - release_records: one row per version bump / publish
 *  - rollback_runs:   one row per reversion of a deploy run
 *
 * Every statement uses IF NOT EXISTS, so calling this repeatedly on the same
 * database is safe.
 *
 * @param {{ exec: (sql: string) => unknown }} db - Database handle exposing
 *   a synchronous `exec(sql)` method.
 * @returns {void}
 */
function ensureDeployTables(db) {
  db.exec(`
    CREATE TABLE IF NOT EXISTS deploy_runs (
      id TEXT PRIMARY KEY,
      milestone_id TEXT NOT NULL,
      target TEXT NOT NULL,
      command TEXT NOT NULL,
      status TEXT NOT NULL DEFAULT 'pending',
      exit_code INTEGER DEFAULT NULL,
      output TEXT DEFAULT NULL,
      deployed_url TEXT DEFAULT NULL,
      created_at TEXT NOT NULL,
      finished_at TEXT DEFAULT NULL
    )
  `);
  // Child of deploy_runs: rows are removed with their parent deploy run
  // (NOTE(review): cascade only fires if foreign keys are enabled on the
  // connection — confirm the PRAGMA is set where the db is opened).
  db.exec(`
    CREATE TABLE IF NOT EXISTS smoke_results (
      id TEXT PRIMARY KEY,
      deploy_run_id TEXT NOT NULL,
      milestone_id TEXT NOT NULL,
      url TEXT NOT NULL,
      status TEXT NOT NULL DEFAULT 'pending',
      verdict TEXT DEFAULT NULL,
      checks_json TEXT NOT NULL DEFAULT '[]',
      created_at TEXT NOT NULL,
      finished_at TEXT DEFAULT NULL,
      FOREIGN KEY (deploy_run_id) REFERENCES deploy_runs(id) ON DELETE CASCADE
    )
  `);
  db.exec(`
    CREATE TABLE IF NOT EXISTS release_records (
      id TEXT PRIMARY KEY,
      milestone_id TEXT NOT NULL,
      version TEXT NOT NULL,
      prev_version TEXT DEFAULT NULL,
      changelog_entry TEXT DEFAULT NULL,
      git_tag TEXT DEFAULT NULL,
      published INTEGER NOT NULL DEFAULT 0,
      created_at TEXT NOT NULL
    )
  `);
  // Child of deploy_runs, same cascade behavior as smoke_results.
  db.exec(`
    CREATE TABLE IF NOT EXISTS rollback_runs (
      id TEXT PRIMARY KEY,
      deploy_run_id TEXT NOT NULL,
      milestone_id TEXT NOT NULL,
      reason TEXT NOT NULL,
      status TEXT NOT NULL DEFAULT 'pending',
      output TEXT DEFAULT NULL,
      created_at TEXT NOT NULL,
      finished_at TEXT DEFAULT NULL,
      FOREIGN KEY (deploy_run_id) REFERENCES deploy_runs(id) ON DELETE CASCADE
    )
  `);
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_deploy_runs_milestone ON deploy_runs(milestone_id, created_at DESC)",
  );
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_smoke_results_deploy ON smoke_results(deploy_run_id)",
  );
  // Index the foreign-key child column so cascading deletes from deploy_runs
  // and per-deploy lookups do not scan rollback_runs (mirrors smoke_results).
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_rollback_runs_deploy ON rollback_runs(deploy_run_id)",
  );
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_release_records_milestone ON release_records(milestone_id, created_at DESC)",
  );
}
|
||||
function ensureSleeptimeQueueTable(db) {
|
||||
db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS sleeptime_consolidation_queue (
|
||||
|
|
@ -1307,6 +1371,7 @@ function initSchema(db, fileBacked) {
|
|||
ensureSessionTables(db);
|
||||
ensureSessionSnapshotTable(db);
|
||||
ensureUokMessageTables(db);
|
||||
ensureDeployTables(db);
|
||||
ensureSleeptimeQueueTable(db);
|
||||
ensureSpecSchemaTables(db);
|
||||
ensureTaskFrontmatterColumns(db);
|
||||
|
|
@ -2933,6 +2998,18 @@ function migrateSchema(db) {
|
|||
":applied_at": new Date().toISOString(),
|
||||
});
|
||||
}
|
||||
if (currentVersion < 51) {
|
||||
// Add deploy/smoke/release/rollback tables — closes the vision→production loop.
|
||||
// deploy_runs tracks each deployment attempt; smoke_results tracks live verification;
|
||||
// release_records tracks version bumps and publishes; rollback_runs tracks reversions.
|
||||
ensureDeployTables(db);
|
||||
db.prepare(
|
||||
"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
|
||||
).run({
|
||||
":version": 51,
|
||||
":applied_at": new Date().toISOString(),
|
||||
});
|
||||
}
|
||||
db.exec("COMMIT");
|
||||
} catch (err) {
|
||||
db.exec("ROLLBACK");
|
||||
|
|
|
|||
|
|
@ -223,7 +223,7 @@ test("openDatabase_migrates_v27_tasks_without_created_at_through_spec_backfill",
|
|||
const version = db
|
||||
.prepare("SELECT MAX(version) AS version FROM schema_version")
|
||||
.get();
|
||||
assert.equal(version.version, 49);
|
||||
assert.equal(version.version, 51);
|
||||
const taskSpec = db
|
||||
.prepare(
|
||||
"SELECT milestone_id, slice_id, task_id, verify FROM task_specs WHERE task_id = 'T01'",
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue