Bundles the working-tree state into one coherent commit covering the
upgrade-safety glue that complements today's earlier landings
(orphan-recovery, sf-db single-connection, drain-timer-not-unref'd,
forceShutdown drain, shutdown-state.ts, instrumentation.ts,
shutdown-signal.js, gate-deadlock-classifier).
Modified:
docker/Dockerfile.source-server — image build tweaks for the source-
server variant used by the in-container upgrader.
docker/docker-compose.vega.yaml — env passthroughs for host-side dirs
(SF_SOURCE_HOST_ROOT, SF_WORKSPACE_HOST_DIR, SF_WORKSPACES_HOST_DIR,
SF_HOME_HOST_DIR), docker socket mount, group_add for docker GID,
and SF_RPC_SHUTDOWN_GRACE_MS=600000 matching the 10-min drain.
scripts/run-vega-source-server.mjs — substantial rework supporting
the in-container upgrade flow.
scripts/upgrade-vega-source-server.mjs — buildEnv() + dockerBuildEnv()
helpers, probeBind via SF_VEGA_PROBE_HOST, containerExists()
pre-check before drainContainer, stop timeout now matches the
10-min RPC grace via SF_VEGA_DRAIN_STOP_TIME (default 610s).
src/web/project-discovery-service.ts — calls
recoverProjectRuntimeQueues() on each of the 3 discovery paths
(root monorepo, per-entry, nested SF projects). Closes the
cloud-volume mtime-lag window codex flagged.
web/app/api/ready/route.ts — calls recoverProjectRuntimeQueues() on
every readiness probe, and now also reads shutdown-state so the
probe returns 503 while draining.
web/components/sf/projects-view.tsx — UI wiring for the upgrade
trigger.
web/pages/api/projects.ts — backend API addition for the project
enumeration that feeds projects-view.
docs/specs/sf-self-deploy.md — docs update for the new flow.
package.json — script alias.
Added:
scripts/build-web-host.mjs — new build helper for the standalone web
host artifact consumed by the upgrade flow.
src/resources/extensions/sf/tests/auto-shutdown-signal.test.mjs —
unit test for the cooperative-shutdown signal module (registers /
requests / snapshot).
src/web/project-runtime-recovery.ts — thin wrapper around
recoverOrphanedFeedbackDrains for per-project use from web routes.
web/app/api/drain/route.ts — explicit drain endpoint for operator-
triggered queue flush.
web/app/api/server-upgrade/route.ts — auth-gated endpoint that
spawns the in-container upgrader via docker socket; passes through
host-dir env so the upgrader knows real bind-mount paths.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
230 lines
6.4 KiB
JavaScript
230 lines
6.4 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
 * upgrade-vega-source-server.mjs — blue/green upgrade the shared vega SF
 * webserver.
 *
 * Purpose: prove a candidate source-mounted server on a side port before
 * replacing the shared production container on port 4000.
 *
 * Consumer: `npm run docker:vega:upgrade` locally and Forgejo/host-side deploy
 * automation when vega is the target.
 */
|
|
import { spawnSync } from "node:child_process";
|
|
import { dirname, resolve } from "node:path";
|
|
import { fileURLToPath } from "node:url";
|
|
|
|
// Directory one level above the directory that contains this script; used as
// the working directory for every spawned command.
const root = resolve(fileURLToPath(new URL("..", import.meta.url)));

// Address reported in the final status line and the default probe host.
const bind = process.env.SF_VEGA_BIND || "127.0.0.1";

// Container names for the long-lived server and the short-lived candidate.
const prodName = process.env.SF_VEGA_CONTAINER || "sf-server-vega";
const candidateName =
  process.env.SF_VEGA_CANDIDATE_CONTAINER || "sf-server-vega-candidate";

// Ports are kept as strings because they are forwarded through env vars and
// interpolated into URLs.
const prodPort = process.env.SF_VEGA_PORT || "4000";
const candidatePort = process.env.SF_VEGA_CANDIDATE_PORT || "4001";

// Sent as the `root` query parameter of the /api/projects probe; defaults to
// the parent directory of `root`.
const workspacesRoot = process.env.SF_WORKSPACES_DIR || dirname(root);

// When "1", the web-host build and release-manifest generation are skipped.
const skipBuild = process.env.SF_VEGA_UPGRADE_SKIP_BUILD === "1";

// Host used for HTTP probes and drain requests; falls back to `bind`.
const probeBind = process.env.SF_VEGA_PROBE_HOST || bind;
|
|
|
|
// Phase 1 — build. The host-side web build and release manifest can be skipped
// via SF_VEGA_UPGRADE_SKIP_BUILD=1; the docker image build always runs.
if (!skipBuild) {
  run("npm", ["run", "build:web-host"], { env: buildEnv() });
  run(process.execPath, [
    "scripts/generate-release-manifest.mjs",
    "--out",
    "dist/sf-release-manifest.json",
  ]);
}
run(
  "docker",
  [
    "build",
    "-f",
    "docker/Dockerfile.source-server",
    "-t",
    process.env.SF_VEGA_IMAGE || "sf-source-server:vega",
    ".",
  ],
  { env: dockerBuildEnv() },
);

// Phase 2 — bring the candidate up on the side port and require every probe
// to pass before the production container is touched.
startServer(candidateName, candidatePort);
await probeServer(candidatePort, "candidate");

// Phase 3 — drain and replace production, then verify it on its own port.
await requestDrain(prodPort, "prod");
drainContainer(prodName);
startServer(prodName, prodPort);
await probeServer(prodPort, "prod");

// Phase 4 — the candidate is no longer needed; drain and remove it.
await requestDrain(candidatePort, "candidate");
drainContainer(candidateName);
process.stdout.write(
  `sf server upgraded: ${prodName} is healthy on ${bind}:${prodPort}\n`,
);
|
|
|
|
/**
 * Launch a source-server container by running
 * `scripts/run-vega-source-server.mjs up` with the container name and port
 * injected through the environment.
 *
 * The child is told to skip its own image build (SF_VEGA_SKIP_IMAGE_BUILD=1).
 * Failure of the child terminates this process via `run`.
 *
 * @param {string} name - Container name, forwarded as SF_VEGA_CONTAINER.
 * @param {string} port - Port, forwarded as SF_VEGA_PORT.
 * @returns {void}
 */
function startServer(name, port) {
  const env = {
    ...process.env,
    SF_VEGA_CONTAINER: name,
    SF_VEGA_PORT: port,
    SF_VEGA_SKIP_IMAGE_BUILD: "1",
  };
  // Spawn with the Node binary that is executing this script rather than
  // whatever `node` resolves to on PATH, so parent and child cannot end up on
  // different Node versions.
  run(process.execPath, ["scripts/run-vega-source-server.mjs", "up"], { env });
}
|
|
|
|
/**
 * Poll a server's health endpoints until one full sweep succeeds.
 *
 * A sweep requests /api/healthz, /api/ready, /api/version and /api/projects in
 * order; every response must be 2xx and the projects response must be a
 * non-empty JSON array. Failed sweeps are retried once per second for up to
 * 60 seconds. On timeout the container logs are printed and an error carrying
 * the last failure message is thrown.
 *
 * @param {string} port - Port the server listens on.
 * @param {string} label - "candidate" selects the candidate container for log
 *   output; any other value selects the production container.
 * @returns {Promise<void>} Resolves once all probes passed.
 * @throws {Error} When no sweep succeeded before the deadline.
 */
async function probeServer(port, label) {
  const baseUrl = `http://${probeBind}:${port}`;
  const projectsUrl = `${baseUrl}/api/projects?root=${encodeURIComponent(workspacesRoot)}&detail=true`;
  const checks = [
    ["healthz", `${baseUrl}/api/healthz`],
    ["ready", `${baseUrl}/api/ready`],
    ["version", `${baseUrl}/api/version`],
    ["projects", projectsUrl],
  ];
  const deadline = Date.now() + 60_000;
  let lastError = "";

  while (Date.now() < deadline) {
    try {
      for (const [name, url] of checks) {
        const response = await fetch(url, {
          cache: "no-store",
          // Bound each request by the time left until the overall deadline so
          // a connection that never answers cannot stall the upgrade forever.
          signal: AbortSignal.timeout(Math.max(1, deadline - Date.now())),
        });
        if (!response.ok) {
          throw new Error(`${name} returned ${response.status}`);
        }
        if (name !== "projects") {
          // Consume the body so the response is fully read before moving on.
          await response.arrayBuffer();
          continue;
        }
        const projects = await response.json();
        if (!Array.isArray(projects) || projects.length === 0) {
          throw new Error("projects returned no configured repos");
        }
      }
      process.stdout.write(`${label} probes passed on ${baseUrl}\n`);
      return;
    } catch (error) {
      lastError = error instanceof Error ? error.message : String(error);
      await delay(1000);
    }
  }

  showLogs(label === "candidate" ? candidateName : prodName);
  throw new Error(`${label} probes failed: ${lastError}`);
}
|
|
|
|
/**
 * Best-effort drain preflight: POST /api/drain and, when the server accepts
 * it, wait for the drain to become visible on /api/healthz.
 *
 * This never throws. A 404 is tolerated silently (presumably a server build
 * without the drain endpoint — confirm against the route's history). Any other
 * failure, including network errors, is logged as "drain preflight skipped"
 * and the caller proceeds to stop the container.
 *
 * @param {string} port - Port of the server to drain.
 * @param {string} label - Human-readable name used in log lines.
 * @returns {Promise<void>}
 */
async function requestDrain(port, label) {
  const baseUrl = `http://${probeBind}:${port}`;
  try {
    const response = await fetch(`${baseUrl}/api/drain`, {
      method: "POST",
      headers: authHeaders(),
    });
    if (response.ok) {
      await waitForDrainHealthz(port, label);
      return;
    }
    if (response.status !== 404) {
      throw new Error(`drain returned ${response.status}`);
    }
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    process.stdout.write(`${label} drain preflight skipped: ${reason}\n`);
  }
}
|
|
|
|
/**
 * Wait for a requested drain to show up on /api/healthz as a 503.
 *
 * Polls every 250 ms for up to 10 seconds. The function never throws: if no
 * 503 is seen in time it logs the last observed status (or error message) and
 * returns so the caller can go on to stop the container.
 *
 * @param {string} port - Port of the draining server.
 * @param {string} label - Human-readable name used in log lines.
 * @returns {Promise<void>}
 */
async function waitForDrainHealthz(port, label) {
  const baseUrl = `http://${probeBind}:${port}`;
  const deadline = Date.now() + 10_000;
  let lastStatus = "unknown";

  while (Date.now() < deadline) {
    try {
      const response = await fetch(`${baseUrl}/api/healthz`, {
        cache: "no-store",
        headers: authHeaders(),
        // Keep a single unanswered request from outliving the poll window.
        signal: AbortSignal.timeout(Math.max(1, deadline - Date.now())),
      });
      lastStatus = String(response.status);
      if (response.status === 503) {
        process.stdout.write(`${label} drain acknowledged on ${baseUrl}\n`);
        return;
      }
    } catch (error) {
      lastStatus = error instanceof Error ? error.message : String(error);
    }
    await delay(250);
  }

  process.stdout.write(
    `${label} drain did not surface on healthz before stop (last=${lastStatus})\n`,
  );
}
|
|
|
|
/**
 * Build the HTTP headers used for authenticated requests to the server.
 *
 * @returns {{ Authorization?: string }} A bearer Authorization header when
 *   SF_WEB_AUTH_TOKEN is set to a non-empty value, otherwise an empty object.
 */
function authHeaders() {
  const { SF_WEB_AUTH_TOKEN: token } = process.env;
  if (!token) {
    return {};
  }
  return { Authorization: `Bearer ${token}` };
}
|
|
|
|
/**
 * Print the last 120 log lines of a container to this process's stdio.
 *
 * The exit status of `docker logs` is ignored, so a missing container or
 * unavailable docker CLI does not raise here.
 *
 * @param {string} name - Container name.
 * @returns {void}
 */
function showLogs(name) {
  const dockerArgs = ["logs", "--tail=120", name];
  spawnSync("docker", dockerArgs, {
    cwd: root,
    stdio: "inherit",
    env: process.env,
  });
}
|
|
|
|
/**
 * Environment for the host-side `npm run build:web-host` step.
 *
 * Copies the current environment and forces a production build with Next
 * telemetry and npm's update notifier turned off. `--disable-warning=DEP0205`
 * is appended to any NODE_OPTIONS already present.
 *
 * @returns {Record<string, string | undefined>} Environment for the child.
 */
function buildEnv() {
  const inherited = process.env.NODE_OPTIONS;
  const suppressFlag = "--disable-warning=DEP0205";
  // An unset or empty NODE_OPTIONS must not leave a leading space behind.
  const nodeOptions = inherited ? `${inherited} ${suppressFlag}` : suppressFlag;

  return {
    ...process.env,
    NODE_ENV: "production",
    NEXT_TELEMETRY_DISABLED: "1",
    NODE_OPTIONS: nodeOptions,
    NPM_CONFIG_UPDATE_NOTIFIER: "false",
    npm_config_update_notifier: "false",
  };
}
|
|
|
|
/**
 * Environment for the `docker build` step.
 *
 * Copies the current environment, enables BuildKit, sets a noninteractive
 * Debian frontend, and defaults BUILDKIT_PROGRESS to "plain" unless the caller
 * already chose a non-empty value.
 *
 * @returns {Record<string, string | undefined>} Environment for the child.
 */
function dockerBuildEnv() {
  const progress = process.env.BUILDKIT_PROGRESS || "plain";
  return {
    ...process.env,
    DOCKER_BUILDKIT: "1",
    BUILDKIT_PROGRESS: progress,
    DEBIAN_FRONTEND: "noninteractive",
  };
}
|
|
|
|
/**
 * Gracefully stop and then remove a container, if it exists.
 *
 * Both docker commands run with `allowFailure`, so a failed stop still falls
 * through to `docker rm -f`.
 *
 * @param {string} name - Container name.
 * @returns {void}
 */
function drainContainer(name) {
  if (!containerExists(name)) return;
  // 610s: matches SF_RPC_SHUTDOWN_GRACE_MS=600000 in rpc-mode's
  // graceful-shutdown handler with a 10s safety margin for Node exit.
  // Normal drains finish in <1s; the long ceiling is for pathological
  // lock contention so queued self-feedback writes are never lost
  // across an upgrade. Override per-deployment via env if needed.
  const defaultStopTime = "610";
  const requested = (
    process.env.SF_VEGA_DRAIN_STOP_TIME || defaultStopTime
  ).trim();
  let stopTime = requested;
  // A value docker cannot parse would make `docker stop` fail; because that
  // failure is tolerated, the `rm -f` below would then kill the container
  // without any graceful drain. Fall back to the safe default instead.
  if (!/^-?\d+$/.test(requested)) {
    process.stderr.write(
      `ignoring invalid SF_VEGA_DRAIN_STOP_TIME=${JSON.stringify(requested)}; using ${defaultStopTime}\n`,
    );
    stopTime = defaultStopTime;
  }
  run("docker", ["stop", "--timeout", stopTime, name], { allowFailure: true });
  run("docker", ["rm", "-f", name], { allowFailure: true });
}
|
|
|
|
/**
 * Report whether docker knows a container with the given name.
 *
 * Runs `docker container inspect` with all output discarded and looks only at
 * the exit status, so a docker CLI that cannot be started also yields false.
 *
 * @param {string} name - Container name.
 * @returns {boolean} True only when the inspect command exits with status 0.
 */
function containerExists(name) {
  const { status } = spawnSync("docker", ["container", "inspect", name], {
    cwd: root,
    stdio: "ignore",
    env: process.env,
  });
  return status === 0;
}
|
|
|
|
/**
 * Promise-based sleep.
 *
 * @param {number} ms - Milliseconds to wait, passed straight to setTimeout.
 * @returns {Promise<void>} Resolves (with no value) after the timer fires.
 */
function delay(ms) {
  return new Promise((resolveDelay) => {
    setTimeout(resolveDelay, ms);
  });
}
|
|
|
|
/**
 * Run a command synchronously from the repository root with inherited stdio.
 *
 * On a non-zero or missing exit status the whole process exits with that
 * status (1 when there is none), unless `options.allowFailure` is truthy, in
 * which case the failure is ignored.
 *
 * @param {string} command - Executable to spawn.
 * @param {string[]} args - Arguments for the executable.
 * @param {{ env?: NodeJS.ProcessEnv, allowFailure?: boolean }} [options] -
 *   `env` replaces the child's environment (defaults to process.env);
 *   `allowFailure` suppresses the exit-on-failure behaviour.
 * @returns {void}
 */
function run(command, args, options = {}) {
  const result = spawnSync(command, args, {
    cwd: root,
    stdio: "inherit",
    env: options.env ?? process.env,
  });
  if (result.status === 0 || options.allowFailure) {
    return;
  }

  // With no exit status the child never reported one itself (it failed to
  // start or was killed by a signal); say why before exiting so the failure
  // is not a silent exit code 1.
  const invocation = [command, ...args].join(" ");
  if (result.error) {
    process.stderr.write(
      `failed to run \`${invocation}\`: ${result.error.message}\n`,
    );
  } else if (result.signal) {
    process.stderr.write(
      `\`${invocation}\` was terminated by ${result.signal}\n`,
    );
  }
  process.exit(result.status ?? 1);
}
|