feat: operational glue for upgrade-safety chain
Bundles the working-tree state into one coherent commit covering the
upgrade-safety glue that complements today's earlier landings
(orphan-recovery, sf-db single-connection, drain-timer-not-unref'd,
forceShutdown drain, shutdown-state.ts, instrumentation.ts,
shutdown-signal.js, gate-deadlock-classifier).
Modified:
docker/Dockerfile.source-server — image build tweaks for the source-
server variant used by the in-container upgrader.
docker/docker-compose.vega.yaml — env passthroughs for host-side dirs
(SF_SOURCE_HOST_ROOT, SF_WORKSPACE_HOST_DIR, SF_WORKSPACES_HOST_DIR,
SF_HOME_HOST_DIR), docker socket mount, group_add for docker GID,
and SF_RPC_SHUTDOWN_GRACE_MS=600000 matching the 10-min drain.
scripts/run-vega-source-server.mjs — substantial rework supporting
the in-container upgrade flow.
scripts/upgrade-vega-source-server.mjs — buildEnv() + dockerBuildEnv()
helpers, probeBind via SF_VEGA_PROBE_HOST, containerExists()
pre-check before drainContainer, stop timeout now matches the
10-min RPC grace via SF_VEGA_DRAIN_STOP_TIME (default 610s).
src/web/project-discovery-service.ts — calls
recoverProjectRuntimeQueues() on each of the 3 discovery paths
(root monorepo, per-entry, nested SF projects). Closes the
cloud-volume mtime-lag window codex flagged.
web/app/api/ready/route.ts — calls recoverProjectRuntimeQueues() on
every readiness probe, and now also reads shutdown-state so the
probe returns 503 while draining.
web/components/sf/projects-view.tsx — UI wiring for the upgrade
trigger.
web/pages/api/projects.ts — backend API addition for the project
enumeration that feeds projects-view.
docs/specs/sf-self-deploy.md — docs update for the new flow.
package.json — script alias.
Added:
scripts/build-web-host.mjs — new build helper for the standalone web
host artifact consumed by the upgrade flow.
src/resources/extensions/sf/tests/auto-shutdown-signal.test.mjs —
unit test for the cooperative-shutdown signal module (registers /
requests / snapshot).
src/web/project-runtime-recovery.ts — thin wrapper around
recoverOrphanedFeedbackDrains for per-project use from web routes.
web/app/api/drain/route.ts — explicit drain endpoint for operator-
triggered queue flush.
web/app/api/server-upgrade/route.ts — auth-gated endpoint that
spawns the in-container upgrader via docker socket; passes through
host-dir env so the upgrader knows real bind-mount paths.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
c0358a2fc7
commit
8c945550fa
15 changed files with 607 additions and 41 deletions
|
|
@ -15,6 +15,8 @@ ENV SF_WEB_PREFER_SOURCE=0
|
|||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
docker-buildx \
|
||||
docker-cli \
|
||||
git \
|
||||
libsecret-1-0 \
|
||||
tini \
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@ services:
|
|||
container_name: ${SF_VEGA_CONTAINER:-sf-server-vega}
|
||||
working_dir: /opt/sf
|
||||
user: "${PUID:-1000}:${PGID:-1000}"
|
||||
group_add:
|
||||
- "${DOCKER_GID:-999}"
|
||||
ports:
|
||||
- "${SF_VEGA_BIND:-127.0.0.1}:4000:4000"
|
||||
volumes:
|
||||
|
|
@ -15,6 +17,7 @@ services:
|
|||
- ${SF_WORKSPACES_DIR:-/home/mhugo/code}:${SF_WORKSPACES_DIR:-/home/mhugo/code}
|
||||
- ${HOME}/.sf:/home/node/.sf
|
||||
- ${HOME}/.gitconfig:/home/node/.gitconfig:ro
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
environment:
|
||||
HOME: /home/node
|
||||
NODE_ENV: development
|
||||
|
|
@ -23,12 +26,17 @@ services:
|
|||
SF_RELEASE_MANIFEST: /opt/sf/dist/sf-release-manifest.json
|
||||
SF_WEB_PROJECT_CWD: ${SF_WORKSPACE_DIR:-/home/mhugo/code/singularity-forge}
|
||||
SF_WORKSPACES_DIR: ${SF_WORKSPACES_DIR:-/home/mhugo/code}
|
||||
SF_SOURCE_HOST_ROOT: ${SF_SOURCE_HOST_ROOT:-/home/mhugo/code/singularity-forge}
|
||||
SF_WORKSPACE_HOST_DIR: ${SF_WORKSPACE_HOST_DIR:-/home/mhugo/code/singularity-forge}
|
||||
SF_WORKSPACES_HOST_DIR: ${SF_WORKSPACES_HOST_DIR:-/home/mhugo/code}
|
||||
SF_HOME_HOST_DIR: ${SF_HOME_HOST_DIR:-/home/mhugo/.sf}
|
||||
SF_WEB_HOST: 0.0.0.0
|
||||
SF_WEB_PORT: "4000"
|
||||
HOSTNAME: 0.0.0.0
|
||||
PORT: "4000"
|
||||
SF_WEB_ALLOWED_ORIGINS: ${SF_WEB_ALLOWED_ORIGINS:-http://127.0.0.1:4000,http://localhost:4000}
|
||||
SF_DEV_SERVER_WATCH: "1"
|
||||
SF_RPC_SHUTDOWN_GRACE_MS: "600000"
|
||||
command:
|
||||
- node
|
||||
- /opt/sf/dist/web/standalone/server.js
|
||||
|
|
|
|||
|
|
@ -30,9 +30,8 @@ The required gates are:
|
|||
- build `docker/Dockerfile.sf-server`
|
||||
- generate `dist/sf-release-manifest.json`
|
||||
|
||||
The image builder can be Docker, BuildKit, Kaniko, or `nix2container`. SF does
|
||||
not depend on the builder implementation. The deployment contract starts at the
|
||||
OCI image plus release manifest.
|
||||
The image builder is Docker/BuildKit. The deployment contract starts at the OCI
|
||||
image plus release manifest.
|
||||
|
||||
## Server Runtime
|
||||
|
||||
|
|
@ -77,6 +76,15 @@ implementation, one shared webserver process, and repo-scoped worker/session
|
|||
state underneath it. Restarting the runner replaces the shared vega webserver,
|
||||
not one container per repo.
|
||||
|
||||
Use `npm run docker:vega:upgrade` for the local blue/green path. It builds the
|
||||
web host, writes the release manifest, starts `sf-server-vega-candidate` on
|
||||
port 4001, probes health/readiness/version/projects, replaces `sf-server-vega`
|
||||
on port 4000 only after the candidate passes, probes prod, then removes the
|
||||
candidate. Replacement drains the old container with
|
||||
`docker stop --timeout ${SF_VEGA_DRAIN_STOP_TIME:-610}` before forced removal
|
||||
fallback. The default leaves a 10 second margin over the RPC child's
|
||||
`SF_RPC_SHUTDOWN_GRACE_MS=600000` queue-drain handler.
|
||||
|
||||
## Promotion
|
||||
|
||||
Test must roll before prod:
|
||||
|
|
|
|||
|
|
@ -52,12 +52,13 @@
|
|||
"build:core": "npm run build:pi && npm run build:rpc-client && npm run build:daemon && npm run check:versioned-json && tsgo && npm run copy-resources && npm run copy-themes && npm run copy-export-html",
|
||||
"build": "npm run build:core && node scripts/build-web-if-stale.cjs",
|
||||
"stage:web-host": "node scripts/stage-web-standalone.cjs",
|
||||
"build:web-host": "npm --prefix web run build && npm run stage:web-host",
|
||||
"build:web-host": "node scripts/build-web-host.mjs",
|
||||
"release:manifest": "node scripts/generate-release-manifest.mjs",
|
||||
"docker:build-sf-server": "docker build -f docker/Dockerfile.sf-server -t ghcr.io/singularity-ng/sf-server .",
|
||||
"docker:vega:up": "node scripts/run-vega-source-server.mjs up",
|
||||
"docker:vega:logs": "node scripts/run-vega-source-server.mjs logs",
|
||||
"docker:vega:down": "node scripts/run-vega-source-server.mjs down",
|
||||
"docker:vega:upgrade": "node scripts/upgrade-vega-source-server.mjs",
|
||||
"docs:features": "node scripts/generate-features-inventory.mjs",
|
||||
"copy-resources": "node scripts/copy-resources.cjs",
|
||||
"copy-themes": "node scripts/copy-themes.cjs",
|
||||
|
|
|
|||
37
scripts/build-web-host.mjs
Normal file
37
scripts/build-web-host.mjs
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
#!/usr/bin/env node
|
||||
/**
|
||||
* build-web-host.mjs — build and stage the standalone SF web host.
|
||||
*
|
||||
* Purpose: keep Next.js build output clean on Node 26 while preserving normal
|
||||
* build failures and staging behavior.
|
||||
*
|
||||
* Consumer: `npm run build:web-host` and the vega self-upgrade path.
|
||||
*/
|
||||
import { spawnSync } from "node:child_process";
|
||||
|
||||
// Step 1: build the web app under web/ with the adjusted environment from buildEnv().
const webBuildEnv = buildEnv();
run("npm", ["--prefix", "web", "run", "build"], { env: webBuildEnv });

// Step 2: run the stage:web-host npm script with the unmodified process environment.
run("npm", ["run", "stage:web-host"]);
|
||||
|
||||
function buildEnv() {
|
||||
const nodeOptions = [
|
||||
process.env.NODE_OPTIONS,
|
||||
"--disable-warning=DEP0205",
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join(" ");
|
||||
return {
|
||||
...process.env,
|
||||
NODE_OPTIONS: nodeOptions,
|
||||
NEXT_TELEMETRY_DISABLED: process.env.NEXT_TELEMETRY_DISABLED || "1",
|
||||
};
|
||||
}
|
||||
|
||||
function run(command, args, options = {}) {
|
||||
const result = spawnSync(command, args, {
|
||||
stdio: "inherit",
|
||||
env: options.env ?? process.env,
|
||||
});
|
||||
if (result.status !== 0) process.exit(result.status ?? 1);
|
||||
}
|
||||
|
|
@ -9,6 +9,7 @@
|
|||
* Consumer: `npm run docker:vega:up` on vega.
|
||||
*/
|
||||
import { spawnSync } from "node:child_process";
|
||||
import { statSync } from "node:fs";
|
||||
import { homedir } from "node:os";
|
||||
import { dirname, resolve } from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
|
@ -18,11 +19,19 @@ const image = process.env.SF_VEGA_IMAGE || "sf-source-server:vega";
|
|||
const bind = process.env.SF_VEGA_BIND || "127.0.0.1";
|
||||
const workspace = resolve(process.env.SF_WORKSPACE_DIR || root);
|
||||
const workspacesRoot = resolve(process.env.SF_WORKSPACES_DIR || dirname(root));
|
||||
const sourceHostRoot = resolve(process.env.SF_SOURCE_HOST_ROOT || root);
|
||||
const workspaceHost = resolve(process.env.SF_WORKSPACE_HOST_DIR || workspace);
|
||||
const workspacesHost = resolve(
|
||||
process.env.SF_WORKSPACES_HOST_DIR || workspacesRoot,
|
||||
);
|
||||
const sfHomeHost = resolve(process.env.SF_HOME_HOST_DIR || `${homedir()}/.sf`);
|
||||
const name = process.env.SF_VEGA_CONTAINER || "sf-server-vega";
|
||||
const port = process.env.SF_VEGA_PORT || "4000";
|
||||
const uid = process.env.PUID || String(process.getuid?.() ?? 1000);
|
||||
const gid = process.env.PGID || String(process.getgid?.() ?? 1000);
|
||||
const dockerSocketGid = socketGroupId("/var/run/docker.sock");
|
||||
const command = process.argv[2] ?? "up";
|
||||
const skipImageBuild = process.env.SF_VEGA_SKIP_IMAGE_BUILD === "1";
|
||||
|
||||
if (command === "--help" || command === "-h" || command === "help") {
|
||||
process.stdout.write(`Usage:
|
||||
|
|
@ -52,6 +61,10 @@ if (command === "print") {
|
|||
port,
|
||||
workspace,
|
||||
workspacesRoot,
|
||||
sourceHostRoot,
|
||||
workspaceHost,
|
||||
workspacesHost,
|
||||
sfHomeHost,
|
||||
sfSource: root,
|
||||
},
|
||||
null,
|
||||
|
|
@ -67,7 +80,8 @@ if (command === "logs") {
|
|||
}
|
||||
|
||||
if (command === "down") {
|
||||
run("docker", ["rm", "-f", name]);
|
||||
await requestDrain(port);
|
||||
drainContainer(name);
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
|
|
@ -80,16 +94,16 @@ const allowedOrigins =
|
|||
process.env.SF_WEB_ALLOWED_ORIGINS ||
|
||||
`http://127.0.0.1:${port},http://localhost:${port}`;
|
||||
|
||||
run("docker", [
|
||||
"build",
|
||||
"-f",
|
||||
"docker/Dockerfile.source-server",
|
||||
"-t",
|
||||
image,
|
||||
".",
|
||||
]);
|
||||
if (!skipImageBuild) {
|
||||
run(
|
||||
"docker",
|
||||
["build", "-f", "docker/Dockerfile.source-server", "-t", image, "."],
|
||||
{ env: dockerBuildEnv() },
|
||||
);
|
||||
}
|
||||
|
||||
spawnSync("docker", ["rm", "-f", name], { stdio: "ignore" });
|
||||
await requestDrain(port);
|
||||
drainContainer(name);
|
||||
|
||||
run("docker", [
|
||||
"run",
|
||||
|
|
@ -100,6 +114,7 @@ run("docker", [
|
|||
"unless-stopped",
|
||||
"--user",
|
||||
`${uid}:${gid}`,
|
||||
...(dockerSocketGid ? ["--group-add", dockerSocketGid] : []),
|
||||
"-p",
|
||||
`${bind}:${port}:4000`,
|
||||
"-e",
|
||||
|
|
@ -117,6 +132,14 @@ run("docker", [
|
|||
"-e",
|
||||
`SF_WORKSPACES_DIR=${workspacesRoot}`,
|
||||
"-e",
|
||||
`SF_SOURCE_HOST_ROOT=${sourceHostRoot}`,
|
||||
"-e",
|
||||
`SF_WORKSPACE_HOST_DIR=${workspaceHost}`,
|
||||
"-e",
|
||||
`SF_WORKSPACES_HOST_DIR=${workspacesHost}`,
|
||||
"-e",
|
||||
`SF_HOME_HOST_DIR=${sfHomeHost}`,
|
||||
"-e",
|
||||
"HOSTNAME=0.0.0.0",
|
||||
"-e",
|
||||
"PORT=4000",
|
||||
|
|
@ -128,18 +151,22 @@ run("docker", [
|
|||
`SF_WEB_ALLOWED_ORIGINS=${allowedOrigins}`,
|
||||
"-e",
|
||||
"SF_DEV_SERVER_WATCH=1",
|
||||
"-e",
|
||||
"SF_RPC_SHUTDOWN_GRACE_MS=600000",
|
||||
"-v",
|
||||
`${root}:/opt/sf`,
|
||||
`${sourceHostRoot}:/opt/sf`,
|
||||
"-v",
|
||||
`${workspace}:/workspace`,
|
||||
`${workspaceHost}:/workspace`,
|
||||
"-v",
|
||||
`${workspacesRoot}:/workspaces`,
|
||||
`${workspacesHost}:/workspaces`,
|
||||
"-v",
|
||||
`${workspacesRoot}:${workspacesRoot}`,
|
||||
`${workspacesHost}:${workspacesRoot}`,
|
||||
"-v",
|
||||
`${homedir()}/.sf:/home/node/.sf`,
|
||||
`${sfHomeHost}:/home/node/.sf`,
|
||||
"-v",
|
||||
`${homedir()}/.gitconfig:/home/node/.gitconfig:ro`,
|
||||
"-v",
|
||||
"/var/run/docker.sock:/var/run/docker.sock",
|
||||
image,
|
||||
"node",
|
||||
"/opt/sf/dist/web/standalone/server.js",
|
||||
|
|
@ -150,11 +177,92 @@ process.stdout.write(`SF source: ${root}\n`);
|
|||
process.stdout.write(`Initial workspace: ${workspace}\n`);
|
||||
process.stdout.write(`Workspace parent: ${workspacesRoot}\n`);
|
||||
|
||||
function run(command, args) {
|
||||
function run(command, args, options = {}) {
|
||||
const result = spawnSync(command, args, {
|
||||
cwd: root,
|
||||
stdio: "inherit",
|
||||
env: options.env ?? process.env,
|
||||
});
|
||||
if (result.status !== 0 && !options.allowFailure) {
|
||||
process.exit(result.status ?? 1);
|
||||
}
|
||||
}
|
||||
|
||||
function dockerBuildEnv() {
|
||||
return {
|
||||
...process.env,
|
||||
DOCKER_BUILDKIT: "1",
|
||||
BUILDKIT_PROGRESS: process.env.BUILDKIT_PROGRESS || "plain",
|
||||
DEBIAN_FRONTEND: "noninteractive",
|
||||
};
|
||||
}
|
||||
|
||||
function socketGroupId(path) {
|
||||
try {
|
||||
return String(statSync(path).gid);
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function drainContainer(containerName) {
|
||||
if (!containerExists(containerName)) return;
|
||||
const stopTime = process.env.SF_VEGA_DRAIN_STOP_TIME || "610";
|
||||
run("docker", ["stop", "--timeout", stopTime, containerName], {
|
||||
allowFailure: true,
|
||||
});
|
||||
run("docker", ["rm", "-f", containerName], { allowFailure: true });
|
||||
}
|
||||
|
||||
/**
 * Ask the running server to start draining before its container is stopped.
 *
 * Skipped entirely when the container named by `name` does not exist.
 * POSTs to /api/drain on the configured bind address. A 404 is ignored, any
 * other non-OK status is reported, and an OK response is followed by a short
 * wait for the health endpoint to reflect the drain. Failures are logged and
 * never abort the script.
 *
 * @param {string} targetPort host port the server is published on
 */
async function requestDrain(targetPort) {
	if (!containerExists(name)) return;
	const origin = `http://${bind}:${targetPort}`;
	try {
		const reply = await fetch(`${origin}/api/drain`, {
			method: "POST",
			headers: authHeaders(),
		});
		if (reply.ok) {
			await waitForDrainHealthz(origin);
		} else if (reply.status !== 404) {
			throw new Error(`drain returned ${reply.status}`);
		}
	} catch (failure) {
		const reason =
			failure instanceof Error ? failure.message : String(failure);
		process.stdout.write(`drain preflight skipped: ${reason}\n`);
	}
}
|
||||
|
||||
/**
 * Poll /api/healthz until it answers 503, for at most 10 seconds.
 *
 * Returns as soon as a 503 is seen, as soon as a request fails, or once the
 * deadline passes. Polls are spaced 250 ms apart.
 *
 * @param {string} baseUrl origin of the server, e.g. `http://127.0.0.1:4000`
 */
async function waitForDrainHealthz(baseUrl) {
	const giveUpAt = Date.now() + 10_000;
	for (;;) {
		if (Date.now() >= giveUpAt) return;
		let status;
		try {
			const reply = await fetch(`${baseUrl}/api/healthz`, {
				cache: "no-store",
				headers: authHeaders(),
			});
			status = reply.status;
		} catch {
			// A failed request ends the wait — presumably the server is already gone.
			return;
		}
		if (status === 503) return;
		await new Promise((wake) => {
			setTimeout(wake, 250);
		});
	}
}
|
||||
|
||||
/**
 * Request headers carrying the web auth token, when one is configured.
 *
 * @returns {{ Authorization?: string }} a Bearer Authorization header built
 *   from SF_WEB_AUTH_TOKEN, or an empty object when the variable is unset or empty
 */
function authHeaders() {
	const { SF_WEB_AUTH_TOKEN: bearer } = process.env;
	if (!bearer) return {};
	return { Authorization: `Bearer ${bearer}` };
}
|
||||
|
||||
function containerExists(containerName) {
|
||||
const result = spawnSync("docker", ["container", "inspect", containerName], {
|
||||
cwd: root,
|
||||
stdio: "ignore",
|
||||
env: process.env,
|
||||
});
|
||||
if (result.status !== 0) process.exit(result.status ?? 1);
|
||||
return result.status === 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,31 +22,38 @@ const prodPort = process.env.SF_VEGA_PORT || "4000";
|
|||
const candidatePort = process.env.SF_VEGA_CANDIDATE_PORT || "4001";
|
||||
const workspacesRoot = process.env.SF_WORKSPACES_DIR || dirname(root);
|
||||
const skipBuild = process.env.SF_VEGA_UPGRADE_SKIP_BUILD === "1";
|
||||
const probeBind = process.env.SF_VEGA_PROBE_HOST || bind;
|
||||
|
||||
if (!skipBuild) {
|
||||
run("npm", ["run", "build:web-host"]);
|
||||
run("npm", ["run", "build:web-host"], { env: buildEnv() });
|
||||
run(process.execPath, [
|
||||
"scripts/generate-release-manifest.mjs",
|
||||
"--out",
|
||||
"dist/sf-release-manifest.json",
|
||||
]);
|
||||
}
|
||||
run("docker", [
|
||||
"build",
|
||||
"-f",
|
||||
"docker/Dockerfile.source-server",
|
||||
"-t",
|
||||
process.env.SF_VEGA_IMAGE || "sf-source-server:vega",
|
||||
".",
|
||||
]);
|
||||
run(
|
||||
"docker",
|
||||
[
|
||||
"build",
|
||||
"-f",
|
||||
"docker/Dockerfile.source-server",
|
||||
"-t",
|
||||
process.env.SF_VEGA_IMAGE || "sf-source-server:vega",
|
||||
".",
|
||||
],
|
||||
{ env: dockerBuildEnv() },
|
||||
);
|
||||
|
||||
startServer(candidateName, candidatePort);
|
||||
await probeServer(candidatePort, "candidate");
|
||||
|
||||
await requestDrain(prodPort, "prod");
|
||||
drainContainer(prodName);
|
||||
startServer(prodName, prodPort);
|
||||
await probeServer(prodPort, "prod");
|
||||
|
||||
await requestDrain(candidatePort, "candidate");
|
||||
drainContainer(candidateName);
|
||||
process.stdout.write(
|
||||
`sf server upgraded: ${prodName} is healthy on ${bind}:${prodPort}\n`,
|
||||
|
|
@ -64,7 +71,7 @@ function startServer(name, port) {
|
|||
}
|
||||
|
||||
async function probeServer(port, label) {
|
||||
const baseUrl = `http://${bind}:${port}`;
|
||||
const baseUrl = `http://${probeBind}:${port}`;
|
||||
const checks = [
|
||||
["healthz", `${baseUrl}/api/healthz`],
|
||||
["ready", `${baseUrl}/api/ready`],
|
||||
|
|
@ -103,6 +110,58 @@ async function probeServer(port, label) {
|
|||
throw new Error(`${label} probes failed: ${lastError}`);
|
||||
}
|
||||
|
||||
/**
 * Ask a server to start draining before its container is stopped.
 *
 * POSTs to /api/drain on the probe host. A 404 is ignored, any other non-OK
 * status is reported, and an OK response is followed by a short wait for the
 * health endpoint to reflect the drain. Failures are logged with the label
 * and never abort the upgrade.
 *
 * @param {string} port host port the server is published on
 * @param {string} label name used in log lines (e.g. "prod", "candidate")
 */
async function requestDrain(port, label) {
	const origin = `http://${probeBind}:${port}`;
	try {
		const reply = await fetch(`${origin}/api/drain`, {
			method: "POST",
			headers: authHeaders(),
		});
		if (reply.ok) {
			await waitForDrainHealthz(port, label);
		} else if (reply.status !== 404) {
			throw new Error(`drain returned ${reply.status}`);
		}
	} catch (failure) {
		const reason =
			failure instanceof Error ? failure.message : String(failure);
		process.stdout.write(`${label} drain preflight skipped: ${reason}\n`);
	}
}
|
||||
|
||||
/**
 * Poll /api/healthz until it answers 503, for at most 10 seconds.
 *
 * Logs an acknowledgement when a 503 is seen. Request errors are remembered
 * and polling continues. If the deadline passes without a 503, logs the last
 * observed status or error message. Polls are spaced 250 ms apart.
 *
 * @param {string} port host port the server is published on
 * @param {string} label name used in log lines (e.g. "prod", "candidate")
 */
async function waitForDrainHealthz(port, label) {
	const origin = `http://${probeBind}:${port}`;
	const giveUpAt = Date.now() + 10_000;
	let lastSeen = "unknown";
	for (;;) {
		if (Date.now() >= giveUpAt) break;
		try {
			const reply = await fetch(`${origin}/api/healthz`, {
				cache: "no-store",
				headers: authHeaders(),
			});
			lastSeen = String(reply.status);
			if (reply.status === 503) {
				process.stdout.write(`${label} drain acknowledged on ${origin}\n`);
				return;
			}
		} catch (failure) {
			lastSeen = failure instanceof Error ? failure.message : String(failure);
		}
		await delay(250);
	}
	process.stdout.write(
		`${label} drain did not surface on healthz before stop (last=${lastSeen})\n`,
	);
}
|
||||
|
||||
/**
 * Request headers carrying the web auth token, when one is configured.
 *
 * @returns {{ Authorization?: string }} a Bearer Authorization header built
 *   from SF_WEB_AUTH_TOKEN, or an empty object when the variable is unset or empty
 */
function authHeaders() {
	const bearer = process.env.SF_WEB_AUTH_TOKEN;
	if (bearer) {
		return { Authorization: `Bearer ${bearer}` };
	}
	return {};
}
|
||||
|
||||
function showLogs(name) {
|
||||
spawnSync("docker", ["logs", "--tail=120", name], {
|
||||
cwd: root,
|
||||
|
|
@ -111,7 +170,31 @@ function showLogs(name) {
|
|||
});
|
||||
}
|
||||
|
||||
function buildEnv() {
|
||||
const nodeOptions = [process.env.NODE_OPTIONS, "--disable-warning=DEP0205"]
|
||||
.filter(Boolean)
|
||||
.join(" ");
|
||||
return {
|
||||
...process.env,
|
||||
NODE_ENV: "production",
|
||||
NEXT_TELEMETRY_DISABLED: "1",
|
||||
NODE_OPTIONS: nodeOptions,
|
||||
NPM_CONFIG_UPDATE_NOTIFIER: "false",
|
||||
npm_config_update_notifier: "false",
|
||||
};
|
||||
}
|
||||
|
||||
function dockerBuildEnv() {
|
||||
return {
|
||||
...process.env,
|
||||
DOCKER_BUILDKIT: "1",
|
||||
BUILDKIT_PROGRESS: process.env.BUILDKIT_PROGRESS || "plain",
|
||||
DEBIAN_FRONTEND: "noninteractive",
|
||||
};
|
||||
}
|
||||
|
||||
function drainContainer(name) {
|
||||
if (!containerExists(name)) return;
|
||||
// 610s: matches SF_RPC_SHUTDOWN_GRACE_MS=600000 in rpc-mode's
|
||||
// graceful-shutdown handler with a 10s safety margin for Node exit.
|
||||
// Normal drains finish in <1s; the long ceiling is for pathological
|
||||
|
|
@ -122,6 +205,15 @@ function drainContainer(name) {
|
|||
run("docker", ["rm", "-f", name], { allowFailure: true });
|
||||
}
|
||||
|
||||
function containerExists(name) {
|
||||
const result = spawnSync("docker", ["container", "inspect", name], {
|
||||
cwd: root,
|
||||
stdio: "ignore",
|
||||
env: process.env,
|
||||
});
|
||||
return result.status === 0;
|
||||
}
|
||||
|
||||
function delay(ms) {
|
||||
return new Promise((resolveDelay) => setTimeout(resolveDelay, ms));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,42 @@
|
|||
/**
|
||||
* auto-shutdown-signal.test.mjs — verifies autonomous shutdown signalling.
|
||||
*
|
||||
* Purpose: prove SIGTERM-aware autonomous loops can stop starting new work at
|
||||
* safe boundaries during server/container drain.
|
||||
*
|
||||
* Consumer: auto/loop.js before dispatching each autonomous iteration.
|
||||
*/
|
||||
import assert from "node:assert/strict";
|
||||
import { test } from "node:test";
|
||||
|
||||
import {
|
||||
_resetAutonomousShutdownForTests,
|
||||
autonomousShutdownSnapshot,
|
||||
isAutonomousShutdownRequested,
|
||||
requestAutonomousShutdown,
|
||||
} from "../auto/shutdown-signal.js";
|
||||
|
||||
test("shutdown_signal_when_requested_exposes_snapshot", () => {
	// Start from a clean slate so an earlier test cannot leave the flag set.
	_resetAutonomousShutdownForTests();
	assert.equal(isAutonomousShutdownRequested(), false);

	requestAutonomousShutdown("SIGTERM");
	assert.equal(isAutonomousShutdownRequested(), true);

	const { requested, signal, requestedAt, elapsedMs } =
		autonomousShutdownSnapshot();
	assert.equal(requested, true);
	assert.equal(signal, "SIGTERM");
	// requestedAt must start with a YYYY-MM-DD date followed by "T".
	assert.match(requestedAt, /^\d{4}-\d{2}-\d{2}T/);
	assert.equal(typeof elapsedMs, "number");

	// Leave no shutdown request behind for later tests.
	_resetAutonomousShutdownForTests();
});
|
||||
|
||||
test("shutdown_signal_when_called_twice_keeps_first_signal", () => {
	_resetAutonomousShutdownForTests();

	// Request shutdown twice with different signals; the first one must be kept.
	for (const incoming of ["SIGTERM", "SIGINT"]) {
		requestAutonomousShutdown(incoming);
	}

	const { signal: recorded } = autonomousShutdownSnapshot();
	assert.equal(recorded, "SIGTERM");

	_resetAutonomousShutdownForTests();
});
|
||||
|
|
@ -5,6 +5,7 @@ import type {
|
|||
ProjectDetectionSignals,
|
||||
} from "./bridge-service.ts";
|
||||
import { detectProjectKind } from "./bridge-service.ts";
|
||||
import { recoverProjectRuntimeQueues } from "./project-runtime-recovery.ts";
|
||||
|
||||
// ─── Project Discovery ─────────────────────────────────────────────────────
|
||||
|
||||
|
|
@ -105,6 +106,7 @@ export function discoverProjects(
|
|||
// .sf, or is a recognizable project), return it as a single entry.
|
||||
const rootDetection = detectProjectKind(devRootPath);
|
||||
if (rootDetection.signals.isMonorepo) {
|
||||
recoverProjectRuntimeQueues(devRootPath);
|
||||
const stat = statSync(devRootPath);
|
||||
return [
|
||||
{
|
||||
|
|
@ -131,6 +133,7 @@ export function discoverProjects(
|
|||
if (EXCLUDED_DIRS.has(entry.name)) continue;
|
||||
|
||||
const fullPath = join(devRootPath, entry.name);
|
||||
recoverProjectRuntimeQueues(fullPath);
|
||||
const { kind, signals } = detectProjectKind(fullPath);
|
||||
const stat = statSync(fullPath);
|
||||
|
||||
|
|
@ -147,6 +150,7 @@ export function discoverProjects(
|
|||
|
||||
for (const nestedSfProject of findNestedSfProjects(devRootPath)) {
|
||||
if (seen.has(nestedSfProject)) continue;
|
||||
recoverProjectRuntimeQueues(nestedSfProject);
|
||||
const { kind, signals } = detectProjectKind(nestedSfProject);
|
||||
const stat = statSync(nestedSfProject);
|
||||
projects.push({
|
||||
|
|
|
|||
22
src/web/project-runtime-recovery.ts
Normal file
22
src/web/project-runtime-recovery.ts
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
/**
|
||||
* project-runtime-recovery.ts — best-effort repair of repo-local runtime queues.
|
||||
*
|
||||
* Purpose: let the shared SF webserver make project state reload-safe before
|
||||
* surfacing a repo as ready after container replacement.
|
||||
*
|
||||
* Consumer: web readiness and project discovery API routes.
|
||||
*/
|
||||
import { recoverOrphanedFeedbackDrains } from "../../packages/coding-agent/src/modes/rpc/feedback-queue-recovery.ts";
|
||||
|
||||
/**
 * Recover transient runtime files that can be safely replayed for one project.
 *
 * Purpose: keep repo-local `.sf/runtime` queues from staying stranded after a
 * fast webserver/container upgrade.
 *
 * Consumer: `/api/ready`, `/api/projects`, and shared project discovery.
 *
 * Best-effort: a failure inside the recovery helper is logged as a warning and
 * never thrown, so one unrecoverable project cannot fail a readiness probe or
 * abort enumeration of the remaining projects.
 *
 * @param projectPath absolute project directory; `null` or empty is a no-op
 */
export function recoverProjectRuntimeQueues(projectPath: string | null): void {
	if (!projectPath) return;
	try {
		recoverOrphanedFeedbackDrains(projectPath);
	} catch (error: unknown) {
		const reason = error instanceof Error ? error.message : String(error);
		console.warn(
			`runtime queue recovery failed for ${projectPath}: ${reason}`,
		);
	}
}
|
||||
25
web/app/api/drain/route.ts
Normal file
25
web/app/api/drain/route.ts
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
import { verifyAuthToken } from "../../../lib/auth-guard";
|
||||
import {
|
||||
markShuttingDown,
|
||||
shutdownStateSnapshot,
|
||||
} from "../../../../src/web/shutdown-state.ts";
|
||||
|
||||
export const runtime = "nodejs";
|
||||
export const dynamic = "force-dynamic";
|
||||
|
||||
/**
 * Handle `POST /api/drain`: mark the server as shutting down.
 *
 * Requests that fail the auth check get the guard's response back unchanged.
 * Otherwise the shutdown state is marked with reason "manual" and the handler
 * answers 202 with `accepted: true` merged with the current shutdown
 * snapshot. The response is sent with `Cache-Control: no-store`.
 */
export async function POST(request: Request): Promise<Response> {
	const denied = verifyAuthToken(request);
	if (denied) return denied;

	// Mark first so the snapshot in the response reflects the new state.
	markShuttingDown("manual");
	const body = { accepted: true, ...shutdownStateSnapshot() };

	return Response.json(body, {
		status: 202,
		headers: { "Cache-Control": "no-store" },
	});
}
|
||||
|
|
@ -1,22 +1,33 @@
|
|||
import { existsSync } from "node:fs";
|
||||
|
||||
import { getReleaseInfo } from "../../../../src/web/release-info.ts";
|
||||
import { recoverProjectRuntimeQueues } from "../../../../src/web/project-runtime-recovery.ts";
|
||||
import {
|
||||
isShuttingDown,
|
||||
shutdownStateSnapshot,
|
||||
} from "../../../../src/web/shutdown-state.ts";
|
||||
|
||||
export const runtime = "nodejs";
|
||||
export const dynamic = "force-dynamic";
|
||||
|
||||
export async function GET(): Promise<Response> {
|
||||
const release = getReleaseInfo();
|
||||
recoverProjectRuntimeQueues(release.projectCwd);
|
||||
const projectReady =
|
||||
release.projectCwd === null || existsSync(release.projectCwd);
|
||||
const ready = release.ok && projectReady;
|
||||
const shuttingDown = isShuttingDown();
|
||||
const ready = release.ok && projectReady && !shuttingDown;
|
||||
return Response.json(
|
||||
{
|
||||
...release,
|
||||
ready,
|
||||
...(shuttingDown
|
||||
? { shuttingDown: true, shutdown: shutdownStateSnapshot() }
|
||||
: {}),
|
||||
checks: {
|
||||
projectCwd: projectReady ? "pass" : "fail",
|
||||
manifest: release.manifestLoaded ? "pass" : "absent",
|
||||
shutdown: shuttingDown ? "draining" : "pass",
|
||||
},
|
||||
},
|
||||
{
|
||||
|
|
|
|||
107
web/app/api/server-upgrade/route.ts
Normal file
107
web/app/api/server-upgrade/route.ts
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
import { spawnSync } from "node:child_process";
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { statSync } from "node:fs";
|
||||
import { getgid, getuid } from "node:process";
|
||||
|
||||
import { verifyAuthToken } from "../../../lib/auth-guard";
|
||||
|
||||
export const runtime = "nodejs";
|
||||
export const dynamic = "force-dynamic";
|
||||
|
||||
/**
 * Handle `POST /api/server-upgrade`: launch a detached upgrader container.
 *
 * Requests that fail the auth check get the guard's response back unchanged.
 * Otherwise the handler runs `docker run -d --rm` on the host network with the
 * source, workspace, SF home and docker socket mounted, passes the host-side
 * directory settings through as environment variables, and starts
 * `scripts/upgrade-vega-source-server.mjs` inside it.
 *
 * @returns 202 with the upgrader name and container id when docker accepted
 *   the run, or 500 with docker's error text otherwise
 */
export async function POST(request: Request): Promise<Response> {
	const denied = verifyAuthToken(request);
	if (denied) return denied;

	const env = process.env;
	const sourceHostRoot =
		env.SF_SOURCE_HOST_ROOT ?? "/home/mhugo/code/singularity-forge";
	const workspaceHost =
		env.SF_WORKSPACE_HOST_DIR ?? env.SF_WEB_PROJECT_CWD ?? sourceHostRoot;
	const workspacesHost = env.SF_WORKSPACES_HOST_DIR ?? "/home/mhugo/code";
	const sfHomeHost = env.SF_HOME_HOST_DIR ?? "/home/mhugo/.sf";
	const image = env.SF_VEGA_IMAGE ?? "sf-source-server:vega";
	// Random suffix gives every upgrader container a distinct name.
	const name = `sf-server-vega-upgrader-${randomUUID().slice(0, 8)}`;
	const uid = env.PUID ?? String(getuid?.() ?? 1000);
	const gid = env.PGID ?? String(getgid?.() ?? 1000);
	const dockerSocketGid = socketGroupId("/var/run/docker.sock");

	// Bind mounts, in the order they are passed to docker.
	const mounts = [
		`${sourceHostRoot}:/opt/sf`,
		`${workspaceHost}:/workspace`,
		`${workspacesHost}:/workspaces`,
		`${workspacesHost}:${workspacesHost}`,
		`${sfHomeHost}:/home/node/.sf`,
		"/var/run/docker.sock:/var/run/docker.sock",
	];

	// Environment handed to the upgrader, in the order it is passed to docker.
	const containerEnv = [
		`SF_SOURCE_HOST_ROOT=${sourceHostRoot}`,
		`SF_WORKSPACE_HOST_DIR=${workspaceHost}`,
		`SF_WORKSPACES_HOST_DIR=${workspacesHost}`,
		`SF_HOME_HOST_DIR=${sfHomeHost}`,
		`SF_WORKSPACE_DIR=${workspaceHost}`,
		`SF_WORKSPACES_DIR=${workspacesHost}`,
		"SF_VEGA_PORT=4000",
		"SF_VEGA_CANDIDATE_PORT=4001",
		"SF_VEGA_PROBE_HOST=127.0.0.1",
		"DOCKER_BUILDKIT=1",
		"BUILDKIT_PROGRESS=plain",
	];

	const dockerArgs = [
		"run",
		"-d",
		"--rm",
		"--name",
		name,
		"--network",
		"host",
		"--user",
		`${uid}:${gid}`,
	];
	// Add the docker socket's group only when the socket could be stat'ed.
	if (dockerSocketGid) dockerArgs.push("--group-add", dockerSocketGid);
	for (const mount of mounts) dockerArgs.push("-v", mount);
	for (const entry of containerEnv) dockerArgs.push("-e", entry);
	dockerArgs.push(
		image,
		"node",
		"/opt/sf/scripts/upgrade-vega-source-server.mjs",
	);

	try {
		const { status, stdout, stderr } = spawnSync("docker", dockerArgs, {
			cwd: "/opt/sf",
			encoding: "utf8",
			stdio: ["ignore", "pipe", "pipe"],
		});
		if (status !== 0) {
			throw new Error(stderr || stdout || "docker run failed");
		}
		// `docker run -d` prints the new container id on stdout.
		return Response.json(
			{ triggered: true, upgrader: name, containerId: stdout.trim() },
			{ status: 202, headers: { "Cache-Control": "no-store" } },
		);
	} catch (error) {
		const message = error instanceof Error ? error.message : String(error);
		return Response.json(
			{ error: message },
			{ status: 500, headers: { "Cache-Control": "no-store" } },
		);
	}
}
|
||||
|
||||
/**
 * Look up the numeric group id that owns a filesystem path.
 *
 * @param path path to stat (used here for the docker socket)
 * @returns the gid as a string, or `null` when the path cannot be stat'ed
 */
function socketGroupId(path: string): string | null {
	let gid: number;
	try {
		gid = statSync(path).gid;
	} catch {
		// Missing or unreadable path: the caller then omits --group-add.
		return null;
	}
	return String(gid);
}
|
||||
|
|
@ -14,6 +14,7 @@ import {
|
|||
Layers,
|
||||
Loader2,
|
||||
Plus,
|
||||
RefreshCw,
|
||||
Search,
|
||||
Sparkles,
|
||||
X,
|
||||
|
|
@ -393,6 +394,8 @@ export function ProjectsPanel({
|
|||
const [newProjectOpen, setNewProjectOpen] = useState(false);
|
||||
const [changeRootOpen, setChangeRootOpen] = useState(false);
|
||||
const [addRepoOpen, setAddRepoOpen] = useState(false);
|
||||
const [upgradeBusy, setUpgradeBusy] = useState(false);
|
||||
const [upgradeError, setUpgradeError] = useState<string | null>(null);
|
||||
const _workspaceState = useSFWorkspaceState();
|
||||
|
||||
const handleProjectCreated = useCallback(
|
||||
|
|
@ -427,6 +430,27 @@ export function ProjectsPanel({
|
|||
[],
|
||||
);
|
||||
|
||||
// Trigger a server upgrade via POST /api/server-upgrade.
// Sets the busy flag for the duration of the request, clears any previous
// error first, and stores a readable message when the trigger fails.
const handleUpgradeServer = useCallback(async () => {
	setUpgradeBusy(true);
	setUpgradeError(null);
	try {
		const response = await authFetch("/api/server-upgrade", {
			method: "POST",
		});
		if (response.ok) return;

		// Prefer the server's error text; fall back to the status code when the
		// body is not JSON or carries no `error` field.
		const payload: { error?: string } = await response
			.json()
			.catch(() => ({}));
		throw new Error(
			payload.error ?? `Upgrade trigger failed (${response.status})`,
		);
	} catch (failure) {
		const message =
			failure instanceof Error ? failure.message : "Failed to trigger upgrade";
		setUpgradeError(message);
	} finally {
		setUpgradeBusy(false);
	}
}, []);
|
||||
|
||||
// Sort: active-sf first, then by name
|
||||
const sortedProjects = [...projects].sort((a, b) => {
|
||||
const kindOrder: Record<ProjectDetectionKind, number> = {
|
||||
|
|
@ -587,15 +611,34 @@ export function ProjectsPanel({
|
|||
</div>
|
||||
)}
|
||||
</div>
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="icon"
|
||||
className="h-8 w-8 shrink-0"
|
||||
onClick={() => onOpenChange(false)}
|
||||
>
|
||||
<X className="h-4 w-4" />
|
||||
</Button>
|
||||
<div className="flex items-center gap-1">
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="icon"
|
||||
className="h-8 w-8 shrink-0"
|
||||
onClick={() => void handleUpgradeServer()}
|
||||
disabled={upgradeBusy}
|
||||
title="Upgrade server"
|
||||
>
|
||||
<RefreshCw
|
||||
className={cn("h-4 w-4", upgradeBusy && "animate-spin")}
|
||||
/>
|
||||
</Button>
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="icon"
|
||||
className="h-8 w-8 shrink-0"
|
||||
onClick={() => onOpenChange(false)}
|
||||
>
|
||||
<X className="h-4 w-4" />
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
{upgradeError && (
|
||||
<div className="border-b border-border/50 px-5 py-2 text-xs text-destructive">
|
||||
{upgradeError}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Scrollable project list */}
|
||||
<ScrollArea className="min-h-0 flex-1">
|
||||
|
|
|
|||
|
|
@ -1,4 +1,12 @@
|
|||
import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
|
||||
import {
|
||||
appendFileSync,
|
||||
existsSync,
|
||||
readdirSync,
|
||||
readFileSync,
|
||||
renameSync,
|
||||
statSync,
|
||||
unlinkSync,
|
||||
} from "node:fs";
|
||||
import { homedir } from "node:os";
|
||||
import { basename, join, resolve } from "node:path";
|
||||
import type { NextApiRequest, NextApiResponse } from "next";
|
||||
|
|
@ -35,6 +43,7 @@ type WebPreferences = {
|
|||
|
||||
const EXCLUDED_DIRS = new Set(["node_modules", ".git"]);
|
||||
const MAX_NESTED_SF_DEPTH = 3;
|
||||
const SF_FEEDBACK_QUEUE_FILE = "sf-feedback-queue.jsonl";
|
||||
const webPreferencesPath = join(
|
||||
process.env.SF_HOME || join(homedir(), ".sf"),
|
||||
"web-preferences.json",
|
||||
|
|
@ -111,6 +120,7 @@ function projectMetadata(
|
|||
path: string,
|
||||
includeProgress: boolean,
|
||||
): ProjectMetadata {
|
||||
recoverProjectRuntimeQueues(path);
|
||||
const stat = statSync(path);
|
||||
const signals = detectProject(path);
|
||||
const kind = signals.hasSfFolder
|
||||
|
|
@ -130,6 +140,52 @@ function projectMetadata(
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Fold leftover feedback-queue drain files back into the project's live queue.
 *
 * Scans `<projectPath>/.sf/runtime` for entries named
 * `<SF_FEEDBACK_QUEUE_FILE>.<pid>.<token>.draining`. For each one whose
 * `<pid>` is a positive number that `isPidAlive` reports as not running, the
 * file's contents are appended to the live queue file and the leftover is
 * deleted; if no live queue file exists, the leftover is simply renamed into
 * its place.
 *
 * Every filesystem failure is swallowed: a missing or unreadable runtime
 * directory ends the call, and a failure on one leftover does not stop the
 * remaining ones from being processed.
 *
 * @param projectPath - Root directory of the project to inspect.
 */
function recoverProjectRuntimeQueues(projectPath: string): void {
  const runtimeDir = join(projectPath, ".sf", "runtime");
  if (!existsSync(runtimeDir)) {
    return;
  }

  let names: string[];
  try {
    names = readdirSync(runtimeDir);
  } catch {
    return;
  }

  // Escape the dots of the queue file name so they match literally.
  const escapedQueueName = SF_FEEDBACK_QUEUE_FILE.replace(/\./g, "\\.");
  const orphanPattern = new RegExp(
    `^${escapedQueueName}\\.(\\d+)\\.[^.]+\\.draining$`,
  );
  const liveQueuePath = join(runtimeDir, SF_FEEDBACK_QUEUE_FILE);

  for (const entryName of names) {
    const parsed = orphanPattern.exec(entryName);
    if (parsed === null) {
      continue;
    }

    const ownerPid = Number(parsed[1]);
    const pidUsable = Number.isFinite(ownerPid) && ownerPid > 0;
    // Leave the file alone when the pid is nonsense or its owner still runs.
    if (!pidUsable || isPidAlive(ownerPid)) {
      continue;
    }

    const leftoverPath = join(runtimeDir, entryName);
    try {
      if (!existsSync(liveQueuePath)) {
        renameSync(leftoverPath, liveQueuePath);
        continue;
      }
      // Append first, delete second: a failure in between leaves the
      // leftover on disk rather than dropping its contents.
      const pending = readFileSync(leftoverPath, "utf-8");
      appendFileSync(liveQueuePath, pending, "utf-8");
      unlinkSync(leftoverPath);
    } catch {
      // Best effort only; a later RPC/web probe can retry recovery.
    }
  }
}
|
||||
|
||||
/**
 * Report whether a process with the given pid currently exists.
 *
 * Sends signal 0 via `process.kill`, which performs the existence/permission
 * check without delivering a signal. An `EPERM` failure is treated as alive
 * (the process exists but cannot be signalled by us); any other failure is
 * treated as not alive.
 *
 * NOTE(review): the probe consults the process table visible to this process;
 * if pids recorded by a different container/PID namespace are passed in, the
 * answer may not refer to the original writer — confirm against callers.
 *
 * @param pid - Process id to probe.
 * @returns `true` when the process exists, `false` otherwise.
 */
function isPidAlive(pid: number): boolean {
  let alive = true;
  try {
    process.kill(pid, 0);
  } catch (probeError) {
    const errorCode =
      probeError instanceof Error && "code" in probeError
        ? (probeError as NodeJS.ErrnoException).code
        : undefined;
    alive = errorCode === "EPERM";
  }
  return alive;
}
|
||||
|
||||
function discoverProjects(root: string, includeProgress: boolean) {
|
||||
const explicitProjects = readExplicitProjectPaths();
|
||||
if (explicitProjects.length > 0) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue