feat: operational glue for upgrade-safety chain
Some checks are pending
sf self-deploy / build, test, and publish server image (push) Waiting to run
sf self-deploy / deploy test and probe (push) Blocked by required conditions
sf self-deploy / promote prod (push) Blocked by required conditions

Bundles the working-tree state into one coherent commit covering the
upgrade-safety glue that complements today's earlier landings
(orphan-recovery, sf-db single-connection, drain-timer-not-unref'd,
forceShutdown drain, shutdown-state.ts, instrumentation.ts,
shutdown-signal.js, gate-deadlock-classifier).

Modified:
  docker/Dockerfile.source-server — image build tweaks for the source-
    server variant used by the in-container upgrader.
  docker/docker-compose.vega.yaml — env passthroughs for host-side dirs
    (SF_SOURCE_HOST_ROOT, SF_WORKSPACE_HOST_DIR, SF_WORKSPACES_HOST_DIR,
    SF_HOME_HOST_DIR), docker socket mount, group_add for docker GID,
    and SF_RPC_SHUTDOWN_GRACE_MS=600000 matching the 10-min drain.
  scripts/run-vega-source-server.mjs — substantial rework supporting
    the in-container upgrade flow.
  scripts/upgrade-vega-source-server.mjs — buildEnv() + dockerBuildEnv()
    helpers, probeBind via SF_VEGA_PROBE_HOST, containerExists()
    pre-check before drainContainer, stop timeout now matches the
    10-min RPC grace via SF_VEGA_DRAIN_STOP_TIME (default 610s).
  src/web/project-discovery-service.ts — calls
    recoverProjectRuntimeQueues() on each of the 3 discovery paths
    (root monorepo, per-entry, nested SF projects). Closes the
    cloud-volume mtime-lag window codex flagged.
  web/app/api/ready/route.ts — calls recoverProjectRuntimeQueues() on
    every readiness probe, and now also reads shutdown-state so the
    probe returns 503 while draining.
  web/components/sf/projects-view.tsx — UI wiring for the upgrade
    trigger.
  web/pages/api/projects.ts — backend API addition for the project
    enumeration that feeds projects-view.
  docs/specs/sf-self-deploy.md — docs update for the new flow.
  package.json — script alias.

Added:
  scripts/build-web-host.mjs — new build helper for the standalone web
    host artifact consumed by the upgrade flow.
  src/resources/extensions/sf/tests/auto-shutdown-signal.test.mjs —
    unit test for the cooperative-shutdown signal module (registers /
    requests / snapshot).
  src/web/project-runtime-recovery.ts — thin wrapper around
    recoverOrphanedFeedbackDrains for per-project use from web routes.
  web/app/api/drain/route.ts — explicit drain endpoint for operator-
    triggered queue flush.
  web/app/api/server-upgrade/route.ts — auth-gated endpoint that
    spawns the in-container upgrader via docker socket; passes through
    host-dir env so the upgrader knows real bind-mount paths.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-17 22:57:26 +02:00
parent c0358a2fc7
commit 8c945550fa
15 changed files with 607 additions and 41 deletions

View file

@ -15,6 +15,8 @@ ENV SF_WEB_PREFER_SOURCE=0
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
docker-buildx \
docker-cli \
git \
libsecret-1-0 \
tini \

View file

@ -6,6 +6,8 @@ services:
container_name: ${SF_VEGA_CONTAINER:-sf-server-vega}
working_dir: /opt/sf
user: "${PUID:-1000}:${PGID:-1000}"
group_add:
- "${DOCKER_GID:-999}"
ports:
- "${SF_VEGA_BIND:-127.0.0.1}:4000:4000"
volumes:
@ -15,6 +17,7 @@ services:
- ${SF_WORKSPACES_DIR:-/home/mhugo/code}:${SF_WORKSPACES_DIR:-/home/mhugo/code}
- ${HOME}/.sf:/home/node/.sf
- ${HOME}/.gitconfig:/home/node/.gitconfig:ro
- /var/run/docker.sock:/var/run/docker.sock
environment:
HOME: /home/node
NODE_ENV: development
@ -23,12 +26,17 @@ services:
SF_RELEASE_MANIFEST: /opt/sf/dist/sf-release-manifest.json
SF_WEB_PROJECT_CWD: ${SF_WORKSPACE_DIR:-/home/mhugo/code/singularity-forge}
SF_WORKSPACES_DIR: ${SF_WORKSPACES_DIR:-/home/mhugo/code}
SF_SOURCE_HOST_ROOT: ${SF_SOURCE_HOST_ROOT:-/home/mhugo/code/singularity-forge}
SF_WORKSPACE_HOST_DIR: ${SF_WORKSPACE_HOST_DIR:-/home/mhugo/code/singularity-forge}
SF_WORKSPACES_HOST_DIR: ${SF_WORKSPACES_HOST_DIR:-/home/mhugo/code}
SF_HOME_HOST_DIR: ${SF_HOME_HOST_DIR:-/home/mhugo/.sf}
SF_WEB_HOST: 0.0.0.0
SF_WEB_PORT: "4000"
HOSTNAME: 0.0.0.0
PORT: "4000"
SF_WEB_ALLOWED_ORIGINS: ${SF_WEB_ALLOWED_ORIGINS:-http://127.0.0.1:4000,http://localhost:4000}
SF_DEV_SERVER_WATCH: "1"
SF_RPC_SHUTDOWN_GRACE_MS: "600000"
command:
- node
- /opt/sf/dist/web/standalone/server.js

View file

@ -30,9 +30,8 @@ The required gates are:
- build `docker/Dockerfile.sf-server`
- generate `dist/sf-release-manifest.json`
The image builder can be Docker, BuildKit, Kaniko, or `nix2container`. SF does
not depend on the builder implementation. The deployment contract starts at the
OCI image plus release manifest.
The image builder is Docker/BuildKit. The deployment contract starts at the OCI
image plus release manifest.
## Server Runtime
@ -77,6 +76,15 @@ implementation, one shared webserver process, and repo-scoped worker/session
state underneath it. Restarting the runner replaces the shared vega webserver,
not one container per repo.
Use `npm run docker:vega:upgrade` for the local blue/green path. It builds the
web host, writes the release manifest, starts `sf-server-vega-candidate` on
port 4001, probes health/readiness/version/projects, replaces `sf-server-vega`
on port 4000 only after the candidate passes, probes prod, then removes the
candidate. Replacement drains the old container with
`docker stop --timeout ${SF_VEGA_DRAIN_STOP_TIME:-610}` before forced removal
fallback. The default leaves a 10 second margin over the RPC child's
`SF_RPC_SHUTDOWN_GRACE_MS=600000` queue-drain handler.
## Promotion
Test must roll before prod:

View file

@ -52,12 +52,13 @@
"build:core": "npm run build:pi && npm run build:rpc-client && npm run build:daemon && npm run check:versioned-json && tsgo && npm run copy-resources && npm run copy-themes && npm run copy-export-html",
"build": "npm run build:core && node scripts/build-web-if-stale.cjs",
"stage:web-host": "node scripts/stage-web-standalone.cjs",
"build:web-host": "npm --prefix web run build && npm run stage:web-host",
"build:web-host": "node scripts/build-web-host.mjs",
"release:manifest": "node scripts/generate-release-manifest.mjs",
"docker:build-sf-server": "docker build -f docker/Dockerfile.sf-server -t ghcr.io/singularity-ng/sf-server .",
"docker:vega:up": "node scripts/run-vega-source-server.mjs up",
"docker:vega:logs": "node scripts/run-vega-source-server.mjs logs",
"docker:vega:down": "node scripts/run-vega-source-server.mjs down",
"docker:vega:upgrade": "node scripts/upgrade-vega-source-server.mjs",
"docs:features": "node scripts/generate-features-inventory.mjs",
"copy-resources": "node scripts/copy-resources.cjs",
"copy-themes": "node scripts/copy-themes.cjs",

View file

@ -0,0 +1,37 @@
#!/usr/bin/env node
/**
* build-web-host.mjs — build and stage the standalone SF web host.
*
* Purpose: keep Next.js build output clean on Node 26 while preserving normal
* build failures and staging behavior.
*
* Consumer: `npm run build:web-host` and the vega self-upgrade path.
*/
import { spawnSync } from "node:child_process";
// Step 1: build the Next.js app under web/ using the tuned environment from
// buildEnv(). run() exits this process on a non-zero child status, so the
// staging step below only runs after a successful build.
run("npm", ["--prefix", "web", "run", "build"], {
env: buildEnv(),
});
// Step 2: invoke the `stage:web-host` npm script with the unmodified parent
// environment.
run("npm", ["run", "stage:web-host"]);
/**
 * Compose the environment for the web build child process.
 *
 * Keeps everything from the parent environment, appends
 * `--disable-warning=DEP0205` to any NODE_OPTIONS already present, and
 * defaults NEXT_TELEMETRY_DISABLED to "1" unless the caller set it.
 *
 * @returns a fresh environment object for spawnSync.
 */
function buildEnv() {
  const flags = [];
  if (process.env.NODE_OPTIONS) flags.push(process.env.NODE_OPTIONS);
  flags.push("--disable-warning=DEP0205");

  const env = { ...process.env };
  env.NODE_OPTIONS = flags.join(" ");
  env.NEXT_TELEMETRY_DISABLED = process.env.NEXT_TELEMETRY_DISABLED || "1";
  return env;
}
/**
 * Run a child command synchronously with inherited stdio.
 *
 * Terminates this process with the child's exit status (or 1 when no status
 * is available) if the child does not exit with 0.
 *
 * @param command executable to launch.
 * @param args argument vector for the executable.
 * @param options optional `{ env }` override; defaults to process.env.
 */
function run(command, args, options = {}) {
  const env = options.env ?? process.env;
  const { status } = spawnSync(command, args, { stdio: "inherit", env });
  if (status === 0) return;
  process.exit(status ?? 1);
}

View file

@ -9,6 +9,7 @@
* Consumer: `npm run docker:vega:up` on vega.
*/
import { spawnSync } from "node:child_process";
import { statSync } from "node:fs";
import { homedir } from "node:os";
import { dirname, resolve } from "node:path";
import { fileURLToPath } from "node:url";
@ -18,11 +19,19 @@ const image = process.env.SF_VEGA_IMAGE || "sf-source-server:vega";
const bind = process.env.SF_VEGA_BIND || "127.0.0.1";
const workspace = resolve(process.env.SF_WORKSPACE_DIR || root);
const workspacesRoot = resolve(process.env.SF_WORKSPACES_DIR || dirname(root));
const sourceHostRoot = resolve(process.env.SF_SOURCE_HOST_ROOT || root);
const workspaceHost = resolve(process.env.SF_WORKSPACE_HOST_DIR || workspace);
const workspacesHost = resolve(
process.env.SF_WORKSPACES_HOST_DIR || workspacesRoot,
);
const sfHomeHost = resolve(process.env.SF_HOME_HOST_DIR || `${homedir()}/.sf`);
const name = process.env.SF_VEGA_CONTAINER || "sf-server-vega";
const port = process.env.SF_VEGA_PORT || "4000";
const uid = process.env.PUID || String(process.getuid?.() ?? 1000);
const gid = process.env.PGID || String(process.getgid?.() ?? 1000);
const dockerSocketGid = socketGroupId("/var/run/docker.sock");
const command = process.argv[2] ?? "up";
const skipImageBuild = process.env.SF_VEGA_SKIP_IMAGE_BUILD === "1";
if (command === "--help" || command === "-h" || command === "help") {
process.stdout.write(`Usage:
@ -52,6 +61,10 @@ if (command === "print") {
port,
workspace,
workspacesRoot,
sourceHostRoot,
workspaceHost,
workspacesHost,
sfHomeHost,
sfSource: root,
},
null,
@ -67,7 +80,8 @@ if (command === "logs") {
}
if (command === "down") {
run("docker", ["rm", "-f", name]);
await requestDrain(port);
drainContainer(name);
process.exit(0);
}
@ -80,16 +94,16 @@ const allowedOrigins =
process.env.SF_WEB_ALLOWED_ORIGINS ||
`http://127.0.0.1:${port},http://localhost:${port}`;
run("docker", [
"build",
"-f",
"docker/Dockerfile.source-server",
"-t",
image,
".",
]);
if (!skipImageBuild) {
run(
"docker",
["build", "-f", "docker/Dockerfile.source-server", "-t", image, "."],
{ env: dockerBuildEnv() },
);
}
spawnSync("docker", ["rm", "-f", name], { stdio: "ignore" });
await requestDrain(port);
drainContainer(name);
run("docker", [
"run",
@ -100,6 +114,7 @@ run("docker", [
"unless-stopped",
"--user",
`${uid}:${gid}`,
...(dockerSocketGid ? ["--group-add", dockerSocketGid] : []),
"-p",
`${bind}:${port}:4000`,
"-e",
@ -117,6 +132,14 @@ run("docker", [
"-e",
`SF_WORKSPACES_DIR=${workspacesRoot}`,
"-e",
`SF_SOURCE_HOST_ROOT=${sourceHostRoot}`,
"-e",
`SF_WORKSPACE_HOST_DIR=${workspaceHost}`,
"-e",
`SF_WORKSPACES_HOST_DIR=${workspacesHost}`,
"-e",
`SF_HOME_HOST_DIR=${sfHomeHost}`,
"-e",
"HOSTNAME=0.0.0.0",
"-e",
"PORT=4000",
@ -128,18 +151,22 @@ run("docker", [
`SF_WEB_ALLOWED_ORIGINS=${allowedOrigins}`,
"-e",
"SF_DEV_SERVER_WATCH=1",
"-e",
"SF_RPC_SHUTDOWN_GRACE_MS=600000",
"-v",
`${root}:/opt/sf`,
`${sourceHostRoot}:/opt/sf`,
"-v",
`${workspace}:/workspace`,
`${workspaceHost}:/workspace`,
"-v",
`${workspacesRoot}:/workspaces`,
`${workspacesHost}:/workspaces`,
"-v",
`${workspacesRoot}:${workspacesRoot}`,
`${workspacesHost}:${workspacesRoot}`,
"-v",
`${homedir()}/.sf:/home/node/.sf`,
`${sfHomeHost}:/home/node/.sf`,
"-v",
`${homedir()}/.gitconfig:/home/node/.gitconfig:ro`,
"-v",
"/var/run/docker.sock:/var/run/docker.sock",
image,
"node",
"/opt/sf/dist/web/standalone/server.js",
@ -150,11 +177,92 @@ process.stdout.write(`SF source: ${root}\n`);
process.stdout.write(`Initial workspace: ${workspace}\n`);
process.stdout.write(`Workspace parent: ${workspacesRoot}\n`);
/**
 * Run a child command synchronously from the SF source root with inherited
 * stdio.
 *
 * Exits this process with the child's status (or 1 when none is available)
 * when the child fails, unless `options.allowFailure` is set.
 *
 * @param command executable to launch.
 * @param args argument vector for the executable.
 * @param options optional `{ env, allowFailure }`; env defaults to process.env.
 */
function run(command, args, options = {}) {
  const result = spawnSync(command, args, {
    cwd: root,
    stdio: "inherit",
    env: options.env ?? process.env,
  });
  if (result.status !== 0 && !options.allowFailure) {
    process.exit(result.status ?? 1);
  }
}
/**
 * Environment for `docker build` invocations.
 *
 * Starts from the parent environment, forces DOCKER_BUILDKIT=1 and
 * DEBIAN_FRONTEND=noninteractive, and defaults BUILDKIT_PROGRESS to "plain"
 * when the caller has not chosen a value.
 */
function dockerBuildEnv() {
  const progress = process.env.BUILDKIT_PROGRESS || "plain";
  return Object.assign({}, process.env, {
    DOCKER_BUILDKIT: "1",
    BUILDKIT_PROGRESS: progress,
    DEBIAN_FRONTEND: "noninteractive",
  });
}
/**
 * Look up the owning group id of a filesystem path.
 *
 * @param path path to stat (used here for the docker socket).
 * @returns the gid as a string, or null when the path cannot be stat'ed.
 */
function socketGroupId(path) {
  let stats;
  try {
    stats = statSync(path);
  } catch {
    return null;
  }
  return String(stats.gid);
}
/**
 * Stop and remove a container, giving it time to drain first.
 *
 * Does nothing when the container does not exist. The stop timeout comes
 * from SF_VEGA_DRAIN_STOP_TIME (default "610" seconds); both docker commands
 * are allowed to fail without aborting the script.
 *
 * @param containerName docker container name to drain.
 */
function drainContainer(containerName) {
  if (containerExists(containerName)) {
    const tolerant = { allowFailure: true };
    const stopTime = process.env.SF_VEGA_DRAIN_STOP_TIME || "610";
    run("docker", ["stop", "--timeout", stopTime, containerName], tolerant);
    run("docker", ["rm", "-f", containerName], tolerant);
  }
}
/**
 * Ask the running server to start draining before its container is stopped.
 *
 * Skipped when the container does not exist. A 404 from /api/drain is
 * ignored; any other non-OK response or a network failure is logged as a
 * skipped preflight and never aborts the script. After an OK response the
 * function waits for healthz to reflect the drain.
 *
 * @param targetPort host port the server is published on.
 */
async function requestDrain(targetPort) {
  if (!containerExists(name)) return;

  const origin = `http://${bind}:${targetPort}`;
  try {
    const reply = await fetch(`${origin}/api/drain`, {
      method: "POST",
      headers: authHeaders(),
    });
    if (reply.ok) {
      await waitForDrainHealthz(origin);
      return;
    }
    if (reply.status === 404) return;
    throw new Error(`drain returned ${reply.status}`);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    process.stdout.write(`drain preflight skipped: ${reason}\n`);
  }
}
/**
 * Poll /api/healthz for up to 10 seconds until the drain becomes visible.
 *
 * Returns as soon as healthz answers 503 or the request itself fails, and
 * silently gives up once the deadline passes. Polls every 250 ms.
 *
 * @param baseUrl origin of the server being drained, e.g. http://host:port.
 */
async function waitForDrainHealthz(baseUrl) {
  const giveUpAt = Date.now() + 10_000;
  const pause = () =>
    new Promise((resolveDelay) => setTimeout(resolveDelay, 250));

  for (;;) {
    if (Date.now() >= giveUpAt) return;
    let status;
    try {
      ({ status } = await fetch(`${baseUrl}/api/healthz`, {
        cache: "no-store",
        headers: authHeaders(),
      }));
    } catch {
      // A failed request ends the wait.
      return;
    }
    if (status === 503) return;
    await pause();
  }
}
/**
 * Build request headers for authenticated calls to the SF web server.
 *
 * @returns a bearer Authorization header when SF_WEB_AUTH_TOKEN is set and
 *   non-empty, otherwise an empty object.
 */
function authHeaders() {
  const { SF_WEB_AUTH_TOKEN: token } = process.env;
  if (!token) return {};
  return { Authorization: `Bearer ${token}` };
}
/**
 * Report whether a docker container with the given name exists.
 *
 * Uses `docker container inspect` with output discarded. A non-zero status
 * (including a missing docker CLI) is reported as "does not exist" rather
 * than terminating the script, so callers can skip drain/stop work when
 * there is nothing to drain.
 *
 * @param containerName docker container name to look up.
 * @returns true only when the inspect command exits with status 0.
 */
function containerExists(containerName) {
  const result = spawnSync("docker", ["container", "inspect", containerName], {
    cwd: root,
    stdio: "ignore",
    env: process.env,
  });
  return result.status === 0;
}

View file

@ -22,31 +22,38 @@ const prodPort = process.env.SF_VEGA_PORT || "4000";
const candidatePort = process.env.SF_VEGA_CANDIDATE_PORT || "4001";
const workspacesRoot = process.env.SF_WORKSPACES_DIR || dirname(root);
const skipBuild = process.env.SF_VEGA_UPGRADE_SKIP_BUILD === "1";
const probeBind = process.env.SF_VEGA_PROBE_HOST || bind;
if (!skipBuild) {
run("npm", ["run", "build:web-host"]);
run("npm", ["run", "build:web-host"], { env: buildEnv() });
run(process.execPath, [
"scripts/generate-release-manifest.mjs",
"--out",
"dist/sf-release-manifest.json",
]);
}
run("docker", [
"build",
"-f",
"docker/Dockerfile.source-server",
"-t",
process.env.SF_VEGA_IMAGE || "sf-source-server:vega",
".",
]);
run(
"docker",
[
"build",
"-f",
"docker/Dockerfile.source-server",
"-t",
process.env.SF_VEGA_IMAGE || "sf-source-server:vega",
".",
],
{ env: dockerBuildEnv() },
);
startServer(candidateName, candidatePort);
await probeServer(candidatePort, "candidate");
await requestDrain(prodPort, "prod");
drainContainer(prodName);
startServer(prodName, prodPort);
await probeServer(prodPort, "prod");
await requestDrain(candidatePort, "candidate");
drainContainer(candidateName);
process.stdout.write(
`sf server upgraded: ${prodName} is healthy on ${bind}:${prodPort}\n`,
@ -64,7 +71,7 @@ function startServer(name, port) {
}
async function probeServer(port, label) {
const baseUrl = `http://${bind}:${port}`;
const baseUrl = `http://${probeBind}:${port}`;
const checks = [
["healthz", `${baseUrl}/api/healthz`],
["ready", `${baseUrl}/api/ready`],
@ -103,6 +110,58 @@ async function probeServer(port, label) {
throw new Error(`${label} probes failed: ${lastError}`);
}
/**
 * Ask a server to begin draining ahead of stopping its container.
 *
 * A 404 from /api/drain is ignored. Any other non-OK status or a network
 * error is logged as a skipped preflight and never aborts the upgrade.
 * After an OK response the function waits for healthz to reflect the drain.
 *
 * @param port port of the server to drain.
 * @param label human-readable name used in log lines ("prod", "candidate").
 */
async function requestDrain(port, label) {
  const drainUrl = `http://${probeBind}:${port}/api/drain`;
  try {
    const { ok, status } = await fetch(drainUrl, {
      method: "POST",
      headers: authHeaders(),
    });
    if (!ok) {
      if (status === 404) return;
      throw new Error(`drain returned ${status}`);
    }
    await waitForDrainHealthz(port, label);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    process.stdout.write(`${label} drain preflight skipped: ${reason}\n`);
  }
}
/**
 * Poll /api/healthz until the server reports the drain (HTTP 503) or a
 * 10 second deadline passes.
 *
 * Each probe is bounded by the time left before the deadline, so a stalled
 * connection cannot hold the upgrade beyond the intended wait. Failures are
 * recorded and retried every 250 ms; the outcome is logged either way and
 * the function never throws for probe errors.
 *
 * @param port port of the server being drained.
 * @param label human-readable name used in log lines ("prod", "candidate").
 */
async function waitForDrainHealthz(port, label) {
  const baseUrl = `http://${probeBind}:${port}`;
  const deadline = Date.now() + 10_000;
  let lastStatus = "unknown";
  while (Date.now() < deadline) {
    try {
      const response = await fetch(`${baseUrl}/api/healthz`, {
        cache: "no-store",
        headers: authHeaders(),
        // Abort the probe once the overall deadline is reached.
        signal: AbortSignal.timeout(Math.max(1, deadline - Date.now())),
      });
      lastStatus = String(response.status);
      if (response.status === 503) {
        process.stdout.write(`${label} drain acknowledged on ${baseUrl}\n`);
        return;
      }
    } catch (error) {
      lastStatus = error instanceof Error ? error.message : String(error);
    }
    await delay(250);
  }
  process.stdout.write(
    `${label} drain did not surface on healthz before stop (last=${lastStatus})\n`,
  );
}
/**
 * Headers for authenticated requests to the SF web server.
 *
 * @returns `{ Authorization: "Bearer <token>" }` when SF_WEB_AUTH_TOKEN is
 *   set and non-empty; an empty object otherwise.
 */
function authHeaders() {
  const token = process.env.SF_WEB_AUTH_TOKEN;
  if (token) {
    return { Authorization: `Bearer ${token}` };
  }
  return {};
}
function showLogs(name) {
spawnSync("docker", ["logs", "--tail=120", name], {
cwd: root,
@ -111,7 +170,31 @@ function showLogs(name) {
});
}
/**
 * Environment for the `build:web-host` npm run during an upgrade.
 *
 * Inherits the parent environment, then pins NODE_ENV to "production",
 * disables Next.js telemetry and the npm update notifier, and appends
 * `--disable-warning=DEP0205` to any existing NODE_OPTIONS.
 */
function buildEnv() {
  const inherited = process.env.NODE_OPTIONS;
  const suppress = "--disable-warning=DEP0205";
  const overrides = {
    NODE_ENV: "production",
    NEXT_TELEMETRY_DISABLED: "1",
    NODE_OPTIONS: inherited ? `${inherited} ${suppress}` : suppress,
    NPM_CONFIG_UPDATE_NOTIFIER: "false",
    npm_config_update_notifier: "false",
  };
  return { ...process.env, ...overrides };
}
/**
 * Environment for `docker build`: the parent environment with BuildKit
 * enabled, a non-interactive Debian frontend, and BUILDKIT_PROGRESS
 * defaulting to "plain" when unset.
 */
function dockerBuildEnv() {
  const env = { ...process.env };
  env.DOCKER_BUILDKIT = "1";
  env.BUILDKIT_PROGRESS = env.BUILDKIT_PROGRESS || "plain";
  env.DEBIAN_FRONTEND = "noninteractive";
  return env;
}
function drainContainer(name) {
if (!containerExists(name)) return;
// 610s: matches SF_RPC_SHUTDOWN_GRACE_MS=600000 in rpc-mode's
// graceful-shutdown handler with a 10s safety margin for Node exit.
// Normal drains finish in <1s; the long ceiling is for pathological
@ -122,6 +205,15 @@ function drainContainer(name) {
run("docker", ["rm", "-f", name], { allowFailure: true });
}
/**
 * Check for a docker container by name.
 *
 * @param name docker container name to look up.
 * @returns true when `docker container inspect` exits with status 0.
 */
function containerExists(name) {
  const { status } = spawnSync("docker", ["container", "inspect", name], {
    cwd: root,
    stdio: "ignore",
    env: process.env,
  });
  return status === 0;
}
/**
 * Resolve after the given number of milliseconds.
 *
 * @param ms delay duration in milliseconds.
 * @returns a promise that resolves with no value once the timer fires.
 */
function delay(ms) {
  return new Promise((resolveDelay) => {
    setTimeout(resolveDelay, ms);
  });
}

View file

@ -0,0 +1,42 @@
/**
* auto-shutdown-signal.test.mjs — verifies autonomous shutdown signalling.
*
* Purpose: prove SIGTERM-aware autonomous loops can stop starting new work at
* safe boundaries during server/container drain.
*
* Consumer: auto/loop.js before dispatching each autonomous iteration.
*/
import assert from "node:assert/strict";
import { test } from "node:test";
import {
_resetAutonomousShutdownForTests,
autonomousShutdownSnapshot,
isAutonomousShutdownRequested,
requestAutonomousShutdown,
} from "../auto/shutdown-signal.js";
// A requested shutdown flips the flag and the snapshot carries the signal
// name, an ISO-style timestamp, and a numeric elapsed time.
test("shutdown_signal_when_requested_exposes_snapshot", () => {
  _resetAutonomousShutdownForTests();
  assert.equal(isAutonomousShutdownRequested(), false);

  requestAutonomousShutdown("SIGTERM");
  assert.equal(isAutonomousShutdownRequested(), true);

  const snap = autonomousShutdownSnapshot();
  assert.equal(snap.requested, true);
  assert.equal(snap.signal, "SIGTERM");
  assert.match(snap.requestedAt, /^\d{4}-\d{2}-\d{2}T/);
  assert.equal(typeof snap.elapsedMs, "number");

  _resetAutonomousShutdownForTests();
});
// The first requested signal wins; a later request must not overwrite it.
test("shutdown_signal_when_called_twice_keeps_first_signal", () => {
  _resetAutonomousShutdownForTests();

  for (const signalName of ["SIGTERM", "SIGINT"]) {
    requestAutonomousShutdown(signalName);
  }
  const { signal } = autonomousShutdownSnapshot();
  assert.equal(signal, "SIGTERM");

  _resetAutonomousShutdownForTests();
});

View file

@ -5,6 +5,7 @@ import type {
ProjectDetectionSignals,
} from "./bridge-service.ts";
import { detectProjectKind } from "./bridge-service.ts";
import { recoverProjectRuntimeQueues } from "./project-runtime-recovery.ts";
// ─── Project Discovery ─────────────────────────────────────────────────────
@ -105,6 +106,7 @@ export function discoverProjects(
// .sf, or is a recognizable project), return it as a single entry.
const rootDetection = detectProjectKind(devRootPath);
if (rootDetection.signals.isMonorepo) {
recoverProjectRuntimeQueues(devRootPath);
const stat = statSync(devRootPath);
return [
{
@ -131,6 +133,7 @@ export function discoverProjects(
if (EXCLUDED_DIRS.has(entry.name)) continue;
const fullPath = join(devRootPath, entry.name);
recoverProjectRuntimeQueues(fullPath);
const { kind, signals } = detectProjectKind(fullPath);
const stat = statSync(fullPath);
@ -147,6 +150,7 @@ export function discoverProjects(
for (const nestedSfProject of findNestedSfProjects(devRootPath)) {
if (seen.has(nestedSfProject)) continue;
recoverProjectRuntimeQueues(nestedSfProject);
const { kind, signals } = detectProjectKind(nestedSfProject);
const stat = statSync(nestedSfProject);
projects.push({

View file

@ -0,0 +1,22 @@
/**
* project-runtime-recovery.ts — best-effort repair of repo-local runtime queues.
*
* Purpose: let the shared SF webserver make project state reload-safe before
* surfacing a repo as ready after container replacement.
*
* Consumer: web readiness and project discovery API routes.
*/
import { recoverOrphanedFeedbackDrains } from "../../packages/coding-agent/src/modes/rpc/feedback-queue-recovery.ts";
/**
 * Recover transient runtime files that can be safely replayed for one project.
 *
 * Purpose: keep repo-local `.sf/runtime` queues from staying stranded after a
 * fast webserver/container upgrade.
 *
 * Recovery is best effort: a failure inside the recovery helper is logged and
 * swallowed so that readiness probes and project discovery, which call this
 * on every request, are not taken down by a single unreadable project.
 *
 * Consumer: `/api/ready`, `/api/projects`, and shared project discovery.
 *
 * @param projectPath absolute project directory; null or empty is a no-op.
 */
export function recoverProjectRuntimeQueues(projectPath: string | null): void {
  if (!projectPath) return;
  try {
    recoverOrphanedFeedbackDrains(projectPath);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    console.warn(
      `runtime queue recovery skipped for ${projectPath}: ${reason}`,
    );
  }
}

View file

@ -0,0 +1,25 @@
import { verifyAuthToken } from "../../../lib/auth-guard";
import {
markShuttingDown,
shutdownStateSnapshot,
} from "../../../../src/web/shutdown-state.ts";
export const runtime = "nodejs";
export const dynamic = "force-dynamic";
/**
 * POST /api/drain — flag the web server as shutting down.
 *
 * Requires a valid auth token. Marks the shutdown state with reason
 * "manual" and answers 202 with the resulting shutdown snapshot; the
 * response is never cached.
 */
export async function POST(request: Request): Promise<Response> {
  const denied = verifyAuthToken(request);
  if (denied) return denied;

  markShuttingDown("manual");

  const body = { accepted: true, ...shutdownStateSnapshot() };
  return Response.json(body, {
    status: 202,
    headers: { "Cache-Control": "no-store" },
  });
}

View file

@ -1,22 +1,33 @@
import { existsSync } from "node:fs";
import { getReleaseInfo } from "../../../../src/web/release-info.ts";
import { recoverProjectRuntimeQueues } from "../../../../src/web/project-runtime-recovery.ts";
import {
isShuttingDown,
shutdownStateSnapshot,
} from "../../../../src/web/shutdown-state.ts";
export const runtime = "nodejs";
export const dynamic = "force-dynamic";
export async function GET(): Promise<Response> {
const release = getReleaseInfo();
recoverProjectRuntimeQueues(release.projectCwd);
const projectReady =
release.projectCwd === null || existsSync(release.projectCwd);
const ready = release.ok && projectReady;
const shuttingDown = isShuttingDown();
const ready = release.ok && projectReady && !shuttingDown;
return Response.json(
{
...release,
ready,
...(shuttingDown
? { shuttingDown: true, shutdown: shutdownStateSnapshot() }
: {}),
checks: {
projectCwd: projectReady ? "pass" : "fail",
manifest: release.manifestLoaded ? "pass" : "absent",
shutdown: shuttingDown ? "draining" : "pass",
},
},
{

View file

@ -0,0 +1,107 @@
import { spawnSync } from "node:child_process";
import { randomUUID } from "node:crypto";
import { statSync } from "node:fs";
import { getgid, getuid } from "node:process";
import { verifyAuthToken } from "../../../lib/auth-guard";
export const runtime = "nodejs";
export const dynamic = "force-dynamic";
/**
 * POST /api/server-upgrade — launch the in-container upgrader.
 *
 * Requires a valid auth token. Starts a detached, self-removing container
 * from the current server image that runs
 * `scripts/upgrade-vega-source-server.mjs` with the docker socket and the
 * host-side bind-mount paths passed through, so the upgrader can address the
 * real host directories.
 *
 * @returns 202 with the upgrader container name and id when `docker run`
 *   succeeds; 500 with an `error` message otherwise.
 */
export async function POST(request: Request): Promise<Response> {
  const authError = verifyAuthToken(request);
  if (authError) return authError;

  const sourceHostRoot =
    process.env.SF_SOURCE_HOST_ROOT ?? "/home/mhugo/code/singularity-forge";
  const workspaceHost =
    process.env.SF_WORKSPACE_HOST_DIR ??
    process.env.SF_WEB_PROJECT_CWD ??
    sourceHostRoot;
  const workspacesHost =
    process.env.SF_WORKSPACES_HOST_DIR ?? "/home/mhugo/code";
  const sfHomeHost = process.env.SF_HOME_HOST_DIR ?? "/home/mhugo/.sf";
  const image = process.env.SF_VEGA_IMAGE ?? "sf-source-server:vega";
  const name = `sf-server-vega-upgrader-${randomUUID().slice(0, 8)}`;
  const uid = process.env.PUID ?? String(getuid?.() ?? 1000);
  const gid = process.env.PGID ?? String(getgid?.() ?? 1000);
  const dockerSocketGid = socketGroupId("/var/run/docker.sock");

  const args = [
    "run",
    "-d",
    "--rm",
    "--name",
    name,
    "--network",
    "host",
    "--user",
    `${uid}:${gid}`,
    // Join the docker socket's group so the non-root user can use the socket.
    ...(dockerSocketGid ? ["--group-add", dockerSocketGid] : []),
    "-v",
    `${sourceHostRoot}:/opt/sf`,
    "-v",
    `${workspaceHost}:/workspace`,
    "-v",
    `${workspacesHost}:/workspaces`,
    "-v",
    `${workspacesHost}:${workspacesHost}`,
    "-v",
    `${sfHomeHost}:/home/node/.sf`,
    "-v",
    "/var/run/docker.sock:/var/run/docker.sock",
    "-e",
    `SF_SOURCE_HOST_ROOT=${sourceHostRoot}`,
    "-e",
    `SF_WORKSPACE_HOST_DIR=${workspaceHost}`,
    "-e",
    `SF_WORKSPACES_HOST_DIR=${workspacesHost}`,
    "-e",
    `SF_HOME_HOST_DIR=${sfHomeHost}`,
    "-e",
    `SF_WORKSPACE_DIR=${workspaceHost}`,
    "-e",
    `SF_WORKSPACES_DIR=${workspacesHost}`,
    // Keep the upgrader building the same image tag it was started from.
    "-e",
    `SF_VEGA_IMAGE=${image}`,
    "-e",
    "SF_VEGA_PORT=4000",
    "-e",
    "SF_VEGA_CANDIDATE_PORT=4001",
    "-e",
    "SF_VEGA_PROBE_HOST=127.0.0.1",
    "-e",
    "DOCKER_BUILDKIT=1",
    "-e",
    "BUILDKIT_PROGRESS=plain",
    // Name-only form: docker copies the value from this process's
    // environment, so the token never appears in the argument list. The
    // upgrader script reads SF_WEB_AUTH_TOKEN for its drain/health requests.
    ...(process.env.SF_WEB_AUTH_TOKEN ? ["-e", "SF_WEB_AUTH_TOKEN"] : []),
    image,
    "node",
    "/opt/sf/scripts/upgrade-vega-source-server.mjs",
  ];

  const noStore = { "Cache-Control": "no-store" };
  try {
    const result = spawnSync("docker", args, {
      cwd: "/opt/sf",
      encoding: "utf8",
      stdio: ["ignore", "pipe", "pipe"],
    });
    // A launch failure (e.g. docker CLI missing) is reported via
    // `result.error` with a null status and no captured output; surface it
    // instead of the generic fallback message.
    if (result.error) throw result.error;
    if (result.status !== 0) {
      throw new Error(result.stderr || result.stdout || "docker run failed");
    }
    return Response.json(
      { triggered: true, upgrader: name, containerId: result.stdout.trim() },
      { status: 202, headers: noStore },
    );
  } catch (error) {
    return Response.json(
      { error: error instanceof Error ? error.message : String(error) },
      { status: 500, headers: noStore },
    );
  }
}
/**
 * Resolve the owning group id of a path as a string.
 *
 * @param path filesystem path to stat (used here for the docker socket).
 * @returns the gid as a string, or null when the path cannot be stat'ed.
 */
function socketGroupId(path: string): string | null {
  try {
    const { gid } = statSync(path);
    return `${gid}`;
  } catch {
    return null;
  }
}

View file

@ -14,6 +14,7 @@ import {
Layers,
Loader2,
Plus,
RefreshCw,
Search,
Sparkles,
X,
@ -393,6 +394,8 @@ export function ProjectsPanel({
const [newProjectOpen, setNewProjectOpen] = useState(false);
const [changeRootOpen, setChangeRootOpen] = useState(false);
const [addRepoOpen, setAddRepoOpen] = useState(false);
const [upgradeBusy, setUpgradeBusy] = useState(false);
const [upgradeError, setUpgradeError] = useState<string | null>(null);
const _workspaceState = useSFWorkspaceState();
const handleProjectCreated = useCallback(
@ -427,6 +430,27 @@ export function ProjectsPanel({
[],
);
// Trigger the server upgrade endpoint. The busy flag is held for the duration
// of the request; a failed trigger surfaces the server's error message (or a
// status-based fallback) in the panel.
const handleUpgradeServer = useCallback(async () => {
  setUpgradeBusy(true);
  setUpgradeError(null);
  try {
    const res = await authFetch("/api/server-upgrade", { method: "POST" });
    if (res.ok) return;
    const body = (await res.json().catch(() => ({}))) as { error?: string };
    throw new Error(body.error ?? `Upgrade trigger failed (${res.status})`);
  } catch (err) {
    const message =
      err instanceof Error ? err.message : "Failed to trigger upgrade";
    setUpgradeError(message);
  } finally {
    setUpgradeBusy(false);
  }
}, []);
// Sort: active-sf first, then by name
const sortedProjects = [...projects].sort((a, b) => {
const kindOrder: Record<ProjectDetectionKind, number> = {
@ -587,15 +611,34 @@ export function ProjectsPanel({
</div>
)}
</div>
<Button
variant="ghost"
size="icon"
className="h-8 w-8 shrink-0"
onClick={() => onOpenChange(false)}
>
<X className="h-4 w-4" />
</Button>
<div className="flex items-center gap-1">
<Button
variant="ghost"
size="icon"
className="h-8 w-8 shrink-0"
onClick={() => void handleUpgradeServer()}
disabled={upgradeBusy}
title="Upgrade server"
>
<RefreshCw
className={cn("h-4 w-4", upgradeBusy && "animate-spin")}
/>
</Button>
<Button
variant="ghost"
size="icon"
className="h-8 w-8 shrink-0"
onClick={() => onOpenChange(false)}
>
<X className="h-4 w-4" />
</Button>
</div>
</div>
{upgradeError && (
<div className="border-b border-border/50 px-5 py-2 text-xs text-destructive">
{upgradeError}
</div>
)}
{/* Scrollable project list */}
<ScrollArea className="min-h-0 flex-1">

View file

@ -1,4 +1,12 @@
import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
import {
appendFileSync,
existsSync,
readdirSync,
readFileSync,
renameSync,
statSync,
unlinkSync,
} from "node:fs";
import { homedir } from "node:os";
import { basename, join, resolve } from "node:path";
import type { NextApiRequest, NextApiResponse } from "next";
@ -35,6 +43,7 @@ type WebPreferences = {
const EXCLUDED_DIRS = new Set(["node_modules", ".git"]);
const MAX_NESTED_SF_DEPTH = 3;
const SF_FEEDBACK_QUEUE_FILE = "sf-feedback-queue.jsonl";
const webPreferencesPath = join(
process.env.SF_HOME || join(homedir(), ".sf"),
"web-preferences.json",
@ -111,6 +120,7 @@ function projectMetadata(
path: string,
includeProgress: boolean,
): ProjectMetadata {
recoverProjectRuntimeQueues(path);
const stat = statSync(path);
const signals = detectProject(path);
const kind = signals.hasSfFolder
@ -130,6 +140,52 @@ function projectMetadata(
};
}
/**
 * Fold orphaned `.draining` feedback-queue files back into the live queue.
 *
 * Scans `<project>/.sf/runtime` for files named
 * `<queue file>.<pid>.<token>.draining`. When the pid in the name is no
 * longer alive, the file's contents are appended to the live queue (and the
 * orphan deleted), or the orphan is renamed to become the live queue when
 * none exists. All filesystem failures are ignored; recovery is best effort.
 *
 * @param projectPath project directory to inspect.
 */
function recoverProjectRuntimeQueues(projectPath: string): void {
  const runtimeDir = join(projectPath, ".sf", "runtime");
  if (!existsSync(runtimeDir)) return;

  let fileNames;
  try {
    fileNames = readdirSync(runtimeDir);
  } catch {
    return;
  }

  const escapedQueueName = SF_FEEDBACK_QUEUE_FILE.replace(/\./g, "\\.");
  const drainingPattern = new RegExp(
    `^${escapedQueueName}\\.(\\d+)\\.[^.]+\\.draining$`,
  );
  const liveQueue = join(runtimeDir, SF_FEEDBACK_QUEUE_FILE);

  for (const fileName of fileNames) {
    // A non-matching name yields NaN here and is rejected by the check below.
    const ownerPid = Number(drainingPattern.exec(fileName)?.[1]);
    const pidUsable = Number.isFinite(ownerPid) && ownerPid > 0;
    if (!pidUsable || isPidAlive(ownerPid)) continue;

    const stranded = join(runtimeDir, fileName);
    try {
      if (!existsSync(liveQueue)) {
        renameSync(stranded, liveQueue);
        continue;
      }
      appendFileSync(liveQueue, readFileSync(stranded, "utf-8"), "utf-8");
      unlinkSync(stranded);
    } catch {
      // Best effort only; a later RPC/web probe can retry recovery.
    }
  }
}
/**
 * Probe whether a process id refers to a live process.
 *
 * Sends signal 0, which performs the existence/permission check without
 * delivering a signal. An EPERM failure is treated as alive; any other
 * failure is treated as not alive.
 *
 * @param pid process id to probe.
 */
function isPidAlive(pid: number): boolean {
  try {
    process.kill(pid, 0);
  } catch (error) {
    if (!(error instanceof Error) || !("code" in error)) return false;
    return (error as NodeJS.ErrnoException).code === "EPERM";
  }
  return true;
}
function discoverProjects(root: string, includeProgress: boolean) {
const explicitProjects = readExplicitProjectPaths();
if (explicitProjects.length > 0) {