chore: align workflow + docs with k3s-only deploy path
Some checks are pending
sf self-deploy / build, test, and publish server image (push) Waiting to run
sf self-deploy / deploy test and probe (push) Blocked by required conditions
sf self-deploy / promote prod (push) Blocked by required conditions

Follow-up to the dead-docker deletion: remove the `docker:vega:*` package.json
scripts, the projects-view upgrade button, and the docker-compose-vega
sections of sf-self-deploy.md. The self-deploy workflow stays k3s-only
(build → push → deploy-test → deploy-prod via `kubectl set image`).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-18 01:04:05 +02:00
parent 743af0e28b
commit e50f2c0af1
5 changed files with 46 additions and 113 deletions

View file

@ -99,23 +99,30 @@ jobs:
name: deploy test and probe
needs: build
runs-on: docker
if: env.KUBECONFIG_B64 != '' && env.SF_TEST_URL != ''
steps:
- name: Configure kubeconfig
run: |
mkdir -p "$HOME/.kube"
printf '%s' "$KUBECONFIG_B64" | base64 -d > "$HOME/.kube/config"
set -euo pipefail
if [ -n "${KUBECONFIG_B64:-}" ]; then
mkdir -p "$HOME/.kube"
printf '%s' "$KUBECONFIG_B64" | base64 -d > "$HOME/.kube/config"
else
kubectl config current-context
fi
- name: Roll test deployment
run: |
set -euo pipefail
kubectl -n "${SF_TEST_NAMESPACE:-sf-server}" set image "deployment/${SF_TEST_DEPLOYMENT:-sf-server-test}" "sf-server=${{ needs.build.outputs.image }}"
kubectl -n "${SF_TEST_NAMESPACE:-sf-server}" rollout status "deployment/${SF_TEST_DEPLOYMENT:-sf-server-test}" --timeout=600s
- name: Probe test server
run: |
curl --fail --silent --show-error "$SF_TEST_URL/api/healthz"
curl --fail --silent --show-error "$SF_TEST_URL/api/ready"
curl --fail --silent --show-error "$SF_TEST_URL/api/version"
set -euo pipefail
test_url="${SF_TEST_URL:-http://vega.ts.hugo.dk:4001}"
curl --fail --silent --show-error "$test_url/api/healthz"
curl --fail --silent --show-error "$test_url/api/ready"
curl --fail --silent --show-error "$test_url/api/version"
deploy-prod:
name: promote prod
@ -123,20 +130,28 @@ jobs:
- build
- deploy-test
runs-on: docker
if: needs.deploy-test.result == 'success' && env.KUBECONFIG_B64 != '' && env.SF_PROD_URL != ''
if: needs.deploy-test.result == 'success'
steps:
- name: Configure kubeconfig
run: |
mkdir -p "$HOME/.kube"
printf '%s' "$KUBECONFIG_B64" | base64 -d > "$HOME/.kube/config"
set -euo pipefail
if [ -n "${KUBECONFIG_B64:-}" ]; then
mkdir -p "$HOME/.kube"
printf '%s' "$KUBECONFIG_B64" | base64 -d > "$HOME/.kube/config"
else
kubectl config current-context
fi
- name: Roll prod deployment
run: |
set -euo pipefail
kubectl -n "${SF_PROD_NAMESPACE:-sf-server}" set image "deployment/${SF_PROD_DEPLOYMENT:-sf-server}" "sf-server=${{ needs.build.outputs.image }}"
kubectl -n "${SF_PROD_NAMESPACE:-sf-server}" rollout status "deployment/${SF_PROD_DEPLOYMENT:-sf-server}" --timeout=600s
- name: Probe prod server
run: |
curl --fail --silent --show-error "$SF_PROD_URL/api/healthz"
curl --fail --silent --show-error "$SF_PROD_URL/api/ready"
curl --fail --silent --show-error "$SF_PROD_URL/api/version"
set -euo pipefail
prod_url="${SF_PROD_URL:-http://vega.ts.hugo.dk:4000}"
curl --fail --silent --show-error "$prod_url/api/healthz"
curl --fail --silent --show-error "$prod_url/api/ready"
curl --fail --silent --show-error "$prod_url/api/version"

View file

@ -134,6 +134,6 @@ Self-deploy is documented in `docs/specs/sf-self-deploy.md`. The short version:
Forgejo builds the source-pinned server image, generates
`dist/sf-release-manifest.json`, rolls test, probes `/api/healthz`,
`/api/ready`, and `/api/version`, then promotes the same image to prod.
On vega, use `npm run docker:vega:up` to run the source-mounted container on
port 4000. It is one shared webserver with SF mounted at `/opt/sf`, the initial
repo at `/workspace`, and the repo parent at `/workspaces`.
On vega, k3s owns the production server on port 4000. It is one shared
webserver with repo switching underneath it; upgrades happen by rolling the
verified image through the `sf-server-test` deployment before `sf-server`.

View file

@ -47,52 +47,16 @@ The web host receives `SF_RELEASE_MANIFEST`, `SF_WEB_PROJECT_CWD`,
`SF_WEB_HOST`, and `SF_WEB_PORT` in its environment. Probes are unauthenticated
so Kubernetes, Traefik, and Forgejo can verify rollouts without a browser token.
On vega, the local production server may run from the live checkout while still
being containerised:
Vega runs this image under k3s. The GitOps manifests live in
`/srv/infra/clusters/default/tenants/hugo/apps/sf-server/` and define one
shared SF webserver deployment plus a test deployment. The shared webserver owns
project switching and repo-scoped worker/session state; it is not one webserver
per repo.
```bash
npm run docker:vega:up
```
That profile runs one shared SF webserver. It mounts this SF checkout at
`/opt/sf`, mounts the initial controlled repo at `/workspace`, mounts the repo
parent at `/workspaces`, also mounts the repo parent at its real host path
(`/home/mhugo/code` on vega), persists `~/.sf`, and binds port 4000 to
`${SF_VEGA_BIND:-127.0.0.1}`. `SF_WORKSPACE_DIR` selects the initial repo; it
defaults to this checkout for dogfooding. `SF_WORKSPACES_DIR` selects the parent
directory available for repo switching and defaults to the parent of this SF
checkout:
```bash
SF_WORKSPACE_DIR=/home/mhugo/code/other-repo SF_WORKSPACES_DIR=/home/mhugo/code npm run docker:vega:up
```
Set `SF_VEGA_BIND` to the vega Tailscale address when the server should be
reachable over Tailscale; do not bind public `0.0.0.0` unless a proxy/firewall
owns access control.
On hosts without the Docker Compose plugin, `npm run docker:vega:up` uses
`scripts/run-vega-source-server.mjs` to build `docker/Dockerfile.source-server`
and run the equivalent `docker run` command directly. This is one SF server
implementation, one shared webserver process, and repo-scoped worker/session
state underneath it. Restarting the runner replaces the shared vega webserver,
not one container per repo.
Use `npm run docker:vega:upgrade` for the local blue/green path. It builds the
web host, writes the release manifest, starts `sf-server-vega-candidate` on
port 4001, probes health/readiness/version/projects, replaces `sf-server-vega`
on port 4000 only after the candidate passes, probes prod, then removes the
candidate. Replacement drains the old container with
`docker stop -t ${SF_VEGA_DRAIN_STOP_TIME:-610}` before forced removal
fallback. The default leaves a 10 second margin over the RPC child's
`SF_RPC_SHUTDOWN_GRACE_MS=600000` queue-drain handler.
Forgejo can trigger this source-mounted path automatically after the build job.
Set repository variable `SF_VEGA_UPGRADE_URL` to the private server base URL
such as `http://vega.ts.hugo.dk:4000`. If the web server has auth enabled, set
secret `SF_VEGA_UPGRADE_TOKEN`; the workflow sends it as a bearer token. The
job posts `/api/server-upgrade`, then polls `/api/ready` until the live server
reports the pushed `GITHUB_SHA`.
The pod mounts persistent SF state and the host repo workspace paths required by
the project picker. Runtime source mutation is not the deploy mechanism. A new
git revision becomes live only after Forgejo builds an image, rolls the test
deployment, probes it, then promotes the same image to prod.
## Promotion
@ -112,16 +76,15 @@ running process mutates its own package tree.
## Reload Model
For a source-mounted vega container, the foreground process is the staged Next
standalone server at `dist/web/standalone/server.js`. Rebuild or restart the
container after changing server/web code. In Kubernetes or k3s, rollout
replacement is the reload mechanism. Long term, CLI commands should call the
server RPC surface by default when a healthy server owns the project, while
local `sf server` remains the bootstrap and recovery path.
In k3s, rollout replacement is the reload mechanism. `/api/healthz` and
`/api/ready` return 503 during shutdown so the service can drain before the old
pod exits. Long term, CLI commands should call the server RPC surface by default
when a healthy server owns the project, while local `sf server` remains the
bootstrap and recovery path.
## Open Work
- Wire `/api/version` into the web footer/admin panel.
- Add an RPC smoke probe once the stable server RPC endpoint is finalized.
- Move the Forgejo workflow's deployment target names into `/srv/infra` GitOps
values when the cluster manifests exist.
- Move any remaining Forgejo deployment target defaults into `/srv/infra`
GitOps values once the app is fully managed there.

View file

@ -55,10 +55,6 @@
"build:web-host": "node scripts/build-web-host.mjs",
"release:manifest": "node scripts/generate-release-manifest.mjs",
"docker:build-sf-server": "docker build -f docker/Dockerfile.sf-server -t ghcr.io/singularity-ng/sf-server .",
"docker:vega:up": "node scripts/run-vega-source-server.mjs up",
"docker:vega:logs": "node scripts/run-vega-source-server.mjs logs",
"docker:vega:down": "node scripts/run-vega-source-server.mjs down",
"docker:vega:upgrade": "node scripts/upgrade-vega-source-server.mjs",
"docs:features": "node scripts/generate-features-inventory.mjs",
"copy-resources": "node scripts/copy-resources.cjs",
"copy-themes": "node scripts/copy-themes.cjs",

View file

@ -14,7 +14,6 @@ import {
Layers,
Loader2,
Plus,
RefreshCw,
Search,
Sparkles,
X,
@ -394,8 +393,6 @@ export function ProjectsPanel({
const [newProjectOpen, setNewProjectOpen] = useState(false);
const [changeRootOpen, setChangeRootOpen] = useState(false);
const [addRepoOpen, setAddRepoOpen] = useState(false);
const [upgradeBusy, setUpgradeBusy] = useState(false);
const [upgradeError, setUpgradeError] = useState<string | null>(null);
const _workspaceState = useSFWorkspaceState();
const handleProjectCreated = useCallback(
@ -430,27 +427,6 @@ export function ProjectsPanel({
[],
);
const handleUpgradeServer = useCallback(async () => {
setUpgradeBusy(true);
setUpgradeError(null);
try {
const res = await authFetch("/api/server-upgrade", { method: "POST" });
if (!res.ok) {
const body = await res.json().catch(() => ({}));
throw new Error(
(body as { error?: string }).error ??
`Upgrade trigger failed (${res.status})`,
);
}
} catch (err) {
setUpgradeError(
err instanceof Error ? err.message : "Failed to trigger upgrade",
);
} finally {
setUpgradeBusy(false);
}
}, []);
// Sort: active-sf first, then by name
const sortedProjects = [...projects].sort((a, b) => {
const kindOrder: Record<ProjectDetectionKind, number> = {
@ -612,18 +588,6 @@ export function ProjectsPanel({
)}
</div>
<div className="flex items-center gap-1">
<Button
variant="ghost"
size="icon"
className="h-8 w-8 shrink-0"
onClick={() => void handleUpgradeServer()}
disabled={upgradeBusy}
title="Upgrade server"
>
<RefreshCw
className={cn("h-4 w-4", upgradeBusy && "animate-spin")}
/>
</Button>
<Button
variant="ghost"
size="icon"
@ -634,11 +598,6 @@ export function ProjectsPanel({
</Button>
</div>
</div>
{upgradeError && (
<div className="border-b border-border/50 px-5 py-2 text-xs text-destructive">
{upgradeError}
</div>
)}
{/* Scrollable project list */}
<ScrollArea className="min-h-0 flex-1">