chore: align workflow + docs with k3s-only deploy path

Follow-up to the dead-docker delete: remove `docker:vega:*` package.json scripts, the projects-view upgrade button, and the docker-compose-vega sections of sf-self-deploy.md. The self-deploy workflow stays k3s-only (build → push → deploy-test → deploy-prod via kubectl set image).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-18 01:04:05 +02:00 · 2026-05-18 01:04:05 +02:00 · e50f2c0af1
commit e50f2c0af1
parent 743af0e28b
5 changed files with 46 additions and 113 deletions
--- a/.forgejo/workflows/self-deploy.yml
+++ b/.forgejo/workflows/self-deploy.yml
@@ -99,23 +99,30 @@ jobs:
    name: deploy test and probe
    needs: build
    runs-on: docker
-    if: env.KUBECONFIG_B64 != '' && env.SF_TEST_URL != ''
    steps:
      - name: Configure kubeconfig
        run: |
-          mkdir -p "$HOME/.kube"
-          printf '%s' "$KUBECONFIG_B64" | base64 -d > "$HOME/.kube/config"
+          set -euo pipefail
+          if [ -n "${KUBECONFIG_B64:-}" ]; then
+            mkdir -p "$HOME/.kube"
+            printf '%s' "$KUBECONFIG_B64" | base64 -d > "$HOME/.kube/config"
+          else
+            kubectl config current-context
+          fi

      - name: Roll test deployment
        run: |
+          set -euo pipefail
          kubectl -n "${SF_TEST_NAMESPACE:-sf-server}" set image "deployment/${SF_TEST_DEPLOYMENT:-sf-server-test}" "sf-server=${{ needs.build.outputs.image }}"
          kubectl -n "${SF_TEST_NAMESPACE:-sf-server}" rollout status "deployment/${SF_TEST_DEPLOYMENT:-sf-server-test}" --timeout=600s

      - name: Probe test server
        run: |
-          curl --fail --silent --show-error "$SF_TEST_URL/api/healthz"
-          curl --fail --silent --show-error "$SF_TEST_URL/api/ready"
-          curl --fail --silent --show-error "$SF_TEST_URL/api/version"
+          set -euo pipefail
+          test_url="${SF_TEST_URL:-http://vega.ts.hugo.dk:4001}"
+          curl --fail --silent --show-error "$test_url/api/healthz"
+          curl --fail --silent --show-error "$test_url/api/ready"
+          curl --fail --silent --show-error "$test_url/api/version"

  deploy-prod:
    name: promote prod
@@ -123,20 +130,28 @@ jobs:
      - build
      - deploy-test
    runs-on: docker
-    if: needs.deploy-test.result == 'success' && env.KUBECONFIG_B64 != '' && env.SF_PROD_URL != ''
+    if: needs.deploy-test.result == 'success'
    steps:
      - name: Configure kubeconfig
        run: |
-          mkdir -p "$HOME/.kube"
-          printf '%s' "$KUBECONFIG_B64" | base64 -d > "$HOME/.kube/config"
+          set -euo pipefail
+          if [ -n "${KUBECONFIG_B64:-}" ]; then
+            mkdir -p "$HOME/.kube"
+            printf '%s' "$KUBECONFIG_B64" | base64 -d > "$HOME/.kube/config"
+          else
+            kubectl config current-context
+          fi

      - name: Roll prod deployment
        run: |
+          set -euo pipefail
          kubectl -n "${SF_PROD_NAMESPACE:-sf-server}" set image "deployment/${SF_PROD_DEPLOYMENT:-sf-server}" "sf-server=${{ needs.build.outputs.image }}"
          kubectl -n "${SF_PROD_NAMESPACE:-sf-server}" rollout status "deployment/${SF_PROD_DEPLOYMENT:-sf-server}" --timeout=600s

      - name: Probe prod server
        run: |
-          curl --fail --silent --show-error "$SF_PROD_URL/api/healthz"
-          curl --fail --silent --show-error "$SF_PROD_URL/api/ready"
-          curl --fail --silent --show-error "$SF_PROD_URL/api/version"
+          set -euo pipefail
+          prod_url="${SF_PROD_URL:-http://vega.ts.hugo.dk:4000}"
+          curl --fail --silent --show-error "$prod_url/api/healthz"
+          curl --fail --silent --show-error "$prod_url/api/ready"
+          curl --fail --silent --show-error "$prod_url/api/version"
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -134,6 +134,6 @@ Self-deploy is documented in `docs/specs/sf-self-deploy.md`. The short version:
 Forgejo builds the source-pinned server image, generates
 `dist/sf-release-manifest.json`, rolls test, probes `/api/healthz`,
 `/api/ready`, and `/api/version`, then promotes the same image to prod.
-On vega, use `npm run docker:vega:up` to run the source-mounted container on
-port 4000. It is one shared webserver with SF mounted at `/opt/sf`, the initial
-repo at `/workspace`, and the repo parent at `/workspaces`.
+On vega, k3s owns the production server on port 4000. It is one shared
+webserver with repo switching underneath it; upgrades happen by rolling the
+verified image through the `sf-server-test` deployment before `sf-server`.
--- a/docs/specs/sf-self-deploy.md
+++ b/docs/specs/sf-self-deploy.md
@@ -47,52 +47,16 @@ The web host receives `SF_RELEASE_MANIFEST`, `SF_WEB_PROJECT_CWD`,
 `SF_WEB_HOST`, and `SF_WEB_PORT` in its environment. Probes are unauthenticated
 so Kubernetes, Traefik, and Forgejo can verify rollouts without a browser token.

-On vega, the local production server may run from the live checkout while still
-being containerised:
+Vega runs this image under k3s. The GitOps manifests live in
+`/srv/infra/clusters/default/tenants/hugo/apps/sf-server/` and define one
+shared SF webserver deployment plus a test deployment. The shared webserver owns
+project switching and repo-scoped worker/session state; it is not one webserver
+per repo.

-```bash
-npm run docker:vega:up
-```
-
-That profile runs one shared SF webserver. It mounts this SF checkout at
-`/opt/sf`, mounts the initial controlled repo at `/workspace`, mounts the repo
-parent at `/workspaces`, also mounts the repo parent at its real host path
-(`/home/mhugo/code` on vega), persists `~/.sf`, and binds port 4000 to
-`${SF_VEGA_BIND:-127.0.0.1}`. `SF_WORKSPACE_DIR` selects the initial repo; it
-defaults to this checkout for dogfooding. `SF_WORKSPACES_DIR` selects the parent
-directory available for repo switching and defaults to the parent of this SF
-checkout:
-
-```bash
-SF_WORKSPACE_DIR=/home/mhugo/code/other-repo SF_WORKSPACES_DIR=/home/mhugo/code npm run docker:vega:up
-```
-
-Set `SF_VEGA_BIND` to the vega Tailscale address when the server should be
-reachable over Tailscale; do not bind public `0.0.0.0` unless a proxy/firewall
-owns access control.
-
-On hosts without the Docker Compose plugin, `npm run docker:vega:up` uses
-`scripts/run-vega-source-server.mjs` to build `docker/Dockerfile.source-server`
-and run the equivalent `docker run` command directly. This is one SF server
-implementation, one shared webserver process, and repo-scoped worker/session
-state underneath it. Restarting the runner replaces the shared vega webserver,
-not one container per repo.
-
-Use `npm run docker:vega:upgrade` for the local blue/green path. It builds the
-web host, writes the release manifest, starts `sf-server-vega-candidate` on
-port 4001, probes health/readiness/version/projects, replaces `sf-server-vega`
-on port 4000 only after the candidate passes, probes prod, then removes the
-candidate. Replacement drains the old container with
-`docker stop -t ${SF_VEGA_DRAIN_STOP_TIME:-610}` before forced removal
-fallback. The default leaves a 10 second margin over the RPC child's
-`SF_RPC_SHUTDOWN_GRACE_MS=600000` queue-drain handler.
-
-Forgejo can trigger this source-mounted path automatically after the build job.
-Set repository variable `SF_VEGA_UPGRADE_URL` to the private server base URL
-such as `http://vega.ts.hugo.dk:4000`. If the web server has auth enabled, set
-secret `SF_VEGA_UPGRADE_TOKEN`; the workflow sends it as a bearer token. The
-job posts `/api/server-upgrade`, then polls `/api/ready` until the live server
-reports the pushed `GITHUB_SHA`.
+The pod mounts persistent SF state and the host repo workspace paths required by
+the project picker. Runtime source mutation is not the deploy mechanism. A new
+git revision becomes live only after Forgejo builds an image, rolls the test
+deployment, probes it, then promotes the same image to prod.

 ## Promotion

@@ -112,16 +76,15 @@ running process mutates its own package tree.

 ## Reload Model

-For a source-mounted vega container, the foreground process is the staged Next
-standalone server at `dist/web/standalone/server.js`. Rebuild or restart the
-container after changing server/web code. In Kubernetes or k3s, rollout
-replacement is the reload mechanism. Long term, CLI commands should call the
-server RPC surface by default when a healthy server owns the project, while
-local `sf server` remains the bootstrap and recovery path.
+In k3s, rollout replacement is the reload mechanism. `/api/healthz` and
+`/api/ready` return 503 during shutdown so the service can drain before the old
+pod exits. Long term, CLI commands should call the server RPC surface by default
+when a healthy server owns the project, while local `sf server` remains the
+bootstrap and recovery path.

 ## Open Work

 - Wire `/api/version` into the web footer/admin panel.
 - Add an RPC smoke probe once the stable server RPC endpoint is finalized.
-- Move the Forgejo workflow's deployment target names into `/srv/infra` GitOps
-  values when the cluster manifests exist.
+- Move any remaining Forgejo deployment target defaults into `/srv/infra`
+  GitOps values once the app is fully managed there.
--- a/package.json
+++ b/package.json
@@ -55,10 +55,6 @@
 		"build:web-host": "node scripts/build-web-host.mjs",
 		"release:manifest": "node scripts/generate-release-manifest.mjs",
 		"docker:build-sf-server": "docker build -f docker/Dockerfile.sf-server -t ghcr.io/singularity-ng/sf-server .",
-		"docker:vega:up": "node scripts/run-vega-source-server.mjs up",
-		"docker:vega:logs": "node scripts/run-vega-source-server.mjs logs",
-		"docker:vega:down": "node scripts/run-vega-source-server.mjs down",
-		"docker:vega:upgrade": "node scripts/upgrade-vega-source-server.mjs",
 		"docs:features": "node scripts/generate-features-inventory.mjs",
 		"copy-resources": "node scripts/copy-resources.cjs",
 		"copy-themes": "node scripts/copy-themes.cjs",
--- a/web/components/sf/projects-view.tsx
+++ b/web/components/sf/projects-view.tsx
@@ -14,7 +14,6 @@ import {
 	Layers,
 	Loader2,
 	Plus,
-	RefreshCw,
 	Search,
 	Sparkles,
 	X,
@@ -394,8 +393,6 @@ export function ProjectsPanel({
 	const [newProjectOpen, setNewProjectOpen] = useState(false);
 	const [changeRootOpen, setChangeRootOpen] = useState(false);
 	const [addRepoOpen, setAddRepoOpen] = useState(false);
-	const [upgradeBusy, setUpgradeBusy] = useState(false);
-	const [upgradeError, setUpgradeError] = useState<string | null>(null);
 	const _workspaceState = useSFWorkspaceState();

 	const handleProjectCreated = useCallback(
@@ -430,27 +427,6 @@ export function ProjectsPanel({
 		[],
 	);

-	const handleUpgradeServer = useCallback(async () => {
-		setUpgradeBusy(true);
-		setUpgradeError(null);
-		try {
-			const res = await authFetch("/api/server-upgrade", { method: "POST" });
-			if (!res.ok) {
-				const body = await res.json().catch(() => ({}));
-				throw new Error(
-					(body as { error?: string }).error ??
-						`Upgrade trigger failed (${res.status})`,
-				);
-			}
-		} catch (err) {
-			setUpgradeError(
-				err instanceof Error ? err.message : "Failed to trigger upgrade",
-			);
-		} finally {
-			setUpgradeBusy(false);
-		}
-	}, []);
-
 	// Sort: active-sf first, then by name
 	const sortedProjects = [...projects].sort((a, b) => {
 		const kindOrder: Record<ProjectDetectionKind, number> = {
@@ -612,18 +588,6 @@ export function ProjectsPanel({
 						)}
 					</div>
 					<div className="flex items-center gap-1">
-						<Button
-							variant="ghost"
-							size="icon"
-							className="h-8 w-8 shrink-0"
-							onClick={() => void handleUpgradeServer()}
-							disabled={upgradeBusy}
-							title="Upgrade server"
-						>
-							<RefreshCw
-								className={cn("h-4 w-4", upgradeBusy && "animate-spin")}
-							/>
-						</Button>
 						<Button
 							variant="ghost"
 							size="icon"
@@ -634,11 +598,6 @@ export function ProjectsPanel({
 						</Button>
 					</div>
 				</div>
-				{upgradeError && (
-					<div className="border-b border-border/50 px-5 py-2 text-xs text-destructive">
-						{upgradeError}
-					</div>
-				)}

 				{/* Scrollable project list */}
 				<ScrollArea className="min-h-0 flex-1">