diff --git a/.forgejo/workflows/self-deploy.yml b/.forgejo/workflows/self-deploy.yml index 2580ba778..d8ea6551b 100644 --- a/.forgejo/workflows/self-deploy.yml +++ b/.forgejo/workflows/self-deploy.yml @@ -99,23 +99,30 @@ jobs: name: deploy test and probe needs: build runs-on: docker - if: env.KUBECONFIG_B64 != '' && env.SF_TEST_URL != '' steps: - name: Configure kubeconfig run: | - mkdir -p "$HOME/.kube" - printf '%s' "$KUBECONFIG_B64" | base64 -d > "$HOME/.kube/config" + set -euo pipefail + if [ -n "${KUBECONFIG_B64:-}" ]; then + mkdir -p "$HOME/.kube" + printf '%s' "$KUBECONFIG_B64" | base64 -d > "$HOME/.kube/config" + else + kubectl config current-context + fi - name: Roll test deployment run: | + set -euo pipefail kubectl -n "${SF_TEST_NAMESPACE:-sf-server}" set image "deployment/${SF_TEST_DEPLOYMENT:-sf-server-test}" "sf-server=${{ needs.build.outputs.image }}" kubectl -n "${SF_TEST_NAMESPACE:-sf-server}" rollout status "deployment/${SF_TEST_DEPLOYMENT:-sf-server-test}" --timeout=600s - name: Probe test server run: | - curl --fail --silent --show-error "$SF_TEST_URL/api/healthz" - curl --fail --silent --show-error "$SF_TEST_URL/api/ready" - curl --fail --silent --show-error "$SF_TEST_URL/api/version" + set -euo pipefail + test_url="${SF_TEST_URL:-http://vega.ts.hugo.dk:4001}" + curl --fail --silent --show-error "$test_url/api/healthz" + curl --fail --silent --show-error "$test_url/api/ready" + curl --fail --silent --show-error "$test_url/api/version" deploy-prod: name: promote prod @@ -123,20 +130,28 @@ jobs: - build - deploy-test runs-on: docker - if: needs.deploy-test.result == 'success' && env.KUBECONFIG_B64 != '' && env.SF_PROD_URL != '' + if: needs.deploy-test.result == 'success' steps: - name: Configure kubeconfig run: | - mkdir -p "$HOME/.kube" - printf '%s' "$KUBECONFIG_B64" | base64 -d > "$HOME/.kube/config" + set -euo pipefail + if [ -n "${KUBECONFIG_B64:-}" ]; then + mkdir -p "$HOME/.kube" + printf '%s' "$KUBECONFIG_B64" | base64 -d > "$HOME/.kube/config" + else + kubectl config current-context + fi - name: Roll prod deployment run: | + set -euo pipefail kubectl -n "${SF_PROD_NAMESPACE:-sf-server}" set image "deployment/${SF_PROD_DEPLOYMENT:-sf-server}" "sf-server=${{ needs.build.outputs.image }}" kubectl -n "${SF_PROD_NAMESPACE:-sf-server}" rollout status "deployment/${SF_PROD_DEPLOYMENT:-sf-server}" --timeout=600s - name: Probe prod server run: | - curl --fail --silent --show-error "$SF_PROD_URL/api/healthz" - curl --fail --silent --show-error "$SF_PROD_URL/api/ready" - curl --fail --silent --show-error "$SF_PROD_URL/api/version" + set -euo pipefail + prod_url="${SF_PROD_URL:-http://vega.ts.hugo.dk:4000}" + curl --fail --silent --show-error "$prod_url/api/healthz" + curl --fail --silent --show-error "$prod_url/api/ready" + curl --fail --silent --show-error "$prod_url/api/version" diff --git a/CLAUDE.md b/CLAUDE.md index d143b2219..ba2d7dfc9 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -134,6 +134,6 @@ Self-deploy is documented in `docs/specs/sf-self-deploy.md`. The short version: Forgejo builds the source-pinned server image, generates `dist/sf-release-manifest.json`, rolls test, probes `/api/healthz`, `/api/ready`, and `/api/version`, then promotes the same image to prod. -On vega, use `npm run docker:vega:up` to run the source-mounted container on -port 4000. It is one shared webserver with SF mounted at `/opt/sf`, the initial -repo at `/workspace`, and the repo parent at `/workspaces`. +On vega, k3s owns the production server on port 4000. It is one shared +webserver with repo switching underneath it; upgrades happen by rolling the +verified image through the `sf-server-test` deployment before `sf-server`. diff --git a/docs/specs/sf-self-deploy.md b/docs/specs/sf-self-deploy.md index d47072c6d..c9e449787 100644 --- a/docs/specs/sf-self-deploy.md +++ b/docs/specs/sf-self-deploy.md @@ -47,52 +47,16 @@ The web host receives `SF_RELEASE_MANIFEST`, `SF_WEB_PROJECT_CWD`, `SF_WEB_HOST`, and `SF_WEB_PORT` in its environment. Probes are unauthenticated so Kubernetes, Traefik, and Forgejo can verify rollouts without a browser token. -On vega, the local production server may run from the live checkout while still -being containerised: +Vega runs this image under k3s. The GitOps manifests live in +`/srv/infra/clusters/default/tenants/hugo/apps/sf-server/` and define one +shared SF webserver deployment plus a test deployment. The shared webserver owns +project switching and repo-scoped worker/session state; it is not one webserver +per repo. -```bash -npm run docker:vega:up -``` - -That profile runs one shared SF webserver. It mounts this SF checkout at -`/opt/sf`, mounts the initial controlled repo at `/workspace`, mounts the repo -parent at `/workspaces`, also mounts the repo parent at its real host path -(`/home/mhugo/code` on vega), persists `~/.sf`, and binds port 4000 to -`${SF_VEGA_BIND:-127.0.0.1}`. `SF_WORKSPACE_DIR` selects the initial repo; it -defaults to this checkout for dogfooding. `SF_WORKSPACES_DIR` selects the parent -directory available for repo switching and defaults to the parent of this SF -checkout: - -```bash -SF_WORKSPACE_DIR=/home/mhugo/code/other-repo SF_WORKSPACES_DIR=/home/mhugo/code npm run docker:vega:up -``` - -Set `SF_VEGA_BIND` to the vega Tailscale address when the server should be -reachable over Tailscale; do not bind public `0.0.0.0` unless a proxy/firewall -owns access control. - -On hosts without the Docker Compose plugin, `npm run docker:vega:up` uses -`scripts/run-vega-source-server.mjs` to build `docker/Dockerfile.source-server` -and run the equivalent `docker run` command directly. This is one SF server -implementation, one shared webserver process, and repo-scoped worker/session -state underneath it. Restarting the runner replaces the shared vega webserver, -not one container per repo. - -Use `npm run docker:vega:upgrade` for the local blue/green path. It builds the -web host, writes the release manifest, starts `sf-server-vega-candidate` on -port 4001, probes health/readiness/version/projects, replaces `sf-server-vega` -on port 4000 only after the candidate passes, probes prod, then removes the -candidate. Replacement drains the old container with -`docker stop -t ${SF_VEGA_DRAIN_STOP_TIME:-610}` before forced removal -fallback. The default leaves a 10 second margin over the RPC child's -`SF_RPC_SHUTDOWN_GRACE_MS=600000` queue-drain handler. - -Forgejo can trigger this source-mounted path automatically after the build job. -Set repository variable `SF_VEGA_UPGRADE_URL` to the private server base URL -such as `http://vega.ts.hugo.dk:4000`. If the web server has auth enabled, set -secret `SF_VEGA_UPGRADE_TOKEN`; the workflow sends it as a bearer token. The -job posts `/api/server-upgrade`, then polls `/api/ready` until the live server -reports the pushed `GITHUB_SHA`. +The pod mounts persistent SF state and the host repo workspace paths required by +the project picker. Runtime source mutation is not the deploy mechanism. A new +git revision becomes live only after Forgejo builds an image, rolls the test +deployment, probes it, then promotes the same image to prod. ## Promotion @@ -112,16 +76,15 @@ running process mutates its own package tree. ## Reload Model -For a source-mounted vega container, the foreground process is the staged Next -standalone server at `dist/web/standalone/server.js`. Rebuild or restart the -container after changing server/web code. In Kubernetes or k3s, rollout -replacement is the reload mechanism. Long term, CLI commands should call the -server RPC surface by default when a healthy server owns the project, while -local `sf server` remains the bootstrap and recovery path. +In k3s, rollout replacement is the reload mechanism. `/api/healthz` and +`/api/ready` return 503 during shutdown so the service can drain before the old +pod exits. Long term, CLI commands should call the server RPC surface by default +when a healthy server owns the project, while local `sf server` remains the +bootstrap and recovery path. ## Open Work - Wire `/api/version` into the web footer/admin panel. - Add an RPC smoke probe once the stable server RPC endpoint is finalized. -- Move the Forgejo workflow's deployment target names into `/srv/infra` GitOps - values when the cluster manifests exist. +- Move any remaining Forgejo deployment target defaults into `/srv/infra` + GitOps values once the app is fully managed there. diff --git a/package.json b/package.json index 7d3be896d..73db4a693 100644 --- a/package.json +++ b/package.json @@ -55,10 +55,6 @@ "build:web-host": "node scripts/build-web-host.mjs", "release:manifest": "node scripts/generate-release-manifest.mjs", "docker:build-sf-server": "docker build -f docker/Dockerfile.sf-server -t ghcr.io/singularity-ng/sf-server .", - "docker:vega:up": "node scripts/run-vega-source-server.mjs up", - "docker:vega:logs": "node scripts/run-vega-source-server.mjs logs", - "docker:vega:down": "node scripts/run-vega-source-server.mjs down", - "docker:vega:upgrade": "node scripts/upgrade-vega-source-server.mjs", "docs:features": "node scripts/generate-features-inventory.mjs", "copy-resources": "node scripts/copy-resources.cjs", "copy-themes": "node scripts/copy-themes.cjs", diff --git a/web/components/sf/projects-view.tsx b/web/components/sf/projects-view.tsx index 106b207de..72442f6e7 100644 --- a/web/components/sf/projects-view.tsx +++ b/web/components/sf/projects-view.tsx @@ -14,7 +14,6 @@ import { Layers, Loader2, Plus, - RefreshCw, Search, Sparkles, X, @@ -394,8 +393,6 @@ export function ProjectsPanel({ const [newProjectOpen, setNewProjectOpen] = useState(false); const [changeRootOpen, setChangeRootOpen] = useState(false); const [addRepoOpen, setAddRepoOpen] = useState(false); - const [upgradeBusy, setUpgradeBusy] = useState(false); - const [upgradeError, setUpgradeError] = useState(null); const _workspaceState = useSFWorkspaceState(); const handleProjectCreated = useCallback( @@ -430,27 +427,6 @@ export function ProjectsPanel({ [], ); - const handleUpgradeServer = useCallback(async () => { - setUpgradeBusy(true); - setUpgradeError(null); - try { - const res = await authFetch("/api/server-upgrade", { method: "POST" }); - if (!res.ok) { - const body = await res.json().catch(() => ({})); - throw new Error( - (body as { error?: string }).error ?? - `Upgrade trigger failed (${res.status})`, - ); - } - } catch (err) { - setUpgradeError( - err instanceof Error ? err.message : "Failed to trigger upgrade", - ); - } finally { - setUpgradeBusy(false); - } - }, []); - // Sort: active-sf first, then by name const sortedProjects = [...projects].sort((a, b) => { const kindOrder: Record = { @@ -612,18 +588,6 @@ export function ProjectsPanel({ )}
-
- {upgradeError && ( -
- {upgradeError} -
- )} {/* Scrollable project list */}