From c1c7f8b6b0b34e1c1f41d23afd0b6405f55842f5 Mon Sep 17 00:00:00 2001 From: Tom Boucher Date: Sat, 21 Mar 2026 10:43:56 -0400 Subject: [PATCH] perf(ci): reduce pipeline minutes with shallow clones, npm caching, and exponential backoff (#1700) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI workflow: - Replace fetch-depth: 0 with shallow clones (depth 1-2) in lint and build jobs — saves ~30-60s per job - Remove fetch-depth: 0 from build and windows-portability (default depth 1 is sufficient for build/test) Pipeline workflow: - Add cache: 'npm' to dev-publish, test-verify, and prod-release setup-node steps — saves ~1-2 min per job on npm ci - Move ${{ }} expressions from run: blocks to env: variables in prod-release and update-builder to prevent command injection vectors - Use fetch-depth: 2 in update-builder (only needs parent diff) Build-native workflow: - Replace hardcoded sleep 30 + single verification with exponential backoff polling (5s → 10s → 20s → 30s cap, max 5 attempts) - Replace fixed 15s retry intervals in post-publish smoke test with exponential backoff (5s → 10s → 20s → 30s cap, 8 attempts) - Replace fixed 15s dist-tag verification loop with exponential backoff (6 attempts vs 10 × 15s) Estimated savings: ~5-10 min per full CI+pipeline run, ~1-3 min per native build publish. 
Co-authored-by: Claude Opus 4.6 (1M context) Co-authored-by: TÂCHES --- .github/workflows/build-native.yml | 82 +++++++++++++++++++----------- .github/workflows/ci.yml | 6 +-- .github/workflows/pipeline.yml | 56 +++++++++++++------- docs/ci-cd-pipeline.md | 5 ++ 4 files changed, 95 insertions(+), 54 deletions(-) diff --git a/.github/workflows/build-native.yml b/.github/workflows/build-native.yml index c74bc6f70..3d3bcd9b9 100644 --- a/.github/workflows/build-native.yml +++ b/.github/workflows/build-native.yml @@ -156,29 +156,44 @@ jobs: cd "$GITHUB_WORKSPACE" done - - name: Wait for npm registry propagation - run: sleep 30 - - name: Verify platform packages are published run: | VERSION=$(node -p "require('./package.json').version") echo "Verifying platform packages at version ${VERSION}..." - FAILED=0 - for platform in darwin-arm64 darwin-x64 linux-x64-gnu linux-arm64-gnu win32-x64-msvc; do - PKG="@gsd-build/engine-${platform}" - PUBLISHED=$(npm view "${PKG}@${VERSION}" version 2>/dev/null || echo "") - if [ "${PUBLISHED}" = "${VERSION}" ]; then - echo " ✓ ${PKG}@${VERSION}" - else - echo "::error::${PKG}@${VERSION} not found on npm (got: '${PUBLISHED}')" - FAILED=1 + # Exponential backoff between attempts: 5s, 10s, 20s, 30s (max 5 attempts, ~65s of waiting worst case vs fixed 30s + single check) + DELAY=5 + for attempt in $(seq 1 5); do + FAILED=0 + for platform in darwin-arm64 darwin-x64 linux-x64-gnu linux-arm64-gnu win32-x64-msvc; do + PKG="@gsd-build/engine-${platform}" + PUBLISHED=$(npm view "${PKG}@${VERSION}" version 2>/dev/null || echo "") + if [ "${PUBLISHED}" != "${VERSION}" ]; then + FAILED=1 + break + fi + done + if [ "${FAILED}" = "0" ]; then + echo "All platform packages verified (attempt ${attempt})." + break fi + if [ "$attempt" = "5" ]; then + echo "::error::One or more platform packages not found after 5 attempts. Aborting." 
+ for platform in darwin-arm64 darwin-x64 linux-x64-gnu linux-arm64-gnu win32-x64-msvc; do + PKG="@gsd-build/engine-${platform}" + PUBLISHED=$(npm view "${PKG}@${VERSION}" version 2>/dev/null || echo "") + if [ "${PUBLISHED}" = "${VERSION}" ]; then + echo " ✓ ${PKG}@${VERSION}" + else + echo " ✗ ${PKG}@${VERSION} (got: '${PUBLISHED}')" + fi + done + exit 1 + fi + echo " Attempt ${attempt}: not all packages visible yet, retrying in ${DELAY}s..." + sleep "$DELAY" + DELAY=$((DELAY * 2)) + if [ "$DELAY" -gt 30 ]; then DELAY=30; fi done - if [ "${FAILED}" = "1" ]; then - echo "::error::One or more platform packages are missing from npm. Aborting main package publish to prevent broken installs." - exit 1 - fi - echo "All platform packages verified." - name: Install dependencies run: npm ci @@ -213,28 +228,31 @@ jobs: cd "$TMPDIR" npm init -y > /dev/null 2>&1 - # Wait for npm registry to show the new version (metadata propagation) + # Wait for npm registry with exponential backoff (5s, 10s, 20s, 30s, 30s, 30s, 30s — max ~155s vs fixed 5min) echo "Waiting for gsd-pi@${VERSION} to appear on npm..." - for attempt in $(seq 1 20); do + DELAY=5 + for attempt in $(seq 1 8); do PUBLISHED=$(npm view "gsd-pi@${VERSION}" version 2>/dev/null || echo "") if [ "${PUBLISHED}" = "${VERSION}" ]; then echo " ✓ Version ${VERSION} visible on npm (attempt ${attempt})" break fi - if [ "$attempt" = "20" ]; then - echo "::warning::gsd-pi@${VERSION} not visible on npm after 5 minutes — skipping smoke test" + if [ "$attempt" = "8" ]; then + echo "::warning::gsd-pi@${VERSION} not visible on npm after 8 attempts — skipping smoke test" exit 0 fi - sleep 15 + echo " Attempt ${attempt}: not yet visible, retrying in ${DELAY}s..." + sleep "$DELAY" + DELAY=$((DELAY * 2)) + if [ "$DELAY" -gt 30 ]; then DELAY=30; fi done - # Now install and verify + # Install and verify with backoff (5s, 10s, 20s) echo "Installing gsd-pi@${VERSION}..." 
+ DELAY=5 for attempt in 1 2 3; do if npm install "gsd-pi@${VERSION}" 2>&1 | tee /tmp/install-output.txt; then echo " ✓ Install succeeded" - # Run version check via node directly (npx may resolve wrong binary) - # Strip ANSI escape codes and match version on any line (--version prints a banner) RAW=$(node node_modules/gsd-pi/dist/loader.js --version 2>&1 || echo "FAILED") ACTUAL=$(echo "$RAW" | sed 's/\x1b\[[0-9;]*m//g' | grep -oE "^${VERSION}$" | head -1) if [ "$ACTUAL" = "$VERSION" ]; then @@ -247,9 +265,10 @@ jobs: exit 1 fi fi - echo "Install attempt ${attempt}/3 failed, retrying in 15s..." + echo "Install attempt ${attempt}/3 failed, retrying in ${DELAY}s..." cat /tmp/install-output.txt - sleep 15 + sleep "$DELAY" + DELAY=$((DELAY * 2)) done echo "::error::Smoke test failed — gsd-pi@${VERSION} not installable" exit 1 @@ -259,14 +278,17 @@ jobs: run: | VERSION=$(node -p "require('./package.json').version") echo "Verifying npm dist-tag 'latest' points to ${VERSION}..." - for attempt in $(seq 1 10); do + DELAY=5 + for attempt in $(seq 1 6); do LATEST=$(npm view gsd-pi dist-tags.latest 2>/dev/null || echo "") if [ "${LATEST}" = "${VERSION}" ]; then echo " ✓ npm dist-tags.latest = ${VERSION}" exit 0 fi - echo " Attempt ${attempt}/10: latest=${LATEST}, expected=${VERSION}, retrying in 15s..." - sleep 15 + echo " Attempt ${attempt}/6: latest=${LATEST}, expected=${VERSION}, retrying in ${DELAY}s..." 
+ sleep "$DELAY" + DELAY=$((DELAY * 2)) + if [ "$DELAY" -gt 30 ]; then DELAY=30; fi done echo "::error::dist-tags.latest is '${LATEST}' but expected '${VERSION}' — run: npm dist-tag add gsd-pi@${VERSION} latest" exit 1 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3258c7157..0b3864b6c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -75,7 +75,7 @@ jobs: steps: - uses: actions/checkout@v6 with: - fetch-depth: 0 + fetch-depth: 2 - name: Scan for hardcoded secrets run: bash scripts/secret-scan.sh --diff origin/main @@ -103,8 +103,6 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v6 - with: - fetch-depth: 0 - name: Setup Node.js uses: actions/setup-node@v6 @@ -140,8 +138,6 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v6 - with: - fetch-depth: 0 - name: Setup Node.js uses: actions/setup-node@v6 diff --git a/.github/workflows/pipeline.yml b/.github/workflows/pipeline.yml index 9ca59503e..dc5a48b20 100644 --- a/.github/workflows/pipeline.yml +++ b/.github/workflows/pipeline.yml @@ -36,6 +36,7 @@ jobs: with: node-version: 24 registry-url: https://registry.npmjs.org + cache: 'npm' - name: Install dependencies run: npm ci @@ -78,6 +79,7 @@ jobs: with: node-version: 24 registry-url: https://registry.npmjs.org + cache: 'npm' - name: Install gsd-pi@dev globally run: npm install -g gsd-pi@dev @@ -101,9 +103,10 @@ jobs: npm run test:live-regression - name: Promote to @next - run: npm dist-tag add gsd-pi@${{ needs.dev-publish.outputs.dev-version }} next env: + DEV_VERSION: ${{ needs.dev-publish.outputs.dev-version }} NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + run: npm dist-tag add "gsd-pi@${DEV_VERSION}" next - name: Log in to GHCR uses: docker/login-action@v4 @@ -113,13 +116,15 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Build and push runtime Docker image + env: + DEV_VERSION: ${{ needs.dev-publish.outputs.dev-version }} run: | docker build --target runtime \ - -t 
ghcr.io/gsd-build/gsd-pi:next \ - -t ghcr.io/gsd-build/gsd-pi:${{ needs.dev-publish.outputs.dev-version }} \ + -t "ghcr.io/gsd-build/gsd-pi:next" \ + -t "ghcr.io/gsd-build/gsd-pi:${DEV_VERSION}" \ . docker push ghcr.io/gsd-build/gsd-pi:next - docker push ghcr.io/gsd-build/gsd-pi:${{ needs.dev-publish.outputs.dev-version }} + docker push "ghcr.io/gsd-build/gsd-pi:${DEV_VERSION}" prod-release: name: Production Release @@ -136,6 +141,7 @@ jobs: with: node-version: 24 registry-url: https://registry.npmjs.org + cache: 'npm' - name: Install dependencies run: npm ci @@ -158,44 +164,50 @@ jobs: echo "$OUTPUT" | jq -r '.releaseNotes' > /tmp/release-notes.md - name: Bump version and sync packages - run: node scripts/bump-version.mjs "${{ steps.release.outputs.version }}" + env: + RELEASE_VERSION: ${{ steps.release.outputs.version }} + run: node scripts/bump-version.mjs "$RELEASE_VERSION" - name: Update CHANGELOG.md run: node scripts/update-changelog.mjs /tmp/changelog-entry.md - name: Commit, tag, and push + env: + RELEASE_VERSION: ${{ steps.release.outputs.version }} run: | git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" git add package.json package-lock.json CHANGELOG.md native/npm/*/package.json pkg/package.json packages/pi-coding-agent/package.json - git commit -m "release: v${{ steps.release.outputs.version }}" - git tag "v${{ steps.release.outputs.version }}" + git commit -m "release: v${RELEASE_VERSION}" + git tag "v${RELEASE_VERSION}" git push origin main - git push origin "v${{ steps.release.outputs.version }}" + git push origin "v${RELEASE_VERSION}" - name: Build release run: npm run build - name: Publish release to npm @latest + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + RELEASE_VERSION: ${{ steps.release.outputs.version }} run: | OUTPUT=$(npm publish 2>&1) && echo "$OUTPUT" || { if echo "$OUTPUT" | grep -q "cannot publish over the previously published"; then echo "Version already published — 
promoting to latest" - npm dist-tag add gsd-pi@${{ steps.release.outputs.version }} latest + npm dist-tag add "gsd-pi@${RELEASE_VERSION}" latest else echo "$OUTPUT" exit 1 fi } - env: - NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} - name: Create GitHub Release env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + RELEASE_VERSION: ${{ steps.release.outputs.version }} run: | - gh release create "v${{ steps.release.outputs.version }}" \ - --title "v${{ steps.release.outputs.version }}" \ + gh release create "v${RELEASE_VERSION}" \ + --title "v${RELEASE_VERSION}" \ --notes-file /tmp/release-notes.md \ --latest @@ -203,12 +215,12 @@ jobs: if: ${{ env.DISCORD_WEBHOOK != '' }} env: DISCORD_WEBHOOK: ${{ secrets.DISCORD_CHANGELOG_WEBHOOK }} + RELEASE_VERSION: ${{ steps.release.outputs.version }} run: | - VERSION="${{ steps.release.outputs.version }}" NOTES=$(cat /tmp/release-notes.md) curl -s -X POST "$DISCORD_WEBHOOK" \ -H "Content-Type: application/json" \ - -d "$(jq -n --arg c "**GSD v${VERSION} Released**\n\n${NOTES}\n\n\`npm i gsd-pi@${VERSION}\`" '{content:$c}')" + -d "$(jq -n --arg c "**GSD v${RELEASE_VERSION} Released**\n\n${NOTES}\n\n\`npm i gsd-pi@${RELEASE_VERSION}\`" '{content:$c}')" - name: Log in to GHCR uses: docker/login-action@v4 @@ -218,9 +230,11 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Tag runtime Docker image as latest + env: + DEV_VERSION: ${{ needs.dev-publish.outputs.dev-version }} run: | - docker pull ghcr.io/gsd-build/gsd-pi:${{ needs.dev-publish.outputs.dev-version }} - docker tag ghcr.io/gsd-build/gsd-pi:${{ needs.dev-publish.outputs.dev-version }} ghcr.io/gsd-build/gsd-pi:latest + docker pull "ghcr.io/gsd-build/gsd-pi:${DEV_VERSION}" + docker tag "ghcr.io/gsd-build/gsd-pi:${DEV_VERSION}" ghcr.io/gsd-build/gsd-pi:latest docker push ghcr.io/gsd-build/gsd-pi:latest update-builder: @@ -229,12 +243,16 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 + with: + fetch-depth: 2 - name: Check for Dockerfile changes id: check + env: + 
HEAD_SHA: ${{ github.event.workflow_run.head_sha }} run: | - CHANGED=$(git diff --name-only ${{ github.event.workflow_run.head_sha }}~1 ${{ github.event.workflow_run.head_sha }} -- Dockerfile || echo "") - echo "changed=$([[ -n \"$CHANGED\" ]] && echo 'true' || echo 'false')" >> "$GITHUB_OUTPUT" + CHANGED=$(git diff --name-only "${HEAD_SHA}~1" "${HEAD_SHA}" -- Dockerfile || echo "") + echo "changed=$([[ -n "$CHANGED" ]] && echo 'true' || echo 'false')" >> "$GITHUB_OUTPUT" - name: Log in to GHCR if: steps.check.outputs.changed == 'true' diff --git a/docs/ci-cd-pipeline.md b/docs/ci-cd-pipeline.md index 79364568f..80410d124 100644 --- a/docs/ci-cd-pipeline.md +++ b/docs/ci-cd-pipeline.md @@ -70,6 +70,11 @@ docker run --rm -v $(pwd):/workspace ghcr.io/gsd-build/gsd-pi:latest --version **CI optimization (v2.38):** GitHub Actions minutes were reduced ~60-70% (~10k → ~3-4k/month) through workflow consolidation and caching improvements. +**Pipeline optimization (v2.41):** +- **Shallow clones** — CI lint and build jobs use `fetch-depth: 1` or `fetch-depth: 2` instead of full history, saving ~30-60s per job +- **npm cache in pipeline** — dev-publish, test-verify, and prod-release now use `cache: 'npm'` on setup-node, saving ~1-2 min per job on repeat runs +- **Exponential backoff** — npm registry propagation waits in `build-native.yml` now use exponential backoff (5s → 10s → 20s → 30s cap) instead of a hardcoded `sleep 30` and fixed 15s retries, typically finishing in <15s when the registry is fast +- **Security hardening** — `pipeline.yml` now passes `${{ }}` expressions to `run:` blocks through `env:` variables instead of interpolating them inline, to prevent command injection vectors ### Docs-Only PR Detection (v2.41) CI automatically detects when a PR contains only documentation changes (`.md` files and `docs/` content). When docs-only: