# oncall-engine/.github/workflows/integration_tests.yml

name: Integration Tests

# Triggers:
#   - pull_request: run on every pull request
#   - merge_group: run when a pull request is added to a merge queue, see
#     https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/configuring-pull-request-merges/managing-a-merge-queue#triggering-merge-group-checks-with-github-actions
on: [pull_request, merge_group]

# TODO: ideally we would be able to have one CI job which spins up the kind cluster and does the helm release
# then we could have the UI and backend integration tests dependent on this job and not have to each
# independently spin up the cluster. This doesn't seem to be supported however
# https://github.com/docker/build-push-action/issues/225
#
# One possible workaround would be to deploy the helm release to a sandbox k8s cluster and reference
# that cluster from the various integration test jobs
jobs:
  # Builds the engine image once and publishes it as a tarball artifact so that
  # the integration test jobs can load it into their own kind clusters instead
  # of each rebuilding it.
  build-engine-docker-image:
    name: Build engine Docker image
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Set up Docker Buildx # We need this step for docker caching
        uses: docker/setup-buildx-action@v2

      # The image is exported straight to a tarball via `outputs`; nothing in this
      # job uses it from the local Docker daemon, so `load` is intentionally not set.
      - name: Build docker image locally # using github actions docker cache
        uses: docker/build-push-action@v2
        with:
          context: ./engine
          file: ./engine/Dockerfile
          push: false
          tags: oncall/engine:latest
          cache-from: type=gha
          cache-to: type=gha,mode=max
          outputs: type=docker,dest=/tmp/oncall-engine.tar

      # https://github.com/docker/build-push-action/issues/225#issuecomment-727639184
      - name: Persist engine Docker image between jobs
        uses: actions/upload-artifact@v3
        with:
          name: oncall-engine
          path: /tmp/oncall-engine.tar
          # fail in this job, rather than in the jobs that download the artifact,
          # if the tarball was not produced
          if-no-files-found: error

  # Deploys the helm chart (using the engine image built by build-engine-docker-image)
  # into a kind cluster, bootstraps an integration, posts one alert to it and then
  # waits for the resulting alert group/alert to show up.
  backend-integration-tests:
    name: Backend Integration Tests
    needs: build-engine-docker-image
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      - name: Download engine Docker image
        uses: actions/download-artifact@v3
        with:
          name: oncall-engine
          path: /tmp

      - name: Create k8s Kind Cluster
        uses: helm/kind-action@v1.3.0
        with:
          config: ./helm/kind.yml

      - name: Load image on the nodes of the cluster
        run: kind load image-archive --name=chart-testing /tmp/oncall-engine.tar

      - name: Install helm chart
        run: |
          helm install test-release \
          --values ./simple.yml \
          --values ./values-local-image.yml \
          ./oncall
        working-directory: helm

      - name: Await k8s pods and other resources up
        uses: jupyterhub/action-k8s-await-workloads@v1
        with:
          workloads: "" # all
          namespace: "" # default
          timeout: 300
          max-restarts: -1

      # GitHub Action reference: https://github.com/jupyterhub/action-k8s-namespace-report
      - name: Kubernetes namespace report
        uses: jupyterhub/action-k8s-namespace-report@v1
        if: always()

      # Runs the bootstrap management command inside the engine pod; its output is the
      # URL path of the created integration, which is exported for the following steps.
      - name: Bootstrap organization and integration
        run: |
          # Plain assignments (instead of `export VAR=$(...)`) so that a failing
          # kubectl command fails the step rather than producing an empty value.
          POD_NAME=$(kubectl get pods --namespace default -l "app.kubernetes.io/name=oncall,app.kubernetes.io/instance=test-release,app.kubernetes.io/component=engine" -o jsonpath="{.items[0].metadata.name}")
          # No -it: there is no TTY on the runner and the output is captured, not displayed.
          INTEGRATION_PATH=$(kubectl exec "$POD_NAME" -c oncall -- bash -c "python manage.py setup_end_to_end_test --bootstrap_integration")
          ONCALL_INTEGRATION_URL="http://localhost:30001${INTEGRATION_PATH}"
          echo "ONCALL_INTEGRATION_URL=$ONCALL_INTEGRATION_URL" >> "$GITHUB_ENV"

      - name: Send an alert to the integration
        run: |
          echo "$ONCALL_INTEGRATION_URL"
          export TEST_ID=test-0
          echo "TEST_ID=$TEST_ID" >> "$GITHUB_ENV"
          # --fail: make an HTTP error response fail this step immediately instead of
          # only surfacing as a timeout in the next step
          curl --fail -X POST "$ONCALL_INTEGRATION_URL" \
            -H 'Content-Type: Application/json' \
            -d '{
              "alert_uid": "08d6891a-835c-e661-39fa-96b6a9e26552",
              "title": "'"$TEST_ID"'",
              "image_url": "https://upload.wikimedia.org/wikipedia/commons/e/ee/Grumpy_Cat_by_Gage_Skidmore.jpg",
              "state": "alerting",
              "link_to_upstream_details": "https://en.wikipedia.org/wiki/Downtime",
              "message": "Smth happened. Oh no!"
            }'

      # Polls the engine pod up to 30 times, one second apart, until the management
      # command reports "1, 1" (alert groups, alerts) for the test id.
      - name: Await 1 alert group and 1 alert created during the test (timeout 30 seconds)
        run: |
          POD_NAME=$(kubectl get pods --namespace default -l "app.kubernetes.io/name=oncall,app.kubernetes.io/instance=test-release,app.kubernetes.io/component=engine" -o jsonpath="{.items[0].metadata.name}")
          tries=30
          while [ "$tries" -gt 0 ]; do
              if kubectl exec "$POD_NAME" -c oncall -- bash -c "python manage.py setup_end_to_end_test --return_results_for_test_id $TEST_ID" | grep -q '1, 1'
              then
                  break
              fi

              tries=$(( tries - 1 ))
              sleep 1
          done

          if [ "$tries" -eq 0 ]; then
              echo 'Expected "1, 1" (alert groups, alerts). They were not created in 30 seconds during this integration test. Something is broken' >&2
              exit 1
          fi

  # Deploys the helm chart with a locally built OnCall plugin into a kind cluster and
  # runs the Playwright integration tests against it, once per Grafana image tag.
  ui-integration-tests:
    needs: build-engine-docker-image
    runs-on: ubuntu-latest
    name: "UI Integration Tests - Grafana: ${{ matrix.grafana-image-tag }}"
    strategy:
      matrix:
        grafana-image-tag:
          - "9.2.6"
          - main
          - latest
    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      - name: Download engine Docker image
        uses: actions/download-artifact@v3
        with:
          name: oncall-engine
          path: /tmp

      - name: Create k8s Kind Cluster
        uses: helm/kind-action@v1.3.0
        with:
          config: ./helm/kind.yml

      - name: Load image on the nodes of the cluster
        run: kind load image-archive --name=chart-testing /tmp/oncall-engine.tar

      # yarn caching doesn't properly work with subdirectories hence the following two steps
      # which calculate a cache key and restore the cache manually
      # see this GH issue for more details https://github.com/actions/setup-node/issues/488#issue-1231822552
      - uses: actions/setup-node@v3
        with:
          node-version: "14.17.0"
          cache: "yarn"
          cache-dependency-path: grafana-plugin/yarn.lock

      # NOTE(review): `cacheFolder` is a Yarn 2+ setting, while the `--frozen-lockfile` and
      # `yarn list --pattern` invocations further down look like Yarn 1 usage - confirm which
      # Yarn version runs here (on Yarn 1 the equivalent would be `yarn cache dir`).
      - name: Get yarn cache directory path
        id: yarn-cache-dir-path
        run: echo "dir=$(yarn config get cacheFolder)" >> "$GITHUB_OUTPUT"
        shell: bash
        working-directory: ./grafana-plugin

      - name: Restore yarn cache
        uses: actions/cache@v3
        with:
          path: ${{ steps.yarn-cache-dir-path.outputs.dir }}
          key: yarn-cache-folder-${{ hashFiles('**/yarn.lock', '.yarnrc.yml') }}
          restore-keys: |
            yarn-cache-folder-

      # https://stackoverflow.com/a/62244232
      # --prefer-offline tells yarn to use cached downloads (in the cache directory mentioned above)
      # during installation whenever possible instead of downloading from the server.
      - name: Install dependencies
        working-directory: ./grafana-plugin
        run: yarn install --frozen-lockfile --prefer-offline

      # build the plugin frontend
      - name: Build plugin frontend
        working-directory: ./grafana-plugin
        run: yarn build:dev

      # by setting grafana.plugins to [] and configuring grafana.extraVolumeMounts we are using the locally built
      # OnCall plugin rather than the latest published version
      # the /oncall-plugin hostPath refers to the kind volumeMount that points to the ./grafana-plugin dir
      # see ./helm/kind.yml for more details
      #
      # the phone number is passed wrapped in literal double quotes (the escaped \" pair)
      - name: Install helm chart
        run: |
          helm install helm-testing \
            --values ./helm/simple.yml \
            --values ./helm/values-local-image.yml \
            --set-json 'env=[{"name":"GRAFANA_CLOUD_NOTIFICATIONS_ENABLED","value":"False"}]' \
            --set oncall.twilio.accountSid="${{ secrets.TWILIO_ACCOUNT_SID }}" \
            --set oncall.twilio.authToken="${{ secrets.TWILIO_AUTH_TOKEN }}" \
            --set oncall.twilio.phoneNumber="\"${{ secrets.TWILIO_PHONE_NUMBER }}\"" \
            --set oncall.twilio.verifySid="${{ secrets.TWILIO_VERIFY_SID }}" \
            --set grafana.image.tag=${{ matrix.grafana-image-tag }} \
            --set grafana.env.GF_SECURITY_ADMIN_USER=oncall \
            --set grafana.env.GF_SECURITY_ADMIN_PASSWORD=oncall \
            --set grafana.env.GF_PLUGINS_ALLOW_LOADING_UNSIGNED_PLUGINS=grafana-oncall-app \
            --set-json "grafana.plugins=[]" \
            --set-json 'grafana.securityContext={"runAsUser": 0, "runAsGroup": 0, "fsGroup": 0}' \
            --set-json 'grafana.extraVolumeMounts=[{"name":"plugins","mountPath":"/var/lib/grafana/plugins/grafana-plugin","hostPath":"/oncall-plugin","readOnly":true}]' \
            ./helm/oncall

      # https://github.com/microsoft/playwright/issues/7249#issuecomment-1154603556
      # Figures out the version of playwright that's installed.
      # The result is stored in steps.playwright-version.outputs.version
      # (written through $GITHUB_OUTPUT, the same mechanism the yarn cache dir step uses)
      - name: Get installed Playwright version
        id: playwright-version
        working-directory: ./grafana-plugin
        run: echo "version=$(yarn list --pattern @playwright/test | grep @playwright/test@ | sed 's/[^0-9.]*\([0-9.]*\).*/\1/')" >> "$GITHUB_OUTPUT"

      # https://github.com/microsoft/playwright/issues/7249#issuecomment-1317670494
      # Attempt to restore the correct Playwright browser binaries based on the
      # currently installed version of Playwright (The browser binary versions
      # may change with Playwright versions).
      # Note: Playwright's cache directory is hard coded because that's what it
      # says to do in the docs. There doesn't appear to be a command that prints
      # it out for us.
      - name: Cache Playwright binaries
        uses: actions/cache@v3
        id: playwright-cache
        with:
          path: "~/.cache/ms-playwright"
          key: "${{ runner.os }}-playwright-${{ steps.playwright-version.outputs.version }}"

      # TODO: If the Playwright browser binaries weren't able to be restored, install them
      # https://github.com/microsoft/playwright/issues/7249#issuecomment-1256878540
      - name: Install Playwright
        # if: steps.playwright-cache.outputs.cache-hit != 'true'
        working-directory: ./grafana-plugin
        run: |
          npx playwright install
          npx playwright install-deps

      # - name: Install Playwright system dependencies
      #   run: npx playwright install-deps
      #   if: steps.playwright-cache.outputs.cache-hit == 'true'
      #   working-directory: ./grafana-plugin

      - name: Await k8s pods and other resources up
        uses: jupyterhub/action-k8s-await-workloads@v1
        with:
          workloads: "" # all
          namespace: "" # default
          timeout: 300
          max-restarts: -1

      # GitHub Action reference: https://github.com/jupyterhub/action-k8s-namespace-report
      - name: Kubernetes namespace report
        uses: jupyterhub/action-k8s-namespace-report@v1
        if: always()

      - name: Run Integration Tests
        env:
          # BASE_URL represents what is accessed via a browser
          BASE_URL: http://localhost:30002/grafana
          # ONCALL_API_URL is what is configured in the plugin configuration form
          # it is what the grafana container uses to communicate with the OnCall backend
          #
          # 172.17.0.1 is the docker bridge network default gateway. Requests originate in the grafana container
          # hit 172.17.0.1 which proxies the request onto the host where port 30001 is the node port that is mapped
          # to the OnCall API
          ONCALL_API_URL: http://172.17.0.1:30001
          GRAFANA_USERNAME: oncall
          GRAFANA_PASSWORD: oncall
          MAILSLURP_API_KEY: ${{ secrets.MAILSLURP_API_KEY }}
        working-directory: ./grafana-plugin
        run: yarn test:integration

      # The artifact name includes the matrix value so that the reports of the
      # different Grafana versions do not overwrite each other.
      - uses: actions/upload-artifact@v3
        if: always()
        with:
          name: playwright-report-${{ matrix.grafana-image-tag }}
          path: ./grafana-plugin/playwright-report/
          retention-days: 30