# What this PR does - updates the GitHub Actions workflow to move the e2e tests into a "[reusable workflow](https://docs.github.com/en/actions/using-workflows/reusing-workflows#creating-a-reusable-workflow)" which are run in two scenarios: - all tests _except_ those annotated as `@expensive` are run against `grafana/grafana:latest` on all feature branches - all tests _including_ `@expensive` tests are run on weekdays @ 07h00 UTC, against a matrix of 6 grafana versions. Results of these builds will be posted to the `#irm-amixr-flux` Slack channel. - local development will now be: ```bash make build-dev-images init-k8s start-k8s ``` - `build-dev-images` - builds the engine and UI docker images (only need to run first time) - `init-k8s` - creates a `kind` cluster and loads the two Docker images onto the cluster nodes (only need to run first time) - `start-k8s` - switches `kubectl` context to the created `kind` cluster, and uses `helm` to deploy everything as defined in `./dev/helm-local.yml` and `./dev/helm-local.dev.yml` (the latter file is `.gitignored` and specific to how _you_ want your setup to look). Hot reloading works as before. This is the _start_ of #2381. (I've marked these `make` commands as beta, because they've not yet been thoroughly tested for local development). - modifies the `helm` chart to add the concept of `oncall.devMode`, `ui`, and the ability to run oncall w/ sqlite - `oncall.devMode` will essentially just add `volumes` and `volumeMounts` to the various engine/migrate containers + - `ui.enabled` + `ui.env` - create a ui container (which is needed for hot reloading locally) - `sqlite` - this was useful for the e2e test environments where GitHub runner resources are scarce. Running `mariadb` eats up precious resources; instead, let's just use sqlite here - fixes an issue that caused sporadic HTTP 502s from the grafana plugin-proxy, which led to flaky tests.
See [this comment](https://github.com/grafana/oncall/pull/2751/files#diff-09040e8df192699b9c5742110ebbe8d9d5c3938cb156cc1cb99fa1c3fdee4fefR72-R77) for more context + a link to a relevant Slack conversation. **tl;dr** there is a bug with the Grafana plugin proxy in Grafana >= v10.0.3. Let's stop using the `latest`/`main` docker tags in our tests and pin to `10.0.2` for now. - ~~re-enables the e2e test which validates a phone number via SMS, and asserts that we can receive an alert escalation via SMS (new Mailslurp API Key has been added as a repo secret)~~ update: this is still blocked by procurement, will be done in a future PR ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required)
210 lines
8.4 KiB
YAML
210 lines
8.4 KiB
YAML
name: e2e tests

# Reusable workflow: it has no triggers of its own and only runs when another
# workflow invokes it via `workflow_call`, supplying the inputs and secrets below.
"on":
  workflow_call:
    inputs:
      grafana-image-tag:
        description: >
          Tag of the Grafana Docker image to deploy and test against
          (handed to the helm chart as grafana.image.tag)
        required: true
        type: string
      run-expensive-tests:
        description: >
          Whether or not to run Playwright tests that're annotated as "@expensive"
          (ex. tests that incur costs such as sending SMSes via Twilio/Mailslurp)
        required: true
        type: boolean
    # Every secret is mandatory; the caller must forward all of them.
    secrets:
      TWILIO_ACCOUNT_SID:
        required: true
      TWILIO_AUTH_TOKEN:
        required: true
      TWILIO_PHONE_NUMBER:
        required: true
      TWILIO_VERIFY_SID:
        required: true
      MAILSLURP_API_KEY:
        required: true

jobs:
  # Single job: builds the plugin frontend and the engine image, deploys the
  # helm chart into a kind cluster, runs the Playwright e2e suite against it and
  # finally dumps workload logs / uploads the Playwright report for debugging.
  end-to-end-tests:
    # default "ubuntu-latest" runners only provide 2 CPU cores + 7GB of RAM. this seems to lead to HTTP 504s from
    # the oncall backend, and hence, flaky tests. Let's use CI runners w/ more resources to avoid this (plus
    # this will allow us to run more backend containers and parallelize the tests)
    runs-on: ubuntu-latest-8-cores
    name: "Grafana: ${{ inputs.grafana-image-tag }}"
    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Collect Workflow Telemetry
        uses: runforesight/workflow-telemetry-action@v1
        with:
          comment_on_pr: false
          proc_trace_chart_show: false
          proc_trace_table_show: false

      - name: Create k8s Kind Cluster
        uses: helm/kind-action@v1.3.0
        with:
          config: ./.github/kind.yml

      - uses: actions/setup-node@v3
        with:
          node-version: 18.16.0
          cache: "yarn"
          cache-dependency-path: grafana-plugin/yarn.lock

      # node_modules is cached on the lockfile hash; the install step below is
      # skipped entirely on a cache hit.
      - name: Use cached frontend dependencies
        id: cache-frontend-dependencies
        uses: actions/cache@v3
        with:
          path: grafana-plugin/node_modules
          key: ${{ runner.os }}-frontend-node-modules-${{ hashFiles('grafana-plugin/yarn.lock') }}

      - name: Install frontend dependencies
        if: steps.cache-frontend-dependencies.outputs.cache-hit != 'true'
        working-directory: grafana-plugin
        run: yarn install --frozen-lockfile --prefer-offline --network-timeout 500000

      # the built plugin (dist/) is cached on the hash of the sources + lockfile;
      # the build step below only runs when either of them changed.
      - name: Use cached plugin frontend build
        id: cache-plugin-frontend
        uses: actions/cache@v3
        with:
          path: grafana-plugin/dist
          key: ${{ runner.os }}-plugin-frontend-${{ hashFiles('grafana-plugin/src/**/*', 'grafana-plugin/yarn.lock') }}

      - name: Build plugin frontend
        if: steps.cache-plugin-frontend.outputs.cache-hit != 'true'
        working-directory: grafana-plugin
        run: yarn build:dev

      - name: Set up Docker Buildx # We need this step for docker caching
        uses: docker/setup-buildx-action@v2

      - name: Build engine Docker image locally # using github actions docker cache
        uses: docker/build-push-action@v4
        with:
          context: ./engine
          file: ./engine/Dockerfile
          push: false
          load: true
          tags: oncall/engine:latest
          cache-from: type=gha
          cache-to: type=gha,mode=max
          # the image is written to a tarball so the next step can load it onto the kind nodes
          outputs: type=docker,dest=/tmp/oncall-engine.tar

      # NOTE(review): "chart-testing" is presumably the cluster name created by
      # helm/kind-action above - confirm if that action's config ever changes.
      - name: Load engine Docker image on the nodes of the cluster
        run: kind load image-archive --name=chart-testing /tmp/oncall-engine.tar

      # Secrets and the workflow input are handed to the script through env vars
      # instead of being templated straight into the shell source: `${{ }}`
      # expressions are substituted before the shell parses the script, so a value
      # containing shell metacharacters would otherwise be interpreted as code.
      - name: Install helm chart
        env:
          TWILIO_ACCOUNT_SID: ${{ secrets.TWILIO_ACCOUNT_SID }}
          TWILIO_AUTH_TOKEN: ${{ secrets.TWILIO_AUTH_TOKEN }}
          TWILIO_PHONE_NUMBER: ${{ secrets.TWILIO_PHONE_NUMBER }}
          TWILIO_VERIFY_SID: ${{ secrets.TWILIO_VERIFY_SID }}
          GRAFANA_IMAGE_TAG: ${{ inputs.grafana-image-tag }}
        # the escaped inner quotes around the phone number are deliberate: helm
        # receives the value wrapped in literal double quotes
        # (presumably to keep the number from being coerced to a numeric type - confirm)
        run: |
          helm install oncall-ci \
            --values ./.github/helm-values.yml \
            --set oncall.twilio.accountSid="${TWILIO_ACCOUNT_SID}" \
            --set oncall.twilio.authToken="${TWILIO_AUTH_TOKEN}" \
            --set oncall.twilio.phoneNumber="\"${TWILIO_PHONE_NUMBER}\"" \
            --set oncall.twilio.verifySid="${TWILIO_VERIFY_SID}" \
            --set grafana.image.tag="${GRAFANA_IMAGE_TAG}" \
            ./helm/oncall

      # helpful reference for properly caching the playwright binaries/dependencies
      # https://playwrightsolutions.com/playwright-github-action-to-cache-the-browser-binaries/
      #
      # the version is exported via $GITHUB_ENV so the cache key below can read it as env.PLAYWRIGHT_VERSION
      - name: Get installed Playwright version
        id: playwright-version
        working-directory: grafana-plugin
        run: >
          echo "PLAYWRIGHT_VERSION=$(cat ./package.json |
          jq -r '.devDependencies["@playwright/test"]')" >> $GITHUB_ENV

      - name: Cache Playwright binaries/dependencies
        id: playwright-cache
        uses: actions/cache@v3
        with:
          path: "~/.cache/ms-playwright"
          key: ${{ runner.os }}-playwright-${{ env.PLAYWRIGHT_VERSION }}-chromium-firefox-webkit

      # For the next two steps, use the binary directly from node_modules/.bin as opposed to npx playwright
      # due to this bug (https://github.com/microsoft/playwright/issues/13188)
      - name: Install Playwright Browsers
        if: steps.playwright-cache.outputs.cache-hit != 'true'
        working-directory: grafana-plugin
        run: ./node_modules/.bin/playwright install --with-deps chromium firefox webkit

      # use the cached browsers, but we still need to install the necessary system dependencies
      # (system deps are installed in the cache-miss step above by the --with-deps flag)
      - name: Install Playwright System Dependencies
        if: steps.playwright-cache.outputs.cache-hit == 'true'
        working-directory: grafana-plugin
        run: ./node_modules/.bin/playwright install-deps chromium firefox webkit

      # we could instead use the --wait flag for the helm install command above
      # but there's no reason to block on that step
      # instead we can let the k8s resources start up behind the scenes and do other
      # setup tasks (ex. install playwright + its dependencies)
      - name: Wait until k8s resources are ready
        run: |
          kubectl rollout status deployment/oncall-ci-grafana --timeout=300s
          kubectl rollout status deployment/oncall-ci-engine --timeout=300s
          kubectl rollout status deployment/oncall-ci-celery --timeout=300s

      - name: Run e2e Tests
        env:
          # BASE_URL represents what is accessed via a browser
          BASE_URL: http://localhost:30002/grafana
          # ONCALL_API_URL is what is configured in the plugin configuration form
          # it is what the grafana container uses to communicate with the OnCall backend
          #
          # 172.17.0.1 is the docker bridge network default gateway. Requests originate in the grafana container
          # hit 172.17.0.1 which proxies the request onto the host where port 30001 is the node port that is mapped
          # to the OnCall API
          ONCALL_API_URL: http://172.17.0.1:30001
          GRAFANA_ADMIN_USERNAME: oncall
          GRAFANA_ADMIN_PASSWORD: oncall
          GRAFANA_EDITOR_USERNAME: editor
          GRAFANA_EDITOR_PASSWORD: editor
          GRAFANA_VIEWER_USERNAME: viewer
          GRAFANA_VIEWER_PASSWORD: viewer
          MAILSLURP_API_KEY: ${{ secrets.MAILSLURP_API_KEY }}
        working-directory: ./grafana-plugin
        run: yarn test:e2e

      # only runs when the caller opted in through the run-expensive-tests input;
      # uses the same environment as the regular e2e step above
      - name: Run expensive e2e Tests
        if: inputs.run-expensive-tests
        env:
          BASE_URL: http://localhost:30002/grafana
          ONCALL_API_URL: http://172.17.0.1:30001
          GRAFANA_ADMIN_USERNAME: oncall
          GRAFANA_ADMIN_PASSWORD: oncall
          GRAFANA_EDITOR_USERNAME: editor
          GRAFANA_EDITOR_PASSWORD: editor
          GRAFANA_VIEWER_USERNAME: viewer
          GRAFANA_VIEWER_PASSWORD: viewer
          MAILSLURP_API_KEY: ${{ secrets.MAILSLURP_API_KEY }}
        working-directory: ./grafana-plugin
        run: yarn test:e2e-expensive

      # spit out the engine, celery, and grafana logs, if the e2e tests have failed (or were flaky)
      # can be helpful for debugging these tests
      # GitHub Action reference: https://github.com/jupyterhub/action-k8s-namespace-report
      - name: oncall-engine logs
        if: always()
        uses: jupyterhub/action-k8s-namespace-report@v1
        with:
          important-workloads: deploy/oncall-ci-engine

      - name: oncall-celery logs
        if: always()
        uses: jupyterhub/action-k8s-namespace-report@v1
        with:
          important-workloads: deploy/oncall-ci-celery

      - name: grafana logs
        if: always()
        uses: jupyterhub/action-k8s-namespace-report@v1
        with:
          important-workloads: deploy/oncall-ci-grafana

      # the Playwright HTML report is only uploaded when an earlier step failed
      - uses: actions/upload-artifact@v3
        if: failure()
        with:
          name: playwright-report
          path: ./grafana-plugin/playwright-report/
          retention-days: 30