# What this PR does * Upgrade to the recent Grafana * Upgrade to the recent bitnami mariadb, rabbitmq charts which support arm64 now * Remove deprecated psp policies from grafana chart * Make startupProbe period smaller to increase installation speed ## Which issue(s) this PR fixes ## Checklist - [ ] Unit, integration, and e2e (if applicable) tests updated - [ ] Documentation added (or `pr:no public docs` PR label added if not required) - [ ] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required)
463 lines
18 KiB
YAML
463 lines
18 KiB
YAML
name: "Linting and Unit/e2e Tests"

on:
  push:
    branches: [main, dev]
  pull_request:
  # The merge_group event triggers this workflow when a pull request is added
  # to a merge queue. See:
  # https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/configuring-pull-request-merges/managing-a-merge-queue#triggering-merge-group-checks-with-github-actions
  merge_group:

concurrency:
  # When new commits are pushed, cancel the run still in progress for the same
  # branch. The group key combines ref_name (available when CI is triggered by
  # a push to a branch/tag) with head_ref (available when CI is triggered by a
  # PR).
  group: "${{ github.ref_name }}-${{ github.head_ref }}"
  cancel-in-progress: true
|
|
|
|
jobs:
|
|
# Runs the repository-wide pre-commit hooks.
lint-entire-project:
  name: "Lint entire project"
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v3

    - uses: actions/setup-python@v4
      with:
        python-version: "3.11.3"
        cache: "pip"
        cache-dependency-path: engine/requirements.txt

    # The Node.js setup and dependency installation below are needed because
    # the eslint/prettier/stylelint hooks require the frontend dependencies.
    - uses: actions/setup-node@v3
      with:
        node-version: "18.16.0"
        cache: "yarn"
        cache-dependency-path: grafana-plugin/yarn.lock

    - name: Use cached frontend dependencies
      uses: actions/cache@v3
      id: cache-frontend-dependencies
      with:
        key: "${{ runner.os }}-frontend-node-modules-${{ hashFiles('grafana-plugin/yarn.lock') }}"
        path: grafana-plugin/node_modules

    # Only install when the node_modules cache above did not hit.
    - name: Install frontend dependencies
      working-directory: grafana-plugin
      if: steps.cache-frontend-dependencies.outputs.cache-hit != 'true'
      run: yarn install --frozen-lockfile --prefer-offline --network-timeout 500000

    - uses: pre-commit/action@v3.0.0
|
|
|
|
# Builds the Grafana plugin frontend; per the build step's name, the build
# also runs the linter and the tests.
lint-test-and-build-frontend:
  name: "Lint, test, and build frontend"
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v3

    - uses: actions/setup-node@v3
      with:
        node-version: "18.16.0"
        cache: "yarn"
        cache-dependency-path: grafana-plugin/yarn.lock

    - name: Use cached frontend dependencies
      uses: actions/cache@v3
      id: cache-frontend-dependencies
      with:
        key: "${{ runner.os }}-frontend-node-modules-${{ hashFiles('grafana-plugin/yarn.lock') }}"
        path: grafana-plugin/node_modules

    # Only install when the node_modules cache above did not hit.
    - name: Install frontend dependencies
      working-directory: grafana-plugin
      if: steps.cache-frontend-dependencies.outputs.cache-hit != 'true'
      run: yarn install --frozen-lockfile --prefer-offline --network-timeout 500000

    - name: Build frontend (will run linter and tests)
      working-directory: grafana-plugin
      run: yarn build
|
|
|
|
# Builds the documentation sources inside the grafana/docs-base image.
test-technical-documentation:
  name: "Test technical documentation"
  runs-on: ubuntu-latest
  steps:
    - name: "Check out code"
      uses: "actions/checkout@v3"

    # HUGO_REFLINKSERRORLEVEL=ERROR makes broken refs fail the build, so they
    # cannot be merged. The downside is that refs to external content cannot be
    # used, because they will not resolve in the docs-base image.
    - name: "Build website"
      run: |
        docker run -v ${PWD}/docs/sources:/hugo/content/docs/oncall/latest -e HUGO_REFLINKSERRORLEVEL=ERROR --rm grafana/docs-base:latest /bin/bash -c 'make hugo'
|
|
|
|
# Runs `manage.py lintmigrations` from the engine directory, with MySQL and
# RabbitMQ service containers published on localhost.
lint-migrations-backend-mysql-rabbitmq:
  name: "Lint database migrations"
  runs-on: ubuntu-latest
  env:
    DJANGO_SETTINGS_MODULE: settings.ci-test
    DATABASE_HOST: localhost
    RABBITMQ_URI: amqp://rabbitmq:rabbitmq@localhost:5672
    SLACK_CLIENT_OAUTH_ID: 1
  services:
    rabbit_test:
      image: rabbitmq:3.7.19
      env:
        RABBITMQ_DEFAULT_USER: rabbitmq
        RABBITMQ_DEFAULT_PASS: rabbitmq
      ports:
        - "5672:5672"
    mysql_test:
      image: mysql:8.0.32
      env:
        MYSQL_DATABASE: oncall_local_dev
        MYSQL_ROOT_PASSWORD: local_dev_pwd
      ports:
        - "3306:3306"
  steps:
    - uses: actions/checkout@v3

    - uses: actions/setup-python@v4
      with:
        python-version: "3.11.3"
        cache: "pip"
        cache-dependency-path: engine/requirements.txt

    - name: Lint migrations
      working-directory: engine
      run: |
        pip install -r requirements.txt
        python manage.py lintmigrations
|
|
|
|
# Runs the helm-unittest action against the chart in ./helm/oncall.
unit-test-helm-chart:
  name: "Helm Chart Unit Tests"
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v3

    - uses: d3adb5/helm-unittest-action@v2
      with:
        charts: ./helm/oncall
        helm-version: v3.8.0
|
|
|
|
# Backend test suite against MySQL + RabbitMQ service containers, run once per
# value of the rbac_enabled matrix entry (exposed to the tests through
# ONCALL_TESTING_RBAC_ENABLED).
unit-test-backend-mysql-rabbitmq:
  name: "Backend Tests: MySQL + RabbitMQ (RBAC enabled: ${{ matrix.rbac_enabled }})"
  runs-on: ubuntu-latest
  strategy:
    matrix:
      rbac_enabled: ["True", "False"]
  env:
    DJANGO_SETTINGS_MODULE: settings.ci-test
    DATABASE_HOST: localhost
    RABBITMQ_URI: amqp://rabbitmq:rabbitmq@localhost:5672
    SLACK_CLIENT_OAUTH_ID: 1
    ONCALL_TESTING_RBAC_ENABLED: ${{ matrix.rbac_enabled }}
  services:
    rabbit_test:
      image: rabbitmq:3.7.19
      env:
        RABBITMQ_DEFAULT_USER: rabbitmq
        RABBITMQ_DEFAULT_PASS: rabbitmq
      ports:
        - 5672:5672
    # No health check is configured for MySQL; the test step waits for it via
    # wait_for_test_mysql_start.sh instead.
    mysql_test:
      image: mysql:8.0.32
      env:
        MYSQL_DATABASE: oncall_local_dev
        MYSQL_ROOT_PASSWORD: local_dev_pwd
      ports:
        - 3306:3306
  steps:
    - uses: actions/checkout@v3
    - uses: actions/setup-python@v4
      with:
        python-version: "3.11.3"
        cache: "pip"
        cache-dependency-path: engine/requirements.txt
    - name: Unit Test Backend
      working-directory: engine
      # The job runs directly on the hosted runner as a non-root user, so
      # apt-get needs sudo. The two apt-get commands are on separate lines
      # (not chained with &&) so that a failure of either one aborts the step
      # under the default `bash -e` instead of being silently skipped.
      # netcat-openbsd is named explicitly because `netcat` is only a
      # transitional/virtual package name on newer Ubuntu releases.
      # NOTE(review): netcat is presumably what wait_for_test_mysql_start.sh
      # uses to probe the MySQL port - confirm against the script.
      run: |
        sudo apt-get update
        sudo apt-get install -y netcat-openbsd
        pip install -r requirements.txt
        ./wait_for_test_mysql_start.sh && pytest -x
|
|
|
|
# Backend test suite against PostgreSQL + RabbitMQ service containers, run
# once per value of the rbac_enabled matrix entry.
unit-test-backend-postgresql-rabbitmq:
  name: "Backend Tests: PostgreSQL + RabbitMQ (RBAC enabled: ${{ matrix.rbac_enabled }})"
  runs-on: ubuntu-latest
  strategy:
    matrix:
      rbac_enabled:
        - "True"
        - "False"
  env:
    DJANGO_SETTINGS_MODULE: settings.ci-test
    DATABASE_TYPE: postgresql
    DATABASE_HOST: localhost
    RABBITMQ_URI: amqp://rabbitmq:rabbitmq@localhost:5672
    SLACK_CLIENT_OAUTH_ID: 1
    ONCALL_TESTING_RBAC_ENABLED: ${{ matrix.rbac_enabled }}
  services:
    rabbit_test:
      image: rabbitmq:3.7.19
      env:
        RABBITMQ_DEFAULT_USER: rabbitmq
        RABBITMQ_DEFAULT_PASS: rabbitmq
      ports:
        - "5672:5672"
    postgresql_test:
      image: postgres:14.4
      env:
        POSTGRES_DB: oncall_local_dev
        POSTGRES_PASSWORD: local_dev_pwd
      ports:
        - "5432:5432"
      # Health checks: wait until postgres has started (probed with pg_isready).
      options: >-
        --health-cmd pg_isready
        --health-interval 10s
        --health-timeout 5s
        --health-retries 5
  steps:
    - uses: actions/checkout@v3

    - uses: actions/setup-python@v4
      with:
        python-version: "3.11.3"
        cache: "pip"
        cache-dependency-path: engine/requirements.txt

    - name: Unit Test Backend
      working-directory: engine
      run: |
        pip install -r requirements.txt
        pytest -x
|
|
|
|
# Backend test suite using SQLite as the database and Redis as the broker, run
# once per value of the rbac_enabled matrix entry (exposed to the tests through
# ONCALL_TESTING_RBAC_ENABLED).
unit-test-backend-sqlite-redis:
  name: "Backend Tests: SQLite + Redis (RBAC enabled: ${{ matrix.rbac_enabled }})"
  runs-on: ubuntu-latest
  strategy:
    matrix:
      rbac_enabled: ["True", "False"]
  env:
    DATABASE_TYPE: sqlite3
    BROKER_TYPE: redis
    REDIS_URI: redis://localhost:6379
    DJANGO_SETTINGS_MODULE: settings.ci-test
    SLACK_CLIENT_OAUTH_ID: 1
    ONCALL_TESTING_RBAC_ENABLED: ${{ matrix.rbac_enabled }}
  services:
    redis_test:
      image: redis:7.0.5
      ports:
        - 6379:6379
      # Health check probes Redis with `redis-cli ping`.
      options: >-
        --health-cmd "redis-cli ping"
        --health-interval 10s
        --health-timeout 5s
        --health-retries 5
  steps:
    - uses: actions/checkout@v3
    - uses: actions/setup-python@v4
      with:
        python-version: "3.11.3"
        cache: "pip"
        cache-dependency-path: engine/requirements.txt
    - name: Unit Test Backend
      working-directory: engine
      # No system packages are installed here: this job runs no wait-for-port
      # script, and the Redis service above already has a health check.
      run: |
        pip install -r requirements.txt
        pytest -x
|
|
|
|
# Runs pytest for the tool in tools/pagerduty-migrator.
unit-test-pd-migrator:
  name: "Unit tests - PagerDuty Migrator"
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v3

    - uses: actions/setup-python@v4
      with:
        python-version: "3.11.3"
        cache: "pip"
        cache-dependency-path: tools/pagerduty-migrator/requirements.txt

    - name: Unit Test PD Migrator
      working-directory: tools/pagerduty-migrator
      run: |
        pip install -r requirements.txt
        pytest -x
|
|
|
|
# End-to-end (Playwright) tests: installs the helm chart into a kind cluster
# with a locally built engine image and plugin, once per Grafana image tag in
# the matrix.
end-to-end-tests:
  # default "ubuntu-latest" runners only provide 2 CPU cores + 7GB of RAM. this seems to lead to HTTP 504s from
  # the oncall backend, and hence, flaky tests. Let's use CI runners w/ more resources to avoid this (plus
  # this will allow us to run more backend containers and parallelize the tests)
  runs-on: ubuntu-latest-8-cores
  name: "End to end tests - Grafana: ${{ matrix.grafana-image-tag }}"
  strategy:
    matrix:
      grafana-image-tag:
        # OnCall doesn't work on the following versions of Grafana
        # - 8.5.22
        # - 9.0.0
        # - 9.1.0

        # 9.2.0 is the earliest version where things work
        # (tags are quoted so they are always read as strings)
        - "9.2.13"
        - "9.3.14"
        - "9.4.10"
        - "9.5.2"
        - main
        - latest
    # let every Grafana version finish even if one of them fails
    fail-fast: false
  steps:
    - name: Checkout
      uses: actions/checkout@v3

    - name: Collect Workflow Telemetry
      uses: runforesight/workflow-telemetry-action@v1
      with:
        comment_on_pr: false
        proc_trace_chart_show: false
        proc_trace_table_show: false

    - name: Create k8s Kind Cluster
      uses: helm/kind-action@v1.3.0
      with:
        config: ./helm/kind.yml

    - uses: actions/setup-node@v3
      with:
        node-version: 18.16.0
        cache: "yarn"
        cache-dependency-path: grafana-plugin/yarn.lock

    - name: Use cached frontend dependencies
      id: cache-frontend-dependencies
      uses: actions/cache@v3
      with:
        path: grafana-plugin/node_modules
        key: ${{ runner.os }}-frontend-node-modules-${{ hashFiles('grafana-plugin/yarn.lock') }}

    - name: Install frontend dependencies
      if: steps.cache-frontend-dependencies.outputs.cache-hit != 'true'
      working-directory: grafana-plugin
      run: yarn install --frozen-lockfile --prefer-offline --network-timeout 500000

    - name: Use cached plugin frontend build
      id: cache-plugin-frontend
      uses: actions/cache@v3
      with:
        path: grafana-plugin/dist
        key: ${{ runner.os }}-plugin-frontend-${{ hashFiles('grafana-plugin/src/**/*', 'grafana-plugin/yarn.lock') }}

    - name: Build plugin frontend
      if: steps.cache-plugin-frontend.outputs.cache-hit != 'true'
      working-directory: grafana-plugin
      run: yarn build:dev

    # We need this step for docker caching
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v2

    # using github actions docker cache
    - name: Build engine Docker image locally
      uses: docker/build-push-action@v2
      with:
        context: ./engine
        file: ./engine/Dockerfile
        push: false
        load: true
        tags: oncall/engine:latest
        cache-from: type=gha
        cache-to: type=gha,mode=max
        outputs: type=docker,dest=/tmp/oncall-engine.tar

    - name: Load engine Docker image on the nodes of the cluster
      run: kind load image-archive --name=chart-testing /tmp/oncall-engine.tar

    # spin up 3 engine and 3 celery pods, this will allow us to parallelize the integration tests,
    # and complete them much faster by using multiple test processes
    # With just 1 engine/celery/grafana pod, the backend crawls to a halt when there is > 1 parallelized integration test process
    # NOTE: it appears that using > 1 grafana container w/ SQLite as the database sometimes leads to failed
    # grafana database migrations (this is documented in this GitHub issue
    # https://github.com/bitnami/charts/issues/10905)
    #
    # by setting grafana.plugins to [] and configuring grafana.extraVolumeMounts we are using the locally built
    # OnCall plugin rather than the latest published version
    # the /oncall-plugin hostPath refers to the kind volumeMount that points to the ./grafana-plugin dir
    # see ./helm/kind.yml for more details
    #
    # the phone number is wrapped in literal double quotes (\"...\") so that the value
    # handed to helm is itself quoted
    - name: Install helm chart
      run: |
        helm install helm-testing \
          --values ./helm/simple.yml \
          --values ./helm/values-local-image.yml \
          --set-json 'env=[{"name":"GRAFANA_CLOUD_NOTIFICATIONS_ENABLED","value":"False"}]' \
          --set engine.replicaCount=3 \
          --set celery.replicaCount=3 \
          --set celery.worker_beat_enabled="False" \
          --set oncall.twilio.accountSid="${{ secrets.TWILIO_ACCOUNT_SID }}" \
          --set oncall.twilio.authToken="${{ secrets.TWILIO_AUTH_TOKEN }}" \
          --set oncall.twilio.phoneNumber="\"${{ secrets.TWILIO_PHONE_NUMBER }}\"" \
          --set oncall.twilio.verifySid="${{ secrets.TWILIO_VERIFY_SID }}" \
          --set grafana.replicas=1 \
          --set grafana.image.tag=${{ matrix.grafana-image-tag }} \
          --set grafana.env.GF_SECURITY_ADMIN_USER=oncall \
          --set grafana.env.GF_SECURITY_ADMIN_PASSWORD=oncall \
          --set grafana.env.GF_FEATURE_TOGGLES_ENABLE=topnav \
          --set grafana.env.GF_PLUGINS_ALLOW_LOADING_UNSIGNED_PLUGINS=grafana-oncall-app \
          --set-json "grafana.plugins=[]" \
          --set-json 'grafana.extraVolumeMounts=[{"name":"plugins","mountPath":"/var/lib/grafana/plugins/grafana-plugin","hostPath":"/oncall-plugin","readOnly":true}]' \
          ./helm/oncall

    # helpful reference for properly caching the playwright binaries/dependencies
    # https://playwrightsolutions.com/playwright-github-action-to-cache-the-browser-binaries/
    #
    # the version is exported through $GITHUB_ENV so the cache key below can read it as env.PLAYWRIGHT_VERSION
    - name: Get installed Playwright version
      id: playwright-version
      working-directory: grafana-plugin
      run: echo "PLAYWRIGHT_VERSION=$(cat ./package.json | jq -r '.devDependencies["@playwright/test"]')" >> "$GITHUB_ENV"

    - name: Cache Playwright binaries/dependencies
      id: playwright-cache
      uses: actions/cache@v3
      with:
        path: "~/.cache/ms-playwright"
        key: ${{ runner.os }}-playwright-${{ env.PLAYWRIGHT_VERSION }}-chromium-firefox-webkit

    # For the next two steps, use the binary directly from node_modules/.bin as opposed to npx playwright
    # due to this bug (https://github.com/microsoft/playwright/issues/13188)
    - name: Install Playwright Browsers
      if: steps.playwright-cache.outputs.cache-hit != 'true'
      working-directory: grafana-plugin
      run: ./node_modules/.bin/playwright install --with-deps chromium firefox webkit

    # use the cached browsers, but we still need to install the necessary system dependencies
    # (system deps are installed in the cache-miss step above by the --with-deps flag)
    - name: Install Playwright System Dependencies
      if: steps.playwright-cache.outputs.cache-hit == 'true'
      working-directory: grafana-plugin
      run: ./node_modules/.bin/playwright install-deps chromium firefox webkit

    # we could instead use the --wait flag for the helm install command above
    # but there's no reason to block on that step
    # instead we can let the k8s resources start up behind the scenes and do other
    # setup tasks (ex. install playwright + its dependencies)
    - name: Wait until k8s resources are ready
      run: |
        kubectl rollout status deployment/helm-testing-grafana --timeout=300s
        kubectl rollout status deployment/helm-testing-oncall-engine --timeout=300s
        kubectl rollout status deployment/helm-testing-oncall-celery --timeout=300s

    - name: Run Integration Tests
      env:
        # BASE_URL represents what is accessed via a browser
        BASE_URL: http://localhost:30002/grafana
        # ONCALL_API_URL is what is configured in the plugin configuration form
        # it is what the grafana container uses to communicate with the OnCall backend
        #
        # 172.17.0.1 is the docker bridge network default gateway. Requests originate in the grafana container
        # hit 172.17.0.1 which proxies the request onto the host where port 30001 is the node port that is mapped
        # to the OnCall API
        ONCALL_API_URL: http://172.17.0.1:30001
        GRAFANA_USERNAME: oncall
        GRAFANA_PASSWORD: oncall
        MAILSLURP_API_KEY: ${{ secrets.MAILSLURP_API_KEY }}
      working-directory: ./grafana-plugin
      run: yarn test:integration

    # spit out the engine, celery, and grafana logs, if the e2e tests have failed
    # can be helpful for debugging failing tests
    # GitHub Action reference: https://github.com/jupyterhub/action-k8s-namespace-report
    - name: Kubernetes namespace report
      uses: jupyterhub/action-k8s-namespace-report@v1
      if: failure()
      with:
        important-workloads: "deploy/helm-testing-oncall-engine deploy/helm-testing-oncall-celery deploy/helm-testing-grafana"

    # keep the Playwright report of a failed run, one artifact per Grafana version
    - uses: actions/upload-artifact@v3
      if: failure()
      with:
        name: playwright-report-grafana-${{ matrix.grafana-image-tag }}
        path: ./grafana-plugin/playwright-report/
        retention-days: 30
|