oncall-engine/.github/workflows/linting-and-tests.yml
Joey Orlando b26706e7e4
configure yamllint pre-commit step (#2728)
# What this PR does

Add [`yamllint`](https://github.com/adrienverge/yamllint) to
`pre-commit` configuration + fix pre-existing errors

## Checklist

- [x] Unit, integration, and e2e (if applicable) tests updated
- [x] Documentation added (or `pr:no public docs` PR label added if not
required)
- [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not
required)
2023-08-03 02:35:08 -04:00

508 lines
19 KiB
YAML

name: Linting and Unit/e2e Tests
"on":
push:
branches:
- main
- dev
pull_request:
# You can use the merge_group event to trigger your GitHub Actions workflow when
# a pull request is added to a merge queue
# https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/configuring-pull-request-merges/managing-a-merge-queue#triggering-merge-group-checks-with-github-actions
merge_group:
concurrency:
# Cancel any running workflow for the same branch when new commits are pushed.
# We group both by ref_name (available when CI is triggered by a push to a branch/tag)
# and head_ref (available when CI is triggered by a PR).
group: "${{ github.ref_name }}-${{ github.head_ref }}"
cancel-in-progress: true
jobs:
lint-entire-project:
name: "Lint entire project"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: "3.11.4"
cache: "pip"
cache-dependency-path: |
engine/requirements.txt
engine/requirements-dev.txt
# following 2 steps - need to install the frontend dependencies for the eslint/prettier/stylelint steps
- uses: actions/setup-node@v3
with:
node-version: 18.16.0
cache: "yarn"
cache-dependency-path: grafana-plugin/yarn.lock
- name: Use cached frontend dependencies
id: cache-frontend-dependencies
uses: actions/cache@v3
with:
path: grafana-plugin/node_modules
key: ${{ runner.os }}-frontend-node-modules-${{ hashFiles('grafana-plugin/yarn.lock') }}
- name: Install frontend dependencies
if: steps.cache-frontend-dependencies.outputs.cache-hit != 'true'
working-directory: grafana-plugin
run: yarn install --frozen-lockfile --prefer-offline --network-timeout 500000
- uses: pre-commit/action@v3.0.0
lint-test-and-build-frontend:
name: "Lint, test, and build frontend"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-node@v3
with:
node-version: 18.16.0
cache: "yarn"
cache-dependency-path: grafana-plugin/yarn.lock
- name: Use cached frontend dependencies
id: cache-frontend-dependencies
uses: actions/cache@v3
with:
path: grafana-plugin/node_modules
key: ${{ runner.os }}-frontend-node-modules-${{ hashFiles('grafana-plugin/yarn.lock') }}
- name: Install frontend dependencies
if: steps.cache-frontend-dependencies.outputs.cache-hit != 'true'
working-directory: grafana-plugin
run: yarn install --frozen-lockfile --prefer-offline --network-timeout 500000
- name: Build frontend (will run linter and tests)
working-directory: grafana-plugin
run: yarn build
test-technical-documentation:
name: "Test technical documentation"
runs-on: ubuntu-latest
steps:
- name: "Check out code"
uses: "actions/checkout@v3"
- name: "Build website"
# -e HUGO_REFLINKSERRORLEVEL=ERROR prevents merging broken refs with the downside
# that no refs to external content can be used as these refs will not resolve in the
# docs-base image.
run: >
docker run -v ${PWD}/docs/sources:/hugo/content/docs/oncall/latest
-e HUGO_REFLINKSERRORLEVEL=ERROR
--rm grafana/docs-base:latest /bin/bash
-c 'echo -e "---\\nredirectURL: /hugo/content/docs/oncall/latest/\\ntype: redirect\\nversioned: true\\n---\\n"
> /hugo/content/docs/oncall/_index.md; make hugo'
lint-migrations-backend-mysql-rabbitmq:
name: "Lint database migrations"
runs-on: ubuntu-latest
env:
DATABASE_HOST: localhost
RABBITMQ_URI: amqp://rabbitmq:rabbitmq@localhost:5672
DJANGO_SETTINGS_MODULE: settings.ci-test
SLACK_CLIENT_OAUTH_ID: 1
services:
rabbit_test:
image: rabbitmq:3.12.0
env:
RABBITMQ_DEFAULT_USER: rabbitmq
RABBITMQ_DEFAULT_PASS: rabbitmq
ports:
- 5672:5672
mysql_test:
image: mysql:8.0.32
env:
MYSQL_DATABASE: oncall_local_dev
MYSQL_ROOT_PASSWORD: local_dev_pwd
ports:
- 3306:3306
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: "3.11.4"
cache: "pip"
cache-dependency-path: |
engine/requirements.txt
engine/requirements-dev.txt
- name: Lint migrations
working-directory: engine
run: |
pip install -r requirements.txt -r requirements-dev.txt
python manage.py lintmigrations
unit-test-helm-chart:
name: "Helm Chart Unit Tests"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: azure/setup-helm@v3
with:
version: v3.8.0
- name: Install helm unittest plugin
run: helm plugin install https://github.com/helm-unittest/helm-unittest.git --version=v0.3.3
- name: Run tests
run: helm unittest ./helm/oncall
unit-test-backend-mysql-rabbitmq:
name: "Backend Tests: MySQL + RabbitMQ (RBAC enabled: ${{ matrix.rbac_enabled }})"
runs-on: ubuntu-latest
strategy:
matrix:
rbac_enabled: ["True", "False"]
env:
DJANGO_SETTINGS_MODULE: settings.ci-test
DATABASE_HOST: localhost
RABBITMQ_URI: amqp://rabbitmq:rabbitmq@localhost:5672
SLACK_CLIENT_OAUTH_ID: 1
ONCALL_TESTING_RBAC_ENABLED: ${{ matrix.rbac_enabled }}
services:
rabbit_test:
image: rabbitmq:3.12.0
env:
RABBITMQ_DEFAULT_USER: rabbitmq
RABBITMQ_DEFAULT_PASS: rabbitmq
ports:
- 5672:5672
mysql_test:
image: mysql:8.0.32
env:
MYSQL_DATABASE: oncall_local_dev
MYSQL_ROOT_PASSWORD: local_dev_pwd
ports:
- 3306:3306
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: "3.11.4"
cache: "pip"
cache-dependency-path: |
engine/requirements.txt
engine/requirements-dev.txt
- name: Unit Test Backend
working-directory: engine
run: |
apt-get update && apt-get install -y netcat-traditional
pip install -r requirements.txt -r requirements-dev.txt
./wait_for_test_mysql_start.sh && pytest -x
unit-test-backend-postgresql-rabbitmq:
name: "Backend Tests: PostgreSQL + RabbitMQ (RBAC enabled: ${{ matrix.rbac_enabled }})"
runs-on: ubuntu-latest
strategy:
matrix:
rbac_enabled: ["True", "False"]
env:
DATABASE_TYPE: postgresql
DATABASE_HOST: localhost
RABBITMQ_URI: amqp://rabbitmq:rabbitmq@localhost:5672
DJANGO_SETTINGS_MODULE: settings.ci-test
SLACK_CLIENT_OAUTH_ID: 1
ONCALL_TESTING_RBAC_ENABLED: ${{ matrix.rbac_enabled }}
services:
rabbit_test:
image: rabbitmq:3.12.0
env:
RABBITMQ_DEFAULT_USER: rabbitmq
RABBITMQ_DEFAULT_PASS: rabbitmq
ports:
- 5672:5672
postgresql_test:
image: postgres:14.4
env:
POSTGRES_DB: oncall_local_dev
POSTGRES_PASSWORD: local_dev_pwd
ports:
- 5432:5432
# Set health checks to wait until postgres has started
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: "3.11.4"
cache: "pip"
cache-dependency-path: |
engine/requirements.txt
engine/requirements-dev.txt
- name: Unit Test Backend
working-directory: engine
run: |
pip install -r requirements.txt -r requirements-dev.txt
pytest -x
unit-test-backend-sqlite-redis:
name: "Backend Tests: SQLite + Redis (RBAC enabled: ${{ matrix.rbac_enabled }})"
runs-on: ubuntu-latest
strategy:
matrix:
rbac_enabled: ["True", "False"]
env:
DATABASE_TYPE: sqlite3
BROKER_TYPE: redis
REDIS_URI: redis://localhost:6379
DJANGO_SETTINGS_MODULE: settings.ci-test
SLACK_CLIENT_OAUTH_ID: 1
ONCALL_TESTING_RBAC_ENABLED: ${{ matrix.rbac_enabled }}
services:
redis_test:
image: redis:7.0.5
ports:
- 6379:6379
options: >-
--health-cmd "redis-cli ping"
--health-interval 10s
--health-timeout 5s
--health-retries 5
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: "3.11.4"
cache: "pip"
cache-dependency-path: |
engine/requirements.txt
engine/requirements-dev.txt
- name: Unit Test Backend
working-directory: engine
run: |
apt-get update && apt-get install -y netcat-traditional
pip install -r requirements.txt -r requirements-dev.txt
pytest -x
unit-test-pd-migrator:
name: "Unit tests - PagerDuty Migrator"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: "3.11.4"
cache: "pip"
cache-dependency-path: tools/pagerduty-migrator/requirements.txt
- name: Unit Test PD Migrator
working-directory: tools/pagerduty-migrator
run: |
pip install -r requirements.txt
pytest -x
mypy:
name: "mypy"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: "3.11.4"
cache: "pip"
cache-dependency-path: |
engine/requirements.txt
engine/requirements-dev.txt
- name: mypy Static Type Checking
working-directory: engine
run: |
pip install -r requirements.txt -r requirements-dev.txt
mypy .
end-to-end-tests:
# default "ubuntu-latest" runners only provide 2 CPU cores + 7GB of RAM. this seems to lead to HTTP 504s from
# the oncall backend, and hence, flaky tests. Let's use CI runners w/ more resources to avoid this (plus
# this will allow us to run more backend containers and parralelize the tests)
runs-on: ubuntu-latest-8-cores
name: "End to end tests - Grafana: ${{ matrix.grafana-image-tag }}"
strategy:
matrix:
grafana-image-tag:
# OnCall doesn't work on the following versions of Grafana
# - 8.5.22
# - 9.0.0
# - 9.1.0
# 9.2.0 is the earliest version where things work
- 9.2.13
- 9.3.14
- 9.4.10
- 9.5.2
- main
- latest
fail-fast: false
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Collect Workflow Telemetry
uses: runforesight/workflow-telemetry-action@v1
with:
comment_on_pr: false
proc_trace_chart_show: false
proc_trace_table_show: false
- name: Create k8s Kind Cluster
uses: helm/kind-action@v1.3.0
with:
config: ./helm/kind.yml
- uses: actions/setup-node@v3
with:
node-version: 18.16.0
cache: "yarn"
cache-dependency-path: grafana-plugin/yarn.lock
- name: Use cached frontend dependencies
id: cache-frontend-dependencies
uses: actions/cache@v3
with:
path: grafana-plugin/node_modules
key: ${{ runner.os }}-frontend-node-modules-${{ hashFiles('grafana-plugin/yarn.lock') }}
- name: Install frontend dependencies
if: steps.cache-frontend-dependencies.outputs.cache-hit != 'true'
working-directory: grafana-plugin
run: yarn install --frozen-lockfile --prefer-offline --network-timeout 500000
- name: Use cached plugin frontend build
id: cache-plugin-frontend
uses: actions/cache@v3
with:
path: grafana-plugin/dist
key: ${{ runner.os }}-plugin-frontend-${{ hashFiles('grafana-plugin/src/**/*', 'grafana-plugin/yarn.lock') }}
- name: Build plugin frontend
if: steps.cache-plugin-frontend.outputs.cache-hit != 'true'
working-directory: grafana-plugin
run: yarn build:dev
- name: Set up Docker Buildx # We need this step for docker caching
uses: docker/setup-buildx-action@v2
- name: Build engine Docker image locally # using github actions docker cache
uses: docker/build-push-action@v2
with:
context: ./engine
file: ./engine/Dockerfile
push: false
load: true
tags: oncall/engine:latest
cache-from: type=gha
cache-to: type=gha,mode=max
outputs: type=docker,dest=/tmp/oncall-engine.tar
- name: Load engine Docker image on the nodes of the cluster
run: kind load image-archive --name=chart-testing /tmp/oncall-engine.tar
# spin up 3 engine and 3 celery pods, this will allow us to parralelize the integration tests,
# and complete them much faster by using multiple test processes
# With just 1 engine/celery/grafana pod, the backend crawls to a halt when there is > 1 parallelized
# integration test process
# NOTE: it appears that using > 1 grafana container w/ SQLite as the database sometimes leads to failed
# grafana database migrations (this is documented in this GitHub issue
# https://github.com/bitnami/charts/issues/10905)
#
# by settings grafana.plugins to [] and configuring grafana.extraVolumeMounts we are using the locally built
# OnCall plugin rather than the latest published version
# the /oncall-plugin hostPath refers to the kind volumeMount that points to the ./grafana-plugin dir
# see ./helm/kind.yml for more details
- name: Install helm chart
# yamllint disable rule:line-length
run: |
helm install helm-testing \
--values ./helm/simple.yml \
--values ./helm/values-local-image.yml \
--set-json 'env=[{"name":"GRAFANA_CLOUD_NOTIFICATIONS_ENABLED","value":"False"},{"name":"BASE_URL","value":"http://172.17.0.1:30001"}]' \
--set engine.replicaCount=3 \
--set celery.replicaCount=3 \
--set celery.worker_beat_enabled="False" \
--set oncall.twilio.accountSid="${{ secrets.TWILIO_ACCOUNT_SID }}" \
--set oncall.twilio.authToken="${{ secrets.TWILIO_AUTH_TOKEN }}" \
--set oncall.twilio.phoneNumber="\"${{ secrets.TWILIO_PHONE_NUMBER }}"\" \
--set oncall.twilio.verifySid="${{ secrets.TWILIO_VERIFY_SID }}" \
--set grafana.replicas=1 \
--set grafana.image.tag=${{ matrix.grafana-image-tag }} \
--set grafana.env.GF_SECURITY_ADMIN_USER=oncall \
--set grafana.env.GF_SECURITY_ADMIN_PASSWORD=oncall \
--set grafana.env.GF_FEATURE_TOGGLES_ENABLE=topnav \
--set grafana.env.GF_PLUGINS_ALLOW_LOADING_UNSIGNED_PLUGINS=grafana-oncall-app \
--set-json "grafana.plugins=[]" \
--set-json 'grafana.extraVolumeMounts=[{"name":"plugins","mountPath":"/var/lib/grafana/plugins/grafana-plugin","hostPath":"/oncall-plugin","readOnly":true}]' \
./helm/oncall
# yamllint enable rule:line-length
# helpful reference for properly caching the playwright binaries/dependencies
# https://playwrightsolutions.com/playwright-github-action-to-cache-the-browser-binaries/
- name: Get installed Playwright version
id: playwright-version
working-directory: grafana-plugin
run: >
echo "PLAYWRIGHT_VERSION=$(cat ./package.json |
jq -r '.devDependencies["@playwright/test"]')" >> $GITHUB_ENV
- name: Cache Playwright binaries/dependencies
id: playwright-cache
uses: actions/cache@v3
with:
path: "~/.cache/ms-playwright"
key: ${{ runner.os }}-playwright-${{ env.PLAYWRIGHT_VERSION }}-chromium-firefox-webkit
# For the next two steps, use the binary directly from node_modules/.bin as opposed to npx playwright
# due to this bug (https://github.com/microsoft/playwright/issues/13188)
- name: Install Playwright Browsers
if: steps.playwright-cache.outputs.cache-hit != 'true'
working-directory: grafana-plugin
run: ./node_modules/.bin/playwright install --with-deps chromium firefox webkit
# use the cached browsers, but we still need to install the necessary system dependencies
# (system deps are installed in the cache-miss step above by the --with-deps flag)
- name: Install Playwright System Dependencies
if: steps.playwright-cache.outputs.cache-hit == 'true'
working-directory: grafana-plugin
run: ./node_modules/.bin/playwright install-deps chromium firefox webkit
# we could instead use the --wait flag for the helm install command above
# but there's no reason to block on that step
# instead we can let the k8s resources start up behind the scenes and do other
# setup tasks (ex. install playwright + its dependencies)
- name: Wait until k8s resources are ready
run: |
kubectl rollout status deployment/helm-testing-grafana --timeout=300s
kubectl rollout status deployment/helm-testing-oncall-engine --timeout=300s
kubectl rollout status deployment/helm-testing-oncall-celery --timeout=300s
- name: Run Integration Tests
env:
# BASE_URL represents what is accessed via a browser
BASE_URL: http://localhost:30002/grafana
# ONCALL_API_URL is what is configured in the plugin configuration form
# it is what the grafana container uses to communicate with the OnCall backend
#
# 172.17.0.1 is the docker bridge network default gateway. Requests originate in the grafana container
# hit 172.17.0.1 which proxies the request onto the host where port 30001 is the node port that is mapped
# to the OnCall API
ONCALL_API_URL: http://172.17.0.1:30001
GRAFANA_ADMIN_USERNAME: oncall
GRAFANA_ADMIN_PASSWORD: oncall
GRAFANA_EDITOR_USERNAME: editor
GRAFANA_EDITOR_PASSWORD: editor
GRAFANA_VIEWER_USERNAME: viewer
GRAFANA_VIEWER_PASSWORD: viewer
MAILSLURP_API_KEY: ${{ secrets.MAILSLURP_API_KEY }}
working-directory: ./grafana-plugin
run: yarn test:e2e
# spit out the engine, celery, and grafana logs, if the the e2e tests have failed
# can be helpful for debugging failing tests
# GitHub Action reference: https://github.com/jupyterhub/action-k8s-namespace-report
- name: Kubernetes namespace report
uses: jupyterhub/action-k8s-namespace-report@v1
if: failure()
with:
important-workloads: >
deploy/helm-testing-oncall-engine deploy/helm-testing-oncall-celery deploy/helm-testing-grafana
- uses: actions/upload-artifact@v3
if: failure()
with:
name: playwright-report-grafana-${{ matrix.grafana-image-tag }}
path: ./grafana-plugin/playwright-report/
retention-days: 30