diff --git a/docs/img/architecture_diagram.png b/docs/img/architecture_diagram.png new file mode 100644 index 00000000..cec88050 Binary files /dev/null and b/docs/img/architecture_diagram.png differ diff --git a/helm/oncall/README.md b/helm/oncall/README.md index bbec683f..d9989285 100644 --- a/helm/oncall/README.md +++ b/helm/oncall/README.md @@ -1,73 +1,118 @@ -# oncall +# Grafana OnCall Helm Chart -![Version: 0.1.0](https://img.shields.io/badge/Version-0.1.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.16.0](https://img.shields.io/badge/AppVersion-1.16.0-informational?style=flat-square) +This Grafana OnCall Chart is the best way to operate Grafana OnCall on Kubernetes. +It will deploy Grafana OnCall engine and celery workers, along with RabbitMQ cluster, Redis Cluster, and MySQL 5.7 database. +It will also deploy cert manager and nginx ingress controller, as Grafana OnCall backend might need to be externally available +to receive alerts from other monitoring systems. Grafana OnCall engine acts as a backend and can be connected to the Grafana frontend plugin named Grafana OnCall. +Architecture diagram can be found [here](https://raw.githubusercontent.com/grafana/oncall/dev/docs/img/architecture_diagram.png) -A Helm chart for Kubernetes +> Default helm chart configuration is not intended for production. The helm chart includes all the services into a single release, +> which is not recommended for production usage. It is recommended to run stateful services such as MySQL and RabbitMQ +> separately from this release or use managed PaaS solutions. 
It will significantly reduce the overhead of managing them -## Requirements +## Install +### Installing the helm chart +```bash +helm repo add grafana https://grafana.github.io/helm-charts +helm repo update +helm install \ + --wait \ + --set base_url=example.com \ + --set grafana."grafana\.ini".server.domain=example.com \ + grafana-oncall \ + grafana/oncall +``` -| Repository | Name | Version | -|------------|------|---------| -| https://charts.bitnami.com/bitnami | mariadb | 11.0.10 | -| https://charts.bitnami.com/bitnami | rabbitmq | 10.1.1 | -| https://charts.bitnami.com/bitnami | redis | 16.10.1 | -| https://charts.jetstack.io | cert-manager | v1.8.0 | -| https://grafana.github.io/helm-charts | grafana | 6.29.6 | -| https://helm.nginx.com/stable | nginx-ingress | 0.13.2 | +Follow the `helm install` output to finish setting up Grafana OnCall backend and Grafana OnCall frontend plugin -## Values +## Configuration -| Key | Type | Default | Description | -|-----|------|---------|-------------| -| base_url | string | `"ildari.me"` | | -| celery.replicaCount | int | `1` | | -| celery.resources | object | `{}` | | -| cert-manager.enabled | bool | `true` | | -| cert-manager.installCRDs | bool | `true` | | -| cert-manager.webhook.securePort | int | `10260` | | -| cert-manager.webhook.timeoutSeconds | int | `30` | | -| engine.replicaCount | int | `1` | | -| engine.resources | object | `{}` | | -| env | list | `[]` | | -| externalMysql.db_name | string | `nil` | | -| externalMysql.host | string | `nil` | | -| externalMysql.password | string | `nil` | | -| externalMysql.port | string | `nil` | | -| externalMysql.user | string | `nil` | | -| externalRabbitmq.host | string | `nil` | | -| externalRabbitmq.password | string | `nil` | | -| externalRabbitmq.port | string | `nil` | | -| externalRabbitmq.user | string | `nil` | | -| external_redis.host | string | `nil` | | -| external_redis.password | string | `nil` | | -| fullnameOverride | string | `""` | | -| 
grafana."grafana.ini".server.domain | string | `"example.com"` | | -| grafana."grafana.ini".server.root_url | string | `"%(protocol)s://%(domain)s/grafana"` | | -| grafana."grafana.ini".server.serve_from_sub_path | bool | `true` | | -| grafana.enabled | bool | `true` | | -| grafana.persistence.enabled | bool | `true` | | -| grafana.plugins[0] | string | `"grafana-oncall-app"` | | -| ildar.enabled | bool | `true` | | -| image.pullPolicy | string | `"IfNotPresent"` | | -| image.repository | string | `"registry.digitalocean.com/ildar-testing/hobby-oncall-2"` | | -| image.tag | string | `"latest"` | | -| imagePullSecrets[0].name | string | `"registry-ildar-testing"` | | -| ingress.enabled | bool | `true` | | -| mariadb.auth.database | string | `"oncall"` | | -| mariadb.enabled | bool | `true` | | -| nameOverride | string | `""` | | -| nginx-ingress.enabled | bool | `true` | | -| podAnnotations | object | `{}` | | -| podSecurityContext | object | `{}` | | -| rabbitmq.enabled | bool | `true` | | -| redis.enabled | bool | `true` | | -| securityContext | object | `{}` | | -| service.enabled | bool | `false` | | -| service.port | int | `8080` | | -| service.type | string | `"LoadBalancer"` | | -| serviceAccount.annotations | object | `{}` | | -| serviceAccount.create | bool | `true` | | -| serviceAccount.name | string | `""` | | +You can edit values.yaml to make changes to the helm chart configuration and re-deploy the release with the following command: +```bash +helm upgrade \ + --install \ + --wait \ + --set base_url=example.com \ + --set grafana."grafana\.ini".server.domain=example.com \ + grafana-oncall \ + grafana/oncall +``` ----------------------------------------------- -Autogenerated from chart metadata using [helm-docs v1.10.0](https://github.com/norwoodj/helm-docs/releases/v1.10.0) +### Set up external access +Grafana OnCall can be connected to external monitoring systems or to a Grafana instance deployed in another cluster. 
+Nginx Ingress Controller and Cert Manager charts are included in the helm chart with the default configuration. +If you set the DNS A Record pointing to the external IP address of the installation with the Hostname matching base_url parameter, https will be automatically set up. If grafana is enabled in the chart values, it will also be available on https://<base_url>/grafana/. See the details in `helm install` output. + +To use a different ingress controller or tls certificate management system, set the following values to false and edit ingress settings + +``` +nginx-ingress: + enabled: false + +cert-manager: + enabled: false + +ingress: + enabled: true + annotations: + kubernetes.io/ingress.class: "nginx" + cert-manager.io/issuer: "letsencrypt-prod" +``` + +### Connect external MySQL + +It is recommended to use the managed MySQL 5.7 database provided by your cloud provider +Make sure to create the database with the following parameters before installing this chart +``` +CREATE DATABASE oncall CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; +``` + +To use an external MySQL instance set mariadb.enabled to `false` and configure the `externalMysql` parameters. +``` +mariadb: + enabled: false + +# Make sure to create the database with the following parameters: +# CREATE DATABASE oncall CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; +externalMysql: + host: + port: + db_name: + user: + password: + ``` + +### Connect external RabbitMQ + +Option 1. Install RabbitMQ separately into the cluster using the [official documentation](https://www.rabbitmq.com/kubernetes/operator/operator-overview.html) +Option 2. Use managed solution such as [CloudAMQP](https://www.cloudamqp.com/) + +To use an external RabbitMQ instance set rabbitmq.enabled to `false` and configure the `externalRabbitmq` parameters. 
+``` +rabbitmq: + enabled: false # Disable the RabbitMQ dependency from the release + +externalRabbitmq: + host: + port: + user: + password: +``` + +## Uninstall +### Uninstalling the helm chart +```bash +helm delete grafana-oncall +``` + +### Clean up PVC's +```bash +kubectl delete pvc data-grafana-oncall-mariadb-0 data-grafana-oncall-rabbitmq-0 \ +redis-data-grafana-oncall-redis-master-0 redis-data-grafana-oncall-redis-replicas-0 \ +redis-data-grafana-oncall-redis-replicas-1 redis-data-grafana-oncall-redis-replicas-2 +``` + +### Clean up secrets +```bash +kubectl delete secrets certificate-tls grafana-oncall-cert-manager-webhook-ca grafana-oncall-ingress-nginx-admission +``` diff --git a/helm/oncall/templates/NOTES.txt b/helm/oncall/templates/NOTES.txt index 4a5d5688..11260295 100644 --- a/helm/oncall/templates/NOTES.txt +++ b/helm/oncall/templates/NOTES.txt @@ -1,42 +1,54 @@ +================================================================= +📞 Grafana OnCall Notes +================================================================= + 👋 Your Grafana OnCall instance has been successfully deployed -A few steps left to finish the configuration, Copy-paste this these command to get the instructions: +TODO: Add disclaimer about stateful services, ingress controller, certificates - export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "oncall.name" . }},app.kubernetes.io/instance={{ .Release.Name }},app.kubernetes.io/component=engine" -o jsonpath="{.items[0].metadata.name}") - kubectl exec -it $POD_NAME -c wait-for-db -- bash -c "python manage.py migrate;" - echo Database was successfully migrated 👍 +{{- if not .Values.migrate.enabled }} + 🤖 To migrate the database run these commands: - echo ❗ Set up a DNS record for your domain. 
- echo Select A Record for Type and enter the Host you would like to point to an IP address: - echo @ - used to point a root domain (yourdomain.tld) to the IP address: - echo A Record | @ | $(k get ingress oncall-ildar-engine -o jsonpath="{.status.loadBalancer.ingress[0].ip}") - - echo Issuing the token to connect Grafana OnCall backend and Grafana OnCall plugin - wait 5 - kubectl exec -it $POD_NAME -- bash -c "python manage.py issue_invite_for_the_frontend --override" - - echo Open Grafana in your browser and Enable Grafana OnCall plugin there -{{- if .Values.grafana.enabled }} - echo Grafana was installed as a part of this helm release. - echo We will need to connect Grafana OnCall plugin and Grafana OnCall backend. - - echo Open https://{{ .Values.base_url }}/grafana/plugins/grafana-oncall-app - echo Username: {{ .Values.grafana.adminUser }} - echo Password $(kubectl get secret --namespace {{ .Release.Namespace }} {{ template "oncall.grafana.fullname" . }} -o jsonpath="{.data.admin-password}" | base64 --decode ; echo) - -{{- else }} - echo Grafana was not installed as a part of this helm release. Open your own Grafana in the browser. - echo Make sure your external Grafana is available by the network for the containers installed by this release. + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "oncall.name" . }},app.kubernetes.io/instance={{ .Release.Name }},app.kubernetes.io/component=engine" -o jsonpath="{.items[0].metadata.name}") + kubectl exec -it $POD_NAME -c wait-for-db -- bash -c "python manage.py migrate;" {{- end }} -4. 
Configure Grafana OnCall plugin to work with Grafana OnCall backend - echo Open Grafana, go to "Configuration" - "Plugins" and find Grafana OnCall plugin - echo Fill the "Invite token" issued on step 2 - echo NOTE: you can re-issue the token by running this command: kubectl exec -it $POD_NAME -- bash -c "python manage.py issue_invite_for_the_frontend --override" - echo Fill the Grafana OnCall Backend URL: http://{{ include "oncall.engine.fullname" . }}:8080 - echo Fill the Grafana URL from step 3{{ if .Values.grafana.enabled }}: http://{{ include "oncall.grafana.fullname" . }}{{- end }} - echo NOTE: this URL should be accessible by Grafana OnCall Backend container + ❗ Set up a DNS record for your domain (use A Record and "@" to point a root domain to the IP address) + Get the external IP address by running the following command and point {{ .Values.base_url }} to it: + + kubectl get ingress --namespace {{ .Release.Namespace }} {{ include "oncall.fullname" . }} -o jsonpath="{.status.loadBalancer.ingress[0].ip}" + + Wait until the DNS record has propagated. + NOTE: Check with the following command: nslookup {{ .Values.base_url }} + Try reaching https://{{ .Values.base_url }}/ready/ from the browser, make sure it is not cached locally + +{{- if .Values.grafana.enabled }} + 🦎 Grafana was installed as a part of this helm release. Open https://{{ .Values.base_url }}/grafana/plugins/grafana-oncall-app + The User is {{ .Values.grafana.adminUser }} + Get password by running this command: + + kubectl get secret --namespace {{ .Release.Namespace }} {{ template "oncall.grafana.fullname" . }} -o jsonpath="{.data.admin-password}" | base64 --decode ; echo + +{{- else }} + 🦎 Grafana was NOT installed as a part of this helm release. Open external Grafana, go to "Configuration" - "Plugins" and find Grafana OnCall plugin + NOTE: Make sure your external Grafana is available by the network for the containers installed by this release. 
+{{- end }} + + 🔗 Connect Grafana OnCall Plugin to Grafana OnCall backend: + + Issue the one-time token to connect Grafana OnCall backend and Grafana OnCall plugin by running these commands: + + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "oncall.name" . }},app.kubernetes.io/instance={{ .Release.Name }},app.kubernetes.io/component=engine" -o jsonpath="{.items[0].metadata.name}") + kubectl exec -it $POD_NAME -- bash -c "python manage.py issue_invite_for_the_frontend --override" + + Fill the Grafana OnCall Backend URL: + + http://{{ include "oncall.engine.fullname" . }}:8080 + + Fill the Grafana URL: + + {{ if .Values.grafana.enabled }}http://{{ include "oncall.grafana.fullname" . }}{{ else }}https://<your-grafana-url>{{- end }} -echo 🎉🎉🎉 Done! 🎉🎉🎉 +🎉🎉🎉 Done! 🎉🎉🎉 diff --git a/helm/oncall/templates/engine/job-migrate.yaml b/helm/oncall/templates/engine/job-migrate.yaml new file mode 100644 index 00000000..5bfc3019 --- /dev/null +++ b/helm/oncall/templates/engine/job-migrate.yaml @@ -0,0 +1,54 @@ +{{- if .Values.migrate.enabled -}} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ printf "%s-migrate-%s" (include "oncall.engine.fullname" .) (now | date "2006-01-02-15-04-05") }} + labels: + {{- include "oncall.engine.labels" . | nindent 4 }} +spec: + ttlSecondsAfterFinished: 20 + template: + metadata: + name: {{ printf "%s-migrate-%s" (include "oncall.engine.fullname" .) (now | date "2006-01-02-15-04-05") }} + {{- with .Values.podAnnotations }} + annotations: + random-annotation: {{ randAlphaNum 10 | lower }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "oncall.engine.selectorLabels" . | nindent 8 }} + spec: + restartPolicy: Never + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "oncall.serviceAccountName" . 
}} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Chart.Name }}-migrate + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: + - /bin/sh + - -c + - | + until (nc -vz $MYSQL_HOST $MYSQL_PORT); + do + echo "waiting for MySQL"; sleep 1; + done + python manage.py migrate + env: + {{- include "snippet.oncall.env" . | nindent 12 }} + {{- include "snippet.mysql.env" . | nindent 12 }} + {{- include "snippet.rabbitmq.env" . | nindent 12 }} + {{- include "snippet.redis.env" . | nindent 12 }} + {{- if .Values.env }} + {{- toYaml .Values.env | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.engine.resources | nindent 12 }} +{{- end }} diff --git a/helm/oncall/templates/ingress-regular.yaml b/helm/oncall/templates/ingress-regular.yaml index d98519ac..31c4e367 100644 --- a/helm/oncall/templates/ingress-regular.yaml +++ b/helm/oncall/templates/ingress-regular.yaml @@ -1,18 +1,32 @@ -{{- if .Values.ingress.enabled }} +{{- if .Values.ingress.enabled -}} +{{- $fullName := include "oncall.fullname" . -}} +{{- $svcPort := .Values.service.port -}} +{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }} + {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }} + {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}} + {{- end }} +{{- end }} +{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}} apiVersion: networking.k8s.io/v1 +{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1beta1 +{{- else -}} +apiVersion: extensions/v1beta1 +{{- end }} kind: Ingress metadata: - name: {{ include "oncall.engine.fullname" . 
}} + name: {{ $fullName }} labels: {{- include "oncall.labels" . | nindent 4 }} + {{- with .Values.ingress.annotations }} annotations: - kubernetes.io/ingress.class: "nginx" - cert-manager.io/issuer: "letsencrypt-prod" + {{- toYaml . | nindent 4 }} + {{- end }} spec: tls: - hosts: - {{ .Values.base_url | quote }} - secretName: quickstart-example-tls + secretName: certificate-tls rules: - host: {{ .Values.base_url | quote }} http: diff --git a/helm/oncall/values.yaml b/helm/oncall/values.yaml index 80d97e6e..f1cc235b 100644 --- a/helm/oncall/values.yaml +++ b/helm/oncall/values.yaml @@ -1,19 +1,27 @@ -# Default values for Grafana OnCall +# Values for configuring the deployment of Grafana OnCall -# i.e. example.com -base_url: ildari.me +# Set the domain name Grafana OnCall will be installed on. +# If you want to install grafana as a part of this release make sure to configure grafana.grafana.ini.server.domain too +base_url: example.com image: - # TODO: use our public repo + # Grafana OnCall docker image repository repository: registry.digitalocean.com/ildar-testing/hobby-oncall-2 - pullPolicy: IfNotPresent - # Overrides the image tag whose default is the chart appVersion. 
tag: "latest" + pullPolicy: IfNotPresent # TODO: remove this after we use public image imagePullSecrets: - name: "registry-ildar-testing" +# Whether to create additional service for external connections +# ClusterIP service is always created +service: + enabled: false + type: LoadBalancer + port: 8080 + +# Engine pods configuration engine: replicaCount: 1 resources: {} @@ -24,6 +32,7 @@ engine: # cpu: 100m # memory: 128Mi +# Celery workers pods configuration celery: replicaCount: 1 resources: {} @@ -34,54 +43,51 @@ celery: # cpu: 100m # memory: 128Mi +# Whether to run django database migrations automatically +migrate: + enabled: true + +# Additional env variables to add to deployments env: [] -service: - enabled: false - type: LoadBalancer - port: 8080 - +# Enable ingress object for external access to the resources ingress: enabled: true # className: "" -# annotations: {} -## kubernetes.io/ingress.class: nginx -## kubernetes.io/tls-acme: "true" -# hosts: [] -## - host: oncall.example.com -## paths: -## - path: / -## pathType: ImplementationSpecific -# tls: # [] -## - secretName: oncall-example-com-tls -## hosts: -## - oncall.example.com + annotations: + kubernetes.io/ingress.class: "nginx" + cert-manager.io/issuer: "letsencrypt-prod" +# Whether to install ingress controller nginx-ingress: enabled: true +# Install cert-manager as a part of the release cert-manager: enabled: true + # Install CRD resources installCRDs: true webhook: timeoutSeconds: 30 # cert-manager tries to use the already used port, changing to another one # https://github.com/cert-manager/cert-manager/issues/3237 - # https://cert-manager.io/docs/installation/compatibility/#gke-autopilot - securePort: 10261 + # https://cert-manager.io/docs/installation/compatibility/ + securePort: 10260 + # Fix self-checks https://github.com/jetstack/cert-manager/issues/4286 podDnsPolicy: None podDnsConfig: nameservers: - 8.8.8.8 - 1.1.1.1 -# Additional services configuration -# We recommend using external services 
to reduce the overhead of managing statefule services +# MySQL is included in this release for convenience. +# It is recommended to host it separately from this release +# Set mariadb.enabled = false and configure externalMysql mariadb: enabled: true - auth: - database: oncall +# Make sure to create the database with the following parameters: +# CREATE DATABASE oncall CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; externalMysql: host: port: @@ -89,6 +95,9 @@ externalMysql: user: password: +# RabbitMQ is included in this release for convenience. +# It is recommended to host it separately from this release +# Set rabbitmq.enabled = false and configure externalRabbitmq rabbitmq: enabled: true @@ -141,7 +150,3 @@ securityContext: {} # readOnlyRootFilesystem: true # runAsNonRoot: true # runAsUser: 1000 - - -ildar: - enabled: true \ No newline at end of file