From d26f76fea08329729d81ffb5987e0a4c6ee2cc69 Mon Sep 17 00:00:00 2001
From: Ildar Iskhakov <Ildar.iskhakov@grafana.com>
Date: Mon, 21 Nov 2022 23:10:25 +0800
Subject: [PATCH] Add new ci pipeline: helm chart end to end testing (#855)

---
 .github/workflows/helm_tests.yml              | 90 +++++++++++++++++++
 .../commands/setup_end_to_end_test.py         | 52 +++++++++++
 helm/README.md                                | 29 ++++++
 helm/kind.yml                                 |  9 ++
 helm/oncall/templates/engine/job-migrate.yaml |  1 +
 .../templates/engine/service-external.yaml    |  3 +
 helm/simple.yml                               | 23 +++++
 helm/values-arm64.yml                         | 16 ++++
 helm/values-local-image.yml                   |  4 +
 9 files changed, 227 insertions(+)
 create mode 100644 .github/workflows/helm_tests.yml
 create mode 100644 engine/engine/management/commands/setup_end_to_end_test.py
 create mode 100644 helm/README.md
 create mode 100644 helm/kind.yml
 create mode 100644 helm/simple.yml
 create mode 100644 helm/values-arm64.yml
 create mode 100644 helm/values-local-image.yml

diff --git a/.github/workflows/helm_tests.yml b/.github/workflows/helm_tests.yml
new file mode 100644
index 00000000..a95b4389
--- /dev/null
+++ b/.github/workflows/helm_tests.yml
@@ -0,0 +1,90 @@
+name: Helm End to End Testing
+
+on:
+  - pull_request
+
+jobs:
+  create-cluster:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Set up Docker Buildx  # We need this step for docker caching
+        uses: docker/setup-buildx-action@v2
+
+      - name: Build docker image locally  # using github actions docker cache
+        uses: docker/build-push-action@v2
+        with:
+          context: ./engine
+          file: ./engine/Dockerfile
+          push: false
+          load: true
+          tags: oncall/engine:latest
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+
+      - name: Create k8s Kind Cluster
+        uses: helm/kind-action@v1.3.0
+        with:
+          config: ./helm/kind.yml
+
+      - name: Load image on the nodes of the cluster
+        run: kind load docker-image --name=chart-testing oncall/engine:latest
+
+      - name: Install helm chart
+        run: helm install test-release helm/oncall --values helm/simple.yml --values helm/values-local-image.yml
+
+      - name: Await k8s pods and other resources up
+        uses: jupyterhub/action-k8s-await-workloads@v1
+        with:
+          workloads: "" # all
+          namespace: "" # default
+          timeout: 300
+          max-restarts: 0
+
+      - name: Bootstrap organization and integration  # plain assignments (no export) so a failed kubectl call fails the step
+        run: |
+          POD_NAME=$(kubectl get pods --namespace default -l "app.kubernetes.io/name=oncall,app.kubernetes.io/instance=test-release,app.kubernetes.io/component=engine" -o jsonpath="{.items[0].metadata.name}")
+          ONCALL_INTEGRATION_URL=http://localhost:30001$(kubectl exec -it "$POD_NAME" -- bash -c "python manage.py setup_end_to_end_test --bootstrap_integration")
+          echo "ONCALL_INTEGRATION_URL=$ONCALL_INTEGRATION_URL" >> "$GITHUB_ENV"  # later steps read the URL from the job environment
+
+      - name: Send an alert to the integration
+        run: |
+          echo $ONCALL_INTEGRATION_URL
+          export TEST_ID=test-0
+          echo "TEST_ID=$TEST_ID" >> $GITHUB_ENV
+          curl -X POST "$ONCALL_INTEGRATION_URL" \
+            -H 'Content-Type: Application/json' \
+            -d '{
+              "alert_uid": "08d6891a-835c-e661-39fa-96b6a9e26552",
+              "title": "'"$TEST_ID"'",
+              "image_url": "https://upload.wikimedia.org/wikipedia/commons/e/ee/Grumpy_Cat_by_Gage_Skidmore.jpg",
+              "state": "alerting",
+              "link_to_upstream_details": "https://en.wikipedia.org/wiki/Downtime",
+              "message": "Smth happened. Oh no!"
+            }'
+
+      # GitHub Action reference: https://github.com/jupyterhub/action-k8s-namespace-report
+      - name: Kubernetes namespace report
+        uses: jupyterhub/action-k8s-namespace-report@v1
+        if: always()
+
+      - name: Await 1 alert group and 1 alert created during the test (timeout 30 seconds)
+        run: |
+          export POD_NAME=$(kubectl get pods --namespace default -l "app.kubernetes.io/name=oncall,app.kubernetes.io/instance=test-release,app.kubernetes.io/component=engine" -o jsonpath="{.items[0].metadata.name}")
+          tries=30  # one poll per iteration with a 1 second sleep in between
+          while [ "$tries" -gt 0 ]; do
+              if kubectl exec -it $POD_NAME -c oncall -- bash -c "python manage.py setup_end_to_end_test --return_results_for_test_id $TEST_ID" | grep -q '1, 1'  # command prints "<alert groups>, <alerts>"
+              then
+                  break  # expected counts seen; tries is still above zero here
+              fi
+
+              tries=$(( tries - 1 ))
+              sleep 1
+          done
+
+          if [ "$tries" -eq 0 ]; then  # every attempt was used up without a match
+              echo 'Expected "1, 1" (alert groups, alerts). They were not created in 30 seconds during this integration test. Something is broken' >&2
+              exit 1
+          fi
diff --git a/engine/engine/management/commands/setup_end_to_end_test.py b/engine/engine/management/commands/setup_end_to_end_test.py
new file mode 100644
index 00000000..d1bd0fb8
--- /dev/null
+++ b/engine/engine/management/commands/setup_end_to_end_test.py
@@ -0,0 +1,52 @@
+from django.core.management import BaseCommand
+from django.db.models.signals import post_save
+from django.urls import reverse
+
+from apps.alerts.models import Alert, AlertGroup, AlertReceiveChannel, listen_for_alertreceivechannel_model_save
+from apps.alerts.tests.factories import AlertReceiveChannelFactory
+from apps.user_management.tests.factories import OrganizationFactory
+
+
+class Command(BaseCommand):
+    def add_arguments(self, parser):
+        group = parser.add_mutually_exclusive_group(required=True)
+        group.add_argument(
+            "--bootstrap_integration",
+            action="store_true",
+            help="Create random formatted webhook integration",
+        )
+
+        group.add_argument(
+            "--return_results_for_test_id",
+            type=str,
+            help="Count alert groups with specific text in the title and their alerts",
+        )
+
+    def handle(self, *args, **options):
+        if options["bootstrap_integration"]:
+            organization = OrganizationFactory()
+
+            def _make_alert_receive_channel(organization, **kwargs):
+                if "integration" not in kwargs:
+                    kwargs["integration"] = "formatted_webhook"
+                post_save.disconnect(listen_for_alertreceivechannel_model_save, sender=AlertReceiveChannel)
+                alert_receive_channel = AlertReceiveChannelFactory(organization=organization, **kwargs)
+                post_save.connect(listen_for_alertreceivechannel_model_save, sender=AlertReceiveChannel)
+                return alert_receive_channel
+
+            integration = _make_alert_receive_channel(
+                organization, integration=AlertReceiveChannel.INTEGRATION_FORMATTED_WEBHOOK
+            )
+            url = reverse(
+                "integrations:universal",
+                kwargs={
+                    "integration_type": AlertReceiveChannel.INTEGRATION_FORMATTED_WEBHOOK,
+                    "alert_channel_key": integration.token,
+                },
+            )
+            return url
+        elif test_id := options["return_results_for_test_id"]:
+            alert_groups_pks = list(AlertGroup.all_objects.filter(web_title_cache=test_id).values_list("id", flat=True))
+            alert_groups_count = len(alert_groups_pks)
+            alerts_count = Alert.objects.filter(group_id__in=alert_groups_pks).count()
+            return f"{alert_groups_count}, {alerts_count}"
diff --git a/helm/README.md b/helm/README.md
new file mode 100644
index 00000000..96d31394
--- /dev/null
+++ b/helm/README.md
@@ -0,0 +1,29 @@
+# How to run the chart locally
+
+1. Create the cluster with [kind](https://kind.sigs.k8s.io/docs/user/quick-start/#installation)
+    > Make sure ports 30001 and 30002 are free on your machine
+    ```
+    kind create cluster --image kindest/node:v1.24.7 --config kind.yml
+    ```
+
+2. Install the helm chart
+    ```
+    helm install helm-testing \
+    ./oncall --wait --timeout 30m \
+    --wait-for-jobs \
+    --values simple.yml \
+    --values values-arm64.yml
+    ```
+
+3. Get credentials
+    ```
+    echo "\n\nOpen Grafana on localhost:30002 with credentials - user: admin, password: $(kubectl get secret --namespace default helm-testing-grafana -o jsonpath="{.data.admin-password}" | base64 --decode ; echo)"
+    echo "Open Plugins -> Grafana OnCall -> fill form: backend url: localhost:30001, grafana url: localhost:30001, token below"
+    export POD_NAME=$(kubectl get pods --namespace default -l "app.kubernetes.io/name=oncall,app.kubernetes.io/instance=helm-testing,app.kubernetes.io/component=engine" -o jsonpath="{.items[0].metadata.name}")
+    kubectl exec -it $POD_NAME -- bash -c "python manage.py issue_invite_for_the_frontend --override"
+    ```
+
+4. Clean up
+    ```
+    kind delete cluster
+    ```
\ No newline at end of file
diff --git a/helm/kind.yml b/helm/kind.yml
new file mode 100644
index 00000000..b6b6f526
--- /dev/null
+++ b/helm/kind.yml
@@ -0,0 +1,9 @@
+kind: Cluster
+apiVersion: kind.x-k8s.io/v1alpha4
+nodes:
+- role: control-plane
+  extraPortMappings:
+  - containerPort: 30001
+    hostPort: 30001
+  - containerPort: 30002
+    hostPort: 30002
\ No newline at end of file
diff --git a/helm/oncall/templates/engine/job-migrate.yaml b/helm/oncall/templates/engine/job-migrate.yaml
index 86fcbaa9..47667afc 100644
--- a/helm/oncall/templates/engine/job-migrate.yaml
+++ b/helm/oncall/templates/engine/job-migrate.yaml
@@ -6,6 +6,7 @@ metadata:
   labels:
     {{- include "oncall.engine.labels" . | nindent 4 }}
 spec:
+  backoffLimit: 15
   ttlSecondsAfterFinished: 20
   template:
     metadata:
diff --git a/helm/oncall/templates/engine/service-external.yaml b/helm/oncall/templates/engine/service-external.yaml
index 0de9d892..eec1f0bf 100644
--- a/helm/oncall/templates/engine/service-external.yaml
+++ b/helm/oncall/templates/engine/service-external.yaml
@@ -16,6 +16,9 @@ spec:
       targetPort: http
       protocol: TCP
       name: http
+      {{- if and (eq .Values.service.type "NodePort") (.Values.service.nodePort) }}
+      nodePort: {{ .Values.service.nodePort }}
+      {{- end }}
   selector:
     {{- include "oncall.engine.selectorLabels" . | nindent 4 }}
 {{- end }}
\ No newline at end of file
diff --git a/helm/simple.yml b/helm/simple.yml
new file mode 100644
index 00000000..4a703006
--- /dev/null
+++ b/helm/simple.yml
@@ -0,0 +1,23 @@
+base_url: localhost:30001
+ingress:
+  enabled: false
+ingress-nginx:
+  enabled: false
+cert-manager:
+  enabled: false
+service:
+  enabled: true
+  type: NodePort
+  port: 8080
+  nodePort: 30001
+grafana:
+  service:
+    type: NodePort
+    nodePort: 30002
+database:
+  # can be either mysql or postgresql
+  type: postgresql
+mariadb:
+  enabled: false
+postgresql:
+  enabled: true
diff --git a/helm/values-arm64.yml b/helm/values-arm64.yml
new file mode 100644
index 00000000..7d59ae5a
--- /dev/null
+++ b/helm/values-arm64.yml
@@ -0,0 +1,16 @@
+# Substituting bitnami image with official image
+# to be able to run Rabbitmq on arm64 (Mac M1)
+# Optional for amd64 systems
+rabbitmq:
+  enabled: true
+  image:
+    repository: rabbitmq
+    tag: 3.10.10
+  auth:
+    username: user
+    password: user
+  extraEnvVars:
+    - name: RABBITMQ_DEFAULT_USER
+      value: user
+    - name: RABBITMQ_DEFAULT_PASS
+      value: user
\ No newline at end of file
diff --git a/helm/values-local-image.yml b/helm/values-local-image.yml
new file mode 100644
index 00000000..db44a5e3
--- /dev/null
+++ b/helm/values-local-image.yml
@@ -0,0 +1,4 @@
+image:
+  repository: oncall/engine
+  tag: latest
+  pullPolicy: IfNotPresent
\ No newline at end of file