From c793e550c623f3b2f68f492250a6db5518dfd69d Mon Sep 17 00:00:00 2001
From: Joey Orlando <joey.orlando@grafana.com>
Date: Tue, 23 May 2023 17:26:12 -0400
Subject: [PATCH] re-enable e2e UI tests on CI (#1961)

#1692 is still open. This PR is not an ideal approach, but it's a quick
win while we wait for that issue to be resolved.

By retrying failing tests up to 3 times, we _should_ be fine to
re-enable these on CI. If a test is failing > 3 times, there's likely a
legitimate issue occurring.
---
 .github/workflows/linting-and-tests.yml       | 50 ++++++++++-------
 .../integration-tests/globalSetup.ts          | 12 +++--
 .../schedules/quality.test.ts                 | 19 +++++--
 grafana-plugin/playwright.config.ts           | 11 ++--
 .../EscalationsFilters.module.css             | 13 -----
 .../EscalationsFilters/EscalationsFilters.tsx | 54 -------------------
 .../ScheduleQuality/ScheduleQuality.tsx       |  2 +-
 .../ScheduleQualityDetails.tsx                |  2 +-
 grafana-plugin/src/plugin.json                |  3 +-
 9 files changed, 67 insertions(+), 99 deletions(-)
 delete mode 100644 grafana-plugin/src/components/EscalationsFilters/EscalationsFilters.module.css
 delete mode 100644 grafana-plugin/src/components/EscalationsFilters/EscalationsFilters.tsx

diff --git a/.github/workflows/linting-and-tests.yml b/.github/workflows/linting-and-tests.yml
index 633a528d..d85932ce 100644
--- a/.github/workflows/linting-and-tests.yml
+++ b/.github/workflows/linting-and-tests.yml
@@ -262,15 +262,24 @@ jobs:
           pytest -x
 
   end-to-end-tests:
-    # TODO: reenable this job once https://github.com/grafana/oncall/issues/1692 is fixed
-    if: ${{ false }}
-    runs-on: ubuntu-latest
+    # default "ubuntu-latest" runners only provide 2 CPU cores + 7GB of RAM. this seems to lead to HTTP 504s from
+    # the oncall backend, and hence, flaky tests. Let's use CI runners w/ more resources to avoid this (plus
+    # this will allow us to run more backend containers and parallelize the tests)
+    runs-on: ubuntu-latest-8-cores
     name: "End to end tests - Grafana: ${{ matrix.grafana-image-tag }}"
     strategy:
       matrix:
         grafana-image-tag:
-          - 8.5.22
-          - 9.2.6
+          # OnCall doesn't work on the following versions of Grafana
+          # - 8.5.22
+          # - 9.0.0
+          # - 9.1.0
+
+          # 9.2.0 is the earliest version where things work
+          - 9.2.13
+          - 9.3.14
+          - 9.4.10
+          - 9.5.2
           - main
           - latest
       fail-fast: false
@@ -331,10 +340,9 @@ jobs:
       - name: Load engine Docker image on the nodes of the cluster
         run: kind load image-archive --name=chart-testing /tmp/oncall-engine.tar
 
-      # spin up 2 engine, 2 celery, and 2 grafana pods, this will allow us to parralelize the integration tests
+      # spin up 3 engine, 3 celery, and 3 grafana pods, this will allow us to parallelize the integration tests,
       # and complete them much faster by using multiple test processes
-      # With just 1 engine/celery/grafana pod, the backend crawls to a halt when there is > 1 parallelized integration
-      # test process
+      # With just 1 engine/celery/grafana pod, the backend crawls to a halt when there is > 1 parallelized integration test process
       #
       # by settings grafana.plugins to [] and configuring grafana.extraVolumeMounts we are using the locally built
       # OnCall plugin rather than the latest published version
@@ -346,14 +354,14 @@ jobs:
             --values ./helm/simple.yml \
             --values ./helm/values-local-image.yml \
             --set-json 'env=[{"name":"GRAFANA_CLOUD_NOTIFICATIONS_ENABLED","value":"False"}]' \
-            --set engine.replicaCount=1 \
-            --set celery.replicaCount=1 \
+            --set engine.replicaCount=3 \
+            --set celery.replicaCount=3 \
             --set celery.worker_beat_enabled="False" \
             --set oncall.twilio.accountSid="${{ secrets.TWILIO_ACCOUNT_SID }}" \
             --set oncall.twilio.authToken="${{ secrets.TWILIO_AUTH_TOKEN }}" \
             --set oncall.twilio.phoneNumber="\"${{ secrets.TWILIO_PHONE_NUMBER }}"\" \
             --set oncall.twilio.verifySid="${{ secrets.TWILIO_VERIFY_SID }}" \
-            --set grafana.replicas=1 \
+            --set grafana.replicas=3 \
             --set grafana.image.tag=${{ matrix.grafana-image-tag }} \
             --set grafana.env.GF_SECURITY_ADMIN_USER=oncall \
             --set grafana.env.GF_SECURITY_ADMIN_PASSWORD=oncall \
@@ -378,12 +386,19 @@ jobs:
           path: "~/.cache/ms-playwright"
           key: ${{ runner.os }}-playwright-${{ env.PLAYWRIGHT_VERSION }}-chromium-firefox-webkit
 
-      - name: Install Playwright binaries/dependencies
+      # For the next two steps, use the binary directly from node_modules/.bin as opposed to npx playwright
+      # due to this bug (https://github.com/microsoft/playwright/issues/13188)
+      - name: Install Playwright Browsers
         if: steps.playwright-cache.outputs.cache-hit != 'true'
-        # https://stackoverflow.com/questions/65900299/install-single-dependency-from-package-json-with-yarn
-        run: |
-          yarn add "@playwright/test@${{ env.PLAYWRIGHT_VERSION }}"
-          npx playwright install --with-deps chromium firefox webkit
+        working-directory: grafana-plugin
+        run: ./node_modules/.bin/playwright install --with-deps chromium firefox webkit
+
+      # use the cached browsers, but we still need to install the necessary system dependencies
+      # (system deps are installed in the cache-miss step above by the --with-deps flag)
+      - name: Install Playwright System Dependencies
+        if: steps.playwright-cache.outputs.cache-hit == 'true'
+        working-directory: grafana-plugin
+        run: ./node_modules/.bin/playwright install-deps chromium firefox webkit
 
       - name: Await k8s pods and other resources up
         uses: jupyterhub/action-k8s-await-workloads@v1
@@ -408,8 +423,7 @@ jobs:
           GRAFANA_PASSWORD: oncall
           MAILSLURP_API_KEY: ${{ secrets.MAILSLURP_API_KEY }}
         working-directory: ./grafana-plugin
-        # -x = exit command after first failing test
-        run: yarn test:integration -x
+        run: yarn test:integration
 
       # always spit out the engine and celery logs, AFTER the e2e tests have completed
       # can be helpful for debugging failing/flaky tests
diff --git a/grafana-plugin/integration-tests/globalSetup.ts b/grafana-plugin/integration-tests/globalSetup.ts
index 244158d5..04af1cfc 100644
--- a/grafana-plugin/integration-tests/globalSetup.ts
+++ b/grafana-plugin/integration-tests/globalSetup.ts
@@ -7,7 +7,7 @@ import { goToGrafanaPage } from './utils/navigation';
 /**
  * go to config page and wait for plugin icon to be available on left-hand navigation
  */
-export const configureOnCallPlugin = async (page: Page): Promise<void> => {
+const configureOnCallPlugin = async (page: Page): Promise<void> => {
   // plugin configuration can safely be skipped for non open-source environments
   if (!IS_OPEN_SOURCE) {
     return;
@@ -31,8 +31,14 @@ export const configureOnCallPlugin = async (page: Page): Promise<void> => {
     await clickButton({ page, buttonText: 'Connect' });
   }
 
-  // wait for the "Connected to OnCall" message to know that everything is properly configured
-  await expect(page.getByTestId('status-message-block')).toHaveText(/Connected to OnCall.*/);
+  /**
+   * wait for the "Connected to OnCall" message to know that everything is properly configured
+   *
+   * The timeout for the "plugin configured" assertion is increased because the backend sync can sometimes
+   * take a bit longer to finish. The default assertion timeout is 5s, which is sometimes not enough if the
+   * backend is under load
+   */
+  await expect(page.getByTestId('status-message-block')).toHaveText(/Connected to OnCall.*/, { timeout: 25_000 });
 };
 
 /**
diff --git a/grafana-plugin/integration-tests/schedules/quality.test.ts b/grafana-plugin/integration-tests/schedules/quality.test.ts
index 94b8e2a0..4e010162 100644
--- a/grafana-plugin/integration-tests/schedules/quality.test.ts
+++ b/grafana-plugin/integration-tests/schedules/quality.test.ts
@@ -6,13 +6,24 @@ test('check schedule quality for simple 1-user schedule', async ({ page }) => {
   const onCallScheduleName = generateRandomValue();
   await createOnCallSchedule(page, onCallScheduleName);
 
-  await expect(page.locator('div[class*="ScheduleQuality"]')).toHaveText('Quality: Great');
+  /**
+   * this page.reload() call is a hack to temporarily get around this issue
+   * https://github.com/grafana/oncall/issues/1968
+   */
+  await page.reload({ waitUntil: 'networkidle' });
 
-  await page.hover('div[class*="ScheduleQuality"]');
-  await expect(page.locator('div[class*="ScheduleQualityDetails"] >> span[class*="Text"] >> nth=2 ')).toHaveText(
+  const scheduleQualityElement = page.getByTestId('schedule-quality');
+
+  await expect(scheduleQualityElement).toHaveText('Quality: Great', { timeout: 15_000 });
+
+  await scheduleQualityElement.hover();
+
+  const scheduleQualityDetailsElement = page.getByTestId('schedule-quality-details');
+
+  await expect(scheduleQualityDetailsElement.locator('span[class*="Text"] >> nth=2 ')).toHaveText(
     'Schedule has no gaps'
   );
-  await expect(page.locator('div[class*="ScheduleQualityDetails"] >> span[class*="Text"] >> nth=3 ')).toHaveText(
+  await expect(scheduleQualityDetailsElement.locator('span[class*="Text"] >> nth=3 ')).toHaveText(
     'Schedule is perfectly balanced'
   );
 });
diff --git a/grafana-plugin/playwright.config.ts b/grafana-plugin/playwright.config.ts
index 9084e46a..2ca27677 100644
--- a/grafana-plugin/playwright.config.ts
+++ b/grafana-plugin/playwright.config.ts
@@ -14,7 +14,7 @@ const config: PlaywrightTestConfig = {
   testDir: './integration-tests',
   globalSetup: './integration-tests/globalSetup.ts',
   /* Maximum time one test can run for. */
-  timeout: 90 * 1000,
+  timeout: 60 * 1000,
   expect: {
     /**
      * Maximum time expect() should wait for the condition to be met.
@@ -26,8 +26,13 @@ const config: PlaywrightTestConfig = {
   fullyParallel: true,
   /* Fail the build on CI if you accidentally left test.only in the source code. */
   forbidOnly: !!process.env.CI,
-  /* Retry on CI only */
-  retries: process.env.CI ? 3 : 0,
+  /**
+   * Retry on CI only
+   *
+   * NOTE: until we fix this issue (https://github.com/grafana/oncall/issues/1692), which occasionally leads
+   * to flaky tests, let's just retry failed tests. If a test still fails after 3 retries, something must be up
+   */
+  retries: !!process.env.CI ? 3 : 0,
   workers: 1,
   /* Reporter to use. See https://playwright.dev/docs/test-reporters */
   reporter: 'html',
diff --git a/grafana-plugin/src/components/EscalationsFilters/EscalationsFilters.module.css b/grafana-plugin/src/components/EscalationsFilters/EscalationsFilters.module.css
deleted file mode 100644
index 0ed200a7..00000000
--- a/grafana-plugin/src/components/EscalationsFilters/EscalationsFilters.module.css
+++ /dev/null
@@ -1,13 +0,0 @@
-.root {
-  display: flex;
-  align-items: center;
-}
-
-.search {
-  max-width: 400px;
-}
-
-.icon-button {
-  color: var(--secondary-text-color);
-  margin-left: 8px;
-}
diff --git a/grafana-plugin/src/components/EscalationsFilters/EscalationsFilters.tsx b/grafana-plugin/src/components/EscalationsFilters/EscalationsFilters.tsx
deleted file mode 100644
index f2387261..00000000
--- a/grafana-plugin/src/components/EscalationsFilters/EscalationsFilters.tsx
+++ /dev/null
@@ -1,54 +0,0 @@
-import React, { ChangeEvent, FC, useCallback } from 'react';
-
-import { Icon, Input, IconButton } from '@grafana/ui';
-import cn from 'classnames/bind';
-
-import styles from './EscalationsFilters.module.css';
-
-export interface Filters {
-  searchTerm: string;
-}
-
-interface EscalationsFiltersProps {
-  value: Filters;
-  onChange: (filters: Filters) => void;
-}
-
-const cx = cn.bind(styles);
-
-const EscalationsFilters: FC<EscalationsFiltersProps> = (props) => {
-  const { value, onChange } = props;
-
-  const onSearchTermChangeCallback = useCallback(
-    (e: ChangeEvent<HTMLInputElement>) => {
-      const filters = {
-        ...value,
-        searchTerm: e.currentTarget.value,
-      };
-
-      onChange(filters);
-    },
-    [onChange, value]
-  );
-
-  const handleClear = useCallback(() => {
-    onChange({ searchTerm: '' });
-  }, [onChange]);
-
-  return (
-    <div className={cx('root')}>
-      <Input
-        autoFocus
-        data-testid="escalation-chain-search-input"
-        className={cx('search')}
-        prefix={<Icon name="search" />}
-        placeholder="Search escalations..."
-        value={value.searchTerm}
-        onChange={onSearchTermChangeCallback}
-      />
-      <IconButton name="times" onClick={handleClear} className={cx('icon-button')} tooltip="Clear search input" />
-    </div>
-  );
-};
-
-export default EscalationsFilters;
diff --git a/grafana-plugin/src/components/ScheduleQuality/ScheduleQuality.tsx b/grafana-plugin/src/components/ScheduleQuality/ScheduleQuality.tsx
index 0b97dd7c..afbb3444 100644
--- a/grafana-plugin/src/components/ScheduleQuality/ScheduleQuality.tsx
+++ b/grafana-plugin/src/components/ScheduleQuality/ScheduleQuality.tsx
@@ -38,7 +38,7 @@ const ScheduleQuality: FC<ScheduleQualityProps> = ({ schedule, lastUpdated }) =>
 
   return (
     <>
-      <div className={cx('root')}>
+      <div className={cx('root')} data-testid="schedule-quality">
         {relatedEscalationChains?.length > 0 && schedule?.number_of_escalation_chains > 0 && (
           <TooltipBadge
             borderType="link"
diff --git a/grafana-plugin/src/components/ScheduleQualityDetails/ScheduleQualityDetails.tsx b/grafana-plugin/src/components/ScheduleQualityDetails/ScheduleQualityDetails.tsx
index 89fc40bf..f1d7fde5 100644
--- a/grafana-plugin/src/components/ScheduleQualityDetails/ScheduleQualityDetails.tsx
+++ b/grafana-plugin/src/components/ScheduleQualityDetails/ScheduleQualityDetails.tsx
@@ -29,7 +29,7 @@ export const ScheduleQualityDetails: FC<ScheduleQualityDetailsProps> = ({ qualit
   const warningComments = comments.filter((c) => c.type === 'warning');
 
   return (
-    <div className={cx('root')}>
+    <div className={cx('root')} data-testid="schedule-quality-details">
       <div className={cx('container')}>
         <div className={cx('container', 'container--withLateralPadding')}>
           <Text type={cx('secondary', 'header')}>
diff --git a/grafana-plugin/src/plugin.json b/grafana-plugin/src/plugin.json
index cdbfef28..6250bbe8 100644
--- a/grafana-plugin/src/plugin.json
+++ b/grafana-plugin/src/plugin.json
@@ -620,8 +620,7 @@
     }
   ],
   "dependencies": {
-    "grafanaDependency": ">=8.3.2",
-    "grafanaVersion": "8.3",
+    "grafanaDependency": ">=9.2.0",
     "plugins": []
   }
 }