diff --git a/.github/workflows/linting-and-tests.yml b/.github/workflows/linting-and-tests.yml
index d85932ce..456b8d34 100644
--- a/.github/workflows/linting-and-tests.yml
+++ b/.github/workflows/linting-and-tests.yml
@@ -287,6 +287,13 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v3
 
+      - name: Collect Workflow Telemetry
+        uses: runforesight/workflow-telemetry-action@v1
+        with:
+          comment_on_pr: false  # NOTE(review): presumably stops the report being posted as a PR comment; confirm
+          proc_trace_chart_show: false  # NOTE(review): presumably hides the process-trace chart; confirm
+          proc_trace_table_show: false  # NOTE(review): presumably hides the process-trace table; confirm
+
       - name: Create k8s Kind Cluster
         uses: helm/kind-action@v1.3.0
         with:
@@ -340,9 +347,12 @@ jobs:
       - name: Load engine Docker image on the nodes of the cluster
         run: kind load image-archive --name=chart-testing /tmp/oncall-engine.tar
 
-      # spin up 3 engine, 3 celery, and 3 grafana pods, this will allow us to parralelize the integration tests,
+      # spin up 3 engine and 3 celery pods, this will allow us to parallelize the integration tests,
       # and complete them much faster by using multiple test processes
       # With just 1 engine/celery/grafana pod, the backend crawls to a halt when there is > 1 parallelized integration test process
+      # NOTE: it appears that using > 1 grafana container w/ SQLite as the database sometimes leads to failed
+      # grafana database migrations (this is documented in this GitHub issue
+      # https://github.com/bitnami/charts/issues/10905)
       #
       # by settings grafana.plugins to [] and configuring grafana.extraVolumeMounts we are using the locally built
       # OnCall plugin rather than the latest published version
@@ -361,7 +371,7 @@ jobs:
             --set oncall.twilio.authToken="${{ secrets.TWILIO_AUTH_TOKEN }}" \
             --set oncall.twilio.phoneNumber="\"${{ secrets.TWILIO_PHONE_NUMBER }}"\" \
             --set oncall.twilio.verifySid="${{ secrets.TWILIO_VERIFY_SID }}" \
-            --set grafana.replicas=3 \
+            --set grafana.replicas=1 \
             --set grafana.image.tag=${{ matrix.grafana-image-tag }} \
             --set grafana.env.GF_SECURITY_ADMIN_USER=oncall \
             --set grafana.env.GF_SECURITY_ADMIN_PASSWORD=oncall \
@@ -400,13 +410,15 @@ jobs:
         working-directory: grafana-plugin
         run: ./node_modules/.bin/playwright install-deps chromium firefox webkit
 
-      - name: Await k8s pods and other resources up
-        uses: jupyterhub/action-k8s-await-workloads@v1
-        with:
-          workloads: "" # all
-          namespace: "" # default
-          timeout: 300
-          max-restarts: -1
+      # we could instead use the --wait flag for the helm install command above
+      # but there's no reason to block on that step
+      # instead we can let the k8s resources start up behind the scenes and do other
+      # setup tasks (ex. install playwright + its dependencies)
+      - name: Wait until k8s resources are ready
+        run: |
+          for deployment in helm-testing-grafana helm-testing-oncall-engine helm-testing-oncall-celery; do
+            kubectl rollout status "deployment/${deployment}" --timeout=300s
+          done
 
       - name: Run Integration Tests
         env:
@@ -425,14 +437,14 @@ jobs:
         working-directory: ./grafana-plugin
         run: yarn test:integration
 
-      # always spit out the engine and celery logs, AFTER the e2e tests have completed
-      # can be helpful for debugging failing/flaky tests
+      # spit out the engine, celery, and grafana logs, if the e2e tests have failed
+      # can be helpful for debugging failing tests
       # GitHub Action reference: https://github.com/jupyterhub/action-k8s-namespace-report
       - name: Kubernetes namespace report
         uses: jupyterhub/action-k8s-namespace-report@v1
         if: failure()
         with:
-          important-workloads: "deploy/helm-testing-oncall-engine deploy/helm-testing-oncall-celery"
+          important-workloads: "deploy/helm-testing-oncall-engine deploy/helm-testing-oncall-celery deploy/helm-testing-grafana"
 
       - uses: actions/upload-artifact@v3
         if: failure()
diff --git a/grafana-plugin/integration-tests/globalSetup.ts b/grafana-plugin/integration-tests/globalSetup.ts
index 04af1cfc..79fb4b0c 100644
--- a/grafana-plugin/integration-tests/globalSetup.ts
+++ b/grafana-plugin/integration-tests/globalSetup.ts
@@ -4,6 +4,8 @@ import { BASE_URL, GRAFANA_PASSWORD, GRAFANA_USERNAME, IS_OPEN_SOURCE, ONCALL_AP
 import { clickButton, getInputByName } from './utils/forms';
 import { goToGrafanaPage } from './utils/navigation';
 
+const GLOBAL_SETUP_RETRIES = 3; // total number of globalSetup attempts (the first run counts as one)
+
 /**
  * go to config page and wait for plugin icon to be available on left-hand navigation
  */
@@ -67,4 +69,23 @@ const globalSetup = async (config: FullConfig): Promise<void> => {
   await browserContext.close();
 };
 
-export default globalSetup;
+/**
+ * Retry global setup, in the event that it fails due to an oncall-engine/oncall-celery backend error
+ * (sometimes the sync endpoint will randomly return HTTP 500). Example CI job which failed global setup:
+ * https://github.com/grafana/oncall/actions/runs/5062712137/jobs/9088529416#step:19:2536
+ *
+ * References on retrying playwright global setup
+ * https://github.com/microsoft/playwright/discussions/11371
+ */
+const globalSetupWithRetries = async (config: FullConfig): Promise<void> => {
+  for (let attempt = 1; attempt < GLOBAL_SETUP_RETRIES; attempt++) {
+    try {
+      return await globalSetup(config);
+    } catch (e) {
+      console.warn(`globalSetup attempt ${attempt}/${GLOBAL_SETUP_RETRIES} failed, retrying`, e);
+    }
+  }
+  await globalSetup(config); // final attempt: not caught, so a persistent failure fails the run
+};
+
+export default globalSetupWithRetries;
diff --git a/grafana-plugin/playwright.config.ts b/grafana-plugin/playwright.config.ts
index 2ca27677..5cae7ef8 100644
--- a/grafana-plugin/playwright.config.ts
+++ b/grafana-plugin/playwright.config.ts
@@ -33,7 +33,7 @@ const config: PlaywrightTestConfig = {
    * to flaky tests.. let's just retry failed tests. If the same test fails 3 times, you know something must be up
    */
   retries: !!process.env.CI ? 3 : 0,
-  workers: 1,
+  workers: !!process.env.CI ? 2 : 1,
   /* Reporter to use. See https://playwright.dev/docs/test-reporters */
   reporter: 'html',
   /* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. */