oncall-engine/grafana-plugin/e2e-tests/globalSetup.ts

145 lines
4.7 KiB
TypeScript
Raw Normal View History

import { OrgRole } from '@grafana/data';
import { test as setup, chromium, expect, Page, BrowserContext, FullConfig, APIRequestContext } from '@playwright/test';
import { getOnCallApiUrl } from 'utils/consts';
import { VIEWER_USER_STORAGE_STATE, EDITOR_USER_STORAGE_STATE, ADMIN_USER_STORAGE_STATE } from '../playwright.config';
import GrafanaAPIClient from './utils/clients/grafana';
import {
GRAFANA_ADMIN_PASSWORD,
GRAFANA_ADMIN_USERNAME,
GRAFANA_EDITOR_PASSWORD,
GRAFANA_EDITOR_USERNAME,
GRAFANA_VIEWER_PASSWORD,
GRAFANA_VIEWER_USERNAME,
IS_CLOUD,
IS_OPEN_SOURCE,
} from './utils/constants';
import { clickButton, getInputByName } from './utils/forms';
import { goToGrafanaPage } from './utils/navigation';
// Grafana API client constructed with the admin username/password. Within this file it is used to
// poll instance health, create users with a given role, and log users in.
const grafanaApiClient = new GrafanaAPIClient(GRAFANA_ADMIN_USERNAME, GRAFANA_ADMIN_PASSWORD);
e2e tests on CI - actually await k8s resources to be ready before starting tests (#1997) Occasionally, the Playwright global setup step (which authenticates w/ the Grafana API + configures the plugin) would fail, leading to the CI job to instantly fail (playwright doesn't retry global setup if it fails). My current hypothesis as to why this is happening is because the `oncall-engine` and `oncall-celery` pods aren't _actually_ ready in these cases based on the way the `jupyterhub/action-k8s-await-workloads` action await k8s workloads: <img width="1076" alt="Screenshot 2023-05-23 at 18 24 36" src="https://github.com/grafana/oncall/assets/9406895/68d8d2d9-4274-4749-8788-e0a9a3dbad83"> By using the `kubectl rollout status deployment/<deployment-name> --timeout=300s` instead, we can be sure that these pods are _actually_ ready to receive traffic before we start the tests. ```bash ❯ kubectl rollout status --help Show the status of the rollout. By default 'rollout status' will watch the status of the latest rollout until it's done. If you don't want to wait for the rollout to finish then you can use --watch=false. Note that if a new rollout starts in-between, then 'rollout status' will continue watching the latest revision. If you want to pin to a specific revision and abort if it is rolled over by another revision, use --revision=N where N is the revision you need to watch for. ``` Lastly, even despite this, sometimes the `POST /api/internal/v1/plugin/sync` endpoint will return HTTP 500 ([example logs](https://github.com/grafana/oncall/actions/runs/5062712137/jobs/9088529416#step:19:2536) from failed CI job). In this case, let's setup the Playwright global setup to retry 3 times.
2023-05-23 20:20:46 -04:00
/**
 * Optional settings for `generateLoginStorageStateAndOptionallCreateUser`. When supplied (and
 * `IS_OPEN_SOURCE` is truthy) the user is created before logging in.
 */
type UserCreationSettings = {
// request context forwarded to `grafanaApiClient.idempotentlyCreateUserWithRole`; callers in this
// file pass the `request` of the admin browser context
adminAuthedRequest: APIRequestContext;
// org role forwarded to `grafanaApiClient.idempotentlyCreateUserWithRole` for the created user
role: OrgRole;
};
/**
 * Logs a user in through the Grafana API client and writes the resulting browser storage state to
 * disk, optionally creating the user first.
 *
 * @param config - Playwright config; the `headless` setting of the first project is used to launch the browser
 * @param userName - login of the user to authenticate
 * @param password - password of the user to authenticate
 * @param storageStateFileLocation - path the storage state is written to
 * @param userCreationSettings - when provided and `IS_OPEN_SOURCE` is truthy, the user is created
 * (via `idempotentlyCreateUserWithRole`) with the given role before logging in
 * @param closeContext - when `true`, the browser context AND the browser launched for it are closed
 * before returning; when `false`, both stay open and the caller owns the returned context
 * (the browser is reachable through `browserContext.browser()`)
 * @returns the browser context used for the login (already closed when `closeContext` is `true`)
 */
const generateLoginStorageStateAndOptionallCreateUser = async (
  config: FullConfig,
  userName: string,
  password: string,
  storageStateFileLocation: string,
  userCreationSettings?: UserCreationSettings,
  closeContext = false
): Promise<BrowserContext> => {
  if (userCreationSettings !== undefined && IS_OPEN_SOURCE) {
    const { adminAuthedRequest, role } = userCreationSettings;
    await grafanaApiClient.idempotentlyCreateUserWithRole(adminAuthedRequest, userName, password, role);
  }

  const { headless } = config.projects[0]!.use;
  const browser = await chromium.launch({ headless, slowMo: headless ? 0 : 100 });

  // the browser is handed over to the caller only if we get all the way to the end with closeContext === false;
  // in every other case (closeContext requested, or a failure below) it must be shut down here, otherwise
  // the launched browser process is leaked
  let browserHandedToCaller = false;

  try {
    const browserContext = await browser.newContext();

    await grafanaApiClient.login(browserContext.request, userName, password);
    await browserContext.storageState({ path: storageStateFileLocation });

    if (closeContext) {
      // close the context gracefully first; the browser itself is closed in `finally`
      await browserContext.close();
    } else {
      browserHandedToCaller = true;
    }

    return browserContext;
  } finally {
    if (!browserHandedToCaller) {
      await browser.close();
    }
  }
};
/**
 * Opens the OnCall plugin configuration page and makes sure the plugin ends up connected.
 *
 * - if the page already reports "Connected to OnCall", follow the "Open Grafana OnCall" link and stop
 * - if the page suggests removing the plugin configuration, remove it
 * - if the "Connected to OnCall" text is hidden, fill in the OnCall API URL and click "Connect"
 * - finally, wait for the status block to report "Connected to OnCall"
 */
const configureOnCallPlugin = async (page: Page): Promise<void> => {
  // load the plugin configuration page and give it a moment to render
  await goToGrafanaPage(page, '/plugins/grafana-oncall-app');
  await page.waitForTimeout(2000);

  // plugin already configured -> go straight to OnCall
  const connectedMessageCount = await page.getByText('Connected to OnCall').count();
  if (connectedMessageCount > 0) {
    await page.getByRole('link', { name: 'Open Grafana OnCall' }).click();
    return;
  }

  // a stale configuration may have to be removed before reconnecting
  const removeConfigHintCount = await page.getByText('try removing your plugin configuration').count();
  if (removeConfigHintCount > 0) {
    await clickButton({ page, buttonText: 'Remove current configuration' });
    await clickButton({ page, buttonText: /^Remove$/ });
  }

  await page.waitForTimeout(2000);

  // no "connected" message visible -> the API URL still has to be entered
  const connectedMessageHidden = await page.getByText(/Connected to OnCall/).isHidden();
  if (connectedMessageHidden) {
    const apiUrlInput = getInputByName(page, 'onCallApiUrl');
    const apiUrl = getOnCallApiUrl() || 'http://oncall-dev-engine:8080';
    await apiUrlInput.fill(apiUrl);
    await clickButton({ page, buttonText: 'Connect' });
  }

  // The "Connected to OnCall" message tells us everything is configured. The backend sync can take
  // a while, and the default 5s assertion timeout is sometimes not enough when the backend is under
  // load, hence the longer timeout here.
  const statusMessageBlock = page.getByTestId('status-message-block');
  await expect(statusMessageBlock).toHaveText(/Connected to OnCall.*/, { timeout: 25_000 });
};
/**
 * Setup step: authenticates the admin, editor and viewer users, persists their storage states and,
 * for open source environments, configures the OnCall plugin.
 *
 * Borrowed from our friends on the Incident team
 * https://github.com/grafana/incident/blob/main/plugin/e2e/global-setup.ts
 *
 * NOTE: per PR #1997, `POST /api/internal/v1/plugin/sync` has been seen to return HTTP 500
 * intermittently on CI, which is why this setup is expected to be retried on failure.
 */
setup('Configure Grafana OnCall plugin', async ({ request }, { config }) => {
  if (IS_CLOUD) {
    await grafanaApiClient.pollInstanceUntilItIsHealthy(request);
  }

  // the admin context is kept open: its request context is needed to create the other users,
  // and a page from it is used to configure the plugin
  const adminBrowserContext = await generateLoginStorageStateAndOptionallCreateUser(
    config,
    GRAFANA_ADMIN_USERNAME,
    GRAFANA_ADMIN_PASSWORD,
    ADMIN_USER_STORAGE_STATE
  );
  // grab the owning browser up-front so it can still be closed after the context is gone
  const adminBrowser = adminBrowserContext.browser();

  try {
    const adminPage = await adminBrowserContext.newPage();
    const { request: adminAuthedRequest } = adminBrowserContext;

    await generateLoginStorageStateAndOptionallCreateUser(
      config,
      GRAFANA_EDITOR_USERNAME,
      GRAFANA_EDITOR_PASSWORD,
      EDITOR_USER_STORAGE_STATE,
      {
        adminAuthedRequest,
        role: OrgRole.Editor,
      },
      true
    );

    await generateLoginStorageStateAndOptionallCreateUser(
      config,
      GRAFANA_VIEWER_USERNAME,
      GRAFANA_VIEWER_PASSWORD,
      VIEWER_USER_STORAGE_STATE,
      {
        adminAuthedRequest,
        role: OrgRole.Viewer,
      },
      true
    );

    if (IS_OPEN_SOURCE) {
      // plugin configuration can safely be skipped for cloud environments
      await configureOnCallPlugin(adminPage);
    }
  } finally {
    // always release the admin context and its browser, even when a step above fails,
    // so that a failed (and possibly retried) setup does not leave browser processes behind
    await adminBrowserContext.close();
    await adminBrowser?.close();
  }
});