fix(oncall): fix celery tolerations and affinity (#2353)

# What this PR does

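Add `affinity` and `tolerations` settings for the `celery` deployment and the `migrate` job in the Helm chart, and add chart unit tests for `affinity`, `nodeSelector`, and `tolerations`.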

## Which issue(s) this PR fixes

## Checklist

- [x] Unit, integration, and e2e (if applicable) tests updated
- [ ] Documentation added (or `pr:no public docs` PR label added if not
required)
- [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not
required)

Co-authored-by: Joey Orlando <joey.orlando@grafana.com>
This commit is contained in:
Aleksey Lazarev 2023-07-11 16:20:28 +08:00 committed by GitHub
parent bb53b8fc4f
commit ccab3aebd8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 240 additions and 6 deletions

View file

@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add `event.users.avatar_full` field to `GET /api/internal/v1/schedules/{schedule_id}/filter_events`
payload by @joeyorlando ([#2459](https://github.com/grafana/oncall/pull/2459))
- Add `affinity` and `tolerations` for `celery` and `migrations` pods to the helm chart, plus unit tests for the chart ([#2353](https://github.com/grafana/oncall/pull/2353))
### Changed

View file

@ -38,6 +38,14 @@ spec:
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.celery.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.celery.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
containers:
- name: {{ .Chart.Name }}
securityContext:

View file

@ -39,6 +39,14 @@ spec:
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.migrate.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.migrate.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
containers:
- name: {{ .Chart.Name }}-migrate
securityContext:

View file

@ -0,0 +1,49 @@
# Snapshot for the test named "affinity -> should use custom affinity".
# Entries 1-3 hold the rendered `spec.template.spec.affinity` value; presumably
# one entry per template listed in the suite (TODO confirm ordering).
# NOTE(review): looks like a generated file; regenerate rather than hand-edit.
affinity -> should use custom affinity:
1: |
podAntiAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- podAffinityTerm:
labelSelector:
matchExpressions:
- key: app.kubernetes.io/name
operator: In
values:
- grafana
- key: app.kubernetes.io/instance
operator: In
values:
- grafana
topologyKey: failure-domain.beta.kubernetes.io/zone
weight: 100
2: |
podAntiAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- podAffinityTerm:
labelSelector:
matchExpressions:
- key: app.kubernetes.io/name
operator: In
values:
- grafana
- key: app.kubernetes.io/instance
operator: In
values:
- grafana
topologyKey: failure-domain.beta.kubernetes.io/zone
weight: 100
3: |
podAntiAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- podAffinityTerm:
labelSelector:
matchExpressions:
- key: app.kubernetes.io/name
operator: In
values:
- grafana
- key: app.kubernetes.io/instance
operator: In
values:
- grafana
topologyKey: failure-domain.beta.kubernetes.io/zone
weight: 100

View file

@ -0,0 +1,7 @@
# Snapshot for the test named "nodeSelector -> should use custom nodeSelector".
# Entries 1-3 hold the rendered `spec.template.spec.nodeSelector` value;
# presumably one entry per template listed in the suite (TODO confirm ordering).
# NOTE(review): looks like a generated file; regenerate rather than hand-edit.
nodeSelector -> should use custom nodeSelector:
1: |
unittest: here
2: |
unittest: here
3: |
unittest: here

View file

@ -0,0 +1,13 @@
# Snapshot for the test named "tolerations -> should use custom tolerations".
# Entries 1-3 hold the rendered `spec.template.spec.tolerations` value;
# presumably one entry per template listed in the suite (TODO confirm ordering).
# NOTE(review): looks like a generated file; regenerate rather than hand-edit.
tolerations -> should use custom tolerations:
1: |
- effect: NoSchedule
key: node-role.kubernetes.io/unittest
operator: Exists
2: |
- effect: NoSchedule
key: node-role.kubernetes.io/unittest
operator: Exists
3: |
- effect: NoSchedule
key: node-role.kubernetes.io/unittest
operator: Exists

View file

@ -0,0 +1,69 @@
# Chart unit tests for the `affinity` value of the celery deployment, the
# engine deployment, and the migrate job.
#
# The suite name below describes what is actually tested (it was previously a
# copy of the image/imagePullPolicy suite name).
suite: test affinity for deployments and migrate job
templates:
  - celery/deployment-celery.yaml
  - engine/deployment.yaml
  - engine/job-migrate.yaml
release:
  name: oncall
tests:
  # With default values no `affinity` key must be rendered in the pod spec.
  - it: affinity={} -> should set affinity null
    asserts:
      - notExists:
          path: spec.template.spec.affinity
  # The name of this test is the snapshot key; keep it in sync with the
  # snapshot file if it is ever renamed.
  - it: affinity -> should use custom affinity
    set:
      migrate:
        affinity:
          podAntiAffinity:
            preferredDuringSchedulingIgnoredDuringExecution:
              - weight: 100
                podAffinityTerm:
                  labelSelector:
                    matchExpressions:
                      - key: app.kubernetes.io/name
                        operator: In
                        values:
                          - grafana
                      - key: app.kubernetes.io/instance
                        operator: In
                        values:
                          - grafana
                  topologyKey: "failure-domain.beta.kubernetes.io/zone"
      engine:
        affinity:
          podAntiAffinity:
            preferredDuringSchedulingIgnoredDuringExecution:
              - weight: 100
                podAffinityTerm:
                  labelSelector:
                    matchExpressions:
                      - key: app.kubernetes.io/name
                        operator: In
                        values:
                          - grafana
                      - key: app.kubernetes.io/instance
                        operator: In
                        values:
                          - grafana
                  topologyKey: "failure-domain.beta.kubernetes.io/zone"
      celery:
        affinity:
          podAntiAffinity:
            preferredDuringSchedulingIgnoredDuringExecution:
              - weight: 100
                podAffinityTerm:
                  labelSelector:
                    matchExpressions:
                      - key: app.kubernetes.io/name
                        operator: In
                        values:
                          - grafana
                      - key: app.kubernetes.io/instance
                        operator: In
                        values:
                          - grafana
                  topologyKey: "failure-domain.beta.kubernetes.io/zone"
    asserts:
      - matchSnapshot:
          path: spec.template.spec.affinity

View file

@ -0,0 +1,27 @@
# Chart unit tests for the `nodeSelector` value of the celery deployment, the
# engine deployment, and the migrate job.
#
# The suite name below describes what is actually tested (it was previously a
# copy of the image/imagePullPolicy suite name).
suite: test nodeSelector for deployments and migrate job
templates:
  - celery/deployment-celery.yaml
  - engine/deployment.yaml
  - engine/job-migrate.yaml
release:
  name: oncall
tests:
  # With default values no `nodeSelector` key must be rendered in the pod spec.
  - it: nodeSelector={} -> should set nodeSelector null
    asserts:
      - notExists:
          path: spec.template.spec.nodeSelector
  # The name of this test is the snapshot key; keep it in sync with the
  # snapshot file if it is ever renamed.
  - it: nodeSelector -> should use custom nodeSelector
    set:
      migrate:
        nodeSelector:
          unittest: here
      engine:
        nodeSelector:
          unittest: here
      celery:
        nodeSelector:
          unittest: here
    asserts:
      - matchSnapshot:
          path: spec.template.spec.nodeSelector

View file

@ -0,0 +1,33 @@
# Chart unit tests for the `tolerations` value of the celery deployment, the
# engine deployment, and the migrate job.
#
# The suite name below describes what is actually tested (it was previously a
# copy of the image/imagePullPolicy suite name).
suite: test tolerations for deployments and migrate job
templates:
  - celery/deployment-celery.yaml
  - engine/deployment.yaml
  - engine/job-migrate.yaml
release:
  name: oncall
tests:
  # With default values no `tolerations` key must be rendered in the pod spec.
  # `tolerations` is a list, so the empty default is `[]`, not `{}`.
  - it: tolerations=[] -> should set tolerations null
    asserts:
      - notExists:
          path: spec.template.spec.tolerations
  # The name of this test is the snapshot key; keep it in sync with the
  # snapshot file if it is ever renamed.
  - it: tolerations -> should use custom tolerations
    set:
      migrate:
        tolerations:
          - key: "node-role.kubernetes.io/unittest"
            operator: "Exists"
            effect: "NoSchedule"
      engine:
        tolerations:
          - key: "node-role.kubernetes.io/unittest"
            operator: "Exists"
            effect: "NoSchedule"
      celery:
        tolerations:
          - key: "node-role.kubernetes.io/unittest"
            operator: "Exists"
            effect: "NoSchedule"
    asserts:
      - matchSnapshot:
          path: spec.template.spec.tolerations

View file

@ -75,9 +75,6 @@ celery:
initialDelaySeconds: 30
periodSeconds: 300
timeoutSeconds: 10
## Node labels for pod assignment
## ref: https://kubernetes.io/docs/user-guide/node-selection/
nodeSelector: {}
resources: {}
# limits:
# cpu: 100m
@ -86,6 +83,18 @@ celery:
# cpu: 100m
# memory: 128Mi
## Affinity for pod assignment
## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity
affinity: {}
## Node labels for pod assignment
## ref: https://kubernetes.io/docs/user-guide/node-selection/
nodeSelector: {}
## Tolerations for pod assignment
## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
tolerations: []
oncall:
# Override default MIRAGE_CIPHER_IV (must be 16 bytes long)
# For existing installation, this should not be changed.
@ -178,14 +187,24 @@ oncall:
# Whether to run django database migrations automatically
migrate:
enabled: true
## Node labels for pod assignment
## ref: https://kubernetes.io/docs/user-guide/node-selection/
nodeSelector: {}
# TTL can be unset by setting ttlSecondsAfterFinished: ""
ttlSecondsAfterFinished: 20
# use a helm hook to manage the migration job
useHook: false
## Affinity for pod assignment
## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity
affinity: {}
## Node labels for pod assignment
## ref: https://kubernetes.io/docs/user-guide/node-selection/
nodeSelector: {}
## Tolerations for pod assignment
## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
tolerations: []
# Sets environment variables with name capitalized and prefixed with UWSGI_, and dashes are substituted with underscores.
# see more: https://uwsgi-docs.readthedocs.io/en/latest/Configuration.html#environment-variables
# Set null to disable all UWSGI environment variables