fix(oncall): fix celery tolerations and affinity (#2353)
# What this PR does

Add affinity and tolerations for celery

## Which issue(s) this PR fixes

## Checklist

- [x] Unit, integration, and e2e (if applicable) tests updated
- [ ] Documentation added (or `pr:no public docs` PR label added if not required)
- [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not required)

Co-authored-by: Joey Orlando <joey.orlando@grafana.com>
This commit is contained in:
parent
bb53b8fc4f
commit
ccab3aebd8
10 changed files with 240 additions and 6 deletions
|
|
@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||
|
||||
- Add `event.users.avatar_full` field to `GET /api/internal/v1/schedules/{schedule_id}/filter_events`
|
||||
payload by @joeyorlando ([#2459](https://github.com/grafana/oncall/pull/2459))
|
||||
- Add `affinity` and `tolerations` for `celery` and `migrate` pods to the Helm chart, plus chart unit tests ([#2353](https://github.com/grafana/oncall/pull/2353))
|
||||
|
||||
### Changed
|
||||
|
||||
|
|
|
|||
|
|
@ -38,6 +38,14 @@ spec:
|
|||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.celery.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.celery.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: {{ .Chart.Name }}
|
||||
securityContext:
|
||||
|
|
|
|||
|
|
@ -39,6 +39,14 @@ spec:
|
|||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.migrate.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.migrate.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: {{ .Chart.Name }}-migrate
|
||||
securityContext:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,49 @@
|
|||
affinity -> should use custom affinity:
|
||||
1: |
|
||||
podAntiAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- podAffinityTerm:
|
||||
labelSelector:
|
||||
matchExpressions:
|
||||
- key: app.kubernetes.io/name
|
||||
operator: In
|
||||
values:
|
||||
- grafana
|
||||
- key: app.kubernetes.io/instance
|
||||
operator: In
|
||||
values:
|
||||
- grafana
|
||||
topologyKey: failure-domain.beta.kubernetes.io/zone
|
||||
weight: 100
|
||||
2: |
|
||||
podAntiAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- podAffinityTerm:
|
||||
labelSelector:
|
||||
matchExpressions:
|
||||
- key: app.kubernetes.io/name
|
||||
operator: In
|
||||
values:
|
||||
- grafana
|
||||
- key: app.kubernetes.io/instance
|
||||
operator: In
|
||||
values:
|
||||
- grafana
|
||||
topologyKey: failure-domain.beta.kubernetes.io/zone
|
||||
weight: 100
|
||||
3: |
|
||||
podAntiAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- podAffinityTerm:
|
||||
labelSelector:
|
||||
matchExpressions:
|
||||
- key: app.kubernetes.io/name
|
||||
operator: In
|
||||
values:
|
||||
- grafana
|
||||
- key: app.kubernetes.io/instance
|
||||
operator: In
|
||||
values:
|
||||
- grafana
|
||||
topologyKey: failure-domain.beta.kubernetes.io/zone
|
||||
weight: 100
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
nodeSelector -> should use custom nodeSelector:
|
||||
1: |
|
||||
unittest: here
|
||||
2: |
|
||||
unittest: here
|
||||
3: |
|
||||
unittest: here
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
tolerations -> should use custom tolerations:
|
||||
1: |
|
||||
- effect: NoSchedule
|
||||
key: node-role.kubernetes.io/unittest
|
||||
operator: Exists
|
||||
2: |
|
||||
- effect: NoSchedule
|
||||
key: node-role.kubernetes.io/unittest
|
||||
operator: Exists
|
||||
3: |
|
||||
- effect: NoSchedule
|
||||
key: node-role.kubernetes.io/unittest
|
||||
operator: Exists
|
||||
69
helm/oncall/tests/affinity_deployments_test.yaml
Normal file
69
helm/oncall/tests/affinity_deployments_test.yaml
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
suite: test affinity for deployments and the migrate job
|
||||
templates:
|
||||
- celery/deployment-celery.yaml
|
||||
- engine/deployment.yaml
|
||||
- engine/job-migrate.yaml
|
||||
release:
|
||||
name: oncall
|
||||
tests:
|
||||
- it: affinity={} -> should set affinity null
|
||||
asserts:
|
||||
- notExists:
|
||||
path: spec.template.spec.affinity
|
||||
|
||||
- it: affinity -> should use custom affinity
|
||||
set:
|
||||
migrate:
|
||||
affinity:
|
||||
podAntiAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- weight: 100
|
||||
podAffinityTerm:
|
||||
labelSelector:
|
||||
matchExpressions:
|
||||
- key: app.kubernetes.io/name
|
||||
operator: In
|
||||
values:
|
||||
- grafana
|
||||
- key: app.kubernetes.io/instance
|
||||
operator: In
|
||||
values:
|
||||
- grafana
|
||||
topologyKey: "failure-domain.beta.kubernetes.io/zone"
|
||||
engine:
|
||||
affinity:
|
||||
podAntiAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- weight: 100
|
||||
podAffinityTerm:
|
||||
labelSelector:
|
||||
matchExpressions:
|
||||
- key: app.kubernetes.io/name
|
||||
operator: In
|
||||
values:
|
||||
- grafana
|
||||
- key: app.kubernetes.io/instance
|
||||
operator: In
|
||||
values:
|
||||
- grafana
|
||||
topologyKey: "failure-domain.beta.kubernetes.io/zone"
|
||||
celery:
|
||||
affinity:
|
||||
podAntiAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- weight: 100
|
||||
podAffinityTerm:
|
||||
labelSelector:
|
||||
matchExpressions:
|
||||
- key: app.kubernetes.io/name
|
||||
operator: In
|
||||
values:
|
||||
- grafana
|
||||
- key: app.kubernetes.io/instance
|
||||
operator: In
|
||||
values:
|
||||
- grafana
|
||||
topologyKey: "failure-domain.beta.kubernetes.io/zone"
|
||||
asserts:
|
||||
- matchSnapshot:
|
||||
path: spec.template.spec.affinity
|
||||
27
helm/oncall/tests/nodeselector_deployments_test.yaml
Normal file
27
helm/oncall/tests/nodeselector_deployments_test.yaml
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
suite: test nodeSelector for deployments and the migrate job
|
||||
templates:
|
||||
- celery/deployment-celery.yaml
|
||||
- engine/deployment.yaml
|
||||
- engine/job-migrate.yaml
|
||||
release:
|
||||
name: oncall
|
||||
tests:
|
||||
- it: nodeSelector={} -> should set nodeSelector null
|
||||
asserts:
|
||||
- notExists:
|
||||
path: spec.template.spec.nodeSelector
|
||||
|
||||
- it: nodeSelector -> should use custom nodeSelector
|
||||
set:
|
||||
migrate:
|
||||
nodeSelector:
|
||||
unittest: here
|
||||
engine:
|
||||
nodeSelector:
|
||||
unittest: here
|
||||
celery:
|
||||
nodeSelector:
|
||||
unittest: here
|
||||
asserts:
|
||||
- matchSnapshot:
|
||||
path: spec.template.spec.nodeSelector
|
||||
33
helm/oncall/tests/tolerations_deployments_test.yaml
Normal file
33
helm/oncall/tests/tolerations_deployments_test.yaml
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
suite: test tolerations for deployments and the migrate job
|
||||
templates:
|
||||
- celery/deployment-celery.yaml
|
||||
- engine/deployment.yaml
|
||||
- engine/job-migrate.yaml
|
||||
release:
|
||||
name: oncall
|
||||
tests:
|
||||
- it: tolerations={} -> should set tolerations null
|
||||
asserts:
|
||||
- notExists:
|
||||
path: spec.template.spec.tolerations
|
||||
|
||||
- it: tolerations -> should use custom tolerations
|
||||
set:
|
||||
migrate:
|
||||
tolerations:
|
||||
- key: "node-role.kubernetes.io/unittest"
|
||||
operator: "Exists"
|
||||
effect: "NoSchedule"
|
||||
engine:
|
||||
tolerations:
|
||||
- key: "node-role.kubernetes.io/unittest"
|
||||
operator: "Exists"
|
||||
effect: "NoSchedule"
|
||||
celery:
|
||||
tolerations:
|
||||
- key: "node-role.kubernetes.io/unittest"
|
||||
operator: "Exists"
|
||||
effect: "NoSchedule"
|
||||
asserts:
|
||||
- matchSnapshot:
|
||||
path: spec.template.spec.tolerations
|
||||
|
|
@ -75,9 +75,6 @@ celery:
|
|||
initialDelaySeconds: 30
|
||||
periodSeconds: 300
|
||||
timeoutSeconds: 10
|
||||
## Node labels for pod assignment
|
||||
## ref: https://kubernetes.io/docs/user-guide/node-selection/
|
||||
nodeSelector: {}
|
||||
resources: {}
|
||||
# limits:
|
||||
# cpu: 100m
|
||||
|
|
@ -86,6 +83,18 @@ celery:
|
|||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
|
||||
## Affinity for pod assignment
|
||||
## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity
|
||||
affinity: {}
|
||||
|
||||
## Node labels for pod assignment
|
||||
## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector
|
||||
nodeSelector: {}
|
||||
|
||||
## Tolerations for pod assignment
|
||||
## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
|
||||
tolerations: []
|
||||
|
||||
oncall:
|
||||
# Override default MIRAGE_CIPHER_IV (must be 16 bytes long)
|
||||
# For existing installation, this should not be changed.
|
||||
|
|
@ -178,14 +187,24 @@ oncall:
|
|||
# Whether to run django database migrations automatically
|
||||
migrate:
|
||||
enabled: true
|
||||
## Node labels for pod assignment
|
||||
## ref: https://kubernetes.io/docs/user-guide/node-selection/
|
||||
nodeSelector: {}
|
||||
# TTL can be unset by setting ttlSecondsAfterFinished: ""
|
||||
ttlSecondsAfterFinished: 20
|
||||
# use a helm hook to manage the migration job
|
||||
useHook: false
|
||||
|
||||
## Affinity for pod assignment
|
||||
## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity
|
||||
affinity: {}
|
||||
|
||||
## Node labels for pod assignment
|
||||
## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector
|
||||
nodeSelector: {}
|
||||
|
||||
## Tolerations for pod assignment
|
||||
## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
|
||||
tolerations: []
|
||||
|
||||
|
||||
# Sets environment variables with name capitalized and prefixed with UWSGI_, and dashes are substituted with underscores.
|
||||
# see more: https://uwsgi-docs.readthedocs.io/en/latest/Configuration.html#environment-variables
|
||||
# Set null to disable all UWSGI environment variables
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue