chore(resources): remove scaleGPU support (#709)

**Description**
<!--
Please include a summary of the change and which issue is fixed. Please
also include relevant motivation and context. List any dependencies that
are required for this change.
-->
⚒️ Fixes  # <!--(issue)-->

**⚙️ Type of change**

- [ ] ⚙️ Feature/App addition
- [ ] 🪛 Bugfix
- [ ] ⚠️ Breaking change (fix or feature that would cause existing
functionality to not work as expected)
- [x] 🔃 Refactor of current code

**🧪 How Has This Been Tested?**
<!--
Please describe the tests that you ran to verify your changes. Provide
instructions so we can reproduce. Please also list any relevant details
for your test configuration
-->

**📃 Notes:**
<!-- Please enter any other relevant information here -->

**✔️ Checklist:**

- [x] ⚖️ My code follows the style guidelines of this project
- [x] 👀 I have performed a self-review of my own code
- [x] #️⃣ I have commented my code, particularly in hard-to-understand
areas
- [x] 📄 I have made corresponding changes to the documentation
- [x] ⚠️ My changes generate no new warnings
- [x] 🧪 I have added tests to this description that prove my fix is
effective or that my feature works
- [x] ⬆️ I increased versions for any altered app according to semantic
versioning

**App addition**

If this PR is an app addition please make sure you have done the
following.

- [ ] 🖼️ I have added an icon in the Chart's root directory called
`icon.png`

---

_Please don't blindly check all the boxes. Read them and only check
those that apply.
These checkboxes are there so the reviewer can see what this PR is all
about and
its status at a quick glance._

---------

Co-authored-by: Kjeld Schouten <kjeld@schouten-lebbing.nl>
This commit is contained in:
Stavros Kois
2024-02-24 11:37:33 +02:00
committed by GitHub
parent a3158b8c7c
commit 05279f7472
12 changed files with 191 additions and 896 deletions

View File

@@ -56,7 +56,7 @@ jobs:
name: Unit Tests
runs-on: ubuntu-22.04
env:
helmUnitVersion: 0.4.1
helmUnitVersion: 0.4.2
strategy:
fail-fast: false
matrix:

View File

@@ -3,7 +3,7 @@ appVersion: ""
dependencies:
- name: common
repository: file://../common
version: ~17.5.0
version: ~18.0.0
deprecated: false
description: Helper chart to test different use cases of the common library
home: https://github.com/truecharts/apps/tree/master/charts/library/common-test

View File

@@ -224,12 +224,6 @@ tests:
- it: should create the correct fixed envs with GPU
set:
scaleGPU:
- gpu:
nvidia.com/gpu: 1
targetSelector:
workload-name:
- container-name1
image: *image
TZ: Europe/London
containerOptions:
@@ -248,6 +242,9 @@ tests:
primary: true
imageSelector: image
probes: *probes
resources:
limits:
nvidia.com/gpu: 1
asserts:
- documentIndex: &deploymentDoc 0
isKind:
@@ -273,12 +270,6 @@ tests:
- it: should create the correct fixed envs with GPU and overridden on container level
set:
scaleGPU:
- gpu:
nvidia.com/gpu: 1
targetSelector:
workload-name:
- container-name1
image: *image
TZ: Europe/London
containerOptions:
@@ -297,6 +288,9 @@ tests:
primary: true
imageSelector: image
probes: *probes
resources:
limits:
nvidia.com/gpu: 1
fixedEnv:
NVIDIA_CAPS:
- all

View File

@@ -67,6 +67,7 @@ tests:
limits:
cpu: 2000m
memory: 4Gi
nvidia.com/gpu: 1
some-resource: 1
some-other-resource: 0
asserts:
@@ -84,6 +85,7 @@ tests:
limits:
cpu: 2000m
memory: 4Gi
nvidia.com/gpu: 1
some-resource: 1
some-other-resource: 0
requests:
@@ -113,7 +115,8 @@ tests:
limits:
cpu: 2000m
memory: 4Gi
some-resource: 2
some-resource: 0
some-other-resource: 2
asserts:
- documentIndex: &deploymentDoc 0
isKind:
@@ -129,8 +132,8 @@ tests:
limits:
cpu: 2000m
memory: 4Gi
some-resource: 2
some-other-resource: 0
some-resource: 0
some-other-resource: 2
requests:
cpu: 10m
memory: 50Mi
@@ -393,439 +396,6 @@ tests:
cpu: 10m
memory: 1Gi
- it: should assign GPU on the primary pod/container
set:
image: *image
scaleGPU:
- gpu:
nvidia.com/gpu: 1
global:
namespace: ix-namespace
ixChartContext:
addNvidiaRuntimeClass: true
nvidiaRuntimeClassName: nvidia
workload:
workload-name1:
enabled: true
primary: true
type: Deployment
podSpec:
containers:
container-name1:
enabled: true
primary: true
imageSelector: image
probes: *probes
container-name2:
enabled: true
primary: false
imageSelector: image
probes: *probes
workload-name2:
enabled: true
primary: false
type: Deployment
podSpec:
containers:
container-name1:
enabled: true
primary: true
imageSelector: image
probes: *probes
asserts:
- documentIndex: &deploymentDoc 0
isKind:
of: Deployment
- documentIndex: *deploymentDoc
isAPIVersion:
of: apps/v1
- documentIndex: *deploymentDoc
equal:
path: spec.template.spec.runtimeClassName
value: nvidia
- documentIndex: *deploymentDoc
isSubset:
path: spec.template.spec.containers[0]
content:
resources:
limits:
cpu: 4000m
memory: 8Gi
nvidia.com/gpu: "1"
requests:
cpu: 10m
memory: 50Mi
- documentIndex: *deploymentDoc
isSubset:
path: spec.template.spec.containers[1]
content:
resources:
limits:
cpu: 4000m
memory: 8Gi
requests:
cpu: 10m
memory: 50Mi
- documentIndex: &otherDeploymentDoc 1
isKind:
of: Deployment
- documentIndex: *otherDeploymentDoc
isAPIVersion:
of: apps/v1
- documentIndex: *otherDeploymentDoc
isNullOrEmpty:
path: spec.template.spec.runtimeClassName
- documentIndex: *otherDeploymentDoc
isSubset:
path: spec.template.spec.containers[0]
content:
resources:
limits:
cpu: 4000m
memory: 8Gi
requests:
cpu: 10m
memory: 50Mi
- it: should assign GPU on the selected pod/container
set:
image: *image
global:
namespace: ix-namespace
ixChartContext:
addNvidiaRuntimeClass: true
nvidiaRuntimeClassName: nvidia
scaleGPU:
- gpu:
nvidia.com/gpu: 1
targetSelector:
workload-name2:
- container-name1
workload:
workload-name1:
enabled: true
primary: true
type: Deployment
podSpec:
containers:
container-name1:
enabled: true
primary: true
imageSelector: image
probes: *probes
container-name2:
enabled: true
primary: false
imageSelector: image
probes: *probes
workload-name2:
enabled: true
primary: false
type: Deployment
podSpec:
containers:
container-name1:
enabled: true
primary: true
imageSelector: image
probes: *probes
asserts:
- documentIndex: &deploymentDoc 0
isKind:
of: Deployment
- documentIndex: *deploymentDoc
isAPIVersion:
of: apps/v1
- documentIndex: *deploymentDoc
isNullOrEmpty:
path: spec.template.spec.runtimeClassName
- documentIndex: *deploymentDoc
isSubset:
path: spec.template.spec.containers[0]
content:
resources:
limits:
cpu: 4000m
memory: 8Gi
requests:
cpu: 10m
memory: 50Mi
- documentIndex: *deploymentDoc
isSubset:
path: spec.template.spec.containers[1]
content:
resources:
limits:
cpu: 4000m
memory: 8Gi
requests:
cpu: 10m
memory: 50Mi
- documentIndex: &otherDeploymentDoc 1
isKind:
of: Deployment
- documentIndex: *otherDeploymentDoc
isAPIVersion:
of: apps/v1
- documentIndex: *otherDeploymentDoc
equal:
path: spec.template.spec.runtimeClassName
value: nvidia
- documentIndex: *otherDeploymentDoc
isSubset:
path: spec.template.spec.containers[0]
content:
resources:
limits:
cpu: 4000m
memory: 8Gi
nvidia.com/gpu: "1"
requests:
cpu: 10m
memory: 50Mi
- it: should assign GPU on the selected pods/containers
set:
image: *image
global:
namespace: ix-namespace
ixChartContext:
addNvidiaRuntimeClass: true
nvidiaRuntimeClassName: nvidia
scaleGPU:
- gpu:
nvidia.com/gpu: 1
targetSelector:
workload-name1:
- container-name1
- container-name2
workload:
workload-name1:
enabled: true
primary: true
type: Deployment
podSpec:
containers:
container-name1:
enabled: true
primary: true
imageSelector: image
probes: *probes
container-name2:
enabled: true
primary: false
imageSelector: image
probes: *probes
workload-name2:
enabled: true
primary: false
type: Deployment
podSpec:
containers:
container-name1:
enabled: true
primary: true
imageSelector: image
probes: *probes
asserts:
- documentIndex: &deploymentDoc 0
isKind:
of: Deployment
- documentIndex: *deploymentDoc
isAPIVersion:
of: apps/v1
- documentIndex: *deploymentDoc
equal:
path: spec.template.spec.runtimeClassName
value: nvidia
- documentIndex: *deploymentDoc
isSubset:
path: spec.template.spec.containers[0]
content:
resources:
limits:
cpu: 4000m
memory: 8Gi
nvidia.com/gpu: "1"
requests:
cpu: 10m
memory: 50Mi
- documentIndex: *deploymentDoc
isSubset:
path: spec.template.spec.containers[1]
content:
resources:
limits:
cpu: 4000m
memory: 8Gi
nvidia.com/gpu: "1"
requests:
cpu: 10m
memory: 50Mi
- documentIndex: &otherDeploymentDoc 1
isKind:
of: Deployment
- documentIndex: *otherDeploymentDoc
isAPIVersion:
of: apps/v1
- documentIndex: *otherDeploymentDoc
isNullOrEmpty:
path: spec.template.spec.runtimeClassName
- documentIndex: *otherDeploymentDoc
isSubset:
path: spec.template.spec.containers[0]
content:
resources:
limits:
cpu: 4000m
memory: 8Gi
requests:
cpu: 10m
memory: 50Mi
- it: should assign GPU on the selected pod/container with multiple GPUs
set:
image: *image
global:
namespace: ix-namespace
ixChartContext:
addNvidiaRuntimeClass: true
nvidiaRuntimeClassName: nvidia
scaleGPU:
- gpu:
nvidia.com/gpu: 1
amd.com/gpu: 0
targetSelector:
workload-name1:
- container-name1
- container-name2
workload:
workload-name1:
enabled: true
primary: true
type: Deployment
podSpec:
containers:
container-name1:
enabled: true
primary: true
imageSelector: image
probes: *probes
container-name2:
enabled: true
primary: false
imageSelector: image
probes: *probes
asserts:
- documentIndex: &deploymentDoc 0
isKind:
of: Deployment
- documentIndex: *deploymentDoc
isAPIVersion:
of: apps/v1
- documentIndex: *deploymentDoc
equal:
path: spec.template.spec.runtimeClassName
value: nvidia
- documentIndex: *deploymentDoc
isSubset:
path: spec.template.spec.containers[0]
content:
resources:
limits:
cpu: 4000m
memory: 8Gi
nvidia.com/gpu: "1"
requests:
cpu: 10m
memory: 50Mi
- documentIndex: *deploymentDoc
isSubset:
path: spec.template.spec.containers[1]
content:
resources:
limits:
cpu: 4000m
memory: 8Gi
nvidia.com/gpu: "1"
requests:
cpu: 10m
memory: 50Mi
- it: should assign multiple GPU on the selected pod/container with multiple selected GPUs
set:
image: *image
global:
namespace: ix-namespace
ixChartContext:
addNvidiaRuntimeClass: true
nvidiaRuntimeClassName: nvidia
scaleGPU:
- gpu:
nvidia.com/gpu: 1
amd.com/gpu: 0
targetSelector:
workload-name1:
- container-name1
- gpu:
nvidia.com/gpu: 0
amd.com/gpu: 1
targetSelector:
workload-name1:
- container-name2
workload:
workload-name1:
enabled: true
primary: true
type: Deployment
podSpec:
containers:
container-name1:
enabled: true
primary: true
imageSelector: image
probes: *probes
container-name2:
enabled: true
primary: false
imageSelector: image
probes: *probes
asserts:
- documentIndex: &deploymentDoc 0
isKind:
of: Deployment
- documentIndex: *deploymentDoc
isAPIVersion:
of: apps/v1
- documentIndex: *deploymentDoc
equal:
path: spec.template.spec.runtimeClassName
value: nvidia
- documentIndex: *deploymentDoc
isSubset:
path: spec.template.spec.containers[0]
content:
resources:
limits:
cpu: 4000m
memory: 8Gi
nvidia.com/gpu: "1"
requests:
cpu: 10m
memory: 50Mi
- documentIndex: *deploymentDoc
isSubset:
path: spec.template.spec.containers[1]
content:
resources:
limits:
cpu: 4000m
memory: 8Gi
amd.com/gpu: "1"
requests:
cpu: 10m
memory: 50Mi
# Failures
- it: should fail with empty requests
set:
@@ -993,101 +563,3 @@ tests:
asserts:
- failedTemplate:
errorMessage: Container - Expected [resources.limits.memory] to have one of the following formats [(Suffixed with E/P/T/G/M/K - eg. 1G), (Suffixed with Ei/Pi/Ti/Gi/Mi/Ki - eg. 1Gi), (Plain Integer in bytes - eg. 1024), (Exponent - eg. 134e6)], but got [8GB]
- it: should fail with empty gpu in defined entry
set:
image: *image
scaleGPU:
- gpu:
targetSelector:
workload-name1:
- container-name1
workload:
workload-name1:
enabled: true
primary: true
type: Deployment
podSpec:
containers:
container-name1:
enabled: true
primary: true
imageSelector: image
probes: *probes
asserts:
- failedTemplate:
errorMessage: Container - Expected non-empty [scaleGPU.gpu]
- it: should fail with empty list under workload in targetSelector
set:
image: *image
scaleGPU:
- gpu:
key: value
targetSelector:
workload-name1: []
workload:
workload-name1:
enabled: true
primary: true
type: Deployment
podSpec:
containers:
container-name1:
enabled: true
primary: true
imageSelector: image
probes: *probes
asserts:
- failedTemplate:
errorMessage: Container - Expected non-empty list under pod in [scaleGPU.targetSelector]
- it: should fail with empty value in gpu
set:
image: *image
scaleGPU:
- gpu:
key: ""
targetSelector:
workload-name1:
- container-name1
workload:
workload-name1:
enabled: true
primary: true
type: Deployment
podSpec:
containers:
container-name1:
enabled: true
primary: true
imageSelector: image
probes: *probes
asserts:
- failedTemplate:
errorMessage: Container - Expected non-empty [scaleGPU] [value]
- it: should fail with no value in gpu
set:
image: *image
scaleGPU:
- gpu:
key:
targetSelector:
workload-name1:
- container-name1
workload:
workload-name1:
enabled: true
primary: true
type: Deployment
podSpec:
containers:
container-name1:
enabled: true
primary: true
imageSelector: image
probes: *probes
asserts:
- failedTemplate:
errorMessage: Container - Expected non-empty [scaleGPU] [value]

View File

@@ -141,11 +141,45 @@ tests:
path: spec.template.spec.hostUsers
value: true
- it: should pass with enabled hostUsers because of gpu
- it: should pass with enabled hostUsers because of nvidia gpu
set:
scaleGPU:
- gpu:
nvidia: "1"
resources:
limits:
nvidia.com/gpu: 1
workload:
workload-name1:
enabled: true
primary: true
type: Deployment
podSpec: {}
asserts:
- documentIndex: *deploymentDoc
equal:
path: spec.template.spec.hostUsers
value: true
- it: should pass with enabled hostUsers because of amd gpu
set:
resources:
limits:
amd.com/gpu: 1
workload:
workload-name1:
enabled: true
primary: true
type: Deployment
podSpec: {}
asserts:
- documentIndex: *deploymentDoc
equal:
path: spec.template.spec.hostUsers
value: true
- it: should pass with enabled hostUsers because of intel gpu
set:
resources:
limits:
intel.com/i915: 1
workload:
workload-name1:
enabled: true

View File

@@ -74,149 +74,6 @@ tests:
path: spec.template.spec.runtimeClassName
value: some-other-runtime-class
- it: should pass with runtimeClassName from ixChartContext with targetSelector
set:
scaleGPU:
- gpu:
key: value
targetSelector:
workload-name1:
- container-name1
workload-name3:
- container-name1
global:
namespace: ix-namespace
ixChartContext:
addNvidiaRuntimeClass: true
nvidiaRuntimeClassName: ix-runtime
podOptions:
runtimeClassName: some-class
workload:
workload-name1:
enabled: true
primary: true
type: Deployment
podSpec:
runtimeClassName: some-other-class
workload-name2:
enabled: true
primary: false
type: DaemonSet
podSpec:
runtimeClassName: some-class
workload-name3:
enabled: true
primary: false
type: StatefulSet
podSpec:
runtimeClassName: some-class
asserts:
- documentIndex: *deploymentDoc
equal:
path: spec.template.spec.runtimeClassName
value: ix-runtime
- documentIndex: &daemonSetDoc 1
isKind:
of: DaemonSet
- documentIndex: *daemonSetDoc
equal:
path: spec.template.spec.runtimeClassName
value: some-class
- documentIndex: &statefulSetDoc 2
isKind:
of: StatefulSet
- documentIndex: *statefulSetDoc
equal:
path: spec.template.spec.runtimeClassName
value: ix-runtime
- it: should not add runtimeClassName with gpu value 0
set:
scaleGPU:
- gpu:
key: 0
global:
namespace: ix-namespace
ixChartContext:
addNvidiaRuntimeClass: true
nvidiaRuntimeClassName: ix-runtime
workload:
workload-name1:
enabled: true
primary: true
type: Deployment
podSpec: {}
asserts:
- documentIndex: *deploymentDoc
isNullOrEmpty:
path: spec.template.spec.runtimeClassName
- it: should pass with runtimeClassName from ixChartContext without targetSelector (on primary workload)
set:
scaleGPU:
- gpu:
key: value
global:
namespace: ix-namespace
ixChartContext:
addNvidiaRuntimeClass: true
nvidiaRuntimeClassName: ix-runtime
workload:
workload-name1:
enabled: true
primary: true
type: Job
podSpec:
runtimeClassName: some-other-class
workload-name2:
enabled: true
primary: false
type: CronJob
schedule: "* * * * *"
podSpec: {}
asserts:
- documentIndex: &jobDoc 0
isKind:
of: Job
- documentIndex: *jobDoc
equal:
path: spec.template.spec.runtimeClassName
value: ix-runtime
- documentIndex: &cronJobDoc 1
isKind:
of: CronJob
- documentIndex: *cronJobDoc
isNullOrEmpty:
path: spec.jobTemplate.spec.template.spec.runtimeClassName
- it: should pass with runtimeClassName not set from ixChartContext without gpu value
set:
scaleGPU:
- gpu: {}
targetSelector:
workload-name1:
- container-name1
global:
namespace: ix-namespace
ixChartContext:
addNvidiaRuntimeClass: true
nvidiaRuntimeClassName: ix-runtime
workload:
workload-name1:
enabled: true
primary: true
type: Job
podSpec:
runtimeClassName: some-other-class
asserts:
- documentIndex: *jobDoc
isKind:
of: Job
- documentIndex: *jobDoc
equal:
path: spec.template.spec.runtimeClassName
value: some-other-class
- it: should pass with runtimeClass set to nvidia when in SCALE and using the container "resources" directly
set:
global:
@@ -250,6 +107,39 @@ tests:
path: spec.template.spec.runtimeClassName
value: nvidia
- it: should pass with runtimeClass NOT set when in SCALE and 0 gpu
set:
global:
namespace: ix-namespace
ixChartContext:
addNvidiaRuntimeClass: true
nvidiaRuntimeClassName: ix-runtime
workload:
workload-name1:
enabled: true
primary: true
type: Deployment
podSpec:
containers:
container-name:
enabled: true
primary: true
probes:
liveness:
enabled: false
readiness:
enabled: false
startup:
enabled: false
resources:
limits:
nvidia.com/gpu:
asserts:
- documentIndex: *deploymentDoc
equal:
path: spec.template.spec.runtimeClassName
value: null
- it: should pass with runtimeClass set to nvidia when in SCALE and using the top level "resources" directly
set:
global:

View File

@@ -354,7 +354,7 @@ tests:
- 568
sysctls: []
- it: should pass with with gpu assigned to primary pod
- it: should pass with with gpu assigned to single container
set:
workload:
workload-name1:
@@ -365,14 +365,25 @@ tests:
securityContext:
supplementalGroups:
- 1000
containers:
container1:
enabled: true
primary: true
probes:
liveness:
enabled: false
readiness:
enabled: false
startup:
enabled: false
resources:
limits:
nvidia.com/gpu: 1
workload-name2:
enabled: true
primary: false
type: Deployment
podSpec: {}
scaleGPU:
- gpu:
nvidia: "1"
asserts:
- documentIndex: &deploymentDoc 0
isKind:
@@ -402,7 +413,7 @@ tests:
- 568
sysctls: []
- it: should pass with with gpu assigned to specific pod
- it: should pass with with gpu assigned to multiple pods
set:
workload:
workload-name1:
@@ -413,70 +424,39 @@ tests:
securityContext:
supplementalGroups:
- 1000
containers:
container1:
enabled: true
primary: true
probes:
liveness:
enabled: false
readiness:
enabled: false
startup:
enabled: false
resources:
limits:
nvidia.com/gpu: 1
workload-name2:
enabled: true
primary: false
type: Deployment
podSpec: {}
scaleGPU:
- gpu:
nvidia: "1"
targetSelector:
workload-name1:
- container-name1
asserts:
- documentIndex: &deploymentDoc 0
isKind:
of: Deployment
- documentIndex: *deploymentDoc
equal:
path: spec.template.spec.securityContext
value:
fsGroup: 568
fsGroupChangePolicy: OnRootMismatch
supplementalGroups:
- 1000
- 44
- 107
- 568
sysctls: []
- documentIndex: &otherDeploymentDoc 1
isKind:
of: Deployment
- documentIndex: *otherDeploymentDoc
equal:
path: spec.template.spec.securityContext
value:
fsGroup: 568
fsGroupChangePolicy: OnRootMismatch
supplementalGroups:
- 568
sysctls: []
- it: should pass with with gpu assigned to multiple pod
set:
workload:
workload-name1:
enabled: true
primary: true
type: Deployment
podSpec:
securityContext:
supplementalGroups:
- 1000
workload-name2:
enabled: true
primary: false
type: Deployment
podSpec: {}
scaleGPU:
- gpu:
nvidia: "1"
targetSelector:
workload-name1:
- container-name1
workload-name2:
- container-name1
containers:
container1:
enabled: true
primary: true
probes:
liveness:
enabled: false
readiness:
enabled: false
startup:
enabled: false
resources:
limits:
nvidia.com/gpu: 1
asserts:
- documentIndex: &deploymentDoc 0
isKind:

View File

@@ -47,7 +47,7 @@ objectData: The object data to be used to render the container.
{{- $fixed = mustAppend $fixed (dict "k" "UMASK_SET" "v" $UMASK) -}}
{{- $nvidia := false -}}
{{- if eq (include "tc.v1.common.lib.container.resources.gpu" (dict "rootCtx" $rootCtx "objectData" $objectData "returnBool" true)) "true" -}}
{{- if eq (include "tc.v1.common.lib.container.resources.hasGPU" (dict "rootCtx" $rootCtx "objectData" $objectData "gpuType" "nvidia.com/gpu")) "true" -}}
{{- $nvidia = true -}}
{{- end -}}

View File

@@ -27,73 +27,12 @@ limits:
{{- with $resources.limits.memory }} {{/* Passing 0, will not render it, meaning unlimited */}}
memory: {{ . }}
{{- end -}}
{{- include "tc.v1.common.lib.container.resources.gpu" (dict "rootCtx" $rootCtx "objectData" $objectData) | trim | nindent 2 -}}
{{- range $k, $v := (omit $resources.limits "cpu" "memory") }} {{/* Omit cpu and memory, as they are handled above */}}
{{ $k }}: {{ $v }}
{{- end -}}
{{- end -}}
{{- end -}}
{{/* Returns GPU resource lines for a container, built from the [scaleGPU] entries in the chart values */}}
{{/* Call this template:
{{ include "tc.v1.common.lib.container.resources.gpu" (dict "rootCtx" $rootCtx "objectData" $objectData) }}
rootCtx: The root context of the chart.
objectData: The object data to be used to render the container.
returnBool: (optional) When truthy, no resource lines are rendered; the template only
            returns the string "true" if at least one [scaleGPU] entry selects this container
            (and nothing otherwise). The per-value checks below are skipped in this mode.
*/}}
{{- define "tc.v1.common.lib.container.resources.gpu" -}}
{{- $objectData := .objectData -}}
{{- $rootCtx := .rootCtx -}}
{{- $returnBool := .returnBool -}}
{{/* Holds the "gpu" map of every [scaleGPU] entry that selects this container */}}
{{- $gpuResource := list -}}
{{- range $GPUValues := $rootCtx.Values.scaleGPU -}}
{{- if not $GPUValues.gpu -}}
{{- fail "Container - Expected non-empty [scaleGPU.gpu]" -}}
{{- end -}}
{{- $selected := false -}}
{{/* Parse selector if defined */}}
{{- if $GPUValues.targetSelector -}}
{{- range $podName, $containers := $GPUValues.targetSelector -}}
{{- if not $containers -}}
{{- fail "Container - Expected non-empty list under pod in [scaleGPU.targetSelector]" -}}
{{- end -}}
{{/* Selected only when both the pod name and the container name match */}}
{{- if and (eq $podName $objectData.podShortName) (mustHas $objectData.shortName $containers) -}}
{{- $selected = true -}}
{{- end -}}
{{- end -}}
{{/* If no selector, select primary pod/container */}}
{{- else if and $objectData.podPrimary $objectData.primary -}}
{{- $selected = true -}}
{{- end -}}
{{- if $selected -}}
{{- $gpuResource = mustAppend $gpuResource $GPUValues.gpu -}}
{{- end -}}
{{- end -}}
{{/* Render mode: emit one quoted "key: value" line per collected GPU resource */}}
{{- if not $returnBool -}}
{{- range $gpu := $gpuResource -}}
{{- range $k, $v := $gpu -}}
{{/* Reject values that are missing or an empty string */}}
{{- if or (kindIs "invalid" $v) (eq (toString $v) "") -}}
{{- fail "Container - Expected non-empty [scaleGPU] [value]" -}}
{{- end -}} {{/* Don't try to schedule 0 GPUs */}}
{{- if gt (int $v) 0 }}
{{ $k }}: {{ $v | quote }}
{{- end -}}
{{- end -}}
{{- end -}}
{{- else -}}
{{/* Bool mode: only report whether any entry selected this container */}}
{{- if $gpuResource -}}
{{- "true" -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{/* Validates resources to match a pattern */}}
{{/* Call this template:
{{ include "tc.v1.common.lib.container.resources.validation" (dict "resources" $resources) }}
@@ -141,3 +80,59 @@ resources: The resources object
{{- end -}}
{{- end -}}
{{- end -}}
{{/* Returns the string "true" when a GPU limit greater than 0 is found for the pod, otherwise "false" */}}
{{/* Call this template:
{{ include "tc.v1.common.lib.pod.resources.hasGPU" (dict "rootCtx" $rootCtx "objectData" $objectData "gpuType" "nvidia.com/gpu") }}
rootCtx: The root context of the chart (its .Values.resources.limits are checked).
objectData: The object data of the pod (its podSpec.containers are checked).
gpuType: (optional) A single resource key to look for. When not set,
         nvidia.com/gpu, amd.com/gpu and intel.com/i915 are all checked.
*/}}
{{- define "tc.v1.common.lib.pod.resources.hasGPU" -}}
{{- $rootCtx := .rootCtx -}}
{{- $objectData := .objectData -}}
{{- $gpuType := .gpuType -}}
{{/* Default set of resource keys; narrowed to a single key when gpuType is given */}}
{{- $types := (list "nvidia.com/gpu" "amd.com/gpu" "intel.com/i915") -}}
{{- if $gpuType -}}
{{- $types = (list $gpuType) -}}
{{- end -}}
{{- $gpu := false -}}
{{/* First look at the top level [resources.limits] of the chart values */}}
{{- if and ($rootCtx.Values.resources) ($rootCtx.Values.resources.limits) -}}
{{- range $t := $types -}}
{{- if gt ((get $rootCtx.Values.resources.limits $t) | int) 0 -}}
{{- $gpu = true -}}
{{- break -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{/* Then look at the [resources.limits] of every enabled container of this pod */}}
{{- if $objectData.podSpec -}}
{{- range $k, $v := $objectData.podSpec.containers -}}
{{- if not $v.enabled -}}
{{- continue -}}
{{- end -}}
{{- range $t := $types -}}
{{- if eq (include "tc.v1.common.lib.container.resources.hasGPU" (dict "rootCtx" $rootCtx "objectData" $v "gpuType" $t)) "true" -}}
{{- $gpu = true -}}
{{/* Only leaves the inner loop over types; remaining containers are still visited */}}
{{- break -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{/* Rendered as a string so callers can compare the result against "true" */}}
{{- $gpu | toString -}}
{{- end -}}
{{/* Returns the string "true" when the container has a limit greater than 0 for the given GPU resource, otherwise "false" */}}
{{/* Call this template:
{{ include "tc.v1.common.lib.container.resources.hasGPU" (dict "rootCtx" $rootCtx "objectData" $objectData "gpuType" "nvidia.com/gpu") }}
rootCtx: The root context of the chart (assigned but not read by this template).
objectData: The object data of the container; only its resources.limits are read.
gpuType: The resource key to look up under resources.limits (eg. nvidia.com/gpu).
*/}}
{{- define "tc.v1.common.lib.container.resources.hasGPU" -}}
{{- $rootCtx := .rootCtx -}}
{{- $objectData := .objectData -}}
{{- $gpuType := .gpuType -}}
{{- $gpu := false -}}
{{/* Containers without resources or without limits can never match */}}
{{- if and ($objectData.resources) ($objectData.resources.limits) -}}
{{/* NOTE(review): an unset or empty limit is presumably converted to 0 by "int" and so counts as no GPU - confirm against sprig get/int */}}
{{- if gt ((get $objectData.resources.limits $gpuType) | int) 0 -}}
{{- $gpu = true -}}
{{- end -}}
{{- end -}}
{{/* Rendered as a string so callers can compare the result against "true" */}}
{{- $gpu | toString -}}
{{- end -}}

View File

@@ -20,18 +20,7 @@ objectData: The object data to be used to render the Pod.
{{- $secContext = mustMergeOverwrite $secContext . -}}
{{- end -}}
{{- $gpuAdded := false -}}
{{- range $GPUValues := $rootCtx.Values.scaleGPU -}}
{{/* If there is a selector and pod is selected */}}
{{- if $GPUValues.targetSelector -}}
{{- if mustHas $objectData.shortName ($GPUValues.targetSelector | keys) -}}
{{- $gpuAdded = true -}}
{{- end -}}
{{/* If there is not a selector, but pod is primary */}}
{{- else if $objectData.primary -}}
{{- $gpuAdded = true -}}
{{- end -}}
{{- end -}}
{{- $gpu := (include "tc.v1.common.lib.pod.resources.hasGPU" (dict "rootCtx" $rootCtx "objectData" $objectData)) -}}
{{- $deviceGroups := (list 5 10 20 24) -}}
{{- $deviceAdded := false -}}
@@ -84,7 +73,7 @@ objectData: The object data to be used to render the Pod.
{{- end -}}
{{- end -}}
{{- if $gpuAdded -}}
{{- if eq $gpu "true" -}}
{{- $_ := set $secContext "supplementalGroups" (concat $secContext.supplementalGroups (list 44 107)) -}}
{{- $hostUsers = true -}}
{{- end -}}

View File

@@ -34,69 +34,13 @@ objectData: The object data to be used to render the Pod.
{{- define "tc.v1.common.lib.pod.runtimeClassName.scale" -}}
{{- $rootCtx := .rootCtx -}}
{{- $objectData := .objectData -}}
{{- $runtime := "" -}}
{{- $nvidia := false -}}
{{- if and ($rootCtx.Values.resources) ($rootCtx.Values.resources.limits) -}}
{{- if gt ((get $rootCtx.Values.resources.limits "nvidia.com/gpu") | int) 0 -}}
{{- $nvidia = true -}}
{{- end -}}
{{- end -}}
{{- range $rootCtx.Values.workload -}}
{{- if not .podSpec -}}
{{- continue -}}
{{- end -}}
{{- range $k, $v := .podSpec.containers -}}
{{- if or (not $v.resources) (not $v.resources.limits) -}}
{{- continue -}}
{{- end -}}
{{- if gt ((get $v.resources.limits "nvidia.com/gpu") | int) 0 -}}
{{- $nvidia = true -}}
{{- break -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{- if $nvidia -}}
{{- $nvidia := (include "tc.v1.common.lib.pod.resources.hasGPU" (dict "rootCtx" $rootCtx "objectData" $objectData)) -}}
{{- if eq $nvidia "true" -}}
{{/* https://github.com/truenas/middleware/blob/0bfc05166c3f95b1ab4ca4a9614691f14303db2e/src/middlewared/middlewared/plugins/kubernetes_linux/utils.py#L16 */}}
{{- $runtime = "nvidia" -}}
{{- end -}}
{{/* Keep backwards compat with .scaleGPU */}}
{{- if $rootCtx.Values.global.ixChartContext.addNvidiaRuntimeClass -}}
{{- range $rootCtx.Values.scaleGPU -}}
{{- if .gpu -}} {{/* Make sure it has a value... */}}
{{- $scaleGPU := false -}}
{{- range $k, $v := .gpu -}}
{{- if $v -}} {{/* Make sure value is not "0" or "" */}}
{{- $scaleGPU = true -}}
{{- break -}}
{{- end -}}
{{- end -}}
{{- if $scaleGPU -}}
{{- if (kindIs "map" .targetSelector) -}}
{{- range $podName, $containers := .targetSelector -}}
{{- if eq $objectData.shortName $podName -}} {{/* If the pod is selected */}}
{{- $runtime = $rootCtx.Values.global.ixChartContext.nvidiaRuntimeClassName -}}
{{- end -}}
{{- end -}}
{{- else if $objectData.primary -}}
{{/* If the pod is primary and no targetSelector is given, assign to primary */}}
{{- $runtime = $rootCtx.Values.global.ixChartContext.nvidiaRuntimeClassName -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{- $runtime -}}
{{- end -}}

View File

@@ -385,9 +385,6 @@ scaleExternalInterface: []
# -- (docs/scaleCertificate.md)
scaleCertificate: {}
# -- (docs/scaleGPU.md)
scaleGPU: []
# NOTES.txt
notes:
header: |