diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 diff --git a/helm-values/LAMP-Contao-CSLSA/README.md b/helm-values/LAMP-Contao-CSLSA/README.md old mode 100644 new mode 100755 index 4b3b8e26f78acb4aea2d5e3651003d46c953cd07..c69ff9b53502dde95e4d2c604ddd060c206d3f7f --- a/helm-values/LAMP-Contao-CSLSA/README.md +++ b/helm-values/LAMP-Contao-CSLSA/README.md @@ -14,10 +14,10 @@ LAMP (Linux Apache Mysql PHP) System als Basis für die Installation von Contao, mkdir <path>/containerize/helm-values/LAMP cd <path>/containerize/helm-values/LAMP # Default Values ausgeben - helm inspect values lamp/lamp > values-default.yaml + helm inspect values containerize/ti-lamp > values-default.yaml ``` -2. `values.yaml` anlegen und anpassen, sieh mitgelieferte `values.yaml` +2. `values.yaml` anlegen und anpassen, siehe mitgelieferte `values.yaml` 3. Installation @@ -26,10 +26,25 @@ LAMP (Linux Apache Mysql PHP) System als Basis für die Installation von Contao, ```bash helm upgrade --install --wait contao \ -n contao --create-namespace \ - lamp/lamp \ + containerize/ti-lamp \ --values values.yaml \ --set mysql.rootPassword=<pass1> \ --set mysql.database=contao \ --set mysql.user=contao \ --set mysql.password=<pass2> + ``` + +4. 
Upgrade / Anpassungen + + ```bash + helm upgrade --install --wait contao \ + -n contao --create-namespace \ + containerize/ti-lamp \ + --values values.yaml \ + --set mysql.rootPassword=<pass1> \ + --set mysql.database=contao \ + --set mysql.user=contao \ + --set mysql.password=<pass2> \ + --set webdav.user=contao \ + --set webdav.password=<PASSWORD> ``` \ No newline at end of file diff --git a/helm-values/LAMP-Contao-CSLSA/values-default.yaml b/helm-values/LAMP-Contao-CSLSA/values-default.yaml old mode 100644 new mode 100755 diff --git a/helm-values/LAMP-Contao-CSLSA/values.yaml b/helm-values/LAMP-Contao-CSLSA/values.yaml old mode 100644 new mode 100755 index 27e2edfb1e4bdb870394f7fc086c9e1fd9d7bdcf..237ec1fbbf962eb8ae56b8b605c01eb5d196f691 --- a/helm-values/LAMP-Contao-CSLSA/values.yaml +++ b/helm-values/LAMP-Contao-CSLSA/values.yaml @@ -35,12 +35,18 @@ httpd: </VirtualHost> php: + # official PHP images ---- + # repository: "php" + # tag: "8.3-fpm-alpine" # has no gd+intl + # tag: "8.3-fpm" # has no gd+intl + # local Repo ------------- repository: "mcr.informatik.uni-halle.de/studio-r215/containerize/php-fpm" tag: "83" -# # official PHP images -# repository: "php" -# tag: "8.3-fpm-alpine" # has no gd+intl -# tag: "8.3-fpm" # has no gd+intl + # php.ini + ini: | + max_input_vars = 2000 + upload_max_filesize = 50M + post_max_size = 55M # resources: # requests: # memory: "128Mi" @@ -55,6 +61,11 @@ phpmyadmin: tag: "apache" subdomain: pma +webdav: + enabled: true + subdomain: webdv + + ingress: enabled: true domain: cslsa.tikube.informatik.uni-halle.de diff --git a/helm-values/NVidia/README.md b/helm-values/NVidia/README.md old mode 100644 new mode 100755 index ecd1e0c8eb238dedc039ccf2f3ec6f69a2b0b813..a26e36019091164b593130c224b48499aea319c7 --- a/helm-values/NVidia/README.md +++ b/helm-values/NVidia/README.md @@ -5,6 +5,8 @@ Für die Nutzung von CUDA in Kubernetes Knoten: Siehe 
https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/gpu-driver-configuration.html +Für die Nutzung mit vorinstallierten Treibern und Toolkit: +Siehe https://www.jimangel.io/posts/nvidia-rtx-gpu-kubernetes-setup/ ### Installation @@ -34,7 +36,7 @@ Siehe https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/gpu-dr helm status gpu-operator -n gpu-operator ``` -#### Variante Bare-metal/Passthrough with pre-installed NVIDIA drivers +#### Variante Bare-metal/Passthrough mit pre-installed NVIDIA-Treiber ```yaml driver: @@ -57,6 +59,44 @@ toolkit: value: "true" ``` +#### Variante Bare-metal/Passthrough mit pre-installed NVIDIA-Treiber und -Toolkit + +- Toolkit installieren + +```bash +# add nvidia-container-toolkit repo to apt sources +curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \ +&& curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \ +sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \ +sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list +sudo apt update + +# install container toolkit +sudo apt install -y nvidia-container-toolkit +# options: --dry-run +sudo nvidia-ctk runtime configure --runtime=containerd + +reboot +``` + +```yaml +driver: + enabled: false + version: "550.127.05" # hier muss die passende Version NVidia-Treiber der Basismaschine eingetragen werden + +toolkit: + enabled: false + env: + - name: CONTAINERD_CONFIG + value: /var/lib/rancher/rke2/agent/etc/containerd/config.toml.tmpl + - name: CONTAINERD_SOCKET + value: /run/k3s/containerd/containerd.sock + - name: CONTAINERD_RUNTIME_CLASS + value: nvidia + - name: CONTAINERD_SET_AS_DEFAULT + value: "true" +``` + ### Time-Slicing <https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/gpu-sharing.html> diff --git 
a/helm-values/NVidia/gpu-operator-values-default.yaml b/helm-values/NVidia/gpu-operator-values-default.yaml old mode 100644 new mode 100755 index 04bbcc046e6c24447d42ce82e9f129d8d476842e..aa1cb7e82fc101201f0dcdecb62f683fb7de16fa --- a/helm-values/NVidia/gpu-operator-values-default.yaml +++ b/helm-values/NVidia/gpu-operator-values-default.yaml @@ -20,6 +20,18 @@ sandboxWorkloads: enabled: false defaultWorkload: "container" +hostPaths: + # rootFS represents the path to the root filesystem of the host. + # This is used by components that need to interact with the host filesystem + # and as such this must be a chroot-able filesystem. + # Examples include the MIG Manager and Toolkit Container which may need to + # stop, start, or restart systemd services + rootFS: "/" + + # driverInstallDir represents the root at which driver files including libraries, + # config files, and executables can be found. + driverInstallDir: "/run/nvidia/driver" + daemonsets: labels: {} annotations: {} @@ -67,11 +79,11 @@ operator: cleanupCRD: false # upgrade CRD on chart upgrade, requires --disable-openapi-validation flag # to be passed during helm upgrade. 
- upgradeCRD: false + upgradeCRD: true initContainer: image: cuda repository: nvcr.io/nvidia - version: 12.4.1-base-ubi8 + version: 12.6.2-base-ubi9 imagePullPolicy: IfNotPresent tolerations: - key: "node-role.kubernetes.io/master" @@ -131,7 +143,7 @@ driver: usePrecompiled: false repository: nvcr.io/nvidia image: driver - version: "550.54.15" + version: "550.127.05" imagePullPolicy: IfNotPresent imagePullSecrets: [] startupProbe: @@ -181,7 +193,7 @@ driver: repository: nvcr.io/nvidia/cloud-native # When choosing a different version of k8s-driver-manager, DO NOT downgrade to a version lower than v0.6.4 # to ensure k8s-driver-manager stays compatible with gpu-operator starting from v24.3.0 - version: v0.6.8 + version: v0.7.0 imagePullPolicy: IfNotPresent env: - name: ENABLE_GPU_POD_EVICTION @@ -219,7 +231,7 @@ toolkit: enabled: true repository: nvcr.io/nvidia/k8s image: container-toolkit - version: v1.15.0-ubuntu20.04 + version: v1.17.0-ubuntu20.04 imagePullPolicy: IfNotPresent imagePullSecrets: [] env: [] @@ -230,7 +242,7 @@ devicePlugin: enabled: true repository: nvcr.io/nvidia image: k8s-device-plugin - version: v0.15.0-ubi8 + version: v0.17.0-ubi9 imagePullPolicy: IfNotPresent imagePullSecrets: [] args: [] @@ -271,7 +283,7 @@ devicePlugin: config: # Create a ConfigMap (default: false) create: false - # ConfigMap name (either exiting or to create a new one with create=true above) + # ConfigMap name (either existing or to create a new one with create=true above) name: "" # Default config name within the ConfigMap default: "" @@ -288,9 +300,8 @@ dcgm: enabled: false repository: nvcr.io/nvidia/cloud-native image: dcgm - version: 3.3.5-1-ubuntu22.04 + version: 3.3.8-1-ubuntu22.04 imagePullPolicy: IfNotPresent - hostPort: 5555 args: [] env: [] resources: {} @@ -299,7 +310,7 @@ dcgmExporter: enabled: true repository: nvcr.io/nvidia/k8s image: dcgm-exporter - version: 3.3.5-3.4.1-ubuntu22.04 + version: 3.3.8-3.6.0-ubuntu22.04 imagePullPolicy: IfNotPresent env: - name: 
DCGM_EXPORTER_LISTEN @@ -321,12 +332,31 @@ dcgmExporter: # target_label: instance # replacement: $1 # action: replace + # DCGM Exporter configuration + # This block is used to configure DCGM Exporter to emit a customized list of metrics. + # Use "name" to either point to an existing ConfigMap or to create a new one with a + # list of configurations (i.e with create=true). + # When pointing to an existing ConfigMap, the ConfigMap must exist in the same namespace as the release. + # The metrics are expected to be listed under a key called `dcgm-metrics.csv`. + # Use "data" to build an integrated ConfigMap from a set of custom metrics as + # part of the chart. An example of some custom metrics are shown below. Note that + # the contents of "data" must be in CSV format and be valid DCGM Exporter metric configurations. + # config: + # name: custom-dcgm-exporter-metrics + # create: true + # data: |- + # Format + # If line starts with a '#' it is considered a comment + # DCGM FIELD, Prometheus metric type, help message + # Clocks + # DCGM_FI_DEV_SM_CLOCK, gauge, SM clock frequency (in MHz). + # DCGM_FI_DEV_MEM_CLOCK, gauge, Memory clock frequency (in MHz). gfd: enabled: true repository: nvcr.io/nvidia image: k8s-device-plugin - version: v0.15.0-ubi8 + version: v0.17.0-ubi9 imagePullPolicy: IfNotPresent imagePullSecrets: [] env: @@ -340,16 +370,52 @@ migManager: enabled: true repository: nvcr.io/nvidia/cloud-native image: k8s-mig-manager - version: v0.7.0-ubuntu20.04 + version: v0.10.0-ubuntu20.04 imagePullPolicy: IfNotPresent imagePullSecrets: [] env: - name: WITH_REBOOT value: "false" resources: {} + # MIG configuration + # Use "name" to either point to an existing ConfigMap or to create a new one with a list of configurations(i.e with create=true). + # Use "data" to build an integrated ConfigMap from a set of configurations as + # part of this helm chart. 
An example of setting "data" might be: + # config: + # name: custom-mig-parted-configs + # create: true + # data: |- + # config.yaml: |- + # version: v1 + # mig-configs: + # all-disabled: + # - devices: all + # mig-enabled: false + # custom-mig: + # - devices: [0] + # mig-enabled: false + # - devices: [1] + # mig-enabled: true + # mig-devices: + # "1g.10gb": 7 + # - devices: [2] + # mig-enabled: true + # mig-devices: + # "2g.20gb": 2 + # "3g.40gb": 1 + # - devices: [3] + # mig-enabled: true + # mig-devices: + # "3g.40gb": 1 + # "4g.40gb": 1 config: - name: "default-mig-parted-config" default: "all-disabled" + # Create a ConfigMap (default: false) + create: false + # ConfigMap name (either existing or to create a new one with create=true above) + name: "" + # Data section for the ConfigMap to create (i.e only applies when create=true) + data: {} gpuClientsConfig: name: "" @@ -367,7 +433,7 @@ gds: enabled: false repository: nvcr.io/nvidia/cloud-native image: nvidia-fs - version: "2.17.5" + version: "2.20.5" imagePullPolicy: IfNotPresent imagePullSecrets: [] env: [] @@ -377,7 +443,7 @@ gdrcopy: enabled: false repository: nvcr.io/nvidia/cloud-native image: gdrdrv - version: "v2.4.1" + version: "v2.4.1-2" imagePullPolicy: IfNotPresent imagePullSecrets: [] env: [] @@ -397,7 +463,7 @@ vgpuManager: repository: nvcr.io/nvidia/cloud-native # When choosing a different version of k8s-driver-manager, DO NOT downgrade to a version lower than v0.6.4 # to ensure k8s-driver-manager stays compatible with gpu-operator starting from v24.3.0 - version: v0.6.8 + version: v0.7.0 imagePullPolicy: IfNotPresent env: - name: ENABLE_GPU_POD_EVICTION @@ -409,7 +475,7 @@ vgpuDeviceManager: enabled: true repository: nvcr.io/nvidia/cloud-native image: vgpu-device-manager - version: "v0.2.6" + version: v0.2.8 imagePullPolicy: IfNotPresent imagePullSecrets: [] env: [] @@ -421,7 +487,7 @@ vfioManager: enabled: true repository: nvcr.io/nvidia image: cuda - version: 12.4.1-base-ubi8 + version: 
12.6.2-base-ubi9 imagePullPolicy: IfNotPresent imagePullSecrets: [] env: [] @@ -431,7 +497,7 @@ vfioManager: repository: nvcr.io/nvidia/cloud-native # When choosing a different version of k8s-driver-manager, DO NOT downgrade to a version lower than v0.6.4 # to ensure k8s-driver-manager stays compatible with gpu-operator starting from v24.3.0 - version: v0.6.8 + version: v0.7.0 imagePullPolicy: IfNotPresent env: - name: ENABLE_GPU_POD_EVICTION @@ -457,7 +523,7 @@ kataManager: pullSecret: "" repository: nvcr.io/nvidia/cloud-native image: k8s-kata-manager - version: v0.2.0 + version: v0.2.2 imagePullPolicy: IfNotPresent imagePullSecrets: [] env: [] @@ -467,7 +533,7 @@ sandboxDevicePlugin: enabled: true repository: nvcr.io/nvidia image: kubevirt-gpu-device-plugin - version: v1.2.7 + version: v1.2.10 imagePullPolicy: IfNotPresent imagePullSecrets: [] args: [] @@ -489,6 +555,7 @@ ccManager: node-feature-discovery: enableNodeFeatureApi: true + priorityClassName: system-node-critical gc: enable: true replicaCount: 1 diff --git a/helm-values/NVidia/gpu-operator-values.yaml b/helm-values/NVidia/gpu-operator-values.yaml old mode 100644 new mode 100755 index a23a57552c986d2f0c747b84d6e289d7a22b513e..3ce8c7f33113636f8dc6096eca362a3ca8043b8f --- a/helm-values/NVidia/gpu-operator-values.yaml +++ b/helm-values/NVidia/gpu-operator-values.yaml @@ -1,18 +1,20 @@ -# values for gpu-operator. 
- +# values for gpu-operator driver: #enabled: true enabled: false - usePrecompiled: true - repository: nvcr.io/nvidia - image: driver + #usePrecompiled: true + #repository: nvcr.io/nvidia + #image: driver #version: "535.129.03" #version: "535.161.08" - version: "550.54.15" + #version: "550.54.15" + #version: "550.90.07" + version: "550.127.05" toolkit: - enabled: true + #enabled: true + enabled: false #env: [] env: - name: CONTAINERD_CONFIG @@ -24,3 +26,4 @@ toolkit: - name: CONTAINERD_SET_AS_DEFAULT value: "true" + diff --git a/helm-values/NVidia/time-slicing-config-all.yaml b/helm-values/NVidia/time-slicing-config-all.yaml old mode 100644 new mode 100755