]> git.proxmox.com Git - mirror_lxc.git/blame - hooks/nvidia
hooks: dhclient hook improvements
[mirror_lxc.git] / hooks / nvidia
CommitLineData
58e29e9b
FA
1#! /bin/bash
2
3# Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
4
# Abort on any unhandled command failure and on use of unset variables.
set -eu

# The hook only acts when the container environment requests GPU support,
# i.e. when CUDA_VERSION or NVIDIA_VISIBLE_DEVICES is set (even if empty).
if [ -z "${CUDA_VERSION+x}" ] && [ -z "${NVIDIA_VISIBLE_DEVICES+x}" ]; then
    # Not a GPU container, nothing to do, exit early.
    exit 0
fi

# Append the usual system directories so the tools used below can be found.
export PATH="${PATH}:/usr/sbin:/usr/bin:/sbin:/bin"

# Use the "command -v" builtin rather than the external "which", which is
# not guaranteed to be installed.
if ! command -v nvidia-container-cli >/dev/null; then
    echo "ERROR: Missing tool nvidia-container-cli, see https://github.com/NVIDIA/libnvidia-container" >&2
    exit 1
fi
17
# Classify the current process based on /proc/self/uid_map.
# Arguments: none
# Outputs (stdout), exactly one of:
#   no          - /proc/self/uid_map is missing, or it contains the mapping
#                 "0 0 4294967295"
#   userns-root - some mapping has "0" as second field and "1" as third
#                 field, or the map is identical to /proc/1/uid_map
#   yes         - anything else
# Returns: 0
in_userns() {
    # Keep the loop variables out of the global scope.
    local inside outside count _rest

    [ -e /proc/self/uid_map ] || { echo no; return; }

    # "read -r" with several names splits each line on whitespace, which
    # yields the first three fields without forking awk/grep per line.
    while read -r inside outside count _rest; do
        if [ "${inside} ${outside} ${count}" = "0 0 4294967295" ]; then
            echo no
            return
        fi
        if [ "${outside}" = "0" ] && [ "${count}" = "1" ]; then
            echo userns-root
            return
        fi
    done < /proc/self/uid_map

    if [ "$(cat /proc/self/uid_map)" = "$(cat /proc/1/uid_map)" ]; then
        echo userns-root
        return
    fi
    echo yes
}
30
# Locate the ldconfig binary on PATH, preferring "ldconfig.real" over
# "ldconfig".
# Outputs: the resolved command path on stdout.
# Returns: 0 if one of them was found, non-zero otherwise.
get_ldconfig() {
    # "command -v" is a shell builtin, so this does not depend on the
    # external "which" utility being installed.
    command -v "ldconfig.real" || command -v "ldconfig"
}
35
# Translate one driver capability name into its nvidia-container-cli flag.
# Arguments: $1 - capability name (compute, compat32, graphics, utility, video)
# Outputs:   the matching "--<name>" flag on stdout
# Returns:   0 on success, 1 for an unknown capability (nothing is printed)
capability_to_cli() {
    case "$1" in
        compute)  echo "--compute";;
        compat32) echo "--compat32";;
        graphics) echo "--graphics";;
        utility)  echo "--utility";;
        video)    echo "--video";;
        # "return" instead of "exit": the status seen through $(...) is the
        # same, but a direct call no longer terminates the calling shell.
        *)        return 1;;
    esac
}
47
# Same behavior as strconv.ParseBool in golang
# Arguments: $1 - textual boolean
# Outputs:   "true" or "false" on stdout
# Returns:   0 on success, 1 if $1 is not a recognized boolean (nothing is
#            printed in that case)
parse_bool() {
    case "$1" in
        1|t|T|TRUE|true|True)    echo "true";;
        0|f|F|FALSE|false|False) echo "false";;
        # "return" instead of "exit": the status seen through $(...) is the
        # same, but a direct call no longer terminates the calling shell.
        *)                       return 1;;
    esac
}
57
# Print the hook's help text on stdout.
# Arguments: none
# Returns:   0
usage() {
    # One printf argument per output line; empty strings are blank lines.
    printf '%s\n' \
        "nvidia-container-cli hook for LXC" \
        "" \
        "Special arguments:" \
        "[ -h | --help ]: Print this help message and exit." \
        "" \
        "Optional arguments:" \
        "[ --no-load-kmods ]: Do not try to load the NVIDIA kernel modules." \
        "[ --disable-require ]: Disable all the constraints of the form NVIDIA_REQUIRE_*." \
        "[ --debug <path> ]: The path to the log file." \
        "[ --ldconfig <path> ]: The path to the ldconfig binary, use a '@' prefix for a host path."
    return 0
}
73
# Parse the hook's own options; whatever follows "--" stays in the
# positional parameters for the code further down.
#
# The getopt call sits inside the "if" condition on purpose: under "set -e"
# a failing plain assignment would abort the script before a later "$?"
# check could run, so the usage text would never be shown.
if ! options=$(getopt -o h -l help,no-load-kmods,disable-require,debug:,ldconfig: -- "$@"); then
    usage
    exit 1
fi
# getopt emits a shell-quoted list; eval restores it as positional parameters.
eval set -- "$options"

# Defaults, overridden by the options handled below.
CLI_LOAD_KMODS="true"
CLI_DISABLE_REQUIRE="false"
CLI_DEBUG=
CLI_LDCONFIG=

while :; do
    case "$1" in
        # -h is accepted by getopt and documented in usage, so handle it
        # together with --help. Printing help ends the hook with status 1.
        -h|--help)         usage; exit 1;;
        --no-load-kmods)   CLI_LOAD_KMODS="false"; shift 1;;
        --disable-require) CLI_DISABLE_REQUIRE="true"; shift 1;;
        --debug)           CLI_DEBUG=$2; shift 2;;
        --ldconfig)        CLI_LDCONFIG=$2; shift 2;;
        --)                shift 1; break;;
        *)                 break;;
    esac
done
97
# Work out which hook section/type we were invoked for. With hook version 0
# (also the default when LXC_HOOK_VERSION is unset or empty) they are read
# from the 2nd and 3rd positional parameters; with version 1 they are read
# from the LXC_HOOK_SECTION / LXC_HOOK_TYPE environment variables.
HOOK_SECTION=
HOOK_TYPE=
if [ "${LXC_HOOK_VERSION:-0}" = "0" ]; then
    HOOK_SECTION="${2:-}"
    HOOK_TYPE="${3:-}"
elif [ "${LXC_HOOK_VERSION}" = "1" ]; then
    HOOK_SECTION="${LXC_HOOK_SECTION:-}"
    HOOK_TYPE="${LXC_HOOK_TYPE:-}"
else
    echo "ERROR: Unsupported hook version: ${LXC_HOOK_VERSION}." >&2
    exit 1
fi

# Refuse to run for anything other than the "lxc" section ...
[ "${HOOK_SECTION}" = "lxc" ] || {
    echo "ERROR: Not running through LXC." >&2
    exit 1
}

# ... and anything other than a "mount" hook.
[ "${HOOK_TYPE}" = "mount" ] || {
    echo "ERROR: This hook must be used as a \"mount\" hook." >&2
    exit 1
}
115
# Only proceed when in_userns reports "yes"; every other answer is rejected.
USERNS=$(in_userns)
case "${USERNS}" in
    yes)
        # Never ask nvidia-container-cli to load kernel modules in this
        # mode; just warn if nvidia_uvm is not listed in /proc/modules.
        CLI_LOAD_KMODS="false"
        grep -q nvidia_uvm /proc/modules || \
            echo "WARN: Kernel module nvidia_uvm is not loaded, nvidia-container-cli might fail. Make sure the NVIDIA device driver is installed and loaded." >&2
        ;;
    *)
        # This is a limitation of libnvidia-container.
        echo "FIXME: This hook currently only works in unprivileged mode." >&2
        exit 1
        ;;
esac
129
# https://github.com/nvidia/nvidia-container-runtime#nvidia_disable_require
# A NVIDIA_DISABLE_REQUIRE value that parses as true turns the requirement
# checks off; a false or unparsable value leaves the current setting alone.
if [ -n "${NVIDIA_DISABLE_REQUIRE+x}" ] \
    && [ "$(parse_bool "${NVIDIA_DISABLE_REQUIRE}")" = "true" ]; then
    CLI_DISABLE_REQUIRE="true"
fi
136
# When no --debug path was given and LXC logs at DEBUG or TRACE level,
# write the nvidia-container-cli log to "<dir>/nvidia.log", where <dir> is
# derived from LXC_ROOTFS_PATH.
#
# The ":-" defaults keep "set -u" from aborting the hook when LXC did not
# export LXC_LOG_LEVEL or LXC_ROOTFS_PATH; in that case no log path is set.
if [ -z "${CLI_DEBUG:-}" ]; then
    case "${LXC_LOG_LEVEL:-}" in
        DEBUG|TRACE)
            # Drop everything up to and including the first ':'
            # (presumably a storage-backend prefix - confirm against LXC).
            rootfs_path="${LXC_ROOTFS_PATH:-}"
            rootfs_path="${rootfs_path#*:}"
            # Replace a trailing "rootfs" component with "hook".
            hookdir="${rootfs_path/%rootfs/hook}"
            # Best effort: without a usable directory, logging stays off.
            if [ -n "${hookdir}" ] && mkdir -p "${hookdir}"; then
                CLI_DEBUG="${hookdir}/nvidia.log"
            fi
            ;;
    esac
fi
146
# A '@' prefix means a host path.
# Without an explicit --ldconfig, fall back to the path reported by
# get_ldconfig, if it finds one.
if [ -z "${CLI_LDCONFIG}" ] && host_ldconfig=$(get_ldconfig); then
    CLI_LDCONFIG="@${host_ldconfig}"
fi
153
# https://github.com/nvidia/nvidia-container-runtime#nvidia_visible_devices
# Empty when NVIDIA_VISIBLE_DEVICES is unset.
CLI_DEVICES="${NVIDIA_VISIBLE_DEVICES-}"

# https://github.com/nvidia/nvidia-container-runtime#nvidia_driver_capabilities
# Stored as a space-separated list instead of a comma-separated one.
CLI_CAPABILITIES="${NVIDIA_DRIVER_CAPABILITIES-}"
CLI_CAPABILITIES="${CLI_CAPABILITIES//,/ }"

# https://github.com/nvidia/nvidia-container-runtime#nvidia_require_
# Concatenate the values of all exported NVIDIA_REQUIRE_* variables.
CLI_REQUIREMENTS=
for req in $(compgen -e "NVIDIA_REQUIRE_"); do
    CLI_REQUIREMENTS+=" ${!req}"
done
171
# https://github.com/nvidia/nvidia-container-runtime#cuda_version
if [ -n "${CUDA_VERSION+x}" ] && [ -z "${NVIDIA_REQUIRE_CUDA+x}" ]; then
    # Legacy CUDA image detected, default to all devices and all driver capabilities.
    [ -n "${CLI_DEVICES}" ] || CLI_DEVICES="all"
    [ -n "${CLI_CAPABILITIES}" ] || CLI_CAPABILITIES="all"

    # Transform CUDA_VERSION=X.Y to a "cuda>=X.Y" constraint for nvidia-container-cli.
    if [[ "${CUDA_VERSION}" =~ ^[0-9]+\.[0-9]+ ]]; then
        CLI_REQUIREMENTS+=" cuda>=${BASH_REMATCH[0]}"
    fi
fi

# "all" expands to every known capability; an empty list falls back to
# "utility" only.
case "${CLI_CAPABILITIES}" in
    all) CLI_CAPABILITIES="compute compat32 graphics utility video" ;;
    "")  CLI_CAPABILITIES="utility" ;;
esac
196
# Assemble the nvidia-container-cli command line and hand over to it.
#
# Both argument lists start out empty and are expanded quoted, so each
# element is passed as exactly one argument even if it contains whitespace
# (e.g. a --debug path with spaces).
global_args=()
configure_args=()

if [ -n "${CLI_DEBUG}" ]; then
    echo "INFO: Writing nvidia-container-cli log at ${CLI_DEBUG}." >&2
    global_args+=("--debug=${CLI_DEBUG}")
fi

if [ "${CLI_LOAD_KMODS}" = "true" ]; then
    global_args+=(--load-kmods)
fi

if [ "${USERNS}" = "yes" ]; then
    global_args+=(--user)
    configure_args+=(--no-cgroups)
fi

if [ -n "${CLI_LDCONFIG}" ]; then
    configure_args+=("--ldconfig=${CLI_LDCONFIG}")
fi

# "none" (or an empty value) means no --device argument is passed.
if [ -n "${CLI_DEVICES}" ] && [ "${CLI_DEVICES}" != "none" ]; then
    configure_args+=("--device=${CLI_DEVICES}")
fi

# Deliberately unquoted: CLI_CAPABILITIES is a space-separated list.
for cap in ${CLI_CAPABILITIES}; do
    if arg=$(capability_to_cli "${cap}"); then
        configure_args+=("${arg}")
    else
        echo "ERROR: Unknown driver capability \"${cap}\"." >&2
        exit 1
    fi
done

if [ "${CLI_DISABLE_REQUIRE}" = "false" ]; then
    # Deliberately unquoted: CLI_REQUIREMENTS is a space-separated list.
    for req in ${CLI_REQUIREMENTS}; do
        configure_args+=("--require=${req}")
    done
fi

# Trace the final command, then replace this shell with it.
set -x
# The ${arr[@]+"${arr[@]}"} form expands to nothing for an empty array
# without tripping "set -u" on bash versions older than 4.4.
exec nvidia-container-cli \
    ${global_args[@]+"${global_args[@]}"} \
    configure \
    ${configure_args[@]+"${configure_args[@]}"} \
    "${LXC_ROOTFS_MOUNT}"