]> git.proxmox.com Git - mirror_lxc.git/blame - hooks/nvidia
Merge pull request #2842 from brauner/2019-02-11/fix_licensing
[mirror_lxc.git] / hooks / nvidia
CommitLineData
#! /bin/bash
#
# Copyright (c) 2017, 2018 NVIDIA CORPORATION.
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

# Abort on any unhandled command failure and on use of unset variables.
set -eu

# NVIDIA_VISIBLE_DEVICES="" *or* NVIDIA_VISIBLE_DEVICES="void"
# GPU support was explicitly disabled, exit early.
# (`${VAR-x}` expands to "x" only when VAR is unset, so `-z` is true only
# for a variable that is set but empty.)
if [ -z "${NVIDIA_VISIBLE_DEVICES-x}" ] || [ "${NVIDIA_VISIBLE_DEVICES:-}" = "void" ]; then
    exit 0
fi

# https://github.com/nvidia/nvidia-container-runtime#cuda_version
if [ -n "${CUDA_VERSION:-}" ] && [ -z "${NVIDIA_REQUIRE_CUDA:-}" ]; then
    # Legacy CUDA image: default to all devices and all driver capabilities.
    if [ -z "${NVIDIA_VISIBLE_DEVICES+x}" ]; then
        NVIDIA_VISIBLE_DEVICES="all"
    fi
    if [ -z "${NVIDIA_DRIVER_CAPABILITIES:-}" ]; then
        NVIDIA_DRIVER_CAPABILITIES="all"
    fi
    if [[ "${CUDA_VERSION}" =~ ^[0-9]+\.[0-9]+ ]]; then
        # Must be exported: the NVIDIA_REQUIRE_* constraints are collected
        # further down with `compgen -e`, which only lists exported
        # variables. A plain assignment to a previously unset variable would
        # be silently skipped there.
        export NVIDIA_REQUIRE_CUDA="cuda>=${BASH_REMATCH[0]}"
    fi
else
    # NVIDIA_VISIBLE_DEVICES unset and it's not a legacy CUDA image.
    # This is not a GPU image, exit early.
    if [ -z "${NVIDIA_VISIBLE_DEVICES+x}" ]; then
        exit 0
    fi
fi

# Make sure the usual system directories are searched. `${PATH:+...}` avoids
# producing a leading empty entry (which would mean "current directory") when
# PATH is empty or unset.
export PATH="${PATH:+${PATH}:}/usr/sbin:/usr/bin:/sbin:/bin"

# `command -v` is a shell builtin, so this check does not depend on the
# external `which` utility being installed.
if ! command -v nvidia-container-cli >/dev/null; then
    echo "ERROR: Missing tool nvidia-container-cli, see https://github.com/NVIDIA/libnvidia-container" >&2
    exit 1
fi

# Report how the current process relates to user namespaces.
# Outputs (stdout), exactly one of:
#   no          - /proc/self/uid_map is missing, or it contains the
#                 identity mapping "0 0 4294967295".
#   userns-root - a mapping line maps a single ID from outer ID 0
#                 (second field 0, third field 1), or our uid_map is identical
#                 to the uid_map of PID 1.
#   yes         - any other mapping.
in_userns() {
    local inside outside count _rest

    [ -e /proc/self/uid_map ] || { echo no; return; }

    # Each uid_map line is "<id-inside-ns> <id-outside-ns> <length>".
    while read -r inside outside count _rest; do
        if [ "${inside} ${outside} ${count}" = "0 0 4294967295" ]; then
            echo no
            return
        fi
        if [ "${outside}" = "0" ] && [ "${count}" = "1" ]; then
            echo userns-root
            return
        fi
    done < /proc/self/uid_map

    if [ -e /proc/1/uid_map ]; then
        if [ "$(< /proc/self/uid_map)" = "$(< /proc/1/uid_map)" ]; then
            echo userns-root
            return
        fi
    fi
    echo yes
}

# Print the path of the ldconfig binary found in PATH, preferring
# "ldconfig.real" over "ldconfig".
# Returns: 0 when one of them is found, non-zero otherwise (nothing printed).
get_ldconfig() {
    # `command -v` is a builtin, so the lookup works even where the external
    # `which` utility is not installed.
    command -v "ldconfig.real" || command -v "ldconfig"
}

# Translate one driver capability name into the matching
# nvidia-container-cli flag.
# Arguments: $1 - capability name (compute, compat32, display, graphics,
#                 utility or video)
# Outputs:   the flag on stdout
# Returns:   1 (nothing printed) for an unknown capability
capability_to_cli() {
    case "$1" in
        compute)  echo "--compute";;
        compat32) echo "--compat32";;
        display)  echo "--display";;
        graphics) echo "--graphics";;
        utility)  echo "--utility";;
        video)    echo "--video";;
        # `return` rather than `exit`: same result inside $(...), but does
        # not terminate the calling shell when invoked directly.
        *)        return 1;;
    esac
}

# Same behavior as strconv.ParseBool in golang
# Arguments: $1 - textual boolean
# Outputs:   "true" or "false" on stdout
# Returns:   1 (nothing printed) when $1 is not a recognised boolean
parse_bool() {
    case "$1" in
        1|t|T|TRUE|true|True)    echo "true";;
        0|f|F|FALSE|false|False) echo "false";;
        # `return` rather than `exit`: same result inside $(...), but does
        # not terminate the calling shell when invoked directly.
        *)                       return 1;;
    esac
}

# Print the command-line help text on stdout. Always returns 0.
usage() {
    # Quoted delimiter: the text is emitted literally, with no expansion.
    cat <<'EOF'
nvidia-container-cli hook for LXC

Special arguments:
[ -h | --help ]: Print this help message and exit.

Optional arguments:
[ --no-load-kmods ]: Do not try to load the NVIDIA kernel modules.
[ --disable-require ]: Disable all the constraints of the form NVIDIA_REQUIRE_*.
[ --debug <path> ]: The path to the log file.
[ --ldcache <path> ]: The path to the host system's DSO cache.
[ --root <path> ]: The path to the driver root directory.
[ --ldconfig <path> ]: The path to the ldconfig binary, use a '@' prefix for a host path.
EOF
    return 0
}

# Parse the command line with getopt(1). The call is tested directly in the
# `if` condition: the script runs under `set -e`, so a failing standalone
# `options=$(...)` assignment would abort before a separate `$?` check could
# print the usage text.
if ! options=$(getopt -o h -l help,no-load-kmods,disable-require,debug:,ldcache:,root:,ldconfig: -- "$@"); then
    usage
    exit 1
fi
# getopt emits a shell-quoted, normalised argument list; re-install it as the
# positional parameters.
eval set -- "$options"

# Defaults for everything configurable from the command line.
CLI_LOAD_KMODS="true"
CLI_DISABLE_REQUIRE="false"
CLI_DEBUG=
CLI_LDCACHE=
CLI_ROOT=
CLI_LDCONFIG=

while :; do
    case "$1" in
        # -h is declared to getopt above, so it has to be handled here too.
        # The help path keeps its historical exit status of 1.
        -h|--help) usage; exit 1;;
        --no-load-kmods) CLI_LOAD_KMODS="false"; shift 1;;
        --disable-require) CLI_DISABLE_REQUIRE="true"; shift 1;;
        --debug) CLI_DEBUG=$2; shift 2;;
        --ldcache) CLI_LDCACHE=$2; shift 2;;
        --root) CLI_ROOT=$2; shift 2;;
        --ldconfig) CLI_LDCONFIG=$2; shift 2;;
        # End of options: what remains are the positional hook arguments.
        --) shift 1; break;;
        *) break;;
    esac
done

# Determine the hook section and type. With hook version 0 (also the default
# when LXC_HOOK_VERSION is unset) they are read from the 2nd and 3rd
# positional arguments; with version 1 they come from the environment.
HOOK_SECTION=
HOOK_TYPE=
case "${LXC_HOOK_VERSION:-0}" in
    0) HOOK_SECTION="${2:-}"; HOOK_TYPE="${3:-}";;
    1) HOOK_SECTION="${LXC_HOOK_SECTION:-}"; HOOK_TYPE="${LXC_HOOK_TYPE:-}";;
    *) echo "ERROR: Unsupported hook version: ${LXC_HOOK_VERSION}." >&2; exit 1;;
esac

if [ "${HOOK_SECTION}" != "lxc" ]; then
    echo "ERROR: Not running through LXC." >&2
    exit 1
fi

if [ "${HOOK_TYPE}" != "mount" ]; then
    echo "ERROR: This hook must be used as a \"mount\" hook." >&2
    exit 1
fi

# Only the "yes" answer of in_userns is accepted; "no" and "userns-root"
# both abort the hook.
USERNS=$(in_userns)
if [ "${USERNS}" != "yes" ]; then
    # This is a limitation of libnvidia-container.
    echo "FIXME: This hook currently only works in unprivileged mode." >&2
    exit 1
fi

# Always true at this point because of the check above; kept as an explicit
# condition around the user-namespace specific settings.
if [ "${USERNS}" = "yes" ]; then
    # Never ask nvidia-container-cli to load kernel modules in this mode;
    # warn instead if nvidia_uvm is not listed in /proc/modules.
    CLI_LOAD_KMODS="false"
    if ! grep -q nvidia_uvm /proc/modules; then
        echo "WARN: Kernel module nvidia_uvm is not loaded, nvidia-container-cli might fail. Make sure the NVIDIA device driver is installed and loaded." >&2
    fi
fi

# https://github.com/nvidia/nvidia-container-runtime#nvidia_disable_require
# A value that parse_bool does not recognise produces no output, so it
# compares unequal to "true" and leaves the setting unchanged.
if [ -n "${NVIDIA_DISABLE_REQUIRE:-}" ]; then
    if [ "$(parse_bool "${NVIDIA_DISABLE_REQUIRE}")" = "true" ]; then
        CLI_DISABLE_REQUIRE="true"
    fi
fi

# No --debug path given: when LXC logs at DEBUG or TRACE level, write the
# nvidia-container-cli log into a "hook" directory derived from the rootfs
# path. The LXC_* variables are read with an empty default so that a missing
# variable does not abort the script under `set -u`.
if [ -z "${CLI_DEBUG}" ]; then
    log_level="${LXC_LOG_LEVEL:-}"
    if [ "${log_level}" = "DEBUG" ] || [ "${log_level}" = "TRACE" ]; then
        rootfs_path="${LXC_ROOTFS_PATH:-}"
        # Drop everything up to and including the first ':' (presumably a
        # storage backend prefix such as "dir:" - confirm against LXC docs).
        rootfs_path="${rootfs_path#*:}"
        # Replace a trailing "rootfs" path component with "hook".
        hookdir="${rootfs_path/%rootfs/hook}"
        if [ -n "${hookdir}" ] && mkdir -p "${hookdir}"; then
            CLI_DEBUG="${hookdir}/nvidia.log"
        fi
    fi
fi

# A '@' prefix means a host path.
# Without --ldconfig, fall back to the ldconfig found on the host, if any.
if [ -z "${CLI_LDCONFIG}" ]; then
    if host_ldconfig=$(get_ldconfig); then
        CLI_LDCONFIG="@${host_ldconfig}"
    fi
fi

# https://github.com/nvidia/nvidia-container-runtime#nvidia_visible_devices
CLI_DEVICES="${NVIDIA_VISIBLE_DEVICES}"

# https://github.com/nvidia/nvidia-container-runtime#nvidia_driver_capabilities
# The comma-separated list is turned into a space-separated one.
CLI_CAPABILITIES=
if [ -n "${NVIDIA_DRIVER_CAPABILITIES:-}" ]; then
    CLI_CAPABILITIES="${NVIDIA_DRIVER_CAPABILITIES//,/ }"
fi

# "all" is only expanded when it is the sole entry of the list.
if [ "${CLI_CAPABILITIES}" = "all" ]; then
    CLI_CAPABILITIES="compute compat32 display graphics utility video"
fi

# Nothing requested: default to the "utility" capability.
if [ -z "${CLI_CAPABILITIES}" ]; then
    CLI_CAPABILITIES="utility"
fi

# Arguments are collected in two arrays: global_args go before the
# "configure" sub-command of nvidia-container-cli, configure_args after it.
global_args=()
configure_args=()

if [ -n "${CLI_DEBUG}" ]; then
    echo "INFO: Writing nvidia-container-cli log at ${CLI_DEBUG}." >&2
    global_args+=("--debug=${CLI_DEBUG}")
fi

if [ "${CLI_LOAD_KMODS}" = "true" ]; then
    global_args+=(--load-kmods)
fi

if [ "${USERNS}" = "yes" ]; then
    global_args+=(--user)
    configure_args+=(--no-cgroups)
fi

if [ -n "${CLI_LDCACHE}" ]; then
    global_args+=(--ldcache="${CLI_LDCACHE}")
fi

if [ -n "${CLI_ROOT}" ]; then
    global_args+=(--root="${CLI_ROOT}")
fi

if [ -n "${CLI_LDCONFIG}" ]; then
    configure_args+=(--ldconfig="${CLI_LDCONFIG}")
fi

# "none" (or an empty value) means: pass no --device option at all.
if [ -n "${CLI_DEVICES}" ] && [ "${CLI_DEVICES}" != "none" ]; then
    configure_args+=(--device="${CLI_DEVICES}")
fi

# Intentionally unquoted: CLI_CAPABILITIES is a space-separated list.
# shellcheck disable=SC2086
for cap in ${CLI_CAPABILITIES}; do
    if arg=$(capability_to_cli "${cap}"); then
        configure_args+=("${arg}")
    else
        echo "ERROR: Unknown driver capability \"${cap}\"." >&2
        exit 1
    fi
done

# https://github.com/nvidia/nvidia-container-runtime#nvidia_require_
# `compgen -e` lists the names of exported variables with the given prefix;
# `${!req}` then reads each one's value.
if [ "${CLI_DISABLE_REQUIRE}" = "false" ]; then
    for req in $(compgen -e "NVIDIA_REQUIRE_"); do
        configure_args+=("--require=${!req}")
    done
fi

if [ -d "/sys/kernel/security/apparmor" ]; then
    # Try to transition to the unconfined AppArmor profile.
    # Best effort: a failed write is deliberately ignored.
    echo "changeprofile unconfined" > /proc/self/attr/current || true
fi

# Trace the final command, then replace this shell with nvidia-container-cli.
# Both arrays are expanded quoted so that values containing whitespace or
# glob characters (e.g. a --debug path) reach the tool as single arguments.
set -x
exec nvidia-container-cli "${global_args[@]}" configure "${configure_args[@]}" "${LXC_ROOTFS_MOUNT}"