]>
Commit | Line | Data |
---|---|---|
58e29e9b FA |
1 | #! /bin/bash |
2 | ||
3 | # Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | |
4 | ||
# Abort on any unhandled command failure and on expansion of unset variables.
set -eu

if [ -z "${CUDA_VERSION+x}" ] && [ -z "${NVIDIA_VISIBLE_DEVICES+x}" ]; then
    # Not a GPU container, nothing to do, exit early.
    exit 0
fi

# Append the usual system directories to the search path before looking up
# the external tools this hook depends on.
export PATH=$PATH:/usr/sbin:/usr/bin:/sbin:/bin
# `command -v` is a shell builtin, so the lookup itself cannot fail because
# the external `which` utility is not installed.
if ! command -v nvidia-container-cli >/dev/null 2>&1; then
    echo "ERROR: Missing tool nvidia-container-cli, see https://github.com/NVIDIA/libnvidia-container" >&2
    exit 1
fi
17 | ||
# Classify the current process with respect to user namespaces.
#
# Reads /proc/self/uid_map (and /proc/1/uid_map) and prints exactly one of:
#   no          - /proc/self/uid_map does not exist, or it contains the
#                 mapping "0 0 4294967295";
#   userns-root - some mapping line has 0 as its second field and 1 as its
#                 third field, or our uid_map is identical to that of PID 1;
#   yes         - none of the above.
# Arguments: none
# Outputs:   one word on stdout
in_userns() {
    # Keep the loop variables out of the global namespace.
    local first second third _rest

    [ -e /proc/self/uid_map ] || { echo no; return; }

    # Let `read` split the fields instead of spawning awk/grep per line;
    # -r keeps backslashes literal.
    while read -r first second third _rest; do
        if [ "${first} ${second} ${third}" = "0 0 4294967295" ]; then
            echo no
            return
        fi
        if [ "${second}" = "0" ] && [ "${third}" = "1" ]; then
            echo userns-root
            return
        fi
    done < /proc/self/uid_map

    if [ "$(cat /proc/self/uid_map)" = "$(cat /proc/1/uid_map)" ]; then
        echo userns-root
        return
    fi
    echo yes
}
30 | ||
# Locate the ldconfig binary through PATH, preferring "ldconfig.real" over
# "ldconfig".
# Outputs: the resolved path on stdout (nothing when neither is found)
# Returns: 0 when one of the two names was found, non-zero otherwise
get_ldconfig() {
    # `command -v` is a builtin; unlike `which` it is always available.
    command -v "ldconfig.real" || command -v "ldconfig"
}
35 | ||
# Translate one driver capability name into the matching command-line flag.
# Arguments: $1 - capability name (compute, compat32, graphics, utility, video)
# Outputs:   "--<name>" on stdout for a known capability
# Returns:   0 for a known capability, 1 otherwise (nothing is printed)
capability_to_cli() {
    case "$1" in
        compute|compat32|graphics|utility|video)
            printf '%s\n' "--$1"
            ;;
        *)
            # `return`, not `exit`: a direct call (outside of a command
            # substitution) must not terminate the whole script.
            return 1
            ;;
    esac
}
47 | ||
# Same behavior as strconv.ParseBool in golang
# Arguments: $1 - textual boolean
# Outputs:   "true" or "false" on stdout for an accepted spelling
# Returns:   0 for an accepted spelling, 1 otherwise (nothing is printed)
parse_bool() {
    case "$1" in
        1|t|T|TRUE|true|True)
            echo "true"
            ;;
        0|f|F|FALSE|false|False)
            echo "false"
            ;;
        *)
            # `return`, not `exit`: a direct call (outside of a command
            # substitution) must not terminate the whole script.
            return 1
            ;;
    esac
}
57 | ||
# Print the help text for this hook on stdout.
# Arguments: none
# Returns:   always 0
usage() {
    # The delimiter is quoted so the help text is emitted verbatim and is
    # never subject to parameter or command expansion.
    cat <<'EOF'
nvidia-container-cli hook for LXC

Special arguments:
[ -h | --help ]: Print this help message and exit.

Optional arguments:
[ --no-load-kmods ]: Do not try to load the NVIDIA kernel modules.
[ --disable-require ]: Disable all the constraints of the form NVIDIA_REQUIRE_*.
[ --debug <path> ]: The path to the log file.
[ --ldconfig <path> ]: The path to the ldconfig binary, use a '@' prefix for a host path.
EOF
    return 0
}
73 | ||
# Normalize the command line with getopt(1). The call is tested directly in
# the `if`: under `set -e` a failing plain assignment would abort the script
# before a separate `$?` check could print the usage text.
if ! options=$(getopt -o h -l help,no-load-kmods,disable-require,debug:,ldconfig: -- "$@"); then
    usage
    exit 1
fi
# Replace the positional parameters with getopt's quoted, reordered output.
eval set -- "$options"

# Defaults, possibly overridden by the options parsed below.
CLI_LOAD_KMODS="true"
CLI_DISABLE_REQUIRE="false"
CLI_DEBUG=
CLI_LDCONFIG=

while :; do
    case "$1" in
        # getopt is told to accept the short form (-o h), so handle it here
        # too instead of letting it fall through to the positional arguments.
        -h|--help) usage; exit 1 ;;
        --no-load-kmods) CLI_LOAD_KMODS="false"; shift 1 ;;
        --disable-require) CLI_DISABLE_REQUIRE="true"; shift 1 ;;
        --debug) CLI_DEBUG=$2; shift 2 ;;
        --ldconfig) CLI_LDCONFIG=$2; shift 2 ;;
        # End of options: what remains are the positional arguments.
        --) shift 1; break ;;
        *) break ;;
    esac
done
97 | ||
# Determine the hook section and type. Version 0 hooks carry them in the
# second and third positional arguments; version 1 hooks carry them in the
# LXC_HOOK_SECTION / LXC_HOOK_TYPE environment variables.
HOOK_SECTION=""
HOOK_TYPE=""
if [[ "${LXC_HOOK_VERSION:-0}" == "0" ]]; then
    HOOK_SECTION="${2:-}"
    HOOK_TYPE="${3:-}"
elif [[ "${LXC_HOOK_VERSION:-0}" == "1" ]]; then
    HOOK_SECTION="${LXC_HOOK_SECTION:-}"
    HOOK_TYPE="${LXC_HOOK_TYPE:-}"
else
    echo "ERROR: Unsupported hook version: ${LXC_HOOK_VERSION}." >&2
    exit 1
fi

# Refuse to run unless invoked as an "lxc" section hook ...
[[ "${HOOK_SECTION}" == "lxc" ]] || {
    echo "ERROR: Not running through LXC." >&2
    exit 1
}

# ... of type "mount".
[[ "${HOOK_TYPE}" == "mount" ]] || {
    echo "ERROR: This hook must be used as a \"mount\" hook." >&2
    exit 1
}
115 | ||
# Only the "yes" answer of in_userns is supported; anything else aborts.
USERNS="$(in_userns)"
case "${USERNS}" in
    yes)
        # Never ask the CLI to load kernel modules in this mode; only warn
        # when nvidia_uvm is not listed in /proc/modules.
        CLI_LOAD_KMODS="false"
        grep -q nvidia_uvm /proc/modules ||
            echo "WARN: Kernel module nvidia_uvm is not loaded, nvidia-container-cli might fail. Make sure the NVIDIA device driver is installed and loaded." >&2
        ;;
    *)
        # This is a limitation of libnvidia-container.
        echo "FIXME: This hook currently only works in unprivileged mode." >&2
        exit 1
        ;;
esac
129 | ||
# https://github.com/nvidia/nvidia-container-runtime#nvidia_disable_require
# A set NVIDIA_DISABLE_REQUIRE that parses as "true" disables the
# requirement checks; any other value leaves the current setting untouched.
if [ -n "${NVIDIA_DISABLE_REQUIRE+x}" ] \
    && [ "$(parse_bool "${NVIDIA_DISABLE_REQUIRE}")" = "true" ]; then
    CLI_DISABLE_REQUIRE="true"
fi
136 | ||
# When no --debug path was given and the LXC log level is DEBUG or TRACE,
# log to "nvidia.log" in a "hook" directory derived from LXC_ROOTFS_PATH.
# Both LXC variables are read with a default so that this optional feature
# cannot abort the hook under `set -u` when one of them is not exported.
if [ -z "${CLI_DEBUG}" ]; then
    if [ "${LXC_LOG_LEVEL:-}" = "DEBUG" ] || [ "${LXC_LOG_LEVEL:-}" = "TRACE" ]; then
        rootfs_path="${LXC_ROOTFS_PATH:-}"
        # Drop everything up to and including the first ':' (presumably a
        # storage-backend prefix; verify against LXC's documentation).
        rootfs_path="${rootfs_path#*:}"
        if [ -n "${rootfs_path}" ]; then
            # Replace a trailing "rootfs" path component with "hook".
            hookdir="${rootfs_path/%rootfs/hook}"
            if mkdir -p "${hookdir}"; then
                CLI_DEBUG="${hookdir}/nvidia.log"
            fi
        fi
    fi
fi
146 | ||
# A '@' prefix means a host path.
# Without an explicit --ldconfig, fall back to the ldconfig found on the
# host, if any.
if [ -z "${CLI_LDCONFIG}" ] && host_ldconfig=$(get_ldconfig); then
    CLI_LDCONFIG="@${host_ldconfig}"
fi
153 | ||
# https://github.com/nvidia/nvidia-container-runtime#nvidia_visible_devices
# Empty when the variable is unset, its value otherwise.
CLI_DEVICES="${NVIDIA_VISIBLE_DEVICES-}"

# https://github.com/nvidia/nvidia-container-runtime#nvidia_driver_capabilities
# The comma-separated list becomes a space-separated one.
CLI_CAPABILITIES="${NVIDIA_DRIVER_CAPABILITIES-}"
CLI_CAPABILITIES="${CLI_CAPABILITIES//,/ }"

# https://github.com/nvidia/nvidia-container-runtime#nvidia_require_
# Concatenate the values of all exported variables whose name starts with
# NVIDIA_REQUIRE_, each preceded by a space.
CLI_REQUIREMENTS=""
for req in $(compgen -e "NVIDIA_REQUIRE_"); do
    CLI_REQUIREMENTS+=" ${!req}"
done
171 | ||
# https://github.com/nvidia/nvidia-container-runtime#cuda_version
if [ -n "${CUDA_VERSION+x}" ] && [ -z "${NVIDIA_REQUIRE_CUDA+x}" ]; then
    # Legacy CUDA image detected, default to all devices and all driver capabilities.
    CLI_DEVICES="${CLI_DEVICES:-all}"
    CLI_CAPABILITIES="${CLI_CAPABILITIES:-all}"

    # Transform CUDA_VERSION=X.Y to a "cuda>=X.Y" constraint for nvidia-container-cli.
    if [[ "${CUDA_VERSION}" =~ ^[0-9]+\.[0-9]+ ]]; then
        CLI_REQUIREMENTS+=" cuda>=${BASH_REMATCH[0]}"
    fi
fi

# Expand the "all" shorthand; with no capability requested at all, fall back
# to "utility".
case "${CLI_CAPABILITIES}" in
    all) CLI_CAPABILITIES="compute compat32 graphics utility video" ;;
    "") CLI_CAPABILITIES="utility" ;;
esac
196 | ||
# Assemble the nvidia-container-cli command line. The arrays start out truly
# empty and are expanded quoted at the end, so every argument (for example a
# log or ldconfig path containing spaces) reaches the CLI as a single word.
global_args=()
configure_args=()

if [ -n "${CLI_DEBUG}" ]; then
    echo "INFO: Writing nvidia-container-cli log at ${CLI_DEBUG}." >&2
    global_args+=("--debug=${CLI_DEBUG}")
fi

if [ "${CLI_LOAD_KMODS}" = "true" ]; then
    global_args+=(--load-kmods)
fi

if [ "${USERNS}" = "yes" ]; then
    global_args+=(--user)
    configure_args+=(--no-cgroups)
fi

if [ -n "${CLI_LDCONFIG}" ]; then
    configure_args+=(--ldconfig="${CLI_LDCONFIG}")
fi

if [ -n "${CLI_DEVICES}" ] && [ "${CLI_DEVICES}" != "none" ]; then
    configure_args+=(--device="${CLI_DEVICES}")
fi

# CLI_CAPABILITIES is a space-separated list: word splitting is intended.
for cap in ${CLI_CAPABILITIES}; do
    if arg=$(capability_to_cli "${cap}"); then
        configure_args+=("${arg}")
    else
        echo "ERROR: Unknown driver capability \"${cap}\"." >&2
        exit 1
    fi
done

if [ "${CLI_DISABLE_REQUIRE}" = "false" ]; then
    # One --require flag per whitespace-separated constraint: word splitting
    # is intended here as well.
    for req in ${CLI_REQUIREMENTS}; do
        configure_args+=(--require="${req}")
    done
fi

# Trace the final command, then replace this shell with the CLI.
set -x
# The ${arr[@]+"${arr[@]}"} form expands to nothing for an empty array
# without tripping `set -u` on older bash releases.
exec nvidia-container-cli \
    ${global_args[@]+"${global_args[@]}"} \
    configure \
    ${configure_args[@]+"${configure_args[@]}"} \
    "${LXC_ROOTFS_MOUNT}"