]>
Commit | Line | Data |
---|---|---|
f6783a7a | 1 | #! /usr/bin/env python |
58397e6c KT |
2 | # |
3 | # BSD LICENSE | |
4 | # | |
5 | # Copyright(c) 2010-2014 Intel Corporation. All rights reserved. | |
6 | # All rights reserved. | |
7 | # | |
8 | # Redistribution and use in source and binary forms, with or without | |
9 | # modification, are permitted provided that the following conditions | |
10 | # are met: | |
11 | # | |
12 | # * Redistributions of source code must retain the above copyright | |
13 | # notice, this list of conditions and the following disclaimer. | |
14 | # * Redistributions in binary form must reproduce the above copyright | |
15 | # notice, this list of conditions and the following disclaimer in | |
16 | # the documentation and/or other materials provided with the | |
17 | # distribution. | |
18 | # * Neither the name of Intel Corporation nor the names of its | |
19 | # contributors may be used to endorse or promote products derived | |
20 | # from this software without specific prior written permission. | |
21 | # | |
22 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
23 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
24 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
25 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
26 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
27 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
28 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
29 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
30 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
31 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
32 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
33 | # | |
34 | ||
35 | ##################################################################### | |
36 | # This script is designed to modify the call to the QEMU emulator | |
37 | # to support userspace vhost when starting a guest machine through | |
38 | # libvirt with vhost enabled. The steps to enable this are as follows | |
39 | # and should be run as root: | |
40 | # | |
41 | # 1. Place this script in a libvirtd's binary search PATH ($PATH) | |
42 | # A good location would be in the same directory that the QEMU | |
43 | # binary is located | |
44 | # | |
45 | # 2. Ensure that the script has the same owner/group and file | |
46 | # permissions as the QEMU binary | |
47 | # | |
48 | # 3. Update the VM xml file using "virsh edit VM.xml" | |
49 | # | |
50 | # 3.a) Set the VM to use the launch script | |
51 | # | |
52 | # Set the emulator path contained in the | |
53 | # <emulator></emulator> tags | |
54 | # | |
55 | # e.g. replace <emulator>/usr/bin/qemu-kvm</emulator> | |
56 | # with <emulator>/usr/bin/qemu-wrap.py</emulator> | |
57 | # | |
58 | # 3.b) Set the VM's devices to use vhost-net offload | |
59 | # | |
60 | # <interface type="network"> | |
61 | # <model type="virtio"/> | |
62 | # <driver name="vhost"/> | |
63 | # </interface> | |
64 | # | |
65 | # 4. Enable libvirt to access our userspace device file by adding it to | |
66 | # controllers cgroup for libvirtd using the following steps | |
67 | # | |
68 | # 4.a) In /etc/libvirt/qemu.conf add/edit the following lines: | |
69 | # 1) cgroup_controllers = [ ... "devices", ... ] | |
70 | # 2) clear_emulator_capabilities = 0 | |
71 | # 3) user = "root" | |
72 | # 4) group = "root" | |
73 | # 5) cgroup_device_acl = [ | |
74 | # "/dev/null", "/dev/full", "/dev/zero", | |
75 | # "/dev/random", "/dev/urandom", | |
76 | # "/dev/ptmx", "/dev/kvm", "/dev/kqemu", | |
77 | # "/dev/rtc", "/dev/hpet", "/dev/net/tun", | |
78 | # "/dev/<devbase-name>-<index>", | |
79 | # "/dev/hugepages" | |
80 | # ] | |
81 | # | |
82 | # 4.b) Disable SELinux or set to permissive mode | |
83 | # | |
84 | # 4.c) Mount cgroup device controller | |
85 | # "mkdir /dev/cgroup" | |
86 | # "mount -t cgroup none /dev/cgroup -o devices" | |
87 | # | |
88 | # 4.d) Set hugetlbfs_mount variable - ( Optional ) | |
89 | # VMs using userspace vhost must use hugepage backed | |
90 | # memory. This can be enabled in the libvirt XML | |
91 | # config by adding a memory backing section to the | |
92 | # XML config e.g. | |
93 | # <memoryBacking> | |
94 | # <hugepages/> | |
95 | # </memoryBacking> | |
96 | # This memory backing section should be added after the | |
97 | # <memory> and <currentMemory> sections. This will add | |
98 | # flags "-mem-prealloc -mem-path <path>" to the QEMU | |
99 | # command line. The hugetlbfs_mount variable can be used | |
100 | # to override the default <path> passed through by libvirt. | |
101 | # | |
102 | # if "-mem-prealloc" or "-mem-path <path>" are not passed | |
103 | # through and a vhost device is detected then these options will | |
104 | # be automatically added by this script. This script will detect | |
105 | # the system hugetlbfs mount point to be used for <path>. The | |
106 | # default <path> for this script can be overridden by the | |
107 | # hugetlbfs_dir variable in the configuration section of this script. | |
108 | # | |
109 | # | |
110 | # 4.e) Restart the libvirtd system process | |
111 | # e.g. on Fedora "systemctl restart libvirtd.service" | |
112 | # | |
113 | # | |
114 | # 4.f) Edit the Configuration Parameters section of this script | |
115 | # to point to the correct emulator location and set any | |
116 | # additional options | |
117 | # | |
118 | # The script modifies the libvirtd Qemu call by modifying/adding | |
119 | # options based on the configuration parameters below. | |
120 | # NOTE: | |
121 | # emul_path and us_vhost_path must be set | |
122 | # All other parameters are optional | |
123 | ##################################################################### | |
124 | ||
125 | ||
126 | ############################################# | |
127 | # Configuration Parameters | |
128 | ############################################# | |
129 | #Path to the real QEMU binary that this wrapper launches | |
130 | emul_path = "/usr/local/bin/qemu-system-x86_64" | |
131 | ||
132 | #Path to userspace vhost device file | |
133 | # This filename should match the --dev-basename --dev-index parameters of | |
134 | # the command used to launch the userspace vhost sample application e.g. | |
135 | # if the sample app launch command is: | |
136 | # ./build/vhost-switch ..... --dev-basename usvhost --dev-index 1 | |
137 | # then this variable should be set to: | |
138 | # us_vhost_path = "/dev/usvhost-1" | |
139 | us_vhost_path = "/dev/usvhost-1" | |
140 | ||
141 | #List of additional user defined emulation options. These options will | |
142 | #be added to all Qemu calls | |
143 | emul_opts_user = [] | |
144 | ||
145 | #List of additional user defined emulation options for vhost only. | |
146 | #These options will only be added to vhost enabled guests | |
147 | emul_opts_user_vhost = [] | |
148 | ||
149 | #For all VHOST enabled VMs, the VM memory is preallocated from hugetlbfs | |
150 | # Set this variable to one to enable this option for all VMs (NOTE(review): not referenced by the code below) | |
151 | use_huge_all = 0 | |
152 | ||
153 | #Instead of autodetecting, override the hugetlbfs directory by setting | |
154 | #this variable (an empty string means autodetect from /proc/mounts) | |
155 | hugetlbfs_dir = "" | |
156 | ||
157 | ############################################# | |
158 | ||
159 | ||
160 | ############################################# | |
161 | # ****** Do Not Modify Below this Line ****** | |
162 | ############################################# | |
163 | ||
164 | import sys, os, subprocess | |
165 | import time | |
166 | import signal | |
167 | ||
168 | ||
169 | #List of userspace vhost file descriptors opened by this wrapper; closed after QEMU exits | |
170 | fd_list = [] | |
171 | ||
172 | #Additional virtio flags appended to the -device argument that follows a vhost -netdev | |
173 | vhost_flags = [ "csum=off", | |
174 | "gso=off", | |
175 | "guest_tso4=off", | |
176 | "guest_tso6=off", | |
177 | "guest_ecn=off" | |
178 | ] | |
179 | ||
180 | #Path of the pidfile passed to QEMU via -pidfile; named after this wrapper's own pid | |
181 | qemu_pid = "/tmp/%d-qemu.pid" % os.getpid() | |
182 | ||
183 | ############################################# | |
184 | # Signal handler to kill Qemu subprocess | |
185 | ############################################# | |
def kill_qemu_process(signum, stack):
    """Signal handler: send SIGTERM to the QEMU process group.

    Reads the pid stored in the ``qemu_pid`` pidfile (QEMU is started with
    ``-pidfile`` pointing at it) and signals the process group with that id.

    signum, stack -- standard signal-handler arguments; both are unused.

    Raises IOError if the pidfile cannot be opened, ValueError if it does
    not contain an integer, and OSError if the process group cannot be
    signalled.
    """
    # 'with' guarantees the pidfile is closed even if parsing the pid fails.
    with open(qemu_pid, 'r') as pidfile:
        pid = int(pidfile.read())

    os.killpg(pid, signal.SIGTERM)
191 | ||
192 | ||
193 | ############################################# | |
194 | # Find the system hugefile mount point. | |
195 | # Note: | |
196 | # if multiple hugetlbfs mount points exist | |
197 | # then the first one found will be used | |
198 | ############################################# | |
def find_huge_mount():
    """Return the hugetlbfs mount point to use for QEMU's -mem-path.

    If the ``hugetlbfs_dir`` configuration variable is non-empty it is
    returned as-is. Otherwise /proc/mounts is scanned and the mount point of
    the first entry whose filesystem type is ``hugetlbfs`` is returned.

    Prints a message and exits with status 1 when /proc/mounts does not
    exist or contains no hugetlbfs entry.
    """
    if hugetlbfs_dir:
        return hugetlbfs_dir

    if not os.access("/proc/mounts", os.F_OK):
        print("/proc/mounts not found")
        sys.exit(1)

    huge_mount = ""
    # The context manager closes the file on every path, including the
    # early 'break'.
    with open("/proc/mounts", "r") as mounts:
        for line in mounts:
            fields = line.split(" ")
            # Expected layout: <device> <mount point> <fs type> ...
            # Skip lines too short to carry a filesystem type.
            if len(fields) > 2 and fields[2] == 'hugetlbfs':
                huge_mount = fields[1]
                break

    if not huge_mount:
        print("Failed to find hugetlbfs mount point")
        sys.exit(1)

    return huge_mount
225 | ||
226 | ||
227 | ############################################# | |
228 | # Get a userspace Vhost file descriptor | |
229 | ############################################# | |
def get_vhost_fd():
    """Open the userspace vhost device file and return its file descriptor.

    The device path comes from the ``us_vhost_path`` configuration variable
    and is opened read/write. If the path does not exist, a message is
    printed and the script exits with status 1.
    """
    # Guard clause: bail out early when the device node is missing.
    if not os.access(us_vhost_path, os.F_OK):
        print ("US-Vhost file %s not found" %us_vhost_path)
        exit (1)

    return os.open(us_vhost_path, os.O_RDWR)
239 | ||
240 | ||
241 | ############################################# | |
242 | # Check for vhostfd. if found then replace | |
243 | # with our own vhost fd and append any vhost | |
244 | # flags onto the end | |
245 | ############################################# | |
def modify_netdev_arg(arg):
    """Rewrite a -netdev option string to use a userspace vhost descriptor.

    ``arg`` is the comma-separated value that follows ``-netdev``. Every
    option whose text starts with ``vhost`` is removed. If at least one was
    removed, a new descriptor is obtained from get_vhost_fd(), recorded in
    the module-level ``fd_list``, and ``vhost=on,vhostfd=<fd>`` is appended.

    Returns the rebuilt comma-separated option string. When no vhost option
    is present the remaining options are re-joined unchanged and no
    descriptor is opened.

    NOTE(review): the prefix match treats any ``vhost*`` option (including
    e.g. ``vhost=off``) as a request for vhost; this mirrors the original
    behaviour.
    """
    netdev_opts = arg.split(",")
    new_opts = [opt for opt in netdev_opts if not opt.startswith("vhost")]

    # Anything filtered out means the guest asked for vhost.
    if len(new_opts) != len(netdev_opts):
        new_fd = get_vhost_fd()
        new_opts.append('vhost=on')
        new_opts.append('vhostfd=' + str(new_fd))
        fd_list.append(new_fd)

    return ",".join(new_opts)
278 | ||
279 | ||
280 | ############################################# | |
281 | # Main | |
282 | ############################################# | |
def main():
    """Launch the real QEMU binary with userspace-vhost aware arguments.

    Walks the arguments this wrapper was called with (sys.argv[1:]) and:
      * rewrites the value of every ``-netdev`` via modify_netdev_arg();
      * when that netdev uses vhost and is immediately followed by a
        ``-device <value>`` pair, appends ``vhost_flags`` to the value;
      * records whether ``-mem-prealloc`` / ``-mem-path`` were supplied.

    The final command is: ``emul_path``, then ``-mem-prealloc`` and
    ``-mem-path <hugetlbfs mount>`` if a vhost device is in use and they
    were not supplied, then ``emul_opts_user``, then
    ``emul_opts_user_vhost`` (vhost guests only), then the rewritten
    arguments, then ``-pidfile <qemu_pid>``.

    QEMU is started in its own session. SIGTERM/SIGINT/SIGHUP/SIGQUIT
    received by this wrapper are forwarded through kill_qemu_process().
    After QEMU exits, the vhost descriptors are closed and the pidfile is
    removed.
    """
    # Function-scope import so this block does not rely on the file's
    # top-level import list.
    import shlex

    argv = sys.argv
    argc = len(argv)
    new_args = []
    mem_prealloc_set = False
    mem_path_set = False

    # argv[0] is this wrapper script itself; it is never forwarded.
    num = 1
    while num < argc:
        arg = argv[num]
        new_args.append(arg)

        # A trailing '-netdev' with no value is passed through untouched
        # instead of indexing past the end of argv.
        if arg == '-netdev' and num + 1 < argc:
            num_vhost_devs = len(fd_list)
            num += 1
            new_args.append(modify_netdev_arg(argv[num]))

            # fd_list grew, so this netdev uses vhost. Only when the very
            # next pair is '-device <value>' are the vhost flags appended;
            # anything else is left for the next loop iteration so that
            # options such as -mem-path are still detected.
            if (num_vhost_devs < len(fd_list)
                    and num + 2 < argc
                    and argv[num + 1] == '-device'):
                new_args.append(argv[num + 1])
                new_args.append(",".join([argv[num + 2]] + vhost_flags))
                num += 2
        elif arg == '-mem-prealloc':
            mem_prealloc_set = True
        elif arg == '-mem-path':
            mem_path_set = True

        num += 1

    using_vhost = len(fd_list) > 0

    # Build an argv list rather than a shell string so the arguments
    # received from the caller reach QEMU exactly as given (no re-splitting
    # on spaces, no shell interpretation). The configuration strings are
    # split shell-style so entries such as "-opt value" keep working.
    emul_call = shlex.split(emul_path)

    # Add prealloc mem options if using vhost and not already added
    if using_vhost and not mem_prealloc_set:
        emul_call.append("-mem-prealloc")

    # Add mempath mem options if using vhost and not already added
    if using_vhost and not mem_path_set:
        emul_call.extend(["-mem-path", find_huge_mount()])

    # User options for every guest
    for opt in emul_opts_user:
        emul_call.extend(shlex.split(opt))

    # User options for vhost guests only
    if using_vhost:
        for opt in emul_opts_user_vhost:
            emul_call.extend(shlex.split(opt))

    emul_call.extend(new_args)
    emul_call.extend(["-pidfile", qemu_pid])

    # close_fds=False: the vhost descriptors opened above (and any
    # descriptors handed to this wrapper by its caller) must be inherited by
    # QEMU. os.setsid puts QEMU in its own session/process group, which is
    # what kill_qemu_process() signals.
    process = subprocess.Popen(emul_call, close_fds=False,
                               preexec_fn=os.setsid)

    try:
        for sig in [signal.SIGTERM, signal.SIGINT, signal.SIGHUP,
                    signal.SIGQUIT]:
            signal.signal(sig, kill_qemu_process)

        process.wait()
    finally:
        # Close usvhost files
        for fd in fd_list:
            os.close(fd)
        # Cleanup temporary files
        if os.access(qemu_pid, os.F_OK):
            os.remove(qemu_pid)
385 | ||
386 | ||
387 | ||
388 | if __name__ == "__main__":  # run the wrapper only when executed as a script | |
389 | main() |