]> git.proxmox.com Git - qemu-server.git/blob - PVE/QemuServer.pm
pending-delete: remember force-deletes
[qemu-server.git] / PVE / QemuServer.pm
1 package PVE::QemuServer;
2
3 use strict;
4 use warnings;
5 use POSIX;
6 use IO::Handle;
7 use IO::Select;
8 use IO::File;
9 use IO::Dir;
10 use IO::Socket::UNIX;
11 use File::Basename;
12 use File::Path;
13 use File::stat;
14 use Getopt::Long;
15 use Digest::SHA;
16 use Fcntl ':flock';
17 use Cwd 'abs_path';
18 use IPC::Open3;
19 use JSON;
20 use Fcntl;
21 use PVE::SafeSyslog;
22 use Storable qw(dclone);
23 use PVE::Exception qw(raise raise_param_exc);
24 use PVE::Storage;
25 use PVE::Tools qw(run_command lock_file lock_file_full file_read_firstline dir_glob_foreach);
26 use PVE::JSONSchema qw(get_standard_option);
27 use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file cfs_lock_file);
28 use PVE::INotify;
29 use PVE::ProcFSTools;
30 use PVE::QMPClient;
31 use PVE::RPCEnvironment;
32 use Time::HiRes qw(gettimeofday);
33
# Lookup table keyed by storage type name.
# NOTE(review): presumably the storage types whose snapshots are handled
# by qemu itself - confirm against the code that reads this table.
my $qemu_snap_storage = {
    rbd => 1,
    sheepdog => 1,
};

# CPU information of this host, read once when the module is loaded.
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
37
# Note about locking: we use flock on the config file to protect
# against concurrent actions.
# Additionally, we have a 'lock' setting in the config file. This
# can be set to 'migrate', 'backup', 'snapshot' or 'rollback'. Most actions are not
# allowed when such a lock is set. But you can ignore this kind of
# lock with the --skiplock flag.
44
# Register the parser/writer callbacks for files below '/qemu-server/'.
cfs_register_file(
    '/qemu-server/',
    \&parse_vm_config,
    \&write_vm_config,
);

# Standard API options that are shared between several calls.
PVE::JSONSchema::register_standard_option(
    'skiplock',
    {
        description => "Ignore locks - only root is allowed to use this option.",
        type => 'boolean',
        optional => 1,
    },
);

PVE::JSONSchema::register_standard_option(
    'pve-qm-stateuri',
    {
        description => "Some command save/restore state from this location.",
        type => 'string',
        maxLength => 128,
        optional => 1,
    },
);

PVE::JSONSchema::register_standard_option(
    'pve-snapshot-name',
    {
        description => "The name of the snapshot.",
        type => 'string',
        format => 'pve-configid',
        maxLength => 40,
    },
);
67
68 #no warnings 'redefine';
69
# Write $value into the control file $option of the cgroup scope that
# belongs to VM $vmid, below the given cgroup $controller.
# Returns whatever PVE::ProcFSTools::write_proc_entry() returns.
sub cgroups_write {
    my ($controller, $vmid, $option, $value) = @_;

    my $scope_dir = "/sys/fs/cgroup/$controller/qemu.slice/$vmid.scope";

    PVE::ProcFSTools::write_proc_entry("$scope_dir/$option", $value);
}
77
my $nodename = PVE::INotify::nodename();

# Locations used by this module.
my $confdir = "/etc/pve/nodes/$nodename/qemu-server";
my $var_run_tmpdir = "/var/run/qemu-server";
my $lock_dir = "/var/lock/qemu-server";
my $pcisysfs = "/sys/bus/pci";

# Create the directories when the module is loaded (the node directory
# first, because $confdir lives below it). The mkdir results are not
# checked, so an already existing directory is not an error.
foreach my $dir ("/etc/pve/nodes/$nodename", $confdir, $var_run_tmpdir, $lock_dir) {
    mkdir $dir;
}
91
# Schema of the VM configuration options (JSON schema style). Indexed
# options (netN, ideN, ...) are added to this hash further below.
# The description texts are user visible, so keep them free of typos.
my $confdesc = {
    onboot => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default => 0,
    },
    autostart => {
        optional => 1,
        type => 'boolean',
        description => "Automatic restart after crash (currently ignored).",
        default => 0,
    },
    hotplug => {
        optional => 1,
        type => 'string', format => 'pve-hotplug-features',
        description => "Selectively enable hotplug features. This is a comma separated list of hotplug features: 'network', 'disk', 'cpu', 'memory' and 'usb'. Use '0' to disable hotplug completely. Value '1' is an alias for the default 'network,disk,usb'.",
        default => 'network,disk,usb',
    },
    reboot => {
        optional => 1,
        type => 'boolean',
        description => "Allow reboot. If set to '0' the VM exit on reboot.",
        default => 1,
    },
    lock => {
        optional => 1,
        type => 'string',
        description => "Lock/unlock the VM.",
        enum => [qw(migrate backup snapshot rollback)],
    },
    cpulimit => {
        optional => 1,
        type => 'number',
        description => "Limit of CPU usage. Note if the computer has 2 CPUs, it has total of '2' CPU time. Value '0' indicates no CPU limit.",
        minimum => 0,
        maximum => 128,
        default => 0,
    },
    cpuunits => {
        optional => 1,
        type => 'integer',
        description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to weights of all the other running VMs.\n\nNOTE: You can disable fair-scheduler configuration by setting this to 0.",
        minimum => 0,
        maximum => 500000,
        default => 1000,
    },
    memory => {
        optional => 1,
        type => 'integer',
        description => "Amount of RAM for the VM in MB. This is the maximum available memory when you use the balloon device.",
        minimum => 16,
        default => 512,
    },
    balloon => {
        optional => 1,
        type => 'integer',
        description => "Amount of target RAM for the VM in MB. Using zero disables the balloon driver.",
        minimum => 0,
    },
    shares => {
        optional => 1,
        type => 'integer',
        description => "Amount of memory shares for auto-ballooning. The larger the number is, the more memory this VM gets. Number is relative to weights of all other running VMs. Using zero disables auto-ballooning",
        minimum => 0,
        maximum => 50000,
        default => 1000,
    },
    keyboard => {
        optional => 1,
        type => 'string',
        description => "Keyboard layout for vnc server. Default is read from the datacenter configuration file.",
        enum => PVE::Tools::kvmkeymaplist(),
        default => 'en-us',
    },
    name => {
        optional => 1,
        type => 'string', format => 'dns-name',
        description => "Set a name for the VM. Only used on the configuration web interface.",
    },
    scsihw => {
        optional => 1,
        type => 'string',
        description => "scsi controller model",
        enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)],
        default => 'lsi',
    },
    description => {
        optional => 1,
        type => 'string',
        description => "Description for the VM. Only used on the configuration web interface. This is saved as comment inside the configuration file.",
    },
    ostype => {
        optional => 1,
        type => 'string',
        enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 l24 l26 solaris)],
        description => <<EODESC,
Used to enable special optimization/features for specific
operating systems:

other => unspecified OS
wxp => Microsoft Windows XP
w2k => Microsoft Windows 2000
w2k3 => Microsoft Windows 2003
w2k8 => Microsoft Windows 2008
wvista => Microsoft Windows Vista
win7 => Microsoft Windows 7
win8 => Microsoft Windows 8/2012
l24 => Linux 2.4 Kernel
l26 => Linux 2.6/3.X Kernel
solaris => solaris/opensolaris/openindiana kernel

other|l24|l26|solaris ... no special behaviour
wxp|w2k|w2k3|w2k8|wvista|win7|win8 ... use --localtime switch
EODESC
    },
    boot => {
        optional => 1,
        type => 'string',
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n).",
        pattern => '[acdn]{1,4}',
        default => 'cdn',
    },
    bootdisk => {
        optional => 1,
        type => 'string', format => 'pve-qm-bootdisk',
        description => "Enable booting from specified disk.",
        pattern => '(ide|sata|scsi|virtio)\d+',
    },
    smp => {
        optional => 1,
        type => 'integer',
        description => "The number of CPUs. Please use option -sockets instead.",
        minimum => 1,
        default => 1,
    },
    sockets => {
        optional => 1,
        type => 'integer',
        description => "The number of CPU sockets.",
        minimum => 1,
        default => 1,
    },
    cores => {
        optional => 1,
        type => 'integer',
        description => "The number of cores per socket.",
        minimum => 1,
        default => 1,
    },
    numa => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable Numa.",
        default => 0,
    },
    vcpus => {
        optional => 1,
        type => 'integer',
        description => "Number of hotplugged vcpus.",
        minimum => 1,
        default => 0,
    },
    acpi => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable ACPI.",
        default => 1,
    },
    agent => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable Qemu GuestAgent.",
        default => 0,
    },
    kvm => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable KVM hardware virtualization.",
        default => 1,
    },
    tdf => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable time drift fix.",
        default => 0,
    },
    localtime => {
        optional => 1,
        type => 'boolean',
        description => "Set the real time clock to local time. This is enabled by default if ostype indicates a Microsoft OS.",
    },
    freeze => {
        optional => 1,
        type => 'boolean',
        description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
    },
    vga => {
        optional => 1,
        type => 'string',
        description => "Select VGA type. If you want to use high resolution modes (>= 1280x1024x16) then you should use option 'std' or 'vmware'. Default is 'std' for win8/win7/w2k8, and 'cirrus' for other OS types. Option 'qxl' enables the SPICE display server. You can also run without any graphic card using a serial device as terminal.",
        enum => [qw(std cirrus vmware qxl serial0 serial1 serial2 serial3 qxl2 qxl3 qxl4)],
    },
    watchdog => {
        optional => 1,
        type => 'string', format => 'pve-qm-watchdog',
        typetext => '[[model=]i6300esb|ib700] [,[action=]reset|shutdown|poweroff|pause|debug|none]',
        description => "Create a virtual hardware watchdog device. Once enabled (by a guest action), the watchdog must be periodically polled by an agent inside the guest or else the guest will be restarted (or execute the action specified)",
    },
    startdate => {
        optional => 1,
        type => 'string',
        typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
        description => "Set the initial date of the real time clock. Valid format for date are: 'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
        pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
        default => 'now',
    },
    startup => get_standard_option('pve-startup-order'),
    template => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable Template.",
        default => 0,
    },
    args => {
        optional => 1,
        type => 'string',
        description => <<EODESCR,
Note: this option is for experts only. It allows you to pass arbitrary arguments to kvm, for example:

args: -no-reboot -no-hpet
EODESCR
    },
    tablet => {
        optional => 1,
        type => 'boolean',
        default => 1,
        description => "Enable/disable the usb tablet device. This device is usually needed to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with normal VNC clients. If you're running lots of console-only guests on one host, you may consider disabling this to save some context switches. This is turned off by default if you use spice (vga=qxl).",
    },
    migrate_speed => {
        optional => 1,
        type => 'integer',
        description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
        minimum => 0,
        default => 0,
    },
    migrate_downtime => {
        optional => 1,
        type => 'number',
        description => "Set maximum tolerated downtime (in seconds) for migrations.",
        minimum => 0,
        default => 0.1,
    },
    cdrom => {
        optional => 1,
        type => 'string', format => 'pve-qm-drive',
        typetext => 'volume',
        description => "This is an alias for option -ide2",
    },
    cpu => {
        optional => 1,
        description => "Emulated CPU type.",
        type => 'string',
        enum => [ qw(486 athlon pentium pentium2 pentium3 coreduo core2duo kvm32 kvm64 qemu32 qemu64 phenom Conroe Penryn Nehalem Westmere SandyBridge IvyBridge Haswell Broadwell Opteron_G1 Opteron_G2 Opteron_G3 Opteron_G4 Opteron_G5 host) ],
        default => 'kvm64',
    },
    parent => get_standard_option('pve-snapshot-name', {
        optional => 1,
        description => "Parent snapshot name. This is used internally, and should not be modified.",
    }),
    snaptime => {
        optional => 1,
        description => "Timestamp for snapshots.",
        type => 'integer',
        minimum => 0,
    },
    vmstate => {
        optional => 1,
        type => 'string', format => 'pve-volume-id',
        description => "Reference to a volume which stores the VM state. This is used internally for snapshots.",
    },
    machine => {
        description => "Specify the Qemu machine type.",
        type => 'string',
        pattern => '(pc|pc(-i440fx)?-\d+\.\d+|q35|pc-q35-\d+\.\d+)',
        maxLength => 40,
        optional => 1,
    },
    smbios1 => {
        description => "Specify SMBIOS type 1 fields.",
        type => 'string', format => 'pve-qm-smbios1',
        typetext => "[manufacturer=str][,product=str][,version=str][,serial=str] [,uuid=uuid][,sku=str][,family=str]",
        maxLength => 256,
        optional => 1,
    },
};
388
389 # what about other qemu settings ?
390 #cpu => 'string',
391 #machine => 'string',
392 #fda => 'file',
393 #fdb => 'file',
394 #mtdblock => 'file',
395 #sd => 'file',
396 #pflash => 'file',
397 #snapshot => 'bool',
398 #bootp => 'file',
399 ##tftp => 'dir',
400 ##smb => 'dir',
401 #kernel => 'file',
402 #append => 'string',
403 #initrd => 'file',
404 ##soundhw => 'string',
405
# Publish every option registered so far as standard option 'pve-qm-<name>'.
foreach my $opt (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$opt", $confdesc->{$opt});
}
409
# Number of slots per indexed option family; valid indices are 0 .. MAX-1.
my $MAX_IDE_DISKS       = 4;
my $MAX_SCSI_DISKS      = 14;
my $MAX_VIRTIO_DISKS    = 16;
my $MAX_SATA_DISKS      = 6;
my $MAX_USB_DEVICES     = 5;
my $MAX_NETS            = 32;
my $MAX_UNUSED_DISKS    = 8;
my $MAX_HOSTPCI_DEVICES = 4;
my $MAX_SERIAL_PORTS    = 4;
my $MAX_PARALLEL_PORTS  = 3;
my $MAX_NUMA            = 8;

# Memory related limits.
# NOTE(review): units are not visible here (presumably MB, like the
# 'memory' option) - confirm where these are used.
my $MAX_MEM   = 4194304;
my $STATICMEM = 1024;
423
# Schema shared by all numaN options.
# The typetext is shown to users, so its brackets must be balanced.
my $numadesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-numanode',
    typetext => "cpus=<id[-id]>,memory=<mb>[[,hostnodes=<id[-id]>] [,policy=<preferred|bind|interleave>]]",
    description => "numa topology",
};
PVE::JSONSchema::register_standard_option("pve-qm-numanode", $numadesc);

# numa0 .. numa($MAX_NUMA - 1) all use the same descriptor
foreach my $i (0 .. $MAX_NUMA - 1) {
    $confdesc->{"numa$i"} = $numadesc;
}
435
# Supported network card models, and the same list as sorted,
# space separated text for use in descriptions.
my $nic_model_list = [qw(
    rtl8139 ne2k_pci e1000 pcnet virtio
    ne2k_isa i82551 i82557b i82559er vmxnet3
    e1000-82540em e1000-82544gc e1000-82545em
)];
my $nic_model_list_txt = join(' ', sort @$nic_model_list);
440
# Schema shared by all netN options.
# The typetext is shown to users, so its brackets must be balanced.
my $netdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-net',
    typetext => "MODEL=XX:XX:XX:XX:XX:XX [,bridge=<dev>][,queues=<nbqueues>][,rate=<mbps>] [,tag=<vlanid>][,firewall=0|1][,link_down=0|1]",
    description => <<EODESCR,
Specify network devices.

MODEL is one of: $nic_model_list_txt

XX:XX:XX:XX:XX:XX should be an unique MAC address. This is
automatically generated if not specified.

The bridge parameter can be used to automatically add the interface to a bridge device. The Proxmox VE standard bridge is called 'vmbr0'.

Option 'rate' is used to limit traffic bandwidth from and to this interface. It is specified as floating point number, unit is 'Megabytes per second'.

If you specify no bridge, we create a kvm 'user' (NATed) network device, which provides DHCP and DNS services. The following addresses are used:

10.0.2.2 Gateway
10.0.2.3 DNS Server
10.0.2.4 SMB Server

The DHCP server assign addresses to the guest starting from 10.0.2.15.

EODESCR
};
PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);

# net0 .. net($MAX_NETS - 1) all use the same descriptor
foreach my $i (0 .. $MAX_NETS - 1) {
    $confdesc->{"net$i"} = $netdesc;
}
472
# Set of valid drive option names (ide0, scsi3, ...); filled further below.
my $drivename_hash;

# Property list common to all drive types. The bus specific descriptors
# below append their extra properties to this text.
my $drive_typetext_common = '[volume=]volume [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback|unsafe|directsync] [,format=f] [,backup=yes|no] [,rerror=ignore|report|stop] [,werror=enospc|ignore|report|stop] [,aio=native|threads] [,discard=ignore|on]';

my $idedesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-drive',
    typetext => $drive_typetext_common,
    description => "Use volume as IDE hard disk or CD-ROM (n is 0 to " . ($MAX_IDE_DISKS - 1) . ").",
};
PVE::JSONSchema::register_standard_option("pve-qm-ide", $idedesc);

my $scsidesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-drive',
    typetext => $drive_typetext_common . ' [,iothread=on] [,queues=<nbqueues>]',
    description => "Use volume as SCSI hard disk or CD-ROM (n is 0 to " . ($MAX_SCSI_DISKS - 1) . ").",
};
PVE::JSONSchema::register_standard_option("pve-qm-scsi", $scsidesc);

my $satadesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-drive',
    typetext => $drive_typetext_common,
    description => "Use volume as SATA hard disk or CD-ROM (n is 0 to " . ($MAX_SATA_DISKS - 1) . ").",
};
PVE::JSONSchema::register_standard_option("pve-qm-sata", $satadesc);

my $virtiodesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-drive',
    typetext => $drive_typetext_common . ' [,iothread=on]',
    description => "Use volume as VIRTIO hard disk (n is 0 to " . ($MAX_VIRTIO_DISKS - 1) . ").",
};
PVE::JSONSchema::register_standard_option("pve-qm-virtio", $virtiodesc);
506
# Schema shared by all usbN options. The index range in the description
# is derived from $MAX_USB_DEVICES so both cannot get out of sync.
my $usbdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-usb-device',
    typetext => 'host=HOSTUSBDEVICE|spice',
    description => <<EODESCR,
Configure an USB device (n is 0 to @{[ $MAX_USB_DEVICES - 1 ]}). This can be used to
pass-through usb devices to the guest. HOSTUSBDEVICE syntax is:

'bus-port(.port)*' (decimal numbers) or
'vendor_id:product_id' (hexadecimal numbers)

You can use the 'lsusb -t' command to list existing usb devices.

Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.

The value 'spice' can be used to add a usb redirection device for spice.

EODESCR
};
PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);
527
# Schema shared by all hostpciN options (host PCI device pass-through).
my $hostpcidesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-hostpci',
    typetext => "[host=]HOSTPCIDEVICE [,driver=kvm|vfio] [,rombar=on|off] [,pcie=0|1] [,x-vga=on|off]",
    description => <<EODESCR,
Map host pci devices. HOSTPCIDEVICE syntax is:

'bus:dev.func' (hexadecimal numbers)

You can use the 'lspci' command to list existing pci devices.

The 'rombar' option determines whether or not the device's ROM will be visible in the guest's memory map (default is 'on').

Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.

Experimental: user reported problems with this option.
EODESCR
};
PVE::JSONSchema::register_standard_option("pve-qm-hostpci", $hostpcidesc);
547
# Schema shared by all serialN options. The index range in the
# description is derived from $MAX_SERIAL_PORTS so both stay in sync.
my $serialdesc = {
    optional => 1,
    type => 'string',
    pattern => '(/dev/.+|socket)',
    description => <<EODESCR,
Create a serial device inside the VM (n is 0 to @{[ $MAX_SERIAL_PORTS - 1 ]}), and pass through a host serial device (i.e. /dev/ttyS0), or create a unix socket on the host side (use 'qm terminal' to open a terminal connection).

Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.

Experimental: user reported problems with this option.
EODESCR
};
560
# Schema shared by all parallelN options. The index range in the
# description is derived from $MAX_PARALLEL_PORTS so both stay in sync.
my $paralleldesc = {
    optional => 1,
    type => 'string',
    # The alternation is grouped (like every other pattern in this file) so
    # that anchors added around the pattern apply to both alternatives
    # instead of only to the first/last one.
    # NOTE(review): confirm how PVE::JSONSchema anchors 'pattern'.
    pattern => '(/dev/parport\d+|/dev/usb/lp\d+)',
    description => <<EODESCR,
Map host parallel devices (n is 0 to @{[ $MAX_PARALLEL_PORTS - 1 ]}).

Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.

Experimental: user reported problems with this option.
EODESCR
};
573
# Add the indexed options (<name>0 .. <name>MAX-1) to the config schema.
# All indices of one family share the same descriptor.

foreach my $i (0 .. $MAX_PARALLEL_PORTS - 1) {
    $confdesc->{"parallel$i"} = $paralleldesc;
}

foreach my $i (0 .. $MAX_SERIAL_PORTS - 1) {
    $confdesc->{"serial$i"} = $serialdesc;
}

foreach my $i (0 .. $MAX_HOSTPCI_DEVICES - 1) {
    $confdesc->{"hostpci$i"} = $hostpcidesc;
}

# Drive options are additionally recorded in $drivename_hash, which is
# the lookup table behind valid_drivename().

foreach my $i (0 .. $MAX_IDE_DISKS - 1) {
    $drivename_hash->{"ide$i"} = 1;
    $confdesc->{"ide$i"} = $idedesc;
}

foreach my $i (0 .. $MAX_SATA_DISKS - 1) {
    $drivename_hash->{"sata$i"} = 1;
    $confdesc->{"sata$i"} = $satadesc;
}

foreach my $i (0 .. $MAX_SCSI_DISKS - 1) {
    $drivename_hash->{"scsi$i"} = 1;
    $confdesc->{"scsi$i"} = $scsidesc;
}

foreach my $i (0 .. $MAX_VIRTIO_DISKS - 1) {
    $drivename_hash->{"virtio$i"} = 1;
    $confdesc->{"virtio$i"} = $virtiodesc;
}

foreach my $i (0 .. $MAX_USB_DEVICES - 1) {
    $confdesc->{"usb$i"} = $usbdesc;
}
609
# Schema shared by all unusedN options.
my $unuseddesc = {
    optional => 1,
    type => 'string',
    format => 'pve-volume-id',
    description => "Reference to unused volumes.",
};

foreach my $i (0 .. $MAX_UNUSED_DISKS - 1) {
    $confdesc->{"unused$i"} = $unuseddesc;
}
619
# Cache for kvm_version(); 0 means "not determined yet".
my $kvm_api_version = 0;

# Return the KVM API version reported by an ioctl on /dev/kvm.
# The value is cached after the first successful query. Returns 0 when
# /dev/kvm cannot be opened, or when the ioctl yields a false value.
# NOTE(review): KVM_GET_API_VERSION() is not defined in the visible part
# of this file - confirm where it comes from.
sub kvm_version {

    return $kvm_api_version if $kvm_api_version;

    my $fh = IO::File->new("</dev/kvm");
    return 0 if !$fh;

    my $version = $fh->ioctl(KVM_GET_API_VERSION(), 0);
    $kvm_api_version = $version if $version;

    $fh->close();

    return $kvm_api_version;
}
637
# Cache for kvm_user_version().
my $kvm_user_version;

# Return the version of the kvm user space binary, taken from the first
# line of the 'kvm -help' output, or the string 'unknown' when that
# output does not contain a version. The result is cached.
sub kvm_user_version {

    return $kvm_user_version if $kvm_user_version;

    my $help_text = `kvm -help 2>/dev/null`;

    $kvm_user_version =
        ($help_text =~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)[,\s]/)
        ? $2
        : 'unknown';

    return $kvm_user_version;
}
655
# True when the character device /dev/vhost-net exists on this host
# (tested once, when the module is loaded).
my $kernel_has_vhost_net = -c '/dev/vhost-net';
657
# Return the names of all possible disk slots (ide0, ide1, ..., sataN).
sub disknames {
    # order is important - used to autoselect boot disk
    return (
        (map { "ide$_" } (0 .. ($MAX_IDE_DISKS - 1))),
        (map { "scsi$_" } (0 .. ($MAX_SCSI_DISKS - 1))),
        (map { "virtio$_" } (0 .. ($MAX_VIRTIO_DISKS - 1))),
        (map { "sata$_" } (0 .. ($MAX_SATA_DISKS - 1))),
    );
}
665
# Return true if $dev is one of the registered drive names
# (see $drivename_hash).
sub valid_drivename {
    my ($dev) = @_;

    return defined($drivename_hash->{$dev});
}
671
# Return true if $key is a known configuration option (see $confdesc).
sub option_exists {
    my ($key) = @_;

    return defined($confdesc->{$key});
}
676
# Return the list of supported network card models. This is the shared
# array reference itself, not a copy.
sub nic_models {
    return $nic_model_list;
}
680
# Return a hash reference that maps ostype identifiers to short, human
# readable operating system names. A new hash is built on every call.
sub os_list_description {

    return {
        other => 'Other',
        wxp => 'Windows XP',
        w2k => 'Windows 2000',
        w2k3 => 'Windows 2003',
        w2k8 => 'Windows 2008',
        wvista => 'Windows Vista',
        win7 => 'Windows 7',
        win8 => 'Windows 8/2012',
        l24 => 'Linux 2.4',
        l26 => 'Linux 2.6',
    };
}
696
# Cache for get_cdrom_path().
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1 and /dev/cdrom2 that is a
# symbolic link. A found path is cached for later calls. Returns undef
# when none of the candidates is a symbolic link.
sub get_cdrom_path {

    return $cdrom_path if $cdrom_path;

    foreach my $candidate (qw(/dev/cdrom /dev/cdrom1 /dev/cdrom2)) {
        return $cdrom_path = $candidate if -l $candidate;
    }

    # explicit result instead of leaking the value of the last file test
    return undef;
}
707
# Resolve the 'file' value of a CD-ROM drive to a path:
#   'cdrom'       -> the host CD-ROM device (see get_cdrom_path)
#   'none'        -> empty string
#   absolute path -> returned unchanged
#   anything else -> resolved through PVE::Storage::path()
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
721
# try to convert old style file names to volume IDs
#
# 'none', 'cdrom', /dev/... paths and values that already look like
# 'storage:volume' are returned unchanged. A plain file name is mapped to
# the 'local' storage ('local:iso/<file>' for cdrom media, otherwise
# 'local:<vmid>/<file>'). Any other value containing a '/' gives undef.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    my $keep_as_is = $file eq 'none'
        || $file eq 'cdrom'
        || $file =~ m|^/dev/.+|
        || $file =~ m/^([^:]+):(.+)$/;
    return $file if $keep_as_is;

    return undef if $file =~ m|/|;

    my $is_cdrom = $media && $media eq 'cdrom';

    return $is_cdrom ? "local:iso/$file" : "local:$vmid/$file";
}
740
# Check that the volume type $vtype fits the drive media type $media
# ('disk' expects 'images', 'cdrom' expects 'iso').
# Does nothing when $media is not set. Raises a parameter exception for
# option $opt on a mismatch, and dies with "internal error" for any other
# media value.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %expected_type_for = (
        disk => 'images',
        cdrom => 'iso',
    );

    my $etype = $expected_type_for{$media};
    die "internal error" if !defined($etype);

    return if ($vtype eq $etype);

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
759
# Normalize $drive->{file} in place: a file system path is converted to
# a volume ID, and the media type defaults to 'cdrom' for iso volumes
# and for the special files 'cdrom' and 'none'.
# Raises a parameter exception for option $opt when a path cannot be
# associated with any storage.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    # try to convert filesystem paths to volume IDs

    my $file = $drive->{file};
    my $is_fs_path = $file !~ m/^(cdrom|none)$/
        && $file !~ m|^/dev/.+|
        && $file !~ m/^([^:]+):(.+)$/
        && $file !~ m/^\d+$/;

    if ($is_fs_path) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $drive->{file});
        raise_param_exc({ $opt => "unable to associate path '$drive->{file}' to any storage"}) if !$vtype;

        if (!$drive->{media} && $vtype eq 'iso') {
            $drive->{media} = 'cdrom';
        }
        verify_media_type($opt, $vtype, $drive->{media});

        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
778
# Write a new configuration file for VM $vmid from the $settings hash.
# Dies if the file already exists. Missing 'name' and 'memory' settings
# are filled in ($settings is modified). Unknown options and options
# with a false value are not written.
# As the name says, no lock is taken here.
sub create_conf_nolock {
    my ($vmid, $settings) = @_;

    my $filename = config_file($vmid);

    die "configuration file '$filename' already exists\n" if -f $filename;

    my $defaults = load_defaults();

    $settings->{name} ||= "vm$vmid";
    $settings->{memory} ||= $defaults->{memory};

    # one "key: value" line per known option that has a true value
    my @lines = map { "$_: $settings->{$_}\n" }
        grep { $confdesc->{$_} && $settings->{$_} }
        keys %$settings;

    PVE::Tools::file_set_contents($filename, join('', @lines));
}
803
# Parse the value of the 'hotplug' option into a hash reference whose
# keys are the enabled features (network, disk, cpu, memory, usb).
# '0' gives an empty hash, '1' stands for the default feature list.
# Unknown features are skipped with a warning.
sub parse_hotplug_features {
    my ($data) = @_;

    return {} if $data eq '0';

    $data = $confdesc->{hotplug}->{default} if $data eq '1';

    my $enabled = {};

    foreach my $feature (PVE::Tools::split_list($data)) {
        if ($feature !~ m/^(network|disk|cpu|memory|usb)$/) {
            warn "ignoring unknown hotplug feature '$feature'\n";
            next;
        }
        $enabled->{$feature} = 1;
    }

    return $enabled;
}

PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);
# Format verifier for 'pve-hotplug-features'.
#
# Returns $value when it is '0', '1' or a list that contains only known
# hotplug features. Otherwise returns undef if $noerr is set, and dies
# if it is not.
#
# The features are checked here instead of testing the result of
# parse_hotplug_features(): that function always returns a (true) hash
# reference and only warns about unknown features, so it cannot be used
# to reject a value. Keep the feature list in sync with it.
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    my $valid = defined($value);

    # '0' (disabled) and '1' (alias for the default list) need no parsing
    if ($valid && $value ne '0' && $value ne '1') {
        foreach my $feature (PVE::Tools::split_list($value)) {
            next if $feature =~ m/^(network|disk|cpu|memory|usb)$/;
            $valid = 0;
            last;
        }
    }

    return $value if $valid;

    return undef if $noerr;

    die "unable to parse hotplug option\n";
}
833
# Convert a size value into a number of bytes. Accepts a non-negative
# decimal number with an optional K, M or G suffix (powers of 1024).
# Returns undef if the value has any other form; fractions of a byte
# are cut off.
my $parse_size = sub {
    my ($value) = @_;

    return undef if $value !~ m/^(\d+(\.\d+)?)([KMG])?$/;
    my ($size, $unit) = ($1, $3);
    if ($unit) {
        if ($unit eq 'K') {
            $size = $size * 1024;
        } elsif ($unit eq 'M') {
            $size = $size * 1024 * 1024;
        } elsif ($unit eq 'G') {
            $size = $size * 1024 * 1024 * 1024;
        }
    }
    return int($size);
};

# Format a number of bytes using the largest of the units K, M and G
# that divides it without remainder (no unit if even K does not fit).
my $format_size = sub {
    my ($size) = @_;

    $size = int($size);

    my $kb = int($size/1024);
    return $size if $kb*1024 != $size;

    my $mb = int($kb/1024);
    return "${kb}K" if $mb*1024 != $kb;

    my $gb = int($mb/1024);
    return "${mb}M" if $gb*1024 != $mb;

    return "${gb}G";
};

# Allowed value format per drive property. A property is only checked
# when its value is true, so e.g. a plain '0' is not validated.
my $drive_value_patterns = {
    cache => qr/^(off|none|writethrough|writeback|unsafe|directsync)$/,
    snapshot => qr/^(on|off)$/,
    cyls => qr/^\d+$/,
    heads => qr/^\d+$/,
    secs => qr/^\d+$/,
    media => qr/^(disk|cdrom)$/,
    trans => qr/^(none|lba|auto)$/,
    format => qr/^(raw|cow|qcow|qed|qcow2|vmdk|cloop)$/,
    rerror => qr/^(ignore|report|stop)$/,
    werror => qr/^(enospc|ignore|report|stop)$/,
    backup => qr/^(yes|no)$/,
    aio => qr/^(native|threads)$/,
    discard => qr/^(ignore|on)$/,
    iothread => qr/^(on)$/,
    queues => qr/^\d+$/,
    mbps => qr/^\d+(\.\d+)?$/,
    mbps_max => qr/^\d+(\.\d+)?$/,
    mbps_rd => qr/^\d+(\.\d+)?$/,
    mbps_rd_max => qr/^\d+(\.\d+)?$/,
    mbps_wr => qr/^\d+(\.\d+)?$/,
    mbps_wr_max => qr/^\d+(\.\d+)?$/,
    iops => qr/^\d+$/,
    iops_max => qr/^\d+$/,
    iops_rd => qr/^\d+$/,
    iops_rd_max => qr/^\d+$/,
    iops_wr => qr/^\d+$/,
    iops_wr_max => qr/^\d+$/,
};

# ideX = [volume=]volume-id[,media=d][,cyls=c,heads=h,secs=s[,trans=t]]
#        [,snapshot=on|off][,cache=on|off][,format=f][,backup=yes|no]
#        [,rerror=ignore|report|stop][,werror=enospc|ignore|report|stop]
#        [,aio=native|threads][,discard=ignore|on][,iothread=on]

# Parse the value $data of the drive option $key (e.g. 'ide0').
#
# Returns a hash reference with the keys 'interface' and 'index' (taken
# from $key), 'file', and one entry per property that was given.
# Returns undef if $key or $data cannot be parsed, or if the properties
# are invalid or contradict each other.
#
# - 'volume' is an alias for 'file'; the first element without '=' is
#   also taken as the file.
# - the legacy properties bps, bps_rd and bps_wr (bytes per second) are
#   stored as mbps, mbps_rd and mbps_wr.
# - 'size' is stored as a number of bytes.
sub parse_drive {
    my ($key, $data) = @_;

    return undef if !defined($data);

    my $res = {};

    # $key may be undefined - used to verify JSON parameters
    if (!defined($key)) {
        $res->{interface} = 'unknown'; # should not harm when used to verify parameters
        $res->{index} = 0;
    } elsif ($key =~ m/^([^\d]+)(\d+)$/) {
        $res->{interface} = $1;
        $res->{index} = $2;
    } else {
        return undef;
    }

    foreach my $p (split (/,/, $data)) {
        next if $p =~ m/^\s*$/;

        if ($p =~ m/^(file|volume|cyls|heads|secs|trans|media|snapshot|cache|format|rerror|werror|backup|aio|bps|mbps|mbps_max|bps_rd|mbps_rd|mbps_rd_max|bps_wr|mbps_wr|mbps_wr_max|iops|iops_max|iops_rd|iops_rd_max|iops_wr|iops_wr_max|size|discard|iothread|queues)=(.+)$/) {
            my ($k, $v) = ($1, $2);

            $k = 'file' if $k eq 'volume';

            if ($k eq 'bps' || $k eq 'bps_rd' || $k eq 'bps_wr') {
                # the whole value must be a positive integer - a value
                # like '10abc' must not be accepted
                return undef if !$v || $v !~ m/^\d+$/;
                $k = "m$k";
                $v = sprintf("%.3f", $v / (1024*1024));
            }

            # checked after the bps -> mbps rename, so that a repeated
            # legacy property is detected as duplicate, too
            return undef if defined $res->{$k};

            $res->{$k} = $v;
        } elsif (!$res->{file} && $p !~ m/=/) {
            $res->{file} = $p;
        } else {
            return undef;
        }
    }

    return undef if !$res->{file};

    foreach my $name (keys %$drive_value_patterns) {
        my $value = $res->{$name};
        return undef if $value && $value !~ $drive_value_patterns->{$name};
    }

    return undef if $res->{queues} && $res->{queues} < 2;

    # a total limit excludes the separate read/write limits
    return undef if $res->{mbps_rd} && $res->{mbps};
    return undef if $res->{mbps_wr} && $res->{mbps};
    return undef if $res->{iops_rd} && $res->{iops};
    return undef if $res->{iops_wr} && $res->{iops};

    if ($res->{size}) {
        return undef if !defined($res->{size} = &$parse_size($res->{size}));
    }

    if ($res->{media} && ($res->{media} eq 'cdrom')) {
        return undef if $res->{snapshot} || $res->{trans} || $res->{format};
        return undef if $res->{heads} || $res->{secs} || $res->{cyls};
        return undef if $res->{interface} eq 'virtio';
    }

    # rerror does not work with scsi drives
    if ($res->{rerror}) {
        return undef if $res->{interface} eq 'scsi';
    }

    return $res;
}
972
# Option names that print_drive() and print_drive_full() both emit verbatim
# as ",name=value".
my @qemu_drive_options = qw(
    heads secs cyls trans media format cache snapshot rerror werror aio
    discard iops iops_rd iops_wr iops_max iops_rd_max iops_wr_max
);

# Serialize a parsed drive hash into its property-string form:
# "<file>[,<option>=<value>...][,size=<formatted size>]".
#
# $vmid  - not used by this function
# $drive - hash reference describing the drive
#
# Options with a false value (undef, 0, '') are left out.
sub print_drive {
    my ($vmid, $drive) = @_;

    my @extra_options = qw(
        mbps mbps_rd mbps_wr mbps_max mbps_rd_max mbps_wr_max
        backup iothread queues
    );

    my $opts = '';
    for my $name (@qemu_drive_options, @extra_options) {
        my $setting = $drive->{$name};
        next if !$setting;
        $opts .= ",$name=$setting";
    }

    $opts .= ",size=" . $format_size->($drive->{size}) if $drive->{size};

    return "$drive->{file}$opts";
}
989
# Send a SCSI INQUIRY (opcode 0x12, 36 byte response) to a device through
# the SG_IO ioctl.
#
# $fh    - open file handle of the device
# $noerr - when true, failures return undef instead of raising an error
#
# Returns a hash reference with the keys vendor, product, revision,
# removable (0 or 1, bit 7 of response byte 1) and type (low five bits of
# response byte 0). Dies (or returns undef with $noerr) if one of the
# ioctls fails, the reported sg version is below 30000, or the returned
# header carries a non-zero value in field 17 or 18.
sub scsi_inquiry {
    my ($fh, $noerr) = @_;

    my $SG_IO = 0x2285;
    my $SG_GET_VERSION_NUM = 0x2282;

    my $versionbuf = "\x00" x 8;
    my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf);
    if (!$ret) {
        die "scsi ioctl SG_GET_VERSION_NUM failed - $!\n" if !$noerr;
        return undef;
    }
    my $version = unpack("I", $versionbuf);
    if ($version < 30000) {
        die "scsi generic interface too old\n" if !$noerr;
        return undef;
    }

    my $buf = "\x00" x 36;
    my $sensebuf = "\x00" x 8;
    my $cmd = pack("C x3 C x1", 0x12, 36);

    # see /usr/include/scsi/sg.h
    my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I";

    my $packet = pack($sg_io_hdr_t, ord('S'), -3, length($cmd),
                      length($sensebuf), 0, length($buf), $buf,
                      $cmd, $sensebuf, 6000);

    $ret = ioctl($fh, $SG_IO, $packet);
    if (!$ret) {
        die "scsi ioctl SG_IO failed - $!\n" if !$noerr;
        return undef;
    }

    # NOTE(review): fields 17 and 18 are presumably host_status and
    # driver_status of sg_io_hdr - confirm against sg.h
    my @res = unpack($sg_io_hdr_t, $packet);
    if ($res[17] || $res[18]) {
        die "scsi ioctl SG_IO status error - $!\n" if !$noerr;
        return undef;
    }

    my $res = {};
    (my $byte0, my $byte1, $res->{vendor},
     $res->{product}, $res->{revision}) = unpack("C C x6 A8 A16 A4", $buf);

    $res->{removable} = $byte1 & 128 ? 1 : 0;
    $res->{type} = $byte0 & 31;

    return $res;
}
1040
# Open $path read/write and run scsi_inquiry() on it with errors suppressed.
#
# Returns the hash reference produced by scsi_inquiry(), or undef when the
# path cannot be opened or the inquiry fails.
sub path_is_scsi {
    my ($path) = @_;

    # Pass the mode as a separate argument: with a single "+<$path" string
    # IO::File falls back to the two-argument open(), which strips
    # whitespace from the file name.
    my $fh = IO::File->new($path, '+<') || return undef;
    my $res = scsi_inquiry($fh, 1);
    close($fh);

    return $res;
}
1050
# Return 1 if the 'machine' setting of $conf contains "q35", otherwise 0.
sub machine_type_is_q35 {
    my ($conf) = @_;

    my $machine = $conf->{machine};
    return 0 if !$machine;

    return ($machine =~ m/q35/) ? 1 : 0;
}
1056
# Return the device string for the USB tablet. It is attached to the "ehci"
# bus on q35 machines and to "uhci" otherwise.
sub print_tabletdevice_full {
    my ($conf) = @_;

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = 'uhci';
    $usbbus = 'ehci' if machine_type_is_q35($conf);

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
1067
# Build the device string (guest visible side) for a drive.
#
# $storecfg - storage configuration, used to resolve volume ids to paths
# $conf     - VM configuration (its 'scsihw' setting selects the scsi layout)
# $vmid     - not used by this function
# $drive    - parsed drive hash (interface, index, file, media, ...)
# $bridges  - handed through to print_pci_addr() for virtio drives
#
# Dies for the 'usb' interface (not implemented) and for unknown interfaces.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges) = @_;

    my $iface = $drive->{interface};
    # all device strings identify the drive as "<interface><index>"
    my $drive_id = "$drive->{interface}$drive->{index}";

    my $device = '';

    if ($iface eq 'virtio') {
        my $pciaddr = print_pci_addr($drive_id, $bridges);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=$drive_id$pciaddr";
        $device .= ",iothread=iothread-$drive_id" if $drive->{iothread};
    } elsif ($iface eq 'scsi') {
        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;

        my $devicetype = 'hd';
        if (drive_is_cdrom($drive)) {
            $devicetype = 'cd';
        } else {
            # absolute paths are used directly, anything else is a volume id
            my $path = ($drive->{file} =~ m|^/|)
                ? $drive->{file}
                : PVE::Storage::path($storecfg, $drive->{file});

            if ($path =~ m/^iscsi\:\/\//) {
                $devicetype = 'generic';
            } elsif (my $info = path_is_scsi($path)) {
                if ($info->{type} == 0) {
                    $devicetype = 'block';
                } elsif ($info->{type} == 1) { # tape
                    $devicetype = 'generic';
                }
            }
        }

        my $bus = "scsi-$devicetype,bus=$controller_prefix$controller.0";
        if (!$conf->{scsihw} || ($conf->{scsihw} =~ m/^lsi/)) {
            $device = "$bus,scsi-id=$unit,drive=drive-$drive_id,id=$drive_id";
        } else {
            $device = "$bus,channel=0,scsi-id=0,lun=$drive->{index},drive=drive-$drive_id,id=$drive_id";
        }
    } elsif ($iface eq 'ide') {
        my $maxdev = 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = 'hd';
        $devicetype = 'cd' if $drive->{media} && $drive->{media} eq 'cdrom';

        $device = "ide-$devicetype,bus=ide.$controller,unit=$unit,drive=drive-$drive_id,id=$drive_id";
    } elsif ($iface eq 'sata') {
        my $controller = int($drive->{index} / $MAX_SATA_DISKS);
        my $unit = $drive->{index} % $MAX_SATA_DISKS;
        $device = "ide-drive,bus=ahci$controller.$unit,drive=drive-$drive_id,id=$drive_id";
    } elsif ($iface eq 'usb') {
        die "implement me";
        # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
    } else {
        die "unsupported interface type";
    }

    $device .= ",bootindex=$drive->{bootindex}" if $drive->{bootindex};

    return $device;
}
1134
# Read the first "InitiatorName=<name>" entry from
# /etc/iscsi/initiatorname.iscsi.
#
# Returns the name, or undef if the file cannot be opened or has no such
# entry.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') || return undef;

    my $initiator;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    $fh->close();

    return $initiator;
}
1148
# Build the drive string (host side) for a drive:
# "[file=<path>,]if=none,id=drive-<interface><index>[,<option>=<value>...]".
#
# $storecfg - storage configuration used to resolve the volume id
# $vmid     - handed through to get_iso_path() for cdrom drives
# $drive    - parsed drive hash
sub print_drive_full {
    my ($storecfg, $vmid, $drive) = @_;

    my $volid = $drive->{file};
    my $is_cdrom = drive_is_cdrom($drive);

    my ($path, $format);
    if ($is_cdrom) {
        $path = get_iso_path($storecfg, $vmid, $volid);
    } else {
        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        if ($storeid) {
            $path = PVE::Storage::path($storecfg, $volid);
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        } else {
            # not a volume id - use the value as it is
            $path = $volid;
        }
    }

    my @opts;
    foreach my $o (@qemu_drive_options) {
        next if $o eq 'bootindex';
        push @opts, "$o=$drive->{$o}" if $drive->{$o};
    }

    push @opts, "format=$format" if $format && !$drive->{format};

    # the mbps* settings are given in megabytes, the bps* options in bytes
    foreach my $o (qw(bps bps_rd bps_wr)) {
        my $v = $drive->{"m$o"};
        push @opts, "$o=" . int($v*1024*1024) if $v;
    }

    my $cache_direct = 0;
    if (my $cache = $drive->{cache}) {
        $cache_direct = $cache =~ /^(?:off|none|directsync)$/;
    } elsif (!$is_cdrom) {
        push @opts, "cache=none";
        $cache_direct = 1;
    }

    # aio native works only with O_DIRECT
    if (!$drive->{aio}) {
        push @opts, $cache_direct ? "aio=native" : "aio=threads";
    }

    if (!$is_cdrom) {
        my $detectzeroes = $drive->{discard} ? "unmap" : "on";
        push @opts, "detect-zeroes=$detectzeroes";
    }

    my $opts = join('', map { ",$_" } @opts);
    my $pathinfo = $path ? "file=$path," : '';

    return "${pathinfo}if=none,id=drive-$drive->{interface}$drive->{index}$opts";
}
1207
# Build the device string (guest visible side) for a network interface.
#
# $vmid    - not used by this function
# $conf    - not used by this function
# $net     - parsed network hash (model, macaddr, queues, bootindex)
# $netid   - interface id, used as device id and netdev reference
# $bridges - handed through to print_pci_addr()
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges) = @_;

    # the 'virtio' model maps to the virtio-net-pci device, every other
    # model name is used unchanged
    my $device = $net->{model};
    if ($net->{model} eq 'virtio') {
        $device = 'virtio-net-pci';
    }

    my $pciaddr = print_pci_addr("$netid", $bridges);
    my $tmpstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";
    if ($net->{queues} && $net->{queues} > 1 && $net->{model} eq 'virtio') {
        # Consider we have N queues, the number of vectors needed is 2*N + 2
        # (plus one config interrupt and control vq)
        my $vectors = $net->{queues} * 2 + 2;
        $tmpstr .= ",vectors=$vectors,mq=on";
    }
    $tmpstr .= ",bootindex=$net->{bootindex}" if $net->{bootindex};

    return $tmpstr;
}
1228
# Build the netdev string (host side) for a network interface.
#
# $vmid  - VM id, part of the tap interface name
# $conf  - VM configuration (its 'name' is the hostname for user networking)
# $net   - parsed network hash (model, bridge, queues)
# $netid - interface id of the form "net<N>"
#
# Dies if $netid is not of the form "net<N>", if N is not below $MAX_NETS,
# or if the resulting tap interface name is too long.
sub print_netdev_full {
    my ($vmid, $conf, $net, $netid) = @_;

    # An id that does not match would give an empty index, which the
    # numeric range check below cannot catch - reject it explicitly.
    die "got strange net id '$netid'\n" if $netid !~ m/^net(\d+)$/;
    my $i = int($1);

    die "got strange net id '$netid'\n" if $i >= ${MAX_NETS};

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    my $vhostparam = '';
    $vhostparam = ',vhost=on' if $kernel_has_vhost_net && $net->{model} eq 'virtio';

    my $vmname = $conf->{name} || "vm$vmid";

    my $netdev = "";

    if ($net->{bridge}) {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    } else {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    }

    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $net->{model} eq 'virtio');

    return $netdev;
}
1262
# True if $drive is set and its 'media' property equals 'cdrom'.
# A false $drive or 'media' value is returned as it is.
sub drive_is_cdrom {
    my ($drive) = @_;

    return $drive if !$drive;

    my $media = $drive->{media};
    return $media if !$media;

    return $media eq 'cdrom';
}
1269
# Parse a comma separated numa property string. Recognized settings:
#   memory=<value>
#   policy=preferred|bind|interleave
#   cpus=<start>[-<end>]
#   hostnodes=<start>[-<end>]
#
# Returns a hash reference (cpus and hostnodes become { start, end } hashes
# with an undefined end for single values), or undef on any other setting.
sub parse_numa {
    my ($data) = @_;

    my $res = {};

    foreach my $kvp (split(/,/, $data)) {
        if ($kvp =~ m/^memory=(\S+)$/) {
            $res->{memory} = $1;
            next;
        }
        if ($kvp =~ m/^policy=(preferred|bind|interleave)$/) {
            $res->{policy} = $1;
            next;
        }
        if ($kvp =~ m/^cpus=(\d+)(-(\d+))?$/) {
            my ($first, $last) = ($1, $3);
            $res->{cpus}->{start} = $first;
            $res->{cpus}->{end} = $last;
            next;
        }
        if ($kvp =~ m/^hostnodes=(\d+)(-(\d+))?$/) {
            my ($first, $last) = ($1, $3);
            $res->{hostnodes}->{start} = $first;
            $res->{hostnodes}->{end} = $last;
            next;
        }

        return undef;
    }

    return $res;
}
1294
# Parse a comma separated hostpci property string.
#
# A PCI address ("[host=]BB:DD[.F]") is required. With a function number an
# { id, function } entry is pushed to the 'pciid' list, without one 'pciid'
# is set to what lspci() reports for that id. Further settings are driver,
# rombar, x-vga and pcie; unknown settings only trigger a warning.
#
# Returns a hash reference, or undef for an empty value or when no PCI
# address was found.
sub parse_hostpci {
    my ($value) = @_;

    return undef if !$value;

    my $res = {};
    my $found;

    foreach my $kv (split(/,/, $value)) {
        if ($kv =~ m/^(host=)?([a-f0-9]{2}:[a-f0-9]{2})(\.([a-f0-9]))?$/) {
            my ($id, $function) = ($2, $4);
            $found = 1;
            if (defined($function)) {
                push @{$res->{pciid}}, { id => $id, function => $function };
            } else {
                my $pcidevices = lspci($id);
                $res->{pciid} = $pcidevices->{$id};
            }
        } elsif ($kv =~ m/^driver=(kvm|vfio)$/) {
            $res->{driver} = $1;
        } elsif ($kv =~ m/^rombar=(on|off)$/) {
            $res->{rombar} = $1;
        } elsif ($kv =~ m/^x-vga=(on|off)$/) {
            $res->{'x-vga'} = $1;
        } elsif ($kv =~ m/^pcie=(\d+)$/) {
            $res->{pcie} = 1 if $1 == 1;
        } else {
            warn "unknown hostpci setting '$kv'\n";
        }
    }

    return undef if !$found;

    return $res;
}
1333
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a comma separated network property string. The model entry may
# carry a MAC address; without one a random address is generated. Further
# settings are bridge, queues, rate, tag, firewall and link_down.
#
# Returns a hash reference (model in lower case, macaddr in upper case), or
# undef if a setting is not recognized or no model is given.
sub parse_net {
    my ($data) = @_;

    my $res = {};

    foreach my $kvp (split(/,/, $data)) {
        if ($kvp =~ m/^(ne2k_pci|e1000|e1000-82540em|e1000-82544gc|e1000-82545em|rtl8139|pcnet|virtio|ne2k_isa|i82551|i82557b|i82559er|vmxnet3)(=([0-9a-f]{2}(:[0-9a-f]{2}){5}))?$/i) {
            my ($model, $mac) = ($1, $3);
            $res->{model} = lc($model);
            $res->{macaddr} = defined($mac) ? uc($mac) : PVE::Tools::random_ether_addr();
            next;
        }
        if ($kvp =~ m/^bridge=(\S+)$/) {
            $res->{bridge} = $1;
            next;
        }
        if ($kvp =~ m/^queues=(\d+)$/) {
            $res->{queues} = $1;
            next;
        }
        if ($kvp =~ m/^rate=(\d+(\.\d+)?)$/) {
            $res->{rate} = $1;
            next;
        }
        if ($kvp =~ m/^tag=(\d+)$/) {
            $res->{tag} = $1;
            next;
        }
        if ($kvp =~ m/^firewall=([01])$/) {
            $res->{firewall} = $1;
            next;
        }
        if ($kvp =~ m/^link_down=([01])$/) {
            $res->{link_down} = $1;
            next;
        }

        return undef;
    }

    return undef if !$res->{model};

    return $res;
}
1369
# Serialize a network hash into its property-string form:
# "<model>[=<macaddr>][,bridge=..][,rate=..][,tag=..][,firewall=1]
# [,link_down=1][,queues=..]". Settings with a false value are left out.
sub print_net {
    my $net = shift;

    my $head = "$net->{model}";
    $head .= "=$net->{macaddr}" if $net->{macaddr};

    my @parts = ($head);
    push @parts, "bridge=$net->{bridge}" if $net->{bridge};
    push @parts, "rate=$net->{rate}" if $net->{rate};
    push @parts, "tag=$net->{tag}" if $net->{tag};
    push @parts, "firewall=1" if $net->{firewall};
    push @parts, "link_down=1" if $net->{link_down};
    push @parts, "queues=$net->{queues}" if $net->{queues};

    return join(',', @parts);
}
1384
# Re-serialize every parseable "net<N>" entry of $settings in place.
# parse_net() generates a MAC address for entries without one, so the
# rewritten value always carries an address.
sub add_random_macs {
    my ($settings) = @_;

    foreach my $opt (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$opt});
        $settings->{$opt} = print_net($net) if $net;
    }
}
1395
# Record $volid under the free "unused<N>" key with the lowest index.
#
# Returns the key that was used, or nothing if the volume is already listed
# under one of the unused keys. Dies if all $MAX_UNUSED_DISKS keys are taken.
sub add_unused_volume {
    my ($config, $volid) = @_;

    my $key;
    # walk down from the highest index so $key ends up as the lowest free one
    for (my $ind = $MAX_UNUSED_DISKS - 1; $ind >= 0; $ind--) {
        my $test = "unused$ind";
        if (my $vid = $config->{$test}) {
            return if $vid eq $volid; # do not add duplicates
        } else {
            $key = $test;
        }
    }

    die "Too many unused volumes - please delete them first.\n" if !$key;

    $config->{$key} = $volid;

    return $key;
}
1415
# Return 1 if the owner that PVE::Storage::path() reports for $volid equals
# $vmid, undef otherwise. Absolute paths are never owned, and errors raised
# while resolving the volume are ignored.
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    return undef if $volid =~ m|^/|;

    my ($path, $owner);
    eval { ($path, $owner) = PVE::Storage::path($storecfg, $volid); };

    return 1 if $owner && ($owner == $vmid);

    return undef;
}
1429
# Split a list of names separated by commas, semicolons or whitespace into
# a hash reference mapping each name to its flag: '!' if the name was
# prefixed with '!', otherwise ''.
sub split_flagged_list {
    my $text = shift || '';
    $text =~ s/[,;]/ /g;
    $text =~ s/^\s+//;

    my $flags = {};
    foreach my $entry ($text =~ /\S+/g) {
        my ($flag, $name) = $entry =~ /^(!?)(.*)$/;
        $flags->{$name} = $flag;
    }

    return $flags;
}
1436
# Join the entries of a name => flag hash into one string, each name
# prefixed with its flag and the entries separated by $how. Entries appear
# in the order keys() returns them.
sub join_flagged_list {
    my ($how, $lst) = @_;

    my @entries;
    foreach my $name (keys %$lst) {
        push @entries, $lst->{$name} . $name;
    }

    return join($how, @entries);
}
1441
# Mark option $key for deletion in the pending section of $conf: a pending
# value for the key is dropped and the key is added to the pending 'delete'
# list, flagged with '!' when $force is set.
sub vmconfig_delete_pending_option {
    my ($conf, $key, $force) = @_;

    delete $conf->{pending}->{$key};

    my $to_delete = split_flagged_list($conf->{pending}->{delete});
    if ($force) {
        $to_delete->{$key} = '!';
    } else {
        $to_delete->{$key} = '';
    }

    $conf->{pending}->{delete} = join_flagged_list(',', $to_delete);
}
1450
# Remove option $key from the pending 'delete' list of $conf. The 'delete'
# entry itself is removed when the list becomes empty.
sub vmconfig_undelete_pending_option {
    my ($conf, $key) = @_;

    my $to_delete = split_flagged_list($conf->{pending}->{delete});
    delete $to_delete->{$key};

    if (!%$to_delete) {
        delete $conf->{pending}->{delete};
    } else {
        $conf->{pending}->{delete} = join_flagged_list(',', $to_delete);
    }
}
1463
# Add the volume of $drive to the unused volumes of $conf if the drive is
# not a cdrom and the volume is owned by VM $vmid.
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    return if drive_is_cdrom($drive);

    # nothing to register for a drive without a volume
    my $volid = $drive->{file};
    return if !$volid;

    if (vm_is_volid_owner($storecfg, $vmid, $volid)) {
        add_unused_volume($conf, $volid);
    }
}
1474
# Drop pending entries of $conf that would not change anything: pending
# values equal to the current value, and pending deletes of options that
# are not set.
#
# Returns 1 if something was removed, undef otherwise.
sub vmconfig_cleanup_pending {
    my ($conf) = @_;

    my $changes;

    # remove pending changes when nothing changed
    foreach my $opt (keys %{$conf->{pending}}) {
        next if !defined($conf->{$opt});
        next if $conf->{pending}->{$opt} ne $conf->{$opt};
        $changes = 1;
        delete $conf->{pending}->{$opt};
    }

    # keep only deletes of options that actually exist
    my $requested = split_flagged_list($conf->{pending}->{delete});
    my $remaining = {};
    foreach my $opt (keys %$requested) {
        if (defined($conf->{$opt})) {
            $remaining->{$opt} = $requested->{$opt};
        } else {
            $changes = 1;
        }
    }

    if (!%$remaining) {
        delete $conf->{pending}->{delete};
    } else {
        $conf->{pending}->{delete} = join_flagged_list(',', $remaining);
    }

    return $changes;
}
1505
# Known smbios type 1 settings, each with the pattern its value must match
# completely.
my $valid_smbios1_options = {
    manufacturer => '\S+',
    product => '\S+',
    version => '\S+',
    serial => '\S+',
    uuid => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
    sku => '\S+',
    family => '\S+',
};

# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str]
#
# Parse a comma separated smbios type 1 property string.
#
# Returns a hash reference of the settings, or undef if an entry is not of
# the form key=value, the key is unknown, or the value does not match the
# pattern registered for the key.
sub parse_smbios1 {
    my ($data) = @_;

    my $res = {};

    foreach my $kvp (split(/,/, $data)) {
        return undef if $kvp !~ m/^(\S+)=(.+)$/;
        # split at the first '=' only, so a value containing '=' is kept
        # (and validated) as a whole instead of being cut short
        my ($k, $v) = split(/=/, $kvp, 2);
        return undef if !defined($k) || !defined($v);
        return undef if !$valid_smbios1_options->{$k};
        return undef if $v !~ m/^$valid_smbios1_options->{$k}$/;
        $res->{$k} = $v;
    }

    return $res;
}
1533
# Serialize an smbios type 1 hash into "key=value[,key=value...]". Entries
# with an undefined value or an unknown key are left out.
sub print_smbios1 {
    my ($smbios1) = @_;

    my @known = grep {
        defined($smbios1->{$_}) && $valid_smbios1_options->{$_}
    } keys %$smbios1;

    return join(',', map { "$_=$smbios1->{$_}" } @known);
}
1546
PVE::JSONSchema::register_format('pve-qm-smbios1', \&verify_smbios1);
# Format check for 'pve-qm-smbios1': returns $value if parse_smbios1()
# accepts it. Otherwise returns undef when $noerr is set, and dies if not.
sub verify_smbios1 {
    my ($value, $noerr) = @_;

    if (!parse_smbios1($value)) {
        return undef if $noerr;
        die "unable to parse smbios (type 1) options\n";
    }

    return $value;
}
1557
PVE::JSONSchema::register_format('pve-qm-bootdisk', \&verify_bootdisk);
# Format check for 'pve-qm-bootdisk': returns $value if valid_drivename()
# accepts it. Otherwise returns undef when $noerr is set, and dies if not.
sub verify_bootdisk {
    my ($value, $noerr) = @_;

    if (!valid_drivename($value)) {
        return undef if $noerr;
        die "invalid boot disk '$value'\n";
    }

    return $value;
}
1568
PVE::JSONSchema::register_format('pve-qm-numanode', \&verify_numa);
# Format check for 'pve-qm-numanode': returns $value if parse_numa()
# accepts it. Otherwise returns undef when $noerr is set, and dies if not.
sub verify_numa {
    my ($value, $noerr) = @_;

    if (!parse_numa($value)) {
        return undef if $noerr;
        die "unable to parse numa options\n";
    }

    return $value;
}
1579
PVE::JSONSchema::register_format('pve-qm-net', \&verify_net);
# Format check for 'pve-qm-net': returns $value if parse_net() accepts it.
# Otherwise returns undef when $noerr is set, and dies if not.
sub verify_net {
    my ($value, $noerr) = @_;

    if (!parse_net($value)) {
        return undef if $noerr;
        die "unable to parse network options\n";
    }

    return $value;
}
1590
PVE::JSONSchema::register_format('pve-qm-drive', \&verify_drive);
# Format check for 'pve-qm-drive': returns $value if parse_drive() accepts
# it (called without a key). Otherwise returns undef when $noerr is set,
# and dies if not.
sub verify_drive {
    my ($value, $noerr) = @_;

    if (!parse_drive(undef, $value)) {
        return undef if $noerr;
        die "unable to parse drive options\n";
    }

    return $value;
}
1601
PVE::JSONSchema::register_format('pve-qm-hostpci', \&verify_hostpci);
# Format check for 'pve-qm-hostpci': returns $value if parse_hostpci()
# accepts it. Otherwise returns undef when $noerr is set, and dies if not.
sub verify_hostpci {
    my ($value, $noerr) = @_;

    if (!parse_hostpci($value)) {
        return undef if $noerr;
        die "unable to parse pci id\n";
    }

    return $value;
}
1612
PVE::JSONSchema::register_format('pve-qm-watchdog', \&verify_watchdog);
# Format check for 'pve-qm-watchdog': returns $value if parse_watchdog()
# accepts it. Otherwise returns undef when $noerr is set, and dies if not.
sub verify_watchdog {
    my ($value, $noerr) = @_;

    if (!parse_watchdog($value)) {
        return undef if $noerr;
        die "unable to parse watchdog options\n";
    }

    return $value;
}
1623
# Parse a comma separated watchdog property string with the settings
# "[model=]i6300esb|ib700" and
# "[action=]reset|shutdown|poweroff|pause|debug|none".
#
# Returns a hash reference with the keys found (model, action), or undef
# for an empty value or an unknown entry. Blank entries are skipped.
sub parse_watchdog {
    my ($value) = @_;

    return undef if !$value;

    my $res = {};

    foreach my $p (split(/,/, $value)) {
        next if $p =~ m/^\s*$/;

        if ($p =~ m/^(model=)?(i6300esb|ib700)$/) {
            $res->{model} = $2;
            next;
        }
        if ($p =~ m/^(action=)?(reset|shutdown|poweroff|pause|debug|none)$/) {
            $res->{action} = $2;
            next;
        }

        return undef;
    }

    return $res;
}
1645
# Parse a comma separated usb device property string. Accepted entries:
#   host=[0x]<vendor>:[0x]<product>  (four hex digits each)
#   host=<bus>-<port>[.<port>...]
#   spice
#
# Returns a hash reference (vendorid/productid, hostbus/hostport, spice),
# or undef for an empty value, an unknown entry, or when no entry was found.
sub parse_usb_device {
    my ($value) = @_;

    return undef if !$value;

    my $res = {};
    my $found;

    foreach my $v (split(/,/, $value)) {
        if ($v =~ m/^host=(0x)?([0-9A-Fa-f]{4}):(0x)?([0-9A-Fa-f]{4})$/) {
            $res->{vendorid} = $2;
            $res->{productid} = $4;
        } elsif ($v =~ m/^host=(\d+)\-(\d+(\.\d+)*)$/) {
            $res->{hostbus} = $1;
            $res->{hostport} = $2;
        } elsif ($v =~ m/^spice$/) {
            $res->{spice} = 1;
        } else {
            return undef;
        }
        $found = 1;
    }

    return undef if !$found;

    return $res;
}
1675
PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);
# Format check for 'pve-qm-usb-device': returns $value if
# parse_usb_device() accepts it. Otherwise returns undef when $noerr is
# set, and dies if not.
sub verify_usb_device {
    my ($value, $noerr) = @_;

    if (!parse_usb_device($value)) {
        return undef if $noerr;
        die "unable to parse usb device\n";
    }

    return $value;
}
1686
# add JSON properties for create and set function
#
# Copies every option description from $confdesc into $prop, except
# 'parent', 'snaptime' and 'vmstate', and returns $prop.
sub json_config_properties {
    my $prop = shift;

    my %skipped = map { $_ => 1 } qw(parent snaptime vmstate);

    foreach my $opt (grep { !$skipped{$_} } keys %$confdesc) {
        $prop->{$opt} = $confdesc->{$opt};
    }

    return $prop;
}
1698
# Validate $value against the type that $confdesc declares for $key and
# return the normalized value:
#   boolean - 1 for 1/on/yes/true, 0 for 0/off/no/false (case-insensitive)
#   integer - int() of an all-digit value
#   number  - the value itself if it looks like <digits>[.<digits>]
#   string  - the value itself after its format check; without a format,
#             one pair of enclosing double quotes is removed
#
# Dies for unknown keys, undefined values, values containing CR or LF, and
# values failing the type check.
sub check_type {
    my ($key, $value) = @_;

    die "unknown setting '$key'\n" if !$confdesc->{$key};

    my $type = $confdesc->{$key}->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    die "internal error" if $type ne 'string';

    my $fmt = $confdesc->{$key}->{format};
    if (!$fmt) {
        $value =~ s/^\"(.*)\"$/$1/;
        return $value;
    }

    if ($fmt eq 'pve-qm-drive') {
        # special case - we need to pass $key to parse_drive()
        my $drive = parse_drive($key, $value);
        return $value if $drive;
        die "unable to parse drive options\n";
    }

    PVE::JSONSchema::check_format($fmt, $value);
    return $value;
}
1741
# Call lock_file() with the lock file of VM $vmid, handing $timeout, $code
# and @param through. Re-raises the error left in $@ and otherwise returns
# what lock_file() returned.
sub lock_config_full {
    my ($vmid, $timeout, $code, @param) = @_;

    my $lockfile = config_file_lock($vmid);
    my $result = lock_file($lockfile, $timeout, $code, @param);

    die $@ if $@;

    return $result;
}
1753
# Same as lock_config_full(), but goes through lock_file_full() and hands
# the additional $shared argument through to it.
sub lock_config_mode {
    my ($vmid, $timeout, $shared, $code, @param) = @_;

    my $lockfile = config_file_lock($vmid);
    my $result = lock_file_full($lockfile, $timeout, $shared, $code, @param);

    die $@ if $@;

    return $result;
}
1765
# lock_config_full() with a fixed timeout value of 10.
sub lock_config {
    my ($vmid, $code, @param) = @_;

    my $timeout = 10;

    return lock_config_full($vmid, $timeout, $code, @param);
}
1771
# Return the relative path "nodes/<node>/qemu-server/<vmid>.conf".
# $node falls back to $nodename when it is not set.
sub cfs_config_path {
    my ($vmid, $node) = @_;

    $node ||= $nodename;

    return "nodes/$node/qemu-server/$vmid.conf";
}
1778
# Placeholder for an IOMMU check - always returns 1.
sub check_iommu_support {
    # fixme : need to check IOMMU support
    # http://www.linux-kvm.org/page/How_to_assign_devices_with_VT-d_in_KVM

    return 1;
}
1787
# Return the path of the VM configuration file: "/etc/pve/" followed by
# the result of cfs_config_path().
sub config_file {
    my ($vmid, $node) = @_;

    return "/etc/pve/" . cfs_config_path($vmid, $node);
}
1794
# Return the path of the lock file for VM $vmid inside $lock_dir.
sub config_file_lock {
    my ($vmid) = @_;

    my $name = "lock-$vmid.conf";

    return "$lock_dir/$name";
}
1800
# Set the access and modification time of the VM's configuration file to
# the current time (utime with undefined time stamps).
sub touch_config {
    my ($vmid) = @_;

    my $path = config_file($vmid);
    utime(undef, undef, $path);
}
1807
# Destroy VM $vmid: free the disks it owns, remove its configuration file
# (or replace it with a minimal one when $keep_empty_config is set) and
# finally free all volumes that vdisk_list() still reports for the VM.
#
# Dies if the VM is locked or if freeing a configured disk fails; errors
# while freeing the remaining volumes are only reported as warnings.
sub destroy_vm {
    my ($storecfg, $vmid, $keep_empty_config) = @_;

    my $conffile = config_file($vmid);

    my $conf = load_config($vmid);

    check_lock($conf);

    # only remove disks owned by this VM
    my $free_owned_disk = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        PVE::Storage::vdisk_free($storecfg, $volid);
    };
    foreach_drive($conf, $free_owned_disk);

    if (!$keep_empty_config) {
        unlink $conffile;
    } else {
        PVE::Tools::file_set_contents($conffile, "memory: 128\n");
    }

    # also remove unused disk
    my $free_volume = sub {
        my ($volid, $sid, $volname, $d) = @_;
        PVE::Storage::vdisk_free($storecfg, $volid);
    };
    eval {
        my $dl = PVE::Storage::vdisk_list($storecfg, undef, $vmid);

        eval { PVE::Storage::foreach_volid($dl, $free_volume); };
        warn $@ if $@;
    };
    warn $@ if $@;
}
1854
# Read the configuration of VM $vmid (optionally from $node) through
# cfs_read_file(). Dies if no configuration is returned.
sub load_config {
    my ($vmid, $node) = @_;

    my $path = cfs_config_path($vmid, $node);
    my $conf = PVE::Cluster::cfs_read_file($path);

    if (!defined($conf)) {
        die "no such VM ('$vmid')\n";
    }

    return $conf;
}
1866
# Parse the raw text of a VM configuration file.
#
# $filename - must end in "/qemu-server/<vmid>.conf"
# $raw      - file content; undef yields undef
#
# Returns a hash reference with the settings of the main section plus
#   digest    - SHA1 hex digest of $raw
#   pending   - settings of the [PENDING] section
#   snapshots - one settings hash per [<name>] section
# Lines starting with '#' and 'description:' lines are collected into the
# 'description' of the section they appear in. Values that fail the type
# check only produce a warning and are skipped.
sub parse_vm_config {
    my ($filename, $raw) = @_;

    return undef if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    my $conf = $res;
    my $descr;
    my $section = '';

    # store the description collected so far in the current section
    my $flush_description = sub {
        if (defined($descr)) {
            $descr =~ s/\s+$//;
            $conf->{description} = $descr;
        }
        $descr = undef;
    };

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            $flush_description->();
            $conf = $res->{$section} = {};
            next;

        } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            $section = $1;
            $flush_description->();
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored like all other keys, so that a key merely ending in
            # "snapstate" is not mistaken for it
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            my $key = $1;
            my $value = $2;
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                warn "vm $vmid - property 'delete' is only allowed in [PENDING]\n";
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(\S+)\s*$/) {
            my $key = $1;
            my $value = $2;
            eval { $value = check_type($key, $value); };
            if ($@) {
                warn "vm $vmid - unable to parse value of '$key' - $@";
            } else {
                my $fmt = $confdesc->{$key}->{format};
                if ($fmt && $fmt eq 'pve-qm-drive') {
                    # store drives with their file converted to a volume id
                    my $v = parse_drive($key, $value);
                    if (my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media})) {
                        $v->{file} = $volid;
                        $value = print_drive($vmid, $v);
                    } else {
                        warn "vm $vmid - unable to parse value of '$key'\n";
                        next;
                    }
                }

                # the 'cdrom' setting is stored as 'ide2'
                if ($key eq 'cdrom') {
                    $conf->{ide2} = $value;
                } else {
                    $conf->{$key} = $value;
                }
            }
        }
    }

    $flush_description->();
    delete $res->{snapstate}; # just to be sure

    return $res;
}
1970
# Serialize a VM configuration hash into the raw text of its config file.
#
# $filename - not used by this function
# $conf     - configuration hash; it is normalized in place ('cdrom' is
#             moved to 'ide2', 'smp' is replaced by 'sockets', values are
#             passed through check_type(), 'unusedX' entries whose volume is
#             in use by a drive again are removed)
#
# Returns the text: main section, then [PENDING] if it has entries, then
# one section per snapshot sorted by name.
# Dies if a value fails its type check, if 'cdrom' conflicts with 'ide2',
# or if 'delete' appears outside of the pending section.
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    if ($conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = $conf->{cdrom};
        delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif ($conf->{smp}) {
        $conf->{sockets} = $conf->{smp};
        delete $conf->{cores};
        delete $conf->{smp};
    }

    my $used_volids = {};

    # type-check all values of one section and, outside of snapshots,
    # remember the volumes referenced by drives
    my $cleanup_config = sub {
        my ($cref, $pending, $snapname) = @_;

        foreach my $key (keys %$cref) {
            next if $key eq 'digest' || $key eq 'description' || $key eq 'snapshots' ||
                $key eq 'snapstate' || $key eq 'pending';
            my $value = $cref->{$key};
            if ($key eq 'delete') {
                die "property 'delete' is only allowed in [PENDING]\n"
                    if !$pending;
                # fixme: check syntax?
                next;
            }
            eval { $value = check_type($key, $value); };
            die "unable to parse value of '$key' - $@" if $@;

            $cref->{$key} = $value;

            if (!$snapname && valid_drivename($key)) {
                my $drive = parse_drive($key, $value);
                $used_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
            }
        }
    };

    $cleanup_config->($conf);

    $cleanup_config->($conf->{pending}, 1);

    foreach my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error" if $snapname eq 'pending';
        $cleanup_config->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    foreach my $key (keys %$conf) {
        my $value = $conf->{$key};
        if ($key =~ m/^unused/ && $used_volids->{$value}) {
            delete $conf->{$key};
        }
    }

    # produce the text of one section
    my $generate_raw_config = sub {
        my ($cref, $pending) = @_;

        my $raw = '';

        # add description as comment to top of file
        if (defined(my $descr = $cref->{description})) {
            if ($descr) {
                foreach my $cl (split(/\n/, $descr)) {
                    $raw .= '#' . PVE::Tools::encode_text($cl) . "\n";
                }
            } else {
                # an empty description is only written in the pending section
                $raw .= "#\n" if $pending;
            }
        }

        foreach my $key (sort keys %$cref) {
            next if $key eq 'digest' || $key eq 'description' || $key eq 'pending' || $key eq 'snapshots';
            $raw .= "$key: $cref->{$key}\n";
        }
        return $raw;
    };

    my $raw = $generate_raw_config->($conf);

    if (scalar(keys %{$conf->{pending}})) {
        $raw .= "\n[PENDING]\n";
        $raw .= $generate_raw_config->($conf->{pending}, 1);
    }

    foreach my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= $generate_raw_config->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
2072
# Write $conf as the configuration of VM $vmid through cfs_write_file().
# Unless $skiplock is set, check_lock() is applied to $conf first.
sub update_config_nolock {
    my ($vmid, $conf, $skiplock) = @_;

    if (!$skiplock) {
        check_lock($conf);
    }

    my $path = cfs_config_path($vmid);

    PVE::Cluster::cfs_write_file($path, $conf);
}
2082
# Write the configuration of VM $vmid through update_config_nolock() while
# holding the VM's config lock.
sub update_config {
    my ($vmid, $conf, $skiplock) = @_;

    # Pass a code reference: a bare "&update_config_nolock" would call the
    # function right here, outside of the lock. $vmid has to be repeated
    # because lock_config() hands only the trailing parameters to the
    # callback.
    lock_config($vmid, \&update_config_nolock, $vmid, $conf, $skiplock);
}
2088
# Return a hash reference with the default value of every option in
# $confdesc that declares one. The 'keyboard' setting from datacenter.cfg,
# if set, takes precedence over the static default.
sub load_defaults {
    my $res = {};

    # we use static defaults from our JSON schema configuration
    foreach my $key (keys %$confdesc) {
        my $default = $confdesc->{$key}->{default};
        $res->{$key} = $default if defined($default);
    }

    my $conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
    if ($conf->{keyboard}) {
        $res->{keyboard} = $conf->{keyboard};
    }

    return $res;
}
2105
# Return a hash reference { <vmid> => { exists => 1 } } with all entries of
# the cluster VM list that have type 'qemu' and are located on $nodename.
sub config_list {
    my $res = {};

    my $vmlist = PVE::Cluster::get_vmlist();
    return $res if !$vmlist || !$vmlist->{ids};

    my $ids = $vmlist->{ids};
    foreach my $vmid (keys %$ids) {
        my $d = $ids->{$vmid};
        my $on_this_node = $d->{node} && $d->{node} eq $nodename;
        my $is_qemu = $d->{type} && $d->{type} eq 'qemu';
        next if !$on_this_node;
        next if !$is_qemu;
        $res->{$vmid}->{exists} = 1;
    }

    return $res;
}
2120
# test if VM uses local resources (to prevent migration)
#
# Local resources are the old style 'hostusb' and 'hostpci' settings and
# any usbN, hostpciN, serialN or parallelN entry, except usb entries whose
# value is 'spice'.
#
# Returns 1 if there are any, 0 otherwise. Dies instead of returning 1
# unless $noerr is set.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    # old syntax
    my $loc_res = ($conf->{hostusb} || $conf->{hostpci}) ? 1 : 0;

    foreach my $k (keys %$conf) {
        next if $k =~ m/^usb/ && ($conf->{$k} eq 'spice');
        if ($k =~ m/^(usb|hostpci|serial|parallel)\d+$/) {
            $loc_res = 1;
        }
    }

    if ($loc_res && !$noerr) {
        die "VM uses local resources\n";
    }

    return $loc_res;
}
2139
# check if used storages are available on all nodes (use by migrate)
#
# Runs storage_check_node() for the storage of every drive of $conf, once
# without a node argument and once for $node. Drives without a file, or
# whose file is not a volume id, are skipped.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $check_drive = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_node($storecfg, $sid);
        PVE::Storage::storage_check_node($storecfg, $sid, $node);
    };

    foreach_drive($conf, $check_drive);
}
2158
# List nodes where all VM images are available (used by has_feature API).
#
# Starts with every cluster node and narrows the set per drive:
#   - a disabled storage removes all nodes,
#   - a storage restricted to certain nodes removes all other nodes,
#   - a storage that is not shared removes every node but the local one.
#
# Returns a hash reference { $nodename => 1, ... }.
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my %candidates = map { $_ => 1 } @$nodelist;
    my $localnode = PVE::INotify::nodename();

    foreach_drive($conf, sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        if ($scfg->{disable}) {
            %candidates = ();
            return;
        }

        if (my $avail = $scfg->{nodes}) {
            delete @candidates{ grep { !$avail->{$_} } keys %candidates };
            return;
        }

        if (!$scfg->{shared}) {
            delete @candidates{ grep { $_ ne $localnode } keys %candidates };
        }
    });

    return \%candidates;
}
2192
# Die with "VM is locked (<lock>)" when $conf carries a true 'lock' entry.
sub check_lock {
    my ($conf) = @_;

    my $lock = $conf->{lock};

    die "VM is locked ($lock)\n" if $lock;
}
2198
# Check whether process $pid is a kvm/qemu process that was started with
# '-pidfile $pidfile' (or '--pidfile $pidfile').
#
# The NUL separated arguments are read from /proc/$pid/cmdline. Returns 1
# on a match and a false value otherwise (also when the file can not be
# opened or is empty).
sub check_cmdline {
    my ($pidfile, $pid) = @_;

    my $fh = IO::File->new("/proc/$pid/cmdline", "r");
    return undef if !defined($fh);

    my $raw = <$fh>;
    $fh->close;
    return undef if !$raw;

    my @argv = split(/\0/, $raw);

    # the executable name has to end in 'kvm' or 'qemu-system-x86_64'
    my $binary = $argv[0];
    return if !$binary;
    return if $binary !~ m|kvm$| && $binary !~ m|qemu-system-x86_64$|;

    foreach my $idx (0 .. $#argv) {
        my $arg = $argv[$idx];
        next if !$arg;
        next if $arg ne '-pidfile' && $arg ne '--pidfile';

        # only the first pidfile option is considered
        my $value = $argv[$idx + 1];
        return ($value && ($value eq $pidfile)) ? 1 : undef;
    }

    return undef;
}
2224
# Test whether VM $vmid is running.
#
# Unless $nocheck is set, dies when the configuration file of the VM
# (looked up for $node via config_file()) does not exist.
#
# The pid is read from the VM's pidfile and accepted only if
# check_cmdline() confirms that the process was started with this pidfile
# and PVE::ProcFSTools::check_process_running() reports it as running.
#
# Returns the pid, or undef when the VM is not running.
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    my $filename = config_file($vmid, $node);

    die "unable to find configuration file for VM $vmid - no such machine\n"
        if !$nocheck && ! -f $filename;

    my $pidfile = pidfile_name($vmid);

    # open with an explicit mode instead of encoding it into the file name
    my $fd = IO::File->new($pidfile, "r");
    return undef if !$fd;

    my $st = stat($fd);
    my $line = <$fd>;
    close($fd);

    # the timestamp belongs to the pidfile, so that is the file to report
    if ($st && $st->mtime > time()) {
        warn "file '$pidfile' modified in future\n";
    }

    # an empty pidfile yields an undefined line
    if (defined($line) && $line =~ m/^(\d+)$/) {
        my $pid = $1;
        if (check_cmdline($pidfile, $pid)) {
            if (my $pinfo = PVE::ProcFSTools::check_process_running($pid)) {
                return $pid;
            }
        }
    }

    return undef;
}
2257
# Return the VM list of config_list(), with a 'pid' entry added for every
# VM that has a pidfile in $var_run_tmpdir and that check_running()
# reports as running.
sub vzlist {

    my $vzlist = config_list();

    my $dir = IO::Dir->new($var_run_tmpdir);
    return $vzlist if !$dir;

    while (defined(my $entry = $dir->read)) {
        my ($vmid) = $entry =~ m/^(\d+)\.pid$/;
        next if !defined($vmid);

        # ignore pidfiles of VMs that are not in the list
        next if !defined($vzlist->{$vmid});

        my $pid = check_running($vmid);
        $vzlist->{$vmid}->{pid} = $pid if $pid;
    }

    return $vzlist;
}
2275
# Return the 'size' property of the boot disk of $conf.
#
# Returns undef when no valid boot disk is configured, when its drive
# entry is missing or can not be parsed, when it is a cdrom or when it
# has no volume. $storecfg is accepted but not used.
sub disksize {
    my ($storecfg, $conf) = @_;

    my $bootdisk = $conf->{bootdisk};
    return undef
        if !$bootdisk || !valid_drivename($bootdisk) || !$conf->{$bootdisk};

    my $drive = parse_drive($bootdisk, $conf->{$bootdisk});
    return undef if !defined($drive) || drive_is_cdrom($drive);

    return undef if !$drive->{file};

    return $drive->{size};
}
2295
# CPU accounting remembered between vmstatus() calls, keyed by pid
# (entries: time, used, cpu).
my $last_proc_pid_stat;

# get VM status information
# This must be fast and should not block ($full == false)
# We only query KVM using QMP if $full == true (this can be slow)
#
# $opt_vmid restricts the result to a single VM.
# Returns a hash reference { $vmid => { status, pid, cpus, maxmem, ... } }.
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    my $cpucount = $cpuinfo->{cpus} || 1;

    # static values taken from the VM configuration
    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);

        my $cfspath = cfs_config_path($vmid);
        my $conf = PVE::Cluster::cfs_read_file($cfspath) || {};

        my $pid = $list->{$vmid}->{pid};

        my $size = disksize($storecfg, $conf);

        my $cpus = ($conf->{sockets} || 1) * ($conf->{cores} || 1);
        $cpus = $cpucount if $cpus > $cpucount;
        $cpus = $conf->{vcpus} if $conf->{vcpus};

        my $template = is_template($conf);

        my $d = {
            pid => $pid,
            # fixme: better status?
            status => $pid ? 'running' : 'stopped',
            disk => 0, # no info available
            maxdisk => defined($size) ? $size : 0,
            cpus => $cpus,
            name => $conf->{name} || "VM $vmid",
            maxmem => $conf->{memory} ? $conf->{memory}*(1024*1024) : 0,
            uptime => 0,
            cpu => 0,
            mem => 0,
            netout => 0,
            netin => 0,
            diskread => 0,
            diskwrite => 0,
            template => $template,
        };

        if ($conf->{balloon}) {
            $d->{balloon_min} = $conf->{balloon}*(1024*1024);
            $d->{shares} = defined($conf->{shares}) ? $conf->{shares} : 1000;
        }

        $res->{$vmid} = $d;
    }

    # network counters of the tap devices; what the host side receives
    # is accounted as 'netout' of the VM and vice versa
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    foreach my $dev (keys %$netdev) {
        my ($vmid) = $dev =~ m/^tap([1-9]\d*)i/;
        next if !defined($vmid);

        my $d = $res->{$vmid};
        next if !$d;

        my $received = $netdev->{$dev}->{receive};
        my $transmitted = $netdev->{$dev}->{transmit};

        $d->{netout} += $received;
        $d->{netin} += $transmitted;

        if ($full) {
            $d->{nics}->{$dev}->{netout} = $received;
            $d->{nics}->{$dev}->{netin} = $transmitted;
        }
    }

    my $ctime = gettimeofday;

    # uptime, memory and CPU usage from /proc/<pid>/stat
    foreach my $vmid (keys %$list) {

        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        $d->{uptime} = int(($uptime - $pstat->{starttime})/$cpuinfo->{user_hz});

        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss}/$pstat->{vsize})*$d->{maxmem});
        }

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first sample for this pid - nothing to compare with yet
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            # interval too short - report the previously computed value
            $d->{cpu} = $old->{cpu};
        }
    }

    return $res if !$full;

    my $qmpclient = PVE::QMPClient->new();

    my $ballooncb = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $d = $res->{$vmid};

        # use memory assigned to VM
        $d->{maxmem} = $info->{max_mem};
        $d->{balloon} = $info->{actual};

        if (defined($info->{total_mem}) && defined($info->{free_mem})) {
            $d->{mem} = $info->{total_mem} - $info->{free_mem};
            $d->{freemem} = $info->{free_mem};
        }

        $d->{ballooninfo} = $info;
    };

    my $blockstatscb = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} || [];
        my $totalrdbytes = 0;
        my $totalwrbytes = 0;

        for my $blockstat (@$data) {
            $totalrdbytes += $blockstat->{stats}->{rd_bytes};
            $totalwrbytes += $blockstat->{stats}->{wr_bytes};

            $blockstat->{device} =~ s/drive-//;
            $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $blockstat->{stats};
        }

        $res->{$vmid}->{diskread} = $totalrdbytes;
        $res->{$vmid}->{diskwrite} = $totalwrbytes;
    };

    my $statuscb = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
        # this fails if the balloon driver is not loaded, so this must be
        # the last command (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');

        my $status = $resp->{'return'}->{status};
        if (!defined($status)) {
            warn "unable to get VM status\n";
            return;
        }

        $res->{$vmid}->{qmpstatus} = $status;
    };

    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; # not running
        $qmpclient->queue_cmd($vmid, $statuscb, 'query-status');
    }

    $qmpclient->queue_execute(undef, 1);

    # fall back to the process based status if QMP gave no answer
    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        my $entry = $res->{$vmid};
        $entry->{qmpstatus} = $entry->{status} if !$entry->{qmpstatus};
    }

    return $res;
}
2496
# Walk the DIMM layout upwards from 1024 (static memory) and call
#   $func->($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory)
# for each DIMM until the accumulated size reaches $memory.
#
# There are 8 groups of 32 DIMMs; the DIMM size starts at 512 and doubles
# with every group. DIMMs are named dimm0, dimm1, ... and are spread over
# the NUMA nodes round-robin ($i % $sockets).
#
# Returns nothing when $memory is exactly 1024, otherwise the accumulated
# size once it is >= $memory.
sub foreach_dimm {
    my ($conf, $vmid, $memory, $sockets, $func) = @_;

    my $current_size = 1024;
    return if $current_size == $memory;

    my $dimm_size = 512;

    for (my $j = 0; $j < 8; $j++) {
        for (my $i = 0; $i < 32; $i++) {
            my $name = "dimm" . ($j * 32 + $i);
            my $numanode = $i % $sockets;

            $current_size += $dimm_size;
            $func->($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory);

            return $current_size if $current_size >= $memory;
        }
        $dimm_size *= 2;
    }
}
2517
# Walk the DIMM layout downwards from 4177920 and call
#   $func->($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory)
# for each DIMM until the remaining size has dropped to $memory.
#
# DIMMs are named dimm253, dimm252, ...; the DIMM size starts at 65536
# and is halved after every group of 32 DIMMs.
#
# Returns nothing when $memory is exactly 4177920, otherwise the
# remaining size once it is <= $memory.
sub foreach_reverse_dimm {
    my ($conf, $vmid, $memory, $sockets, $func) = @_;

    my $current_size = 4177920;
    return if $current_size == $memory;

    my $dimm_size = 65536;

    for (my $j = 0; $j < 8; $j++) {
        for (my $i = 0; $i < 32; $i++) {
            my $name = "dimm" . (253 - ($j * 32 + $i));
            my $numanode = $i % $sockets;

            $current_size -= $dimm_size;
            $func->($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory);

            return $current_size if $current_size <= $memory;
        }
        $dimm_size /= 2;
    }
}
2538
# Call $func->($ds, $drive) for every key of $conf that is a valid drive
# name and whose value parse_drive() can parse; $drive is the parsed
# drive.
sub foreach_drive {
    my ($conf, $func) = @_;

    foreach my $key (keys %$conf) {
        if (valid_drivename($key)) {
            my $drive = parse_drive($key, $conf->{$key});
            $func->($key, $drive) if $drive;
        }
    }
}
2551
# Call $func->($volid, $is_cdrom) once for every distinct volume
# referenced by $conf: the drives of the current configuration plus the
# drives and the 'vmstate' volume of each snapshot.
#
# When the same volume is seen more than once, the cdrom flag of the
# entry processed last is the one that is reported.
sub foreach_volid {
    my ($conf, $func) = @_;

    my %is_cdrom_of;

    my $remember = sub {
        my ($volid, $is_cdrom) = @_;

        return if !$volid;

        $is_cdrom_of{$volid} = $is_cdrom || 0;
    };

    my $scan_drive = sub {
        my ($ds, $drive) = @_;
        $remember->($drive->{file}, drive_is_cdrom($drive));
    };

    foreach_drive($conf, $scan_drive);

    foreach my $snapname (keys %{$conf->{snapshots}}) {
        my $snap = $conf->{snapshots}->{$snapname};
        $remember->($snap->{vmstate}, 0);
        foreach_drive($snap, $scan_drive);
    }

    foreach my $volid (keys %is_cdrom_of) {
        $func->($volid, $is_cdrom_of{$volid});
    }
}
2583
# Tell whether the 'vga' setting selects a qxl (spice) display.
#
# Returns 0 for anything but 'qxl', 'qxl2', 'qxl3' or 'qxl4'; otherwise
# the digit that follows 'qxl', or 1 for plain 'qxl'.
sub vga_conf_has_spice {
    my ($vga) = @_;

    if ($vga && $vga =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
2591
# Build the command line used to start VM $vmid from its configuration.
#
# Parameters:
#   $storecfg     - storage configuration, passed on to the drive helpers
#   $vmid         - ID of the VM
#   $conf         - VM configuration hash
#   $defaults     - default values (cpuunits, tablet, tdf, memory,
#                   keyboard, scsihw) used when $conf has no setting
#   $forcemachine - machine type overriding $conf->{machine}
#
# Returns ($cmd, $vollist, $spice_port) in list context and $cmd in
# scalar context. $cmd is an array reference with the complete
# 'systemd-run ... kvm ...' command, $vollist lists the storage volumes
# used by the drives and $spice_port is only set for qxl displays.
#
# Dies on an outdated kvm binary and on inconsistent settings (see the
# individual die() calls).
#
# Side effect: with memory hotplug enabled the configuration is updated
# (update_config_nolock) when the requested memory is not aligned to the
# DIMM layout.
sub config_to_command {
    my ($storecfg, $vmid, $conf, $defaults, $forcemachine) = @_;

    my $cmd = [];
    my $globalFlags = [];
    my $machineFlags = [];
    my $rtcFlags = [];
    my $cpuFlags = [];
    my $devices = [];
    my $pciaddr = '';
    my $bridges = {};
    my $kvmver = kvm_user_version();
    my $vernum = 0; # unknown
    if ($kvmver =~ m/^(\d+)\.(\d+)$/) {
        $vernum = $1*1000000+$2*1000;
    } elsif ($kvmver =~ m/^(\d+)\.(\d+)\.(\d+)$/) {
        $vernum = $1*1000000+$2*1000+$3;
    }

    die "detected old qemu-kvm binary ($kvmver)\n" if $vernum < 15000;

    my $q35 = machine_type_is_q35($conf);
    my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
    my $machine_type = $forcemachine || $conf->{machine};

    # 'ostype' is optional; use an empty string for plain string compares
    my $ostype = defined($conf->{ostype}) ? $conf->{ostype} : '';

    my $cpuunits = defined($conf->{cpuunits}) ?
        $conf->{cpuunits} : $defaults->{cpuunits};

    push @$cmd, '/usr/bin/systemd-run';
    push @$cmd, '--scope';
    push @$cmd, '--slice', "qemu";
    push @$cmd, '--unit', $vmid;
    push @$cmd, '-p', "CPUShares=$cpuunits";
    if ($conf->{cpulimit}) {
        my $cpulimit = int($conf->{cpulimit} * 100);
        push @$cmd, '-p', "CPUQuota=$cpulimit\%";
    }

    push @$cmd, '/usr/bin/kvm';

    push @$cmd, '-id', $vmid;

    my $qmpsocket = qmp_socket($vmid);
    push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server,nowait";
    push @$cmd, '-mon', "chardev=qmp,mode=control";

    my $socket = vnc_socket($vmid);
    push @$cmd, '-vnc', "unix:$socket,x509,password";

    push @$cmd, '-pidfile' , pidfile_name($vmid);

    push @$cmd, '-daemonize';

    if ($conf->{smbios1}) {
        push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
    }

    if ($q35) {
        # the q35 chipset support native usb2, so we enable usb controller
        # by default for this machine type
        push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
    } else {
        $pciaddr = print_pci_addr("piix3", $bridges);
        push @$devices, '-device', "piix3-usb-uhci,id=uhci$pciaddr.0x2";

        my $use_usb2 = 0;
        for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) {
            next if !$conf->{"usb$i"};
            $use_usb2 = 1;
        }
        # include usb device config
        push @$devices, '-readconfig', '/usr/share/qemu-server/pve-usb.cfg' if $use_usb2;
    }

    my $vga = $conf->{vga};

    my $qxlnum = vga_conf_has_spice($vga);
    $vga = 'qxl' if $qxlnum;

    if (!$vga) {
        if ($conf->{ostype} && ($conf->{ostype} eq 'win8' ||
                                $conf->{ostype} eq 'win7' ||
                                $conf->{ostype} eq 'w2k8')) {
            $vga = 'std';
        } else {
            $vga = 'cirrus';
        }
    }

    # enable absolute mouse coordinates (needed by vnc)
    my $tablet;
    if (defined($conf->{tablet})) {
        $tablet = $conf->{tablet};
    } else {
        $tablet = $defaults->{tablet};
        $tablet = 0 if $qxlnum; # disable for spice because it is not needed
        $tablet = 0 if $vga =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
    }

    push @$devices, '-device', print_tabletdevice_full($conf) if $tablet;

    # host pci devices
    for (my $i = 0; $i < $MAX_HOSTPCI_DEVICES; $i++) {
        my $d = parse_hostpci($conf->{"hostpci$i"});
        next if !$d;

        my $pcie = $d->{pcie};
        if ($pcie) {
            die "q35 machine model is not enabled" if !$q35;
            $pciaddr = print_pcie_addr("hostpci$i");
        } else {
            $pciaddr = print_pci_addr("hostpci$i", $bridges);
        }

        my $rombar = $d->{rombar} && $d->{rombar} eq 'off' ? ",rombar=0" : "";
        my $driver = $d->{driver} && $d->{driver} eq 'vfio' ? "vfio-pci" : "pci-assign";
        my $xvga = $d->{'x-vga'} && $d->{'x-vga'} eq 'on' ? ",x-vga=on" : "";
        if ($xvga) {
            # x-vga needs vfio and replaces the emulated vga card
            push @$cpuFlags, 'kvm=off';
            $vga = 'none';
            $driver = "vfio-pci";
        }
        my $pcidevices = $d->{pciid};
        # always assign a value: 'my $x = ... if COND' may keep the value
        # of the previous loop iteration when COND is false
        my $multifunction = (@$pcidevices > 1) ? 1 : 0;

        my $j=0;
        foreach my $pcidevice (@$pcidevices) {

            my $id = "hostpci$i";
            $id .= ".$j" if $multifunction;
            my $addr = $pciaddr;
            $addr .= ".$j" if $multifunction;
            my $devicestr = "$driver,host=$pcidevice->{id}.$pcidevice->{function},id=$id$addr";

            if ($j == 0) {
                $devicestr .= "$rombar$xvga";
                $devicestr .= ",multifunction=on" if $multifunction;
            }

            push @$devices, '-device', $devicestr;
            $j++;
        }
    }

    # usb devices
    for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) {
        my $d = parse_usb_device($conf->{"usb$i"});
        next if !$d;
        if ($d->{vendorid} && $d->{productid}) {
            push @$devices, '-device', "usb-host,vendorid=0x$d->{vendorid},productid=0x$d->{productid}";
        } elsif (defined($d->{hostbus}) && defined($d->{hostport})) {
            push @$devices, '-device', "usb-host,hostbus=$d->{hostbus},hostport=$d->{hostport}";
        } elsif ($d->{spice}) {
            # usb redir support for spice
            push @$devices, '-chardev', "spicevmc,id=usbredirchardev$i,name=usbredir";
            push @$devices, '-device', "usb-redir,chardev=usbredirchardev$i,id=usbredirdev$i,bus=ehci.0";
        }
    }

    # serial devices
    for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
        if (my $path = $conf->{"serial$i"}) {
            if ($path eq 'socket') {
                my $socket = "/var/run/qemu-server/${vmid}.serial$i";
                push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server,nowait";
                push @$devices, '-device', "isa-serial,chardev=serial$i";
            } else {
                die "no such serial device\n" if ! -c $path;
                push @$devices, '-chardev', "tty,id=serial$i,path=$path";
                push @$devices, '-device', "isa-serial,chardev=serial$i";
            }
        }
    }

    # parallel devices
    for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
        if (my $path = $conf->{"parallel$i"}) {
            die "no such parallel device\n" if ! -c $path;
            my $devtype = $path =~ m!^/dev/usb/lp! ? 'tty' : 'parport';
            push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
            push @$devices, '-device', "isa-parallel,chardev=parallel$i";
        }
    }

    my $vmname = $conf->{name} || "vm$vmid";

    push @$cmd, '-name', $vmname;

    my $sockets = 1;
    $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer used
    $sockets = $conf->{sockets} if $conf->{sockets};

    my $cores = $conf->{cores} || 1;

    my $maxcpus = $sockets * $cores;

    my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus;

    my $allowed_vcpus = $cpuinfo->{cpus};

    # report the limit of this node, not the number that was requested
    die "MAX $allowed_vcpus vcpus allowed per VM on this node\n"
        if ($allowed_vcpus < $maxcpus);

    push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";

    push @$cmd, '-nodefaults';

    my $bootorder = $conf->{boot} || $confdesc->{boot}->{default};

    # every letter of the boot order gets its own range of boot indexes
    my $bootindex_hash = {};
    my $i = 1;
    foreach my $o (split(//, $bootorder)) {
        $bootindex_hash->{$o} = $i*100;
        $i++;
    }

    push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000";

    push @$cmd, '-no-acpi' if defined($conf->{acpi}) && $conf->{acpi} == 0;

    push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;

    push @$cmd, '-vga', $vga if $vga && $vga !~ m/^serial\d+$/; # for kvm 77 and later

    # time drift fix
    my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};

    my $nokvm = defined($conf->{kvm}) && $conf->{kvm} == 0 ? 1 : 0;
    my $useLocaltime = $conf->{localtime};

    if (my $ost = $conf->{ostype}) {
        # other, wxp, w2k, w2k3, w2k8, wvista, win7, win8, l24, l26, solaris

        if ($ost =~ m/^w/) { # windows
            $useLocaltime = 1 if !defined($conf->{localtime});

            # use time drift fix when acpi is enabled
            if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
                $tdf = 1 if !defined($conf->{tdf});
            }
        }

        if ($ost eq 'win7' || $ost eq 'win8' || $ost eq 'w2k8' ||
            $ost eq 'wvista') {
            push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
            push @$cmd, '-no-hpet';
            if (qemu_machine_feature_enabled ($machine_type, $kvmver, 2, 3)) {
                push @$cpuFlags , 'hv_spinlocks=0x1fff' if !$nokvm;
                push @$cpuFlags , 'hv_vapic' if !$nokvm;
                push @$cpuFlags , 'hv_time' if !$nokvm;

            } else {
                push @$cpuFlags , 'hv_spinlocks=0xffff' if !$nokvm;
            }
        }

        if ($ost eq 'win7' || $ost eq 'win8') {
            push @$cpuFlags , 'hv_relaxed' if !$nokvm;
        }
    }

    push @$rtcFlags, 'driftfix=slew' if $tdf;

    if ($nokvm) {
        push @$machineFlags, 'accel=tcg';
    } else {
        die "No accelerator found!\n" if !$cpuinfo->{hvm};
    }

    if ($machine_type) {
        push @$machineFlags, "type=${machine_type}";
    }

    if ($conf->{startdate}) {
        push @$rtcFlags, "base=$conf->{startdate}";
    } elsif ($useLocaltime) {
        push @$rtcFlags, 'base=localtime';
    }

    my $cpu = $nokvm ? "qemu64" : "kvm64";
    $cpu = $conf->{cpu} if $conf->{cpu};

    push @$cpuFlags , '+lahf_lm' if $cpu eq 'kvm64';

    push @$cpuFlags , '+x2apic' if !$nokvm && $ostype ne 'solaris';

    push @$cpuFlags , '-x2apic' if $ostype eq 'solaris';

    push @$cpuFlags, '+sep' if $cpu eq 'kvm64' || $cpu eq 'kvm32';

    push @$cpuFlags, '-rdtscp' if $cpu =~ m/^Opteron/;

    if (qemu_machine_feature_enabled ($machine_type, $kvmver, 2, 3)) {

        push @$cpuFlags , '+kvm_pv_unhalt' if !$nokvm;
        push @$cpuFlags , '+kvm_pv_eoi' if !$nokvm;
    }

    push @$cpuFlags, 'enforce' if $cpu ne 'host' && !$nokvm;

    $cpu .= "," . join(',', @$cpuFlags) if scalar(@$cpuFlags);

    push @$cmd, '-cpu', $cpu;

    my $memory = $conf->{memory} || $defaults->{memory};
    my $static_memory = 0;

    if ($hotplug_features->{memory}) {
        die "Numa need to be enabled for memory hotplug\n" if !$conf->{numa};
        die "Total memory is bigger than ${MAX_MEM}MB\n" if $memory > $MAX_MEM;
        $static_memory = $STATICMEM;
        die "minimum memory must be ${static_memory}MB\n" if($memory < $static_memory);
        push @$cmd, '-m', "size=${static_memory},slots=255,maxmem=${MAX_MEM}M";

    } else {

        $static_memory = $memory;
        push @$cmd, '-m', $static_memory;
    }

    if ($conf->{numa}) {

        my $numa_totalmemory = undef;
        for (my $i = 0; $i < $MAX_NUMA; $i++) {
            next if !$conf->{"numa$i"};
            my $numa = parse_numa($conf->{"numa$i"});
            next if !$numa;
            # memory
            die "missing numa node$i memory value\n" if !$numa->{memory};
            my $numa_memory = $numa->{memory};
            $numa_totalmemory += $numa_memory;
            my $numa_object = "memory-backend-ram,id=ram-node$i,size=${numa_memory}M";

            # cpus
            my $cpus_start = $numa->{cpus}->{start};
            die "missing numa node$i cpus\n" if !defined($cpus_start);
            my $cpus_end = $numa->{cpus}->{end};
            my $cpus = $cpus_start;
            if (defined($cpus_end)) {
                $cpus .= "-$cpus_end";
                die "numa node$i : cpu range $cpus is incorrect\n" if $cpus_end <= $cpus_start;
            }

            # hostnodes
            my $hostnodes_start = $numa->{hostnodes}->{start};
            if (defined($hostnodes_start)) {
                my $hostnodes_end = $numa->{hostnodes}->{end};
                my $hostnodes = $hostnodes_start;
                if (defined($hostnodes_end)) {
                    $hostnodes .= "-$hostnodes_end";
                    die "host node $hostnodes range is incorrect\n" if $hostnodes_end <= $hostnodes_start;
                }

                my $hostnodes_end_range = defined($hostnodes_end) ? $hostnodes_end : $hostnodes_start;
                for (my $hostnode = $hostnodes_start; $hostnode <= $hostnodes_end_range; $hostnode++ ) {
                    die "host numa node$hostnode don't exist\n" if ! -d "/sys/devices/system/node/node$hostnode/";
                }

                # policy
                my $policy = $numa->{policy};
                die "you need to define a policy for hostnode $hostnodes\n" if !$policy;
                $numa_object .= ",host-nodes=$hostnodes,policy=$policy";
            }

            push @$cmd, '-object', $numa_object;
            push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
        }

        die "total memory for NUMA nodes must be equal to vm static memory\n"
            if $numa_totalmemory && $numa_totalmemory != $static_memory;

        # if no custom topology, we split memory and cores across numa nodes
        if (!$numa_totalmemory) {

            my $numa_memory = ($static_memory / $sockets) . "M";

            for (my $i = 0; $i < $sockets; $i++) {

                my $cpustart = ($cores * $i);
                my $cpus = $cpustart;
                # a range is only needed for more than one core per socket
                $cpus .= "-" . ($cpustart + $cores - 1) if $cores > 1;

                push @$cmd, '-object', "memory-backend-ram,size=$numa_memory,id=ram-node$i";
                push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
            }
        }
    }

    if ($hotplug_features->{memory}) {
        foreach_dimm($conf, $vmid, $memory, $sockets, sub {
            my ($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory) = @_;
            push @$cmd, "-object" , "memory-backend-ram,id=mem-$name,size=${dimm_size}M";
            push @$cmd, "-device", "pc-dimm,id=$name,memdev=mem-$name,node=$numanode";

            # if dimm_memory is not aligned to dimm map
            if ($current_size > $memory) {
                $conf->{memory} = $current_size;
                update_config_nolock($vmid, $conf, 1);
            }
        });
    }

    push @$cmd, '-S' if $conf->{freeze};

    # set keyboard layout
    my $kb = $conf->{keyboard} || $defaults->{keyboard};
    push @$cmd, '-k', $kb if $kb;

    # enable sound
    #my $soundhw = $conf->{soundhw} || $defaults->{soundhw};
    #push @$cmd, '-soundhw', 'es1370';
    #push @$cmd, '-soundhw', $soundhw if $soundhw;

    if ($conf->{agent}) {
        my $qgasocket = qmp_socket($vmid, 1);
        my $pciaddr = print_pci_addr("qga0", $bridges);
        push @$devices, '-chardev', "socket,path=$qgasocket,server,nowait,id=qga0";
        push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
        push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
    }

    my $spice_port;

    if ($qxlnum) {
        if ($qxlnum > 1) {
            if ($conf->{ostype} && $conf->{ostype} =~ m/^w/) {
                for (my $i = 1; $i < $qxlnum; $i++) {
                    my $pciaddr = print_pci_addr("vga$i", $bridges);
                    push @$cmd, '-device', "qxl,id=vga$i,ram_size=67108864,vram_size=33554432$pciaddr";
                }
            } else {
                # assume other OS works like Linux
                push @$cmd, '-global', 'qxl-vga.ram_size=134217728';
                push @$cmd, '-global', 'qxl-vga.vram_size=67108864';
            }
        }

        my $pciaddr = print_pci_addr("spice", $bridges);

        my $nodename = PVE::INotify::nodename();
        my $pfamily = PVE::Tools::get_host_address_family($nodename);
        $spice_port = PVE::Tools::next_spice_port($pfamily);

        push @$devices, '-spice', "tls-port=${spice_port},addr=localhost,tls-ciphers=DES-CBC3-SHA,seamless-migration=on";

        push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
        push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent";
        push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";
    }

    # enable balloon by default, unless explicitly disabled
    if (!defined($conf->{balloon}) || $conf->{balloon}) {
        $pciaddr = print_pci_addr("balloon0", $bridges);
        push @$devices, '-device', "virtio-balloon-pci,id=balloon0$pciaddr";
    }

    if ($conf->{watchdog}) {
        my $wdopts = parse_watchdog($conf->{watchdog});
        $pciaddr = print_pci_addr("watchdog", $bridges);
        my $watchdog = $wdopts->{model} || 'i6300esb';
        push @$devices, '-device', "$watchdog$pciaddr";
        push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
    }

    my $vollist = [];
    my $scsicontroller = {};
    my $ahcicontroller = {};
    my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};

    # Add iscsi initiator name if available
    if (my $initiator = get_initiator_name()) {
        push @$devices, '-iscsi', "initiator-name=$initiator";
    }

    foreach_drive($conf, sub {
        my ($ds, $drive) = @_;

        if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
            push @$vollist, $drive->{file};
        }

        if (drive_is_cdrom ($drive)) {
            if ($bootindex_hash->{d}) {
                $drive->{bootindex} = $bootindex_hash->{d};
                $bootindex_hash->{d} += 1;
            }
        } else {
            if ($bootindex_hash->{c}) {
                $drive->{bootindex} = $bootindex_hash->{c} if $conf->{bootdisk} && ($conf->{bootdisk} eq $ds);
                $bootindex_hash->{c} += 1;
            }
        }

        if ($drive->{interface} eq 'virtio') {
            push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
        }

        if ($drive->{interface} eq 'scsi') {

            my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);

            $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges);
            my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;

            my $iothread = '';
            if ($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}) {
                $iothread .= ",iothread=iothread-$controller_prefix$controller";
                push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
            }

            my $queues = '';
            if ($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}) {
                $queues = ",num_queues=$drive->{queues}";
            }

            # each controller is added only once
            push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues" if !$scsicontroller->{$controller};
            $scsicontroller->{$controller}=1;
        }

        if ($drive->{interface} eq 'sata') {
            my $controller = int($drive->{index} / $MAX_SATA_DISKS);
            $pciaddr = print_pci_addr("ahci$controller", $bridges);
            push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr" if !$ahcicontroller->{$controller};
            $ahcicontroller->{$controller}=1;
        }

        my $drive_cmd = print_drive_full($storecfg, $vmid, $drive);
        push @$devices, '-drive',$drive_cmd;
        push @$devices, '-device', print_drivedevice_full($storecfg, $conf, $vmid, $drive, $bridges);
    });

    for (my $i = 0; $i < $MAX_NETS; $i++) {
        next if !$conf->{"net$i"};
        my $d = parse_net($conf->{"net$i"});
        next if !$d;

        if ($bootindex_hash->{n}) {
            $d->{bootindex} = $bootindex_hash->{n};
            $bootindex_hash->{n} += 1;
        }

        my $netdevfull = print_netdev_full($vmid,$conf,$d,"net$i");
        push @$devices, '-netdev', $netdevfull;

        my $netdevicefull = print_netdevice_full($vmid,$conf,$d,"net$i",$bridges);
        push @$devices, '-device', $netdevicefull;
    }

    if (!$q35) {
        # add pci bridges
        if (qemu_machine_feature_enabled ($machine_type, $kvmver, 2, 3)) {
            $bridges->{1} = 1;
            $bridges->{2} = 1;
        }

        $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;

        # bridges go in front of the devices that are plugged into them
        foreach my $k (keys %$bridges) {
            $pciaddr = print_pci_addr("pci.$k");
            unshift @$devices, '-device', "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr" if $k > 0;
        }
    }

    # add custom args
    if ($conf->{args}) {
        my $aa = PVE::Tools::split_args($conf->{args});
        push @$cmd, @$aa;
    }

    push @$cmd, @$devices;
    push @$cmd, '-rtc', join(',', @$rtcFlags)
        if scalar(@$rtcFlags);
    push @$cmd, '-machine', join(',', @$machineFlags)
        if scalar(@$machineFlags);
    push @$cmd, '-global', join(',', @$globalFlags)
        if scalar(@$globalFlags);

    return wantarray ? ($cmd, $vollist, $spice_port) : $cmd;
}
3182
# Path of the VNC unix socket of VM $vmid below $var_run_tmpdir.
sub vnc_socket {
    my ($vmid) = @_;

    return $var_run_tmpdir . "/$vmid.vnc";
}
3187
# Ask the running VM (monitor command 'query-spice') for its spice port.
#
# Returns the 'tls-port' if set, else the 'port'; dies with
# "no spice port" when neither is set.
sub spice_port {
    my ($vmid) = @_;

    my $info = vm_mon_cmd($vmid, 'query-spice');

    my $port = $info->{'tls-port'} || $info->{'port'};
    die "no spice port\n" if !$port;

    return $port;
}
3195
# Return the path of the monitor socket of VM $vmid: the '.qga' socket
# when $qga is true, the '.qmp' socket otherwise.
sub qmp_socket {
    my ($vmid, $qga) = @_;

    my $suffix = 'qmp';
    $suffix = 'qga' if $qga;

    return "${var_run_tmpdir}/$vmid.$suffix";
}
3201
# Return the path of the pid file of VM $vmid.
sub pidfile_name {
    my $vmid = shift;

    return $var_run_tmpdir . "/$vmid.pid";
}
3206
# Build a hash of the device ids currently present in the running VM.
#
# Sources: 'query-pci' (qdev ids, including devices behind PCI bridges),
# 'query-block' (ids taken from 'drive-<id>' names) and 'query-mice'
# (key 'tablet' when a 'QEMU HID Tablet' is reported).
# The value is 1, except for PCI bridges, where it is 1 plus the number of
# identified devices behind the bridge.
sub vm_devices_list {
    my ($vmid) = @_;

    my $pci_info = vm_mon_cmd($vmid, 'query-pci');
    my $found = {};

    for my $bus (@$pci_info) {
        for my $dev (@{$bus->{devices}}) {
            my $id = $dev->{'qdev_id'};
            next if !$id;

            $found->{$id} = 1;
            next if !$dev->{'pci_bridge'};

            # count every identified child on top of the bridge itself
            for my $child (@{$dev->{'pci_bridge'}->{devices}}) {
                my $child_id = $child->{'qdev_id'};
                next if !$child_id;
                $found->{$child_id} = 1;
                $found->{$id}++;
            }
        }
    }

    my $block_info = vm_mon_cmd($vmid, 'query-block');
    for my $blk (@$block_info) {
        $found->{$1} = 1 if $blk->{device} =~ m/^drive-(\S+)/;
    }

    my $mice_info = vm_mon_cmd($vmid, 'query-mice');
    for my $mouse (@$mice_info) {
        next if $mouse->{name} ne 'QEMU HID Tablet';
        $found->{tablet} = 1;
        last;
    }

    return $found;
}
3245
# Hotplug device $deviceid into the running VM $vmid.
#
# Supported ids: 'tablet', virtioN, virtioscsiN/scsihwN, scsiN, netN and
# (non-q35 only) 'pci.N' bridges. $device is the parsed drive/net
# description where one is needed.
#
# Returns 1 when the device is already present or was plugged
# successfully; dies on failure or for an unsupported device id.
# For drives and network devices the already created backend (drive or
# netdev) is removed again when adding the frontend device fails.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device) = @_;

    my $q35 = machine_type_is_q35($conf);

    my $devices_list = vm_devices_list($vmid);
    return 1 if defined($devices_list->{$deviceid});

    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid); # add PCI bridge if we need it for the device

    if ($deviceid eq 'tablet') {

        qemu_deviceadd($vmid, print_tabletdevice_full($conf));

    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {

        qemu_iothread_add($vmid, $deviceid, $device);

        qemu_driveadd($storecfg, $vmid, $device);
        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device);

        # device_add itself may fail as well - do not leave the drive behind
        eval {
            qemu_deviceadd($vmid, $devicefull);
            qemu_deviceaddverify($vmid, $deviceid);
        };
        if (my $err = $@) {
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }

    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {

        my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : "lsi";
        my $pciaddr = print_pci_addr($deviceid);
        my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw;

        my $devicefull = "$scsihw_type,id=$deviceid$pciaddr";

        # iothread and queue settings only apply to per-disk virtioscsiN controllers
        if ($deviceid =~ m/^virtioscsi(\d+)$/) {
            if ($device->{iothread}) {
                qemu_iothread_add($vmid, $deviceid, $device);
                $devicefull .= ",iothread=iothread-$deviceid";
            }
            if ($device->{queues}) {
                $devicefull .= ",num_queues=$device->{queues}";
            }
        }

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);

    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {

        qemu_findorcreatescsihw($storecfg,$conf, $vmid, $device);
        qemu_driveadd($storecfg, $vmid, $device);

        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device);
        eval { qemu_deviceadd($vmid, $devicefull); };
        if (my $err = $@) {
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }

    } elsif ($deviceid =~ m/^(net)(\d+)$/) {

        return undef if !qemu_netdevadd($vmid, $conf, $device, $deviceid);
        my $netdevicefull = print_netdevice_full($vmid, $conf, $device, $deviceid);

        # device_add itself may fail as well - do not leave the netdev behind
        eval {
            qemu_deviceadd($vmid, $netdevicefull);
            qemu_deviceaddverify($vmid, $deviceid);
        };
        if (my $err = $@) {
            eval { qemu_netdevdel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }

    } elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) {

        my $bridgeid = $2;
        my $pciaddr = print_pci_addr($deviceid);
        my $devicefull = "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr";

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);

    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
3336
# fixme: this should raise exceptions on error!
#
# Hot-unplug device $deviceid from the running VM $vmid.
# Returns 1 when the device is not present (nothing to do) or was removed;
# dies for the boot disk and for unsupported device ids.
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    my $present = vm_devices_list($vmid);
    return 1 unless defined($present->{$deviceid});

    if ($conf->{bootdisk} && $conf->{bootdisk} eq $deviceid) {
        die "can't unplug bootdisk";
    }

    # device_del followed by polling until the device is really gone
    my $remove_and_verify = sub {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
    };

    if ($deviceid eq 'tablet') {

        qemu_devicedel($vmid, $deviceid);

    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {

        $remove_and_verify->();
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($conf, $vmid, $deviceid);

    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {

        $remove_and_verify->();
        qemu_iothread_del($conf, $vmid, $deviceid);

    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {

        #qemu 2.3 segfault on drive_del with virtioscsi + iothread
        my $drive = parse_drive($deviceid, $conf->{$deviceid});
        if ($drive->{iothread}) {
            die "virtioscsi with iothread is not hot-unplugglable currently";
        }

        qemu_devicedel($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

    } elsif ($deviceid =~ m/^(net)(\d+)$/) {

        $remove_and_verify->();
        qemu_netdevdel($vmid, $deviceid);

    } else {
        die "can't unplug device '$deviceid'\n";
    }

    return 1;
}
3385
# Issue 'device_add' for a device given as "<driver>,key=value,..." string.
# The string is split on '=' and ',' into the command's key/value options,
# with the leading driver name stored under 'driver'.
sub qemu_deviceadd {
    my ($vmid, $devicefull) = @_;

    my %options = split(/[=,]/, "driver=" . $devicefull);

    return vm_mon_cmd($vmid, "device_add" , %options);
}
3394
# Issue 'device_del' for device $deviceid and return the command result.
sub qemu_devicedel {
    my ($vmid, $deviceid) = @_;

    return scalar(vm_mon_cmd($vmid, "device_del", id => $deviceid));
}
3400
# Create the 'iothread-<deviceid>' object for a device that has the
# iothread option set, unless the VM already has such an object.
sub qemu_iothread_add {
    my($vmid, $deviceid, $device) = @_;

    if ($device->{iothread}) {
        my $object_id = "iothread-$deviceid";
        my $existing = vm_iothreads_list($vmid);
        qemu_objectadd($vmid, $object_id, "iothread") unless $existing->{$object_id};
    }
}
3409
# Remove the 'iothread-<deviceid>' object of a device whose configuration
# entry has the iothread option set, if the VM still has that object.
sub qemu_iothread_del {
    my($conf, $vmid, $deviceid) = @_;

    my $drive = parse_drive($deviceid, $conf->{$deviceid});
    if ($drive->{iothread}) {
        my $object_id = "iothread-$deviceid";
        my $existing = vm_iothreads_list($vmid);
        qemu_objectdel($vmid, $object_id) if $existing->{$object_id};
    }
}
3419
# Issue 'object-add' creating object $objectid of type $qomtype.
# Always returns 1 (errors are raised by the monitor command).
sub qemu_objectadd {
    my($vmid, $objectid, $qomtype) = @_;

    my @args = (id => $objectid, "qom-type" => $qomtype);
    vm_mon_cmd($vmid, "object-add", @args);

    return 1;
}
3427
# Issue 'object-del' for object $objectid.
# Always returns 1 (errors are raised by the monitor command).
sub qemu_objectdel {
    my($vmid, $objectid) = @_;

    my @args = (id => $objectid);
    vm_mon_cmd($vmid, "object-del", @args);

    return 1;
}
3435
# Add the drive backend for $device via the human monitor ('drive_add').
# Returns 1 on success, dies with the monitor output otherwise.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    # double every backslash before embedding the spec in the quoted argument
    (my $spec = print_drive_full($storecfg, $vmid, $device)) =~ s/\\/\\\\/g;

    my $output = vm_human_monitor_command($vmid, "drive_add auto \"$spec\"");

    # If the command succeeds qemu prints: "OK"
    if ($output =~ m/OK/s) {
        return 1;
    }

    die "adding drive failed: $output\n";
}
3448
# Remove the drive backend 'drive-<deviceid>' via the human monitor.
# Returns 1 on success (empty output) or when the drive is already gone;
# dies with the monitor output otherwise.
sub qemu_drivedel {
    my($vmid, $deviceid) = @_;

    my $output = vm_human_monitor_command($vmid, "drive_del drive-$deviceid");
    $output =~ s/^\s+//;

    if ($output eq "") {
        return 1;
    }

    # NB: device not found errors mean the drive was auto-deleted and we ignore the error
    if ($output =~ m/Device \'.*?\' not found/s) {
        return 1;
    }

    die "deleting drive $deviceid failed : $output\n";
}
3462
# Poll the device list (up to 6 checks, one second apart) until $deviceid
# shows up. Returns 1 once it is present, dies if it never appears.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    foreach my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        if (defined($present->{$deviceid})) {
            return 1;
        }
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
3474
3475
# Poll the device list (up to 6 checks, one second apart) until $deviceid
# has disappeared. Returns 1 once it is gone, dies if it is still there.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    # device_del is asynchronous and its empty return value is not
    # reliable, so the removal has to be confirmed explicitly

    foreach my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        if (!defined($present->{$deviceid})) {
            return 1;
        }
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
3490
# Make sure the SCSI controller needed by drive $device exists in the
# running VM, hotplugging it when it is missing. Always returns 1.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device) = @_;

    my (undef, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $scsihwid = "$controller_prefix$controller";

    my $present = vm_devices_list($vmid);
    unless (defined($present->{$scsihwid})) {
        vm_deviceplug($storecfg, $conf, $vmid, $scsihwid, $device);
    }

    return 1;
}
3505
# Remove the SCSI controller that served drive $opt when it is no longer
# needed.
#
# With 'virtio-scsi-single' the per-disk controller virtioscsi<index> is
# always unplugged. Otherwise the shared controller scsihw<N> is only
# unplugged when no remaining SCSI drive matches the index check below.
# Always returns 1.
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    my $single = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
    if ($single) {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $device);

    my $present = vm_devices_list($vmid);
    foreach my $name (keys %{$present}) {
        next unless PVE::QemuServer::valid_drivename($name);

        my $drive = PVE::QemuServer::parse_drive($name, $conf->{$name});
        # controller still in use - keep it
        return 1
            if $drive->{interface} eq 'scsi' && $drive->{index} < (($maxdev-1)*($controller+1));
    }

    vm_deviceunplug($vmid, $conf, "scsihw$controller");

    return 1;
}
3534
# Hotplug the PCI bridge that device id $device needs, if any.
# print_pci_addr() records the required bridge number in the hash passed
# to it; bridges numbered below 1 need no hotplug. Always returns 1.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device) = @_;

    my $bridges = {};
    print_pci_addr($device, $bridges);

    # take the last key the hash iteration yields
    my $bridgeid;
    foreach my $k (keys %$bridges) {
        $bridgeid = $k;
    }

    if (!defined($bridgeid) || $bridgeid < 1) {
        return 1;
    }

    my $bridge = "pci.$bridgeid";
    my $present = vm_devices_list($vmid);

    vm_deviceplug($storecfg, $conf, $vmid, $bridge) unless defined($present->{$bridge});

    return 1;
}
3558
# Issue 'set_link' for network device $device; a true $up brings the
# link up, a false value takes it down.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    my $state = $up ? JSON::true : JSON::false;

    vm_mon_cmd($vmid, "set_link", name => $device, up => $state);
}
3565
# Issue 'netdev_add' for network device $deviceid. The netdev description
# string is split on '=' and ',' into the command's key/value options.
# Always returns 1 (errors are raised by the monitor command).
sub qemu_netdevadd {
    my ($vmid, $conf, $device, $deviceid) = @_;

    my %options = split(/[=,]/, print_netdev_full($vmid, $conf, $device, $deviceid));

    vm_mon_cmd($vmid, "netdev_add", %options);

    return 1;
}
3575
# Issue 'netdev_del' for the network backend with id $deviceid.
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    my @args = (id => $deviceid);

    vm_mon_cmd($vmid, "netdev_del", @args);
}
3581
# Hotplug virtual CPUs so that the running VM ends up with $vcpus CPUs.
# A false $vcpus means "all" (sockets * cores). Dies when more than the
# maximum is requested, when CPUs would have to be removed, or when the
# number of running CPUs does not match the configuration.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    # 'smp' is the old style setting (no longer used); 'sockets' wins
    my $sockets = $conf->{sockets} || $conf->{smp} || 1;
    my $cores = $conf->{cores} || 1;
    my $maxcpus = $sockets * $cores;

    $vcpus ||= $maxcpus;

    if ($vcpus > $maxcpus) {
        die "you can't add more vcpus than maxcpus\n";
    }

    my $currentvcpus = $conf->{vcpus} || $maxcpus;
    if ($vcpus < $currentvcpus) {
        die "online cpu unplug is not yet possible\n";
    }

    my $running_cpus = vm_mon_cmd($vmid, "query-cpus");
    if (scalar(@{$running_cpus}) != $currentvcpus) {
        die "vcpus in running vm is different than configuration\n";
    }

    # add the missing CPUs one by one, ids continue after the current count
    my $cpu_id = $currentvcpus;
    while ($cpu_id < $vcpus) {
        vm_mon_cmd($vmid, "cpu-add", id => int($cpu_id));
        $cpu_id++;
    }
}
3608
# Hot(un)plug memory DIMMs of VM $vmid so that its memory matches $value (MB).
#
# Returns $value unchanged when the VM is not running, or when the
# requested size equals the configured one (a false $value means the
# default memory size). Otherwise DIMMs are plugged (growing) or unplugged
# (shrinking) one at a time, and $conf->{memory} is written to the config
# after every successful step.
#
# Dies when $value is below the static memory size or above $MAX_MEM, and
# when plugging/unplugging a module fails.
#
# NOTE(review): on the hot(un)plug path there is no explicit return, so the
# result is whatever the last foreach_dimm/foreach_reverse_dimm call
# yields - callers that store the return value should be checked.
sub qemu_memory_hotplug {
    my ($vmid, $conf, $defaults, $opt, $value) = @_;

    return $value if !check_running($vmid);

    my $memory = $conf->{memory} || $defaults->{memory};
    $value = $defaults->{memory} if !$value;
    return $value if $value == $memory;

    my $static_memory = $STATICMEM;

    die "memory can't be lower than $static_memory MB" if $value < $static_memory;
    # the limit has to be checked against the requested size, not against
    # the size which is currently configured
    die "you cannot add more memory than $MAX_MEM MB!\n" if $value > $MAX_MEM;

    my $sockets = 1;
    $sockets = $conf->{sockets} if $conf->{sockets};

    if($value > $memory) {

        foreach_dimm($conf, $vmid, $value, $sockets, sub {
            my ($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory) = @_;

            # module is already covered by the configured memory size
            return if $current_size <= $conf->{memory};

            eval { vm_mon_cmd($vmid, "object-add", 'qom-type' => "memory-backend-ram", id => "mem-$name", props => { size => int($dimm_size*1024*1024) } ) };
            if (my $err = $@) {
                eval { qemu_objectdel($vmid, "mem-$name"); };
                die $err;
            }

            eval { vm_mon_cmd($vmid, "device_add", driver => "pc-dimm", id => "$name", memdev => "mem-$name", node => $numanode) };
            if (my $err = $@) {
                # do not leave the memory backend behind
                eval { qemu_objectdel($vmid, "mem-$name"); };
                die $err;
            }
            #update conf after each succesful module hotplug
            $conf->{memory} = $current_size;
            update_config_nolock($vmid, $conf, 1);
        });

    } else {

        foreach_reverse_dimm($conf, $vmid, $value, $sockets, sub {
            my ($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory) = @_;

            # module is not part of the configured memory size
            return if $current_size >= $conf->{memory};
            print "try to unplug memory dimm $name\n";

            # device_del errors are ignored; the dimm list decides whether
            # the unplug worked, and the request is repeated a few times
            my $retry = 0;
            while (1) {
                eval { qemu_devicedel($vmid, $name) };
                sleep 3;
                my $dimm_list = qemu_dimm_list($vmid);
                last if !$dimm_list->{$name};
                raise_param_exc({ $name => "error unplug memory module" }) if $retry > 5;
                $retry++;
            }

            #update conf after each succesful module unplug
            $conf->{memory} = $current_size;

            eval { qemu_objectdel($vmid, "mem-$name"); };
            update_config_nolock($vmid, $conf, 1);
        });
    }
}
3677
# Return a hash of the memory devices reported by 'query-memory-devices',
# keyed by device id; each entry holds id, node, addr, size and slot.
sub qemu_dimm_list {
    my ($vmid) = @_;

    my $reported = vm_mon_cmd_nocheck($vmid, "query-memory-devices");
    my $dimms = {};

    foreach my $entry (@$reported) {
        my $id = $entry->{data}->{id};
        foreach my $field (qw(id node addr size slot)) {
            $dimms->{$id}->{$field} = $entry->{data}->{$field};
        }
    }

    return $dimms;
}
3694
# Apply I/O throttling limits to block device $deviceid of a running VM
# ('block_set_io_throttle'). Does nothing when the VM is not running.
#
# Parameters after $deviceid:
#   $bps, $bps_rd, $bps_wr, $iops, $iops_rd, $iops_wr - sustained limits
#   $bps_max, $bps_rd_max, $bps_wr_max,
#   $iops_max, $iops_rd_max, $iops_wr_max - optional burst limits; each one
#       is only passed on to the monitor command when it is defined, so
#       callers using the old 8-argument form behave as before.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid,
        $bps, $bps_rd, $bps_wr, $iops, $iops_rd, $iops_wr,
        $bps_max, $bps_rd_max, $bps_wr_max, $iops_max, $iops_rd_max, $iops_wr_max) = @_;

    return if !check_running($vmid) ;

    my @params = (
        device => $deviceid,
        bps => int($bps),
        bps_rd => int($bps_rd),
        bps_wr => int($bps_wr),
        iops => int($iops),
        iops_rd => int($iops_rd),
        iops_wr => int($iops_wr),
    );

    # burst limits used to be dropped silently although callers pass them
    my @burst = (
        bps_max => $bps_max,
        bps_rd_max => $bps_rd_max,
        bps_wr_max => $bps_wr_max,
        iops_max => $iops_max,
        iops_rd_max => $iops_rd_max,
        iops_wr_max => $iops_wr_max,
    );
    while (my ($key, $limit) = splice(@burst, 0, 2)) {
        push @params, $key => int($limit) if defined($limit);
    }

    vm_mon_cmd($vmid, "block_set_io_throttle", @params);

}
3703
# old code, only used to shutdown old VM after update
#
# Read from the monitor handle $fh until the "(qemu) " prompt shows up at
# the end of the data, the peer closes the connection, or no data arrives
# within $timeout seconds.
#
# Returns everything read, without the trailing prompt. Dies with
# "monitor read timeout" on timeout and with the system error message
# when reading fails.
sub __read_avail {
    my ($fh, $timeout) = @_;

    my $sel = IO::Select->new();
    $sel->add($fh);

    my $res = '';
    my $buf;

    my @ready;
    while (scalar (@ready = $sel->can_read($timeout))) {
        my $count = $fh->sysread($buf, 8192);
        if ($count) {
            $res .= $buf;
            # look for the prompt in everything read so far - it may be
            # split across two reads
            if ($res =~ /^(.*)\(qemu\) $/s) {
                $res = $1;
                last;
            }
        } else {
            if (!defined($count)) {
                die "$!\n";
            }
            last; # end of file
        }
    }

    die "monitor read timeout\n" if !scalar(@ready);

    return $res;
}
3736
# old code, only used to shutdown old VM after update
#
# Send $cmdstr to the human monitor socket of VM $vmid and return its
# output (echo line removed, lines joined with "\n"). Returns undef for
# 'q'/'quit', which are sent without waiting for an answer. Any failure is
# logged to syslog and re-raised.
sub vm_monitor_command {
    my ($vmid, $cmdstr, $nocheck) = @_;

    my $res;

    eval {
        die "VM $vmid not running\n" if !check_running($vmid, $nocheck);

        my $sname = "${var_run_tmpdir}/$vmid.mon";

        my $sock = IO::Socket::UNIX->new( Peer => $sname ) ||
            die "unable to connect to VM $vmid socket - $!\n";

        # hack: migrate sometime blocks the monitor (when migrate_downtime
        # is set)
        my $is_migrate = $cmdstr =~ m/^(info\s+migrate|migrate\s)/;

        my $timeout = $is_migrate ? 60*60 : 3; # 1 hour for migrate

        # read banner;
        my $banner = __read_avail($sock, $timeout);
        die "got unexpected qemu monitor banner\n"
            if $banner !~ m/^QEMU\s+(\S+)\s+monitor\s/;

        my $sel = IO::Select->new();
        $sel->add($sock);

        my @writable = $sel->can_write($timeout);
        die "monitor write error - timeout" if !scalar(@writable);

        my $fullcmd = "$cmdstr\r";

        # syslog('info', "VM $vmid monitor command: $cmdstr");

        my $written = $sock->syswrite($fullcmd);
        if (!$written || ($written != length($fullcmd))) {
            die "monitor write error - $!";
        }

        # leaves the eval only; no answer is read for quit
        return if ($cmdstr eq 'q') || ($cmdstr eq 'quit');

        if ($is_migrate) {
            $timeout = 60*60; # 1 hour
        } elsif ($cmdstr =~ m/^(eject|change)/) {
            $timeout = 60; # note: cdrom mount command is slow
        } else {
            $timeout = 20;
        }

        if ($res = __read_avail($sock, $timeout)) {

            my @lines = split("\r?\n", $res);

            shift @lines if $lines[0] !~ m/^unknown command/; # skip echo

            $res = join("\n", @lines) . "\n";
        }
    };

    if (my $err = $@) {
        syslog("err", "VM $vmid monitor command failed - $err");
        die $err;
    }

    return $res;
}
3811
# Resize volume $volid on the storage and, when the VM is running, tell
# the running VM about the new size via 'block_resize'.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $running = check_running($vmid);

    my $resized = PVE::Storage::volume_resize($storecfg, $volid, $size, $running);
    return if !$resized;

    # nothing else to do for a stopped VM
    return if !$running;

    vm_mon_cmd($vmid, "block_resize", device => $deviceid, size => int($size));

}
3824
# Take snapshot $snap of volume $volid: through the running VM
# ('snapshot-drive') when do_snapshots_with_qemu() says so, through the
# storage layer otherwise.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $via_qemu = check_running($vmid) && do_snapshots_with_qemu($storecfg, $volid);

    if ($via_qemu) {
        vm_mon_cmd($vmid, "snapshot-drive", device => $deviceid, name => $snap);
    } else {
        PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
    }
}
3836
# Delete snapshot $snap of volume $volid through the storage layer. When
# that call returns a true value and the VM is running, the snapshot is
# additionally removed through the running VM ('delete-drive-snapshot').
sub qemu_volume_snapshot_delete {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $running = check_running($vmid);

    my $needs_qemu = PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $running);
    return if !$needs_qemu;

    return if !$running;

    vm_mon_cmd($vmid, "delete-drive-snapshot", device => $deviceid, name => $snap);
}
3848
# Set every migration capability the VM reports to the state listed in
# the table below; capabilities missing from the table are switched off.
sub set_migration_caps {
    my ($vmid) = @_;

    my $enabled_cap = {
        "auto-converge" => 1,
        "xbzrle" => 0,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
    };

    my $supported = vm_mon_cmd_nocheck($vmid, "query-migrate-capabilities");

    my @wanted = map {
        my $name = $_->{capability};
        +{
            capability => $name,
            state => $enabled_cap->{$name} ? JSON::true : JSON::false,
        };
    } @$supported;

    vm_mon_cmd_nocheck($vmid, "migrate-set-capabilities", capabilities => \@wanted);
}
3872
# Options which vmconfig_hotplug_pending commits to the configuration
# right away, without any hotplug action.
my $fast_plug_option = {
    map { $_ => 1 } qw(lock name onboot shares startup description)
};
3881
# hotplug changes in [PENDING]
# $selection hash can be used to only apply specified options, for
# example: { cores => 1 } (only apply changed 'cores')
# $errors ref is used to return error messages
#
# Every option which could be applied is moved from the pending section
# into the active configuration and the config file is rewritten right
# away. Options which cannot be hotplugged stay pending ("skip").
sub vmconfig_hotplug_pending {
    my ($vmid, $conf, $storecfg, $selection, $errors) = @_;

    my $defaults = load_defaults();

    my $add_error = sub {
        my ($opt, $msg) = @_;
        $errors->{$opt} = "hotplug problem - $msg";
    };

    # commit values which do not have any impact on running VM first
    # Note: those options cannot raise errors, so we do not care about
    # $selection and always apply them.
    my $changes = 0;
    foreach my $opt (keys %{$conf->{pending}}) { # add/change
        next if !$fast_plug_option->{$opt};
        $conf->{$opt} = delete $conf->{pending}->{$opt};
        $changes = 1;
    }

    if ($changes) {
        update_config_nolock($vmid, $conf, 1);
        $conf = load_config($vmid); # update/reload
    }

    my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');

    # pass 1: options scheduled for deletion
    my $pending_delete_hash = split_flagged_list($conf->{pending}->{delete});
    while (my ($opt, $force) = each %$pending_delete_hash) {
        next if $selection && !$selection->{$opt};

        eval {
            if ($opt eq 'hotplug') {
                die "skip\n" if ($conf->{hotplug} =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" unless $hotplug_features->{usb};
                # the option falls back to its default value
                if ($defaults->{tablet}) {
                    vm_deviceplug($storecfg, $conf, $vmid, $opt);
                } else {
                    vm_deviceunplug($vmid, $conf, $opt);
                }
            } elsif ($opt eq 'vcpus') {
                die "skip\n" unless $hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, undef);
            } elsif ($opt eq 'balloon') {
                # enable balloon device is not hotpluggable
                die "skip\n" if !defined($conf->{balloon}) || $conf->{balloon};
            } elsif ($fast_plug_option->{$opt}) {
                # do nothing
            } elsif ($opt =~ m/^net(\d+)$/) {
                die "skip\n" unless $hotplug_features->{network};
                vm_deviceunplug($vmid, $conf, $opt);
            } elsif (valid_drivename($opt)) {
                die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
                vm_deviceunplug($vmid, $conf, $opt);
                vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
            } elsif ($opt =~ m/^memory$/) {
                die "skip\n" unless $hotplug_features->{memory};
                qemu_memory_hotplug($vmid, $conf, $defaults, $opt);
            } elsif ($opt eq 'cpuunits') {
                cgroups_write("cpu", $vmid, "cpu.shares", $defaults->{cpuunits});
            } elsif ($opt eq 'cpulimit') {
                cgroups_write("cpu", $vmid, "cpu.cfs_quota_us", -1);
            } else {
                die "skip\n";
            }
        };
        my $err = $@;

        if ($err) {
            $add_error->($opt, $err) if $err ne "skip\n";
            next;
        }

        # save new config if hotplug was successful
        delete $conf->{$opt};
        vmconfig_undelete_pending_option($conf, $opt);
        update_config_nolock($vmid, $conf, 1);
        $conf = load_config($vmid); # update/reload
    }

    # pass 2: options which were added or changed
    foreach my $opt (keys %{$conf->{pending}}) {
        next if $selection && !$selection->{$opt};

        my $value = $conf->{pending}->{$opt};

        eval {
            if ($opt eq 'hotplug') {
                die "skip\n" if ($value =~ /memory/) || ($value !~ /memory/ && $conf->{hotplug} =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" unless $hotplug_features->{usb};
                if ($value == 1) {
                    vm_deviceplug($storecfg, $conf, $vmid, $opt);
                } elsif ($value == 0) {
                    vm_deviceunplug($vmid, $conf, $opt);
                }
            } elsif ($opt eq 'vcpus') {
                die "skip\n" unless $hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, $value);
            } elsif ($opt eq 'balloon') {
                # enable/disable ballooning device is not hotpluggable
                my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
                my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
                die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;

                # allow manual ballooning if shares is set to zero
                if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
                    my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory};
                    vm_mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
                }
            } elsif ($opt =~ m/^net(\d+)$/) {
                # some changes can be done without hotplug
                vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
                                    $vmid, $opt, $value);
            } elsif (valid_drivename($opt)) {
                # some changes can be done without hotplug
                vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
                                     $vmid, $opt, $value, 1);
            } elsif ($opt =~ m/^memory$/) { #dimms
                die "skip\n" unless $hotplug_features->{memory};
                $value = qemu_memory_hotplug($vmid, $conf, $defaults, $opt, $value);
            } elsif ($opt eq 'cpuunits') {
                cgroups_write("cpu", $vmid, "cpu.shares", $conf->{pending}->{$opt});
            } elsif ($opt eq 'cpulimit') {
                my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
                cgroups_write("cpu", $vmid, "cpu.cfs_quota_us", $cpulimit);
            } else {
                die "skip\n"; # skip non-hot-pluggable options
            }
        };
        my $err = $@;

        if ($err) {
            $add_error->($opt, $err) if $err ne "skip\n";
            next;
        }

        # save new config if hotplug was successful
        $conf->{$opt} = $value;
        delete $conf->{pending}->{$opt};
        update_config_nolock($vmid, $conf, 1);
        $conf = load_config($vmid); # update/reload
    }
}
4024
# Free volume $volid on the storage. Dies instead when the volume's path
# is among the paths get_used_paths() reports for the VM.
sub delete_drive {
    my ($vmid, $storecfg, $conf, $key, $volid) = @_;

    # check if the disk is really unused
    my $used_paths = PVE::QemuServer::get_used_paths($vmid, $storecfg, $conf, 1, $key);
    my $path = PVE::Storage::path($storecfg, $volid);

    if ($used_paths->{$path}) {
        die "unable to delete '$volid' - volume is still in use (snapshot?)\n";
    }

    PVE::Storage::vdisk_free($storecfg, $volid);
}
4036
# Free the volume behind $drive if that is allowed: the deletion must be
# forced or concern an 'unused' entry, the drive must not be a cdrom, and
# the VM must own the volume. The user needs 'Datastore.AllocateSpace' on
# the storage. Returns 1 when the volume was freed, undef otherwise.
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    return undef unless $force || $key =~ /^unused/;
    return undef if drive_is_cdrom($drive, 1);

    my $volid = $drive->{file};
    return undef unless vm_is_volid_owner($storecfg, $vmid, $volid);

    my $sid = PVE::Storage::parse_volume_id($volid);
    $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']);
    delete_drive($vmid, $storecfg, $conf, $key, $drive->{file});

    return 1;
}
4052
# Handle the removal of drive option $opt from the configuration: with
# $force the volume is deallocated (requires 'VM.Config.Disk'), without
# it the volume is kept and registered as unused drive.
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $drive = parse_drive($opt, $conf->{$opt});

    my $rpcenv = PVE::RPCEnvironment::get();
    my $authuser = $rpcenv->get_user();

    if (!$force) {
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive);
    } else {
        $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']);
        try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force);
    }
}
4068
# Apply all pending changes to the configuration (cold plug): first the
# pending deletions, then the added/changed options. The configuration is
# reloaded before and written after every single option.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg) = @_;

    # cold plug

    my $pending_delete_hash = split_flagged_list($conf->{pending}->{delete});
    foreach my $opt (keys %$pending_delete_hash) {
        my $force = $pending_delete_hash->{$opt};

        die "internal error" if $opt =~ m/^unused/;
        $conf = load_config($vmid); # update/reload

        my $is_set = defined($conf->{$opt});

        # drives are either deallocated or kept as unused volume
        if ($is_set && valid_drivename($opt)) {
            vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
        }

        vmconfig_undelete_pending_option($conf, $opt);
        delete $conf->{$opt} if $is_set;
        update_config_nolock($vmid, $conf, 1);
    }

    $conf = load_config($vmid); # update/reload

    foreach my $opt (keys %{$conf->{pending}}) { # add/change
        $conf = load_config($vmid); # update/reload

        my $unchanged = defined($conf->{$opt}) && ($conf->{$opt} eq $conf->{pending}->{$opt});

        if (!$unchanged) {
            # a replaced drive is kept as unused volume
            if (valid_drivename($opt) && defined($conf->{$opt})) {
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt}));
            }
            $conf->{$opt} = $conf->{pending}->{$opt};
        }

        delete $conf->{pending}->{$opt};
        update_config_nolock($vmid, $conf, 1);
    }
}
4112
# Numeric "not equal" which tolerates undefined values: two undefined
# values are equal (0), exactly one undefined value is a difference (1).
my $safe_num_ne = sub {
    my ($lhs, $rhs) = @_;

    if (!defined($lhs) || !defined($rhs)) {
        return (defined($lhs) || defined($rhs)) ? 1 : 0;
    }

    return $lhs != $rhs;
};
4122
# String "not equal" which tolerates undefined values: two undefined
# values are equal (0), exactly one undefined value is a difference (1).
my $safe_string_ne = sub {
    my ($lhs, $rhs) = @_;

    if (!defined($lhs) || !defined($rhs)) {
        return (defined($lhs) || defined($rhs)) ? 1 : 0;
    }

    return $lhs ne $rhs;
};
4132
# Apply a changed or new network device option $opt (netN) to the running
# VM.
#
# A change of rate limit, bridge, vlan tag, firewall flag or link state is
# applied to the existing device and 1 is returned. A change of model,
# mac address, queues or bridge/nat mode requires the device to be
# unplugged and plugged again, which is only done when $hotplug is set;
# otherwise the sub dies with "skip".
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        my $needs_replug =
            $safe_string_ne->($oldnet->{model}, $newnet->{model}) ||
            $safe_string_ne->($oldnet->{macaddr}, $newnet->{macaddr}) ||
            $safe_num_ne->($oldnet->{queues}, $newnet->{queues}) ||
            !($newnet->{bridge} && $oldnet->{bridge}); # bridge/nat mode change

        if (!$needs_replug) {

            die "internal error" if $opt !~ m/net(\d+)/;
            my $iface = "tap${vmid}i$1";

            if ($safe_num_ne->($oldnet->{rate}, $newnet->{rate})) {
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            my $replug_tap =
                $safe_string_ne->($oldnet->{bridge}, $newnet->{bridge}) ||
                $safe_num_ne->($oldnet->{tag}, $newnet->{tag}) ||
                $safe_num_ne->($oldnet->{firewall}, $newnet->{firewall});
            if ($replug_tap) {
                PVE::Network::tap_unplug($iface);
                PVE::Network::tap_plug($iface, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall});
            }

            if ($safe_string_ne->($oldnet->{link_down}, $newnet->{link_down})) {
                qemu_set_link_status($vmid, $opt, !$newnet->{link_down});
            }

            return 1;
        }

        # for non online change, we try to hot-unplug
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);
    }

    die "skip\n" if !$hotplug;

    vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet);
}
4179
# Apply a changed or new drive option $opt to the running VM.
#
# Existing disk, same volume: throttle limits are applied online and 1 is
# returned; a change of discard, iothread, queues or cache cannot be
# applied online ("skip").
# Existing disk, other volume: the old disk is unplugged and registered as
# unused, then the new one is plugged (requires $hotplug).
# cdrom: the medium is ejected and, unless the new value is 'none', the
# new image is inserted; returns 1.
# New drive: hotplugged when $hotplug is set and the bus is not ide/sata.
#
# Dies with "skip" when the change has to stay pending, and with an error
# message when the media type would change.
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $force) = @_;

    # fixme: do we need force?

    my $drive = parse_drive($opt, $value);

    if ($conf->{$opt}) {

        if (my $old_drive = parse_drive($opt, $conf->{$opt})) {

            my $media = $drive->{media} || 'disk';
            my $oldmedia = $old_drive->{media} || 'disk';
            die "unable to change media type\n" if $media ne $oldmedia;

            if (!drive_is_cdrom($old_drive)) {

                if ($drive->{file} ne $old_drive->{file}) {

                    die "skip\n" if !$hotplug;

                    # unplug and register as unused
                    vm_deviceunplug($vmid, $conf, $opt);
                    vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);

                } else {
                    # update existing disk

                    # skip non hotpluggable value
                    # Note: all of them are compared as strings - a
                    # numeric comparison cannot tell two different
                    # non-numeric values (like the discard setting) apart
                    if (grep { $safe_string_ne->($drive->{$_}, $old_drive->{$_}) }
                        qw(discard iothread queues cache)) {
                        die "skip\n";
                    }

                    # apply throttle
                    # (keys are listed in the argument order of
                    # qemu_block_set_io_throttle)
                    my @throttle_keys = qw(
                        mbps mbps_rd mbps_wr
                        iops iops_rd iops_wr
                        mbps_max mbps_rd_max mbps_wr_max
                        iops_max iops_rd_max iops_wr_max
                    );

                    if (grep { $safe_num_ne->($drive->{$_}, $old_drive->{$_}) } @throttle_keys) {

                        # mbps* values are passed on as bytes per second
                        my @limits = map {
                            my $limit = $drive->{$_} || 0;
                            m/^mbps/ ? $limit*1024*1024 : $limit;
                        } @throttle_keys;

                        qemu_block_set_io_throttle($vmid,"drive-$opt", @limits);
                    }

                    return 1;
                }

            } else { # cdrom

                if ($drive->{file} eq 'none') {
                    vm_mon_cmd($vmid, "eject",force => JSON::true,device => "drive-$opt");
                } else {
                    my $path = get_iso_path($storecfg, $vmid, $drive->{file});
                    vm_mon_cmd($vmid, "eject", force => JSON::true,device => "drive-$opt"); # force eject if locked
                    vm_mon_cmd($vmid, "change", device => "drive-$opt",target => "$path") if $path;
                }

                return 1;
            }
        }
    }

    die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/;
    # hotplug new disks
    vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive);
}
4268
# Start VM $vmid while holding its config lock.
#
# $statefile: 'tcp' makes QEMU wait for an incoming migration (started with
#   '-S'); any other true value is handed to QEMU via '-loadstate' and the
#   VM is continued afterwards.
# $paused: start with '-S' (only looked at when no $statefile is given).
# $migratedfrom: forwarded to load_config()/check_running() and exported as
#   PVE_MIGRATED_FROM for the network script.
# $spice_ticket: set as SPICE password when $migratedfrom is set and a
#   spice port is in use.
# Dies if the VM is a template, locked (unless $skiplock), already running,
# if host PCI devices cannot be prepared, or if the QEMU command fails.
sub vm_start {
    my ($storecfg, $vmid, $statefile, $skiplock, $migratedfrom, $paused, $forcemachine, $spice_ticket) = @_;

    lock_config($vmid, sub {
        my $conf = load_config($vmid, $migratedfrom);

        die "you can't start a vm if it's a template\n" if is_template($conf);

        check_lock($conf) unless $skiplock;

        die "VM $vmid already running\n" if check_running($vmid, undef, $migratedfrom);

        # pending changes are only applied when no state is being restored
        if (!$statefile && scalar(keys %{$conf->{pending}})) {
            vmconfig_apply_pending($vmid, $conf, $storecfg);
            $conf = load_config($vmid); # update/reload
        }

        my $defaults = load_defaults();

        # set environment variable useful inside network script
        $ENV{PVE_MIGRATED_FROM} = $migratedfrom if $migratedfrom;

        my ($cmd, $vollist, $spice_port) = config_to_command($storecfg, $vmid, $conf, $defaults, $forcemachine);

        my $migrate_port = 0;
        my $migrate_uri;
        if ($statefile && $statefile eq 'tcp') {
            my $localip = "localhost";
            my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            my $nodename = PVE::INotify::nodename();
            $localip = PVE::Cluster::remote_node_ip($nodename, 1)
                if $datacenterconf->{migration_unsecure};

            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            $migrate_port = PVE::Tools::next_migrate_port($pfamily);
            $migrate_uri = "tcp:[${localip}]:${migrate_port}";
            push @$cmd, '-incoming', $migrate_uri, '-S';
        } elsif ($statefile) {
            push @$cmd, '-loadstate', $statefile;
        } elsif ($paused) {
            push @$cmd, '-S';
        }

        # host pci devices
        for my $i (0 .. $MAX_HOSTPCI_DEVICES - 1) {
            my $hostpci = parse_hostpci($conf->{"hostpci$i"});
            next if !$hostpci;

            foreach my $pcidevice (@{$hostpci->{pciid}}) {
                my $pciid = "$pcidevice->{id}.$pcidevice->{function}";

                my $info = pci_device_info("0000:$pciid");
                die "IOMMU not present\n" if !check_iommu_support();
                die "no pci device info for device '$pciid'\n" if !$info;

                my $use_vfio = $hostpci->{driver} && $hostpci->{driver} eq "vfio";
                if ($use_vfio) {
                    die "can't unbind/bind pci group to vfio '$pciid'\n" if !pci_dev_group_bind_to_vfio($pciid);
                } else {
                    die "can't unbind/bind to stub pci device '$pciid'\n" if !pci_dev_bind_to_stub($info);
                }

                die "can't reset pci device '$pciid'\n" if $info->{has_fl_reset} and !pci_dev_reset($info);
            }
        }

        PVE::Storage::activate_volumes($storecfg, $vollist);

        # no timeout when restoring state, 30 seconds otherwise
        my $start_timeout = $statefile ? undef : 30;
        eval { run_command($cmd, timeout => $start_timeout, umask => 0077); };
        my $err = $@;
        die "start failed: $err" if $err;

        print "migration listens on $migrate_uri\n" if $migrate_uri;

        if ($statefile && $statefile ne 'tcp') {
            eval { vm_mon_cmd_nocheck($vmid, "cont"); };
            warn $@ if $@;
        }

        if ($migratedfrom) {
            eval { set_migration_caps($vmid); };
            warn $@ if $@;

            if ($spice_port) {
                print "spice listens on port $spice_port\n";
                if ($spice_ticket) {
                    vm_mon_cmd_nocheck($vmid, "set_password", protocol => 'spice', password => $spice_ticket);
                    vm_mon_cmd_nocheck($vmid, "expire_password", protocol => 'spice', time => "+30");
                }
            }
        } else {
            # set the configured balloon target on a fresh start
            if (!$statefile && $conf->{balloon}) {
                vm_mon_cmd_nocheck($vmid, "balloon", value => $conf->{balloon}*1024*1024);
            }

            # re-apply 'link_down' for all configured network devices
            foreach my $opt (keys %$conf) {
                next if $opt !~ m/^net\d+$/;
                my $nicconf = parse_net($conf->{$opt});
                qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
            }
        }

        # balloon counts as enabled unless it is explicitly set to a false value
        my $balloon_enabled = !defined($conf->{balloon}) || $conf->{balloon};
        vm_mon_cmd_nocheck($vmid, 'qom-set',
            path => "machine/peripheral/balloon0",
            property => "guest-stats-polling-interval",
            value => 2) if $balloon_enabled;
    });
}
4387
# Run the QMP command $execute with the named arguments %params on VM $vmid
# and return the result of vm_qmp_command().
sub vm_mon_cmd {
    my ($vmid, $execute, %params) = @_;

    return vm_qmp_command($vmid, { execute => $execute, arguments => \%params });
}
4394
# Same as vm_mon_cmd(), but passes a true $nocheck flag on to
# vm_qmp_command().
sub vm_mon_cmd_nocheck {
    my ($vmid, $execute, %params) = @_;

    return vm_qmp_command($vmid, { execute => $execute, arguments => \%params }, 1);
}
4401
# Send the QMP command $cmd (hash with 'execute' and optional 'arguments')
# to VM $vmid and return the client's result.
#
# A 'timeout' entry inside the arguments is not sent to QEMU; it is removed
# from the hash and used as timeout for the QMP client instead.
# If only the old-style monitor socket exists, commands without arguments
# are forwarded to vm_monitor_command() (no result is returned then).
# Any failure is logged to syslog and re-thrown.
sub vm_qmp_command {
    my ($vmid, $cmd, $nocheck) = @_;

    my $res;

    my $timeout;
    if ($cmd->{arguments} && $cmd->{arguments}->{timeout}) {
        $timeout = delete $cmd->{arguments}->{timeout};
    }

    eval {
        die "VM $vmid not running\n" if !check_running($vmid, $nocheck);
        my $sname = qmp_socket($vmid);
        if (-e $sname) { # test if VM is reasonably new and supports qmp/qga
            my $qmpclient = PVE::QMPClient->new();

            $res = $qmpclient->cmd($vmid, $cmd, $timeout);
        } elsif (-e "${var_run_tmpdir}/$vmid.mon") {
            # 'arguments' is optional (e.g. { execute => "quit" }), so guard
            # the dereference - otherwise strict refs raises an unrelated
            # "undefined value as a HASH reference" error here
            die "can't execute complex command on old monitor - stop/start your vm to fix the problem\n"
                if $cmd->{arguments} && scalar(%{$cmd->{arguments}});
            vm_monitor_command($vmid, $cmd->{execute}, $nocheck);
        } else {
            die "unable to open monitor socket\n";
        }
    };
    if (my $err = $@) {
        syslog("err", "VM $vmid qmp command failed - $err");
        die $err;
    }

    return $res;
}
4435
# Run the human monitor command line $cmdline on VM $vmid by wrapping it in
# the QMP 'human-monitor-command' command; returns the QMP result.
sub vm_human_monitor_command {
    my ($vmid, $cmdline) = @_;

    return vm_qmp_command($vmid, {
        execute => 'human-monitor-command',
        arguments => { 'command-line' => $cmdline },
    });
}
4448
# Return the command line built by config_to_command() for VM $vmid as a
# single string (arguments joined with spaces, no shell quoting applied).
sub vm_commandline {
    my ($storecfg, $vmid) = @_;

    my $vmconf = load_config($vmid);
    my $vmdefaults = load_defaults();

    my $argv = config_to_command($storecfg, $vmid, $vmconf, $vmdefaults);

    return join(' ', @$argv);
}
4460
# Send 'system_reset' to VM $vmid under the config lock; the config 'lock'
# setting is checked first unless $skiplock is set.
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $reset_locked = sub {
        my $conf = load_config($vmid);

        check_lock($conf) unless $skiplock;

        vm_mon_cmd($vmid, "system_reset");
    };

    lock_config($vmid, $reset_locked);
}
4473
# Return an array ref with the volume IDs referenced by $conf (as visited
# by foreach_volid). Absolute paths and values that
# PVE::Storage::parse_volume_id() yields no storage ID for are left out.
sub get_vm_volumes {
    my ($conf) = @_;

    my @volumes;

    my $collect = sub {
        my ($volid, $is_cdrom) = @_;

        # absolute path, not a volume ID
        return if $volid =~ m|^/|;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        push @volumes, $volid;
    };

    foreach_volid($conf, $collect);

    return \@volumes;
}
4491
# Clean up after VM $vmid has stopped: deactivate its volumes (unless
# $keepActive), remove the per-VM files below /var/run/qemu-server and,
# if $apply_pending_changes is set, apply pending config changes.
# Failures in any of these steps are only reported as a warning.
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    eval {
        unless ($keepActive) {
            my $volumes = get_vm_volumes($conf);
            PVE::Storage::deactivate_volumes($storecfg, $volumes);
        }

        unlink "/var/run/qemu-server/${vmid}.$_" for qw(mon qmp pid vnc qga);

        vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes;
    };
    warn $@ if $@; # avoid errors - just warn
}
4510
# Stop (or shut down) VM $vmid.
#
# Note: use $nocheck to skip tests if VM configuration file exists.
# We need that when migrating VMs to other nodes (files already moved)
# Note: we set $keepActive in vzdump stop mode - volumes need to stay active
#
# With $shutdown the guest is asked to power down ('guest-shutdown' if the
# config enables the agent, 'system_powerdown' otherwise), else QEMU gets a
# 'quit'. $timeout defaults to the 'down' value of the startup option (on
# shutdown) or 60 seconds. If the VM is still running afterwards and $force
# is set ($force defaults to 1 unless $shutdown), it is terminated with
# SIGTERM and, after another 10 seconds, with SIGKILL; without $force the
# function dies instead.
# With $migratedfrom the process is just sent SIGTERM and cleaned up,
# without taking the config lock.
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    $force = 1 if !defined($force) && !$shutdown;

    if ($migratedfrom){
        my $pid = check_running($vmid, $nocheck, $migratedfrom);
        kill 15, $pid if $pid;
        my $conf = load_config($vmid, $migratedfrom);
        vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 0);
        return;
    }

    lock_config($vmid, sub {

        my $pid = check_running($vmid, $nocheck);
        return if !$pid;

        my $conf;
        if (!$nocheck) {
            $conf = load_config($vmid);
            check_lock($conf) if !$skiplock;
            if (!defined($timeout) && $shutdown && $conf->{startup}) {
                my $opts = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
                $timeout = $opts->{down} if $opts->{down};
            }
        }

        $timeout = 60 if !defined($timeout);

        # Poll once per second, for at most $seconds, until the VM is gone.
        # Returns true if it stopped. The state is checked again after the
        # last sleep, so a VM that exits during the final second is not
        # mistaken for a timeout (and its pid is not signalled afterwards).
        my $wait_until_stopped = sub {
            my ($seconds) = @_;

            for (my $count = 0; $count < $seconds; $count++) {
                return 1 if !check_running($vmid, $nocheck);
                sleep 1;
            }

            return !check_running($vmid, $nocheck);
        };

        eval {
            if ($shutdown) {
                if (defined($conf) && $conf->{agent}) {
                    vm_qmp_command($vmid, { execute => "guest-shutdown" }, $nocheck);
                } else {
                    vm_qmp_command($vmid, { execute => "system_powerdown" }, $nocheck);
                }
            } else {
                vm_qmp_command($vmid, { execute => "quit" }, $nocheck);
            }
        };
        my $err = $@;

        if (!$err) {
            if ($wait_until_stopped->($timeout)) {
                vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
                return;
            }

            if ($force) {
                warn "VM still running - terminating now with SIGTERM\n";
                kill 15, $pid;
            } else {
                die "VM quit/powerdown failed - got timeout\n";
            }
        } else {
            if ($force) {
                warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
                kill 15, $pid;
            } else {
                die "VM quit/powerdown failed\n";
            }
        }

        # wait again
        if (!$wait_until_stopped->(10)) {
            warn "VM still running - terminating now with SIGKILL\n";
            kill 9, $pid;
            sleep 1;
        }

        vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
    });
}
4602
# Pause VM $vmid (QMP 'stop') under the config lock. The config 'lock'
# setting is checked unless $skiplock is set or the lock is 'backup'.
sub vm_suspend {
    my ($vmid, $skiplock) = @_;

    my $suspend_locked = sub {
        my $conf = load_config($vmid);

        my $locked_for_backup = $conf->{lock} && $conf->{lock} eq 'backup';
        check_lock($conf) unless $skiplock || $locked_for_backup;

        vm_mon_cmd($vmid, "stop");
    };

    lock_config($vmid, $suspend_locked);
}
4615
# Continue VM $vmid (QMP 'cont') under the config lock. The config 'lock'
# setting is checked unless $skiplock is set or the lock is 'backup'.
sub vm_resume {
    my ($vmid, $skiplock) = @_;

    my $resume_locked = sub {
        my $conf = load_config($vmid);

        my $locked_for_backup = $conf->{lock} && $conf->{lock} eq 'backup';
        check_lock($conf) unless $skiplock || $locked_for_backup;

        vm_mon_cmd($vmid, "cont");
    };

    lock_config($vmid, $resume_locked);
}
4628
# Send the key (combination) $key to VM $vmid under the config lock.
# Note: $skiplock is accepted but not evaluated here, and the loaded config
# is not inspected.
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $sendkey_locked = sub {
        my $conf = load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        vm_human_monitor_command($vmid, "sendkey $key");
    };

    lock_config($vmid, $sendkey_locked);
}
4640
# Destroy VM $vmid via destroy_vm() under the config lock.
# Dies if the VM is locked (unless $skiplock) or still running.
sub vm_destroy {
    my ($storecfg, $vmid, $skiplock) = @_;

    lock_config($vmid, sub {
        my $conf = load_config($vmid);

        check_lock($conf) unless $skiplock;

        die "VM $vmid is running - destroy failed\n" if check_running($vmid);

        destroy_vm($storecfg, $vmid);
    });
}
4657
4658 # pci helpers
4659
# Write $buf to $filename (opened in "w" mode).
# Returns undef if the file cannot be opened, otherwise the result of the
# print call. Note: the result of close() is not taken into account.
sub file_write {
    my ($filename, $buf) = @_;

    my $fh = IO::File->new($filename, "w") or return undef;

    my $written = $fh->print($buf);
    $fh->close();

    return $written;
}
4672
# Collect basic information about the PCI device $name, given as
# 'dddd:bb:ss.f' in lowercase hex, from its directory below
# $pcisysfs/devices.
# Returns a hash ref with name, vendor and product (without the '0x'
# prefix), domain, bus, slot, func, irq and has_fl_reset (1 if a 'reset'
# file exists, else 0). Returns undef if $name is malformed or the irq,
# vendor or device attributes cannot be read in the expected format.
sub pci_device_info {
    my ($name) = @_;

    my ($domain, $bus, $slot, $func) =
        $name =~ m/^([a-f0-9]{4}):([a-f0-9]{2}):([a-f0-9]{2})\.([a-f0-9])$/;
    return undef if !defined($domain);

    my $devdir = "$pcisysfs/devices/$name";

    my $irq = file_read_firstline("$devdir/irq");
    return undef if !defined($irq) || $irq !~ m/^\d+$/;

    # the substitutions below strip the leading '0x' in place
    my $vendor = file_read_firstline("$devdir/vendor");
    return undef if !defined($vendor) || $vendor !~ s/^0x//;

    my $product = file_read_firstline("$devdir/device");
    return undef if !defined($product) || $product !~ s/^0x//;

    return {
        name => $name,
        vendor => $vendor,
        product => $product,
        domain => $domain,
        bus => $bus,
        slot => $slot,
        func => $func,
        irq => $irq,
        has_fl_reset => (-f "$devdir/reset") ? 1 : 0,
    };
}
4704
# Write "1" to the 'reset' file of the PCI device described by $dev (a hash
# ref with a 'name' entry); returns the result of file_write().
sub pci_dev_reset {
    my ($dev) = @_;

    return file_write("$pcisysfs/devices/$dev->{name}/reset", "1");
}
4714
# Bind the PCI device described by $dev (hash ref with name, vendor and
# product) to the pci-stub driver.
# Returns a true value if the device shows up below the pci-stub driver
# directory (1 if it was there already), a false value otherwise.
sub pci_dev_bind_to_stub {
    my ($dev) = @_;

    my $name = $dev->{name};
    my $stub_dir = "$pcisysfs/drivers/pci-stub";
    my $bound_dir = "$stub_dir/$name";

    # nothing to do if the device is already listed below pci-stub
    return 1 if -d $bound_dir;

    return undef if !file_write("$stub_dir/new_id", "$dev->{vendor} $dev->{product}");

    # a failed unbind only counts if an unbind file exists at all
    my $unbind = "$pcisysfs/devices/$name/driver/unbind";
    return undef if !file_write($unbind, $name) && -f $unbind;

    # bind explicitly only if the device did not show up by itself
    return undef if !(-d $bound_dir) && !file_write("$stub_dir/bind", $name);

    return -d $bound_dir;
}
4738
# Bind the PCI device described by $dev (hash ref with name, vendor and
# product) to the vfio-pci driver, trying a modprobe first if the driver
# directory is missing.
# Dies if the vfio-pci driver directory is still missing afterwards.
# Returns a true value if the device shows up below the vfio-pci driver
# directory (1 if it was there already), a false value otherwise.
sub pci_dev_bind_to_vfio {
    my ($dev) = @_;

    my $name = $dev->{name};
    my $vfio_basedir = "$pcisysfs/drivers/vfio-pci";

    system("/sbin/modprobe vfio-pci >/dev/null 2>/dev/null") unless -d $vfio_basedir;
    die "Cannot find vfio-pci module!\n" unless -d $vfio_basedir;

    my $bound_dir = "$vfio_basedir/$name";

    # nothing to do if the device is already listed below vfio-pci
    return 1 if -d $bound_dir;

    return undef if !file_write("$vfio_basedir/new_id", "$dev->{vendor} $dev->{product}");

    # a failed unbind only counts if an unbind file exists at all
    my $unbind = "$pcisysfs/devices/$name/driver/unbind";
    return undef if !file_write($unbind, $name) && -f $unbind;

    # bind explicitly only if the device did not show up by itself
    return undef if !(-d $bound_dir) && !file_write("$vfio_basedir/bind", $name);

    return -d $bound_dir;
}
4769
# Bind all devices in the IOMMU group of PCI device $pciid (given without
# the '0000:' domain prefix) to the vfio-pci driver.
# Entries with a 'pci_bus' subdirectory are skipped.
# Dies if vfio-pci is not available, the IOMMU group cannot be read, a
# device name is malformed, no device info can be read, or binding fails.
# Returns 1 on success.
sub pci_dev_group_bind_to_vfio {
    my ($pciid) = @_;

    my $vfio_basedir = "$pcisysfs/drivers/vfio-pci";

    if (!-d $vfio_basedir) {
        system("/sbin/modprobe vfio-pci >/dev/null 2>/dev/null");
    }
    die "Cannot find vfio-pci module!\n" if !-d $vfio_basedir;

    # get IOMMU group devices
    opendir(my $D, "$pcisysfs/devices/0000:$pciid/iommu_group/devices/") || die "Cannot open iommu_group: $!\n";
    my @devs = grep /^0000:/, readdir($D);
    closedir($D);

    foreach my $group_dev (@devs) {
        # validate the directory entry and work with the captured copy
        my ($devname) = $group_dev =~ m/^([:\.\da-f]+)$/
            or die "PCI ID $group_dev not valid!\n";

        # pci bridges, switches or root ports are not supported
        # they have a pci_bus subdirectory so skip them
        next if (-e "$pcisysfs/devices/$devname/pci_bus");

        my $info = pci_device_info($devname);
        # without this check an undef $info makes pci_dev_bind_to_vfio test
        # the driver directory itself and report success for the device
        die "no pci device info for device '$devname'\n" if !$info;

        pci_dev_bind_to_vfio($info) || die "Cannot bind $devname to vfio\n";
    }

    return 1;
}
4798
# Return the ',bus=pci.N,addr=0xNN' option suffix for the device ID $id, or
# an empty string if no slot is assigned to $id.
# If $bridges (hash ref) is given, the bus number used is recorded in it.
sub print_pci_addr {
    my ($id, $bridges) = @_;

    # individually assigned slots on bus 0
    my $devices = {
        piix3 => { bus => 0, addr => 1 },
        #addr2 : first videocard
        balloon0 => { bus => 0, addr => 3 },
        watchdog => { bus => 0, addr => 4 },
        scsihw0 => { bus => 0, addr => 5 },
        'pci.3' => { bus => 0, addr => 5 }, #can also be used for virtio-scsi-single bridge
        scsihw1 => { bus => 0, addr => 6 },
        ahci0 => { bus => 0, addr => 7 },
        qga0 => { bus => 0, addr => 8 },
        spice => { bus => 0, addr => 9 },
        hostpci0 => { bus => 0, addr => 16 },
        hostpci1 => { bus => 0, addr => 17 },
        hostpci2 => { bus => 0, addr => 27 },
        hostpci3 => { bus => 0, addr => 28 },
        #addr29 : usb-host (pve-usb.cfg)
        'pci.1' => { bus => 0, addr => 30 },
        'pci.2' => { bus => 0, addr => 31 },
    };

    # consecutive slot ranges on bus 0
    $devices->{"virtio$_"} = { bus => 0, addr => 10 + $_ } for 0 .. 5; # addr 10-15
    $devices->{"net$_"} = { bus => 0, addr => 18 + $_ } for 0 .. 5;    # addr 18-23
    $devices->{"vga$_"} = { bus => 0, addr => 23 + $_ } for 1 .. 3;    # addr 24-26

    # bus 1: net6 - net31 on addr 1-26
    $devices->{"net$_"} = { bus => 1, addr => $_ - 5 } for 6 .. 31;

    # bus 2: virtio6 - virtio15 on addr 1-10
    $devices->{"virtio$_"} = { bus => 2, addr => $_ - 5 } for 6 .. 15;

    # bus 3: virtioscsi0 - virtioscsi30 on addr 1-31
    $devices->{"virtioscsi$_"} = { bus => 3, addr => $_ + 1 } for 0 .. 30;

    my $slot = $devices->{$id};
    return '' if !$slot;

    my $bus = $slot->{bus};
    $bridges->{$bus} = 1 if $bridges;

    return sprintf(",bus=pci.%s,addr=0x%x", $bus, $slot->{addr});
}
4915
# Return the ',bus=ich9-pcie-port-N,addr=0x0' option suffix for the device
# ID $id (hostpci0 - hostpci3 map to ports 1 - 4), or an empty string for
# any other ID.
sub print_pcie_addr {
    my ($id) = @_;

    my %pcie_slot;
    foreach my $index (0 .. 3) {
        $pcie_slot{"hostpci$index"} = { bus => "ich9-pcie-port-" . ($index + 1), addr => 0 };
    }

    my $slot = $pcie_slot{$id};
    return '' if !$slot;

    return sprintf(",bus=%s,addr=0x%x", $slot->{bus}, $slot->{addr});
}
4935
# vzdump restore implementation
4937
# Return the name of the first member listed by 'tar tf' for $archive.
# Dies if $archive is not a plain file, tar cannot be started, or the
# listing is empty.
sub tar_archive_read_firstfile {
    my $archive = shift;

    die "ERROR: file '$archive' does not exist\n" if ! -f $archive;

    # try to detect archive type first
    # List-form pipe open: no shell is involved, so archive names containing
    # quotes or other shell meta characters are passed to tar unchanged.
    my $pid = open(my $tar_fh, '-|', 'tar', 'tf', $archive) ||
        die "unable to open file '$archive'\n";
    my $firstfile = <$tar_fh>;
    # only the first line is needed - stop tar instead of reading the rest
    kill 15, $pid;
    close $tar_fh;

    die "ERROR: archive contaions no data\n" if !$firstfile;
    chomp $firstfile;

    return $firstfile;
}
4955
# Remove the temporary volumes recorded in $statfile.
# Each line is expected to end in 'vzdump:<name>:<volid>'. Absolute paths
# are unlinked, everything else is freed with PVE::Storage::vdisk_free().
# Progress and problems are reported on STDERR; nothing is thrown. A
# $statfile that cannot be opened is silently ignored.
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    if (my $fd = IO::File->new($statfile, "r")) {
        while (defined(my $line = <$fd>)) {
            if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                my $volid = $2;
                eval {
                    if ($volid =~ m|^/|) {
                        # parentheses are required: 'unlink $volid || die'
                        # applies the '||' to $volid and never detects a
                        # failed unlink
                        unlink($volid) || die "unlink failed - $!\n";
                    } else {
                        PVE::Storage::vdisk_free($storecfg, $volid);
                    }
                    print STDERR "temporary volume '$volid' sucessfuly removed\n";
                };
                print STDERR "unable to cleanup '$volid' - $@" if $@;
            } else {
                print STDERR "unable to parse line in statfile - $line";
            }
        }
        $fd->close();
    }
}
4981
# Restore VM $vmid from the backup $archive.
# The archive format ('tar' or 'vma') is taken from $opts->{format} or, if
# that is not set, derived from the file name suffix (default 'vma'). The
# compression is always derived from the suffix. Dispatches to
# restore_tar_archive() or restore_vma_archive() and returns their result.
sub restore_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    my $format = $opts->{format};
    my $comp;

    # suffix => [ default format, compression ]
    # all dots are escaped, so they only match a literal '.'
    my @known_suffixes = (
        [ qr/\.tgz$/,      'tar', 'gzip' ],
        [ qr/\.tar\.gz$/,  'tar', 'gzip' ],
        [ qr/\.tar$/,      'tar', undef ],
        [ qr/\.tar\.lzo$/, 'tar', 'lzop' ],
        [ qr/\.vma$/,      'vma', undef ],
        [ qr/\.vma\.gz$/,  'vma', 'gzip' ],
        [ qr/\.vma\.lzo$/, 'vma', 'lzop' ],
    );

    my $default_format = 'vma'; # default
    foreach my $entry (@known_suffixes) {
        my ($suffix_re, $suffix_format, $suffix_comp) = @$entry;
        next if $archive !~ $suffix_re;
        $default_format = $suffix_format;
        $comp = $suffix_comp;
        last;
    }
    $format = $default_format if !$format;

    if ($format eq 'tar') {
        return restore_tar_archive($archive, $vmid, $user, $opts);
    } else {
        return restore_vma_archive($archive, $vmid, $user, $opts, $comp);
    }
}
5015
# Filter/rewrite one config $line from a backup and print the result to
# $outfd.
#  - '#qmdump#', '#vzdump#', 'lock', 'unusedN', 'parent' and 'template'
#    lines are dropped
#  - old 'vlanN' lines are converted to 'netN' lines, numbered using
#    $cookie->{netcount}
#  - 'netN' lines get a new random MAC address if $unique is set
#  - drive lines containing 'backup=no' are written commented out; drives
#    listed in $map are pointed to the mapped volume
#  - everything else is copied unchanged
sub restore_update_config_line {
    my ($outfd, $cookie, $vmid, $map, $line, $unique) = @_;

    # settings that are never taken over (a restored VM is never a template)
    return if $line =~ m/^(?:\#qmdump\#|\#vzdump\#|lock:|unused\d+:|parent:|template:)/;

    if ($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/) {
        # try to convert old 1.X settings
        my ($ind, $ethcfg) = ($2, $3);
        foreach my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            $macaddr = PVE::Tools::random_ether_addr() if !$macaddr || $unique;

            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$ind",
                macaddr => $macaddr,
            });

            print $outfd "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
    } elsif (($line =~ m/^(net\d+):\s*(\S+)\s*$/) && $unique) {
        my ($netid, $netstr) = ($1, $2);
        my $net = parse_net($netstr);
        $net->{macaddr} = PVE::Tools::random_ether_addr() if $net->{macaddr};
        $netstr = print_net($net);
        print $outfd "$netid: $netstr\n";
    } elsif ($line =~ m/^((ide|scsi|virtio|sata)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);
        if ($line =~ m/backup=no/) {
            print $outfd "#$line";
        } elsif ($virtdev && $map->{$virtdev}) {
            my $di = parse_drive($virtdev, $value);
            delete $di->{format}; # format can change on restore
            $di->{file} = $map->{$virtdev};
            $value = print_drive($vmid, $di);
            print $outfd "$virtdev: $value\n";
        } else {
            print $outfd $line;
        }
    } else {
        print $outfd $line;
    }
}
5066
# Build a hash ref 'volid => info' from PVE::Storage::vdisk_list() for
# $vmid, across all storages returned. Entries without a volid or size are
# ignored; each remaining entry gets its 'path' filled in.
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $info = PVE::Storage::vdisk_list($cfg, undef, $vmid);

    my %volid_hash;
    foreach my $storeid (keys %$info) {
        foreach my $item (@{$info->{$storeid}}) {
            next if !$item->{volid} || !$item->{size};

            $item->{path} = PVE::Storage::path($cfg, $item->{volid});
            $volid_hash{$item->{volid}} = $item;
        }
    }

    return \%volid_hash;
}
5083
# Return a hash ref 'path => count' with the paths of all non-cdrom drives
# in $conf and, if $scan_snapshots is set, in its snapshots.
# $skip_drive names a drive that is ignored in the current config only;
# snapshots are always scanned completely.
sub get_used_paths {
    my ($vmid, $storecfg, $conf, $scan_snapshots, $skip_drive) = @_;

    my $used_path = {};

    my $scan_config = sub {
        my ($cref, $snapname, $skip) = @_;

        foreach my $key (keys %$cref) {
            next if !valid_drivename($key);
            next if $skip && $key eq $skip;

            my $drive = parse_drive($key, $cref->{$key});
            next if !$drive || !$drive->{file} || drive_is_cdrom($drive);

            my $file = $drive->{file};
            if ($file =~ m!^/!) {
                # absolute path, used as is
                $used_path->{$file}++; # = 1;
                next;
            }

            # volume ID - only resolvable if the storage is known
            my ($storeid, $volname) = PVE::Storage::parse_volume_id($file, 1);
            next if !$storeid;
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid, 1);
            next if !$scfg;

            my $path = PVE::Storage::path($storecfg, $file, $snapname);
            $used_path->{$path}++; # = 1;
        }
    };

    # current config: honor $skip_drive
    $scan_config->($conf, undef, $skip_drive);

    if ($scan_snapshots) {
        foreach my $snapname (keys %{$conf->{snapshots}}) {
            $scan_config->($conf->{snapshots}->{$snapname}, $snapname);
        }
    }

    return $used_path;
}
5124
# Synchronize $conf with the volumes in $volid_hash (volid => info with
# 'size' and 'path'):
#  - refresh the 'size' of all non-cdrom drives
#  - drop 'unusedN' entries whose volume (or path) is used by a drive
#  - add volumes referenced nowhere as unused (state volumes excluded)
# $conf is modified in place. Returns a true value if anything changed.
sub update_disksize {
    my ($vmid, $conf, $volid_hash) = @_;

    my $changes;

    my $used = {};

    # Note: it is allowed to define multiple storages with same path (alias), so
    # we need to check both 'volid' and real 'path' (two different volid can point
    # to the same path).

    my $usedpath = {};

    # update size info
    foreach my $opt (keys %$conf) {
        if (valid_drivename($opt)) {
            my $drive = parse_drive($opt, $conf->{$opt});
            my $volid = $drive->{file};
            next if !$volid;

            $used->{$volid} = 1;
            if ($volid_hash->{$volid} &&
                (my $path = $volid_hash->{$volid}->{path})) {
                $usedpath->{$path} = 1;
            }

            next if drive_is_cdrom($drive);
            next if !$volid_hash->{$volid};

            $drive->{size} = $volid_hash->{$volid}->{size};
            my $new = print_drive($vmid, $drive);
            if ($new ne $conf->{$opt}) {
                $changes = 1;
                $conf->{$opt} = $new;
            }
        }
    }

    # remove 'unusedX' entry if volume is used
    foreach my $opt (keys %$conf) {
        next if $opt !~ m/^unused\d+$/;
        my $volid = $conf->{$opt};
        # do not use 'my $path = ... if COND' here: its behaviour is
        # undefined and can leave the path of the previous entry in $path
        my $path = $volid_hash->{$volid} ? $volid_hash->{$volid}->{path} : undef;
        if ($used->{$volid} || ($path && $usedpath->{$path})) {
            $changes = 1;
            delete $conf->{$opt};
        }
    }

    foreach my $volid (sort keys %$volid_hash) {
        next if $volid =~ m/vm-$vmid-state-/;
        next if $used->{$volid};
        my $path = $volid_hash->{$volid}->{path};
        next if !$path; # just to be sure
        next if $usedpath->{$path};
        $changes = 1;
        add_unused_volume($conf, $volid);
        $usedpath->{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
5187
# Rescan the storages and update disk sizes / unused volumes in the config
# of VM $vmid, or of all VMs from config_list() if $vmid is undefined.
# Each config is updated under its config lock unless $nolock is set, and
# only written if update_disksize() reports changes.
sub rescan {
    my ($vmid, $nolock) = @_;

    my $cfg = PVE::Cluster::cfs_read_file("storage.cfg");

    my $volid_hash = scan_volids($cfg, $vmid);

    my $updatefn = sub {
        my ($vmid) = @_;

        my $conf = load_config($vmid);

        check_lock($conf);

        # only consider volumes that belong to this VM
        my $vm_volids = {};
        foreach my $volid (keys %$volid_hash) {
            my $info = $volid_hash->{$volid};
            next if !($info->{vmid} && $info->{vmid} == $vmid);
            $vm_volids->{$volid} = $info;
        }

        my $changes = update_disksize($vmid, $conf, $vm_volids);

        update_config_nolock($vmid, $conf, 1) if $changes;
    };

    my $update_one = sub {
        my ($id) = @_;

        if ($nolock) {
            &$updatefn($id);
        } else {
            lock_config($id, $updatefn, $id);
        }
    };

    if (defined($vmid)) {
        $update_one->($vmid);
    } else {
        my $vmlist = config_list();
        $update_one->($_) foreach keys %$vmlist;
    }
}
5230
5231 sub restore_vma_archive {
5232 my ($archive, $vmid, $user, $opts, $comp) = @_;
5233
5234 my $input = $archive eq '-' ? "<&STDIN" : undef;
5235 my $readfrom = $archive;
5236
5237 my $uncomp = '';
5238 if ($comp) {
5239 $readfrom = '-';
5240 my $qarchive = PVE::Tools::shellquote($archive);
5241 if ($comp eq 'gzip') {
5242 $uncomp = "zcat $qarchive|";
5243 } elsif ($comp eq 'lzop') {
5244 $uncomp = "lzop -d -c $qarchive|";
5245 } else {
5246 die "unknown compression method '$comp'\n";
5247 }
5248
5249 }
5250
5251 my $tmpdir = "/var/tmp/vzdumptmp$$";
5252 rmtree $tmpdir;
5253
5254 # disable interrupts (always do cleanups)
5255 local $SIG{INT} = $SIG{TERM} = $SIG{QUIT} = $SIG{HUP} = sub {
5256 warn "got interrupt - ignored\n";
5257 };
5258
5259 my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
5260 POSIX::mkfifo($mapfifo, 0600);
5261 my $fifofh;
5262
5263 my $openfifo = sub {
5264 open($fifofh, '>', $mapfifo) || die $!;
5265 };
5266
5267 my $cmd = "${uncomp}vma extract -v -r $mapfifo $readfrom $tmpdir";
5268
5269 my $oldtimeout;
5270 my $timeout = 5;
5271
5272 my $devinfo = {};
5273
5274 my $rpcenv = PVE::RPCEnvironment::get();
5275
5276 my $conffile = config_file($vmid);
5277 my $tmpfn = "$conffile.$$.tmp";
5278
5279 # Note: $oldconf is undef if VM does not exists
5280 my $oldconf = PVE::Cluster::cfs_read_file(cfs_config_path($vmid));
5281
5282 my $print_devmap = sub {
5283 my $virtdev_hash = {};
5284
5285 my $cfgfn = "$tmpdir/qemu-server.conf";
5286
5287 # we can read the config - that is already extracted
5288 my $fh = IO::File->new($cfgfn, "r") ||
5289 "unable to read qemu-server.conf - $!\n";
5290
5291 while (defined(my $line = <$fh>)) {
5292 if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
5293 my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);
5294 die "archive does not contain data for drive '$virtdev'\n"
5295 if !$devinfo->{$devname};
5296 if (defined($opts->{storage})) {
5297 $storeid = $opts->{storage} || 'local';
5298 } elsif (!$storeid) {
5299 $storeid = 'local';
5300 }
5301 $format = 'raw' if !$format;
5302 $devinfo->{$devname}->{devname} = $devname;
5303 $devinfo->{$devname}->{virtdev} = $virtdev;
5304 $devinfo->{$devname}->{format} = $format;
5305 $devinfo->{$devname}->{storeid} = $storeid;
5306
5307 # check permission on storage
5308 my $pool = $opts->{pool}; # todo: do we need that?
5309 if ($user ne 'root@pam') {
5310 $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace']);
5311 }
5312
5313 $virtdev_hash->{$virtdev} = $devinfo->{$devname};
5314 }
5315 }
5316
5317 foreach my $devname (keys %$devinfo) {
5318 die "found no device mapping information for device '$devname'\n"
5319 if !$devinfo->{$devname}->{virtdev};
5320 }
5321
5322 my $cfg = cfs_read_file('storage.cfg');
5323
5324 # create empty/temp config
5325 if ($oldconf) {
5326 PVE::Tools::file_set_contents($conffile, "memory: 128\n");
5327 foreach_drive($oldconf, sub {
5328 my ($ds, $drive) = @_;
5329
5330 return if drive_is_cdrom($drive);
5331
5332 my $volid = $drive->{file};
5333
5334 return if !$volid || $volid =~ m|^/|;
5335
5336 my ($path, $owner) = PVE::Storage::path($cfg, $volid);
5337 return if !$path || !$owner || ($owner != $vmid);
5338
5339 # Note: only delete disk we want to restore
5340 # other volumes will become unused
5341 if ($virtdev_hash->{$ds}) {
5342 PVE::Storage::vdisk_free($cfg, $volid);
5343 }
5344 });
5345 }
5346
5347 my $map = {};
5348 foreach my $virtdev (sort keys %$virtdev_hash) {
5349 my $d = $virtdev_hash->{$virtdev};
5350 my $alloc_size = int(($d->{size} + 1024 - 1)/1024);
5351 my $scfg = PVE::Storage::storage_config($cfg, $d->{storeid});
5352
5353 # test if requested format is supported
5354 my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($cfg, $d->{storeid});
5355 my $supported = grep { $_ eq $d->{format} } @$validFormats;
5356 $d->{format} = $defFormat if !$supported;
5357
5358 my $volid = PVE::Storage::vdisk_alloc($cfg, $d->{storeid}, $vmid,
5359 $d->{format}, undef, $alloc_size);
5360 print STDERR "new volume ID is '$volid'\n";
5361 $d->{volid} = $volid;
5362 my $path = PVE::Storage::path($cfg, $volid);
5363
5364 my $write_zeros = 1;
5365 # fixme: what other storages types initialize volumes with zero?
5366 if ($scfg->{type} eq 'dir' || $scfg->{type} eq 'nfs' || $scfg->{type} eq 'glusterfs' ||
5367 $scfg->{type} eq 'sheepdog' || $scfg->{type} eq 'rbd') {
5368 $write_zeros = 0;
5369 }
5370
5371 print $fifofh "${write_zeros}:$d->{devname}=$path\n";
5372
5373 print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
5374 $map->{$virtdev} = $volid;
5375 }
5376
5377 $fh->seek(0, 0) || die "seek failed - $!\n";
5378
5379 my $outfd = new IO::File ($tmpfn, "w") ||
5380 die "unable to write config for VM $vmid\n";
5381
5382 my $cookie = { netcount => 0 };
5383 while (defined(my $line = <$fh>)) {
5384 restore_update_config_line($outfd, $cookie, $vmid, $map, $line, $opts->{unique});
5385 }
5386
5387 $fh->close();
5388 $outfd->close();
5389 };
5390
5391 eval {
5392 # enable interrupts
5393 local $SIG{INT} = $SIG{TERM} = $SIG{QUIT} = $SIG{HUP} = $SIG{PIPE} = sub {
5394 die "interrupted by signal\n";
5395 };
5396 local $SIG{ALRM} = sub { die "got timeout\n"; };
5397
5398 $oldtimeout = alarm($timeout);
5399
5400 my $parser = sub {
5401 my $line = shift;
5402
5403 print "$line\n";
5404
5405 if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
5406 my ($dev_id, $size, $devname) = ($1, $2, $3);
5407 $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
5408 } elsif ($line =~ m/^CTIME: /) {
5409 # we correctly received the vma config, so we can disable
5410 # the timeout now for disk allocation (set to 10 minutes, so
5411 # that we always timeout if something goes wrong)
5412 alarm(600);
5413 &$print_devmap();
5414 print $fifofh "done\n";
5415 my $tmp = $oldtimeout || 0;
5416 $oldtimeout = undef;
5417 alarm($tmp);
5418 close($fifofh);
5419 }
5420 };
5421
5422 print "restore vma archive: $cmd\n";
5423 run_command($cmd, input => $input, outfunc => $parser, afterfork => $openfifo);
5424 };
5425 my $err = $@;
5426
5427 alarm($oldtimeout) if $oldtimeout;
5428
5429 unlink $mapfifo;
5430
5431 if ($err) {
5432 rmtree $tmpdir;
5433 unlink $tmpfn;
5434
5435 my $cfg = cfs_read_file('storage.cfg');
5436 foreach my $devname (keys %$devinfo) {
5437 my $volid = $devinfo->{$devname}->{volid};
5438 next if !$volid;
5439 eval {
5440 if ($volid =~ m|^/|) {
5441 unlink $volid || die 'unlink failed\n';
5442 } else {
5443 PVE::Storage::vdisk_free($cfg, $volid);
5444 }
5445 print STDERR "temporary volume '$volid' sucessfuly removed\n";
5446 };
5447 print STDERR "unable to cleanup '$volid' - $@" if $@;
5448 }
5449 die $err;
5450 }
5451
5452 rmtree $tmpdir;
5453
5454 rename($tmpfn, $conffile) ||
5455 die "unable to commit configuration file '$conffile'\n";
5456
5457 PVE::Cluster::cfs_update(); # make sure we read new file
5458
5459 eval { rescan($vmid, 1); };
5460 warn $@ if $@;
5461 }
5462
# Restore a VM from a tar based vzdump archive.
#
# Parameters:
#   $archive - path of the archive, or '-' to read the archive from STDIN
#   $vmid    - ID of the VM to restore into
#   $user    - user name, exported to the extract helper as VZDUMP_USER
#   $opts    - hash ref; the keys read here are 'storage', 'pool',
#              'prealloc', 'info' and 'unique'
#
# The archive is unpacked by tar, which pipes every member to the
# qmextract helper. Afterwards the extracted qemu-server.conf is rewritten
# line by line (restore_update_config_line) into a temporary file that is
# finally renamed to the real VM config file.
#
# Dies if the archive does not start with 'qemu-server.conf', if the
# extraction or the config rewrite fails, or if the new config cannot be
# committed.
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' dos not lock like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = cfs_read_file('storage.cfg');

    # destroy existing data - keep empty config
    my $vmcfgfn = config_file($vmid);
    destroy_vm($storecfg, $vmid, 1) if -f $vmcfgfn;

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath $tmpdir;

    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = config_file($vmid);
    my $tmpfn = "$conffile.$$.tmp";

    # disable interrupts (always do cleanups)
    # Every signal gets its own 'local': a chained assignment would only
    # localize the first element and permanently overwrite the others.
    my $ignore_signal = sub {
        print STDERR "got interrupt - ignored\n";
    };
    local $SIG{INT} = $ignore_signal;
    local $SIG{TERM} = $ignore_signal;
    local $SIG{QUIT} = $ignore_signal;
    local $SIG{HUP} = $ignore_signal;

    eval {
        # enable interrupts (restored to the handlers above when the eval
        # block is left, so the cleanup code below cannot be interrupted)
        my $abort_on_signal = sub {
            die "interrupted by signal\n";
        };
        local $SIG{INT} = $abort_on_signal;
        local $SIG{TERM} = $abort_on_signal;
        local $SIG{QUIT} = $abort_on_signal;
        local $SIG{HUP} = $abort_on_signal;
        local $SIG{PIPE} = $abort_on_signal;

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        # info mode only displays the archive content - no config is written
        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined (my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") ||
            die "unable to open file '$confsrc'\n";

        my $outfd = IO::File->new($tmpfn, "w") ||
            die "unable to write config for VM $vmid\n";

        my $cookie = { netcount => 0 };
        while (defined (my $line = <$srcfd>)) {
            restore_update_config_line($outfd, $cookie, $vmid, $map, $line, $opts->{unique});
        }

        $srcfd->close();
        $outfd->close();
    };
    my $err = $@;

    if ($err) {

        unlink $tmpfn;

        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};

        # the stat file has been consumed above - do not leak the temp dir
        rmtree $tmpdir;

        die $err;
    }

    rmtree $tmpdir;

    # In info mode no temporary config was written, so there is nothing to
    # commit. Parentheses are required here: without them '||' binds to
    # $conffile and a failed rename goes unnoticed.
    if (!$opts->{info}) {
        rename($tmpfn, $conffile) ||
            die "unable to commit configuration file '$conffile'\n";
    }

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
5572
5573
5574 # Internal snapshots
5575
# NOTE: Snapshot create/delete involves several non-atomic
# actions, and can take a long time.
# So we try to avoid locking the file and use the 'lock' variable
# inside the config file instead.
5580
# Copy all settings from the hash ref $source into the hash ref $dest.
#
# Skipped are the snapshot bookkeeping keys (snapshots, snapstate,
# snaptime, vmstate), 'lock', 'digest', 'description' and every
# 'unusedN' entry. Existing keys in $dest are overwritten.
my $snapshot_copy_config = sub {
    my ($source, $dest) = @_;

    # keys that are never transferred
    my %skip = map { $_ => 1 }
        qw(snapshots snapstate snaptime vmstate lock digest description);

    foreach my $key (keys %$source) {
        next if $skip{$key} || $key =~ m/^unused\d+$/;

        $dest->{$key} = $source->{$key};
    }
};
5597
# Build a new config hash from the current config $conf and the snapshot
# section $snap.
#
# The result keeps the snapshot list, the description and all 'unusedN'
# entries of $conf; everything else is taken from $snap via
# $snapshot_copy_config. Returns the new hash ref, $conf is not modified.
my $snapshot_apply_config = sub {
    my ($conf, $snap) = @_;

    # start with the snapshot list of the current config
    my $merged = { snapshots => $conf->{snapshots} };

    # keep description and list of unused disks
    my @kept = grep { $_ eq 'description' || m/^unused\d+$/ } keys %$conf;
    foreach my $key (@kept) {
        $merged->{$key} = $conf->{$key};
    }

    $snapshot_copy_config->($snap, $merged);

    return $merged;
};
5616
# Call $func once for every storage ID referenced by a non-cdrom drive
# in $conf.
#
# Each storage ID is passed only once, in sorted order. Drives whose
# volume ID cannot be parsed into a storage ID are ignored.
sub foreach_writable_storage {
    my ($conf, $func) = @_;

    my %storage_ids;

    foreach my $ds (keys %$conf) {
        next if !valid_drivename($ds);

        my $drive = parse_drive($ds, $conf->{$ds});
        next if !$drive || drive_is_cdrom($drive);

        my ($sid, $volname) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $storage_ids{$sid} = $sid if $sid;
    }

    foreach my $sid (sort keys %storage_ids) {
        $func->($sid);
    }
}
5639
# Allocate a raw volume for the VM state of snapshot $snapname and return
# its volume ID.
#
# The target storage is chosen among the storages used by the drives in
# $conf: shared storages first, then non-shared ones, preferring storages
# that have a 'path' setting. If nothing is found, 'local' is used.
my $alloc_vmstate_volid = sub {
    my ($storecfg, $vmid, $conf, $snapname) = @_;

    # Note: we try to be smart when selecting a $target storage

    # Scan the storages used by the VM and return the preferred one among
    # those whose 'shared' flag matches $want_shared (undef if none).
    my $choose_storage = sub {
        my ($want_shared) = @_;

        my $found;
        foreach_writable_storage($conf, sub {
            my ($sid) = @_;
            my $scfg = PVE::Storage::storage_config($storecfg, $sid);
            return if ($scfg->{shared} ? 1 : 0) != $want_shared;

            $found = $sid if !$found || $scfg->{path}; # prefer file based storage
        });

        return $found;
    };

    # search shared storage first, then local storage
    my $target = $choose_storage->(1) || $choose_storage->(0) || 'local';

    # extra space reserved for the driver state
    # NOTE(review): the previous comment said "32MB" while the value is 500;
    # the unit is presumably the same as $conf->{memory} - confirm
    my $driver_state_size = 500;
    # we abort live save after $conf->{memory}, so we need at max twice that space
    my $size = $conf->{memory}*2 + $driver_state_size;

    my $scfg = PVE::Storage::storage_config($storecfg, $target);
    my $name = "vm-$vmid-state-$snapname";
    $name .= ".raw" if $scfg->{path}; # add filename extension for file base storage

    return PVE::Storage::vdisk_alloc($storecfg, $target, $vmid, 'raw', $name, $size*1024);
};
5680
# First phase of snapshot creation: register snapshot $snapname in the
# config of VM $vmid and set the 'snapshot' lock.
#
# Runs under lock_config. Dies if the VM is a template, if check_lock
# rejects the config, if the name is already used or if has_feature
# reports that snapshots are not available. If $save_vmstate is set and
# the VM is running, a state volume is allocated as well.
#
# Returns the new snapshot section (hash ref, snapstate 'prepare').
my $snapshot_prepare = sub {
    my ($vmid, $snapname, $save_vmstate, $comment) = @_;

    my $snap;

    lock_config($vmid, sub {
        my $conf = load_config($vmid);

        die "you can't take a snapshot if it's a template\n"
            if is_template($conf);

        check_lock($conf);

        $conf->{lock} = 'snapshot';

        die "snapshot name '$snapname' already used\n"
            if defined($conf->{snapshots}->{$snapname});

        my $storecfg = PVE::Storage::config();
        die "snapshot feature is not available" if !has_feature('snapshot', $conf, $storecfg);

        $snap = {};
        $conf->{snapshots}->{$snapname} = $snap;

        if ($save_vmstate && check_running($vmid)) {
            $snap->{vmstate} = $alloc_vmstate_volid->($storecfg, $vmid, $conf, $snapname);
        }

        $snapshot_copy_config->($conf, $snap);

        $snap->{snapstate} = "prepare";
        $snap->{snaptime} = time();
        $snap->{description} = $comment if $comment;

        # always overwrite machine if we save vmstate. This makes sure we
        # can restore it later using correct machine type
        if ($snap->{vmstate}) {
            $snap->{machine} = get_current_qemu_machine($vmid);
        }

        update_config_nolock($vmid, $conf, 1);
    });

    return $snap;
};
5726
# Final phase of snapshot creation: mark snapshot $snapname of VM $vmid
# as complete and release the 'snapshot' lock.
#
# Runs under lock_config. Dies if the config does not hold the 'snapshot'
# lock, if the snapshot is missing, or if it is not in state 'prepare'.
# The written config has 'parent' set to $snapname.
my $snapshot_commit = sub {
    my ($vmid, $snapname) = @_;

    lock_config($vmid, sub {
        my $conf = load_config($vmid);

        die "missing snapshot lock\n"
            if !$conf->{lock} || $conf->{lock} ne 'snapshot';

        # remember whether 'machine' was set explicitly before applying
        my $has_machine_config = defined($conf->{machine});

        my $snap = $conf->{snapshots}->{$snapname};

        die "snapshot '$snapname' does not exist\n" if !defined($snap);

        die "wrong snapshot state\n"
            if !$snap->{snapstate} || $snap->{snapstate} ne "prepare";

        delete $snap->{snapstate};
        delete $conf->{lock};

        my $newconf = $snapshot_apply_config->($conf, $snap);

        # do not keep a 'machine' entry that only came from the snapshot
        delete $newconf->{machine} if !$has_machine_config;

        $newconf->{parent} = $snapname;

        update_config_nolock($vmid, $newconf, 1);
    });
};
5760
# Roll VM $vmid back to snapshot $snapname.
#
# Steps:
#   1. ask the storage layer whether a rollback is possible for every
#      non-cdrom drive of the snapshot
#   2. under lock_config (prepare pass): stop the VM and set the
#      'rollback' lock
#   3. roll back all volumes
#   4. under lock_config (final pass): replace the current config with
#      the snapshot config, drop the lock and, if the snapshot contains a
#      VM state, start the VM from that state
#
# Dies if the VM is a template, the snapshot does not exist or is
# incomplete, or the VM is still running after the stop request.
sub snapshot_rollback {
    my ($vmid, $snapname) = @_;

    # selects the pass executed by the locked update function below
    my $prepare = 1;

    my $storecfg = PVE::Storage::config();

    my $conf = load_config($vmid);

    # fetch the snapshot section from the currently loaded $conf
    my $lookup_snapshot = sub {
        die "you can't rollback if vm is a template\n" if is_template($conf);

        my $found = $conf->{snapshots}->{$snapname};

        die "snapshot '$snapname' does not exist\n" if !defined($found);

        return $found;
    };

    my $snap = $lookup_snapshot->();

    foreach_drive($snap, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);

        PVE::Storage::volume_rollback_is_possible($storecfg, $drive->{file}, $snapname);
    });

    my $update_locked = sub {
        # reload - the config may have changed since the first read
        $conf = load_config($vmid);
        $snap = $lookup_snapshot->();

        die "unable to rollback to incomplete snapshot (snapstate = $snap->{snapstate})\n"
            if $snap->{snapstate};

        if ($prepare) {
            check_lock($conf);
            vm_stop($storecfg, $vmid, undef, undef, 5, undef, undef);
        }

        die "unable to rollback vm $vmid: vm is running\n"
            if check_running($vmid);

        my $forcemachine;

        if ($prepare) {
            $conf->{lock} = 'rollback';
        } else {
            die "got wrong lock\n" if !($conf->{lock} && $conf->{lock} eq 'rollback');
            delete $conf->{lock};

            my $has_machine_config = defined($conf->{machine});

            # copy snapshot config to current config
            $conf = $snapshot_apply_config->($conf, $snap);
            $conf->{parent} = $snapname;

            # Note: old code did not store 'machine', so we try to be smart
            # and guess the snapshot was generated with kvm 1.4 (pc-i440fx-1.4).
            $forcemachine = $conf->{machine} || 'pc-i440fx-1.4';
            # we remove the 'machine' configuration if not explicitly specified
            # in the original config.
            delete $conf->{machine} if $snap->{vmstate} && !$has_machine_config;
        }

        update_config_nolock($vmid, $conf, 1);

        if (!$prepare && $snap->{vmstate}) {
            my $statefile = PVE::Storage::path($storecfg, $snap->{vmstate});
            vm_start($storecfg, $vmid, $statefile, undef, undef, undef, $forcemachine);
        }
    };

    # prepare pass
    lock_config($vmid, $update_locked);

    foreach_drive($snap, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);

        PVE::Storage::volume_snapshot_rollback($storecfg, $drive->{file}, $snapname);
    });

    # final pass
    $prepare = 0;
    lock_config($vmid, $update_locked);
}
5858
# Poll 'query-savevm' for VM $vmid once per second until it reports the
# status 'completed'.
#
# Dies if no status is reported or if the status is anything other than
# 'active' or 'completed'.
my $savevm_wait = sub {
    my ($vmid) = @_;

    while (1) {
        my $stat = vm_mon_cmd_nocheck($vmid, "query-savevm");
        my $status = $stat->{status};

        die "savevm not active\n" if !$status;

        last if $status eq 'completed';

        die "query-savevm returned status '$status'\n"
            if $status ne 'active';

        # still active - try again
        sleep(1);
    }
};
5876
# Decide whether snapshots of volume $volid are handled by qemu.
#
# Returns 1 if the type of the volume's storage is listed in
# $qemu_snap_storage, or if the volume ID ends in '.qcow2' or '.qed'.
# Returns undef otherwise.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid) = @_;

    my $storage_name = PVE::Storage::parse_volume_id($volid);
    my $storage_type = $storecfg->{ids}->{$storage_name}->{type};

    return 1 if $qemu_snap_storage->{$storage_type};

    return 1 if $volid =~ m/\.(qcow2|qed)$/;

    return undef;
}
5892
# Create snapshot $snapname of VM $vmid.
#
# Parameters:
#   $vmid         - VM ID
#   $snapname     - name of the new snapshot
#   $save_vmstate - if true and the VM is running, also save the VM state
#   $comment      - optional description stored with the snapshot
#
# The snapshot is registered first ($snapshot_prepare), then every
# non-cdrom drive is snapshotted and finally the config is committed
# ($snapshot_commit). If snapshotting fails, the drives handled so far
# are cleaned up through snapshot_delete and the original error is
# rethrown.
sub snapshot_create {
    my ($vmid, $snapname, $save_vmstate, $comment) = @_;

    my $snap = $snapshot_prepare->($vmid, $snapname, $save_vmstate, $comment);

    $save_vmstate = 0 if !$snap->{vmstate}; # vm is not running

    my $config = load_config($vmid);

    my $running = check_running($vmid);

    # freeze guest filesystems through the agent - not needed if we save RAM
    my $freezefs = $running && $config->{agent};
    $freezefs = 0 if $snap->{vmstate};

    # drives that already got a snapshot (needed for cleanup on error)
    my $drivehash = {};

    if ($freezefs) {
        eval { vm_mon_cmd($vmid, "guest-fsfreeze-freeze"); };
        warn "guest-fsfreeze-freeze problems - $@" if $@;
    }

    eval {
        # create internal snapshots of all drives

        my $storecfg = PVE::Storage::config();

        if ($running) {
            if (my $vmstate = $snap->{vmstate}) {
                my $path = PVE::Storage::path($storecfg, $vmstate);
                vm_mon_cmd($vmid, "savevm-start", statefile => $path);
                $savevm_wait->($vmid);
            } else {
                vm_mon_cmd($vmid, "savevm-start");
            }
        }

        foreach_drive($snap, sub {
            my ($ds, $drive) = @_;

            return if drive_is_cdrom($drive);

            qemu_volume_snapshot($vmid, "drive-$ds", $storecfg, $drive->{file}, $snapname);
            $drivehash->{$ds} = 1;
        });
    };
    my $err = $@;

    if ($running) {
        eval { vm_mon_cmd($vmid, "savevm-end") };
        warn $@ if $@;

        if ($freezefs) {
            eval { vm_mon_cmd($vmid, "guest-fsfreeze-thaw"); };
            warn "guest-fsfreeze-thaw problems - $@" if $@;
        }

        # savevm-end is async, we need to wait
        while (1) {
            my $stat = vm_mon_cmd_nocheck($vmid, "query-savevm");
            last if !$stat->{bytes};

            print "savevm not yet finished\n";
            sleep(1);
        }
    }

    if ($err) {
        warn "snapshot create failed: starting cleanup\n";
        eval { snapshot_delete($vmid, $snapname, 0, $drivehash); };
        warn $@ if $@;
        die $err;
    }

    $snapshot_commit->($vmid, $snapname);
}
5974
5975 # Note: $drivehash is only set when called from snapshot_create.
# Delete snapshot $snapname of VM $vmid.
#
# Parameters:
#   $vmid      - VM ID
#   $snapname  - name of the snapshot to remove
#   $force     - if true, errors while freeing the state volume or while
#                deleting a drive snapshot are only warned about, and the
#                intermediate config updates are skipped
#   $drivehash - optional hash ref of drive names; when given, only
#                these drives get their snapshot deleted, the lock and
#                template checks are skipped and the config lock is
#                removed at the end
#
# The config is updated several times under lock_config: once to mark
# the snapshot with snapstate 'delete', once per removed state volume or
# drive (unless $force), and a last time to drop the snapshot section.
sub snapshot_delete {
    my ($vmid, $snapname, $force, $drivehash) = @_;

    # selects the pass executed by the locked update function below
    my $prepare = 1;

    my $snap;
    my $unused = [];

    # re-link (or remove) a 'parent' reference pointing to the snapshot
    my $unlink_parent = sub {
        my ($confref, $new_parent) = @_;

        return if !($confref->{parent} && $confref->{parent} eq $snapname);

        if ($new_parent) {
            $confref->{parent} = $new_parent;
        } else {
            delete $confref->{parent};
        }
    };

    my $update_locked = sub {
        my ($remove_drive) = @_;

        my $conf = load_config($vmid);

        if (!$drivehash) {
            check_lock($conf);
            die "you can't delete a snapshot if vm is a template\n"
                if is_template($conf);
        }

        $snap = $conf->{snapshots}->{$snapname};

        die "snapshot '$snapname' does not exist\n" if !defined($snap);

        # remove parent refs
        if (!$prepare) {
            $unlink_parent->($conf, $snap->{parent});
            foreach my $sn (keys %{$conf->{snapshots}}) {
                next if $sn eq $snapname;
                $unlink_parent->($conf->{snapshots}->{$sn}, $snap->{parent});
            }
        }

        if ($remove_drive) {
            if ($remove_drive eq 'vmstate') {
                delete $snap->{$remove_drive};
            } else {
                my $drive = parse_drive($remove_drive, $snap->{$remove_drive});
                my $volid = $drive->{file};
                delete $snap->{$remove_drive};
                add_unused_volume($conf, $volid);
            }
        }

        if ($prepare) {
            $snap->{snapstate} = 'delete';
        } else {
            delete $conf->{snapshots}->{$snapname};
            delete $conf->{lock} if $drivehash;
            add_unused_volume($conf, $_) foreach @$unused;
        }

        update_config_nolock($vmid, $conf, 1);
    };

    # prepare pass
    lock_config($vmid, $update_locked);

    # now remove vmstate file

    my $storecfg = PVE::Storage::config();

    if ($snap->{vmstate}) {
        eval { PVE::Storage::vdisk_free($storecfg, $snap->{vmstate}); };
        if (my $err = $@) {
            die $err if !$force;
            warn $err;
        }
        # save changes (remove vmstate from snapshot)
        lock_config($vmid, $update_locked, 'vmstate') if !$force;
    }

    # now remove all internal snapshots
    foreach_drive($snap, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);

        my $volid = $drive->{file};
        my $device = "drive-$ds";

        if (!$drivehash || $drivehash->{$ds}) {
            eval { qemu_volume_snapshot_delete($vmid, $device, $storecfg, $volid, $snapname); };
            if (my $err = $@) {
                die $err if !$force;
                warn $err;
            }
        }

        # save changes (remove drive from snapshot)
        lock_config($vmid, $update_locked, $ds) if !$force;
        push @$unused, $volid;
    });

    # now cleanup config
    $prepare = 0;
    lock_config($vmid, $update_locked);
}
6086
# Check whether all non-cdrom drives in $conf support $feature.
#
# $storecfg, $snapname and $running are passed through to
# PVE::Storage::volume_has_feature. Returns 1 if every drive supports the
# feature (also when there are no such drives), 0 otherwise.
sub has_feature {
    my ($feature, $conf, $storecfg, $snapname, $running) = @_;

    my $unsupported = 0;

    foreach_drive($conf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);

        my $ok = PVE::Storage::volume_has_feature($storecfg, $feature, $drive->{file}, $snapname, $running);
        $unsupported = 1 if !$ok;
    });

    return $unsupported ? 0 : 1;
}
6101
# Convert the disks of VM $vmid into base volumes.
#
# Every non-cdrom drive in $conf (or only drive $disk, if given) whose
# volume supports the 'template' feature is turned into a base volume
# with PVE::Storage::vdisk_create_base. The drive entry in $conf is
# updated and the config is written after each converted drive.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    foreach_drive($conf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $disk && $ds ne $disk;

        my $volid = $drive->{file};
        my $can_template = PVE::Storage::volume_has_feature($storecfg, 'template', $volid);
        return if !$can_template;

        $drive->{file} = PVE::Storage::vdisk_create_base($storecfg, $volid);
        $conf->{$ds} = print_drive($vmid, $drive);
        update_config_nolock($vmid, $conf, 1);
    });
}
6122
# Returns 1 if the 'template' setting in $conf is defined and equals 1,
# a false value (empty string) otherwise.
sub is_template {
    my ($conf) = @_;

    my $flag = $conf->{template};

    return (defined($flag) && $flag == 1) ? 1 : !1;
}
6128
# Copy the content of volume $src_volid to the existing volume $dst_volid
# using 'qemu-img convert'.
#
# Parameters:
#   $src_volid - source volume ID
#   $dst_volid - destination volume ID
#   $size      - total size, only used for the progress output
#   $snapname  - optional snapshot of the source to copy from
#
# Nothing is done unless both volume IDs can be parsed into a storage ID.
# Dies with "copy failed: ..." if the command fails.
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    if ($src_storeid && $dst_storeid) {
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

        my $src_format = qemu_img_format($src_scfg, $src_volname);
        my $dst_format = qemu_img_format($dst_scfg, $dst_volname);

        my $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);

        my @cmd = ('/usr/bin/qemu-img', 'convert', '-t', 'writeback', '-p', '-n');
        # read from the internal snapshot for qcow2 sources
        push @cmd, '-s', $snapname if $snapname && $src_format eq "qcow2";
        push @cmd, '-f', $src_format, '-O', $dst_format, $src_path, $dst_path;

        # translate the percentage printed by qemu-img into byte counters
        my $report_progress = sub {
            my ($line) = @_;

            return if $line !~ m/\((\S+)\/100\%\)/;

            my $percent = $1;
            my $transferred = int($size * $percent / 100);
            my $remaining = $size - $transferred;

            print "transferred: $transferred bytes remaining: $remaining bytes total: $size bytes progression: $percent %\n";
        };

        eval { run_command(\@cmd, timeout => undef, outfunc => $report_progress); };
        my $err = $@;
        die "copy failed: $err" if $err;
    }
}
6168
# Determine the image format of volume $volname on storage $scfg.
#
# For storages with a 'path' setting the format is taken from the file
# extension (raw, cow, qcow, qcow2, qed, vmdk or cloop). In all other
# cases "raw" is returned.
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    return $1
        if $scfg->{path} && $volname =~ m/\.(raw|cow|qcow|qcow2|qed|vmdk|cloop)$/;

    return "raw";
}
6178
# Mirror drive $drive of the running VM $vmid to the existing volume
# $dst_volid using the 'drive-mirror' monitor command.
#
# Parameters:
#   $vmid      - ID of the VM owning the drive
#   $drive     - drive name (the qemu device is "drive-$drive")
#   $dst_volid - destination volume ID
#   $vmiddst   - ID of the VM that will use the destination volume
#
# The job is polled once per second and progress is printed. When the job
# is ready and $vmiddst equals $vmid, the VM is switched to the new volume
# with 'block-job-complete'. Otherwise the job is cancelled once ready,
# leaving the source drive in use. On any error the job is cancelled and
# "mirroring error: ..." is thrown.
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst) = @_;

    my $storecfg = PVE::Storage::config();
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);

    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

    my $format = qemu_img_format($dst_scfg, $dst_volname);

    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);

    my $device = "drive-$drive";

    my $opts = {
        timeout => 10,
        device => $device,
        mode => "existing",
        sync => "full",
        target => $dst_path,
    };
    $opts->{format} = $format if $format;

    print "drive mirror is starting (scanning bitmap) : this step can take some minutes/hours, depend of disk size and storage speed\n";

    # cancel the block job and wait until it is gone
    my $cancel_job = sub {
        vm_mon_cmd($vmid, "block-job-cancel", device => $device);
        while (1) {
            my $jobs = vm_mon_cmd($vmid, "query-block-jobs");
            last if !$jobs->[0];
            sleep 1;
        }
    };

    eval {
        vm_mon_cmd($vmid, "drive-mirror", %$opts);
        while (1) {
            my $jobs = vm_mon_cmd($vmid, "query-block-jobs");
            my $stat = $jobs->[0];
            die "mirroring job seem to have die. Maybe do you have bad sectors?" if !$stat;
            die "error job is not mirroring" if $stat->{type} ne "mirror";

            my $busy = $stat->{busy};
            my $ready = $stat->{ready};

            if (my $total = $stat->{len}) {
                my $transferred = $stat->{offset} || 0;
                my $remaining = $total - $transferred;
                my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                print "transferred: $transferred bytes remaining: $remaining bytes total: $total bytes progression: $percent % busy: $busy ready: $ready \n";
            }

            if ($stat->{ready} eq 'true') {

                last if $vmiddst != $vmid;

                # try to switch the disk if source and destination are on the same guest
                eval { vm_mon_cmd($vmid, "block-job-complete", device => $device) };
                last if !$@;
                die $@ if $@ !~ m/cannot be completed/;
            }
            sleep 1;
        }
    };
    my $err = $@;

    if ($err) {
        eval { $cancel_job->(); };
        die "mirroring error: $err";
    }

    if ($vmiddst != $vmid) {
        # if we clone a disk for a new target vm, we don't switch the disk
        $cancel_job->(); # so we call block-job-cancel
    }
}
6252
# Clone one drive of VM $vmid for VM $newvmid.
#
# Parameters:
#   $storecfg   - storage configuration
#   $vmid       - source VM ID
#   $running    - true if the source VM is running
#   $drivename  - name of the drive
#   $drive      - parsed drive (hash ref); it is modified and returned
#   $snapname   - optional snapshot to clone from
#   $newvmid    - ID of the target VM
#   $storage    - optional target storage for full clones
#   $format     - optional target format for full clones
#   $full       - create a full copy instead of a linked clone
#   $newvollist - array ref; the new volume ID is appended to it
#
# A full clone allocates a new volume and copies the data with
# qemu_img_convert (VM stopped or cloning from a snapshot) or with
# qemu_drive_mirror (running VM). The format falls back to the storage
# default if the requested one is not supported.
#
# Returns $drive with 'file' and 'size' set to the new volume and
# 'format' reset to undef.
sub clone_disk {
    my ($storecfg, $vmid, $running, $drivename, $drive, $snapname,
        $newvmid, $storage, $format, $full, $newvollist) = @_;

    my $newvolid;

    if ($full) {
        my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        $storeid = $storage if $storage;

        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        if (!$format) {
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        }

        # test if requested format is supported - else use default
        my $supported = grep { $_ eq $format } @$validFormats;
        $format = $defFormat if !$supported;

        my ($size) = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 3);

        print "create full clone of drive $drivename ($drive->{file})\n";
        $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $newvmid, $format, undef, ($size/1024));
        push @$newvollist, $newvolid;

        if ($running && !$snapname) {
            qemu_drive_mirror($vmid, $drivename, $newvolid, $newvmid);
        } else {
            qemu_img_convert($drive->{file}, $newvolid, $size, $snapname);
        }
    } else {
        print "create linked clone of drive $drivename ($drive->{file})\n";
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    }

    my ($newsize) = PVE::Storage::volume_size_info($storecfg, $newvolid, 3);

    # note: this updates the hash passed in by the caller
    my $disk = $drive;
    $disk->{format} = undef;
    $disk->{file} = $newvolid;
    $disk->{size} = $newsize;

    return $disk;
}
6299
6300 # this only works if VM is running
# Query the machine types known to the qemu process of VM $vmid
# ('query-machines') and return the name of the one flagged
# 'is-current'. Falls back to the one flagged 'is-default', and finally
# to 'pc'.
sub get_current_qemu_machine {
    my ($vmid) = @_;

    my $res = vm_qmp_command($vmid, { execute => 'query-machines', arguments => {} });

    my %name_of;
    foreach my $entry (@$res) {
        foreach my $flag (qw(is-default is-current)) {
            $name_of{$flag} = $entry->{name} if $entry->{$flag};
        }
    }

    # fallback to the default machine if current is not supported by qemu
    return $name_of{'is-current'} || $name_of{'is-default'} || 'pc';
}
6316
# Check whether the effective machine/qemu version is at least
# $version_major.$version_minor.
#
# Parameters:
#   $machine       - machine type, e.g. 'pc-i440fx-2.2' or 'pc-q35-2.1';
#                    if it carries a version, that version is used
#   $kvmver        - qemu version string ('MAJOR.MINOR...'), used when
#                    $machine is unset or carries no version
#   $version_major - required major version
#   $version_minor - required minor version
#
# Returns 1 if the version is new enough, 0 otherwise (also when no
# version can be determined).
sub qemu_machine_feature_enabled {
    my ($machine, $kvmver, $version_major, $version_minor) = @_;

    my ($current_major, $current_minor);

    if ($machine && $machine =~ m/^(pc(-i440fx|-q35)?-(\d+)\.(\d+))/) {
        ($current_major, $current_minor) = ($3, $4);
    } elsif (defined($kvmver) && $kvmver =~ m/^(\d+)\.(\d+)/) {
        ($current_major, $current_minor) = ($1, $2);
    }

    # no version information available - treat the feature as missing
    return 0 if !defined($current_major);

    # A newer major version always qualifies; the minor version only
    # matters when the major versions are equal (3.0 is newer than 2.2).
    return 1 if $current_major > $version_major;
    return 1 if $current_major == $version_major && $current_minor >= $version_minor;

    return 0;
}
6338
# List the entries below "$pcisysfs/devices" whose names look like PCI
# addresses (DDDD:BB:SS.F).
#
# Returns a hash ref keyed by the 'BB:SS' part of the address; each value
# is an array ref with one { id => 'BB:SS', function => F } hash per
# matching entry.
sub lspci {

    my %functions_of;

    dir_glob_foreach("$pcisysfs/devices", '[a-f0-9]{4}:([a-f0-9]{2}:[a-f0-9]{2})\.([0-9])', sub {
        my (undef, $id, $function) = @_;
        push @{$functions_of{$id}}, { id => $id, function => $function };
    });

    return \%functions_of;
}
6351
# Query the iothreads of VM $vmid ('query-iothreads').
#
# Returns a hash ref mapping each iothread 'id' to its 'thread-id'.
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $res = vm_mon_cmd($vmid, 'query-iothreads');

    my %thread_id_of = map { ($_->{id} => $_->{"thread-id"}) } @$res;

    return \%thread_id_of;
}
6364
# Compute the SCSI controller layout for $drive.
#
# The number of devices per controller depends on $conf->{scsihw}:
# 7 for types starting with 'lsi', 1 for 'virtio-scsi-single' and 256
# for everything else (including no setting at all).
#
# Returns the list ($maxdev, $controller, $controller_prefix), where
# $controller is int($drive->{index} / $maxdev) and $controller_prefix is
# "virtioscsi" for 'virtio-scsi-single' and "scsihw" otherwise.
sub scsihw_infos {
    my ($conf, $drive) = @_;

    my $scsihw = $conf->{scsihw};
    my $is_single = $scsihw && $scsihw eq 'virtio-scsi-single';

    my $maxdev = 256;
    if ($scsihw && $scsihw =~ m/^lsi/) {
        $maxdev = 7;
    } elsif ($is_single) {
        $maxdev = 1;
    }

    my $controller = int($drive->{index} / $maxdev);
    my $controller_prefix = $is_single ? "virtioscsi" : "scsihw";

    return ($maxdev, $controller, $controller_prefix);
}
6383
6384 1;