]> git.proxmox.com Git - qemu-server.git/blob - PVE/QemuServer.pm
vm_deviceunplug: raise expection if something fail
[qemu-server.git] / PVE / QemuServer.pm
1 package PVE::QemuServer;
2
3 use strict;
4 use warnings;
5 use POSIX;
6 use IO::Handle;
7 use IO::Select;
8 use IO::File;
9 use IO::Dir;
10 use IO::Socket::UNIX;
11 use File::Basename;
12 use File::Path;
13 use File::stat;
14 use Getopt::Long;
15 use Digest::SHA;
16 use Fcntl ':flock';
17 use Cwd 'abs_path';
18 use IPC::Open3;
19 use JSON;
20 use Fcntl;
21 use PVE::SafeSyslog;
22 use Storable qw(dclone);
23 use PVE::Exception qw(raise raise_param_exc);
24 use PVE::Storage;
25 use PVE::Tools qw(run_command lock_file lock_file_full file_read_firstline dir_glob_foreach);
26 use PVE::JSONSchema qw(get_standard_option);
27 use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file cfs_lock_file);
28 use PVE::INotify;
29 use PVE::ProcFSTools;
30 use PVE::QMPClient;
31 use PVE::RPCEnvironment;
32 use Time::HiRes qw(gettimeofday);
33
34 my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
35
36 # Note about locking: we use flock on the config file to protect
37 # against concurrent actions.
38 # Additionally, we have a 'lock' setting in the config file. This
39 # can be set to 'migrate', 'backup', 'snapshot' or 'rollback'. Most actions are not
40 # allowed when such a lock is set. But you can ignore this kind of
41 # lock with the --skiplock flag.
42
43 cfs_register_file('/qemu-server/',
44 \&parse_vm_config,
45 \&write_vm_config);
46
47 PVE::JSONSchema::register_standard_option('skiplock', {
48 description => "Ignore locks - only root is allowed to use this option.",
49 type => 'boolean',
50 optional => 1,
51 });
52
53 PVE::JSONSchema::register_standard_option('pve-qm-stateuri', {
54 description => "Some command save/restore state from this location.",
55 type => 'string',
56 maxLength => 128,
57 optional => 1,
58 });
59
60 PVE::JSONSchema::register_standard_option('pve-snapshot-name', {
61 description => "The name of the snapshot.",
62 type => 'string', format => 'pve-configid',
63 maxLength => 40,
64 });
65
66 #no warnings 'redefine';
67
# Define the fairsched syscall numbers and the KVM ioctl request code once.
# Every constant is installed through a string eval guarded by
# 'unless defined', so an already existing definition is never replaced.
# Dies if the architecture is neither x86_64 nor i386.
unless (defined(&_VZSYSCALLS_H_)) {
    eval 'sub _VZSYSCALLS_H_ () {1;}' unless defined(&_VZSYSCALLS_H_);
    require 'sys/syscall.ph';
    if (defined(&__x86_64__)) {
        eval 'sub __NR_fairsched_vcpus () {499;}' unless defined(&__NR_fairsched_vcpus);
        eval 'sub __NR_fairsched_mknod () {504;}' unless defined(&__NR_fairsched_mknod);
        eval 'sub __NR_fairsched_rmnod () {505;}' unless defined(&__NR_fairsched_rmnod);
        eval 'sub __NR_fairsched_chwt () {506;}' unless defined(&__NR_fairsched_chwt);
        eval 'sub __NR_fairsched_mvpr () {507;}' unless defined(&__NR_fairsched_mvpr);
        eval 'sub __NR_fairsched_rate () {508;}' unless defined(&__NR_fairsched_rate);
        eval 'sub __NR_setluid () {501;}' unless defined(&__NR_setluid);
        eval 'sub __NR_setublimit () {502;}' unless defined(&__NR_setublimit);
    } elsif (defined(&__i386__)) {
        eval 'sub __NR_fairsched_mknod () {500;}' unless defined(&__NR_fairsched_mknod);
        eval 'sub __NR_fairsched_rmnod () {501;}' unless defined(&__NR_fairsched_rmnod);
        eval 'sub __NR_fairsched_chwt () {502;}' unless defined(&__NR_fairsched_chwt);
        eval 'sub __NR_fairsched_mvpr () {503;}' unless defined(&__NR_fairsched_mvpr);
        eval 'sub __NR_fairsched_rate () {504;}' unless defined(&__NR_fairsched_rate);
        eval 'sub __NR_fairsched_vcpus () {505;}' unless defined(&__NR_fairsched_vcpus);
        eval 'sub __NR_setluid () {511;}' unless defined(&__NR_setluid);
        eval 'sub __NR_setublimit () {512;}' unless defined(&__NR_setublimit);
    } else {
        die("no fairsched syscall for this arch");
    }
    require 'asm/ioctl.ph';
    # KVM_GET_API_VERSION is ioctl number 0x00 in the 0xAE (KVMIO) group.
    # The number must be a complete hex literal: a bare '0x' is a compile
    # error inside the string eval, which would silently leave the constant
    # undefined and break kvm_version() at run time.
    eval 'sub KVM_GET_API_VERSION () { &_IO(0xAE, 0x00);}' unless defined(&KVM_GET_API_VERSION);
}
96
# Invoke the fairsched_mknod syscall.
# All three arguments are truncated to integers before the call; the raw
# syscall() result is returned unchanged.
sub fairsched_mknod {
    my ($parent, $weight, $desired) = @_;

    my $syscall_nr = __NR_fairsched_mknod();

    return syscall($syscall_nr, int($parent), int($weight), int($desired));
}
102
# Invoke the fairsched_rmnod syscall for the given id (truncated to an
# integer) and return the raw syscall() result.
sub fairsched_rmnod {
    my ($id) = @_;

    my $syscall_nr = __NR_fairsched_rmnod();

    return syscall($syscall_nr, int($id));
}
108
# Invoke the fairsched_mvpr syscall with a pid and a node id (both truncated
# to integers) and return the raw syscall() result.
sub fairsched_mvpr {
    my ($pid, $newid) = @_;

    my $syscall_nr = __NR_fairsched_mvpr();

    return syscall($syscall_nr, int($pid), int($newid));
}
114
# Invoke the fairsched_vcpus syscall with a node id and a vcpu count (both
# truncated to integers) and return the raw syscall() result.
sub fairsched_vcpus {
    my ($id, $vcpus) = @_;

    my $syscall_nr = __NR_fairsched_vcpus();

    return syscall($syscall_nr, int($id), int($vcpus));
}
120
# Invoke the fairsched_rate syscall.
# $op is one of the FAIRSCHED_*_RATE operation codes; all arguments are
# truncated to integers and the raw syscall() result is returned.
sub fairsched_rate {
    my ($id, $op, $rate) = @_;

    my $syscall_nr = __NR_fairsched_rate();

    return syscall($syscall_nr, int($id), int($op), int($rate));
}
126
# Operation codes passed as $op to fairsched_rate().
use constant {
    FAIRSCHED_SET_RATE  => 0,
    FAIRSCHED_DROP_RATE => 1,
    FAIRSCHED_GET_RATE  => 2,
};

# Apply a CPU limit, given in percent, to the fairsched node $id.
# The percentage is converted to 1/1024 units. A resulting rate of zero
# drops the rate instead of setting it.
sub fairsched_cpulimit {
    my ($id, $limit) = @_;

    my $rate = int($limit * 1024 / 100);

    if ($rate) {
        return fairsched_rate($id, FAIRSCHED_SET_RATE, $rate);
    }

    return fairsched_rate($id, FAIRSCHED_DROP_RATE, $rate);
}
139
140 my $nodename = PVE::INotify::nodename();
141
142 mkdir "/etc/pve/nodes/$nodename";
143 my $confdir = "/etc/pve/nodes/$nodename/qemu-server";
144 mkdir $confdir;
145
146 my $var_run_tmpdir = "/var/run/qemu-server";
147 mkdir $var_run_tmpdir;
148
149 my $lock_dir = "/var/lock/qemu-server";
150 mkdir $lock_dir;
151
152 my $pcisysfs = "/sys/bus/pci";
153
154 my $confdesc = {
155 iothread => {
156 optional => 1,
157 type => 'boolean',
158 description => "Enable iothread dataplane.",
159 default => 0,
160 },
161 onboot => {
162 optional => 1,
163 type => 'boolean',
164 description => "Specifies whether a VM will be started during system bootup.",
165 default => 0,
166 },
167 autostart => {
168 optional => 1,
169 type => 'boolean',
170 description => "Automatic restart after crash (currently ignored).",
171 default => 0,
172 },
173 hotplug => {
174 optional => 1,
175 type => 'boolean',
176 description => "Allow hotplug for disk and network device",
177 default => 0,
178 },
179 reboot => {
180 optional => 1,
181 type => 'boolean',
182 description => "Allow reboot. If set to '0' the VM exit on reboot.",
183 default => 1,
184 },
185 lock => {
186 optional => 1,
187 type => 'string',
188 description => "Lock/unlock the VM.",
189 enum => [qw(migrate backup snapshot rollback)],
190 },
191 cpulimit => {
192 optional => 1,
193 type => 'integer',
194 description => "Limit of CPU usage in per cent. Note if the computer has 2 CPUs, it has total of 200% CPU time. Value '0' indicates no CPU limit.\n\nNOTE: This option is currently ignored.",
195 minimum => 0,
196 default => 0,
197 },
198 cpuunits => {
199 optional => 1,
200 type => 'integer',
201 description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to weights of all the other running VMs.\n\nNOTE: You can disable fair-scheduler configuration by setting this to 0.",
202 minimum => 0,
203 maximum => 500000,
204 default => 1000,
205 },
206 memory => {
207 optional => 1,
208 type => 'integer',
209 description => "Amount of RAM for the VM in MB. This is the maximum available memory when you use the balloon device.",
210 minimum => 16,
211 default => 512,
212 },
213 balloon => {
214 optional => 1,
215 type => 'integer',
216 description => "Amount of target RAM for the VM in MB. Using zero disables the ballon driver.",
217 minimum => 0,
218 },
219 shares => {
220 optional => 1,
221 type => 'integer',
222 description => "Amount of memory shares for auto-ballooning. The larger the number is, the more memory this VM gets. Number is relative to weights of all other running VMs. Using zero disables auto-ballooning",
223 minimum => 0,
224 maximum => 50000,
225 default => 1000,
226 },
227 keyboard => {
228 optional => 1,
229 type => 'string',
230 description => "Keybord layout for vnc server. Default is read from the datacenter configuration file.",
231 enum => PVE::Tools::kvmkeymaplist(),
232 default => 'en-us',
233 },
234 name => {
235 optional => 1,
236 type => 'string', format => 'dns-name',
237 description => "Set a name for the VM. Only used on the configuration web interface.",
238 },
239 scsihw => {
240 optional => 1,
241 type => 'string',
242 description => "scsi controller model",
243 enum => [qw(lsi lsi53c810 virtio-scsi-pci megasas pvscsi)],
244 default => 'lsi',
245 },
246 description => {
247 optional => 1,
248 type => 'string',
249 description => "Description for the VM. Only used on the configuration web interface. This is saved as comment inside the configuration file.",
250 },
251 ostype => {
252 optional => 1,
253 type => 'string',
254 enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 l24 l26 solaris)],
255 description => <<EODESC,
256 Used to enable special optimization/features for specific
257 operating systems:
258
259 other => unspecified OS
260 wxp => Microsoft Windows XP
261 w2k => Microsoft Windows 2000
262 w2k3 => Microsoft Windows 2003
263 w2k8 => Microsoft Windows 2008
264 wvista => Microsoft Windows Vista
265 win7 => Microsoft Windows 7
266 win8 => Microsoft Windows 8/2012
267 l24 => Linux 2.4 Kernel
268 l26 => Linux 2.6/3.X Kernel
269 solaris => solaris/opensolaris/openindiania kernel
270
271 other|l24|l26|solaris ... no special behaviour
272 wxp|w2k|w2k3|w2k8|wvista|win7|win8 ... use --localtime switch
273 EODESC
274 },
275 boot => {
276 optional => 1,
277 type => 'string',
278 description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n).",
279 pattern => '[acdn]{1,4}',
280 default => 'cdn',
281 },
282 bootdisk => {
283 optional => 1,
284 type => 'string', format => 'pve-qm-bootdisk',
285 description => "Enable booting from specified disk.",
286 pattern => '(ide|sata|scsi|virtio)\d+',
287 },
288 smp => {
289 optional => 1,
290 type => 'integer',
291 description => "The number of CPUs. Please use option -sockets instead.",
292 minimum => 1,
293 default => 1,
294 },
295 sockets => {
296 optional => 1,
297 type => 'integer',
298 description => "The number of CPU sockets.",
299 minimum => 1,
300 default => 1,
301 },
302 cores => {
303 optional => 1,
304 type => 'integer',
305 description => "The number of cores per socket.",
306 minimum => 1,
307 default => 1,
308 },
309 numa => {
310 optional => 1,
311 type => 'boolean',
312 description => "Enable/disable Numa.",
313 default => 0,
314 },
315 maxcpus => {
316 optional => 1,
317 type => 'integer',
318 description => "Maximum cpus for hotplug.",
319 minimum => 1,
320 default => 1,
321 },
322 acpi => {
323 optional => 1,
324 type => 'boolean',
325 description => "Enable/disable ACPI.",
326 default => 1,
327 },
328 agent => {
329 optional => 1,
330 type => 'boolean',
331 description => "Enable/disable Qemu GuestAgent.",
332 default => 0,
333 },
334 kvm => {
335 optional => 1,
336 type => 'boolean',
337 description => "Enable/disable KVM hardware virtualization.",
338 default => 1,
339 },
340 tdf => {
341 optional => 1,
342 type => 'boolean',
343 description => "Enable/disable time drift fix.",
344 default => 0,
345 },
346 localtime => {
347 optional => 1,
348 type => 'boolean',
349 description => "Set the real time clock to local time. This is enabled by default if ostype indicates a Microsoft OS.",
350 },
351 freeze => {
352 optional => 1,
353 type => 'boolean',
354 description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
355 },
356 vga => {
357 optional => 1,
358 type => 'string',
359 description => "Select VGA type. If you want to use high resolution modes (>= 1280x1024x16) then you should use option 'std' or 'vmware'. Default is 'std' for win8/win7/w2k8, and 'cirrur' for other OS types. Option 'qxl' enables the SPICE display sever. You can also run without any graphic card using a serial devive as terminal.",
360 enum => [qw(std cirrus vmware qxl serial0 serial1 serial2 serial3 qxl2 qxl3 qxl4)],
361 },
362 watchdog => {
363 optional => 1,
364 type => 'string', format => 'pve-qm-watchdog',
365 typetext => '[[model=]i6300esb|ib700] [,[action=]reset|shutdown|poweroff|pause|debug|none]',
366 description => "Create a virtual hardware watchdog device. Once enabled (by a guest action), the watchdog must be periodically polled by an agent inside the guest or else the guest will be restarted (or execute the action specified)",
367 },
368 startdate => {
369 optional => 1,
370 type => 'string',
371 typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
372 description => "Set the initial date of the real time clock. Valid format for date are: 'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
373 pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
374 default => 'now',
375 },
376 startup => {
377 optional => 1,
378 type => 'string', format => 'pve-qm-startup',
379 typetext => '[[order=]\d+] [,up=\d+] [,down=\d+] ',
380 description => "Startup and shutdown behavior. Order is a non-negative number defining the general startup order. Shutdown in done with reverse ordering. Additionally you can set the 'up' or 'down' delay in seconds, which specifies a delay to wait before the next VM is started or stopped.",
381 },
382 template => {
383 optional => 1,
384 type => 'boolean',
385 description => "Enable/disable Template.",
386 default => 0,
387 },
388 args => {
389 optional => 1,
390 type => 'string',
391 description => <<EODESCR,
392 Note: this option is for experts only. It allows you to pass arbitrary arguments to kvm, for example:
393
394 args: -no-reboot -no-hpet
395 EODESCR
396 },
397 tablet => {
398 optional => 1,
399 type => 'boolean',
400 default => 1,
401 description => "Enable/disable the usb tablet device. This device is usually needed to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with normal VNC clients. If you're running lots of console-only guests on one host, you may consider disabling this to save some context switches. This is turned of by default if you use spice (vga=qxl).",
402 },
403 migrate_speed => {
404 optional => 1,
405 type => 'integer',
406 description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
407 minimum => 0,
408 default => 0,
409 },
410 migrate_downtime => {
411 optional => 1,
412 type => 'number',
413 description => "Set maximum tolerated downtime (in seconds) for migrations.",
414 minimum => 0,
415 default => 0.1,
416 },
417 cdrom => {
418 optional => 1,
419 type => 'string', format => 'pve-qm-drive',
420 typetext => 'volume',
421 description => "This is an alias for option -ide2",
422 },
423 cpu => {
424 optional => 1,
425 description => "Emulated CPU type.",
426 type => 'string',
427 enum => [ qw(486 athlon pentium pentium2 pentium3 coreduo core2duo kvm32 kvm64 qemu32 qemu64 phenom Conroe Penryn Nehalem Westmere SandyBridge Haswell Broadwell Opteron_G1 Opteron_G2 Opteron_G3 Opteron_G4 Opteron_G5 host) ],
428 default => 'kvm64',
429 },
430 parent => get_standard_option('pve-snapshot-name', {
431 optional => 1,
432 description => "Parent snapshot name. This is used internally, and should not be modified.",
433 }),
434 snaptime => {
435 optional => 1,
436 description => "Timestamp for snapshots.",
437 type => 'integer',
438 minimum => 0,
439 },
440 vmstate => {
441 optional => 1,
442 type => 'string', format => 'pve-volume-id',
443 description => "Reference to a volume which stores the VM state. This is used internally for snapshots.",
444 },
445 machine => {
446 description => "Specific the Qemu machine type.",
447 type => 'string',
448 pattern => '(pc|pc(-i440fx)?-\d+\.\d+|q35|pc-q35-\d+\.\d+)',
449 maxLength => 40,
450 optional => 1,
451 },
452 smbios1 => {
453 description => "Specify SMBIOS type 1 fields.",
454 type => 'string', format => 'pve-qm-smbios1',
455 typetext => "[manufacturer=str][,product=str][,version=str][,serial=str] [,uuid=uuid][,sku=str][,family=str]",
456 maxLength => 256,
457 optional => 1,
458 },
459 };
460
461 # what about other qemu settings ?
462 #cpu => 'string',
463 #machine => 'string',
464 #fda => 'file',
465 #fdb => 'file',
466 #mtdblock => 'file',
467 #sd => 'file',
468 #pflash => 'file',
469 #snapshot => 'bool',
470 #bootp => 'file',
471 ##tftp => 'dir',
472 ##smb => 'dir',
473 #kernel => 'file',
474 #append => 'string',
475 #initrd => 'file',
476 ##soundhw => 'string',
477
# Publish every option described so far as a "pve-qm-<option>" standard option.
foreach my $opt (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$opt", $confdesc->{$opt});
}
481
482 my $MAX_IDE_DISKS = 4;
483 my $MAX_SCSI_DISKS = 14;
484 my $MAX_VIRTIO_DISKS = 16;
485 my $MAX_SATA_DISKS = 6;
486 my $MAX_USB_DEVICES = 5;
487 my $MAX_NETS = 32;
488 my $MAX_UNUSED_DISKS = 8;
489 my $MAX_HOSTPCI_DEVICES = 4;
490 my $MAX_SERIAL_PORTS = 4;
491 my $MAX_PARALLEL_PORTS = 3;
492 my $MAX_NUMA = 8;
493
494 my $numadesc = {
495 optional => 1,
496 type => 'string', format => 'pve-qm-numanode',
497 typetext => "cpus=<id[-id],memory=<mb>[[,hostnodes=<id[-id]>][,policy=<preferred|bind|interleave>]]",
498 description => "numa topology",
499 };
500 PVE::JSONSchema::register_standard_option("pve-qm-numanode", $numadesc);
501
502 for (my $i = 0; $i < $MAX_NUMA; $i++) {
503 $confdesc->{"numa$i"} = $numadesc;
504 }
505
506 my $nic_model_list = ['rtl8139', 'ne2k_pci', 'e1000', 'pcnet', 'virtio',
507 'ne2k_isa', 'i82551', 'i82557b', 'i82559er', 'vmxnet3'];
508 my $nic_model_list_txt = join(' ', sort @$nic_model_list);
509
510 my $netdesc = {
511 optional => 1,
512 type => 'string', format => 'pve-qm-net',
513 typetext => "MODEL=XX:XX:XX:XX:XX:XX [,bridge=<dev>][,queues=<nbqueues>][,rate=<mbps>][,tag=<vlanid>][,firewall=0|1]",
514 description => <<EODESCR,
515 Specify network devices.
516
517 MODEL is one of: $nic_model_list_txt
518
519 XX:XX:XX:XX:XX:XX should be an unique MAC address. This is
520 automatically generated if not specified.
521
522 The bridge parameter can be used to automatically add the interface to a bridge device. The Proxmox VE standard bridge is called 'vmbr0'.
523
524 Option 'rate' is used to limit traffic bandwidth from and to this interface. It is specified as floating point number, unit is 'Megabytes per second'.
525
526 If you specify no bridge, we create a kvm 'user' (NATed) network device, which provides DHCP and DNS services. The following addresses are used:
527
528 10.0.2.2 Gateway
529 10.0.2.3 DNS Server
530 10.0.2.4 SMB Server
531
532 The DHCP server assign addresses to the guest starting from 10.0.2.15.
533
534 EODESCR
535 };
536 PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
537
538 for (my $i = 0; $i < $MAX_NETS; $i++) {
539 $confdesc->{"net$i"} = $netdesc;
540 }
541
542 my $drivename_hash;
543
544 my $idedesc = {
545 optional => 1,
546 type => 'string', format => 'pve-qm-drive',
547 typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback|unsafe|directsync] [,format=f] [,backup=yes|no] [,rerror=ignore|report|stop] [,werror=enospc|ignore|report|stop] [,aio=native|threads] [,discard=ignore|on]',
548 description => "Use volume as IDE hard disk or CD-ROM (n is 0 to " .($MAX_IDE_DISKS -1) . ").",
549 };
550 PVE::JSONSchema::register_standard_option("pve-qm-ide", $idedesc);
551
552 my $scsidesc = {
553 optional => 1,
554 type => 'string', format => 'pve-qm-drive',
555 typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback|unsafe|directsync] [,format=f] [,backup=yes|no] [,rerror=ignore|report|stop] [,werror=enospc|ignore|report|stop] [,aio=native|threads] [,discard=ignore|on]',
556 description => "Use volume as SCSI hard disk or CD-ROM (n is 0 to " . ($MAX_SCSI_DISKS - 1) . ").",
557 };
558 PVE::JSONSchema::register_standard_option("pve-qm-scsi", $scsidesc);
559
560 my $satadesc = {
561 optional => 1,
562 type => 'string', format => 'pve-qm-drive',
563 typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback|unsafe|directsync] [,format=f] [,backup=yes|no] [,rerror=ignore|report|stop] [,werror=enospc|ignore|report|stop] [,aio=native|threads] [,discard=ignore|on]',
564 description => "Use volume as SATA hard disk or CD-ROM (n is 0 to " . ($MAX_SATA_DISKS - 1). ").",
565 };
566 PVE::JSONSchema::register_standard_option("pve-qm-sata", $satadesc);
567
568 my $virtiodesc = {
569 optional => 1,
570 type => 'string', format => 'pve-qm-drive',
571 typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback|unsafe|directsync] [,format=f] [,backup=yes|no] [,rerror=ignore|report|stop] [,werror=enospc|ignore|report|stop] [,aio=native|threads] [,discard=ignore|on]',
572 description => "Use volume as VIRTIO hard disk (n is 0 to " . ($MAX_VIRTIO_DISKS - 1) . ").",
573 };
574 PVE::JSONSchema::register_standard_option("pve-qm-virtio", $virtiodesc);
575
576 my $usbdesc = {
577 optional => 1,
578 type => 'string', format => 'pve-qm-usb-device',
579 typetext => 'host=HOSTUSBDEVICE|spice',
580 description => <<EODESCR,
581 Configure an USB device (n is 0 to 4). This can be used to
582 pass-through usb devices to the guest. HOSTUSBDEVICE syntax is:
583
584 'bus-port(.port)*' (decimal numbers) or
585 'vendor_id:product_id' (hexadeciaml numbers)
586
587 You can use the 'lsusb -t' command to list existing usb devices.
588
589 Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.
590
591 The value 'spice' can be used to add a usb redirection devices for spice.
592
593 EODESCR
594 };
595 PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);
596
597 my $hostpcidesc = {
598 optional => 1,
599 type => 'string', format => 'pve-qm-hostpci',
600 typetext => "[host=]HOSTPCIDEVICE [,driver=kvm|vfio] [,rombar=on|off] [,pcie=0|1] [,x-vga=on|off]",
601 description => <<EODESCR,
602 Map host pci devices. HOSTPCIDEVICE syntax is:
603
604 'bus:dev.func' (hexadecimal numbers)
605
606 You can us the 'lspci' command to list existing pci devices.
607
608 The 'rombar' option determines whether or not the device's ROM will be visible in the guest's memory map (default is 'on').
609
610 Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.
611
612 Experimental: user reported problems with this option.
613 EODESCR
614 };
615 PVE::JSONSchema::register_standard_option("pve-qm-hostpci", $hostpcidesc);
616
617 my $serialdesc = {
618 optional => 1,
619 type => 'string',
620 pattern => '(/dev/.+|socket)',
621 description => <<EODESCR,
622 Create a serial device inside the VM (n is 0 to 3), and pass through a host serial device (i.e. /dev/ttyS0), or create a unix socket on the host side (use 'qm terminal' to open a terminal connection).
623
624 Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.
625
626 Experimental: user reported problems with this option.
627 EODESCR
628 };
629
630 my $paralleldesc= {
631 optional => 1,
632 type => 'string',
633 pattern => '/dev/parport\d+|/dev/usb/lp\d+',
634 description => <<EODESCR,
635 Map host parallel devices (n is 0 to 2).
636
637 Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.
638
639 Experimental: user reported problems with this option.
640 EODESCR
641 };
642
# Add one config option per device slot (parallel0..N, serial0..N, ...).
# Disk slots are additionally recorded in $drivename_hash so that
# valid_drivename() recognises them.
$confdesc->{"parallel$_"} = $paralleldesc foreach (0 .. $MAX_PARALLEL_PORTS - 1);

$confdesc->{"serial$_"} = $serialdesc foreach (0 .. $MAX_SERIAL_PORTS - 1);

$confdesc->{"hostpci$_"} = $hostpcidesc foreach (0 .. $MAX_HOSTPCI_DEVICES - 1);

foreach my $bus (
    [ 'ide',    $MAX_IDE_DISKS,    $idedesc ],
    [ 'sata',   $MAX_SATA_DISKS,   $satadesc ],
    [ 'scsi',   $MAX_SCSI_DISKS,   $scsidesc ],
    [ 'virtio', $MAX_VIRTIO_DISKS, $virtiodesc ],
) {
    my ($prefix, $slots, $desc) = @$bus;

    foreach my $slot (0 .. $slots - 1) {
        $drivename_hash->{"$prefix$slot"} = 1;
        $confdesc->{"$prefix$slot"} = $desc;
    }
}

$confdesc->{"usb$_"} = $usbdesc foreach (0 .. $MAX_USB_DEVICES - 1);
678
679 my $unuseddesc = {
680 optional => 1,
681 type => 'string', format => 'pve-volume-id',
682 description => "Reference to unused volumes.",
683 };
684
685 for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) {
686 $confdesc->{"unused$i"} = $unuseddesc;
687 }
688
# Cached KVM API version; a false value means "not determined yet".
my $kvm_api_version = 0;

# Return the KVM API version obtained from /dev/kvm via ioctl.
# Returns 0 if the device cannot be opened or the ioctl yields a false
# value. A true result is cached and returned on later calls.
sub kvm_version {
    return $kvm_api_version if $kvm_api_version;

    my $fh = IO::File->new("</dev/kvm");
    return 0 if !$fh;

    my $version = $fh->ioctl(KVM_GET_API_VERSION(), 0);
    $kvm_api_version = $version if $version;

    $fh->close();

    return $kvm_api_version;
}
706
# Cached version string of the userspace kvm binary.
my $kvm_user_version;

# Return the version printed by 'kvm -help', or 'unknown' if the output
# does not start with the expected "QEMU ... emulator version X.Y[.Z]"
# banner. The result is cached after the first call.
sub kvm_user_version {
    return $kvm_user_version if $kvm_user_version;

    my $help_output = `kvm -help 2>/dev/null`;

    $kvm_user_version =
        $help_output =~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)[,\s]/
        ? $2
        : 'unknown';

    return $kvm_user_version;
}
724
725 my $kernel_has_vhost_net = -c '/dev/vhost-net';
726
# Return the names of all possible disk slots: ide*, scsi*, virtio*, sata*.
sub disknames {
    # Expands one bus prefix into "<prefix>0" .. "<prefix><count-1>".
    my $slots = sub {
        my ($prefix, $count) = @_;
        return map { "$prefix$_" } (0 .. ($count - 1));
    };

    # order is important - used to autoselect boot disk
    return ($slots->('ide', $MAX_IDE_DISKS),
            $slots->('scsi', $MAX_SCSI_DISKS),
            $slots->('virtio', $MAX_VIRTIO_DISKS),
            $slots->('sata', $MAX_SATA_DISKS));
}
734
# Tell whether $dev (e.g. "ide0") is one of the registered disk slot names.
sub valid_drivename {
    my ($dev) = @_;

    return defined($drivename_hash->{$dev});
}
740
# Tell whether $key is a known VM configuration option.
sub option_exists {
    my ($key) = @_;

    return defined($confdesc->{$key});
}
745
# Return the array reference holding all supported NIC model names.
# This is the shared list itself, not a copy.
sub nic_models {
    my $models = $nic_model_list;

    return $models;
}
749
# Return a hash reference mapping ostype identifiers to display names.
# A fresh hash is built on every call.
sub os_list_description {
    my %label_for = (
        other  => 'Other',
        wxp    => 'Windows XP',
        w2k    => 'Windows 2000',
        w2k3   => 'Windows 2003',
        w2k8   => 'Windows 2008',
        wvista => 'Windows Vista',
        win7   => 'Windows 7',
        win8   => 'Windows 8/2012',
        l24    => 'Linux 2.4',
        l26    => 'Linux 2.6',
    );

    return { %label_for };
}
765
# Cached path of the host CD-ROM device link.
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1 and /dev/cdrom2 that is a
# symbolic link, caching the result for later calls.
# Returns undef if none of them is a symbolic link. This is now explicit;
# previously the sub fell off its end and returned the leftover value of
# the last file test ('' or undef).
sub get_cdrom_path {
    return $cdrom_path if $cdrom_path;

    foreach my $candidate ("/dev/cdrom", "/dev/cdrom1", "/dev/cdrom2") {
        return $cdrom_path = $candidate if -l $candidate;
    }

    return undef;
}
776
# Resolve a CD-ROM specification to a filesystem path.
#   'cdrom'          - the host CD-ROM device found by get_cdrom_path()
#   'none'           - empty string
#   absolute path    - returned unchanged
#   anything else    - resolved through PVE::Storage::path()
# $vmid is accepted but not used here.
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
790
# try to convert old style file names to volume IDs
#
# 'none', 'cdrom', /dev/... paths and anything already in "storage:volume"
# form are returned unchanged. A bare file name becomes "local:iso/<file>"
# for cdrom media and "local:<vmid>/<file>" otherwise. Any other name
# containing a slash cannot be converted and yields undef.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    return $file if $file eq 'none' || $file eq 'cdrom';
    return $file if $file =~ m|^/dev/.+|;
    return $file if $file =~ m/^([^:]+):(.+)$/;

    return undef if $file =~ m|/|;

    my $is_cdrom = $media && $media eq 'cdrom';

    return $is_cdrom ? "local:iso/$file" : "local:$vmid/$file";
}
809
# Check that the storage content type $vtype fits the drive media type.
# 'disk' expects 'images' and 'cdrom' expects 'iso'. Nothing is checked
# when $media is false.
# Dies with "internal error" for any other media value and raises a
# parameter exception keyed by $opt on a mismatch.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %content_type_for = (
        disk  => 'images',
        cdrom => 'iso',
    );

    my $etype = $content_type_for{$media};
    die "internal error" if !defined($etype);

    return if ($vtype eq $etype);

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
828
# Normalise the 'file' and 'media' entries of a parsed drive, in place.
# A file that is neither 'cdrom'/'none', a /dev/ path, a "storage:volume"
# id nor all digits is treated as a filesystem path and converted to a
# volume id. A parameter exception keyed by $opt is raised if no storage
# owns the path. Media defaults to 'cdrom' for iso volumes and for the
# 'cdrom'/'none' pseudo files.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    my $file = $drive->{file};

    # try to convert filesystem paths to volume IDs
    my $is_filesystem_path =
        $file !~ m/^(cdrom|none)$/
        && $file !~ m|^/dev/.+|
        && $file !~ m/^([^:]+):(.+)$/
        && $file !~ m/^\d+$/;

    if ($is_filesystem_path) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $drive->{file});
        raise_param_exc({ $opt => "unable to associate path '$drive->{file}' to any storage"}) if !$vtype;
        $drive->{media} = 'cdrom' if !$drive->{media} && $vtype eq 'iso';
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    # re-read the entry: it may just have been replaced by a volume id
    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
847
# Write a new configuration file for VM $vmid from the $settings hash.
# Dies if the file already exists. Missing 'name' and 'memory' settings are
# filled in (this modifies the caller's $settings hash). Unknown options and
# options with a false value are not written.
sub create_conf_nolock {
    my ($vmid, $settings) = @_;

    my $filename = config_file($vmid);

    die "configuration file '$filename' already exists\n" if -f $filename;

    my $defaults = load_defaults();

    $settings->{name} ||= "vm$vmid";
    $settings->{memory} ||= $defaults->{memory};

    my @lines =
        map  { "$_: $settings->{$_}\n" }
        grep { $confdesc->{$_} && $settings->{$_} }
        keys %$settings;

    PVE::Tools::file_set_contents($filename, join('', @lines));
}
872
# Convert a size string such as "512", "1.5M" or "4G" into a whole number
# of bytes. The optional suffix K, M or G multiplies by 1024, 1024^2 or
# 1024^3; the result is truncated to an integer.
# Returns undef if the string does not have that form.
my $parse_size = sub {
    my ($value) = @_;

    my %multiplier_for = (
        K => 1024,
        M => 1024 * 1024,
        G => 1024 * 1024 * 1024,
    );

    if ($value =~ m/^(\d+(\.\d+)?)([KMG])?$/) {
        my ($size, $unit) = ($1, $3);
        $size = $size * $multiplier_for{$unit} if $unit;
        return int($size);
    }

    return undef;
};
889
# Render a byte count with the largest of the suffixes K, M, G that divides
# it without remainder. A count that is not a multiple of 1024 is returned
# as a plain integer without suffix.
my $format_size = sub {
    my ($size) = @_;

    my $amount = int($size);
    my $suffix = '';

    foreach my $next_suffix (qw(K M G)) {
        my $scaled = int($amount / 1024);
        last if $scaled * 1024 != $amount;
        ($amount, $suffix) = ($scaled, $next_suffix);
    }

    return $suffix eq '' ? $amount : "${amount}${suffix}";
};
906
907 # ideX = [volume=]volume-id[,media=cdrom|disk][,cyls=c,heads=h,secs=s[,trans=t]]
908 # [,snapshot=on|off][,cache=none|writethrough|writeback|unsafe|directsync][,format=f][,backup=yes|no]
909 # [,rerror=ignore|report|stop][,werror=enospc|ignore|report|stop]
910 # [,aio=native|threads][,discard=ignore|on]
911
# Parse a drive property string such as "local:100/disk.raw,cache=none".
#
#   $key  - option name like "ide0", split into interface and index; may be
#           undef when the string is only checked as a JSON parameter
#   $data - comma separated list of "name=value" pairs; one bare value is
#           accepted as the file/volume
#
# Returns a hash reference holding 'interface', 'index', 'file' and every
# given option, or undef if the key or any option is invalid.
sub parse_drive {
    my ($key, $data) = @_;

    my $res = {};

    # $key may be undefined - used to verify JSON parameters
    if (defined($key)) {
        return undef if $key !~ m/^([^\d]+)(\d+)$/;
        $res->{interface} = $1;
        $res->{index} = $2;
    } else {
        $res->{interface} = 'unknown'; # should not harm when used to verify parameters
        $res->{index} = 0;
    }

    foreach my $p (split (/,/, $data)) {
        next if $p =~ m/^\s*$/;

        if ($p =~ m/^(file|volume|cyls|heads|secs|trans|media|snapshot|cache|format|rerror|werror|backup|aio|bps|mbps|mbps_max|bps_rd|mbps_rd|mbps_rd_max|bps_wr|mbps_wr|mbps_wr_max|iops|iops_max|iops_rd|iops_rd_max|iops_wr|iops_wr_max|size|discard)=(.+)$/) {
            my ($k, $v) = ($1, $2);

            # 'volume' is an alias for 'file'
            $k = 'file' if $k eq 'volume';

            # every option may be given only once
            return undef if defined $res->{$k};

            # byte based limits are stored as their megabyte counterpart
            if ($k eq 'bps' || $k eq 'bps_rd' || $k eq 'bps_wr') {
                return undef if !$v || $v !~ m/^\d+/;
                $k = "m$k";
                $v = sprintf("%.3f", $v / (1024*1024));
            }

            $res->{$k} = $v;
            next;
        }

        # anything else must be the single bare file/volume value
        return undef if $res->{file} || $p =~ m/=/;
        $res->{file} = $p;
    }

    return undef if !$res->{file};

    # a known image extension on the file name determines the format
    if ($res->{file} =~ m/\.(raw|cow|qcow|qcow2|vmdk|cloop)$/) {
        $res->{format} = $1;
    }

    # option => pattern a non-false value has to match
    my @value_patterns = (
        [ cache    => qr/^(off|none|writethrough|writeback|unsafe|directsync)$/ ],
        [ snapshot => qr/^(on|off)$/ ],
        [ cyls     => qr/^\d+$/ ],
        [ heads    => qr/^\d+$/ ],
        [ secs     => qr/^\d+$/ ],
        [ media    => qr/^(disk|cdrom)$/ ],
        [ trans    => qr/^(none|lba|auto)$/ ],
        [ format   => qr/^(raw|cow|qcow|qcow2|vmdk|cloop)$/ ],
        [ rerror   => qr/^(ignore|report|stop)$/ ],
        [ werror   => qr/^(enospc|ignore|report|stop)$/ ],
        [ backup   => qr/^(yes|no)$/ ],
        [ aio      => qr/^(native|threads)$/ ],
        [ discard  => qr/^(ignore|on)$/ ],
    );

    # throughput limits are decimal numbers, iops limits are integers
    foreach my $opt (qw(mbps mbps_max mbps_rd mbps_rd_max mbps_wr mbps_wr_max)) {
        push @value_patterns, [ $opt => qr/^\d+(\.\d+)?$/ ];
    }
    foreach my $opt (qw(iops iops_max iops_rd iops_rd_max iops_wr iops_wr_max)) {
        push @value_patterns, [ $opt => qr/^\d+$/ ];
    }

    foreach my $check (@value_patterns) {
        my ($opt, $pattern) = @$check;
        return undef if $res->{$opt} && $res->{$opt} !~ $pattern;
    }

    # a combined limit excludes the separate read and write limits
    foreach my $limit (qw(mbps iops)) {
        next if !$res->{$limit};
        return undef if $res->{"${limit}_rd"} || $res->{"${limit}_wr"};
    }

    if ($res->{size}) {
        $res->{size} = $parse_size->($res->{size});
        return undef if !defined($res->{size});
    }

    # disk-only options are rejected for cdrom media, as is virtio
    if ($res->{media} && ($res->{media} eq 'cdrom')) {
        return undef if grep { $res->{$_} } qw(snapshot trans format heads secs cyls);
        return undef if $res->{interface} eq 'virtio';
    }

    # rerror does not work with scsi drives
    return undef if $res->{rerror} && $res->{interface} eq 'scsi';

    return $res;
}
1013
# Drive option names that print_drive() and print_drive_full() copy from a
# parsed drive hash into their output.
my @qemu_drive_options = qw(
    heads secs cyls trans media format cache snapshot rerror werror aio discard
    iops iops_rd iops_wr iops_max iops_rd_max iops_wr_max
);

# Turn a parsed drive hash back into its "file,key=value,..." string form.
# Only options holding a true value are emitted; 'size' is rendered through
# $format_size.
sub print_drive {
    my ($vmid, $drive) = @_;

    my @config_keys = (
        @qemu_drive_options,
        qw(mbps mbps_rd mbps_wr mbps_max mbps_rd_max mbps_wr_max backup),
    );

    my @parts = map { "$_=$drive->{$_}" } grep { $drive->{$_} } @config_keys;
    push @parts, "size=" . &$format_size($drive->{size}) if $drive->{size};

    return join(',', $drive->{file}, @parts);
}
1030
# Send a SCSI INQUIRY to an open device handle through the Linux SG_IO ioctl.
#
# Parameters:
#   $fh    - open file handle of the device to query
#   $noerr - when true, failures return undef instead of dying
#
# Returns a hash reference with the keys vendor, product, revision,
# removable (0 or 1) and type (low five bits of the first response byte).
sub scsi_inquiry {
    my($fh, $noerr) = @_;

    my $SG_IO = 0x2285;
    my $SG_GET_VERSION_NUM = 0x2282;

    my $versionbuf = "\x00" x 8;
    my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf);
    if (!$ret) {
        die "scsi ioctl SG_GET_VERSION_NUM failed - $!\n" if !$noerr;
        return undef;
    }
    my $version = unpack("I", $versionbuf);
    if ($version < 30000) {
        die "scsi generic interface too old\n" if !$noerr;
        return undef;
    }

    # 36 byte response buffer, 8 byte sense buffer, 6 byte command
    # (opcode 0x12, allocation length 36)
    my $buf = "\x00" x 36;
    my $sensebuf = "\x00" x 8;
    my $cmd = pack("C x3 C x1", 0x12, 36);

    # see /usr/include/scsi/sg.h
    my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I";

    # The 'P' fields store pointers to $buf, $cmd and $sensebuf, so these
    # scalars must stay alive and unmodified until the ioctl has returned.
    my $packet = pack($sg_io_hdr_t, ord('S'), -3, length($cmd),
                      length($sensebuf), 0, length($buf), $buf,
                      $cmd, $sensebuf, 6000);

    $ret = ioctl($fh, $SG_IO, $packet);
    if (!$ret) {
        die "scsi ioctl SG_IO failed - $!\n" if !$noerr;
        return undef;
    }

    # fields 17 and 18 of the header are the two 'S' (unsigned short) status
    # words following the four status bytes; any non-zero value is an error
    my @res = unpack($sg_io_hdr_t, $packet);
    if ($res[17] || $res[18]) {
        die "scsi ioctl SG_IO status error - $!\n" if !$noerr;
        return undef;
    }

    my $res = {};
    (my $byte0, my $byte1, $res->{vendor},
     $res->{product}, $res->{revision}) = unpack("C C x6 A8 A16 A4", $buf);

    $res->{removable} = $byte1 & 128 ? 1 : 0;
    $res->{type} = $byte0 & 31;

    return $res;
}
1081
# Open $path read/write and run scsi_inquiry() on it in quiet mode.
#
# Returns the inquiry hash on success, undef if the path cannot be opened
# or the inquiry fails.
sub path_is_scsi {
    my ($path) = @_;

    # Pass the mode as a separate argument so that $path is never parsed as
    # part of the open mode (no whitespace stripping or '&' / '|' magic).
    my $fh = IO::File->new($path, '+<') || return undef;
    my $res = scsi_inquiry($fh, 1);
    close($fh);

    return $res;
}
1091
# Return 1 if the configured machine type contains 'q35', 0 otherwise.
sub machine_type_is_q35 {
    my ($conf) = @_;

    my $machine = $conf->{machine};
    return 0 if !$machine;

    return ($machine =~ m/q35/) ? 1 : 0;
}
1097
# Build the -device argument for the USB tablet, picking the USB bus
# according to the machine type.
sub print_tabletdevice_full {
    my ($conf) = @_;

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = 'uhci';
    $usbbus = 'ehci' if machine_type_is_q35($conf);

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
1108
# Build the -device argument for one drive.
#
# The device model, bus and unit are derived from the drive's interface
# (virtio, scsi, ide, sata) and index. Dies for 'usb' and for unknown
# interface types.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges) = @_;

    my $interface = $drive->{interface};
    my $index = $drive->{index};
    my $drive_id = "$interface$index";

    my $device = '';

    if ($interface eq 'virtio') {
        my $pciaddr = print_pci_addr($drive_id, $bridges);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=$drive_id$pciaddr";
        $device .= ",iothread=iothread0" if $conf->{iothread};
    } elsif ($interface eq 'scsi') {
        # no scsihw setting, or one starting with 'lsi': 7 units per
        # controller; anything else: 256
        my $lsi = !$conf->{scsihw} || ($conf->{scsihw} =~ m/^lsi/);
        my $maxdev = $lsi ? 7 : 256;
        my $controller = int($index / $maxdev);
        my $unit = $index % $maxdev;

        my $devicetype = 'hd';
        if (drive_is_cdrom($drive)) {
            $devicetype = 'cd';
        } else {
            my $path = ($drive->{file} =~ m|^/|)
                ? $drive->{file}
                : PVE::Storage::path($storecfg, $drive->{file});

            if ($path =~ m/^iscsi\:\/\//) {
                $devicetype = 'generic';
            } elsif (my $info = path_is_scsi($path)) {
                if ($info->{type} == 0) {
                    $devicetype = 'block';
                } elsif ($info->{type} == 1) { # tape
                    $devicetype = 'generic';
                }
            }
        }

        my $address = $lsi
            ? "scsi-id=$unit"
            : "channel=0,scsi-id=0,lun=$index";
        $device = "scsi-$devicetype,bus=scsihw$controller.0,$address,drive=drive-$drive_id,id=$drive_id";
    } elsif ($interface eq 'ide') {
        my $maxdev = 2;
        my $controller = int($index / $maxdev);
        my $unit = $index % $maxdev;
        my $devicetype = 'hd';
        $devicetype = 'cd' if $drive->{media} && $drive->{media} eq 'cdrom';

        $device = "ide-$devicetype,bus=ide.$controller,unit=$unit,drive=drive-$drive_id,id=$drive_id";
    } elsif ($interface eq 'sata') {
        my $controller = int($index / $MAX_SATA_DISKS);
        my $unit = $index % $MAX_SATA_DISKS;
        $device = "ide-drive,bus=ahci$controller.$unit,drive=drive-$drive_id,id=$drive_id";
    } elsif ($interface eq 'usb') {
        die "implement me";
        # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
    } else {
        die "unsupported interface type";
    }

    $device .= ",bootindex=$drive->{bootindex}" if $drive->{bootindex};

    return $device;
}
1175
# Read the first InitiatorName entry from /etc/iscsi/initiatorname.iscsi.
# Returns undef if the file cannot be opened or holds no such entry.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') || return undef;

    my $initiator;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    $fh->close();

    return $initiator;
}
1189
# Build the -drive argument for one drive.
#
# Options from @qemu_drive_options are copied as-is, the mbps* limits are
# converted to bytes per second, and defaults for aio, cache and
# detect-zeroes are appended where the drive does not set them.
sub print_drive_full {
    my ($storecfg, $vmid, $drive) = @_;

    my $is_cdrom = drive_is_cdrom($drive);

    my @opts = map { "$_=$drive->{$_}" }
        grep { $_ ne 'bootindex' && $drive->{$_} } @qemu_drive_options;

    foreach my $o (qw(bps bps_rd bps_wr)) {
        my $v = $drive->{"m$o"};
        push @opts, "$o=" . int($v*1024*1024) if $v;
    }

    # use linux-aio by default (qemu default is threads)
    push @opts, "aio=native" if !$drive->{aio};

    my $volid = $drive->{file};
    my $path;
    if ($is_cdrom) {
        $path = get_iso_path($storecfg, $vmid, $volid);
    } elsif ($volid =~ m|^/|) {
        $path = $volid;
    } else {
        $path = PVE::Storage::path($storecfg, $volid);
    }

    if (!$is_cdrom) {
        push @opts, "cache=none" if !$drive->{cache};
        push @opts, "detect-zeroes=" . ($drive->{discard} ? "unmap" : "on");
    }

    my $pathinfo = $path ? "file=$path," : '';
    my $optstr = join('', map { ",$_" } @opts);

    return "${pathinfo}if=none,id=drive-$drive->{interface}$drive->{index}$optstr";
}
1228
# Build the -device argument for one network card.
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges) = @_;

    my $bootorder = $conf->{boot} || $confdesc->{boot}->{default};

    my $is_virtio = $net->{model} eq 'virtio';
    my $device = $is_virtio ? 'virtio-net-pci' : $net->{model};

    # qemu > 0.15 always try to boot from network - we disable that by
    # not loading the pxe rom file
    my $extra = ($bootorder !~ m/n/) ? "romfile=," : '';
    my $pciaddr = print_pci_addr("$netid", $bridges);

    my @parts = ("$device,${extra}mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid");

    if ($net->{queues} && $net->{queues} > 1 && $is_virtio) {
        #Consider we have N queues, the number of vectors needed is 2*N + 2 (plus one config interrupt and control vq)
        my $vectors = $net->{queues} * 2 + 2;
        push @parts, "vectors=$vectors", "mq=on";
    }
    push @parts, "bootindex=$net->{bootindex}" if $net->{bootindex};

    return join(',', @parts);
}
1252
# Build the -netdev argument for one network card.
#
# $netid must have the form "netN" with N below $MAX_NETS; anything else
# dies. Cards with a bridge get a tap backend named "tap<vmid>i<N>", all
# others use the user-mode backend.
sub print_netdev_full {
    my ($vmid, $conf, $net, $netid) = @_;

    # An id that does not match must be rejected here: comparing an empty
    # index against $MAX_NETS is always false, so it would slip through and
    # produce an interface name without index.
    die "got strange net id '$netid'\n" if $netid !~ m/^net(\d+)$/;
    my $i = int($1);

    die "got strange net id '$i'\n" if $i >= ${MAX_NETS};

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    my $vhostparam = '';
    $vhostparam = ',vhost=on' if $kernel_has_vhost_net && $net->{model} eq 'virtio';

    my $vmname = $conf->{name} || "vm$vmid";

    my $netdev = "";

    if ($net->{bridge}) {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    } else {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    }

    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $net->{model} eq 'virtio');

    return $netdev;
}
1286
# True if $drive is set and its media is 'cdrom'.
sub drive_is_cdrom {
    my ($drive) = @_;

    return $drive if !$drive;

    my $media = $drive->{media};
    return $media if !$media;

    return $media eq 'cdrom';
}
1293
# Parse a comma separated numa setting (memory=, policy=, cpus=, hostnodes=)
# into a hash. cpus and hostnodes become { start, end } with end undefined
# for a single value. Returns undef on any unknown or malformed part.
sub parse_numa {
    my ($data) = @_;

    my $res = {};

    foreach my $kvp (split(/,/, $data)) {
        if (my ($memory) = $kvp =~ m/^memory=(\S+)$/) {
            $res->{memory} = $memory;
        } elsif (my ($policy) = $kvp =~ m/^policy=(preferred|bind|interleave)$/) {
            $res->{policy} = $policy;
        } elsif (my ($cpu_start, undef, $cpu_end) = $kvp =~ m/^cpus=(\d+)(-(\d+))?$/) {
            $res->{cpus}->{start} = $cpu_start;
            $res->{cpus}->{end} = $cpu_end;
        } elsif (my ($node_start, undef, $node_end) = $kvp =~ m/^hostnodes=(\d+)(-(\d+))?$/) {
            $res->{hostnodes}->{start} = $node_start;
            $res->{hostnodes}->{end} = $node_end;
        } else {
            return undef;
        }
    }

    return $res;
}
1318
# Parse a hostpci setting.
#
# A "[host=]BB:DD[.F]" part is required. With a function number the device
# is pushed onto $res->{pciid}; without one, pciid is taken from lspci().
# Unknown parts only produce a warning. Returns undef for an empty value or
# when no host part was found.
sub parse_hostpci {
    my ($value) = @_;

    return undef if !$value;

    my $res = {};
    my $found;

    foreach my $kv (split(/,/, $value)) {
        if ($kv =~ m/^(host=)?([a-f0-9]{2}:[a-f0-9]{2})(\.([a-f0-9]))?$/) {
            my ($id, $function) = ($2, $4);
            $found = 1;
            if (defined($function)) {
                push @{$res->{pciid}}, { id => $id , function => $function };
            } else {
                my $pcidevices = lspci($id);
                $res->{pciid} = $pcidevices->{$id};
            }
        } elsif (my ($driver) = $kv =~ m/^driver=(kvm|vfio)$/) {
            $res->{driver} = $driver;
        } elsif (my ($rombar) = $kv =~ m/^rombar=(on|off)$/) {
            $res->{rombar} = $rombar;
        } elsif (my ($xvga) = $kv =~ m/^x-vga=(on|off)$/) {
            $res->{'x-vga'} = $xvga;
        } elsif (my ($pcie) = $kv =~ m/^pcie=(\d+)$/) {
            $res->{pcie} = 1 if $pcie == 1;
        } else {
            warn "unknown hostpci setting '$kv'\n";
        }
    }

    return undef if !$found;

    return $res;
}
1357
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a net setting into a hash with model, macaddr and the optional
# bridge, queues, rate, tag and firewall values. A model without MAC gets a
# random address. Returns undef on unknown parts or if no model was given.
sub parse_net {
    my ($data) = @_;

    my $res = {};

    foreach my $kvp (split(/,/, $data)) {
        if ($kvp =~ m/^(ne2k_pci|e1000|rtl8139|pcnet|virtio|ne2k_isa|i82551|i82557b|i82559er|vmxnet3)(=([0-9a-f]{2}(:[0-9a-f]{2}){5}))?$/i) {
            my ($model, $mac) = ($1, $3);
            $res->{model} = lc($model);
            $res->{macaddr} = defined($mac) ? uc($mac) : PVE::Tools::random_ether_addr();
        } elsif (my ($bridge) = $kvp =~ m/^bridge=(\S+)$/) {
            $res->{bridge} = $bridge;
        } elsif (my ($queues) = $kvp =~ m/^queues=(\d+)$/) {
            $res->{queues} = $queues;
        } elsif (my ($rate) = $kvp =~ m/^rate=(\d+(\.\d+)?)$/) {
            $res->{rate} = $rate;
        } elsif (my ($tag) = $kvp =~ m/^tag=(\d+)$/) {
            $res->{tag} = $tag;
        } elsif (my ($firewall) = $kvp =~ m/^firewall=(\d+)$/) {
            $res->{firewall} = $firewall;
        } else {
            return undef;
        }
    }

    return undef if !$res->{model};

    return $res;
}
1391
# Turn a parsed net hash back into its config string.
#
# Every optional value parse_net() understands (bridge, rate, tag, firewall,
# queues) is written back when set, so that a parse/print round trip does
# not lose settings.
sub print_net {
    my $net = shift;

    my $res = "$net->{model}";
    $res .= "=$net->{macaddr}" if $net->{macaddr};
    $res .= ",bridge=$net->{bridge}" if $net->{bridge};
    $res .= ",rate=$net->{rate}" if $net->{rate};
    $res .= ",tag=$net->{tag}" if $net->{tag};
    $res .= ",firewall=$net->{firewall}" if $net->{firewall};
    $res .= ",queues=$net->{queues}" if $net->{queues};

    return $res;
}
1404
# Re-serialize every parsable netN entry of $settings in place through
# parse_net() and print_net().
sub add_random_macs {
    my ($settings) = @_;

    foreach my $opt (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$opt});
        $settings->{$opt} = print_net($net) if $net;
    }
}
1415
# Record $volid in the lowest free unusedN slot of $config.
#
# Returns the key used, or nothing if the volume is already listed.
# Dies when all $MAX_UNUSED_DISKS slots are taken.
sub add_unused_volume {
    my ($config, $volid) = @_;

    my $key;
    # walk from the highest index down so that the last free slot seen is
    # the lowest one
    foreach my $ind (reverse(0 .. $MAX_UNUSED_DISKS - 1)) {
        my $slot = "unused$ind";
        my $vid = $config->{$slot};
        if (!$vid) {
            $key = $slot;
            next;
        }
        return if $vid eq $volid; # do not add duplicates
    }

    die "To many unused volume - please delete them first.\n" if !$key;

    $config->{$key} = $volid;

    return $key;
}
1435
# Return 1 if the storage layer reports $vmid as owner of $volid.
# Absolute paths and volumes the storage layer cannot resolve yield undef.
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    return undef if $volid =~ m|^/|;

    my ($path, $owner);
    # lookup errors are deliberately ignored: no owner, no claim
    eval { ($path, $owner) = PVE::Storage::path($storecfg, $volid); };

    return 1 if $owner && ($owner == $vmid);

    return undef;
}
1449
# Mark $key for deletion: drop any pending value for it and add it to the
# pending 'delete' list (without duplicates).
sub vmconfig_delete_pending_option {
    my ($conf, $key) = @_;

    delete $conf->{pending}->{$key};

    my %to_delete = map { ($_ => 1) }
        ($key, PVE::Tools::split_list($conf->{pending}->{delete}));

    $conf->{pending}->{delete} = join(',', keys %to_delete);
}
1460
# Remove $key from the pending 'delete' list; the list entry itself is
# removed once it becomes empty.
sub vmconfig_undelete_pending_option {
    my ($conf, $key) = @_;

    my %to_delete = map { ($_ => 1) }
        PVE::Tools::split_list($conf->{pending}->{delete});
    delete $to_delete{$key};

    my @remaining = keys %to_delete;
    if (!scalar(@remaining)) {
        delete $conf->{pending}->{delete};
    } else {
        $conf->{pending}->{delete} = join(',', @remaining);
    }
}
1477
# Add the volume of a non-cdrom drive to the unused list of $conf, but only
# if this VM owns the volume.
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    return if drive_is_cdrom($drive);

    my $volid = $drive->{file};
    return if !vm_is_volid_owner($storecfg, $vmid, $volid);

    add_unused_volume($conf, $volid, $vmid);
}
1488
# Drop pending entries that would not change anything.
#
# Returns a true value if something was removed, undef otherwise.
sub vmconfig_cleanup_pending {
    my ($conf) = @_;

    my $changes;

    # remove pending changes when nothing changed
    foreach my $opt (keys %{$conf->{pending}}) {
        next if !defined($conf->{$opt});
        next if $conf->{pending}->{$opt} ne $conf->{$opt};
        $changes = 1;
        delete $conf->{pending}->{$opt};
    }

    # remove delete if option is not set
    my %still_pending;
    foreach my $opt (PVE::Tools::split_list($conf->{pending}->{delete})) {
        if (!defined($conf->{$opt})) {
            $changes = 1;
            next;
        }
        $still_pending{$opt} = 1;
    }

    my @keylist = keys %still_pending;
    if (!scalar(@keylist)) {
        delete $conf->{pending}->{delete};
    } else {
        $conf->{pending}->{delete} = join(',', @keylist);
    }

    return $changes;
}
1520
# Allowed smbios1 keys, each mapped to the pattern its value must match in
# full.
my $valid_smbios1_options = {
    manufacturer => '\S+',
    product => '\S+',
    version => '\S+',
    serial => '\S+',
    uuid => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
    sku => '\S+',
    family => '\S+',
};

# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str]
#
# Parse an smbios1 setting into a key/value hash. Returns undef if a part is
# not of the form key=value, uses an unknown key, or has a value that does
# not match the pattern for its key.
sub parse_smbios1 {
    my ($data) = @_;

    my $res = {};

    foreach my $kvp (split(/,/, $data)) {
        return undef if $kvp !~ m/^(\S+)=(.+)$/;
        # Split on the first '=' only; an unlimited split would silently
        # cut off everything after a second '=' inside the value.
        my ($k, $v) = split(/=/, $kvp, 2);
        return undef if !defined($k) || !defined($v);
        return undef if !$valid_smbios1_options->{$k};
        return undef if $v !~ m/^$valid_smbios1_options->{$k}$/;
        $res->{$k} = $v;
    }

    return $res;
}
1548
# Turn an smbios1 hash into its "key=value,..." config string.
#
# Undefined values and keys missing from $valid_smbios1_options are skipped.
# Keys are emitted in sorted order so that the same settings always produce
# the same string, independent of hash ordering.
sub print_smbios1 {
    my ($smbios1) = @_;

    my $data = '';
    foreach my $k (sort keys %$smbios1) {
        next if !defined($smbios1->{$k});
        next if !$valid_smbios1_options->{$k};
        $data .= ',' if $data;
        $data .= "$k=$smbios1->{$k}";
    }
    return $data;
}
1561
PVE::JSONSchema::register_format('pve-qm-smbios1', \&verify_smbios1);
# Format check: returns $value if parse_smbios1() accepts it; otherwise
# returns undef when $noerr is set and dies when it is not.
sub verify_smbios1 {
    my ($value, $noerr) = @_;

    if (!parse_smbios1($value)) {
        return undef if $noerr;
        die "unable to parse smbios (type 1) options\n";
    }

    return $value;
}
1572
PVE::JSONSchema::register_format('pve-qm-bootdisk', \&verify_bootdisk);
# Format check: returns $value if valid_drivename() accepts it; otherwise
# returns undef when $noerr is set and dies when it is not.
sub verify_bootdisk {
    my ($value, $noerr) = @_;

    if (!valid_drivename($value)) {
        return undef if $noerr;
        die "invalid boot disk '$value'\n";
    }

    return $value;
}
1583
PVE::JSONSchema::register_format('pve-qm-numanode', \&verify_numa);
# Format check: returns $value if parse_numa() accepts it; otherwise
# returns undef when $noerr is set and dies when it is not.
sub verify_numa {
    my ($value, $noerr) = @_;

    if (!parse_numa($value)) {
        return undef if $noerr;
        die "unable to parse numa options\n";
    }

    return $value;
}
1594
PVE::JSONSchema::register_format('pve-qm-net', \&verify_net);
# Format check: returns $value if parse_net() accepts it; otherwise
# returns undef when $noerr is set and dies when it is not.
sub verify_net {
    my ($value, $noerr) = @_;

    if (!parse_net($value)) {
        return undef if $noerr;
        die "unable to parse network options\n";
    }

    return $value;
}
1605
PVE::JSONSchema::register_format('pve-qm-drive', \&verify_drive);
# Format check: returns $value if parse_drive() accepts it (called without
# a key); otherwise returns undef when $noerr is set and dies when it is not.
sub verify_drive {
    my ($value, $noerr) = @_;

    if (!parse_drive(undef, $value)) {
        return undef if $noerr;
        die "unable to parse drive options\n";
    }

    return $value;
}
1616
PVE::JSONSchema::register_format('pve-qm-hostpci', \&verify_hostpci);
# Format check: returns $value if parse_hostpci() accepts it; otherwise
# returns undef when $noerr is set and dies when it is not.
sub verify_hostpci {
    my ($value, $noerr) = @_;

    if (!parse_hostpci($value)) {
        return undef if $noerr;
        die "unable to parse pci id\n";
    }

    return $value;
}
1627
PVE::JSONSchema::register_format('pve-qm-watchdog', \&verify_watchdog);
# Format check: returns $value if parse_watchdog() accepts it; otherwise
# returns undef when $noerr is set and dies when it is not.
sub verify_watchdog {
    my ($value, $noerr) = @_;

    if (!parse_watchdog($value)) {
        return undef if $noerr;
        die "unable to parse watchdog options\n";
    }

    return $value;
}
1638
# Parse a watchdog setting ("[model=]<model>[,[action=]<action>]") into a
# hash with the keys model and action. Empty parts are ignored; returns
# undef for an empty value or on any unknown part.
sub parse_watchdog {
    my ($value) = @_;

    return undef if !$value;

    my $res = {};

    foreach my $p (grep { $_ !~ m/^\s*$/ } split(/,/, $value)) {
        if (my (undef, $model) = $p =~ m/^(model=)?(i6300esb|ib700)$/) {
            $res->{model} = $model;
        } elsif (my (undef, $action) = $p =~ m/^(action=)?(reset|shutdown|poweroff|pause|debug|none)$/) {
            $res->{action} = $action;
        } else {
            return undef;
        }
    }

    return $res;
}
1660
PVE::JSONSchema::register_format('pve-qm-startup', \&verify_startup);
# Format check: returns $value if parse_startup() accepts it; otherwise
# returns undef when $noerr is set and dies when it is not.
sub verify_startup {
    my ($value, $noerr) = @_;

    if (!parse_startup($value)) {
        return undef if $noerr;
        die "unable to parse startup options\n";
    }

    return $value;
}
1671
# Parse a startup setting ("[order=]N[,up=N][,down=N]") into a hash with the
# keys order, up and down. Empty parts are ignored; returns undef for an
# empty value or on any unknown part.
sub parse_startup {
    my ($value) = @_;

    return undef if !$value;

    my $res = {};

    foreach my $p (grep { $_ !~ m/^\s*$/ } split(/,/, $value)) {
        if (my (undef, $order) = $p =~ m/^(order=)?(\d+)$/) {
            $res->{order} = $order;
        } elsif (my ($up) = $p =~ m/^up=(\d+)$/) {
            $res->{up} = $up;
        } elsif (my ($down) = $p =~ m/^down=(\d+)$/) {
            $res->{down} = $down;
        } else {
            return undef;
        }
    }

    return $res;
}
1695
# Parse a usb setting. Accepted parts are "host=VVVV:PPPP" (vendorid and
# productid, optional 0x prefixes), "host=BUS-PORT[.PORT...]" (hostbus and
# hostport) and "spice". Returns undef for an empty value or on any other
# part.
sub parse_usb_device {
    my ($value) = @_;

    return undef if !$value;

    my $res = {};
    my $found;

    foreach my $v (split(/,/, $value)) {
        if (my (undef, $vendor, undef, $product) = $v =~ m/^host=(0x)?([0-9A-Fa-f]{4}):(0x)?([0-9A-Fa-f]{4})$/) {
            $res->{vendorid} = $vendor;
            $res->{productid} = $product;
        } elsif (my ($bus, $port) = $v =~ m/^host=(\d+)\-(\d+(\.\d+)*)$/) {
            $res->{hostbus} = $bus;
            $res->{hostport} = $port;
        } elsif ($v =~ m/^spice$/) {
            $res->{spice} = 1;
        } else {
            return undef;
        }
        $found = 1;
    }
    return undef if !$found;

    return $res;
}
1725
PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);
# Format check: returns $value if parse_usb_device() accepts it; otherwise
# returns undef when $noerr is set and dies when it is not.
sub verify_usb_device {
    my ($value, $noerr) = @_;

    if (!parse_usb_device($value)) {
        return undef if $noerr;
        die "unable to parse usb device\n";
    }

    return $value;
}
1736
# add JSON properties for create and set function
#
# Copies every $confdesc entry except parent, snaptime and vmstate into
# $prop and returns $prop.
sub json_config_properties {
    my $prop = shift;

    my %internal = map { ($_ => 1) } qw(parent snaptime vmstate);

    foreach my $opt (grep { !$internal{$_} } keys %$confdesc) {
        $prop->{$opt} = $confdesc->{$opt};
    }

    return $prop;
}
1748
# Check $value against the type that $confdesc declares for $key and return
# the normalized value (booleans become 1/0, integers are numified, plain
# strings lose one pair of surrounding double quotes).
#
# Dies on unknown keys, undefined values, values containing a line break,
# and values that do not match their type or format.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    die "internal error" if $type ne 'string';

    my $fmt = $desc->{format};
    if (!$fmt) {
        $value =~ s/^\"(.*)\"$/$1/;
        return $value;
    }

    if ($fmt eq 'pve-qm-drive') {
        # special case - we need to pass $key to parse_drive()
        return $value if parse_drive($key, $value);
        die "unable to parse drive options\n";
    }

    PVE::JSONSchema::check_format($fmt, $value);
    return $value;
}
1791
# Run $code->(@param) while holding the lock file of VM $vmid, waiting at
# most $timeout for the lock. Re-throws whatever lock_file() left in $@.
sub lock_config_full {
    my ($vmid, $timeout, $code, @param) = @_;

    my $res = lock_file(config_file_lock($vmid), $timeout, $code, @param);
    die $@ if $@;

    return $res;
}
1803
# Like lock_config_full(), but passes $shared through to lock_file_full().
sub lock_config_mode {
    my ($vmid, $timeout, $shared, $code, @param) = @_;

    my $res = lock_file_full(config_file_lock($vmid), $timeout, $shared, $code, @param);
    die $@ if $@;

    return $res;
}
1815
# Run $code->(@param) under the VM config lock with a timeout of 10.
sub lock_config {
    my ($vmid, $code, @param) = @_;

    my $timeout = 10;

    return lock_config_full($vmid, $timeout, $code, @param);
}
1821
# Cluster file system path of the config of VM $vmid on $node
# ($nodename when no node is given).
sub cfs_config_path {
    my ($vmid, $node) = @_;

    my $owner = $node || $nodename;

    return "nodes/$owner/qemu-server/$vmid.conf";
}
1828
# Placeholder: always reports IOMMU support as available.
sub check_iommu_support {
    #fixme : need to check IOMMU support
    #http://www.linux-kvm.org/page/How_to_assign_devices_with_VT-d_in_KVM

    return 1;
}
1837
# Path of the VM config file below /etc/pve.
sub config_file {
    my ($vmid, $node) = @_;

    return "/etc/pve/" . cfs_config_path($vmid, $node);
}
1844
# Path of the lock file guarding the config of VM $vmid.
sub config_file_lock {
    my ($vmid) = @_;

    return join('/', $lock_dir, "lock-$vmid.conf");
}
1850
# Set access and modification time of the VM config file to now.
sub touch_config {
    my ($vmid) = @_;

    utime(undef, undef, config_file($vmid));
}
1857
# Remove a VM: free the disks it owns, then delete its config file (or
# replace it with a minimal one when $keep_empty_config is set), and finally
# free every remaining volume the storage layer lists for this VM.
#
# Dies if the config is locked or freeing a configured disk fails; errors
# while removing the remaining volumes are only reported as warnings.
sub destroy_vm {
    my ($storecfg, $vmid, $keep_empty_config) = @_;

    my $conffile = config_file($vmid);

    my $conf = load_config($vmid);

    check_lock($conf);

    # only remove disks owned by this VM
    my $free_owned_disk = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);

        my $volid = $drive->{file};
        return if !$volid;
        return if $volid =~ m|^/|;

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        PVE::Storage::vdisk_free($storecfg, $volid);
    };
    foreach_drive($conf, $free_owned_disk);

    if (!$keep_empty_config) {
        unlink $conffile;
    } else {
        PVE::Tools::file_set_contents($conffile, "memory: 128\n");
    }

    # also remove unused disk
    eval {
        my $dl = PVE::Storage::vdisk_list($storecfg, undef, $vmid);

        PVE::Storage::foreach_volid($dl, sub {
            my ($volid, $sid, $volname, $d) = @_;
            PVE::Storage::vdisk_free($storecfg, $volid);
        });
    };
    warn $@ if $@;
}
1904
# Read the config of VM $vmid (optionally from $node) through the cluster
# file system. Dies if there is no such config.
sub load_config {
    my ($vmid, $node) = @_;

    my $conf = PVE::Cluster::cfs_read_file(cfs_config_path($vmid, $node));
    defined($conf) || die "no such VM ('$vmid')\n";

    return $conf;
}
1916
# Parse the raw text of a VM config file.
#
# Parameters:
#   $filename - path of the config; must end in /qemu-server/<vmid>.conf
#   $raw      - file content, or undef
#
# Returns undef if $raw is undef, otherwise a hash with the current settings
# at top level plus:
#   digest    - SHA1 hex digest of $raw
#   pending   - settings of the [PENDING] section
#   snapshots - one hash of settings per [snapshot-name] section
#
# Lines starting with '#' are collected as the description of the section
# they appear in. Values that fail check_type() are skipped with a warning.
# Dies if $filename does not have the expected form.
sub parse_vm_config {
    my ($filename, $raw) = @_;

    return undef if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    # $conf always points at the section currently being filled
    my $conf = $res;
    my $descr = '';
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        # a section header closes the previous section: store the
        # description collected so far and switch $conf
        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            $conf->{description} = $descr if $descr;
            $descr = '';
            $conf = $res->{$section} = {};
            next;

        } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            $section = $1;
            $conf->{description} = $descr if $descr;
            $descr = '';
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)\s*$/) {
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored like every other key, so that a line merely ending
            # in "snapstate: ..." is not taken for the snapstate setting
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            my $key = $1;
            my $value = $2;
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                warn "vm $vmid - propertry 'delete' is only allowed in [PENDING]\n";
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(\S+)\s*$/) {
            my $key = $1;
            my $value = $2;
            eval { $value = check_type($key, $value); };
            if ($@) {
                warn "vm $vmid - unable to parse value of '$key' - $@";
            } else {
                my $fmt = $confdesc->{$key}->{format};
                if ($fmt && $fmt eq 'pve-qm-drive') {
                    # store drives with their file name converted to a
                    # volume id
                    my $v = parse_drive($key, $value);
                    if (my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media})) {
                        $v->{file} = $volid;
                        $value = print_drive($vmid, $v);
                    } else {
                        warn "vm $vmid - unable to parse value of '$key'\n";
                        next;
                    }
                }

                # 'cdrom' is stored under ide2
                if ($key eq 'cdrom') {
                    $conf->{ide2} = $value;
                } else {
                    $conf->{$key} = $value;
                }
            }
        }
    }

    $conf->{description} = $descr if $descr;

    delete $res->{snapstate}; # just to be sure

    return $res;
}
2010
# Serialize a VM config hash into the text of its config file.
#
# Before writing, $conf is normalized in place: snapstate is dropped, cdrom
# is moved to ide2, smp is folded into sockets, every value is passed
# through check_type(), and unusedN entries naming a volume that a drive
# uses again are removed.
#
# Dies on conflicting cdrom/ide2 settings, on values that fail check_type(),
# and on a 'delete' entry outside the pending section.
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    if (my $cdrom = $conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = $cdrom;
        delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if (!$conf->{sockets} && $conf->{smp}) {
        $conf->{sockets} = $conf->{smp};
        delete $conf->{cores};
    }
    delete $conf->{smp} if $conf->{sockets};

    delete $conf->{sockets} if $conf->{maxcpus} && $conf->{sockets};

    # keys that are bookkeeping entries rather than settings
    my %meta_key = map { ($_ => 1) } qw(digest description snapshots snapstate pending);

    my $used_volids = {};

    # type-check one section in place; outside of snapshots, also collect
    # the volumes referenced by its drives
    my $check_section = sub {
        my ($cref, $pending, $snapname) = @_;

        foreach my $key (keys %$cref) {
            next if $meta_key{$key};

            if ($key eq 'delete') {
                die "propertry 'delete' is only allowed in [PENDING]\n"
                    if !$pending;
                # fixme: check syntax?
                next;
            }

            my $value = $cref->{$key};
            eval { $value = check_type($key, $value); };
            die "unable to parse value of '$key' - $@" if $@;

            $cref->{$key} = $value;

            next if $snapname || !valid_drivename($key);

            my $drive = parse_drive($key, $value);
            $used_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
        }
    };

    $check_section->($conf);

    $check_section->($conf->{pending}, 1);

    foreach my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error" if $snapname eq 'pending';
        $check_section->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    foreach my $key (grep { m/^unused/ } keys %$conf) {
        delete $conf->{$key} if $used_volids->{$conf->{$key}};
    }

    # render one section: description as leading comment lines, then the
    # settings sorted by key
    my $render_section = sub {
        my ($section) = @_;

        # add description as comment to top of file
        my @lines = map { '#' . PVE::Tools::encode_text($_) }
            split(/\n/, $section->{description} || '');

        foreach my $key (sort keys %$section) {
            next if $key eq 'digest' || $key eq 'description' || $key eq 'pending' || $key eq 'snapshots';
            push @lines, "$key: $section->{$key}";
        }

        return join('', map { "$_\n" } @lines);
    };

    my $raw = $render_section->($conf);

    if (scalar(keys %{$conf->{pending}})) {
        $raw .= "\n[PENDING]\n" . $render_section->($conf->{pending});
    }

    foreach my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n" . $render_section->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
2111
# Write $conf as the config of VM $vmid without taking the config lock.
# Unless $skiplock is set, check_lock() is consulted first.
sub update_config_nolock {
    my ($vmid, $conf, $skiplock) = @_;

    check_lock($conf) unless $skiplock;

    PVE::Cluster::cfs_write_file(cfs_config_path($vmid), $conf);
}
2121
# Write $conf as the config of VM $vmid while holding the config lock.
sub update_config {
    my ($vmid, $conf, $skiplock) = @_;

    # Hand over a code reference plus its full argument list. A bare
    # '&update_config_nolock' would call the function right here, before the
    # lock is taken, and pass its return value on as the callback.
    lock_config($vmid, \&update_config_nolock, $vmid, $conf, $skiplock);
}
2127
# Collect the default values declared in $confdesc; the keyboard default is
# overridden by the one from datacenter.cfg when set there.
sub load_defaults {

    my $res = {};

    # we use static defaults from our JSON schema configuration
    foreach my $key (keys %$confdesc) {
        my $default = $confdesc->{$key}->{default};
        $res->{$key} = $default if defined($default);
    }

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
    if (my $keyboard = $dc->{keyboard}) {
        $res->{keyboard} = $keyboard;
    }

    return $res;
}
2144
# Return a hash ref with one entry ({ exists => 1 }) per VM id from the
# cluster VM list whose type is 'qemu' and whose node is $nodename.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();

    my %found;
    return \%found if !$vmlist || !$vmlist->{ids};

    my $ids = $vmlist->{ids};

    for my $vmid (keys %$ids) {
        my $entry = $ids->{$vmid};

        my $is_local = $entry->{node} && $entry->{node} eq $nodename;
        next if !$is_local;

        my $is_qemu = $entry->{type} && $entry->{type} eq 'qemu';
        next if !$is_qemu;

        $found{$vmid}->{exists} = 1;
    }

    return \%found;
}
2159
# Test whether the VM configuration references local resources (used to
# prevent migration): usbN (unless set to 'spice'), hostpciN, serialN,
# parallelN, or the old style 'hostusb'/'hostpci' settings.
#
# Returns 1 or 0. Dies with "VM uses local resources" when such a resource
# is found and $noerr is not set.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    my $uses_local = 0;

    # old syntax
    for my $legacy ('hostusb', 'hostpci') {
        $uses_local = 1 if $conf->{$legacy};
    }

    for my $opt (keys %$conf) {
        # usb devices redirected through spice are not bound to this host
        if ($opt =~ m/^usb/ && ($conf->{$opt} eq 'spice')) {
            next;
        }

        if ($opt =~ m/^(usb|hostpci|serial|parallel)\d+$/) {
            $uses_local = 1;
        }
    }

    if ($uses_local && !$noerr) {
        die "VM uses local resources\n";
    }

    return $uses_local;
}
2178
# Check that the storages used by the drives of $conf are available on this
# node and on $node (used by migrate). Drives without a file, or whose file
# does not parse as a storage volume id, are ignored. The actual test is
# done by PVE::Storage::storage_check_node().
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $check_drive = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        # check if storage is available on both nodes
        for my $target ([], [$node]) {
            PVE::Storage::storage_check_node($storecfg, $sid, @$target);
        }
    };

    foreach_drive($conf, $check_drive);
}
2197
# List the nodes where all VM images are available (used by the has_feature
# API). Returns a hash ref whose keys are the remaining node names.
#
# Starting from the full cluster node list, every drive on a storage narrows
# the set: a disabled storage empties it, a storage with a 'nodes'
# restriction keeps only those nodes, and a non-shared storage keeps only
# the local node.
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $candidates = { map { $_ => 1 } @$nodelist };
    my $local = PVE::INotify::nodename();

    my $narrow = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        if ($scfg->{disable}) {
            $candidates = {};
            return;
        }

        if (my $avail = $scfg->{nodes}) {
            my @gone = grep { !$avail->{$_} } keys %$candidates;
            delete @{$candidates}{@gone};
            return;
        }

        return if $scfg->{shared};

        my @remote = grep { $_ ne $local } keys %$candidates;
        delete @{$candidates}{@remote};
    };

    foreach_drive($conf, $narrow);

    return $candidates;
}
2231
# Die with "VM is locked (<lock>)" when the configuration has a true 'lock'
# setting; otherwise do nothing.
sub check_lock {
    my ($conf) = @_;

    my $lock = $conf->{lock};

    die "VM is locked ($lock)\n" if $lock;
}
2237
# Inspect /proc/$pid/cmdline and return 1 when the process is a kvm /
# qemu-system-x86_64 binary that was started with '-pidfile $pidfile' (or
# '--pidfile'). Returns undef otherwise (an empty list in list context when
# the binary name does not match).
sub check_cmdline {
    my ($pidfile, $pid) = @_;

    my $fh = IO::File->new("/proc/$pid/cmdline", "r");
    return undef if !defined($fh);

    my $raw = <$fh>;
    $fh->close;
    return undef if !$raw;

    # arguments are separated by NUL bytes
    my @argv = split(/\0/, $raw);

    my $binary = $argv[0];
    return if !$binary || !($binary =~ m|kvm$| || $binary =~ m|qemu-system-x86_64$|);

    for my $idx (0 .. $#argv) {
        my $arg = $argv[$idx];
        next if !$arg;
        next if ($arg ne '-pidfile') && ($arg ne '--pidfile');

        # only the first pidfile option counts
        my $value = $argv[$idx + 1];
        return 1 if $value && ($value eq $pidfile);
        return undef;
    }

    return undef;
}
2263
# Test whether VM $vmid is running.
#
# Parameters:
#   $vmid    - VM id
#   $nocheck - when true, do not require the configuration file to exist
#   $node    - passed on to config_file() to locate the configuration file
#
# Returns the pid of the VM process, or undef when the VM is not running.
# Dies when the configuration file is missing and $nocheck is not set.
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    my $filename = config_file($vmid, $node);

    die "unable to find configuration file for VM $vmid - no such machine\n"
        if !$nocheck && ! -f $filename;

    my $pidfile = pidfile_name($vmid);

    # explicit read mode instead of a mode prefix inside the file name
    my $fd = IO::File->new($pidfile, 'r');
    return undef if !$fd;

    my $st = stat($fd);
    my $line = <$fd>;
    close($fd);

    # The timestamp examined here belongs to the pid file, so that is the
    # file to name in the warning (not the configuration file).
    if ($st && $st->mtime > time()) {
        warn "file '$pidfile' modified in future\n";
    }

    # an empty pid file yields no line at all
    return undef if !defined($line) || $line !~ m/^(\d+)$/;
    my $pid = $1;

    # the pid must belong to a kvm process started with our pid file ...
    return undef if !check_cmdline($pidfile, $pid);

    # ... and that process must still be running
    return undef if !PVE::ProcFSTools::check_process_running($pid);

    return $pid;
}
2296
# Return the VM list from config_list(), extended with the 'pid' of every VM
# that has a '<vmid>.pid' file in $var_run_tmpdir and is reported as running
# by check_running().
sub vzlist {

    my $vms = config_list();

    my $dir = IO::Dir->new($var_run_tmpdir);
    return $vms if !$dir;

    while (defined(my $entry = $dir->read)) {
        if ($entry =~ m/^(\d+)\.pid$/) {
            my $vmid = $1;

            # ignore pid files of VMs we do not know about
            if (defined($vms->{$vmid})) {
                my $pid = check_running($vmid);
                $vms->{$vmid}->{pid} = $pid if $pid;
            }
        }
    }

    return $vms;
}
2314
# Return the 'size' property of the boot disk of $conf, or undef when there
# is no usable boot disk (not set, invalid drive name, not configured,
# unparsable, a cdrom, or without a file). $storecfg is not used.
sub disksize {
    my ($storecfg, $conf) = @_;

    my $name = $conf->{bootdisk};
    return undef if !$name || !valid_drivename($name) || !$conf->{$name};

    my $drive = parse_drive($name, $conf->{$name});
    return undef if !defined($drive) || drive_is_cdrom($drive) || !$drive->{file};

    return $drive->{size};
}
2334
# Last CPU usage sample per VM process (keyed by pid), kept between calls of
# vmstatus() so that the CPU load can be derived from the difference.
my $last_proc_pid_stat;

# get VM status information
# This must be fast and should not block ($full == false)
# We only query KVM using QMP if $full == true (this can be slow)
#
# Returns a hash ref mapping each VM id (only $opt_vmid when given) to a
# hash with status values (pid, status, cpu, mem, disk, net counters, ...).
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    my $cpucount = $cpuinfo->{cpus} || 1;

    # pass 1: static values taken from the configuration
    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);

        my $cfspath = cfs_config_path($vmid);
        my $conf = PVE::Cluster::cfs_read_file($cfspath) || {};

        my $pid = $list->{$vmid}->{pid};

        my $d = {
            pid => $pid,
            # fixme: better status?
            status => $pid ? 'running' : 'stopped',
        };

        my $size = disksize($storecfg, $conf);
        $d->{disk} = 0; # no info available
        $d->{maxdisk} = defined($size) ? $size : 0;

        my $cpus = ($conf->{sockets} || 1) * ($conf->{cores} || 1);
        $cpus = $cpucount if $cpus > $cpucount;
        $d->{cpus} = $cpus;

        $d->{name} = $conf->{name} || "VM $vmid";
        $d->{maxmem} = $conf->{memory} ? $conf->{memory}*(1024*1024) : 0;

        if ($conf->{balloon}) {
            $d->{balloon_min} = $conf->{balloon}*(1024*1024);
            $d->{shares} = defined($conf->{shares}) ? $conf->{shares} : 1000;
        }

        # dynamic values start at zero and are filled in below
        foreach my $counter (qw(uptime cpu mem netout netin diskread diskwrite)) {
            $d->{$counter} = 0;
        }

        $d->{template} = is_template($conf);

        $res->{$vmid} = $d;
    }

    # pass 2: network counters of the tap devices (tap<vmid>i...)
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    foreach my $dev (keys %$netdev) {
        if ($dev =~ m/^tap([1-9]\d*)i/) {
            my $d = $res->{$1};
            next if !$d;

            $d->{netout} += $netdev->{$dev}->{receive};
            $d->{netin} += $netdev->{$dev}->{transmit};
        }
    }

    my $ctime = gettimeofday;

    # pass 3: uptime, memory and cpu usage from /proc/<pid>/stat
    foreach my $vmid (keys %$list) {

        my $d = $res->{$vmid};
        next if !$d || !$d->{pid};
        my $pid = $d->{pid};

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        $d->{uptime} = int(($uptime - $pstat->{starttime})/$cpuinfo->{user_hz});

        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss}/$pstat->{vsize})*$d->{maxmem});
        }

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first sample for this process - no load value yet
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime <= 1000) {
            # interval too short, reuse the previous value
            $d->{cpu} = $old->{cpu};
            next;
        }

        my $dutime = $used - $old->{used};

        $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
        $last_proc_pid_stat->{$pid} = {
            time => $ctime,
            used => $used,
            cpu => $d->{cpu},
        };
    }

    return $res if !$full;

    my $qmpclient = PVE::QMPClient->new();

    my $ballooncb = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $d = $res->{$vmid};

        # use memory assigned to VM
        $d->{maxmem} = $info->{max_mem};
        $d->{balloon} = $info->{actual};

        if (defined($info->{total_mem}) && defined($info->{free_mem})) {
            $d->{mem} = $info->{total_mem} - $info->{free_mem};
            $d->{freemem} = $info->{free_mem};
        }
    };

    my $blockstatscb = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} || [];

        my ($rd_total, $wr_total) = (0, 0);
        foreach my $blockstat (@$data) {
            $rd_total += $blockstat->{stats}->{rd_bytes};
            $wr_total += $blockstat->{stats}->{wr_bytes};
        }

        $res->{$vmid}->{diskread} = $rd_total;
        $res->{$vmid}->{diskwrite} = $wr_total;
    };

    my $statuscb = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
        # this fails if the balloon driver is not loaded, so this must be
        # the last command (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');

        my $status = $resp->{'return'}->{status};
        if (!defined($status)) {
            warn "unable to get VM status\n";
            return;
        }

        $res->{$vmid}->{qmpstatus} = $status;
    };

    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; # not running
        $qmpclient->queue_cmd($vmid, $statuscb, 'query-status');
    }

    $qmpclient->queue_execute(undef, 1);

    # fall back to the process based status when QMP gave no answer
    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        my $d = $res->{$vmid};
        $d->{qmpstatus} = $d->{status} if !$d->{qmpstatus};
    }

    return $res;
}
2523
# Call $func->($ds, $drive) for every key of $conf that is a valid drive
# name and whose value parse_drive() can parse.
sub foreach_drive {
    my ($conf, $func) = @_;

    for my $key (keys %$conf) {
        if (valid_drivename($key)) {
            my $drive = parse_drive($key, $conf->{$key});
            $func->($key, $drive) if $drive;
        }
    }
}
2536
# Call $func->($volid, $is_cdrom) once for every volume referenced by the
# drives of $conf and of its snapshots (including each snapshot's 'vmstate').
# When a volume is referenced several times the cdrom flag seen last wins.
sub foreach_volid {
    my ($conf, $func) = @_;

    my %cdrom_flag_of;

    my $remember = sub {
        my ($volid, $is_cdrom) = @_;

        return if !$volid;

        $cdrom_flag_of{$volid} = $is_cdrom || 0;
    };

    # shared by the current configuration and all snapshots
    my $visit_drive = sub {
        my ($ds, $drive) = @_;
        $remember->($drive->{file}, drive_is_cdrom($drive));
    };

    foreach_drive($conf, $visit_drive);

    foreach my $snapname (keys %{$conf->{snapshots}}) {
        my $snap = $conf->{snapshots}->{$snapname};
        $remember->($snap->{vmstate}, 0);
        foreach_drive($snap, $visit_drive);
    }

    for my $volid (keys %cdrom_flag_of) {
        $func->($volid, $cdrom_flag_of{$volid});
    }
}
2568
# Return 0 unless $vga is 'qxl' optionally followed by 2, 3 or 4. For a
# match return that trailing digit, or 1 for plain 'qxl'.
sub vga_conf_has_spice {
    my ($vga) = @_;

    if ($vga && $vga =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
2576
# Build the kvm command line for VM $vmid from its configuration.
#
# Parameters:
#   $storecfg     - storage configuration, handed to the drive helpers
#   $vmid         - VM id
#   $conf         - VM configuration hash
#   $defaults     - default values for options not set in $conf
#   $forcemachine - optional machine type, takes precedence over
#                   $conf->{machine}
#
# Returns ($cmd, $vollist, $spice_port) in list context and only $cmd in
# scalar context. $cmd is an array ref (program followed by its arguments),
# $vollist an array ref with the drive files that parse as storage volume
# ids, and $spice_port is only set when a qxl display is configured.
#
# Dies on unusable settings (too old kvm binary, more cores than the host
# has, missing character devices, inconsistent NUMA setup, ...).
sub config_to_command {
    my ($storecfg, $vmid, $conf, $defaults, $forcemachine) = @_;

    my $cmd = [];
    my $globalFlags = [];
    my $machineFlags = [];
    my $rtcFlags = [];
    my $cpuFlags = [];
    my $devices = [];
    my $pciaddr = '';
    my $bridges = {};
    my $kvmver = kvm_user_version();
    my $vernum = 0; # unknown
    if ($kvmver =~ m/^(\d+)\.(\d+)$/) {
        $vernum = $1*1000000+$2*1000;
    } elsif ($kvmver =~ m/^(\d+)\.(\d+)\.(\d+)$/) {
        $vernum = $1*1000000+$2*1000+$3;
    }

    die "detected old qemu-kvm binary ($kvmver)\n" if $vernum < 15000;

    my $have_ovz = -f '/proc/vz/vestat';

    my $q35 = machine_type_is_q35($conf);

    # ostype is optional; use an empty string for the string comparisons
    # below so they do not operate on undef
    my $ostype = defined($conf->{ostype}) ? $conf->{ostype} : '';

    push @$cmd, '/usr/bin/kvm';

    push @$cmd, '-id', $vmid;

    my $use_virtio = 0;

    my $qmpsocket = qmp_socket($vmid);
    push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server,nowait";
    push @$cmd, '-mon', "chardev=qmp,mode=control";

    my $socket = vnc_socket($vmid);
    push @$cmd, '-vnc', "unix:$socket,x509,password";

    push @$cmd, '-pidfile' , pidfile_name($vmid);

    push @$cmd, '-daemonize';

    if ($conf->{smbios1}) {
        push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
    }

    push @$cmd, '-object', "iothread,id=iothread0" if $conf->{iothread};

    if ($q35) {
        # the q35 chipset support native usb2, so we enable usb controller
        # by default for this machine type
        push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
    } else {
        $pciaddr = print_pci_addr("piix3", $bridges);
        push @$devices, '-device', "piix3-usb-uhci,id=uhci$pciaddr.0x2";

        my $use_usb2 = 0;
        for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) {
            next if !$conf->{"usb$i"};
            $use_usb2 = 1;
        }
        # include usb device config
        push @$devices, '-readconfig', '/usr/share/qemu-server/pve-usb.cfg' if $use_usb2;
    }

    my $vga = $conf->{vga};

    my $qxlnum = vga_conf_has_spice($vga);
    $vga = 'qxl' if $qxlnum;

    if (!$vga) {
        if ($conf->{ostype} && ($conf->{ostype} eq 'win8' ||
                                $conf->{ostype} eq 'win7' ||
                                $conf->{ostype} eq 'w2k8')) {
            $vga = 'std';
        } else {
            $vga = 'cirrus';
        }
    }

    # enable absolute mouse coordinates (needed by vnc)
    my $tablet;
    if (defined($conf->{tablet})) {
        $tablet = $conf->{tablet};
    } else {
        $tablet = $defaults->{tablet};
        $tablet = 0 if $qxlnum; # disable for spice because it is not needed
        $tablet = 0 if $vga =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
    }

    push @$devices, '-device', print_tabletdevice_full($conf) if $tablet;

    # host pci devices
    for (my $i = 0; $i < $MAX_HOSTPCI_DEVICES; $i++) {
        my $d = parse_hostpci($conf->{"hostpci$i"});
        next if !$d;

        my $pcie = $d->{pcie};
        if ($pcie) {
            die "q35 machine model is not enabled" if !$q35;
            $pciaddr = print_pcie_addr("hostpci$i");
        } else {
            $pciaddr = print_pci_addr("hostpci$i", $bridges);
        }

        my $rombar = $d->{rombar} && $d->{rombar} eq 'off' ? ",rombar=0" : "";
        my $driver = $d->{driver} && $d->{driver} eq 'vfio' ? "vfio-pci" : "pci-assign";
        my $xvga = $d->{'x-vga'} && $d->{'x-vga'} eq 'on' ? ",x-vga=on" : "";
        if ($xvga && $xvga ne '') {
            push @$cpuFlags, 'kvm=off';
            $vga = 'none';
        }
        $driver = "vfio-pci" if $xvga ne '';
        my $pcidevices = $d->{pciid};

        # plain assignment: 'my $x = ... if COND' has undefined behaviour
        # and could carry a value over from a previous loop iteration
        my $multifunction = @$pcidevices > 1;

        my $j=0;
        foreach my $pcidevice (@$pcidevices) {

            my $id = "hostpci$i";
            $id .= ".$j" if $multifunction;
            my $addr = $pciaddr;
            $addr .= ".$j" if $multifunction;
            my $devicestr = "$driver,host=$pcidevice->{id}.$pcidevice->{function},id=$id$addr";

            if ($j == 0) {
                $devicestr .= "$rombar$xvga";
                $devicestr .= ",multifunction=on" if $multifunction;
            }

            push @$devices, '-device', $devicestr;
            $j++;
        }
    }

    # usb devices
    for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) {
        my $d = parse_usb_device($conf->{"usb$i"});
        next if !$d;
        if ($d->{vendorid} && $d->{productid}) {
            push @$devices, '-device', "usb-host,vendorid=0x$d->{vendorid},productid=0x$d->{productid}";
        } elsif (defined($d->{hostbus}) && defined($d->{hostport})) {
            push @$devices, '-device', "usb-host,hostbus=$d->{hostbus},hostport=$d->{hostport}";
        } elsif ($d->{spice}) {
            # usb redir support for spice
            push @$devices, '-chardev', "spicevmc,id=usbredirchardev$i,name=usbredir";
            push @$devices, '-device', "usb-redir,chardev=usbredirchardev$i,id=usbredirdev$i,bus=ehci.0";
        }
    }

    # serial devices
    for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
        if (my $path = $conf->{"serial$i"}) {
            if ($path eq 'socket') {
                my $socket = "/var/run/qemu-server/${vmid}.serial$i";
                push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server,nowait";
                push @$devices, '-device', "isa-serial,chardev=serial$i";
            } else {
                die "no such serial device\n" if ! -c $path;
                push @$devices, '-chardev', "tty,id=serial$i,path=$path";
                push @$devices, '-device', "isa-serial,chardev=serial$i";
            }
        }
    }

    # parallel devices
    for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
        if (my $path = $conf->{"parallel$i"}) {
            die "no such parallel device\n" if ! -c $path;
            my $devtype = $path =~ m!^/dev/usb/lp! ? 'tty' : 'parport';
            push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
            push @$devices, '-device', "isa-parallel,chardev=parallel$i";
        }
    }

    my $vmname = $conf->{name} || "vm$vmid";

    push @$cmd, '-name', $vmname;

    my $sockets = 1;
    $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer used
    $sockets = $conf->{sockets} if $conf->{sockets};

    my $cores = $conf->{cores} || 1;
    my $maxcpus = $conf->{maxcpus}; # only tested for truth below

    my $total_cores = $sockets * $cores;
    my $allowed_cores = $cpuinfo->{cpus};

    die "MAX $allowed_cores cores allowed per VM on this node\n"
        if ($allowed_cores < $total_cores);

    if ($maxcpus) {
        push @$cmd, '-smp', "cpus=$cores,maxcpus=$maxcpus";
    } else {
        push @$cmd, '-smp', "sockets=$sockets,cores=$cores";
    }

    push @$cmd, '-nodefaults';

    my $bootorder = $conf->{boot} || $confdesc->{boot}->{default};

    # every letter of the boot order gets its own range of boot indexes
    my $bootindex_hash = {};
    my $i = 1;
    foreach my $o (split(//, $bootorder)) {
        $bootindex_hash->{$o} = $i*100;
        $i++;
    }

    push @$cmd, '-boot', "menu=on";

    push @$cmd, '-no-acpi' if defined($conf->{acpi}) && $conf->{acpi} == 0;

    push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;

    push @$cmd, '-vga', $vga if $vga && $vga !~ m/^serial\d+$/; # for kvm 77 and later

    # time drift fix
    my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};

    my $nokvm = defined($conf->{kvm}) && $conf->{kvm} == 0 ? 1 : 0;
    my $useLocaltime = $conf->{localtime};

    if (my $ost = $conf->{ostype}) {
        # other, wxp, w2k, w2k3, w2k8, wvista, win7, win8, l24, l26, solaris

        if ($ost =~ m/^w/) { # windows
            $useLocaltime = 1 if !defined($conf->{localtime});

            # use time drift fix when acpi is enabled
            if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
                $tdf = 1 if !defined($conf->{tdf});
            }
        }

        if ($ost eq 'win7' || $ost eq 'win8' || $ost eq 'w2k8' ||
            $ost eq 'wvista') {
            push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
            push @$cmd, '-no-hpet';
            #push @$cpuFlags , 'hv_vapic' if !$nokvm; #fixme, my win2008R2 hang at boot with this
            push @$cpuFlags , 'hv_spinlocks=0xffff' if !$nokvm;
        }

        if ($ost eq 'win7' || $ost eq 'win8') {
            push @$cpuFlags , 'hv_relaxed' if !$nokvm;
        }
    }

    push @$rtcFlags, 'driftfix=slew' if $tdf;

    if ($nokvm) {
        push @$machineFlags, 'accel=tcg';
    } else {
        die "No accelerator found!\n" if !$cpuinfo->{hvm};
    }

    my $machine_type = $forcemachine || $conf->{machine};
    if ($machine_type) {
        push @$machineFlags, "type=${machine_type}";
    }

    if ($conf->{startdate}) {
        push @$rtcFlags, "base=$conf->{startdate}";
    } elsif ($useLocaltime) {
        push @$rtcFlags, 'base=localtime';
    }

    my $cpu = $nokvm ? "qemu64" : "kvm64";
    $cpu = $conf->{cpu} if $conf->{cpu};

    push @$cpuFlags , '+lahf_lm' if $cpu eq 'kvm64';

    push @$cpuFlags , '+x2apic' if !$nokvm && $ostype ne 'solaris';

    push @$cpuFlags , '-x2apic' if $ostype eq 'solaris';

    push @$cpuFlags, '+sep' if $cpu eq 'kvm64' || $cpu eq 'kvm32';

    $cpu .= "," . join(',', @$cpuFlags) if scalar(@$cpuFlags);

    # Note: enforce needs kernel 3.10, so we do not use it for now
    # push @$cmd, '-cpu', "$cpu,enforce";
    push @$cmd, '-cpu', $cpu;

    my $memory = $conf->{memory} || $defaults->{memory};
    push @$cmd, '-m', $memory;

    if ($conf->{numa}) {

        my $numa_totalmemory = undef;
        for (my $i = 0; $i < $MAX_NUMA; $i++) {
            next if !$conf->{"numa$i"};
            my $numa = parse_numa($conf->{"numa$i"});
            next if !$numa;
            # memory
            die "missing numa node$i memory value\n" if !$numa->{memory};
            my $numa_memory = $numa->{memory};
            $numa_totalmemory += $numa_memory;
            my $numa_object = "memory-backend-ram,id=ram-node$i,size=$numa_memory"."M";

            # cpus
            my $cpus_start = $numa->{cpus}->{start};
            die "missing numa node$i cpus\n" if !defined($cpus_start);
            my $cpus_end = $numa->{cpus}->{end};
            my $cpus = $cpus_start;
            if (defined($cpus_end)) {
                $cpus .= "-$cpus_end";
                die "numa node$i : cpu range $cpus is incorrect\n" if $cpus_end <= $cpus_start;
            }

            # hostnodes
            my $hostnodes_start = $numa->{hostnodes}->{start};
            if (defined($hostnodes_start)) {
                my $hostnodes_end = $numa->{hostnodes}->{end};
                my $hostnodes = $hostnodes_start;
                if (defined($hostnodes_end)) {
                    $hostnodes .= "-$hostnodes_end";
                    die "host node $hostnodes range is incorrect\n" if $hostnodes_end <= $hostnodes_start;
                }

                # every host node of the range must exist
                my $hostnodes_end_range = defined($hostnodes_end) ? $hostnodes_end : $hostnodes_start;
                for (my $hn = $hostnodes_start; $hn <= $hostnodes_end_range; $hn++ ) {
                    die "host numa node$hn don't exist\n" if ! -d "/sys/devices/system/node/node$hn/";
                }

                # policy
                my $policy = $numa->{policy};
                die "you need to define a policy for hostnode $hostnodes\n" if !$policy;
                $numa_object .= ",host-nodes=$hostnodes,policy=$policy";
            }

            push @$cmd, '-object', $numa_object;
            push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
        }

        die "total memory for NUMA nodes must be equal to vm memory\n"
            if $numa_totalmemory && $numa_totalmemory != $memory;

        # if no custom topology, we split memory and cores across numa nodes
        if (!$numa_totalmemory) {

            my $numa_memory = ($memory / $sockets) . "M";

            for (my $i = 0; $i < $sockets; $i++) {

                my $cpustart = ($cores * $i);
                # a range is only needed with more than one core per socket
                my $cpuend = ($cores > 1) ? ($cpustart + $cores - 1) : undef;
                my $cpus = $cpustart;
                $cpus .= "-$cpuend" if defined($cpuend);

                push @$cmd, '-object', "memory-backend-ram,size=$numa_memory,id=ram-node$i";
                push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
            }
        }
    }

    push @$cmd, '-S' if $conf->{freeze};

    # set keyboard layout
    my $kb = $conf->{keyboard} || $defaults->{keyboard};
    push @$cmd, '-k', $kb if $kb;

    # enable sound
    #my $soundhw = $conf->{soundhw} || $defaults->{soundhw};
    #push @$cmd, '-soundhw', 'es1370';
    #push @$cmd, '-soundhw', $soundhw if $soundhw;

    if ($conf->{agent}) {
        my $qgasocket = qmp_socket($vmid, 1);
        my $pciaddr = print_pci_addr("qga0", $bridges);
        push @$devices, '-chardev', "socket,path=$qgasocket,server,nowait,id=qga0";
        push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
        push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
    }

    my $spice_port;

    if ($qxlnum) {
        if ($qxlnum > 1) {
            if ($conf->{ostype} && $conf->{ostype} =~ m/^w/) {
                for (my $i = 1; $i < $qxlnum; $i++) {
                    my $pciaddr = print_pci_addr("vga$i", $bridges);
                    push @$cmd, '-device', "qxl,id=vga$i,ram_size=67108864,vram_size=33554432$pciaddr";
                }
            } else {
                # assume other OS works like Linux
                push @$cmd, '-global', 'qxl-vga.ram_size=134217728';
                push @$cmd, '-global', 'qxl-vga.vram_size=67108864';
            }
        }

        my $pciaddr = print_pci_addr("spice", $bridges);

        $spice_port = PVE::Tools::next_spice_port();

        push @$devices, '-spice', "tls-port=${spice_port},addr=127.0.0.1,tls-ciphers=DES-CBC3-SHA,seamless-migration=on";

        push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
        push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent";
        push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";
    }

    # enable balloon by default, unless explicitly disabled
    if (!defined($conf->{balloon}) || $conf->{balloon}) {
        $pciaddr = print_pci_addr("balloon0", $bridges);
        push @$devices, '-device', "virtio-balloon-pci,id=balloon0$pciaddr";
    }

    if ($conf->{watchdog}) {
        my $wdopts = parse_watchdog($conf->{watchdog});
        $pciaddr = print_pci_addr("watchdog", $bridges);
        my $watchdog = $wdopts->{model} || 'i6300esb';
        push @$devices, '-device', "$watchdog$pciaddr";
        push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
    }

    my $vollist = [];
    my $scsicontroller = {};
    my $ahcicontroller = {};
    my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};

    # Add iscsi initiator name if available
    if (my $initiator = get_initiator_name()) {
        push @$devices, '-iscsi', "initiator-name=$initiator";
    }

    foreach_drive($conf, sub {
        my ($ds, $drive) = @_;

        if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
            push @$vollist, $drive->{file};
        }

        $use_virtio = 1 if $ds =~ m/^virtio/;

        if (drive_is_cdrom ($drive)) {
            if ($bootindex_hash->{d}) {
                $drive->{bootindex} = $bootindex_hash->{d};
                $bootindex_hash->{d} += 1;
            }
        } else {
            if ($bootindex_hash->{c}) {
                $drive->{bootindex} = $bootindex_hash->{c} if $conf->{bootdisk} && ($conf->{bootdisk} eq $ds);
                $bootindex_hash->{c} += 1;
            }
        }

        if ($drive->{interface} eq 'scsi') {

            # add the scsi controller once, before its first drive
            my $maxdev = ($scsihw !~ m/^lsi/) ? 256 : 7;
            my $controller = int($drive->{index} / $maxdev);
            $pciaddr = print_pci_addr("scsihw$controller", $bridges);
            push @$devices, '-device', "$scsihw,id=scsihw$controller$pciaddr" if !$scsicontroller->{$controller};
            $scsicontroller->{$controller}=1;
        }

        if ($drive->{interface} eq 'sata') {
            # add the ahci controller once, before its first drive
            my $controller = int($drive->{index} / $MAX_SATA_DISKS);
            $pciaddr = print_pci_addr("ahci$controller", $bridges);
            push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr" if !$ahcicontroller->{$controller};
            $ahcicontroller->{$controller}=1;
        }

        my $drive_cmd = print_drive_full($storecfg, $vmid, $drive);
        push @$devices, '-drive',$drive_cmd;
        push @$devices, '-device', print_drivedevice_full($storecfg, $conf, $vmid, $drive, $bridges);
    });

    for (my $i = 0; $i < $MAX_NETS; $i++) {
        next if !$conf->{"net$i"};
        my $d = parse_net($conf->{"net$i"});
        next if !$d;

        $use_virtio = 1 if $d->{model} eq 'virtio';

        if ($bootindex_hash->{n}) {
            $d->{bootindex} = $bootindex_hash->{n};
            $bootindex_hash->{n} += 1;
        }

        my $netdevfull = print_netdev_full($vmid,$conf,$d,"net$i");
        push @$devices, '-netdev', $netdevfull;

        my $netdevicefull = print_netdevice_full($vmid,$conf,$d,"net$i",$bridges);
        push @$devices, '-device', $netdevicefull;
    }

    if (!$q35) {
        # add pci bridges (in front of the devices collected so far)
        while (my ($k, $v) = each %$bridges) {
            $pciaddr = print_pci_addr("pci.$k");
            unshift @$devices, '-device', "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr" if $k > 0;
        }
    }

    # hack: virtio with fairsched is unreliable, so we do not use fairsched
    # when the VM uses virtio devices.
    if (!$use_virtio && $have_ovz) {

        my $cpuunits = defined($conf->{cpuunits}) ?
            $conf->{cpuunits} : $defaults->{cpuunits};

        push @$cmd, '-cpuunits', $cpuunits if $cpuunits;

        # fixme: cpulimit is currently ignored
        #push @$cmd, '-cpulimit', $conf->{cpulimit} if $conf->{cpulimit};
    }

    # add custom args
    if ($conf->{args}) {
        my $aa = PVE::Tools::split_args($conf->{args});
        push @$cmd, @$aa;
    }

    push @$cmd, @$devices;
    push @$cmd, '-rtc', join(',', @$rtcFlags)
        if scalar(@$rtcFlags);
    push @$cmd, '-machine', join(',', @$machineFlags)
        if scalar(@$machineFlags);
    push @$cmd, '-global', join(',', @$globalFlags)
        if scalar(@$globalFlags);

    return wantarray ? ($cmd, $vollist, $spice_port) : $cmd;
}
3101
# Path of the VNC unix socket of VM $vmid below $var_run_tmpdir.
sub vnc_socket {
    my ($vmid) = @_;

    my $name = "$vmid.vnc";

    return "${var_run_tmpdir}/$name";
}
3106
# Ask the running VM (monitor command 'query-spice') for its spice port.
# The TLS port is preferred over the plain port; dies with "no spice port"
# when neither is reported.
sub spice_port {
    my ($vmid) = @_;

    my $info = vm_mon_cmd($vmid, 'query-spice');

    my $port = $info->{'tls-port'} || $info->{'port'};
    die "no spice port\n" if !$port;

    return $port;
}
3114
# Path of the monitor socket of VM $vmid below $var_run_tmpdir: the 'qga'
# socket when $qga is true, the 'qmp' socket otherwise.
sub qmp_socket {
    my ($vmid, $qga) = @_;

    my $suffix = 'qmp';
    $suffix = 'qga' if $qga;

    return "${var_run_tmpdir}/$vmid.$suffix";
}
3120
# Path of the pid file of VM $vmid below $var_run_tmpdir.
sub pidfile_name {
    my ($vmid) = @_;

    my $name = "$vmid.pid";

    return "${var_run_tmpdir}/$name";
}
3125
# Query the running VM for the devices it currently has and return them as
# a hash ref (device id => 1). The ids come from the 'qdev_id' of the PCI
# devices (query-pci) and from the block devices named 'drive-<id>'
# (query-block); 'tablet' is added when query-mice reports a
# 'QEMU HID Tablet'.
sub vm_devices_list {
    my ($vmid) = @_;

    my %present;

    my $pcibuses = vm_mon_cmd($vmid, 'query-pci');
    for my $bus (@$pcibuses) {
        for my $pcidev (@{$bus->{devices}}) {
            my $id = $pcidev->{'qdev_id'};
            $present{$id} = 1 if $id;
        }
    }

    my $blockdevs = vm_mon_cmd($vmid, 'query-block');
    for my $blockdev (@$blockdevs) {
        $present{$1} = 1 if $blockdev->{device} =~ m/^drive-(\S+)/;
    }

    my $mice = vm_mon_cmd($vmid, 'query-mice');
    for my $mouse (@$mice) {
        next if $mouse->{name} ne 'QEMU HID Tablet';
        $present{tablet} = 1;
        last;
    }

    return \%present;
}
3155
# Hot-plug device $deviceid into the running VM $vmid.
#
# Returns 1 on success, and also when there is nothing to do (VM not
# running, hotplug disabled in $conf, or device already present). Returns
# undef when plugging fails.
#
# fixme: this should raise exceptions on error!
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device) = @_;

    return 1 if !check_running($vmid);

    my $q35 = machine_type_is_q35($conf);

    return 1 if !$conf->{hotplug};

    my $present = vm_devices_list($vmid);
    return 1 if defined($present->{$deviceid});

    if ($deviceid eq 'tablet') {
        qemu_deviceadd($vmid, print_tabletdevice_full($conf));
        return 1;
    }

    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid); # add PCI bridge if we need it for the device

    # remove the already added drive again after a failed device add
    my $undo_driveadd = sub {
        eval { qemu_drivedel($vmid, $deviceid); };
        warn $@ if $@;
        return undef;
    };

    if ($deviceid =~ m/^(virtio)(\d+)$/) {

        return undef if !qemu_driveadd($storecfg, $vmid, $device);
        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device);
        qemu_deviceadd($vmid, $devicefull);
        return $undo_driveadd->() if !qemu_deviceaddverify($vmid, $deviceid);

    } elsif ($deviceid =~ m/^(scsihw)(\d+)$/) {

        my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : "lsi";
        my $pciaddr = print_pci_addr($deviceid);
        qemu_deviceadd($vmid, "$scsihw,id=$deviceid$pciaddr");
        return undef if !qemu_deviceaddverify($vmid, $deviceid);

    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {

        return undef if !qemu_findorcreatescsihw($storecfg,$conf, $vmid, $device);
        return undef if !qemu_driveadd($storecfg, $vmid, $device);
        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device);
        # fixme: use qemu_deviceaddverify?
        return $undo_driveadd->() if !qemu_deviceadd($vmid, $devicefull);

    } elsif ($deviceid =~ m/^(net)(\d+)$/) {

        return undef if !qemu_netdevadd($vmid, $conf, $device, $deviceid);
        my $netdevicefull = print_netdevice_full($vmid, $conf, $device, $deviceid);
        qemu_deviceadd($vmid, $netdevicefull);
        if (!qemu_deviceaddverify($vmid, $deviceid)) {
            qemu_netdevdel($vmid, $deviceid);
            return undef;
        }

    } elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) {

        my $bridgeid = $2;
        my $pciaddr = print_pci_addr($deviceid);
        qemu_deviceadd($vmid, "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr");
        return undef if !qemu_deviceaddverify($vmid, $deviceid);

    }

    return 1;
}
3227
# Hot-unplug device $deviceid from VM $vmid.
#
# Returns 1 when the device was removed or is not listed by vm_devices_list.
# Dies when hotplug is not enabled in $conf, when the device is the boot
# disk, when the device type is not supported, or when one of the monitor
# helpers raises an error.
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    die "internal error" if !$conf->{hotplug};

    my $present = vm_devices_list($vmid);
    return 1 if !defined($present->{$deviceid});

    if ($conf->{bootdisk} && $conf->{bootdisk} eq $deviceid) {
        die "can't unplug bootdisk";
    }

    if ($deviceid eq 'tablet' || $deviceid =~ m/^(lsi)(\d+)$/) {

        # plain device removal, nothing else to clean up
        qemu_devicedel($vmid, $deviceid);

    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {

        # wait until the guest released the device before dropping the drive
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);

    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {

        qemu_devicedel($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);

    } elsif ($deviceid =~ m/^(net)(\d+)$/) {

        # wait until the guest released the NIC before dropping the netdev
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_netdevdel($vmid, $deviceid);

    } else {
        die "can't unplug device '$deviceid'\n";
    }

    return 1;
}
3270
# Send a 'device_add' monitor command for the device description
# $devicefull ("<driver>,key=value,..."). Always returns 1; errors from
# vm_mon_cmd propagate as exceptions.
sub qemu_deviceadd {
    my ($vmid, $devicefull) = @_;

    # the leading driver name becomes the 'driver' argument; the rest is
    # split into key/value pairs on '=' and ','
    my %args = split(/[=,]/, "driver=" . $devicefull);

    vm_mon_cmd($vmid, "device_add" , %args);

    return 1;
}
3280
# Send a 'device_del' monitor command for device $deviceid and return the
# command result.
sub qemu_devicedel {
    my ($vmid, $deviceid) = @_;

    return scalar(vm_mon_cmd($vmid, "device_del", id => $deviceid));
}
3286
# Add the drive described by $device to VM $vmid using the human monitor
# command 'drive_add'. Returns 1 on success; on failure the monitor reply
# is logged to syslog and undef is returned.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    my $cmdline = "drive_add auto " . print_drive_full($storecfg, $vmid, $device);
    my $reply = vm_human_monitor_command($vmid, $cmdline);

    # If the command succeeds qemu prints: "OK"
    return 1 if $reply =~ m/OK/s;

    syslog("err", "adding drive failed: $reply");
    return undef;
}
3299
# Remove drive 'drive-$deviceid' from VM $vmid using the human monitor
# command 'drive_del'. Returns 1 on success, dies with the monitor reply
# otherwise.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $reply = vm_human_monitor_command($vmid, "drive_del drive-$deviceid");
    $reply =~ s/^\s+//;

    # An empty reply means success.
    # NB: device not found errors mean the drive was auto-deleted and we ignore the error
    if ($reply ne "" && $reply !~ m/Device \'.*?\' not found/s) {
        die "deleting drive $deviceid failed : $reply\n";
    }

    return 1;
}
3313
# Poll vm_devices_list (six attempts, one second apart) until $deviceid
# shows up. Returns 1 once the device is listed; otherwise logs an error
# to syslog and returns undef.
sub qemu_deviceaddverify {
    my ($vmid,$deviceid) = @_;

    foreach my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if defined($present->{$deviceid});
        sleep 1;
    }

    syslog("err", "error on hotplug device $deviceid");
    return undef;
}
3325
3326
# Poll vm_devices_list (six attempts, one second apart) until $deviceid
# is gone. Returns 1 once the device is no longer listed, dies otherwise.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    # need to verify that the device is correctly removed as device_del
    # is async and empty return is not reliable
    foreach my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if !defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
3341
# Make sure the SCSI controller needed by drive $device exists in the
# running VM, hot-plugging it via vm_deviceplug when it is missing.
# Returns 1 on success and undef when plugging the controller failed.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device) = @_;

    # 7 drives per controller for lsi models (and when no model is set),
    # 256 for any other configured model
    my $hwtype = $conf->{scsihw};
    my $drives_per_hw = 7;
    $drives_per_hw = 256 if $hwtype && ($hwtype !~ m/^lsi/);

    my $hw_index = int($device->{index} / $drives_per_hw);
    my $hwid = "scsihw$hw_index";

    my $present = vm_devices_list($vmid);
    return 1 if defined($present->{$hwid});

    return undef if !vm_deviceplug($storecfg, $conf, $vmid, $hwid);

    return 1;
}
3355
# Hot-plug the PCI bridge that device id $device sits on, if any.
#
# print_pci_addr fills the passed hash with the bridge number(s) it needs.
# Returns an empty list/undef when no bridge with a number >= 1 is needed,
# undef when plugging the bridge failed and 1 otherwise.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device) = @_;

    my $needed = {};
    print_pci_addr($device, $needed);

    # remember the last bridge number reported
    my $bridge_nr;
    foreach my $nr (keys %$needed) {
        $bridge_nr = $nr;
    }
    return if !defined($bridge_nr) || $bridge_nr < 1;

    my $bridge_id = "pci.$bridge_nr";

    my $present = vm_devices_list($vmid);
    if (!defined($present->{$bridge_id})) {
        vm_deviceplug($storecfg, $conf, $vmid, $bridge_id) or return undef;
    }

    return 1;
}
3379
# Send a 'netdev_add' monitor command built from print_netdev_full.
# Always returns 1; errors from vm_mon_cmd propagate as exceptions.
sub qemu_netdevadd {
    my ($vmid, $conf, $device, $deviceid) = @_;

    my $spec = print_netdev_full($vmid, $conf, $device, $deviceid);

    # the description is split into key/value pairs on '=' and ','
    my %args = split(/[=,]/, $spec);

    vm_mon_cmd($vmid, "netdev_add", %args);

    return 1;
}
3389
# Send a 'netdev_del' monitor command for $deviceid and return its result.
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    return vm_mon_cmd($vmid, "netdev_del", id => $deviceid);
}
3395
# Raise the number of cores of the running VM $vmid to $cores by sending
# one 'cpu-add' monitor command per additional core.
#
# Dies when more than one socket is configured, when maxcpus is missing or
# exceeded, when $cores is lower than the configured core count, or when
# the running VM reports a different core count than the configuration.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $cores) = @_;

    my $sockets = $conf->{sockets} || 1;
    if ($sockets > 1) {
        die "cpu hotplug only works with one socket\n";
    }

    if (!$conf->{maxcpus}) {
        die "maxcpus is not defined\n";
    }

    if ($cores > $conf->{maxcpus}) {
        die "you can't add more cores than maxcpus\n";
    }

    my $configured = $conf->{cores} || 1;
    if ($cores < $configured) {
        die "online cpu unplug is not yet possible\n";
    }

    # the running VM must match the configuration before we add anything
    my $running_cpus = vm_mon_cmd($vmid, "query-cpus");
    if (scalar(@{$running_cpus}) != $configured) {
        die "cores number if running vm is different than configuration\n";
    }

    my $next_id = $configured;
    while ($next_id < $cores) {
        vm_mon_cmd($vmid, "cpu-add", id => int($next_id));
        $next_id++;
    }
}
3421
# Send a 'block_set_io_throttle' monitor command for $deviceid with the
# given limits (each converted with int()). Does nothing when the VM is
# not running.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid, $bps, $bps_rd, $bps_wr, $iops, $iops_rd, $iops_wr) = @_;

    return if !check_running($vmid) ;

    my @limits = (
        bps => int($bps),
        bps_rd => int($bps_rd),
        bps_wr => int($bps_wr),
        iops => int($iops),
        iops_rd => int($iops_rd),
        iops_wr => int($iops_wr),
    );

    return vm_mon_cmd($vmid, "block_set_io_throttle", device => $deviceid, @limits);
}
3430
# old code, only used to shutdown old VM after update
#
# Read from the monitor handle $fh until the "(qemu) " prompt is seen or
# the handle reports EOF. Returns the text read, without the prompt.
# Dies with the system error when sysread fails, and with
# "monitor read timeout" when no data arrives within $timeout seconds.
sub __read_avail {
    my ($fh, $timeout) = @_;

    my $sel = IO::Select->new();
    $sel->add($fh);

    my $res = '';
    my $buf;

    my @ready;
    while (scalar (@ready = $sel->can_read($timeout))) {
        my $count = $fh->sysread($buf, 8192);

        die "$!\n" if !defined($count);
        last if !$count; # EOF

        $res .= $buf;

        # Match the prompt against everything read so far, not only the
        # last chunk: the prompt may be split across two reads, and then
        # neither chunk ends with the complete prompt.
        if ($res =~ /^(.*)\(qemu\) $/s) {
            $res = $1;
            last;
        }
    }

    die "monitor read timeout\n" if !scalar(@ready);

    return $res;
}
3463
# old code, only used to shutdown old VM after update
#
# Send $cmdstr to the human monitor socket of VM $vmid and return the
# reply text (with the echoed command line removed). Returns undef for the
# 'q'/'quit' commands, which are sent without waiting for a reply. Any
# error is logged to syslog and re-thrown.
sub vm_monitor_command {
    my ($vmid, $cmdstr, $nocheck) = @_;

    my $res;

    eval {
        die "VM $vmid not running\n" if !check_running($vmid, $nocheck);

        my $sname = "${var_run_tmpdir}/$vmid.mon";

        my $sock = IO::Socket::UNIX->new( Peer => $sname ) ||
            die "unable to connect to VM $vmid socket - $!\n";

        # hack: migrate sometime blocks the monitor (when migrate_downtime
        # is set)
        my $is_migrate = ($cmdstr =~ m/^(info\s+migrate|migrate\s)/) ? 1 : 0;

        my $timeout = $is_migrate ? 60*60 : 3; # 1 hour for migrate commands

        # read banner;
        my $banner = __read_avail($sock, $timeout);
        if ($banner !~ m/^QEMU\s+(\S+)\s+monitor\s/) {
            die "got unexpected qemu monitor banner\n";
        }

        my $sel = IO::Select->new();
        $sel->add($sock);

        my @writable = $sel->can_write($timeout);
        if (!scalar(@writable)) {
            die "monitor write error - timeout";
        }

        my $fullcmd = "$cmdstr\r";

        # syslog('info', "VM $vmid monitor command: $cmdstr");

        # a short write counts as an error, too
        my $written = $sock->syswrite($fullcmd);
        if (!$written || ($written != length($fullcmd))) {
            die "monitor write error - $!";
        }

        # no reply expected when the VM is told to quit
        return if ($cmdstr eq 'q') || ($cmdstr eq 'quit');

        if ($is_migrate) {
            $timeout = 60*60; # 1 hour
        } elsif ($cmdstr =~ m/^(eject|change)/) {
            $timeout = 60; # note: cdrom mount command is slow
        } else {
            $timeout = 20;
        }

        if ($res = __read_avail($sock, $timeout)) {

            my @lines = split("\r?\n", $res);

            shift @lines if $lines[0] !~ m/^unknown command/; # skip echo

            $res = join("\n", @lines) . "\n";
        }
    };

    if (my $err = $@) {
        syslog("err", "VM $vmid monitor command failed - $err");
        die $err;
    }

    return $res;
}
3538
# Resize volume $volid to $size through PVE::Storage and, when the VM is
# running, tell qemu about the new size with a 'block_resize' command.
# Returns early when the storage layer call returns false.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $running = check_running($vmid);

    PVE::Storage::volume_resize($storecfg, $volid, $size, $running) or return;

    # nothing to tell qemu when the VM is stopped
    $running or return;

    return vm_mon_cmd($vmid, "block_resize", device => $deviceid, size => int($size));
}
3551
# Create snapshot $snap of volume $volid through PVE::Storage and, when
# the VM is running and the storage call returned true, send a
# 'snapshot-drive' monitor command for $deviceid.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $running = check_running($vmid);

    PVE::Storage::volume_snapshot($storecfg, $volid, $snap, $running) or return;

    # nothing to tell qemu when the VM is stopped
    $running or return;

    return vm_mon_cmd($vmid, "snapshot-drive", device => $deviceid, name => $snap);
}
3564
# Delete snapshot $snap of volume $volid through PVE::Storage and, when
# the VM is running and the storage call returned true, send a
# 'delete-drive-snapshot' monitor command for $deviceid.
sub qemu_volume_snapshot_delete {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $running = check_running($vmid);

    PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $running) or return;

    # nothing to tell qemu when the VM is stopped
    $running or return;

    return vm_mon_cmd($vmid, "delete-drive-snapshot", device => $deviceid, name => $snap);
}
3576
# Query the migration capabilities supported by VM $vmid and set each of
# them: 'auto-converge' is switched on, every other capability off.
sub set_migration_caps {
    my ($vmid) = @_;

    # capabilities not listed here are treated as disabled, too
    my $wanted = {
        "auto-converge" => 1,
        "xbzrle" => 0,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
    };

    my $supported = vm_mon_cmd_nocheck($vmid, "query-migrate-capabilities");

    my @caps = map {
        my $name = $_->{capability};
        +{
            capability => $name,
            state => $wanted->{$name} ? JSON::true : JSON::false,
        };
    } @$supported;

    return vm_mon_cmd_nocheck($vmid, "migrate-set-capabilities", capabilities => \@caps);
}
3600
# Apply pending configuration changes to the running VM (hotplug).
#
# $selection, when set, is a hash naming the options to apply, for
# example: { cores => 1 } (only apply changed 'cores').
# $errors is a hash reference that receives one message per option that
# could not be hot-plugged.
# After each successful change the config is written and reloaded.
sub vmconfig_hotplug_pending {
    my ($vmid, $conf, $storecfg, $selection, $errors) = @_;

    my $defaults = load_defaults();

    my $record_error = sub {
        my ($opt, $msg) = @_;
        $errors->{$opt} = "hotplug problem - $msg";
    };

    # commit values which do not have any impact on running VM first
    # Note: those options cannot raise errors, so we do not care about
    # $selection and always apply them.
    my %no_impact = map { $_ => 1 } qw(name hotplug onboot shares);

    my $changes = 0;
    foreach my $opt (keys %{$conf->{pending}}) { # add/change
        next if !$no_impact{$opt};
        $conf->{$opt} = $conf->{pending}->{$opt};
        delete $conf->{pending}->{$opt};
        $changes = 1;
    }

    if ($changes) {
        update_config_nolock($vmid, $conf, 1);
        $conf = load_config($vmid); # update/reload
    }

    my $hotplug = defined($conf->{hotplug}) ? $conf->{hotplug} : $defaults->{hotplug};

    # first pass: options scheduled for deletion
    my @to_delete = PVE::Tools::split_list($conf->{pending}->{delete});
    foreach my $opt (@to_delete) {
        next if $selection && !$selection->{$opt};

        eval {
            my $is_net = ($opt =~ m/^net(\d+)$/) ? 1 : 0;

            # only tablet, cores and netX can be removed online, and only
            # with hotplug enabled; "skip\n" marks options left pending
            die "skip\n" if !($opt eq 'tablet' || $opt eq 'cores' || $is_net);
            die "skip\n" if !$hotplug;

            if ($opt eq 'tablet') {
                # deleting the option means: fall back to the default
                if ($defaults->{tablet}) {
                    vm_deviceplug($storecfg, $conf, $vmid, $opt);
                } else {
                    vm_deviceunplug($vmid, $conf, $opt);
                }
            } elsif ($opt eq 'cores') {
                qemu_cpu_hotplug($vmid, $conf, 1);
            } else {
                vm_deviceunplug($vmid, $conf, $opt);
            }
        };
        if (my $err = $@) {
            $record_error->($opt, $err) if $err ne "skip\n";
            next;
        }

        # save new config if hotplug was successful
        delete $conf->{$opt};
        vmconfig_undelete_pending_option($conf, $opt);
        update_config_nolock($vmid, $conf, 1);
        $conf = load_config($vmid); # update/reload
    }

    # second pass: options to add or change
    foreach my $opt (keys %{$conf->{pending}}) {
        next if $selection && !$selection->{$opt};

        my $value = $conf->{pending}->{$opt};

        eval {
            if ($opt eq 'tablet') {
                die "skip\n" if !$hotplug;
                if ($value == 1) {
                    vm_deviceplug($storecfg, $conf, $vmid, $opt);
                } elsif ($value == 0) {
                    vm_deviceunplug($vmid, $conf, $opt);
                }
            } elsif ($opt eq 'cores') {
                die "skip\n" if !$hotplug;
                qemu_cpu_hotplug($vmid, $conf, $value);
            } elsif ($opt eq 'balloon') {
                # allow manual ballooning if shares is set to zero
                my $manual = defined($conf->{shares}) && ($conf->{shares} == 0);
                die "skip\n" if !$manual;
                my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory};
                vm_mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
            } elsif ($opt =~ m/^net(\d+)$/) {
                # some changes can be done without hotplug
                vmconfig_update_net($storecfg, $conf, $vmid, $opt, $value);
            } else {
                die "skip\n"; # skip non-hot-pluggable options
            }
        };
        if (my $err = $@) {
            $record_error->($opt, $err) if $err ne "skip\n";
            next;
        }

        # save new config if hotplug was successful
        $conf->{$opt} = $value;
        delete $conf->{pending}->{$opt};
        update_config_nolock($vmid, $conf, 1);
        $conf = load_config($vmid); # update/reload
    }
}
3704
# Move all pending changes into the active configuration (cold plug).
#
# Pending deletions are handled first, then pending additions/changes.
# The config is reloaded before and written after every single option.
# Drives that get replaced or removed are handed to
# vmconfig_register_unused_drive.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg) = @_;

    # cold plug

    my @to_delete = PVE::Tools::split_list($conf->{pending}->{delete});
    foreach my $opt (@to_delete) { # delete
        die "internal error" if $opt =~ m/^unused/;

        $conf = load_config($vmid); # update/reload

        if (defined($conf->{$opt})) {
            # keep the volume of a removed drive around as unused disk
            if (valid_drivename($opt)) {
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt}));
            }
            vmconfig_undelete_pending_option($conf, $opt);
            delete $conf->{$opt};
        } else {
            # option is not set at all, only clear the pending marker
            vmconfig_undelete_pending_option($conf, $opt);
        }

        update_config_nolock($vmid, $conf, 1);
    }

    $conf = load_config($vmid); # update/reload

    foreach my $opt (keys %{$conf->{pending}}) { # add/change
        $conf = load_config($vmid); # update/reload

        my $new_value = $conf->{pending}->{$opt};
        my $unchanged = defined($conf->{$opt}) && ($conf->{$opt} eq $new_value);

        if (!$unchanged) {
            # keep the volume of a replaced drive around as unused disk
            if (valid_drivename($opt) && defined($conf->{$opt})) {
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt}));
            }
            $conf->{$opt} = $new_value;
        }

        delete $conf->{pending}->{$opt};
        update_config_nolock($vmid, $conf, 1);
    }
}
3748
# Numeric "not equal" that tolerates undef: two undefined values are equal
# (returns 0), exactly one undefined value is a difference (returns 1),
# otherwise the result of '!=' is returned.
my $safe_num_ne = sub {
    my ($lhs, $rhs) = @_;

    if (defined($lhs) && defined($rhs)) {
        return $lhs != $rhs;
    }

    return (defined($lhs) || defined($rhs)) ? 1 : 0;
};
3758
# String "not equal" that tolerates undef: two undefined values are equal
# (returns 0), exactly one undefined value is a difference (returns 1),
# otherwise the result of 'ne' is returned.
my $safe_string_ne = sub {
    my ($lhs, $rhs) = @_;

    if (defined($lhs) && defined($rhs)) {
        return $lhs ne $rhs;
    }

    return (defined($lhs) || defined($rhs)) ? 1 : 0;
};
3768
# Apply the new network setting $value for option $opt (netX) to the
# running VM.
#
# When the model, MAC address, queue count or the bridge/nat mode changes,
# the old device is hot-unplugged first (dies with "skip\n" when hotplug is
# disabled in $conf). Rate limit, bridge, VLAN tag and firewall changes are
# applied online on the tap interface. Finally the device is plugged via
# vm_deviceplug.
#
# Returns the (true) result of vm_deviceplug. Dies when vm_deviceplug
# reports failure, so that callers wrapping this call in eval do not treat
# a failed plug as success.
sub vmconfig_update_net {
    my ($storecfg, $conf, $vmid, $opt, $value) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        if ($safe_string_ne->($oldnet->{model}, $newnet->{model}) ||
            $safe_string_ne->($oldnet->{macaddr}, $newnet->{macaddr}) ||
            $safe_num_ne->($oldnet->{queues}, $newnet->{queues}) ||
            !($newnet->{bridge} && $oldnet->{bridge})) { # bridge/nat mode change

            # for non online change, we try to hot-unplug
            die "skip\n" if !$conf->{hotplug};
            vm_deviceunplug($vmid, $conf, $opt);
        } else {

            die "internal error" if $opt !~ m/net(\d+)/;
            my $iface = "tap${vmid}i$1";

            if ($safe_num_ne->($oldnet->{rate}, $newnet->{rate})) {
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            # re-plug the tap device when its bridge attachment changes
            if ($safe_string_ne->($oldnet->{bridge}, $newnet->{bridge}) ||
                $safe_num_ne->($oldnet->{tag}, $newnet->{tag}) ||
                $safe_num_ne->($oldnet->{firewall}, $newnet->{firewall})) {
                PVE::Network::tap_unplug($iface);
                PVE::Network::tap_plug($iface, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall});
            }
        }
    }

    # vm_deviceplug signals failure with a false return value instead of
    # an exception, so turn that into an error here
    my $plugged = vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet);
    die "unable to hotplug device '$opt'\n" if !$plugged;

    return $plugged;
}
3805
# Start VM $vmid while holding the config lock.
#
# $statefile: 'tcp' starts the VM waiting for an incoming migration, any
#   other true value is passed to qemu with '-loadstate'.
# $skiplock: do not check the 'lock' setting of the config.
# $migratedfrom: passed to load_config/check_running; also exported as
#   PVE_MIGRATED_FROM for the network script.
# $paused: start the VM with '-S' (only used without $statefile).
# $forcemachine: passed through to config_to_command.
# $spice_ticket: spice password to set after an incoming migration start.
#
# Pending config changes are applied before the start unless a statefile
# is used. Dies when the VM is a template, already running, when a host
# PCI device cannot be prepared, or when the start command fails.
sub vm_start {
    my ($storecfg, $vmid, $statefile, $skiplock, $migratedfrom, $paused, $forcemachine, $spice_ticket) = @_;

    lock_config($vmid, sub {
        my $conf = load_config($vmid, $migratedfrom);

        die "you can't start a vm if it's a template\n" if is_template($conf);

        check_lock($conf) if !$skiplock;

        die "VM $vmid already running\n" if check_running($vmid, undef, $migratedfrom);

        my $has_pending = scalar(keys %{$conf->{pending}});
        if (!$statefile && $has_pending) {
            vmconfig_apply_pending($vmid, $conf, $storecfg);
            $conf = load_config($vmid); # update/reload
        }

        my $defaults = load_defaults();

        # set environment variable useful inside network script
        $ENV{PVE_MIGRATED_FROM} = $migratedfrom if $migratedfrom;

        my ($cmd, $vollist, $spice_port) = config_to_command($storecfg, $vmid, $conf, $defaults, $forcemachine);

        my $migrate_port = 0;
        my $migrate_uri;
        if ($statefile && $statefile eq 'tcp') {
            # incoming migration: listen on localhost unless unsecure
            # migration is enabled in datacenter.cfg
            my $listen_ip = "localhost";
            my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            if ($dc_conf->{migration_unsecure}) {
                my $nodename = PVE::INotify::nodename();
                $listen_ip = PVE::Cluster::remote_node_ip($nodename, 1);
            }
            $migrate_port = PVE::Tools::next_migrate_port();
            $migrate_uri = "tcp:${listen_ip}:${migrate_port}";
            push @$cmd, '-incoming', $migrate_uri, '-S';
        } elsif ($statefile) {
            push @$cmd, '-loadstate', $statefile;
        } elsif ($paused) {
            push @$cmd, '-S';
        }

        # host pci devices
        foreach my $i (0 .. $MAX_HOSTPCI_DEVICES - 1) {
            my $d = parse_hostpci($conf->{"hostpci$i"});
            next if !$d;

            my $use_vfio = $d->{driver} && $d->{driver} eq "vfio";

            foreach my $pcidevice (@{$d->{pciid}}) {
                my $pciid = $pcidevice->{id}.".".$pcidevice->{function};

                my $info = pci_device_info("0000:$pciid");
                die "IOMMU not present\n" if !check_iommu_support();
                die "no pci device info for device '$pciid'\n" if !$info;

                if ($use_vfio) {
                    pci_dev_group_bind_to_vfio($pciid)
                        or die "can't unbind/bind pci group to vfio '$pciid'\n";
                } else {
                    pci_dev_bind_to_stub($info)
                        or die "can't unbind/bind to stub pci device '$pciid'\n";
                }

                # only reset devices that offer a reset file in sysfs
                if ($info->{has_fl_reset} && !pci_dev_reset($info)) {
                    die "can't reset pci device '$pciid'\n";
                }
            }
        }

        PVE::Storage::activate_volumes($storecfg, $vollist);

        # no timeout when a state has to be loaded
        my $start_timeout = $statefile ? undef : 30;
        eval { run_command($cmd, timeout => $start_timeout, umask => 0077); };
        if (my $err = $@) {
            die "start failed: $err";
        }

        print "migration listens on $migrate_uri\n" if $migrate_uri;

        if ($statefile && $statefile ne 'tcp') {
            eval { vm_mon_cmd_nocheck($vmid, "cont"); };
            warn $@ if $@;
        }

        if ($migratedfrom) {

            eval { set_migration_caps($vmid); };
            warn $@ if $@;

            if ($spice_port) {
                print "spice listens on port $spice_port\n";
                if ($spice_ticket) {
                    vm_mon_cmd_nocheck($vmid, "set_password", protocol => 'spice', password => $spice_ticket);
                    vm_mon_cmd_nocheck($vmid, "expire_password", protocol => 'spice', time => "+30");
                }
            }

        } else {

            # balloon device is in use unless balloon is explicitly set to 0
            my $balloon_enabled = !defined($conf->{balloon}) || $conf->{balloon};
            if (!$statefile && $balloon_enabled) {
                vm_mon_cmd_nocheck($vmid, "balloon", value => $conf->{balloon}*1024*1024)
                    if $conf->{balloon};
                vm_mon_cmd_nocheck($vmid, 'qom-set',
                                   path => "machine/peripheral/balloon0",
                                   property => "guest-stats-polling-interval",
                                   value => 2);
            }
        }
    });
}
3915
# Run QMP command $execute with arguments %params on VM $vmid and return
# the result of vm_qmp_command.
sub vm_mon_cmd {
    my ($vmid, $execute, %params) = @_;

    return vm_qmp_command($vmid, { execute => $execute, arguments => \%params });
}
3922
# Same as vm_mon_cmd, but passes a true $nocheck flag to vm_qmp_command.
sub vm_mon_cmd_nocheck {
    my ($vmid, $execute, %params) = @_;

    return vm_qmp_command($vmid, { execute => $execute, arguments => \%params }, 1);
}
3929
# Execute the QMP command $cmd ({ execute => ..., arguments => {...} }) on
# VM $vmid and return its result.
#
# An optional 'timeout' entry in the arguments is removed from the command
# and used as the client timeout instead. $nocheck is passed to
# check_running.
#
# VMs that only have the old human monitor socket accept commands without
# arguments only; the command name is then sent through
# vm_monitor_command and no result is returned.
#
# Errors are logged to syslog and re-thrown.
sub vm_qmp_command {
    my ($vmid, $cmd, $nocheck) = @_;

    my $res;

    my $timeout;
    if ($cmd->{arguments} && $cmd->{arguments}->{timeout}) {
        $timeout = $cmd->{arguments}->{timeout};
        delete $cmd->{arguments}->{timeout};
    }

    eval {
        die "VM $vmid not running\n" if !check_running($vmid, $nocheck);
        my $sname = qmp_socket($vmid);
        if (-e $sname) { # test if VM is reasonably new and supports qmp/qga
            my $qmpclient = PVE::QMPClient->new();

            $res = $qmpclient->cmd($vmid, $cmd, $timeout);
        } elsif (-e "${var_run_tmpdir}/$vmid.mon") {
            # Callers may pass a command without any 'arguments' entry;
            # dereferencing the missing hash would die under strict refs,
            # so only look into it when it is there.
            my $args = $cmd->{arguments};
            die "can't execute complex command on old monitor - stop/start your vm to fix the problem\n"
                if $args && scalar(%$args);
            vm_monitor_command($vmid, $cmd->{execute}, $nocheck);
        } else {
            die "unable to open monitor socket\n";
        }
    };
    if (my $err = $@) {
        syslog("err", "VM $vmid qmp command failed - $err");
        die $err;
    }

    return $res;
}
3963
# Run the human monitor command line $cmdline on VM $vmid through the QMP
# 'human-monitor-command' command and return its result.
sub vm_human_monitor_command {
    my ($vmid, $cmdline) = @_;

    my $args = { 'command-line' => $cmdline};

    return vm_qmp_command($vmid, {
        execute => 'human-monitor-command',
        arguments => $args,
    });
}
3976
# Return the command line used to start VM $vmid as a single string, with
# the arguments joined by blanks.
sub vm_commandline {
    my ($storecfg, $vmid) = @_;

    my $conf = load_config($vmid);
    my $defaults = load_defaults();

    # called in scalar context on purpose, only the command is wanted
    my $cmd = config_to_command($storecfg, $vmid, $conf, $defaults);

    my $cmdline = join(' ', @$cmd);
    return $cmdline;
}
3988
# Reset VM $vmid with the 'system_reset' monitor command while holding the
# config lock. The 'lock' setting is checked unless $skiplock is set.
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $do_reset = sub {
        my $conf = load_config($vmid);

        check_lock($conf) if !$skiplock;

        vm_mon_cmd($vmid, "system_reset");
    };

    lock_config($vmid, $do_reset);
}
4001
# Return an array reference with the volume IDs used by config $conf.
# Absolute paths and IDs that PVE::Storage::parse_volume_id cannot map to
# a storage are left out.
sub get_vm_volumes {
    my ($conf) = @_;

    my @volumes;

    foreach_volid($conf, sub {
        my ($volid) = @_;

        # absolute paths are not storage volumes
        return if $volid =~ m|^/|;

        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        push @volumes, $volid if $sid;

        return;
    });

    return \@volumes;
}
4019
# Clean up after VM $vmid has stopped: remove the fairsched group,
# deactivate the VM's volumes (unless $keepActive is set) and remove the
# runtime files below /var/run/qemu-server. Errors are only warned about.
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive) = @_;

    eval {
        fairsched_rmnod($vmid); # try to destroy group

        if (!$keepActive) {
            PVE::Storage::deactivate_volumes($storecfg, get_vm_volumes($conf));
        }

        # monitor/qmp sockets, pid file, vnc and guest agent sockets
        unlink map { "/var/run/qemu-server/${vmid}.$_" } qw(mon qmp pid vnc qga);
    };
    if (my $err = $@) {
        warn $err; # avoid errors - just warn
    }
}
4037
# Stop or shut down VM $vmid.
#
# Note: use $nocheck to skip tests if VM configuration file exists.
# We need that when migrating VMs to other nodes (files already moved).
# Note: we set $keepActive in vzdump stop mode - volumes need to stay active
#
# $shutdown: ask the guest to power down (guest agent when configured,
#   ACPI power down otherwise) instead of quitting qemu.
# $timeout: seconds to wait for the VM to go away; defaults to the 'down'
#   value of the startup option on shutdown, else to 60.
# $force: terminate the process (SIGTERM, then SIGKILL) when the VM does
#   not stop; defaults to true unless $shutdown is set.
# $migratedfrom: only terminate the process and clean up, without taking
#   the config lock.
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    $force = 1 if !defined($force) && !$shutdown;

    if ($migratedfrom){
        my $pid = check_running($vmid, $nocheck, $migratedfrom);
        kill 15, $pid if $pid;
        my $conf = load_config($vmid, $migratedfrom);
        vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive);
        return;
    }

    # wait up to $limit seconds for the VM to disappear, returns the
    # number of seconds waited
    my $wait_for_exit = sub {
        my ($limit) = @_;

        my $waited = 0;
        while (($waited < $limit) && check_running($vmid, $nocheck)) {
            $waited++;
            sleep 1;
        }
        return $waited;
    };

    lock_config($vmid, sub {

        my $pid = check_running($vmid, $nocheck);
        return if !$pid;

        my $conf;
        if (!$nocheck) {
            $conf = load_config($vmid);
            check_lock($conf) if !$skiplock;
            if (!defined($timeout) && $shutdown && $conf->{startup}) {
                my $opts = parse_startup($conf->{startup});
                $timeout = $opts->{down} if $opts->{down};
            }
        }

        $timeout = 60 if !defined($timeout);

        eval {
            my $qmp_cmd = "quit";
            if ($shutdown) {
                $qmp_cmd = (!$nocheck && $conf->{agent}) ? "guest-shutdown" : "system_powerdown";
            }
            vm_qmp_command($vmid, { execute => $qmp_cmd }, $nocheck);
        };
        my $err = $@;

        if ($err) {
            die "VM quit/powerdown failed\n" if !$force;

            warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
            kill 15, $pid;
        } else {
            my $waited = $wait_for_exit->($timeout);

            if ($waited < $timeout) {
                # VM is gone, we are done
                vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive) if $conf;
                return;
            }

            die "VM quit/powerdown failed - got timeout\n" if !$force;

            warn "VM still running - terminating now with SIGTERM\n";
            kill 15, $pid;
        }

        # wait again
        $timeout = 10;

        if ($wait_for_exit->($timeout) >= $timeout) {
            warn "VM still running - terminating now with SIGKILL\n";
            kill 9, $pid;
            sleep 1;
        }

        vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive) if $conf;
    });
}
4129
# Pause VM $vmid with the 'stop' monitor command while holding the config
# lock. The 'lock' setting is not checked when $skiplock is set or when
# the VM is locked for 'backup'.
sub vm_suspend {
    my ($vmid, $skiplock) = @_;

    lock_config($vmid, sub {

        my $conf = load_config($vmid);

        my $backup_locked = $conf->{lock} && $conf->{lock} eq 'backup';
        unless ($skiplock || $backup_locked) {
            check_lock($conf);
        }

        vm_mon_cmd($vmid, "stop");
    });
}
4142
# Resume VM $vmid with the 'cont' monitor command while holding the config
# lock. The 'lock' setting is not checked when $skiplock is set or when
# the VM is locked for 'backup'.
sub vm_resume {
    my ($vmid, $skiplock) = @_;

    lock_config($vmid, sub {

        my $conf = load_config($vmid);

        my $backup_locked = $conf->{lock} && $conf->{lock} eq 'backup';
        unless ($skiplock || $backup_locked) {
            check_lock($conf);
        }

        vm_mon_cmd($vmid, "cont");
    });
}
4155
# Send key $key to VM $vmid while holding the config lock.
# NOTE(review): $skiplock is accepted but not used, and the loaded config
# is not inspected - presumably load_config is only called to make sure
# the VM exists; confirm.
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $send = sub {
        my $conf = load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        vm_human_monitor_command($vmid, "sendkey $key");
    };

    lock_config($vmid, $send);
}
4167
# Destroy VM $vmid while holding the config lock. The 'lock' setting is
# checked unless $skiplock is set. Dies when the VM is still running.
sub vm_destroy {
    my ($storecfg, $vmid, $skiplock) = @_;

    lock_config($vmid, sub {

        my $conf = load_config($vmid);

        check_lock($conf) if !$skiplock;

        if (check_running($vmid)) {
            die "VM $vmid is running - destroy failed\n";
        }

        fairsched_rmnod($vmid); # try to destroy group
        destroy_vm($storecfg, $vmid);
    });
}
4185
4186 # pci helpers
4187
# Write $buf to file $filename (opened in "w" mode).
#
# Returns undef when the file cannot be opened, 1 when the complete buffer
# was written and the file closed without error, and 0 otherwise (a
# warning with the system error is emitted in that case).
#
# The data is written with syswrite instead of a buffered print: a
# buffered print succeeds as soon as the data sits in the buffer, so a
# write error only shows up at close time and would go unnoticed.
sub file_write {
    my ($filename, $buf) = @_;

    my $fh = IO::File->new($filename, "w");
    return undef if !$fh;

    my $written = syswrite($fh, $buf);
    my $ok = (defined($written) && $written == length($buf)) ? 1 : 0;
    my $err = $!;

    # keep the first error, but do not miss a failing close either
    if (!$fh->close() && $ok) {
        $ok = 0;
        $err = $!;
    }

    warn "error writing '$buf' to '$filename': $err\n" if !$ok;

    return $ok;
}
4200
# Collect information about PCI device $name (format
# "dddd:bb:ss.f", lower-case hex) from the files below
# $pcisysfs/devices/$name.
#
# Returns a hash reference with name, vendor and product (without the
# "0x" prefix), domain, bus, slot, func, irq and has_fl_reset (true when
# a 'reset' file exists). Returns undef when the name is malformed or the
# irq, vendor or device file does not hold the expected value.
sub pci_device_info {
    my ($name) = @_;

    my ($domain, $bus, $slot, $func) =
        $name =~ m/^([a-f0-9]{4}):([a-f0-9]{2}):([a-f0-9]{2})\.([a-f0-9])$/
        or return undef;

    my $devdir = "$pcisysfs/devices/$name";

    my $irq = file_read_firstline("$devdir/irq");
    return undef if !defined($irq) || $irq !~ m/^\d+$/;

    # the substitutions strip the "0x" prefix and fail when it is missing
    my $vendor = file_read_firstline("$devdir/vendor");
    return undef if !defined($vendor) || $vendor !~ s/^0x//;

    my $product = file_read_firstline("$devdir/device");
    return undef if !defined($product) || $product !~ s/^0x//;

    return {
        name => $name,
        vendor => $vendor,
        product => $product,
        domain => $domain,
        bus => $bus,
        slot => $slot,
        func => $func,
        irq => $irq,
        has_fl_reset => -f "$devdir/reset" || 0,
    };
}
4232
# Reset PCI device $dev (a hash as returned by pci_device_info) by writing
# "1" to its 'reset' file. Returns the result of file_write.
sub pci_dev_reset {
    my ($dev) = @_;

    my $reset_file = "$pcisysfs/devices/" . $dev->{name} . "/reset";

    return file_write($reset_file, "1");
}
4242
# Bind PCI device $dev (a hash as returned by pci_device_info) to the
# pci-stub driver.
#
# Returns 1 when the device is already bound, undef when one of the sysfs
# writes fails, and otherwise whether the device directory shows up below
# the pci-stub driver directory.
sub pci_dev_bind_to_stub {
    my ($dev) = @_;

    my $name = $dev->{name};
    my $stubdir = "$pcisysfs/drivers/pci-stub";

    my $bound_marker = "$stubdir/$name";
    return 1 if -d $bound_marker;

    # make the driver accept this vendor/product combination
    my $id = "$dev->{vendor} $dev->{product}";
    return undef if !file_write("$stubdir/new_id", $id);

    # detach from the current driver; a failed write only counts when
    # there is a driver to unbind from
    my $unbind = "$pcisysfs/devices/$name/driver/unbind";
    return undef if !file_write($unbind, $name) && -f $unbind;

    # bind explicitly unless the device got bound in the meantime
    return undef if !-d $bound_marker && !file_write("$stubdir/bind", $name);

    return -d $bound_marker;
}
4266
# Bind PCI device $dev (a hash as returned by pci_device_info) to the
# vfio-pci driver, loading the module with modprobe when its driver
# directory is missing.
#
# Dies when the vfio-pci driver directory is still missing after modprobe.
# Returns 1 when the device is already bound, undef when one of the sysfs
# writes fails, and otherwise whether the device directory shows up below
# the vfio-pci driver directory.
sub pci_dev_bind_to_vfio {
    my ($dev) = @_;

    my $name = $dev->{name};
    my $vfio_basedir = "$pcisysfs/drivers/vfio-pci";

    unless (-d $vfio_basedir) {
        system("/sbin/modprobe vfio-pci >/dev/null 2>/dev/null");
    }
    die "Cannot find vfio-pci module!\n" if !-d $vfio_basedir;

    my $bound_marker = "$vfio_basedir/$name";
    return 1 if -d $bound_marker;

    # make the driver accept this vendor/product combination
    my $id = "$dev->{vendor} $dev->{product}";
    return undef if !file_write("$vfio_basedir/new_id", $id);

    # detach from the current driver; a failed write only counts when
    # there is a driver to unbind from
    my $unbind = "$pcisysfs/devices/$name/driver/unbind";
    return undef if !file_write($unbind, $name) && -f $unbind;

    # bind explicitly unless the device got bound in the meantime
    return undef if !-d $bound_marker && !file_write("$vfio_basedir/bind", $name);

    return -d $bound_marker;
}
4297
# Bind all devices in the IOMMU group of PCI device $pciid ("bb:ss.f",
# domain 0000 is assumed) to the vfio-pci driver, loading the module with
# modprobe when its driver directory is missing.
#
# Returns 1 on success. Dies when the vfio-pci driver directory is
# missing, when the IOMMU group cannot be read, when a group member has an
# invalid name or no device info, or when binding a member fails.
sub pci_dev_group_bind_to_vfio {
    my ($pciid) = @_;

    my $vfio_basedir = "$pcisysfs/drivers/vfio-pci";

    if (!-d $vfio_basedir) {
        system("/sbin/modprobe vfio-pci >/dev/null 2>/dev/null");
    }
    die "Cannot find vfio-pci module!\n" if !-d $vfio_basedir;

    # get IOMMU group devices
    opendir(my $D, "$pcisysfs/devices/0000:$pciid/iommu_group/devices/") || die "Cannot open iommu_group: $!\n";
    my @devs = grep /^0000:/, readdir($D);
    closedir($D);

    foreach my $devname (@devs) {
        $devname =~ m/^([:\.\da-f]+)$/ or die "PCI ID $devname not valid!\n";

        # Without device info the bind helper would build its paths from
        # an undefined name and end up testing the driver directory
        # itself, reporting success without binding anything.
        my $info = pci_device_info($1);
        die "no pci device info for device '$devname'\n" if !$info;

        pci_dev_bind_to_vfio($info) || die "Cannot bind $devname to vfio\n";
    }

    return 1;
}
4321
# Return the ",bus=pci.<bus>,addr=<hex address>" option suffix for the fixed
# PCI slot reserved for device $id, or '' if $id has no reserved slot.
# If $bridges (a hash ref) is passed, the bus number used is recorded in it.
sub print_pci_addr {
    my ($id, $bridges) = @_;

    # device id => [bus, addr]
    my %slot_of = (
        piix3 => [0, 1],
        #addr2 : first videocard
        balloon0 => [0, 3],
        watchdog => [0, 4],
        scsihw0 => [0, 5],
        scsihw1 => [0, 6],
        ahci0 => [0, 7],
        qga0 => [0, 8],
        spice => [0, 9],
        virtio0 => [0, 10],
        virtio1 => [0, 11],
        virtio2 => [0, 12],
        virtio3 => [0, 13],
        virtio4 => [0, 14],
        virtio5 => [0, 15],
        hostpci0 => [0, 16],
        hostpci1 => [0, 17],
        net0 => [0, 18],
        net1 => [0, 19],
        net2 => [0, 20],
        net3 => [0, 21],
        net4 => [0, 22],
        net5 => [0, 23],
        vga1 => [0, 24],
        vga2 => [0, 25],
        vga3 => [0, 26],
        hostpci2 => [0, 27],
        hostpci3 => [0, 28],
        #addr29 : usb-host (pve-usb.cfg)
        'pci.1' => [0, 30],
        'pci.2' => [0, 31],
    );

    # net6 .. net31 use addr 1 .. 26 on bus 1
    foreach my $n (6 .. 31) {
        $slot_of{"net$n"} = [1, $n - 5];
    }

    # virtio6 .. virtio15 use addr 1 .. 10 on bus 2
    foreach my $n (6 .. 15) {
        $slot_of{"virtio$n"} = [2, $n - 5];
    }

    my $slot = $slot_of{$id};
    return '' if !$slot;

    my ($bus, $addr) = @$slot;
    $bridges->{$bus} = 1 if $bridges;

    return sprintf(",bus=pci.%s,addr=0x%x", $bus, $addr);
}
4405
# Return the ",bus=<port>,addr=0x0" option suffix for the PCIe port reserved
# for device $id, or '' if $id has no reserved port.
sub print_pcie_addr {
    my ($id) = @_;

    # hostpci0 .. hostpci3 are attached to ich9-pcie-port-1 .. ich9-pcie-port-4
    my %port_of;
    foreach my $n (0 .. 3) {
        $port_of{"hostpci$n"} = "ich9-pcie-port-" . ($n + 1);
    }

    my $bus = $port_of{$id};
    return '' if !defined($bus);

    return sprintf(",bus=%s,addr=0x%x", $bus, 0);
}
4425
# vzdump restore implementation
4427
# Return the name of the first member listed by "tar tf" for $archive.
#
# Dies if $archive is not an existing file, if tar cannot be started or if
# the listing is empty.
# tar is started without a shell (list form of open), so quotes or other
# shell meta characters in the archive name cannot break the command line.
sub tar_archive_read_firstfile {
    my $archive = shift;

    die "ERROR: file '$archive' does not exist\n" if ! -f $archive;

    # try to detect archive type first
    my $pid = open(my $fh, '-|', 'tar', 'tf', $archive) ||
        die "unable to open file '$archive'\n";
    my $firstfile = <$fh>;
    # only the first entry is needed, so stop tar instead of waiting for
    # the complete listing
    kill 15, $pid;
    close $fh;

    die "ERROR: archive contaions no data\n" if !$firstfile;
    chomp $firstfile;

    return $firstfile;
}
4445
# Remove the volumes recorded in the restore stat file $statfile.
#
# Every line is expected to end in "vzdump:<key>:<volid>". Absolute paths
# are unlinked, everything else is freed with PVE::Storage::vdisk_free using
# $storecfg. This is best effort: problems are reported on STDERR and the
# remaining lines are still processed. A stat file that cannot be opened is
# silently ignored.
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    if (my $fd = IO::File->new($statfile, "r")) {
        while (defined(my $line = <$fd>)) {
            if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                my $volid = $2;
                eval {
                    if ($volid =~ m|^/|) {
                        # The parentheses matter: "unlink $volid || die ..."
                        # applies the || to $volid, so a failed unlink was
                        # never noticed and got reported as success.
                        unlink($volid) || die "unlink failed - $!\n";
                    } else {
                        PVE::Storage::vdisk_free($storecfg, $volid);
                    }
                    print STDERR "temporary volume '$volid' sucessfuly removed\n";
                };
                print STDERR "unable to cleanup '$volid' - $@" if $@;
            } else {
                print STDERR "unable to parse line in statfile - $line";
            }
        }
        $fd->close();
    }
}
4471
# Restore VM $vmid from the backup $archive on behalf of $user.
#
# The archive format ('tar' or 'vma') is taken from $opts->{format} if set,
# otherwise it is derived from the file name ('vma' being the default).
# The compression ('gzip', 'lzop' or none) is always derived from the file
# name and is only passed on to the vma restore.
# Returns whatever the selected restore function returns.
sub restore_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    my $format = $opts->{format};
    my $comp;

    if ($archive =~ m/\.tgz$/ || $archive =~ m/\.tar\.gz$/) {
        $format = 'tar' if !$format;
        $comp = 'gzip';
    } elsif ($archive =~ m/\.tar$/) {
        $format = 'tar' if !$format;
    } elsif ($archive =~ m/\.tar\.lzo$/) {
        # the dots are escaped like in all other patterns, an unescaped
        # '.' would match any character
        $format = 'tar' if !$format;
        $comp = 'lzop';
    } elsif ($archive =~ m/\.vma$/) {
        $format = 'vma' if !$format;
    } elsif ($archive =~ m/\.vma\.gz$/) {
        $format = 'vma' if !$format;
        $comp = 'gzip';
    } elsif ($archive =~ m/\.vma\.lzo$/) {
        $format = 'vma' if !$format;
        $comp = 'lzop';
    } else {
        $format = 'vma' if !$format; # default
    }

    # try to detect archive format
    if ($format eq 'tar') {
        return restore_tar_archive($archive, $vmid, $user, $opts);
    } else {
        return restore_vma_archive($archive, $vmid, $user, $opts, $comp);
    }
}
4505
# Write one line of an archived VM config to $outfd, adapted for the
# restored VM $vmid.
#
#  $cookie - hash ref, its 'netcount' entry numbers the netX lines created
#            from old vlanX settings
#  $map    - hash ref mapping drive names to the volume restored for them
#  $line   - the config line (including its newline)
#  $unique - if true, network devices get a new random MAC address
#
# qmdump/vzdump comments, lock, unusedX, parent and template lines are
# dropped, drives marked with backup=no are commented out and everything
# not handled specially is copied unchanged.
sub restore_update_config_line {
    my ($outfd, $cookie, $vmid, $map, $line, $unique) = @_;

    my @dropped = (
        qr/^\#qmdump\#/,
        qr/^\#vzdump\#/,
        qr/^lock:/,
        qr/^unused\d+:/,
        qr/^parent:/,
        qr/^template:/, # restored VM is never a template
    );
    foreach my $re (@dropped) {
        return if $line =~ $re;
    }

    if ($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/) {
        # try to convert old 1.X settings
        my ($ind, $ethcfg) = ($2, $3);
        foreach my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            if (!$macaddr || $unique) {
                $macaddr = PVE::Tools::random_ether_addr();
            }
            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$ind",
                macaddr => $macaddr,
            });

            print $outfd "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
        return;
    }

    if ($unique && $line =~ m/^(net\d+):\s*(\S+)\s*$/) {
        my ($id, $netstr) = ($1, $2);
        my $net = parse_net($netstr);
        # only replace an address which is really there
        if ($net->{macaddr}) {
            $net->{macaddr} = PVE::Tools::random_ether_addr();
        }
        $netstr = print_net($net);
        print $outfd "$id: $netstr\n";
        return;
    }

    if ($line =~ m/^((ide|scsi|virtio|sata)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);

        if ($line =~ m/backup=no/) {
            print $outfd "#$line";
            return;
        }

        if ($virtdev && $map->{$virtdev}) {
            my $di = parse_drive($virtdev, $value);
            delete $di->{format}; # format can change on restore
            $di->{file} = $map->{$virtdev};
            $value = print_drive($vmid, $di);
            print $outfd "$virtdev: $value\n";
            return;
        }
    }

    # everything else is copied verbatim
    print $outfd $line;
}
4556
# List the volumes PVE::Storage::vdisk_list reports for $vmid (using storage
# configuration $cfg) and return them as a hash ref indexed by volume ID.
# Entries without volume ID or size are skipped; each returned entry gets
# an additional 'path' member.
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $listing = PVE::Storage::vdisk_list($cfg, undef, $vmid);

    my %by_volid;
    for my $storeid (keys %$listing) {
        for my $vol (@{$listing->{$storeid}}) {
            if ($vol->{volid} && $vol->{size}) {
                $vol->{path} = PVE::Storage::path($cfg, $vol->{volid});
                $by_volid{$vol->{volid}} = $vol;
            }
        }
    }

    return \%by_volid;
}
4573
# Collect the paths of all non-cdrom drives found in $conf and, if
# $scan_snapshots is set, in its snapshots.
#
# Returns a hash ref mapping each path to the number of times it was seen.
# Drive $skip_drive (optional) is ignored in the current config, but not in
# the snapshots. Volumes whose storage is not configured are skipped.
sub get_used_paths {
    my ($vmid, $storecfg, $conf, $scan_snapshots, $skip_drive) = @_;

    my %seen;

    my $collect = sub {
        my ($section, $snapname) = @_;

        for my $key (keys %$section) {
            next if !valid_drivename($key);
            next if $skip_drive && $key eq $skip_drive;

            my $drive = parse_drive($key, $section->{$key});
            next if !$drive || !$drive->{file} || drive_is_cdrom($drive);

            if ($drive->{file} =~ m!^/!) {
                # absolute path, use it as it is
                $seen{$drive->{file}}++;
                next;
            }

            my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file}, 1);
            next if !$storeid;
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid, 1);
            next if !$scfg;

            my $path = PVE::Storage::path($storecfg, $drive->{file}, $snapname);
            $seen{$path}++;
        }
    };

    $collect->($conf);

    # from here on nothing is skipped anymore
    undef $skip_drive;

    if ($scan_snapshots) {
        for my $snapname (keys %{$conf->{snapshots}}) {
            $collect->($conf->{snapshots}->{$snapname}, $snapname);
        }
    }

    return \%seen;
}
4614
# Synchronize the drive entries of $conf with the volumes in $volid_hash
# (volume ID => info hash with 'size' and 'path', as built by scan_volids).
#
#  - the size of every non-cdrom drive found in $volid_hash is updated
#  - 'unusedX' entries whose volume (or path) is used by a drive are removed
#  - volumes in $volid_hash which are not referenced are added as unused
#    (vm state volumes excepted)
#
# $conf is modified in place. Returns 1 if something changed, undef
# otherwise.
sub update_disksize {
    my ($vmid, $conf, $volid_hash) = @_;

    my $changes;

    my $used = {};

    # Note: it is allowed to define multiple storages with same path (alias), so
    # we need to check both 'volid' and real 'path' (two different volid can point
    # to the same path).

    my $usedpath = {};

    # update size info
    foreach my $opt (keys %$conf) {
        if (valid_drivename($opt)) {
            my $drive = parse_drive($opt, $conf->{$opt});
            my $volid = $drive->{file};
            next if !$volid;

            $used->{$volid} = 1;
            if ($volid_hash->{$volid} &&
                (my $path = $volid_hash->{$volid}->{path})) {
                $usedpath->{$path} = 1;
            }

            next if drive_is_cdrom($drive);
            next if !$volid_hash->{$volid};

            $drive->{size} = $volid_hash->{$volid}->{size};
            my $new = print_drive($vmid, $drive);
            if ($new ne $conf->{$opt}) {
                $changes = 1;
                $conf->{$opt} = $new;
            }
        }
    }

    # remove 'unusedX' entry if volume is used
    foreach my $opt (keys %$conf) {
        next if $opt !~ m/^unused\d+$/;
        my $volid = $conf->{$opt};
        # Do not use "my $path = ... if COND" here: the value of a 'my'
        # variable declared with a statement modifier is undefined when the
        # condition is false, it may be the one of a previous iteration.
        my $info = $volid_hash->{$volid};
        my $path = $info ? $info->{path} : undef;
        if ($used->{$volid} || ($path && $usedpath->{$path})) {
            $changes = 1;
            delete $conf->{$opt};
        }
    }

    foreach my $volid (sort keys %$volid_hash) {
        next if $volid =~ m/vm-$vmid-state-/;
        next if $used->{$volid};
        my $path = $volid_hash->{$volid}->{path};
        next if !$path; # just to be sure
        next if $usedpath->{$path};
        $changes = 1;
        add_unused_volume($conf, $volid);
        $usedpath->{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
4677
# Rescan the storages and update disk sizes and unused volumes in the
# config of VM $vmid, or of all VMs returned by config_list() if $vmid is
# undefined. Unless $nolock is set, each config is updated under
# lock_config().
sub rescan {
    my ($vmid, $nolock) = @_;

    my $cfg = PVE::Cluster::cfs_read_file("storage.cfg");

    my $volid_hash = scan_volids($cfg, $vmid);

    my $update_one = sub {
        my ($id) = @_;

        my $conf = load_config($id);

        check_lock($conf);

        # only consider volumes which belong to this VM
        my %owned;
        for my $volid (keys %$volid_hash) {
            my $info = $volid_hash->{$volid};
            next if !($info->{vmid} && $info->{vmid} == $id);
            $owned{$volid} = $info;
        }

        my $changes = update_disksize($id, $conf, \%owned);
        if ($changes) {
            update_config_nolock($id, $conf, 1);
        }
    };

    my @todo;
    if (defined($vmid)) {
        @todo = ($vmid);
    } else {
        my $vmlist = config_list();
        @todo = keys %$vmlist;
    }

    for my $id (@todo) {
        if ($nolock) {
            $update_one->($id);
        } else {
            lock_config($id, $update_one, $id);
        }
    }
}
4720
# Restore VM $vmid from the VMA archive $archive ('-' means STDIN).
#
#  $user - user the restore is done for; storage permissions are checked
#          for everybody except root@pam
#  $opts - hash ref, the entries 'storage', 'pool' and 'unique' are used
#  $comp - undef, 'gzip' or 'lzop'
#
# "vma extract" is started with a fifo. As soon as it reported its devices
# and the CTIME line, the archived config is read, the target volumes are
# allocated and the device map is written to the fifo. The new config is
# written to a temporary file which is renamed to the real config file at
# the end. On errors all volumes allocated so far are removed again and the
# error is re-thrown.
sub restore_vma_archive {
    my ($archive, $vmid, $user, $opts, $comp) = @_;

    my $input = $archive eq '-' ? "<&STDIN" : undef;
    my $readfrom = $archive;

    my $uncomp = '';
    if ($comp) {
        $readfrom = '-';
        my $qarchive = PVE::Tools::shellquote($archive);
        if ($comp eq 'gzip') {
            $uncomp = "zcat $qarchive|";
        } elsif ($comp eq 'lzop') {
            $uncomp = "lzop -d -c $qarchive|";
        } else {
            die "unknown compression method '$comp'\n";
        }

    }

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;

    # disable interrupts (always do cleanups)
    # Note: each signal needs its own 'local', else only the first one is
    # restored when the scope is left.
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };

    my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
    POSIX::mkfifo($mapfifo, 0600);
    my $fifofh;

    my $openfifo = sub {
        open($fifofh, '>', $mapfifo) || die $!;
    };

    # the command is run by a shell, so the archive name must be quoted
    # (same as for the uncompress commands above)
    my $qreadfrom = PVE::Tools::shellquote($readfrom);
    my $cmd = "${uncomp}vma extract -v -r $mapfifo $qreadfrom $tmpdir";

    my $oldtimeout;
    my $timeout = 5;

    my $devinfo = {};

    my $rpcenv = PVE::RPCEnvironment::get();

    my $conffile = config_file($vmid);
    my $tmpfn = "$conffile.$$.tmp";

    # Note: $oldconf is undef if VM does not exists
    my $oldconf = PVE::Cluster::cfs_read_file(cfs_config_path($vmid));

    # allocate the volumes, send the device map to vma and write the new
    # config to $tmpfn
    my $print_devmap = sub {
        my $virtdev_hash = {};

        my $cfgfn = "$tmpdir/qemu-server.conf";

        # we can read the config - that is already extracted
        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        while (defined(my $line = <$fh>)) {
            if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
                my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);
                die "archive does not contain data for drive '$virtdev'\n"
                    if !$devinfo->{$devname};
                if (defined($opts->{storage})) {
                    $storeid = $opts->{storage} || 'local';
                } elsif (!$storeid) {
                    $storeid = 'local';
                }
                $format = 'raw' if !$format;
                $devinfo->{$devname}->{devname} = $devname;
                $devinfo->{$devname}->{virtdev} = $virtdev;
                $devinfo->{$devname}->{format} = $format;
                $devinfo->{$devname}->{storeid} = $storeid;

                # check permission on storage
                my $pool = $opts->{pool}; # todo: do we need that?
                if ($user ne 'root@pam') {
                    $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace']);
                }

                $virtdev_hash->{$virtdev} = $devinfo->{$devname};
            }
        }

        foreach my $devname (keys %$devinfo) {
            die "found no device mapping information for device '$devname'\n"
                if !$devinfo->{$devname}->{virtdev};
        }

        my $cfg = cfs_read_file('storage.cfg');

        # create empty/temp config
        if ($oldconf) {
            PVE::Tools::file_set_contents($conffile, "memory: 128\n");
            foreach_drive($oldconf, sub {
                my ($ds, $drive) = @_;

                return if drive_is_cdrom($drive);

                my $volid = $drive->{file};

                return if !$volid || $volid =~ m|^/|;

                my ($path, $owner) = PVE::Storage::path($cfg, $volid);
                return if !$path || !$owner || ($owner != $vmid);

                # Note: only delete disk we want to restore
                # other volumes will become unused
                if ($virtdev_hash->{$ds}) {
                    PVE::Storage::vdisk_free($cfg, $volid);
                }
            });
        }

        my $map = {};
        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            my $alloc_size = int(($d->{size} + 1024 - 1)/1024);
            my $scfg = PVE::Storage::storage_config($cfg, $d->{storeid});

            # test if requested format is supported
            my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($cfg, $d->{storeid});
            my $supported = grep { $_ eq $d->{format} } @$validFormats;
            $d->{format} = $defFormat if !$supported;

            my $volid = PVE::Storage::vdisk_alloc($cfg, $d->{storeid}, $vmid,
                                                  $d->{format}, undef, $alloc_size);
            print STDERR "new volume ID is '$volid'\n";
            $d->{volid} = $volid;
            my $path = PVE::Storage::path($cfg, $volid);

            my $write_zeros = 1;
            # fixme: what other storages types initialize volumes with zero?
            if ($scfg->{type} eq 'dir' || $scfg->{type} eq 'nfs' || $scfg->{type} eq 'glusterfs' ||
                $scfg->{type} eq 'sheepdog' || $scfg->{type} eq 'rbd') {
                $write_zeros = 0;
            }

            print $fifofh "${write_zeros}:$d->{devname}=$path\n";

            print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
            $map->{$virtdev} = $volid;
        }

        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $outfd = IO::File->new($tmpfn, "w") ||
            die "unable to write config for VM $vmid\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            restore_update_config_line($outfd, $cookie, $vmid, $map, $line, $opts->{unique});
        }

        $fh->close();
        $outfd->close();
    };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
        local $SIG{ALRM} = sub { die "got timeout\n"; };

        $oldtimeout = alarm($timeout);

        my $parser = sub {
            my $line = shift;

            print "$line\n";

            if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
                my ($dev_id, $size, $devname) = ($1, $2, $3);
                $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
            } elsif ($line =~ m/^CTIME: /) {
                # we correctly received the vma config, so we can disable
                # the timeout now for disk allocation (set to 10 minutes, so
                # that we always timeout if something goes wrong)
                alarm(600);
                &$print_devmap();
                print $fifofh "done\n";
                my $tmp = $oldtimeout || 0;
                $oldtimeout = undef;
                alarm($tmp);
                close($fifofh);
            }
        };

        print "restore vma archive: $cmd\n";
        run_command($cmd, input => $input, outfunc => $parser, afterfork => $openfifo);
    };
    my $err = $@;

    alarm($oldtimeout) if $oldtimeout;

    unlink $mapfifo;

    if ($err) {
        rmtree $tmpdir;
        unlink $tmpfn;

        my $cfg = cfs_read_file('storage.cfg');
        foreach my $devname (keys %$devinfo) {
            my $volid = $devinfo->{$devname}->{volid};
            next if !$volid;
            eval {
                if ($volid =~ m|^/|) {
                    # the parentheses matter, "unlink $volid || die" would
                    # apply the || to $volid and never notice a failure
                    unlink($volid) || die "unlink failed - $!\n";
                } else {
                    PVE::Storage::vdisk_free($cfg, $volid);
                }
                print STDERR "temporary volume '$volid' sucessfuly removed\n";
            };
            print STDERR "unable to cleanup '$volid' - $@" if $@;
        }
        die $err;
    }

    rmtree $tmpdir;

    rename($tmpfn, $conffile) ||
        die "unable to commit configuration file '$conffile'\n";

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
4952
# Restore VM $vmid from the tar archive $archive ('-' means STDIN).
#
#  $user - exported to the extract helper as VZDUMP_USER
#  $opts - hash ref, the entries 'storage', 'pool', 'prealloc', 'info' and
#          'unique' are used
#
# Existing data of the VM is destroyed first (an empty config is kept).
# The archive members are piped to /usr/lib/qemu-server/qmextract, then the
# archived config is rewritten using the volume map from qmrestore.stat and
# committed as the new VM config. On errors the volumes listed in the stat
# file are removed again and the error is re-thrown.
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' dos not lock like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = cfs_read_file('storage.cfg');

    # destroy existing data - keep empty config
    my $vmcfgfn = config_file($vmid);
    destroy_vm($storecfg, $vmid, 1) if -f $vmcfgfn;

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath $tmpdir;

    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = config_file($vmid);
    my $tmpfn = "$conffile.$$.tmp";

    # disable interrupts (always do cleanups)
    # Note: each signal needs its own 'local', else only the first one is
    # restored when the scope is left.
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        # Note: this only leaves the eval block
        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined (my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") ||
            die "unable to open file '$confsrc'\n";

        my $outfd = IO::File->new($tmpfn, "w") ||
            die "unable to write config for VM $vmid\n";

        my $cookie = { netcount => 0 };
        while (defined (my $line = <$srcfd>)) {
            restore_update_config_line($outfd, $cookie, $vmid, $map, $line, $opts->{unique});
        }

        $srcfd->close();
        $outfd->close();
    };
    my $err = $@;

    if ($err) {

        unlink $tmpfn;

        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};

        die $err;
    }

    rmtree $tmpdir;

    # The parentheses matter: "rename $tmpfn, $conffile || die" applies the
    # || to $conffile, so a failed rename was never noticed.
    # With 'info' no new config is written, so there is nothing to commit.
    if (!$opts->{info}) {
        rename($tmpfn, $conffile) ||
            die "unable to commit configuration file '$conffile'\n";
    }

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
5062
5063
5064 # Internal snapshots
5065
# NOTE: Snapshot create/delete involves several non-atomic
# actions, and can take a long time.
# So we try to avoid locking the file and use the 'lock' variable
# inside the config file instead.
5070
# Copy the settings of $source to $dest (both hash refs).
# The snapshot bookkeeping keys (snapshots, snapstate, snaptime, vmstate),
# lock, digest, description and all unusedX entries are not copied.
my $snapshot_copy_config = sub {
    my ($source, $dest) = @_;

    my %skip = map { $_ => 1 } qw(
        snapshots
        snapstate
        snaptime
        vmstate
        lock
        digest
        description
    );

    my @wanted = grep { !$skip{$_} && $_ !~ m/^unused\d+$/ } keys %$source;
    for my $key (@wanted) {
        $dest->{$key} = $source->{$key};
    }
};
5087
# Build and return a new config (hash ref) from snapshot $snap: the snapshot
# list, the description and the unusedX entries are taken from the current
# config $conf, everything else is copied from $snap with
# $snapshot_copy_config.
my $snapshot_apply_config = sub {
    my ($conf, $snap) = @_;

    # copy snapshot list
    my %merged = (snapshots => $conf->{snapshots});

    # keep description and list of unused disks
    for my $key (grep { $_ =~ m/^unused\d+$/ || $_ eq 'description' } keys %$conf) {
        $merged{$key} = $conf->{$key};
    }

    $snapshot_copy_config->($snap, \%merged);

    return \%merged;
};
5106
# Call $func->($storeid) once for every storage referenced by a non-cdrom
# drive of $conf, in sorted order of the storage IDs.
# Drives whose volume ID cannot be parsed are ignored.
sub foreach_writable_storage {
    my ($conf, $func) = @_;

    my %storages;

    for my $ds (grep { valid_drivename($_) } keys %$conf) {
        my $drive = parse_drive($ds, $conf->{$ds});
        next if !$drive || drive_is_cdrom($drive);

        my ($sid, $volname) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        if ($sid) {
            $storages{$sid} = $sid;
        }
    }

    for my $sid (sort keys %storages) {
        $func->($sid);
    }
}
5129
# Allocate the raw volume "vm-$vmid-state-$snapname" used to save the VM
# state of a snapshot and return its volume ID.
#
# The target storage is chosen among the storages used by the drives of
# $conf: shared storages are searched first, then the others, and 'local'
# is the fallback. Within each search a storage with a 'path' is preferred.
my $alloc_vmstate_volid = sub {
    my ($storecfg, $vmid, $conf, $snapname) = @_;

    # Note: we try to be smart when selecting a $target storage

    my $target;

    # look at all storages which are shared ($want_shared true) or not
    # shared ($want_shared false)
    my $search = sub {
        my ($want_shared) = @_;

        foreach_writable_storage($conf, sub {
            my ($sid) = @_;
            my $scfg = PVE::Storage::storage_config($storecfg, $sid);
            return if $want_shared ? !$scfg->{shared} : $scfg->{shared};

            $target = $sid if !$target || $scfg->{path}; # prefer file based storage
        });
    };

    # search shared storage first
    $search->(1);

    # now search local storage
    $search->(0) if !$target;

    $target = 'local' if !$target;

    # extra room for the driver state, added to the memory size below
    # NOTE(review): the previous comment here said 32MB, which does not
    # match the value - confirm which one is intended
    my $driver_state_size = 500;
    # we abort live save after $conf->{memory}, so we need at max twice that space
    my $size = $conf->{memory}*2 + $driver_state_size;

    my $scfg = PVE::Storage::storage_config($storecfg, $target);
    my $name = "vm-$vmid-state-$snapname";
    if ($scfg->{path}) {
        # add filename extension for file base storage
        $name .= ".raw";
    }

    my $volid = PVE::Storage::vdisk_alloc($storecfg, $target, $vmid, 'raw', $name, $size*1024);

    return $volid;
};
5170
# First step of snapshot creation: under the config lock, set the
# 'snapshot' lock and add a snapshot section in state "prepare" to the
# config of VM $vmid.
#
# A vmstate volume is allocated if $save_vmstate is set and the VM is
# running. Dies if the VM is a template, is locked, already has a snapshot
# named $snapname or does not support snapshots.
# Returns the new snapshot section (hash ref).
my $snapshot_prepare = sub {
    my ($vmid, $snapname, $save_vmstate, $comment) = @_;

    my $snap;

    my $prepare = sub {

        my $conf = load_config($vmid);

        if (is_template($conf)) {
            die "you can't take a snapshot if it's a template\n";
        }

        check_lock($conf);

        $conf->{lock} = 'snapshot';

        if (defined($conf->{snapshots}->{$snapname})) {
            die "snapshot name '$snapname' already used\n";
        }

        my $storecfg = PVE::Storage::config();
        has_feature('snapshot', $conf, $storecfg) || die "snapshot feature is not available";

        $snap = {};
        $conf->{snapshots}->{$snapname} = $snap;

        if ($save_vmstate && check_running($vmid)) {
            $snap->{vmstate} = $alloc_vmstate_volid->($storecfg, $vmid, $conf, $snapname);
        }

        $snapshot_copy_config->($conf, $snap);

        $snap->{snapstate} = "prepare";
        $snap->{snaptime} = time();
        if ($comment) {
            $snap->{description} = $comment;
        }

        # always overwrite machine if we save vmstate. This makes sure we
        # can restore it later using correct machine type
        if ($snap->{vmstate}) {
            $snap->{machine} = get_current_qemu_machine($vmid);
        }

        update_config_nolock($vmid, $conf, 1);
    };

    lock_config($vmid, $prepare);

    return $snap;
};
5216
# Last step of snapshot creation: under the config lock, remove the
# "prepare" state of snapshot $snapname and the 'snapshot' lock, and make
# the snapshot the parent of the current config of VM $vmid.
# Dies if the lock is missing, the snapshot does not exist or it is not in
# state "prepare".
my $snapshot_commit = sub {
    my ($vmid, $snapname) = @_;

    my $commit = sub {

        my $conf = load_config($vmid);

        my $locked = $conf->{lock} && $conf->{lock} eq 'snapshot';
        die "missing snapshot lock\n" if !$locked;

        # remember if 'machine' was set before the snapshot config is applied
        my $has_machine_config = defined($conf->{machine});

        my $snap = $conf->{snapshots}->{$snapname};
        if (!defined($snap)) {
            die "snapshot '$snapname' does not exist\n";
        }

        my $prepared = $snap->{snapstate} && $snap->{snapstate} eq "prepare";
        die "wrong snapshot state\n" if !$prepared;

        delete $snap->{snapstate};
        delete $conf->{lock};

        my $newconf = $snapshot_apply_config->($conf, $snap);

        if (!$has_machine_config) {
            delete $newconf->{machine};
        }

        $newconf->{parent} = $snapname;

        update_config_nolock($vmid, $newconf, 1);
    };

    lock_config($vmid, $commit);
};
5250
# Roll back VM $vmid to snapshot $snapname.
#
# This is done in three steps:
#  1. under the config lock: stop the VM and set the 'rollback' lock
#  2. roll back all non-cdrom volumes of the snapshot
#  3. under the config lock: remove the lock, apply the snapshot config and,
#     if the snapshot has a vmstate, start the VM from it
#
# Dies if the VM is a template, the snapshot does not exist or is
# incomplete, or the VM is still running.
sub snapshot_rollback {
    my ($vmid, $snapname) = @_;

    my $snap;

    # true for step 1, false for step 3
    my $preparing = 1;

    my $storecfg = PVE::Storage::config();

    my $update = sub {

        my $conf = load_config($vmid);

        if (is_template($conf)) {
            die "you can't rollback if vm is a template\n";
        }

        $snap = $conf->{snapshots}->{$snapname};

        if (!defined($snap)) {
            die "snapshot '$snapname' does not exist\n";
        }

        if ($snap->{snapstate}) {
            die "unable to rollback to incomplete snapshot (snapstate = $snap->{snapstate})\n";
        }

        if ($preparing) {
            check_lock($conf);
            vm_stop($storecfg, $vmid, undef, undef, 5, undef, undef);
        }

        if (check_running($vmid)) {
            die "unable to rollback vm $vmid: vm is running\n";
        }

        my $forcemachine;

        if ($preparing) {
            $conf->{lock} = 'rollback';
        } else {
            my $locked = $conf->{lock} && $conf->{lock} eq 'rollback';
            die "got wrong lock\n" if !$locked;
            delete $conf->{lock};

            my $has_machine_config = defined($conf->{machine});

            # copy snapshot config to current config
            $conf = $snapshot_apply_config->($conf, $snap);
            $conf->{parent} = $snapname;

            # Note: old code did not store 'machine', so we try to be smart
            # and guess the snapshot was generated with kvm 1.4 (pc-i440fx-1.4).
            $forcemachine = $conf->{machine} || 'pc-i440fx-1.4';
            # we remove the 'machine' configuration if not explicitly specified
            # in the original config.
            if ($snap->{vmstate} && !$has_machine_config) {
                delete $conf->{machine};
            }
        }

        update_config_nolock($vmid, $conf, 1);

        if (!$preparing && $snap->{vmstate}) {
            my $statefile = PVE::Storage::path($storecfg, $snap->{vmstate});
            vm_start($storecfg, $vmid, $statefile, undef, undef, undef, $forcemachine);
        }
    };

    lock_config($vmid, $update);

    foreach_drive($snap, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);

        PVE::Storage::volume_snapshot_rollback($storecfg, $drive->{file}, $snapname);
    });

    $preparing = 0;
    lock_config($vmid, $update);
}
5329
# Poll "query-savevm" of VM $vmid once per second until it reports the
# status 'completed'. Dies if no status is returned or if the status is
# neither 'active' nor 'completed'.
my $savevm_wait = sub {
    my ($vmid) = @_;

    while (1) {
        my $stat = vm_mon_cmd_nocheck($vmid, "query-savevm");
        my $status = $stat->{status};

        die "savevm not active\n" if !$status;
        last if $status eq 'completed';

        if ($status ne 'active') {
            die "query-savevm returned status '$status'\n";
        }

        sleep(1);
    }
};
5347
# Create snapshot $snapname of VM $vmid, with description $comment.
#
# If $save_vmstate is set and the VM is running, the VM state is saved too.
# Otherwise, if the VM is running and the 'agent' option is set, the guest
# file systems are frozen while the volume snapshots are taken.
# If something fails, the snapshots taken so far are deleted again and the
# error is re-thrown.
sub snapshot_create {
    my ($vmid, $snapname, $save_vmstate, $comment) = @_;

    my $snap = $snapshot_prepare->($vmid, $snapname, $save_vmstate, $comment);

    # vm is not running
    if (!$snap->{vmstate}) {
        $save_vmstate = 0;
    }

    my $config = load_config($vmid);

    my $running = check_running($vmid);

    # freezing is not needed if we save RAM
    my $freezefs = $snap->{vmstate} ? 0 : ($running && $config->{agent});

    # drives which already got their snapshot
    my $drivehash = {};

    if ($freezefs) {
        eval { vm_mon_cmd($vmid, "guest-fsfreeze-freeze"); };
        warn "guest-fsfreeze-freeze problems - $@" if $@;
    }

    eval {
        # create internal snapshots of all drives

        my $storecfg = PVE::Storage::config();

        if ($running && $snap->{vmstate}) {
            my $path = PVE::Storage::path($storecfg, $snap->{vmstate});
            vm_mon_cmd($vmid, "savevm-start", statefile => $path);
            $savevm_wait->($vmid);
        } elsif ($running) {
            vm_mon_cmd($vmid, "savevm-start");
        }

        foreach_drive($snap, sub {
            my ($ds, $drive) = @_;

            return if drive_is_cdrom($drive);

            qemu_volume_snapshot($vmid, "drive-$ds", $storecfg, $drive->{file}, $snapname);
            $drivehash->{$ds} = 1;
        });
    };
    my $err = $@;

    if ($running) {
        eval { vm_mon_cmd($vmid, "savevm-end") };
        warn $@ if $@;

        if ($freezefs) {
            eval { vm_mon_cmd($vmid, "guest-fsfreeze-thaw"); };
            warn "guest-fsfreeze-thaw problems - $@" if $@;
        }

        # savevm-end is async, we need to wait
        while (1) {
            my $stat = vm_mon_cmd_nocheck($vmid, "query-savevm");
            last if !$stat->{bytes};

            print "savevm not yet finished\n";
            sleep(1);
        }
    }

    if ($err) {
        warn "snapshot create failed: starting cleanup\n";
        eval { snapshot_delete($vmid, $snapname, 0, $drivehash); };
        warn $@ if $@;
        die $err;
    }

    $snapshot_commit->($vmid, $snapname);
}
5429
# Delete snapshot $snapname of VM $vmid.
#
# The work is done in three steps; every config change happens under the
# config lock (lock_config):
#   1. mark the snapshot with snapstate 'delete'
#   2. free the saved vmstate volume and the internal snapshot of every
#      non-cdrom drive recorded in the snapshot
#   3. drop the snapshot section from the config and re-link the 'parent'
#      references that pointed to it
#
# With $force set, storage errors are only reported with warn and the
# intermediate config updates of step 2 are skipped.
#
# Note: $drivehash is only set when called from snapshot_create. In that
# case the lock/template checks are skipped, only drives listed in the hash
# get their internal snapshot removed, and the config 'lock' entry is
# cleared in step 3.
sub snapshot_delete {
    my ($vmid, $snapname, $force, $drivehash) = @_;

    # phase flag read by $updatefn: true for steps 1 and 2, false for step 3
    my $prepare = 1;

    my $snap;
    my $unused = [];

    # point a config section at $new_parent (or at nothing) if its parent
    # is the snapshot being deleted
    my $unlink_parent = sub {
        my ($confref, $new_parent) = @_;

        return if !$confref->{parent} || $confref->{parent} ne $snapname;

        if ($new_parent) {
            $confref->{parent} = $new_parent;
        } else {
            delete $confref->{parent};
        }
    };

    # config updater, always executed through lock_config
    my $updatefn = sub {
        my ($remove_drive) = @_;

        my $conf = load_config($vmid);

        if (!$drivehash) {
            check_lock($conf);
            die "you can't delete a snapshot if vm is a template\n"
                if is_template($conf);
        }

        $snap = $conf->{snapshots}->{$snapname};

        die "snapshot '$snapname' does not exist\n" if !defined($snap);

        if ($remove_drive) {
            if ($remove_drive eq 'vmstate') {
                delete $snap->{$remove_drive};
            } else {
                my $parsed = parse_drive($remove_drive, $snap->{$remove_drive});
                my $old_volid = $parsed->{file};
                delete $snap->{$remove_drive};
                add_unused_volume($conf, $old_volid);
            }
        }

        if ($prepare) {
            $snap->{snapstate} = 'delete';
        } else {
            # remove parent refs
            my $grandparent = $snap->{parent};
            $unlink_parent->($conf, $grandparent);
            foreach my $other (keys %{$conf->{snapshots}}) {
                next if $other eq $snapname;
                $unlink_parent->($conf->{snapshots}->{$other}, $grandparent);
            }

            delete $conf->{snapshots}->{$snapname};
            delete $conf->{lock} if $drivehash;
            foreach my $unused_volid (@$unused) {
                add_unused_volume($conf, $unused_volid);
            }
        }

        update_config_nolock($vmid, $conf, 1);
    };

    # storage errors are fatal unless $force is set
    my $handle_storage_error = sub {
        my ($err) = @_;

        return if !$err;
        die $err if !$force;
        warn $err;
    };

    lock_config($vmid, $updatefn);

    # now remove vmstate file

    my $storecfg = PVE::Storage::config();

    if ($snap->{vmstate}) {
        eval { PVE::Storage::vdisk_free($storecfg, $snap->{vmstate}); };
        $handle_storage_error->($@);

        # save changes (remove vmstate from snapshot)
        lock_config($vmid, $updatefn, 'vmstate') if !$force;
    }

    # now remove all internal snapshots
    foreach_drive($snap, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);

        my $volid = $drive->{file};

        if (!$drivehash || $drivehash->{$ds}) {
            eval { qemu_volume_snapshot_delete($vmid, "drive-$ds", $storecfg, $volid, $snapname); };
            $handle_storage_error->($@);
        }

        # save changes (remove drive from snapshot)
        lock_config($vmid, $updatefn, $ds) if !$force;
        push @$unused, $volid;
    });

    # now cleanup config
    $prepare = 0;
    lock_config($vmid, $updatefn);
}
5541
# Check whether every non-cdrom drive of $conf supports storage feature
# $feature (optionally for snapshot $snapname / a running guest).
# Returns 1 if all drives support it, 0 otherwise.
sub has_feature {
    my ($feature, $conf, $storecfg, $snapname, $running) = @_;

    my $unsupported = 0;

    my $check_drive = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);

        my $ok = PVE::Storage::volume_has_feature($storecfg, $feature, $drive->{file}, $snapname, $running);
        $unsupported++ if !$ok;
    };

    # every drive is checked, even after the first unsupported one
    foreach_drive($conf, $check_drive);

    return $unsupported ? 0 : 1;
}
5556
# Convert the disks of VM $vmid into base (template) volumes.
#
# Only non-cdrom drives whose storage reports the 'template' feature are
# converted; if $disk is given, only that drive is considered. The config
# is written back after each converted drive.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $convert_drive = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive) || ($disk && $ds ne $disk);

        my $volid = $drive->{file};
        return if !PVE::Storage::volume_has_feature($storecfg, 'template', $volid);

        # the drive now references the newly created base volume
        $drive->{file} = PVE::Storage::vdisk_create_base($storecfg, $volid);
        $conf->{$ds} = print_drive($vmid, $drive);
        update_config_nolock($vmid, $conf, 1);
    };

    foreach_drive($conf, $convert_drive);
}
5577
# Tell whether the VM config $conf describes a template.
#
# Returns 1 if the 'template' option is defined and numerically equal to 1,
# and 0 in every other case.
sub is_template {
    my ($conf) = @_;

    return 1 if defined $conf->{template} && $conf->{template} == 1;

    # Explicit result: without it the sub falls off its end and hands the
    # caller whatever the failed condition above happened to evaluate to.
    return 0;
}
5583
# Copy volume $src_volid (optionally its snapshot $snapname) onto the
# already existing volume $dst_volid using 'qemu-img convert'.
#
# $size is only used for the progress lines printed while copying.
# Nothing is done unless both volume ids parse into a storage id.
# Dies with "copy failed: ..." if the qemu-img run fails.
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    if ($src_storeid && $dst_storeid) {
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

        my $src_format = qemu_img_format($src_scfg, $src_volname);
        my $dst_format = qemu_img_format($dst_scfg, $dst_volname);

        my $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);

        # '-s' is only passed for qcow2 sources
        my @snapshot_opt = ($snapname && $src_format eq "qcow2") ? ('-s', $snapname) : ();

        my $cmd = [
            '/usr/bin/qemu-img', 'convert', '-t', 'writeback', '-p', '-n',
            @snapshot_opt,
            '-f', $src_format, '-O', $dst_format, $src_path, $dst_path,
        ];

        # turn the "(NN/100%)" progress output of qemu-img into byte counts
        my $report_progress = sub {
            my ($line) = @_;

            return if $line !~ m/\((\S+)\/100\%\)/;

            my $percent = $1;
            my $transferred = int($size * $percent / 100);
            my $remaining = $size - $transferred;

            print "transferred: $transferred bytes remaining: $remaining bytes total: $size bytes progression: $percent %\n";
        };

        eval { run_command($cmd, timeout => undef, outfunc => $report_progress); };
        if (my $err = $@) {
            die "copy failed: $err";
        }
    }
}
5623
# Determine the image format name to hand to qemu-img for volume $volname
# on the storage described by $scfg:
#   - storage with a 'path' and a known file extension: that extension
#   - storage of type 'iscsi': "host_device"
#   - anything else: "raw"
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    return $1 if $scfg->{path} && $volname =~ m/\.(raw|qcow2|qed|vmdk)$/;

    return $scfg->{type} eq 'iscsi' ? "host_device" : "raw";
}
5635
# Mirror drive $drive (config key, addressed as "drive-$drive") of the
# running VM $vmid onto the already allocated volume $dst_volid, using the
# 'drive-mirror' block job.
#
# The job is polled once per second and progress is printed. When source
# and target are in sync and the job is idle:
#   - $vmiddst == $vmid: 'block-job-complete' is issued, so the guest
#     switches over to the new volume;
#   - otherwise (disk cloned for another VM): the job is cancelled and the
#     guest keeps using its original volume.
#
# If the job stays in sync-but-busy state for more than $maxwait polls, the
# guest is suspended so the job can settle; it is resumed again afterwards,
# on the error path too.
#
# Dies with "mirroring error: ..." after trying to cancel the job if
# anything goes wrong.
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst) = @_;

    my $count = 0;
    my $frozen = undef;
    my $maxwait = 120;

    my $storecfg = PVE::Storage::config();
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);

    # NOTE(review): result is not used below; the lookup is kept because it
    # presumably fails early for an unknown storage - confirm before removing
    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

    my $format;
    if ($dst_volname =~ m/\.(raw|qcow2)$/){
        $format = $1;
    }

    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);

    my $opts = { timeout => 10, device => "drive-$drive", mode => "existing", sync => "full", target => $dst_path };
    $opts->{format} = $format if $format;

    #fixme : sometime drive-mirror timeout, but works fine after.
    # (I have see the problem with big volume > 200GB), so we need to eval
    eval { vm_mon_cmd($vmid, "drive-mirror", %$opts); };
    # ignore errors here

    eval {
        while (1) {
            my $stats = vm_mon_cmd($vmid, "query-block-jobs");
            my $stat = $stats->[0];
            die "mirroring job seem to have die. Maybe do you have bad sectors?" if !$stat;
            die "error job is not mirroring" if $stat->{type} ne "mirror";

            my $busy = $stat->{busy};

            if (my $total = $stat->{len}) {
                my $transferred = $stat->{offset} || 0;
                my $remaining = $total - $transferred;
                my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                print "transferred: $transferred bytes remaining: $remaining bytes total: $total bytes progression: $percent % busy: $busy\n";
            }

            if ($stat->{len} == $stat->{offset}) {
                if ($busy eq 'false') {

                    last if $vmiddst != $vmid;

                    # try to switch the disk if source and destination are on the same guest
                    eval { vm_mon_cmd($vmid, "block-job-complete", device => "drive-$drive") };
                    last if !$@;
                    die $@ if $@ !~ m/cannot be completed/;
                }

                if ($count > $maxwait && !$frozen) {
                    # if too many writes to the disk occur at the end of the
                    # migration, the guest needs to be frozen to be able to
                    # complete it (only done once)
                    vm_suspend($vmid, 1);
                    $frozen = 1;
                }
                $count++;
            }
            sleep 1;
        }
    };
    my $err = $@;

    # Thaw the guest again in every case. This used to happen only after a
    # successful loop, which left the guest suspended whenever the loop died
    # after vm_suspend.
    if ($frozen) {
        eval { vm_resume($vmid, 1); };
        if (my $resume_err = $@) {
            if ($err) {
                # keep the original failure as the reported error
                warn "unable to resume VM $vmid - $resume_err";
            } else {
                $err = $resume_err;
            }
        }
    }

    # cancel the block job and wait until it is gone
    my $cancel_job = sub {
        vm_mon_cmd($vmid, "block-job-cancel", device => "drive-$drive");
        while (1) {
            my $stats = vm_mon_cmd($vmid, "query-block-jobs");
            my $stat = $stats->[0];
            last if !$stat;
            sleep 1;
        }
    };

    if ($err) {
        eval { &$cancel_job(); };
        die "mirroring error: $err";
    }

    if ($vmiddst != $vmid) {
        # if we clone a disk for a new target vm, we don't switch the disk
        &$cancel_job(); # so we call block-job-cancel
    }
}
5729
# Clone drive $drivename ($drive) of VM $vmid for VM $newvmid.
#
# Without $full a linked clone is created (optionally from snapshot
# $snapname). With $full a new volume is allocated on $storage (default:
# the storage of the source volume) in $format (default: the drive's
# format, else the storage default; unsupported formats fall back to the
# storage default) and the data is copied - offline or from a snapshot
# with qemu_img_convert, otherwise with qemu_drive_mirror.
#
# Every newly created volume id is pushed onto $newvollist.
# Returns $drive itself, updated in place to describe the new volume.
sub clone_disk {
    my ($storecfg, $vmid, $running, $drivename, $drive, $snapname,
        $newvmid, $storage, $format, $full, $newvollist) = @_;

    my $newvolid;

    if ($full) {
        my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        $storeid = $storage if $storage;

        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        $format ||= $drive->{format} || $defFormat;

        # test if requested format is supported - else use default
        my $is_supported = scalar(grep { $_ eq $format } @$validFormats);
        $format = $defFormat unless $is_supported;

        my ($size) = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 3);

        print "create full clone of drive $drivename ($drive->{file})\n";
        $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $newvmid, $format, undef, ($size/1024));
        push @$newvollist, $newvolid;

        if ($running && !$snapname) {
            qemu_drive_mirror($vmid, $drivename, $newvolid, $newvmid);
        } else {
            qemu_img_convert($drive->{file}, $newvolid, $size, $snapname);
        }
    } else {
        print "create linked clone of drive $drivename ($drive->{file})\n";
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    }

    my ($newsize) = PVE::Storage::volume_size_info($storecfg, $newvolid, 3);

    # the caller's $drive hash is modified, not copied
    @{$drive}{qw(format file size)} = (undef, $newvolid, $newsize);

    return $drive;
}
5775
# Return the machine type name of VM $vmid as reported by the QMP command
# 'query-machines': the entry flagged 'is-current', else the one flagged
# 'is-default', else 'pc'.
# this only works if VM is running
sub get_current_qemu_machine {
    my ($vmid) = @_;

    my $query = { execute => 'query-machines', arguments => {} };
    my $machines = vm_qmp_command($vmid, $query);

    my ($current, $default);
    for my $machine (@$machines) {
        my $name = $machine->{name};
        $default = $name if $machine->{'is-default'};
        $current = $name if $machine->{'is-current'};
    }

    # fallback to the default machine if current is not supported by qemu
    return $current || $default || 'pc';
}
5792
# List the entries found below "$pcisysfs/devices".
#
# Returns a hash reference keyed by the 'bus:slot' part of the entry name;
# each value is an array reference holding one { id, function } hash per
# matching entry.
sub lspci {

    my $devices = {};

    my $add_entry = sub {
        my (undef, $id, $function) = @_;

        push @{$devices->{$id}}, { id => $id, function => $function };
    };

    dir_glob_foreach("$pcisysfs/devices", '[a-f0-9]{4}:([a-f0-9]{2}:[a-f0-9]{2})\.([0-9])', $add_entry);

    return $devices;
}
5805
5806 1;