]> git.proxmox.com Git - qemu-server.git/blob - PVE/QemuServer.pm
fix hostpci addr
[qemu-server.git] / PVE / QemuServer.pm
1 package PVE::QemuServer;
2
3 use strict;
4 use POSIX;
5 use IO::Handle;
6 use IO::Select;
7 use IO::File;
8 use IO::Dir;
9 use IO::Socket::UNIX;
10 use File::Basename;
11 use File::Path;
12 use File::stat;
13 use Getopt::Long;
14 use Digest::SHA1;
15 use Fcntl ':flock';
16 use Cwd 'abs_path';
17 use IPC::Open3;
18 use Fcntl;
19 use PVE::SafeSyslog;
20 use Storable qw(dclone);
21 use PVE::Exception qw(raise raise_param_exc);
22 use PVE::Storage;
23 use PVE::Tools qw(run_command lock_file file_read_firstline);
24 use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file cfs_lock_file);
25 use PVE::INotify;
26 use PVE::ProcFSTools;
27 use Time::HiRes qw(gettimeofday);
28
# Result of PVE::ProcFSTools::read_cpuinfo(), fetched once at load time.
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();

# Note about locking: we use flock on the config file to protect
# against concurrent actions.
# Additionally, we have a 'lock' setting in the config file. This
# can be set to 'migrate' or 'backup'. Most actions are not
# allowed when such a lock is set. But you can ignore this kind of
# lock with the --skiplock flag.

cfs_register_file('/qemu-server/', \&parse_vm_config);

# Register the module-wide standard options with the JSON schema.
# Each entry is [ option name, option schema ].
foreach my $std_option (
    [
        'skiplock' => {
            description => "Ignore locks - only root is allowed to use this option.",
            type => 'boolean',
            optional => 1,
        },
    ],
    [
        'pve-qm-stateuri' => {
            description => "Some command save/restore state from this location.",
            type => 'string',
            maxLength => 128,
            optional => 1,
        },
    ],
) {
    PVE::JSONSchema::register_standard_option(@$std_option);
}
52
53 #no warnings 'redefine';
54
# Define the fairsched/luid syscall numbers and the KVM_GET_API_VERSION
# ioctl request, unless this was already done (guarded by _VZSYSCALLS_H_).
# Every constant is only defined when no sub of that name exists yet.
unless (defined(&_VZSYSCALLS_H_)) {
    eval 'sub _VZSYSCALLS_H_ () {1;}';
    require 'sys/syscall.ph';
    if (defined(&__x86_64__)) {
        eval 'sub __NR_fairsched_vcpus () {499;}' unless defined(&__NR_fairsched_vcpus);
        eval 'sub __NR_fairsched_mknod () {504;}' unless defined(&__NR_fairsched_mknod);
        eval 'sub __NR_fairsched_rmnod () {505;}' unless defined(&__NR_fairsched_rmnod);
        eval 'sub __NR_fairsched_chwt () {506;}' unless defined(&__NR_fairsched_chwt);
        eval 'sub __NR_fairsched_mvpr () {507;}' unless defined(&__NR_fairsched_mvpr);
        eval 'sub __NR_fairsched_rate () {508;}' unless defined(&__NR_fairsched_rate);
        eval 'sub __NR_setluid () {501;}' unless defined(&__NR_setluid);
        eval 'sub __NR_setublimit () {502;}' unless defined(&__NR_setublimit);
    } elsif (defined(&__i386__)) {
        eval 'sub __NR_fairsched_mknod () {500;}' unless defined(&__NR_fairsched_mknod);
        eval 'sub __NR_fairsched_rmnod () {501;}' unless defined(&__NR_fairsched_rmnod);
        eval 'sub __NR_fairsched_chwt () {502;}' unless defined(&__NR_fairsched_chwt);
        eval 'sub __NR_fairsched_mvpr () {503;}' unless defined(&__NR_fairsched_mvpr);
        eval 'sub __NR_fairsched_rate () {504;}' unless defined(&__NR_fairsched_rate);
        eval 'sub __NR_fairsched_vcpus () {505;}' unless defined(&__NR_fairsched_vcpus);
        eval 'sub __NR_setluid () {511;}' unless defined(&__NR_setluid);
        eval 'sub __NR_setublimit () {512;}' unless defined(&__NR_setublimit);
    } else {
        die("no fairsched syscall for this arch");
    }
    require 'asm/ioctl.ph';
    # ioctl type 0xAE, request number 0x00 (spelled out instead of a bare '0x')
    eval 'sub KVM_GET_API_VERSION () { &_IO(0xAE, 0x00);}' unless defined(&KVM_GET_API_VERSION);
}
83
# Invoke the fairsched_mknod syscall. All three arguments are truncated
# to integers first; the raw syscall() result is returned.
sub fairsched_mknod {
    my ($parent, $weight, $desired) = @_;

    my @syscall_args = (int($parent), int($weight), int($desired));

    return syscall(__NR_fairsched_mknod(), @syscall_args);
}
89
# Invoke the fairsched_rmnod syscall for node $id (truncated to an
# integer) and return the raw syscall() result.
sub fairsched_rmnod {
    my $id = shift;

    return syscall(__NR_fairsched_rmnod(), int($id));
}
95
# Invoke the fairsched_mvpr syscall with $pid and $newid (both truncated
# to integers) and return the raw syscall() result.
sub fairsched_mvpr {
    my ($pid, $newid) = @_;

    my @syscall_args = (int($pid), int($newid));

    return syscall(__NR_fairsched_mvpr(), @syscall_args);
}
101
# Invoke the fairsched_vcpus syscall with $id and $vcpus (both truncated
# to integers) and return the raw syscall() result.
sub fairsched_vcpus {
    my ($id, $vcpus) = @_;

    my @syscall_args = (int($id), int($vcpus));

    return syscall(__NR_fairsched_vcpus(), @syscall_args);
}
107
# Invoke the fairsched_rate syscall. $op is one of the FAIRSCHED_*_RATE
# constants; all arguments are truncated to integers. Returns the raw
# syscall() result.
sub fairsched_rate {
    my ($id, $op, $rate) = @_;

    my @syscall_args = (int($id), int($op), int($rate));

    return syscall(__NR_fairsched_rate(), @syscall_args);
}
113
# Operation codes used as the $op argument of fairsched_rate().
use constant {
    FAIRSCHED_SET_RATE  => 0,
    FAIRSCHED_DROP_RATE => 1,
    FAIRSCHED_GET_RATE  => 2,
};
117
# Set the CPU limit of fairsched node $id. $limit is a percentage; it is
# scaled by 1024/100 and truncated to an integer. When the scaled value
# is 0 the rate is dropped, otherwise it is set.
sub fairsched_cpulimit {
    my ($id, $limit) = @_;

    my $rate = int($limit * 1024 / 100);

    return fairsched_rate($id, FAIRSCHED_DROP_RATE, $rate) if !$rate;

    return fairsched_rate($id, FAIRSCHED_SET_RATE, $rate);
}
126
my $nodename = PVE::INotify::nodename();

# Locations used by this module.
my $confdir = "/etc/pve/nodes/$nodename/qemu-server";
my $var_run_tmpdir = "/var/run/qemu-server";
my $lock_dir = "/var/lock/qemu-server";

# Try to create the directories at load time; the result of mkdir is
# deliberately not checked.
foreach my $dir ("/etc/pve/nodes/$nodename", $confdir, $var_run_tmpdir, $lock_dir) {
    mkdir $dir;
}

my $pcisysfs = "/sys/bus/pci";

my $keymaphash = PVE::Tools::kvmkeymaps();
142
# Schema of the VM configuration options, keyed by option name. Each
# entry is a JSON schema fragment (type, description, default, ...).
# The numbered device options (netN, ideN, ...) are added to this hash
# further down in the file.
my $confdesc = {
    onboot => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default => 0,
    },
    autostart => {
        optional => 1,
        type => 'boolean',
        description => "Automatic restart after crash (currently ignored).",
        default => 0,
    },
    reboot => {
        optional => 1,
        type => 'boolean',
        description => "Allow reboot. If set to '0' the VM exit on reboot.",
        default => 1,
    },
    lock => {
        optional => 1,
        type => 'string',
        description => "Lock/unlock the VM.",
        enum => [qw(migrate backup)],
    },
    cpulimit => {
        optional => 1,
        type => 'integer',
        description => "Limit of CPU usage in per cent. Note if the computer has 2 CPUs, it has total of 200% CPU time. Value '0' indicates no CPU limit.\n\nNOTE: This option is currently ignored.",
        minimum => 0,
        default => 0,
    },
    cpuunits => {
        optional => 1,
        type => 'integer',
        description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to weights of all the other running VMs.\n\nNOTE: You can disable fair-scheduler configuration by setting this to 0.",
        minimum => 0,
        maximum => 500000,
        default => 1000,
    },
    memory => {
        optional => 1,
        type => 'integer',
        description => "Amount of RAM for the VM in MB. This is the maximum available memory when you use the balloon device.",
        minimum => 16,
        default => 512,
    },
    balloon => {
        optional => 1,
        type => 'integer',
        description => "Amount of target RAM for the VM in MB.",
        minimum => 16,
    },
    keyboard => {
        optional => 1,
        type => 'string',
        description => "Keyboard layout for vnc server. Default is read from the datacenter configuration file.",
        enum => [ keys %$keymaphash ],
        default => 'en-us',
    },
    name => {
        optional => 1,
        type => 'string',
        description => "Set a name for the VM. Only used on the configuration web interface.",
    },
    description => {
        optional => 1,
        type => 'string',
        description => "Description for the VM. Only used on the configuration web interface.",
    },
    ostype => {
        optional => 1,
        type => 'string',
        enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 l24 l26)],
        description => <<EODESC,
Used to enable special optimization/features for specific
operating systems:

other => unspecified OS
wxp => Microsoft Windows XP
w2k => Microsoft Windows 2000
w2k3 => Microsoft Windows 2003
w2k8 => Microsoft Windows 2008
wvista => Microsoft Windows Vista
win7 => Microsoft Windows 7
l24 => Linux 2.4 Kernel
l26 => Linux 2.6/3.X Kernel

other|l24|l26 ... no special behaviour
wxp|w2k|w2k3|w2k8|wvista|win7 ... use --localtime switch
EODESC
    },
    boot => {
        optional => 1,
        type => 'string',
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n).",
        pattern => '[acdn]{1,4}',
        default => 'cdn',
    },
    bootdisk => {
        optional => 1,
        type => 'string', format => 'pve-qm-bootdisk',
        description => "Enable booting from specified disk.",
        pattern => '(ide|scsi|virtio)\d+',
    },
    smp => {
        optional => 1,
        type => 'integer',
        description => "The number of CPUs. Please use option -sockets instead.",
        minimum => 1,
        default => 1,
    },
    sockets => {
        optional => 1,
        type => 'integer',
        description => "The number of CPU sockets.",
        minimum => 1,
        default => 1,
    },
    cores => {
        optional => 1,
        type => 'integer',
        description => "The number of cores per socket.",
        minimum => 1,
        default => 1,
    },
    acpi => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable ACPI.",
        default => 1,
    },
    kvm => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable KVM hardware virtualization.",
        default => 1,
    },
    tdf => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable time drift fix.",
        default => 1,
    },
    localtime => {
        optional => 1,
        type => 'boolean',
        description => "Set the real time clock to local time. This is enabled by default if ostype indicates a Microsoft OS.",
    },
    freeze => {
        optional => 1,
        type => 'boolean',
        description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
    },
    vga => {
        optional => 1,
        type => 'string',
        # the help text must name the same values as the enum below
        description => "Select VGA type. If you want to use high resolution modes (>= 1280x1024x16) then you should use option 'std' or 'vmware'. Default is 'std' for win7/w2k8, and 'cirrus' for other OS types",
        enum => [qw(std cirrus vmware)],
    },
    watchdog => {
        optional => 1,
        type => 'string', format => 'pve-qm-watchdog',
        typetext => '[[model=]i6300esb|ib700] [,[action=]reset|shutdown|poweroff|pause|debug|none]',
        description => "Create a virtual hardware watchdog device. Once enabled (by a guest action), the watchdog must be periodically polled by an agent inside the guest or else the guest will be restarted (or execute the action specified)",
    },
    startdate => {
        optional => 1,
        type => 'string',
        typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
        description => "Set the initial date of the real time clock. Valid format for date are: 'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
        pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
        default => 'now',
    },
    args => {
        optional => 1,
        type => 'string',
        description => <<EODESCR,
Note: this option is for experts only. It allows you to pass arbitrary arguments to kvm, for example:

args: -no-reboot -no-hpet
EODESCR
    },
    tablet => {
        optional => 1,
        type => 'boolean',
        default => 1,
        description => "Enable/disable the usb tablet device. This device is usually needed to allow absolute mouse positioning. Else the mouse runs out of sync with normal vnc clients. If you're running lots of console-only guests on one host, you may consider disabling this to save some context switches.",
    },
    migrate_speed => {
        optional => 1,
        type => 'integer',
        description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
        minimum => 0,
        default => 0,
    },
    migrate_downtime => {
        optional => 1,
        type => 'integer',
        description => "Set maximum tolerated downtime (in seconds) for migrations.",
        minimum => 0,
        default => 1,
    },
    cdrom => {
        optional => 1,
        type => 'string', format => 'pve-qm-drive',
        typetext => 'volume',
        description => "This is an alias for option -ide2",
    },
    cpu => {
        optional => 1,
        description => "Emulated CPU type.",
        type => 'string',
        enum => [ qw(486 athlon pentium pentium2 pentium3 coreduo core2duo kvm32 kvm64 qemu32 qemu64 phenom host) ],
        default => 'qemu64',
    },
};
360
361 # what about other qemu settings ?
362 #cpu => 'string',
363 #machine => 'string',
364 #fda => 'file',
365 #fdb => 'file',
366 #mtdblock => 'file',
367 #sd => 'file',
368 #pflash => 'file',
369 #snapshot => 'bool',
370 #bootp => 'file',
371 ##tftp => 'dir',
372 ##smb => 'dir',
373 #kernel => 'file',
374 #append => 'string',
375 #initrd => 'file',
376 ##soundhw => 'string',
377
# Make every option defined so far available as standard option
# "pve-qm-<option name>".
foreach my $optname (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$optname", $confdesc->{$optname});
}
381
# Number of numbered configuration slots per device class; option
# indices run from 0 to the respective limit minus one.
my $MAX_IDE_DISKS       = 4;
my $MAX_SCSI_DISKS      = 14;
my $MAX_VIRTIO_DISKS    = 6;
my $MAX_USB_DEVICES     = 5;
my $MAX_NETS            = 6;
my $MAX_UNUSED_DISKS    = 8;
my $MAX_HOSTPCI_DEVICES = 2;
my $MAX_SERIAL_PORTS    = 4;
my $MAX_PARALLEL_PORTS  = 3;

# Known network card models, plus a sorted, space separated version
# of the list for use in help texts.
my $nic_model_list = [
    qw(rtl8139 ne2k_pci e1000 pcnet virtio ne2k_isa i82551 i82557b i82559er)
];
my $nic_model_list_txt = join(' ', sort @{$nic_model_list});
395
396 # fixme:
# Schema shared by all netN options. The help text lists the supported
# NIC models taken from $nic_model_list_txt.
my $netdesc = {
    type => 'string', format => 'pve-qm-net',
    optional => 1,
    typetext => "MODEL=XX:XX:XX:XX:XX:XX [,bridge=<dev>][,rate=<mbps>]",
    description => <<"EODESCR",
Specify network devices.

MODEL is one of: $nic_model_list_txt

XX:XX:XX:XX:XX:XX should be an unique MAC address. This is
automatically generated if not specified.

The bridge parameter can be used to automatically add the interface to a bridge device. The Proxmox VE standard bridge is called 'vmbr0'.

Option 'rate' is used to limit traffic bandwidth from and to this interface. It is specified as floating point number, unit is 'Megabytes per second'.

If you specify no bridge, we create a kvm 'user' (NATed) network device, which provides DHCP and DNS services. The following addresses are used:

10.0.2.2 Gateway
10.0.2.3 DNS Server
10.0.2.4 SMB Server

The DHCP server assign addresses to the guest starting from 10.0.2.15.

EODESCR
};
PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);

# net0 .. net<$MAX_NETS - 1> all share the same schema
foreach my $i (0 .. $MAX_NETS - 1) {
    $confdesc->{"net$i"} = $netdesc;
}
428
# Names of all valid drive options; see valid_drivename().
my $drivename_hash;

# The drive option syntax is the same for IDE, SCSI and VIRTIO drives.
my $drive_typetext = '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback] [,format=f] [,backup=yes|no] [,aio=native|threads]';

my $idedesc = {
    type => 'string', format => 'pve-qm-drive',
    optional => 1,
    typetext => $drive_typetext,
    description => "Use volume as IDE hard disk or CD-ROM (n is 0 to 3).",
};
PVE::JSONSchema::register_standard_option("pve-qm-ide", $idedesc);

my $scsidesc = {
    type => 'string', format => 'pve-qm-drive',
    optional => 1,
    typetext => $drive_typetext,
    description => "Use volume as SCSI hard disk or CD-ROM (n is 0 to 13).",
};
PVE::JSONSchema::register_standard_option("pve-qm-scsi", $scsidesc);

my $virtiodesc = {
    type => 'string', format => 'pve-qm-drive',
    optional => 1,
    typetext => $drive_typetext,
    description => "Use volume as VIRTIO hard disk (n is 0 to 5).",
};
PVE::JSONSchema::register_standard_option("pve-qm-virtio", $virtiodesc);
454
# Schema shared by all usbN options.
my $usbdesc = {
    type => 'string', format => 'pve-qm-usb-device',
    optional => 1,
    typetext => 'host=HOSTUSBDEVICE',
    description => <<"EODESCR",
Configure an USB device (n is 0 to 4). This can be used to
pass-through usb devices to the guest. HOSTUSBDEVICE syntax is:

'bus-port(.port)*' (decimal numbers) or
'vendor_id:product_id' (hexadeciaml numbers)

You can use the 'lsusb -t' command to list existing usb devices.

Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.

EODESCR
};
PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);
473
# Schema shared by all hostpciN options.
my $hostpcidesc = {
    type => 'string', format => 'pve-qm-hostpci',
    optional => 1,
    typetext => "HOSTPCIDEVICE",
    description => <<"EODESCR",
Map host pci devices. HOSTPCIDEVICE syntax is:

'bus:dev.func' (hexadecimal numbers)

You can us the 'lspci' command to list existing pci devices.

Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.

Experimental: user reported problems with this option.
EODESCR
};
PVE::JSONSchema::register_standard_option("pve-qm-hostpci", $hostpcidesc);
491
# Schema shared by all serialN options; the value must name a
# /dev/ttyS<number> device.
my $serialdesc = {
    type => 'string',
    optional => 1,
    pattern => '/dev/ttyS\d+',
    description => <<"EODESCR",
Map host serial devices (n is 0 to 3).

Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.

Experimental: user reported problems with this option.
EODESCR
};

# Schema shared by all parallelN options; the value must name a
# /dev/parport<number> device.
my $paralleldesc = {
    type => 'string',
    optional => 1,
    pattern => '/dev/parport\d+',
    description => <<"EODESCR",
Map host parallel devices (n is 0 to 2).

Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.

Experimental: user reported problems with this option.
EODESCR
};
517
# Add the numbered options (<prefix>0 .. <prefix><count - 1>) to the
# configuration schema. Drive classes additionally record each name in
# $drivename_hash.
foreach my $slotdef (
    # [ option prefix, number of slots, schema, is a drive ]
    [ 'parallel', $MAX_PARALLEL_PORTS,  $paralleldesc, 0 ],
    [ 'serial',   $MAX_SERIAL_PORTS,    $serialdesc,   0 ],
    [ 'hostpci',  $MAX_HOSTPCI_DEVICES, $hostpcidesc,  0 ],
    [ 'ide',      $MAX_IDE_DISKS,       $idedesc,      1 ],
    [ 'scsi',     $MAX_SCSI_DISKS,      $scsidesc,     1 ],
    [ 'virtio',   $MAX_VIRTIO_DISKS,    $virtiodesc,   1 ],
    [ 'usb',      $MAX_USB_DEVICES,     $usbdesc,      0 ],
) {
    my ($prefix, $count, $desc, $is_drive) = @$slotdef;

    foreach my $i (0 .. $count - 1) {
        $drivename_hash->{"$prefix$i"} = 1 if $is_drive;
        $confdesc->{"$prefix$i"} = $desc;
    }
}
548
# Schema shared by all unusedN options.
my $unuseddesc = {
    type => 'string', format => 'pve-volume-id',
    optional => 1,
    description => "Reference to unused volumes.",
};

foreach my $i (0 .. $MAX_UNUSED_DISKS - 1) {
    $confdesc->{"unused$i"} = $unuseddesc;
}
558
# Cached result of kvm_version(); 0 means "not determined yet".
my $kvm_api_version = 0;

# Return the value reported by the KVM_GET_API_VERSION ioctl on
# /dev/kvm. A non-zero result is cached. Returns 0 when /dev/kvm cannot
# be opened.
sub kvm_version {

    return $kvm_api_version if $kvm_api_version;

    my $kvm_dev = IO::File->new("</dev/kvm");
    return 0 if !$kvm_dev;

    my $version = $kvm_dev->ioctl(KVM_GET_API_VERSION(), 0);
    $kvm_api_version = $version if $version;

    $kvm_dev->close();

    return $kvm_api_version;
}
576
# Cached result of kvm_user_version().
my $kvm_user_version;

# Return the version number printed in the first line of `kvm -help`,
# or 'unknown' when that output does not match. The result is cached.
sub kvm_user_version {

    return $kvm_user_version if $kvm_user_version;

    my $help_text = `kvm -help 2>/dev/null`;

    my $version = 'unknown';
    $version = $2 if $help_text =~ m/^QEMU( PC)? emulator version (\d+\.\d+\.\d+) /;

    $kvm_user_version = $version;

    return $kvm_user_version;
}
594
# True when /dev/vhost-net exists as a character device (checked once
# at load time).
my $kernel_has_vhost_net = (-c '/dev/vhost-net');
596
# Return the list of all drive option names: first the IDE, then the
# SCSI, then the VIRTIO names, each numbered from 0.
sub disknames {
    # order is important - used to autoselect boot disk
    my $numbered = sub {
        my ($prefix, $count) = @_;
        return map { "$prefix$_" } (0 .. ($count - 1));
    };

    return ($numbered->('ide', $MAX_IDE_DISKS),
            $numbered->('scsi', $MAX_SCSI_DISKS),
            $numbered->('virtio', $MAX_VIRTIO_DISKS));
}
603
# Return true when $dev is registered in $drivename_hash, i.e. it is
# one of the ideN/scsiN/virtioN option names.
sub valid_drivename {
    my ($dev) = @_;

    return defined($drivename_hash->{$dev});
}
609
# Return true when $key is a known configuration option, i.e. it has
# an entry in $confdesc.
sub option_exists {
    my ($key) = @_;

    return defined($confdesc->{$key});
}
614
# Return the array reference holding the supported NIC model names.
# This is the module's own list, not a copy.
sub nic_models {

    return $nic_model_list;
}
618
# Return a hash reference mapping each 'ostype' value to a short,
# human readable name.
sub os_list_description {

    return {
        other => 'Other',
        wxp => 'Windows XP',
        w2k => 'Windows 2000',
        w2k3 => 'Windows 2003',
        w2k8 => 'Windows 2008',
        wvista => 'Windows Vista',
        win7 => 'Windows 7',
        l24 => 'Linux 2.4',
        l26 => 'Linux 2.6',
    };
}
633
# Split an argument string into an array of words, honouring shell
# style quoting and backslash escapes.
#
# The string is parsed with Text::ParseWords::shellwords() and is never
# handed to a shell, so shell meta characters (';', '$(...)', globs,
# ...) are kept as plain text instead of being executed or expanded.
#
# Returns an array reference (empty for an empty/false $str).
# Dies with "unable to parse args: ..." when the string cannot be
# parsed, for example because of an unterminated quote.
sub split_args {
    my ($str) = @_;

    return [] if !$str;

    require Text::ParseWords;

    my @words = Text::ParseWords::shellwords($str);

    # shellwords() returns an empty list on a parse error; an empty list
    # is only a valid result when the input contains nothing but whitespace
    die "unable to parse args: $str\n" if !@words && $str =~ m/\S/;

    return \@words;
}
659
# Break a drive option name (ideN, scsiN or virtioN) into its parts.
#
# Returns a hash reference with the keys bus, index, controller, unit
# and desc ("<BUS> <controller>:<unit>"). IDE uses 2 units per
# controller, SCSI 7, everything else 1024.
# Dies when $dev does not have the expected format.
sub disk_devive_info {
    my ($dev) = @_;

    my ($bus, $index) = $dev =~ m/^(ide|scsi|virtio)(\d+)$/
        or die "unknown disk device format '$dev'";

    my %units_per_controller = (ide => 2, scsi => 7);
    my $maxdev = $units_per_controller{$bus} || 1024;

    my $controller = int($index / $maxdev);
    my $unit = $index % $maxdev;

    return {
        bus => $bus,
        desc => uc($bus) . " $controller:$unit",
        controller => $controller,
        unit => $unit,
        index => $index,
    };
}
683
# Build the drive name for option $dev. For IDE and SCSI this is
# "<bus><controller>-cd<unit>" when $media is 'cdrom' and
# "<bus><controller>-hd<unit>" otherwise; all other buses yield
# "<bus><index>".
sub qemu_drive_name {
    my ($dev, $media) = @_;

    my $info = disk_devive_info($dev);
    my $bus = $info->{bus};

    if ($bus ne 'ide' && $bus ne 'scsi') {
        return sprintf("%s%i", $bus, $info->{index});
    }

    my $mediastr = ($media eq 'cdrom') ? "-cd" : "-hd";

    return sprintf("%s%i%s%i", $bus, $info->{controller},
                   $mediastr, $info->{unit});
}
698
# Cached result of get_cdrom_path().
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1 and /dev/cdrom2 that is
# a symbolic link. A hit is cached for later calls.
# Returns undef when none of them is a symbolic link.
sub get_cdrom_path {

    return $cdrom_path if $cdrom_path;

    foreach my $candidate (qw(/dev/cdrom /dev/cdrom1 /dev/cdrom2)) {
        return $cdrom_path = $candidate if -l $candidate;
    }

    # nothing found - make this explicit instead of returning the
    # leftover value of the last file test
    return undef;
}
709
# Resolve the 'file' value of a CD-ROM drive to a path:
#   'cdrom'       => result of get_cdrom_path()
#   'none'        => empty string
#   absolute path => returned unchanged
#   anything else => resolved with PVE::Storage::path()
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';

    return '' if $cdrom eq 'none';

    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
723
# Try to convert old style file names to volume IDs.
#
# 'none', 'cdrom', /dev/... paths and values which already look like
# "<storage>:<volume>" are returned unchanged. Any other value which
# contains a '/' yields undef. The remaining plain file names become
# "local:iso/<file>" for media 'cdrom' and "local:<vmid>/<file>"
# otherwise.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    # nothing to convert
    return $file if $file eq 'none' || $file eq 'cdrom';
    return $file if $file =~ m|^/dev/.+| || $file =~ m/^([^:]+):(.+)$/;

    return undef if $file =~ m|/|;

    my $is_cdrom = $media && $media eq 'cdrom';

    return $is_cdrom ? "local:iso/$file" : "local:$vmid/$file";
}
742
# Check that the volume type $vtype matches the drive media type:
# media 'disk' expects 'image', media 'cdrom' expects 'iso'.
#
# Returns without doing anything when $media is not set. Raises a
# parameter exception for option $opt on a mismatch and dies with
# "internal error" for any other media value.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %expected_vtype = (disk => 'image', cdrom => 'iso');

    die "internal error" if !exists($expected_vtype{$media});

    my $etype = $expected_vtype{$media};

    return if ($vtype eq $etype);

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
761
# Normalize the parsed drive $drive in place.
#
# A 'file' value which is neither 'cdrom'/'none', a /dev/ path, a
# "<storage>:<volume>" id nor a plain number is treated as a file
# system path and converted to a volume ID; a parameter exception is
# raised for option $opt when no storage matches. The media type
# defaults to 'cdrom' for iso volumes and for 'cdrom'/'none'.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    my $file = $drive->{file};

    # try to convert filesystem paths to volume IDs
    my $needs_no_conversion =
        $file =~ m/^(cdrom|none)$/ ||
        $file =~ m|^/dev/.+| ||
        $file =~ m/^([^:]+):(.+)$/ ||
        $file =~ m/^\d+$/;

    if (!$needs_no_conversion) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $drive->{file});
        raise_param_exc({ $opt => "unable to associate path '$drive->{file}' to any storage"}) if !$vtype;
        $drive->{media} = 'cdrom' if !$drive->{media} && $vtype eq 'iso';
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
780
# Write a new configuration file for VM $vmid from the $settings hash.
# No locking is done here.
#
# 'name' defaults to "vm<vmid>" and 'memory' to the default returned by
# load_defaults(); both defaults are stored in the caller's $settings
# hash. Keys without an entry in $confdesc are skipped, as are
# undefined and empty values. A value of 0 is written, so options like
# 'acpi' or 'cpuunits' can be switched off when the VM is created.
# Options are written sorted by name.
#
# Dies when the configuration file already exists.
sub create_conf_nolock {
    my ($vmid, $settings) = @_;

    my $filename = config_file($vmid);

    die "configuration file '$filename' already exists\n" if -f $filename;

    my $defaults = load_defaults();

    $settings->{name} = "vm$vmid" if !$settings->{name};
    $settings->{memory} = $defaults->{memory} if !$settings->{memory};

    my $data = '';
    foreach my $opt (sort keys %$settings) {
        next if !$confdesc->{$opt};

        my $value = $settings->{$opt};

        # only skip really missing values - '0' is a valid setting
        next if !defined($value) || $value eq '';

        $data .= "$opt: $value\n";
    }

    PVE::Tools::file_set_contents($filename, $data);
}
805
# ideX = [volume=]volume-id[,media=d][,cyls=c,heads=h,secs=s[,trans=t]]
#        [,snapshot=on|off][,cache=off|none|writethrough|writeback]
#        [,format=f][,rerror=e][,werror=e][,backup=yes|no]
#        [,aio=native|threads]
#
# Parse the drive option string $data for option name $key (for example
# 'ide0'). Returns a hash reference with the keys 'interface', 'index',
# 'file' and all given options, or undef when the name or the string is
# not valid.
sub parse_drive {
    my ($key, $data) = @_;

    # $key may be undefined - used to verify JSON parameters
    # ('unknown' should not harm when used to verify parameters)
    my $res = { interface => 'unknown', index => 0 };

    if (defined($key)) {
        return undef if $key !~ m/^([^\d]+)(\d+)$/;
        @$res{qw(interface index)} = ($1, $2);
    }

    foreach my $p (split (/,/, $data)) {
        next if $p =~ m/^\s*$/;

        if ($p =~ m/^(file|volume|cyls|heads|secs|trans|media|snapshot|cache|format|rerror|werror|backup|aio)=(.+)$/) {
            my ($k, $v) = ($1, $2);

            $k = 'file' if $k eq 'volume';

            # every option may be given only once
            return undef if defined $res->{$k};

            $res->{$k} = $v;
        } elsif (!$res->{file} && $p !~ m/=/) {
            # a value without a key is the volume
            $res->{file} = $p;
        } else {
            return undef;
        }
    }

    return undef if !$res->{file};

    # allowed values per option
    my %value_pattern = (
        cache => qr/^(off|none|writethrough|writeback)$/,
        snapshot => qr/^(on|off)$/,
        cyls => qr/^\d+$/,
        heads => qr/^\d+$/,
        secs => qr/^\d+$/,
        media => qr/^(disk|cdrom)$/,
        trans => qr/^(none|lba|auto)$/,
        format => qr/^(raw|cow|qcow|qcow2|vmdk|cloop)$/,
        rerror => qr/^(ignore|report|stop)$/,
        werror => qr/^(enospc|ignore|report|stop)$/,
        backup => qr/^(yes|no)$/,
        aio => qr/^(native|threads)$/,
    );

    foreach my $opt (keys %value_pattern) {
        return undef if $res->{$opt} && $res->{$opt} !~ $value_pattern{$opt};
    }

    if ($res->{media} && ($res->{media} eq 'cdrom')) {
        # disk only options are not allowed for a CD-ROM
        foreach my $opt (qw(snapshot trans format heads secs cyls)) {
            return undef if $res->{$opt};
        }
        return undef if $res->{interface} eq 'virtio';
    }

    # rerror does not work with scsi drives
    return undef if $res->{rerror} && $res->{interface} eq 'scsi';

    return $res;
}
875
# Drive options which are printed as "<name>=<value>", in output order.
my @qemu_drive_options = qw(heads secs cyls trans media format cache snapshot rerror werror aio);

# Convert a parsed drive back into its configuration string: the file
# followed by ",<option>=<value>" for every option with a true value
# (including 'backup').
sub print_drive {
    my ($vmid, $drive) = @_;

    my @set_options = grep { $drive->{$_} } (@qemu_drive_options, 'backup');

    return join('', $drive->{file}, map { ",$_=$drive->{$_}" } @set_options);
}
888
# Build the device description string for a parsed drive. VIRTIO, SCSI
# and IDE drives are supported; an empty string is returned for any
# other interface.
sub print_drivedevice_full {
    my ($storecfg, $vmid, $drive) = @_;

    my $iface = $drive->{interface};
    my $drive_id = "$drive->{interface}$drive->{index}";

    if ($iface eq 'virtio') {
        my $pciaddr = print_pci_addr($drive_id);
        return "virtio-blk-pci,drive=drive-${drive_id},id=device-${drive_id}${pciaddr}";
    }

    if ($iface eq 'scsi') {
        # 7 units per SCSI controller
        my $maxdev = 7;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;

        return "scsi-disk,bus=scsi${controller}.0,scsi-id=${unit},drive=drive-${drive_id},id=device-${drive_id}";
    }

    if ($iface eq 'ide') {
        # 2 units per IDE controller
        my $maxdev = 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;

        return "ide-drive,bus=ide.${controller},unit=${unit},drive=drive-${drive_id},id=device-${drive_id}";
    }

    # not implemented for usb, e.g.
    # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0

    return '';
}
924
# Build the drive description string for a parsed drive:
# "file=<path>,if=none,id=drive-<interface><index>" followed by the
# drive options. The "file=" part is left out when no path is found.
sub print_drive_full {
    my ($storecfg, $vmid, $drive) = @_;

    my $opts = join('', map { ",$_=$drive->{$_}" }
                        grep { $drive->{$_} } @qemu_drive_options);

    # use linux-aio by default (qemu default is threads)
    $opts .= ",aio=native" if !$drive->{aio};

    my $volid = $drive->{file};
    my $path;

    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
    } elsif ($volid =~ m|^/|) {
        # absolute paths are used as they are
        $path = $volid;
    } else {
        $path = PVE::Storage::path($storecfg, $volid);
    }

    my $pathinfo = $path ? "file=$path," : '';

    return "${pathinfo}if=none,id=drive-$drive->{interface}$drive->{index}$opts";
}
952
953
# Return a true value when $drive is set and its media type is 'cdrom',
# a false value otherwise.
sub drive_is_cdrom {
    my ($drive) = @_;

    return $drive if !$drive;

    my $media = $drive->{media};

    return $media if !$media;

    return ($media eq 'cdrom');
}
960
# Parse a host PCI device id of the form 'bus:dev.func' (two, two and
# one lower case hexadecimal digits). Returns { pciid => $value } or
# undef when the value is empty or has a different format.
sub parse_hostpci {
    my ($value) = @_;

    return undef if !$value;

    return undef if $value !~ m/^[a-f0-9]{2}:[a-f0-9]{2}\.[a-f0-9]$/;

    return { pciid => $value };
}
976
977 # netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
# Parse a network option string like
#   e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
# Returns a hash reference with the keys 'model' (lower case),
# 'macaddr' (upper case, taken from random_ether_addr() when not
# given) and, if present, 'bridge' and 'rate'. Returns undef when a
# part cannot be parsed or when no model is given.
sub parse_net {
    my ($data) = @_;

    my $res = {};

    foreach my $kvp (split(/,/, $data)) {

        if ($kvp =~ m/^(ne2k_pci|e1000|rtl8139|pcnet|virtio|ne2k_isa|i82551|i82557b|i82559er)(=([0-9a-f]{2}(:[0-9a-f]{2}){5}))?$/i) {
            my ($model, $mac) = ($1, $3);
            my $macaddr = uc($mac) || random_ether_addr();
            $res->{model} = lc($model);
            $res->{macaddr} = $macaddr;
            next;
        }

        if ($kvp =~ m/^bridge=(\S+)$/) {
            $res->{bridge} = $1;
            next;
        }

        if ($kvp =~ m/^rate=(\d+(\.\d+)?)$/) {
            $res->{rate} = $1;
            next;
        }

        return undef;
    }

    return undef if !$res->{model};

    return $res;
}
1004
# Convert a parsed network definition back into its configuration
# string: "<model>[=<macaddr>][,bridge=<bridge>][,rate=<rate>]".
sub print_net {
    my ($net) = @_;

    my $model_part = "$net->{model}";
    $model_part .= "=$net->{macaddr}" if $net->{macaddr};

    my @fields = ($model_part);
    push @fields, "bridge=$net->{bridge}" if $net->{bridge};
    push @fields, "rate=$net->{rate}" if $net->{rate};

    return join(',', @fields);
}
1015
# Rewrite every netN entry of $settings through parse_net() and
# print_net(), so that entries without a MAC address get the one
# chosen by parse_net(). Entries which cannot be parsed are left alone.
sub add_random_macs {
    my ($settings) = @_;

    foreach my $opt (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$opt});
        $settings->{$opt} = print_net($net) if $net;
    }
}
1026
# Record $volid as unused volume: the free unusedN slot with the lowest
# number in $config is looked up and the entry is stored in $res.
# Nothing is done when $config already lists $volid as unused.
# Dies when all unusedN slots are taken.
sub add_unused_volume {
    my ($config, $res, $volid) = @_;

    my $key;

    # walk down from the highest slot, so the last free one seen is the
    # one with the lowest number
    foreach my $ind (reverse(0 .. $MAX_UNUSED_DISKS - 1)) {
        my $slot = "unused$ind";
        my $vid = $config->{$slot};

        if ($vid) {
            return if $vid eq $volid; # do not add duplicates
        } else {
            $key = $slot;
        }
    }

    die "To many unused volume - please delete them first.\n" if !$key;

    $res->{$key} = $volid;
}
1044
# fixme: remove all those $noerr parameters?
1046
PVE::JSONSchema::register_format('pve-qm-bootdisk', \&verify_bootdisk);

# Format check 'pve-qm-bootdisk': returns $value when it is a valid
# drive name. Otherwise returns undef if $noerr is set and dies if not.
sub verify_bootdisk {
    my ($value, $noerr) = @_;

    if (!valid_drivename($value)) {
        return undef if $noerr;

        die "invalid boot disk '$value'\n";
    }

    return $value;
}
1057
PVE::JSONSchema::register_format('pve-qm-net', \&verify_net);

# Format check 'pve-qm-net': returns $value when parse_net() accepts
# it. Otherwise returns undef if $noerr is set and dies if not.
sub verify_net {
    my ($value, $noerr) = @_;

    if (!parse_net($value)) {
        return undef if $noerr;

        die "unable to parse network options\n";
    }

    return $value;
}
1068
PVE::JSONSchema::register_format('pve-qm-drive', \&verify_drive);

# Format check 'pve-qm-drive': returns $value when parse_drive()
# accepts it (called without an option name). Otherwise returns undef
# if $noerr is set and dies if not.
sub verify_drive {
    my ($value, $noerr) = @_;

    if (!parse_drive(undef, $value)) {
        return undef if $noerr;

        die "unable to parse drive options\n";
    }

    return $value;
}
1079
PVE::JSONSchema::register_format('pve-qm-hostpci', \&verify_hostpci);
# Format checker for 'pve-qm-hostpci': the value is valid when
# parse_hostpci() accepts it. On failure returns undef when $noerr is
# set, otherwise dies.
sub verify_hostpci {
    my ($value, $noerr) = @_;

    if (!parse_hostpci($value)) {
        die "unable to parse pci id\n" if !$noerr;
        return undef;
    }

    return $value;
}
1090
PVE::JSONSchema::register_format('pve-qm-watchdog', \&verify_watchdog);
# Format checker for 'pve-qm-watchdog': the value is valid when
# parse_watchdog() accepts it. On failure returns undef when $noerr is
# set, otherwise dies.
sub verify_watchdog {
    my ($value, $noerr) = @_;

    if (!parse_watchdog($value)) {
        die "unable to parse watchdog options\n" if !$noerr;
        return undef;
    }

    return $value;
}
1101
# Parse a watchdog option string: a comma separated list of
#   [model=]<i6300esb|ib700>  and
#   [action=]<reset|shutdown|poweroff|pause|debug|none>
# Blank items are ignored. Returns a hash ref with the optional keys
# 'model' and 'action', or undef for an empty value or any unknown item.
sub parse_watchdog {
    my ($value) = @_;

    return undef if !$value;

    my %opts;

    for my $item (split(/,/, $value)) {
        next if $item =~ m/^\s*$/;

        if ($item =~ m/^(model=)?(i6300esb|ib700)$/) {
            $opts{model} = $2;
            next;
        }

        if ($item =~ m/^(action=)?(reset|shutdown|poweroff|pause|debug|none)$/) {
            $opts{action} = $2;
            next;
        }

        return undef;
    }

    return \%opts;
}
1123
# Parse a USB device specification: a comma separated list of
#   host=<vendorid>:<productid>   (4 hex digits each)  or
#   host=<bus>-<port>[.<port>...]
# Returns a hash ref with vendorid/productid and/or hostbus/hostport,
# or undef when the value is empty, contains an unknown item, or yields
# no item at all.
sub parse_usb_device {
    my ($value) = @_;

    return undef if !$value;

    my %dev;
    my $matched = 0;

    for my $item (split(/,/, $value)) {
        if ($item =~ m/^host=([0-9A-Fa-f]{4}):([0-9A-Fa-f]{4})$/) {
            @dev{qw(vendorid productid)} = ($1, $2);
        } elsif ($item =~ m/^host=(\d+)\-(\d+(\.\d+)*)$/) {
            @dev{qw(hostbus hostport)} = ($1, $2);
        } else {
            return undef;
        }
        $matched = 1;
    }

    return undef if !$matched;

    return \%dev;
}
1150
PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);
# Format checker for 'pve-qm-usb-device': the value is valid when
# parse_usb_device() accepts it. On failure returns undef when $noerr is
# set, otherwise dies.
sub verify_usb_device {
    my ($value, $noerr) = @_;

    if (!parse_usb_device($value)) {
        die "unable to parse usb device\n" if !$noerr;
        return undef;
    }

    return $value;
}
1161
# add JSON properties for create and set function
#
# Copies every entry of the $confdesc schema into $prop (overwriting
# entries with the same name) and returns $prop.
sub json_config_properties {
    my ($prop) = @_;

    $prop->{$_} = $confdesc->{$_} for keys %$confdesc;

    return $prop;
}
1172
# Validate $value against the schema entry for $key in $confdesc and
# return the normalized value:
#   boolean - 1 for 1/on/yes/true, 0 for 0/off/no/false (case insensitive)
#   integer - int() of an all-digit string
#   string  - checked with the schema format if one is set, otherwise
#             returned with one pair of surrounding double quotes removed
# Dies on unknown keys, undefined values, values containing CR/LF and on
# any failed check.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);

    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    die "internal error" if $type ne 'string';

    my $fmt = $desc->{format};
    if (!$fmt) {
        # plain string: strip one pair of enclosing double quotes
        $value =~ s/^\"(.*)\"$/$1/;
        return $value;
    }

    if ($fmt eq 'pve-qm-drive') {
        # special case - we need to pass $key to parse_drive()
        return $value if parse_drive($key, $value);
        die "unable to parse drive options\n";
    }

    PVE::JSONSchema::check_format($fmt, $value);
    return $value;
}
1212
# Run $code->(@param) through lock_file() on the per-VM lock file, using
# a timeout argument of 10. Re-throws whatever is left in $@ afterwards.
# NOTE(review): presumably lock_file() reports failures via $@ instead of
# dying itself - confirm in PVE::Tools.
sub lock_config {
    my ($vmid, $code, @param) = @_;

    lock_file(config_file_lock($vmid), 10, $code, @param);

    die $@ if $@;
}
1222
# Return the cluster file system relative path of a VM configuration
# file. $node defaults to the local node name ($nodename).
sub cfs_config_path {
    my ($vmid, $node) = @_;

    $node ||= $nodename;

    return "nodes/$node/qemu-server/$vmid.conf";
}
1229
# Placeholder for a real IOMMU capability check - currently always
# reports support (returns 1).
sub check_iommu_support{
    #fixme : need to check IOMMU support
    #http://www.linux-kvm.org/page/How_to_assign_devices_with_VT-d_in_KVM

    return 1;
}
1238
# Return the absolute path (below /etc/pve) of the configuration file
# for VM $vmid on $node; the node handling is done by cfs_config_path().
sub config_file {
    my ($vmid, $node) = @_;

    return "/etc/pve/" . cfs_config_path($vmid, $node);
}
1245
# Return the path of the lock file (inside $lock_dir) that guards the
# configuration of VM $vmid.
sub config_file_lock {
    my $vmid = shift;

    return "$lock_dir/lock-$vmid.conf";
}
1251
# Set access and modification time of the VM's configuration file to
# the current time (utime with undef timestamps).
sub touch_config {
    my ($vmid) = @_;

    utime(undef, undef, config_file($vmid));
}
1258
# Walk all non-cdrom drives in $settings.
#
# A drive whose 'file' has the form "[<storeid>:]<size>" requests a new
# volume: it is allocated on that storage (default 'local'), the drive
# entry in $settings is rewritten to reference the new volume id, and
# the volume id is collected. Any other 'file' must refer to an existing
# regular file or block device.
#
# Returns an array ref of the newly allocated volume ids. On any error
# the volumes allocated so far are freed again and the error is re-thrown.
sub create_disks {
    my ($storecfg, $vmid, $settings) = @_;

    my @allocated;

    my $handle_drive = sub {
        my ($ds, $disk) = @_;

        return if drive_is_cdrom($disk);

        my $file = $disk->{file};

        # list context: empty on mismatch, else ($1, $2, $3, $4)
        my @spec = ($file =~ m/^(([^:\s]+):)?(\d+(\.\d+)?)$/);

        if (!@spec) {
            # reference to an existing image - just verify it is there
            my $path = ($file =~ m|^/dev/.+|)
                ? $file
                : PVE::Storage::path($storecfg, $file);

            die "image '$path' does not exists\n" if !(-f $path || -b $path);
            return;
        }

        my $storeid = $spec[1] || 'local';
        my $size = $spec[2];

        my $defformat = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $fmt = $disk->{format} || $defformat;
        syslog('info', "VM $vmid creating new disk - size is $size GB");

        my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid,
                                              $fmt, undef, $size*1024*1024);

        $disk->{file} = $volid;
        delete $disk->{format}; # no longer needed
        push @allocated, $volid;
        $settings->{$ds} = PVE::QemuServer::print_drive($vmid, $disk);
    };

    eval { foreach_drive($settings, $handle_drive); };

    if (my $err = $@) {
        syslog('err', "VM $vmid creating disks failed");
        # roll back: release everything allocated before the failure
        for my $volid (@allocated) {
            eval { PVE::Storage::vdisk_free($storecfg, $volid); };
            warn $@ if $@;
        }
        die $err;
    }

    return \@allocated;
}
1313
# Free the storage volume $volid, but only if it is a storage volume id
# (not an absolute path) and the storage layer reports VM $vmid as its
# owner. Logs the deletion and touches the VM config afterwards.
sub unlink_image {
    my ($storecfg, $vmid, $volid) = @_;

    if ($volid =~ m|^/|) {
        die "reject to unlink absolute path '$volid'";
    }

    my ($path, $owner) = PVE::Storage::path($storecfg, $volid);

    unless ($owner && ($owner == $vmid)) {
        die "reject to unlink '$volid' - not owned by this VM";
    }

    syslog('info', "VM $vmid deleting volume '$volid'");

    PVE::Storage::vdisk_free($storecfg, $volid);

    touch_config($vmid);
}
1331
# Destroy VM $vmid: free all disks referenced by its configuration that
# are owned by the VM, remove the configuration file, and finally free
# every remaining volume the storage layer lists for this VM.
# Dies if the VM does not exist or is locked; errors while removing the
# leftover volumes are only reported as warnings.
sub destroy_vm {
    my ($storecfg, $vmid) = @_;

    my $conffile = config_file($vmid);

    my $conf = load_config($vmid);

    check_lock($conf);

    # only remove disks owned by this VM
    foreach_drive($conf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);

        # Note: this is a callback, not a loop body - skip a drive with
        # 'return'. Using 'next' here would leave the sub by jumping into
        # the caller's loop ("Exiting subroutine via next").
        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        PVE::Storage::vdisk_free($storecfg, $volid);
    });

    unlink $conffile;

    # also remove unused disk
    eval {
        my $dl = PVE::Storage::vdisk_list($storecfg, undef, $vmid);

        eval {
            PVE::Storage::foreach_volid($dl, sub {
                my ($volid, $sid, $volname, $d) = @_;
                PVE::Storage::vdisk_free($storecfg, $volid);
            });
        };
        warn $@ if $@;

    };
    warn $@ if $@;
}
1373
# fixme: remove?
#
# Return a hash ref mapping every drive name in $conf to its parsed
# drive hash, extended by a 'disksize' entry. The size is taken from
# file_size_info() for /dev/ paths and from the storage volume list for
# everything else, divided by 1024*1024; it is 0 when nothing is known
# about the drive's file.
sub load_diskinfo_old {
    my ($storecfg, $vmid, $conf) = @_;

    my %sizeinfo;
    my %drives;
    my $vollist; # stays undef when no storage volume is referenced

    foreach_drive($conf, sub {
        my ($ds, $di) = @_;

        $drives{$ds} = $di;

        return if drive_is_cdrom($di);

        my $file = $di->{file};
        if ($file =~ m|^/dev/.+|) {
            $sizeinfo{$file}->{size} = PVE::Storage::file_size_info($file);
        } else {
            push @$vollist, $file;
        }
    });

    eval {
        my $dl = PVE::Storage::vdisk_list($storecfg, undef, $vmid, $vollist);

        PVE::Storage::foreach_volid($dl, sub {
            my ($volid, $sid, $volname, $d) = @_;
            $sizeinfo{$volid} = $d;
        });
    };
    warn $@ if $@;

    for my $di (values %drives) {
        my $known = $sizeinfo{$di->{file}};
        $di->{disksize} = $known ? $known->{size} / (1024*1024) : 0;
    }

    return \%drives;
}
1415
# Read the configuration of VM $vmid (on the local node) through the
# cluster file system layer. Dies if no configuration is returned.
sub load_config {
    my ($vmid) = @_;

    my $conf = PVE::Cluster::cfs_read_file(cfs_config_path($vmid));

    defined($conf) or die "no such VM ('$vmid')\n";

    return $conf;
}
1427
# Parse the raw text of a VM configuration file into a hash ref.
#
# $filename - must end in /qemu-server/<vmid>.conf (used to get the vmid)
# $raw      - file content; undef yields undef
#
# The result always carries a 'digest' (SHA1 of $raw). Comment and blank
# lines are skipped. 'description' is text-decoded, 'args' is taken
# verbatim, all other "key: value" lines are validated with check_type();
# invalid values are reported with a warning and dropped. Drive values
# get their file name mapped to a volume id, 'cdrom' is stored as 'ide2',
# and a legacy 'smp' setting is converted to 'sockets'.
sub parse_vm_config {
    my ($filename, $raw) = @_;

    return undef if !defined($raw);

    my $res = { digest => Digest::SHA1::sha1_hex($raw) };

    my ($vmid) = $filename =~ m|/qemu-server/(\d+)\.conf$|
        or die "got strange filename '$filename'";

    # consume $raw line by line
    while ($raw && $raw =~ s/^(.*?)(\n|$)//) {
        my $line = $1;

        next if $line =~ m/^\#/ || $line =~ m/^\s*$/;

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $res->{description} = PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            $res->{args} = $2;
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(\S+)\s*$/) {
            my ($key, $value) = ($1, $2);

            eval { $value = check_type($key, $value); };
            if ($@) {
                warn "vm $vmid - unable to parse value of '$key' - $@";
                next;
            }

            my $fmt = $confdesc->{$key}->{format};
            if ($fmt && $fmt eq 'pve-qm-drive') {
                my $v = parse_drive($key, $value);
                my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media});
                if (!$volid) {
                    warn "vm $vmid - unable to parse value of '$key'\n";
                    next;
                }
                $v->{file} = $volid;
                $value = print_drive($vmid, $v);
            }

            # the legacy 'cdrom' option is an alias for 'ide2'
            my $target = ($key eq 'cdrom') ? 'ide2' : $key;
            $res->{$target} = $value;
        }
    }

    # convert old smp to sockets
    $res->{sockets} = $res->{smp} if $res->{smp} && !$res->{sockets};
    delete $res->{smp};

    return $res;
}
1493
# Apply configuration changes to VM $vmid while holding the VM's config
# lock. Takes the same arguments as change_config_nolock().
#
# The work is wrapped in a closure: writing '&change_config_nolock'
# (without a backslash) would call the function right away with the
# current @_ - i.e. outside the lock - and hand its return value to
# lock_config() as the "callback". The closure also makes sure $vmid is
# passed on, which lock_config() does not do for its callback.
sub change_config {
    my ($vmid, $settings, $unset, $skiplock) = @_;

    lock_config($vmid, sub {
        change_config_nolock($vmid, $settings, $unset, $skiplock);
    });
}
1499
# Rewrite the configuration file of VM $vmid without taking the lock.
#
# $settings - hash of options to set (values are validated by check_type)
# $unset    - hash whose defined keys name options to remove
# $skiplock - when false, check_lock() is applied to $settings first
#
# The existing file is copied line by line into "<file>.<pid>.tmp":
# comments and blank lines are kept, known keys get their new value,
# unset keys and duplicate keys are dropped, 'smp' is renamed to
# 'sockets', and 'unusedN' lines for volumes that are re-added as a drive
# are removed. Settings not present in the file are appended. The temp
# file then replaces the original via rename(); on any error it is
# removed and the error is re-thrown.
sub change_config_nolock {
    my ($vmid, $settings, $unset, $skiplock) = @_;

    my $pending = {};

    $unset->{ide2} = $unset->{cdrom} if $unset->{cdrom};

    check_lock($settings) if !$skiplock;

    # we do not use 'smp' any longer
    if ($settings->{sockets}) {
        $unset->{smp} = 1;
    } elsif ($settings->{smp}) {
        $settings->{sockets} = $settings->{smp};
        $unset->{smp} = 1;
    }

    my $new_volids = {};

    foreach my $key (keys %$settings) {
        next if $key eq 'digest';

        my $value = $settings->{$key};
        $value = PVE::Tools::encode_text($value) if $key eq 'description';

        eval { $value = check_type($key, $value); };
        die "unable to parse value of '$key' - $@" if $@;

        # the legacy 'cdrom' option is stored as 'ide2'
        $pending->{ ($key eq 'cdrom') ? 'ide2' : $key } = $value;

        if (valid_drivename($key)) {
            my $drive = PVE::QemuServer::parse_drive($key, $value);
            $new_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
        }
    }

    my $filename = config_file($vmid);
    my $tmpfn = "$filename.$$.tmp";

    my $fh = IO::File->new($filename, "r") ||
        die "unable to read config for VM $vmid\n";

    my $werror = "unable to write config for VM $vmid\n";

    my $out = IO::File->new($tmpfn, "w") || die $werror;

    eval {

        my %seen;

        while (my $line = <$fh>) {

            if (($line =~ m/^\#/) || ($line =~ m/^\s*$/)) {
                print {$out} $line or die $werror;
                next;
            }

            my ($key, $value) = $line =~ m/^([a-z][a-z_]*\d*):\s*(.*\S)\s*$/
                or die "unable to parse config file: $line\n";

            # remove 'unusedX' settings if we re-add a volume
            next if $key =~ m/^unused/ && $new_volids->{$value};

            # convert 'smp' to 'sockets'
            $key = 'sockets' if $key eq 'smp';

            # only the first occurrence of a key survives
            next if $seen{$key}++;

            $value = delete $pending->{$key} if defined($pending->{$key});

            if (!defined($unset->{$key})) {
                print {$out} "$key: $value\n" or die $werror;
            }
        }

        # append settings that were not present in the file
        foreach my $key (keys %$pending) {
            next if defined($unset->{$key});
            print {$out} "$key: $pending->{$key}\n" or die $werror;
        }
    };

    my $err = $@;

    $fh->close();

    # remove the temporary file, then propagate the given error
    my $abort = sub {
        my ($msg) = @_;
        unlink $tmpfn;
        die $msg;
    };

    if ($err) {
        $out->close();
        $abort->($err);
    }

    if (!$out->close()) {
        $abort->("close failed - $!\n");
    }

    if (!rename($tmpfn, $filename)) {
        $abort->("rename failed - $!\n");
    }
}
1616
# Build the hash of default option values: every defined 'default' from
# the $confdesc schema, with 'keyboard' overridden by the value from
# datacenter.cfg when that is set.
sub load_defaults {

    my %defaults;

    # we use static defaults from our JSON schema configuration
    for my $key (keys %$confdesc) {
        my $default = $confdesc->{$key}->{default};
        $defaults{$key} = $default if defined($default);
    }

    my $dccfg = PVE::Cluster::cfs_read_file('datacenter.cfg');
    $defaults{keyboard} = $dccfg->{keyboard} if $dccfg->{keyboard};

    return \%defaults;
}
1633
# Return a hash ref { <vmid> => { exists => 1 } } for every entry of the
# cluster VM list that belongs to the local node ($nodename) and has
# type 'qemu'. Returns an empty hash ref when no VM list is available.
sub config_list {
    my $res = {};

    my $vmlist = PVE::Cluster::get_vmlist();
    return $res unless $vmlist && $vmlist->{ids};

    my $ids = $vmlist->{ids};

    for my $vmid (keys %$ids) {
        my $d = $ids->{$vmid};
        next unless $d->{node} && $d->{node} eq $nodename;
        next unless $d->{type} && $d->{type} eq 'qemu';
        $res->{$vmid}->{exists} = 1;
    }

    return $res;
}
1648
# test if VM uses local resources (to prevent migration)
#
# Local resources are the old style 'hostusb'/'hostpci' options and any
# usbN, hostpciN, serialN or parallelN key. Returns 1 or 0; unless
# $noerr is set, dies instead of returning 1.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    # old syntax
    my $loc_res = ($conf->{hostusb} || $conf->{hostpci}) ? 1 : 0;

    $loc_res = 1 if grep { m/^(usb|hostpci|serial|parallel)\d+$/ } keys %$conf;

    die "VM uses local resources\n" if $loc_res && !$noerr;

    return $loc_res;
}
1666
# Die if the given configuration hash carries a true 'lock' entry.
sub check_lock {
    my ($conf) = @_;

    if ($conf->{lock}) {
        die "VM is locked ($conf->{lock})\n";
    }
}
1672
# Check /proc/<pid>/cmdline: returns 1 if the process' command ends in
# 'kvm' and the argument following its first -pidfile/--pidfile option
# equals $pidfile. Returns a false value in every other case (process
# gone, different command, different or missing pid file argument).
sub check_cmdline {
    my ($pidfile, $pid) = @_;

    my $fh = IO::File->new("/proc/$pid/cmdline", "r");
    return undef if !defined($fh);

    my $line = <$fh>;
    $fh->close;
    return undef if !$line;

    # arguments are separated by NUL bytes
    my @param = split(/\0/, $line);

    my $cmd = $param[0];
    return if !$cmd || ($cmd !~ m|kvm$|);

    for my $i (0 .. $#param) {
        my $opt = $param[$i];
        next if !$opt;
        next unless ($opt eq '-pidfile') || ($opt eq '--pidfile');

        # only the first pidfile option is considered
        my $arg = $param[$i+1];
        return 1 if $arg && ($arg eq $pidfile);
        return undef;
    }

    return undef;
}
1698
# Return the pid of the running kvm process of VM $vmid, or undef if the
# VM is not running.
#
# Unless $nocheck is set, dies when the VM has no configuration file.
# A VM counts as running when its pid file holds a pid whose command
# line matches (see check_cmdline) and which
# PVE::ProcFSTools::check_process_running() reports as running.
sub check_running {
    my ($vmid, $nocheck) = @_;

    my $filename = config_file($vmid);

    die "unable to find configuration file for VM $vmid - no such machine\n"
        if !$nocheck && ! -f $filename;

    my $pidfile = pidfile_name($vmid);

    # explicit mode argument instead of a "<$pidfile" mode string
    my $fd = IO::File->new($pidfile, "r");
    return undef if !$fd;

    my $st = stat($fd);
    my $line = <$fd>;
    close($fd);

    # the timestamp checked here is the pid file's, so name that file
    if ($st && $st->mtime > time()) {
        warn "file '$pidfile' modified in future\n";
    }

    # an empty pid file yields an undefined $line
    if (defined($line) && $line =~ m/^(\d+)$/) {
        my $pid = $1;
        if (check_cmdline($pidfile, $pid) &&
            PVE::ProcFSTools::check_process_running($pid)) {
            return $pid;
        }
    }

    return undef;
}
1731
# Return the hash of local VMs from config_list(), with a 'pid' entry
# added for every VM that has a "<vmid>.pid" file in $var_run_tmpdir and
# is reported as running by check_running().
sub vzlist {

    my $vzlist = config_list();

    my $dir = IO::Dir->new($var_run_tmpdir);
    return $vzlist if !$dir;

    while (defined(my $entry = $dir->read)) {
        my ($vmid) = $entry =~ m/^(\d+)\.pid$/ or next;
        next if !defined($vzlist->{$vmid});

        my $pid = check_running($vmid) or next;
        $vzlist->{$vmid}->{pid} = $pid;
    }

    return $vzlist;
}
1749
# remembers, per storage id or absolute path, when a size query last
# failed
my $storage_timeout_hash = {};

# Determine the size of the VM's boot disk.
#
# Returns undef when there is no usable boot disk entry (missing or
# invalid 'bootdisk', unparsable drive, cdrom, no file), when the last
# failed query for the same storage/path is less than 30 seconds old, or
# when file_size_info() returns no format (treated as a timeout).
# Otherwise returns ($size, $used) in list context and $size in scalar
# context.
sub disksize {
    my ($storecfg, $conf) = @_;

    my $bootdisk = $conf->{bootdisk};
    return undef if !$bootdisk;
    return undef if !valid_drivename($bootdisk);
    return undef if !$conf->{$bootdisk};

    my $drive = parse_drive($bootdisk, $conf->{$bootdisk});
    return undef if !defined($drive);
    return undef if drive_is_cdrom($drive);

    my $volid = $drive->{file};
    return undef if !$volid;

    my ($path, $timeoutid);

    if ($volid =~ m|^/|) {
        # absolute path - used directly and as its own timeout key
        $path = $timeoutid = $volid;
    } else {
        $timeoutid = PVE::Storage::parse_volume_id($volid);
        $path = PVE::Storage::path($storecfg, $volid);
    }

    if (my $last_timeout = $storage_timeout_hash->{$timeoutid}) {
        # skip storage with errors
        return undef if (time() - $last_timeout) < 30;
        delete $storage_timeout_hash->{$timeoutid};
    }

    my ($size, $format, $used) = PVE::Storage::file_size_info($path, 1);

    if (!defined($format)) {
        # got timeout
        $storage_timeout_hash->{$timeoutid} = time();
        return undef;
    }

    return wantarray ? ($size, $used) : $size;
}
1801
# per-pid CPU accounting sample from the previous vmstatus() call
my $last_proc_pid_stat;

# Collect status information for all local VMs, or only for $opt_vmid
# when given. Returns a hash ref { <vmid> => { ... } } with the keys
# pid, status ('running'/'stopped'), disk, maxdisk, cpus, name, maxmem,
# uptime, cpu, relcpu, mem, netin, netout, diskread and diskwrite.
#
# Network counters are summed over all tap<vmid>i* devices, disk
# counters come from /proc/<pid>/io, memory and CPU usage from
# /proc/<pid>/stat. CPU usage is computed relative to the sample stored
# by the previous call; the first call for a pid reports 0.
sub vmstatus {
    my ($opt_vmid) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);

        my $conf = PVE::Cluster::cfs_read_file(cfs_config_path($vmid)) || {};

        my $pid = $list->{$vmid}->{pid};

        my ($size, $used) = disksize($storecfg, $conf);
        my $have_disk = defined($size) && defined($used);

        $res->{$vmid} = {
            pid => $pid,
            # fixme: better status?
            status => $pid ? 'running' : 'stopped',
            disk => $have_disk ? $used : 0,
            maxdisk => $have_disk ? $size : 0,
            cpus => ($conf->{sockets} || 1) * ($conf->{cores} || 1),
            name => $conf->{name} || "VM $vmid",
            maxmem => $conf->{memory} ? $conf->{memory}*(1024*1024) : 0,
            uptime => 0,
            cpu => 0,
            relcpu => 0,
            mem => 0,
            netout => 0,
            netin => 0,
            diskread => 0,
            diskwrite => 0,
        };
    }

    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^tap([1-9]\d*)i/;
        my $d = $res->{$1};
        next if !$d;

        # counters are from the host side of the tap device, so what the
        # host receives is what the VM sent
        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};
    }

    my $cpucount = $cpuinfo->{cpus} || 1;
    my $ctime = gettimeofday;

    foreach my $vmid (keys %$list) {

        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        if (my $fh = IO::File->new("/proc/$pid/io", "r")) {
            my %io;
            while (defined(my $line = <$fh>)) {
                $io{$1} = $2 if $line =~ m/^([rw]char):\s+(\d+)$/;
            }
            close($fh);
            $d->{diskread} = $io{rchar} || 0;
            $d->{diskwrite} = $io{wchar} || 0;
        }

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        # never assume more virtual CPUs than the host has
        my $vcpus = $d->{cpus} > $cpucount ? $cpucount : $d->{cpus};

        $d->{uptime} = int(($uptime - $pstat->{starttime})/$cpuinfo->{user_hz});

        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss}/$pstat->{vsize})*$d->{maxmem});
        }

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first sample for this pid - nothing to compare against yet
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
                relcpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = $dutime/$dtime;
            $d->{relcpu} = ($d->{cpu} * $cpucount) / $vcpus;
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => $d->{cpu},
                relcpu => $d->{relcpu},
            };
        } else {
            # interval too short - report the previously computed values
            $d->{cpu} = $old->{cpu};
            $d->{relcpu} = $old->{relcpu};
        }
    }

    return $res;
}
1930
# Call $func->($drivename, $parsed_drive) for every key of $conf that is
# a valid drive name and whose value parse_drive() accepts. The
# iteration order is that of 'keys', i.e. unspecified.
sub foreach_drive {
    my ($conf, $func) = @_;

    for my $ds (keys %$conf) {
        next unless valid_drivename($ds);

        my $drive = parse_drive($ds, $conf->{$ds}) or next;

        $func->($ds, $drive);
    }
}
1943
# Translate a VM configuration into the kvm command line.
#
# $storecfg    - storage configuration (passed on to the drive helpers)
# $vmid        - VM id
# $conf        - VM configuration hash
# $defaults    - default values (see load_defaults)
# $migrate_uri - optional URI for '-incoming'
#
# Returns the command as array ref; in list context returns
# ($cmd, $vollist) where $vollist holds the drive files that parse as
# storage volume ids. Dies when the installed kvm is too old, when a
# configured serial/parallel device is missing, on an out-of-range net
# index, or when KVM is requested but the CPU lacks hardware
# virtualization.
sub config_to_command {
    my ($storecfg, $vmid, $conf, $defaults, $migrate_uri) = @_;

    my $cmd = [];
    my $pciaddr = '';
    my $kvmver = kvm_user_version();
    my $vernum = 0; # unknown
    if ($kvmver =~ m/^(\d+)\.(\d+)\.(\d+)$/) {
        $vernum = $1*1000000+$2*1000+$3;
    }

    die "detected old qemu-kvm binary ($kvmver)\n" if $vernum < 14000;

    my $have_ovz = -f '/proc/vz/vestat';

    push @$cmd, '/usr/bin/kvm';

    push @$cmd, '-id', $vmid;

    my $use_virtio = 0;

    my $socket = monitor_socket($vmid);
    push @$cmd, '-chardev', "socket,id=monitor,path=$socket,server,nowait";
    push @$cmd, '-mon', "chardev=monitor,mode=readline";

    $socket = vnc_socket($vmid);
    push @$cmd, '-vnc', "unix:$socket,x509,password";

    push @$cmd, '-pidfile' , pidfile_name($vmid);

    push @$cmd, '-daemonize';

    push @$cmd, '-incoming', $migrate_uri if $migrate_uri;

    # include usb device config
    push @$cmd, '-readconfig', '/usr/share/qemu-server/pve-usb.cfg';

    # enable absolute mouse coordinates (needed by vnc)
    my $tablet = defined($conf->{tablet}) ? $conf->{tablet} : $defaults->{tablet};
    push @$cmd, '-device', 'usb-tablet,bus=ehci.0,port=6' if $tablet;

    # host pci devices
    for (my $i = 0; $i < $MAX_HOSTPCI_DEVICES; $i++) {
        my $d = parse_hostpci($conf->{"hostpci$i"});
        next if !$d;
        $pciaddr = print_pci_addr("hostpci$i");
        push @$cmd, '-device', "pci-assign,host=$d->{pciid},id=hostpci$i$pciaddr";
    }

    # usb devices
    for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) {
        my $d = parse_usb_device($conf->{"usb$i"});
        next if !$d;
        if ($d->{vendorid} && $d->{productid}) {
            push @$cmd, '-device', "usb-host,vendorid=$d->{vendorid},productid=$d->{productid}";
        } elsif (defined($d->{hostbus}) && defined($d->{hostport})) {
            push @$cmd, '-device', "usb-host,hostbus=$d->{hostbus},hostport=$d->{hostport}";
        }
    }

    # serial devices
    for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
        if (my $path = $conf->{"serial$i"}) {
            die "no such serial device\n" if ! -c $path;
            push @$cmd, '-chardev', "tty,id=serial$i,path=$path";
            push @$cmd, '-device', "isa-serial,chardev=serial$i";
        }
    }

    # parallel devices
    for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
        if (my $path = $conf->{"parallel$i"}) {
            die "no such parallel device\n" if ! -c $path;
            push @$cmd, '-chardev', "parport,id=parallel$i,path=$path";
            push @$cmd, '-device', "isa-parallel,chardev=parallel$i";
        }
    }

    my $vmname = $conf->{name} || "vm$vmid";

    push @$cmd, '-name', $vmname;

    my $sockets = 1;
    $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer used
    $sockets = $conf->{sockets} if $conf->{sockets};

    my $cores = $conf->{cores} || 1;

    push @$cmd, '-smp', "sockets=$sockets,cores=$cores";

    push @$cmd, '-cpu', $conf->{cpu} if $conf->{cpu};

    push @$cmd, '-nodefaults';

    my $bootorder = $conf->{boot} || $confdesc->{boot}->{default};
    push @$cmd, '-boot', "menu=on,order=$bootorder";

    push @$cmd, '-no-acpi' if defined($conf->{acpi}) && $conf->{acpi} == 0;

    push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;

    my $vga = $conf->{vga};
    if (!$vga) {
        if ($conf->{ostype} && ($conf->{ostype} eq 'win7' || $conf->{ostype} eq 'w2k8')) {
            $vga = 'std';
        } else {
            $vga = 'cirrus';
        }
    }

    push @$cmd, '-vga', $vga if $vga; # for kvm 77 and later

    # time drift fix
    my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
    push @$cmd, '-tdf' if $tdf;

    my $nokvm = defined($conf->{kvm}) && $conf->{kvm} == 0 ? 1 : 0;

    if (my $ost = $conf->{ostype}) {
        # other, wxp, w2k, w2k3, w2k8, wvista, win7, l24, l26

        if ($ost =~ m/^w/) { # windows
            push @$cmd, '-localtime' if !defined($conf->{localtime});

            # use rtc-td-hack when acpi is enabled
            if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
                push @$cmd, '-rtc-td-hack';
            }
        }

        # -tdf ?
        # -no-acpi
        # -no-kvm
        # -win2k-hack ?
    }

    if ($nokvm) {
        push @$cmd, '-no-kvm';
    } else {
        die "No accelerator found!\n" if !$cpuinfo->{hvm};
    }

    push @$cmd, '-localtime' if $conf->{localtime};

    push @$cmd, '-startdate', $conf->{startdate} if $conf->{startdate};

    push @$cmd, '-S' if $conf->{freeze};

    # set keyboard layout
    my $kb = $conf->{keyboard} || $defaults->{keyboard};
    push @$cmd, '-k', $kb if $kb;

    # enable sound
    #my $soundhw = $conf->{soundhw} || $defaults->{soundhw};
    #push @$cmd, '-soundhw', 'es1370';
    #push @$cmd, '-soundhw', $soundhw if $soundhw;
    $pciaddr = print_pci_addr("balloon0");
    push @$cmd, '-device', "virtio-balloon-pci,id=balloon0$pciaddr" if $conf->{balloon};

    if ($conf->{watchdog}) {
        my $wdopts = parse_watchdog($conf->{watchdog});
        push @$cmd, '-watchdog', $wdopts->{model} || 'i6300esb';
        push @$cmd, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
    }

    my $vollist = [];
    my $scsicontroller = {}; # controllers already added to the command

    foreach_drive($conf, sub {
        my ($ds, $drive) = @_;

        eval {
            PVE::Storage::parse_volume_id($drive->{file});
            push @$vollist, $drive->{file};
        }; # ignore errors

        $use_virtio = 1 if $ds =~ m/^virtio/;
        if ($drive->{interface} eq 'scsi') {
            my $maxdev = 7;
            my $controller = int($drive->{index} / $maxdev);
            push @$cmd, '-device', "lsi,id=scsi$controller" if !$scsicontroller->{$controller};
            # Record the controller in the OUTER hash. A 'my' here would
            # declare a fresh lexical on every call, so the same
            # controller would be added again for each scsi drive.
            $scsicontroller->{$controller} = 1;
        }
        my $tmp = print_drive_full($storecfg, $vmid, $drive);
        $tmp .= ",boot=on" if $conf->{bootdisk} && ($conf->{bootdisk} eq $ds);
        push @$cmd, '-drive', $tmp;
        push @$cmd, '-device',print_drivedevice_full($storecfg,$vmid, $drive);
    });

    push @$cmd, '-m', $conf->{memory} || $defaults->{memory};

    my $foundnet = 0;

    foreach my $k (sort keys %$conf) {
        next if $k !~ m/^net(\d+)$/;
        my $i = int($1);

        die "got strange net id '$i'\n" if $i >= ${MAX_NETS};

        if ($conf->{"net$i"} && (my $net = parse_net($conf->{"net$i"}))) {

            $foundnet = 1;

            my $ifname = "tap${vmid}i$i";

            # kvm uses TUNSETIFF ioctl, and that limits ifname length
            die "interface name '$ifname' is too long (max 15 character)\n"
                if length($ifname) >= 16;

            my $device = $net->{model};
            my $vhostparam = '';
            if ($net->{model} eq 'virtio') {
                $use_virtio = 1;
                $device = 'virtio-net-pci';
                $vhostparam = ',vhost=on' if $kernel_has_vhost_net;
            };

            if ($net->{bridge}) {
                push @$cmd, '-netdev', "type=tap,id=${k},ifname=${ifname},script=/var/lib/qemu-server/pve-bridge$vhostparam";
            } else {
                push @$cmd, '-netdev', "type=user,id=${k},hostname=$vmname";
            }

            # qemu > 0.15 always try to boot from network - we disable that by
            # not loading the pxe rom file
            my $extra = (!$conf->{boot} || ($conf->{boot} !~ m/n/)) ?
                "romfile=," : '';
            push @$cmd, '-device', "$device,${extra}mac=$net->{macaddr},netdev=${k}";
        }
    }

    push @$cmd, '-net', 'none' if !$foundnet;

    # hack: virtio with fairsched is unreliable, so we do not use fairsched
    # when the VM uses virtio devices.
    if (!$use_virtio && $have_ovz) {

        my $cpuunits = defined($conf->{cpuunits}) ?
            $conf->{cpuunits} : $defaults->{cpuunits};

        push @$cmd, '-cpuunits', $cpuunits if $cpuunits;

        # fixme: cpulimit is currently ignored
        #push @$cmd, '-cpulimit', $conf->{cpulimit} if $conf->{cpulimit};
    }

    # add custom args
    if ($conf->{args}) {
        my $aa = split_args($conf->{args});
        push @$cmd, @$aa;
    }

    return wantarray ? ($cmd, $vollist) : $cmd;
}
2200
# Path of the VNC unix socket of VM $vmid (inside $var_run_tmpdir).
sub vnc_socket {
    my $vmid = shift;

    return "${var_run_tmpdir}/$vmid.vnc";
}
2205
# Path of the monitor unix socket of VM $vmid (inside $var_run_tmpdir).
sub monitor_socket {
    my $vmid = shift;

    return "${var_run_tmpdir}/$vmid.mon";
}
2210
# Path of the pid file of VM $vmid (inside $var_run_tmpdir).
sub pidfile_name {
    my $vmid = shift;

    return "${var_run_tmpdir}/$vmid.pid";
}
2215
# Generate a random ethernet address as six colon separated upper-case
# hex octets. The octets are the first six bytes of a SHA1 hex digest
# over rand() and time(); the first octet has the multicast bit cleared
# and the "locally administered" bit set.
sub random_ether_addr {

    my $digest = Digest::SHA1::sha1_hex(rand(), time());

    my @octets = map { hex(substr($digest, $_*2, 2)) } (0 .. 5);

    $octets[0] &= 0xfe; # clear multicast
    $octets[0] |= 2; # set local id

    return join(':', map { sprintf("%02X", $_) } @octets);
}
2238
# Find a port in the range 60000-60009 on which a listening socket can
# currently be opened on localhost, and return it. The probe socket is
# closed again before returning. Dies if none of the ports is available.
sub next_migrate_port {

    foreach my $p (60000 .. 60009) {

        my $sock = IO::Socket::INET->new(Listen => 5,
                                         LocalAddr => 'localhost',
                                         LocalPort => $p,
                                         ReuseAddr => 1,
                                         Proto => 0);
        next if !$sock;

        close($sock);
        return $p;
    }

    die "unable to find free migration port";
}
2257
# Start VM $vmid while holding its config lock.
#
# $statefile - optional: 'tcp' makes the VM wait for an incoming
#              migration on a free local port (the port is printed);
#              any other value names a state file to resume from, which
#              is removed after a successful start
# $skiplock  - do not honour the 'lock' setting of the configuration
#
# Dies if the VM is locked or already running, if a configured host PCI
# device cannot be prepared, or if the kvm command fails. Errors from
# the monitor commands issued after the start are ignored.
sub vm_start {
    my ($storecfg, $vmid, $statefile, $skiplock) = @_;

    lock_config($vmid, sub {
        my $conf = load_config($vmid);

        check_lock($conf) if !$skiplock;

        if (check_running($vmid)) {
            my $msg = "VM $vmid already running - start failed\n" ;
            syslog('err', $msg);
            die $msg;
        }
        syslog('info', "VM $vmid start");

        my $migrate_uri;
        my $migrate_port = 0;

        if ($statefile && $statefile eq 'tcp') {
            $migrate_port = next_migrate_port();
            $migrate_uri = "tcp:localhost:${migrate_port}";
        } elsif ($statefile) {
            if (-f $statefile) {
                $migrate_uri = "exec:cat $statefile";
            } else {
                warn "state file '$statefile' does not exist - doing normal startup\n";
            }
        }

        my $defaults = load_defaults();

        my ($cmd, $vollist) = config_to_command($storecfg, $vmid, $conf, $defaults, $migrate_uri);

        # host pci devices
        for my $i (0 .. $MAX_HOSTPCI_DEVICES - 1) {
            my $d = parse_hostpci($conf->{"hostpci$i"}) or next;
            my $info = pci_device_info("0000:$d->{pciid}");
            die "IOMMU not present\n" if !check_iommu_support();
            die "no pci device info for device '$d->{pciid}'\n" if !$info;
            die "can't unbind pci device '$d->{pciid}'\n" if !pci_dev_bind_to_stub($info);
            die "can't reset pci device '$d->{pciid}'\n" if !pci_dev_reset($info);
        }

        PVE::Storage::activate_volumes($storecfg, $vollist);

        # no timeout when waiting for incoming migration/state data
        eval { run_command($cmd, timeout => $migrate_uri ? undef : 30); };

        if (my $err = $@) {
            my $msg = "start failed: $err";
            syslog('err', "VM $vmid $msg");
            die $msg;
        }

        if ($statefile && $statefile eq 'tcp') {
            print "migration listens on port $migrate_port\n";
        } elsif ($statefile) {
            unlink $statefile;
            # fixme: send resume - is that necessary ?
            eval { vm_monitor_command($vmid, "cont", 1) };
        }

        my $migrate_speed = $conf->{migrate_speed} || $defaults->{migrate_speed};
        if ($migrate_speed) {
            my $moncmd = "migrate_set_speed ${migrate_speed}m";
            eval { vm_monitor_command($vmid, $moncmd, 1); };
        }

        my $migrate_downtime = $conf->{migrate_downtime} || $defaults->{migrate_downtime};
        if ($migrate_downtime) {
            my $moncmd = "migrate_set_downtime ${migrate_downtime}";
            eval { vm_monitor_command($vmid, $moncmd, 1); };
        }

        vm_balloonset($vmid, $conf->{balloon}) if $conf->{balloon};
    });
}
2342
# Read from the monitor handle $fh until the "(qemu) " prompt shows up at
# the end of the collected output, or until EOF.
#
# $fh      - handle to read from (must work with IO::Select and sysread)
# $timeout - seconds to wait for each chunk of data
#
# Returns everything read, without the trailing prompt.
# Dies with "monitor read timeout\n" if no data arrives within $timeout,
# or with the system error text if sysread fails.
sub __read_avail {
    my ($fh, $timeout) = @_;

    my $sel = IO::Select->new();
    $sel->add($fh);

    my $res = '';
    my $buf;

    my @ready;
    while (scalar (@ready = $sel->can_read($timeout))) {
        my $count = $fh->sysread($buf, 8192);

        die "$!\n" if !defined($count);
        last if !$count; # EOF

        $res .= $buf;

        # Match against the accumulated output, not just the last chunk:
        # the prompt may be split across two reads.
        if ($res =~ /^(.*)\(qemu\) $/s) {
            $res = $1;
            last;
        }
    }

    die "monitor read timeout\n" if !scalar(@ready);

    return $res;
}
2374
# Send one command to the qemu monitor socket of VM $vmid and return its
# output.
#
# $vmid    - VM id
# $cmdstr  - monitor command line (without line terminator)
# $nolog   - when true, the command is not written to syslog
# $nocheck - passed through to check_running()
#
# Returns the command output (the echoed command line removed, lines
# joined with "\n"); nothing is read back for 'q'/'quit'.
# Any failure is logged to syslog and re-thrown.
sub vm_monitor_command {
    my ($vmid, $cmdstr, $nolog, $nocheck) = @_;

    my $res;

    syslog("info", "VM $vmid monitor command '$cmdstr'") if !$nolog;

    eval {
        die "VM not running\n" if !check_running($vmid, $nocheck);

        my $sname = monitor_socket($vmid);

        my $sock = IO::Socket::UNIX->new( Peer => $sname ) ||
            die "unable to connect to VM $vmid socket - $!\n";

        # hack: migrate sometime blocks the monitor (when migrate_downtime
        # is set)
        my $is_migrate = ($cmdstr =~ m/^(info\s+migrate|migrate\s)/) ? 1 : 0;

        my $timeout = $is_migrate ? 60*60 : 3; # 1 hour for migrate

        # read banner;
        my $data = __read_avail($sock, $timeout);

        if ($data !~ m/^QEMU\s+(\S+)\s+monitor\s/) {
            die "got unexpected qemu monitor banner\n";
        }

        my $sel = IO::Select->new();
        $sel->add($sock);

        if (!scalar(my @ready = $sel->can_write($timeout))) {
            die "monitor write error - timeout\n";
        }

        my $fullcmd = "$cmdstr\r";

        # a short write is treated as an error, too
        my $written = $sock->syswrite($fullcmd);
        if (!$written || ($written != length($fullcmd))) {
            die "monitor write error - $!\n";
        }

        # qemu exits on quit, so there is no answer to wait for
        return if ($cmdstr eq 'q') || ($cmdstr eq 'quit');

        $timeout = 20;

        if ($is_migrate) {
            $timeout = 60*60; # 1 hour
        } elsif ($cmdstr =~ m/^(eject|change)/) {
            $timeout = 60; # note: cdrom mount command is slow
        }

        if ($res = __read_avail($sock, $timeout)) {

            my @lines = split("\r?\n", $res);

            # skip echo
            shift @lines if @lines && $lines[0] !~ m/^unknown command/;

            $res = join("\n", @lines);
            $res .= "\n";
        }
    };

    my $err = $@;

    if ($err) {
        syslog("err", "VM $vmid monitor command failed - $err");
        die $err;
    }

    return $res;
}
2448
# Return the command line that would be used to start VM $vmid, as one
# space separated string (arguments are not shell quoted).
#
# $storecfg - storage configuration, handed to config_to_command()
# $vmid     - VM id
sub vm_commandline {
    my ($storecfg, $vmid) = @_;

    my $conf = load_config($vmid);

    my $defaults = load_defaults();

    # config_to_command() returns ($cmd, $vollist) - call it in list
    # context, like vm_start() does, and take the first element instead
    # of relying on what it returns in scalar context.
    my ($cmd) = config_to_command($storecfg, $vmid, $conf, $defaults);

    return join(' ', @$cmd);
}
2460
# Send 'system_reset' to the monitor of VM $vmid while holding the config
# lock. The 'lock' setting of the config is honoured unless $skiplock is
# true.
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $do_reset = sub {
        my $conf = load_config($vmid);

        check_lock($conf) unless $skiplock;

        syslog("info", "VM $vmid sending 'reset'");

        vm_monitor_command($vmid, "system_reset", 1);
    };

    return lock_config($vmid, $do_reset);
}
2475
# Send 'system_powerdown' to the monitor of VM $vmid while holding the
# config lock. The 'lock' setting of the config is honoured unless
# $skiplock is true.
sub vm_shutdown {
    my ($vmid, $skiplock) = @_;

    my $do_shutdown = sub {
        my $conf = load_config($vmid);

        check_lock($conf) unless $skiplock;

        syslog("info", "VM $vmid sending 'shutdown'");

        vm_monitor_command($vmid, "system_powerdown", 1);
    };

    return lock_config($vmid, $do_shutdown);
}
2490
# Stop VM $vmid: send 'quit' to the monitor, then escalate to SIGTERM and
# finally SIGKILL if the process does not go away.
#
# Note: with $nocheck set the VM configuration is not loaded (and its
# 'lock' setting not checked); the flag is also passed on to
# check_running() and vm_monitor_command(). We need that when migrating
# VMs to other nodes (files already moved).
sub vm_stop {
    my ($vmid, $skiplock, $nocheck) = @_;

    lock_config($vmid, sub {

        my $pid = check_running($vmid, $nocheck);

        if (!$pid) {
            syslog('info', "VM $vmid already stopped");
            return;
        }

        unless ($nocheck) {
            my $conf = load_config($vmid);
            check_lock($conf) unless $skiplock;
        }

        # Poll once per second, for at most $limit seconds, until the VM
        # is gone. Returns true if the full time was used up.
        my $wait_timed_out = sub {
            my ($limit) = @_;

            my $elapsed = 0;
            while (($elapsed < $limit) && check_running($vmid, $nocheck)) {
                $elapsed++;
                sleep 1;
            }

            return $elapsed >= $limit;
        };

        syslog("info", "VM $vmid stopping");

        eval { vm_monitor_command($vmid, "quit", 1, $nocheck); };

        if ($@) {
            syslog('info', "VM $vmid quit failed - terminating now with SIGTERM");
            kill 15, $pid;
        } elsif ($wait_timed_out->(50)) { # fixme: how long?
            syslog('info', "VM $vmid still running - terminating now with SIGTERM");
            kill 15, $pid;
        }

        # wait again
        if ($wait_timed_out->(10)) {
            syslog('info', "VM $vmid still running - terminating now with SIGKILL\n");
            kill 9, $pid;
        }

        fairsched_rmnod($vmid); # try to destroy group
    });
}
2552
# Suspend VM $vmid by sending 'stop' to its monitor while holding the
# config lock. The 'lock' setting of the config is honoured unless
# $skiplock is true.
sub vm_suspend {
    my ($vmid, $skiplock) = @_;

    my $do_suspend = sub {
        my $conf = load_config($vmid);

        check_lock($conf) unless $skiplock;

        syslog("info", "VM $vmid suspend");

        vm_monitor_command($vmid, "stop", 1);
    };

    return lock_config($vmid, $do_suspend);
}
2567
# Resume VM $vmid by sending 'cont' to its monitor while holding the
# config lock. The 'lock' setting of the config is honoured unless
# $skiplock is true.
sub vm_resume {
    my ($vmid, $skiplock) = @_;

    my $do_resume = sub {
        my $conf = load_config($vmid);

        check_lock($conf) unless $skiplock;

        syslog("info", "VM $vmid resume");

        vm_monitor_command($vmid, "cont", 1);
    };

    return lock_config($vmid, $do_resume);
}
2582
# Send the ctrl-alt-delete key combination to VM $vmid through its
# monitor ('sendkey') while holding the config lock. The 'lock' setting
# of the config is honoured unless $skiplock is true.
sub vm_cad {
    my ($vmid, $skiplock) = @_;

    my $do_sendkey = sub {
        my $conf = load_config($vmid);

        check_lock($conf) unless $skiplock;

        syslog("info", "VM $vmid sending cntl-alt-delete");

        vm_monitor_command($vmid, "sendkey ctrl-alt-delete", 1);
    };

    return lock_config($vmid, $do_sendkey);
}
2597
# Destroy VM $vmid (destroy_vm() is called with the storage configuration
# $storecfg) while holding the config lock.
#
# The 'lock' setting of the config is honoured unless $skiplock is true.
# Dies with "VM is running\n" if the VM is still running; every failure
# is logged to syslog before it is re-thrown.
sub vm_destroy {
    my ($storecfg, $vmid, $skiplock) = @_;

    my $do_destroy = sub {

        my $conf = load_config($vmid);

        check_lock($conf) unless $skiplock;

        syslog("info", "VM $vmid destroy called (removing all data)");

        eval {
            die "VM is running\n" if check_running($vmid);

            fairsched_rmnod($vmid); # try to destroy group
            destroy_vm($storecfg, $vmid);
        };

        my $err = $@;

        if ($err) {
            syslog("err", "VM $vmid destroy failed - $err");
            die $err;
        }
    };

    return lock_config($vmid, $do_destroy);
}
2626
# Stop all running VMs, escalating step by step:
#   1. send a shutdown request to every VM and wait up to $timeout seconds
#      (default: 3 minutes)
#   2. send 'quit' to the monitor of the remaining VMs and wait up to 30
#      seconds
#   3. send SIGTERM to whatever is still running
# A VM counts as running when its vzlist() entry has a pid. The VM list
# is polled every 5 seconds while waiting.
sub vm_stopall {
    my ($timeout) = @_;

    $timeout = 3*60 if !$timeout;

    my $vzlist;

    # ids of the VMs which have a pid in the current $vzlist snapshot
    my $running_vmids = sub {
        return grep { $vzlist->{$_}->{pid} } keys %$vzlist;
    };

    # re-read the VM list and return the number of running VMs
    my $count_running = sub {
        $vzlist = vzlist();
        return scalar(my @vmids = $running_vmids->());
    };

    my $count = $count_running->();

    if ($count) {

        my $wt = 5; # poll interval in seconds

        # poll until no VM is running any more, for at most $secs seconds
        my $wait_for_exit = sub {
            my ($secs) = @_;

            my $maxtries = int(($secs + $wt -1)/$wt);
            for (my $try = 0; ($try < $maxtries) && $count; $try++) {
                sleep $wt;
                $count = $count_running->();
            }
        };

        my $start_msg = "Stopping Qemu Server - sending shutdown requests to all VMs\n";
        syslog('info', $start_msg);
        print STDERR $start_msg;

        foreach my $vmid ($running_vmids->()) {
            eval { vm_shutdown($vmid, 1); };
            print STDERR $@ if $@;
        }

        $wait_for_exit->($timeout);

        return if !$count;

        foreach my $vmid ($running_vmids->()) {
            my $stop_msg = "VM $vmid still running - sending stop now\n";
            syslog('info', $stop_msg);
            print $stop_msg;

            eval { vm_monitor_command($vmid, "quit", 1); };
            print STDERR $@ if $@;
        }

        $wait_for_exit->(30);

        return if !$count;

        foreach my $vmid ($running_vmids->()) {
            my $term_msg = "VM $vmid still running - terminating now with SIGTERM\n";
            syslog('info', $term_msg);
            print $term_msg;
            kill 15, $vzlist->{$vmid}->{pid};
        }

        # this is called by system shutdown scripts, so remaining
        # processes get killed anyway (no need to send kill -9 here)

        my $done_msg = "Qemu Server stopped\n";
        syslog('info', $done_msg);
        print STDERR $done_msg;
    }
}
2716
2717 # pci helpers
2718
# Write $buf to the file $filename (opened in "w" mode).
#
# Returns 1 if the whole buffer was written and the file was closed
# without error, undef otherwise.
#
# The data is written with syswrite() and the result of close() is
# checked: with a buffered print() the data only reaches the kernel when
# the handle is flushed, so a rejected write would go unnoticed.
sub file_write {
    my ($filename, $buf) = @_;

    my $fh = IO::File->new($filename, "w");
    return undef if !$fh;

    $buf = '' if !defined($buf);

    my $written = syswrite($fh, $buf);

    my $closed = $fh->close();

    return undef if !defined($written) || $written != length($buf);
    return undef if !$closed;

    return 1;
}
2731
# Collect information about the PCI device $name, which must have the
# form 'dddd:bb:ss.f' (lower case hex digits).
#
# Reads the 'irq', 'vendor' and 'device' files of the device below
# "$pcisysfs/devices". Returns a hash reference with the keys name,
# vendor, product (both without the leading '0x'), domain, bus, slot,
# func, irq and has_fl_reset (whether the device has a 'reset' file), or
# undef if the name is malformed or one of the files has no usable
# content.
sub pci_device_info {
    my ($name) = @_;

    my ($domain, $bus, $slot, $func) =
        $name =~ m/^([a-f0-9]{4}):([a-f0-9]{2}):([a-f0-9]{2})\.([a-f0-9])$/
        or return undef;

    my $devdir = "$pcisysfs/devices/$name";

    my $irq = file_read_firstline("$devdir/irq");
    return undef unless defined($irq) && $irq =~ m/^\d+$/;

    # vendor and product ids are only accepted with a '0x' prefix, which
    # gets stripped
    my $vendor = file_read_firstline("$devdir/vendor");
    return undef unless defined($vendor) && $vendor =~ s/^0x//;

    my $product = file_read_firstline("$devdir/device");
    return undef unless defined($product) && $product =~ s/^0x//;

    my $has_fl_reset = -f "$devdir/reset" || 0;

    return {
        name => $name,
        vendor => $vendor,
        product => $product,
        domain => $domain,
        bus => $bus,
        slot => $slot,
        func => $func,
        irq => $irq,
        has_fl_reset => $has_fl_reset,
    };
}
2763
# Reset the PCI device described by $dev (a hash with a 'name' entry, as
# returned by pci_device_info) by writing "1" to its 'reset' file below
# "$pcisysfs/devices". Returns the result of file_write().
sub pci_dev_reset {
    my ($dev) = @_;

    my $reset_file = "$pcisysfs/devices/$dev->{name}/reset";

    return file_write($reset_file, "1");
}
2773
# Bind the PCI device described by $dev (hash with name, vendor and
# product, as returned by pci_device_info) to the pci-stub driver.
#
# Returns a true value if the device shows up below the pci-stub driver
# directory afterwards (or was already there), a false value otherwise.
sub pci_dev_bind_to_stub {
    my ($dev) = @_;

    my $name = $dev->{name};
    my $stubdir = "$pcisysfs/drivers/pci-stub";
    my $testdir = "$stubdir/$name";

    # nothing to do if the device is already bound to the stub driver
    return 1 if -d $testdir;

    # register the vendor/product id with the stub driver
    my $id = "$dev->{vendor} $dev->{product}";
    file_write("$stubdir/new_id", $id)
        or return undef;

    # detach from the current driver; a failed write only counts as an
    # error when the unbind file exists
    my $unbind = "$pcisysfs/devices/$name/driver/unbind";
    return undef if !file_write($unbind, $name) && -f $unbind;

    # bind explicitly unless the device showed up by itself
    return undef if !-d $testdir && !file_write("$stubdir/bind", $name);

    return -d $testdir;
}
2797
# Return the ",bus=pci.N,addr=0xNN" suffix that pins the device $id to
# its fixed PCI slot, or an empty string if no slot is assigned to $id.
sub print_pci_addr {
    my ($id) = @_;

    # fixed bus/slot assignment per device id
    my %slot_of = (
        balloon0 => { bus => 0, addr => 3 },
        virtio0  => { bus => 0, addr => 10 },
        virtio1  => { bus => 0, addr => 11 },
        virtio2  => { bus => 0, addr => 12 },
        virtio3  => { bus => 0, addr => 13 },
        virtio4  => { bus => 0, addr => 14 },
        virtio5  => { bus => 0, addr => 15 },
        hostpci0 => { bus => 0, addr => 16 },
        hostpci1 => { bus => 0, addr => 17 },
    );

    my $slot = $slot_of{$id};

    return '' unless $slot && defined($slot->{bus}) && defined($slot->{addr});

    return sprintf(",bus=pci.%s,addr=0x%x", $slot->{bus}, $slot->{addr});
}
2822
# Send the monitor command "balloon $value" to VM $vmid (the command is
# not logged to syslog by vm_monitor_command).
sub vm_balloonset {
    my ($vmid, $value) = @_;

    my $cmd = "balloon $value";

    return vm_monitor_command($vmid, $cmd, 1);
}
2828
2829 1;