]> git.proxmox.com Git - qemu-server.git/blob - PVE/QemuServer.pm
remove support for old hostusb syntax
[qemu-server.git] / PVE / QemuServer.pm
1 package PVE::QemuServer;
2
3 use strict;
4 use POSIX;
5 use IO::Handle;
6 use IO::Select;
7 use IO::File;
8 use IO::Dir;
9 use IO::Socket::UNIX;
10 use File::Basename;
11 use File::Path;
12 use File::stat;
13 use Getopt::Long;
14 use Digest::SHA1;
15 use Fcntl ':flock';
16 use Cwd 'abs_path';
17 use IPC::Open3;
18 use Fcntl;
19 use PVE::SafeSyslog;
20 use Storable qw(dclone);
21 use PVE::Exception qw(raise raise_param_exc);
22 use PVE::Storage;
23 use PVE::Tools qw(run_command lock_file file_read_firstline);
24 use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file cfs_lock_file);
25 use PVE::INotify;
26 use PVE::ProcFSTools;
27 use Time::HiRes qw (gettimeofday);
28
# Value of sysconf(_SC_CLK_TCK), queried once when the module is loaded.
my $clock_ticks = POSIX::sysconf(&POSIX::_SC_CLK_TCK);
30
# Note about locking: we use flock on the config file to protect
# against concurrent actions.
# Additionally, we have a 'lock' setting in the config file. This
# can be set to 'migrate' or 'backup'. Most actions are not
# allowed when such a lock is set. But you can ignore this kind of
# lock with the --skiplock flag.
37
# Register parse_vm_config with PVE::Cluster for the '/qemu-server/' path.
cfs_register_file('/qemu-server/', \&parse_vm_config);
39
40 #no warnings 'redefine';
41
# Define the fairsched_*/setluid/setublimit syscall numbers for the current
# architecture and the KVM_GET_API_VERSION ioctl request, unless they are
# already defined. Dies on architectures other than x86_64 and i386.
unless (defined(&_VZSYSCALLS_H_)) {
    eval 'sub _VZSYSCALLS_H_ () {1;}' unless defined(&_VZSYSCALLS_H_);
    require 'sys/syscall.ph';
    if (defined(&__x86_64__)) {
        eval 'sub __NR_fairsched_vcpus () {499;}' unless defined(&__NR_fairsched_vcpus);
        eval 'sub __NR_fairsched_mknod () {504;}' unless defined(&__NR_fairsched_mknod);
        eval 'sub __NR_fairsched_rmnod () {505;}' unless defined(&__NR_fairsched_rmnod);
        eval 'sub __NR_fairsched_chwt () {506;}' unless defined(&__NR_fairsched_chwt);
        eval 'sub __NR_fairsched_mvpr () {507;}' unless defined(&__NR_fairsched_mvpr);
        eval 'sub __NR_fairsched_rate () {508;}' unless defined(&__NR_fairsched_rate);
        eval 'sub __NR_setluid () {501;}' unless defined(&__NR_setluid);
        eval 'sub __NR_setublimit () {502;}' unless defined(&__NR_setublimit);
    }
    elsif (defined(&__i386__)) {
        eval 'sub __NR_fairsched_mknod () {500;}' unless defined(&__NR_fairsched_mknod);
        eval 'sub __NR_fairsched_rmnod () {501;}' unless defined(&__NR_fairsched_rmnod);
        eval 'sub __NR_fairsched_chwt () {502;}' unless defined(&__NR_fairsched_chwt);
        eval 'sub __NR_fairsched_mvpr () {503;}' unless defined(&__NR_fairsched_mvpr);
        eval 'sub __NR_fairsched_rate () {504;}' unless defined(&__NR_fairsched_rate);
        eval 'sub __NR_fairsched_vcpus () {505;}' unless defined(&__NR_fairsched_vcpus);
        eval 'sub __NR_setluid () {511;}' unless defined(&__NR_setluid);
        eval 'sub __NR_setublimit () {512;}' unless defined(&__NR_setublimit);
    } else {
        die("no fairsched syscall for this arch");
    }
    require 'asm/ioctl.ph';
    # The request number is written out as 0x00 instead of the digit-less
    # hex literal "0x" (same value, but no longer relies on lexer leniency).
    eval 'sub KVM_GET_API_VERSION () { &_IO(0xAE, 0x00);}' unless defined(&KVM_GET_API_VERSION);
}
70
# Invoke the fairsched_mknod syscall. All three arguments are coerced to
# integers; the raw syscall() result is returned.
sub fairsched_mknod {
    my ($parent, $weight, $desired) = @_;

    my @sysargs = (int($parent), int($weight), int($desired));

    return syscall(&__NR_fairsched_mknod, @sysargs);
}
76
# Invoke the fairsched_rmnod syscall for node $id (coerced to an integer)
# and return the raw syscall() result.
sub fairsched_rmnod {
    my ($id) = @_;

    my $node = int($id);

    return syscall(&__NR_fairsched_rmnod, $node);
}
82
# Invoke the fairsched_mvpr syscall with the integer values of $pid and
# $newid; returns the raw syscall() result.
sub fairsched_mvpr {
    my ($pid, $newid) = @_;

    my @sysargs = (int($pid), int($newid));

    return syscall(&__NR_fairsched_mvpr, @sysargs);
}
88
# Invoke the fairsched_vcpus syscall with the integer values of $id and
# $vcpus; returns the raw syscall() result.
sub fairsched_vcpus {
    my ($id, $vcpus) = @_;

    my @sysargs = (int($id), int($vcpus));

    return syscall(&__NR_fairsched_vcpus, @sysargs);
}
94
# Invoke the fairsched_rate syscall. $op is one of the FAIRSCHED_*_RATE
# operation codes; all arguments are coerced to integers.
sub fairsched_rate {
    my ($id, $op, $rate) = @_;

    my @sysargs = (int($id), int($op), int($rate));

    return syscall(&__NR_fairsched_rate, @sysargs);
}
100
# Operation codes passed as $op to fairsched_rate().
use constant {
    FAIRSCHED_SET_RATE  => 0,
    FAIRSCHED_DROP_RATE => 1,
    FAIRSCHED_GET_RATE  => 2,
};

# Apply a CPU limit (given in percent) to fairsched node $id. The limit is
# scaled to units of 1/1024; a scaled value of 0 drops the rate limit
# instead of setting one.
sub fairsched_cpulimit {
    my ($id, $limit) = @_;

    my $scaled = int($limit * 1024 / 100);

    if (!$scaled) {
        return fairsched_rate($id, FAIRSCHED_DROP_RATE, $scaled);
    }

    return fairsched_rate($id, FAIRSCHED_SET_RATE, $scaled);
}
113
# Name of the local node as reported by PVE::INotify.
my $nodename = PVE::INotify::nodename();

# Per-node configuration directory. The mkdir results are not checked
# (presumably because the directories usually exist already).
mkdir "/etc/pve/nodes/$nodename";
my $confdir = "/etc/pve/nodes/$nodename/qemu-server";
mkdir $confdir;

# Runtime directory under /var/run.
my $var_run_tmpdir = "/var/run/qemu-server";
mkdir $var_run_tmpdir;

# Lock file directory under /var/lock.
my $lock_dir = "/var/lock/qemu-server";
mkdir $lock_dir;

# Root of the PCI bus in sysfs.
my $pcisysfs = "/sys/bus/pci";

# Keymap table from PVE::Tools; its keys are the allowed 'keyboard' values.
my $keymaphash = PVE::Tools::kvmkeymaps();
129
# Schema of the options allowed in a VM configuration. Each entry maps an
# option name to its type, constraints, default and description. The
# per-index options (netN, ideN, scsiN, ...) are added to this hash further
# down in the file.
my $confdesc = {
    onboot => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default => 0,
    },
    autostart => {
        optional => 1,
        type => 'boolean',
        description => "Automatic restart after crash (currently ignored).",
        default => 0,
    },
    reboot => {
        optional => 1,
        type => 'boolean',
        description => "Allow reboot. If set to '0' the VM exit on reboot.",
        default => 1,
    },
    lock => {
        optional => 1,
        type => 'string',
        description => "Lock/unlock the VM.",
        enum => [qw(migrate backup)],
    },
    cpulimit => {
        optional => 1,
        type => 'integer',
        description => "Limit of CPU usage in per cent. Note if the computer has 2 CPUs, it has total of 200% CPU time. Value '0' indicates no CPU limit.\n\nNOTE: This option is currently ignored.",
        minimum => 0,
        default => 0,
    },
    cpuunits => {
        optional => 1,
        type => 'integer',
        description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to weights of all the other running VMs.\n\nNOTE: You can disable fair-scheduler configuration by setting this to 0.",
        minimum => 0,
        maximum => 500000,
        default => 1000,
    },
    memory => {
        optional => 1,
        type => 'integer',
        description => "Amount of RAM for the VM in MB.",
        minimum => 16,
        default => 512,
    },
    keyboard => {
        optional => 1,
        type => 'string',
        description => "Keybord layout for vnc server. Default is read from the datacenter configuration file.",
        enum => [ keys %$keymaphash ],
        default => 'en-us',
    },
    name => {
        optional => 1,
        type => 'string',
        description => "Set a name for the VM. Only used on the configuration web interface.",
    },
    description => {
        optional => 1,
        type => 'string',
        description => "Description for the VM. Only used on the configuration web interface.",
    },
    ostype => {
        optional => 1,
        type => 'string',
        enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 l24 l26)],
        description => <<EODESC,
Used to enable special optimization/features for specific
operating systems:

other => unspecified OS
wxp => Microsoft Windows XP
w2k => Microsoft Windows 2000
w2k3 => Microsoft Windows 2003
w2k8 => Microsoft Windows 2008
wvista => Microsoft Windows Vista
win7 => Microsoft Windows 7
l24 => Linux 2.4 Kernel
l26 => Linux 2.6/3.X Kernel

other|l24|l26 ... no special behaviour
wxp|w2k|w2k3|w2k8|wvista|win7 ... use --localtime switch
EODESC
    },
    boot => {
        optional => 1,
        type => 'string',
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n).",
        pattern => '[acdn]{1,4}',
        default => 'cad',
    },
    bootdisk => {
        optional => 1,
        type => 'string', format => 'pve-qm-bootdisk',
        description => "Enable booting from specified disk.",
        pattern => '(ide|scsi|virtio)\d+',
    },
    smp => {
        optional => 1,
        type => 'integer',
        description => "The number of CPUs. Please use option -sockets instead.",
        minimum => 1,
        default => 1,
    },
    sockets => {
        optional => 1,
        type => 'integer',
        description => "The number of CPU sockets.",
        minimum => 1,
        default => 1,
    },
    cores => {
        optional => 1,
        type => 'integer',
        description => "The number of cores per socket.",
        minimum => 1,
        default => 1,
    },
    acpi => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable ACPI.",
        default => 1,
    },
    kvm => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable KVM hardware virtualization.",
        default => 1,
    },
    tdf => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable time drift fix.",
        default => 1,
    },
    localtime => {
        optional => 1,
        type => 'boolean',
        description => "Set the real time clock to local time. This is enabled by default if ostype indicates a Microsoft OS.",
    },
    freeze => {
        optional => 1,
        type => 'boolean',
        description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
    },
    vga => {
        optional => 1,
        type => 'string',
        # The description must name values from the enum below ('cirrus').
        description => "Select VGA type. If you want to use high resolution modes (>= 1280x1024x16) then you should use option 'std' or 'vmware'. Default is 'std' for win7/w2k8, and 'cirrus' for other OS types",
        enum => [qw(std cirrus vmware)],
    },
    watchdog => {
        optional => 1,
        type => 'string', format => 'pve-qm-watchdog',
        typetext => '[[model=]i6300esb|ib700] [,[action=]reset|shutdown|poweroff|pause|debug|none]',
        description => "Create a virtual hardware watchdog device. Once enabled (by a guest action), the watchdog must be periodically polled by an agent inside the guest or else the guest will be restarted (or execute the action specified)",
    },
    startdate => {
        optional => 1,
        type => 'string',
        typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
        description => "Set the initial date of the real time clock. Valid format for date are: 'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
        pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
        default => 'now',
    },
    args => {
        optional => 1,
        type => 'string',
        description => <<EODESCR,
Note: this option is for experts only. It allows you to pass arbitrary arguments to kvm, for example:

args: -no-reboot -no-hpet
EODESCR
    },
    tablet => {
        optional => 1,
        type => 'boolean',
        default => 1,
        description => "Enable/disable the usb tablet device. This device is usually needed to allow absolute mouse positioning. Else the mouse runs out of sync with normal vnc clients. If you're running lots of console-only guests on one host, you may consider disabling this to save some context switches.",
    },
    migrate_speed => {
        optional => 1,
        type => 'integer',
        description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
        minimum => 0,
        default => 0,
    },
    migrate_downtime => {
        optional => 1,
        type => 'integer',
        description => "Set maximum tolerated downtime (in seconds) for migrations.",
        minimum => 0,
        default => 1,
    },
    cdrom => {
        optional => 1,
        type => 'string', format => 'pve-qm-drive',
        typetext => 'volume',
        description => "This is an alias for option -ide2",
    },
    cpu => {
        optional => 1,
        description => "Emulated CPU type.",
        type => 'string',
        enum => [ qw(486 athlon pentium pentium2 pentium3 coreduo core2duo kvm32 kvm64 qemu32 qemu64 phenom host) ],
        default => 'qemu64',
    },
};
341
342 # what about other qemu settings ?
343 #cpu => 'string',
344 #machine => 'string',
345 #fda => 'file',
346 #fdb => 'file',
347 #mtdblock => 'file',
348 #sd => 'file',
349 #pflash => 'file',
350 #snapshot => 'bool',
351 #bootp => 'file',
352 ##tftp => 'dir',
353 ##smb => 'dir',
354 #kernel => 'file',
355 #append => 'string',
356 #initrd => 'file',
357 ##soundhw => 'string',
358
# Publish every option defined so far as standard option "pve-qm-<name>".
foreach my $optname (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$optname", $confdesc->{$optname});
}
362
# Number of per-index options of each kind (e.g. ide0 .. ide3).
my $MAX_IDE_DISKS = 4;
my $MAX_SCSI_DISKS = 14;
my $MAX_VIRTIO_DISKS = 6;
my $MAX_USB_DEVICES = 5;
my $MAX_NETS = 6;
my $MAX_UNUSED_DISKS = 8;
my $MAX_HOSTPCI_DEVICES = 2;
my $MAX_SERIAL_PORTS = 4;
my $MAX_PARALLEL_PORTS = 3;

# NIC models returned by nic_models(); the sorted text form is interpolated
# into the netN option description.
my $nic_model_list = ['rtl8139', 'ne2k_pci', 'e1000', 'pcnet', 'virtio',
                      'ne2k_isa', 'i82551', 'i82557b', 'i82559er'];
my $nic_model_list_txt = join (' ', sort @$nic_model_list);
376
377 # fixme:
# Schema shared by all netN options; values are validated through the
# 'pve-qm-net' format.
my $netdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-net',
    typetext => "MODEL=XX:XX:XX:XX:XX:XX [,bridge=<dev>][,rate=<mbps>]",
    description => <<EODESCR,
Specify network devices.

MODEL is one of: $nic_model_list_txt

XX:XX:XX:XX:XX:XX should be an unique MAC address. This is
automatically generated if not specified.

The bridge parameter can be used to automatically add the interface to a bridge device. The Proxmox VE standard bridge is called 'vmbr0'.

Option 'rate' is used to limit traffic bandwidth from and to this interface. It is specified as floating point number, unit is 'Megabytes per second'.

If you specify no bridge, we create a kvm 'user' (NATed) network device, which provides DHCP and DNS services. The following addresses are used:

10.0.2.2 Gateway
10.0.2.3 DNS Server
10.0.2.4 SMB Server

The DHCP server assign addresses to the guest starting from 10.0.2.15.

EODESCR
};
PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);

# Add net0 .. net($MAX_NETS - 1), all sharing the same schema.
for (my $i = 0; $i < $MAX_NETS; $i++) {
    $confdesc->{"net$i"} = $netdesc;
}
409
# Set of valid drive option names (ide0, scsi3, virtio1, ...). It is filled
# by the loops that add the per-index drive options to $confdesc.
my $drivename_hash;

# Syntax summary shared by the ide, scsi and virtio drive options. It used
# to be repeated three times, each with an unbalanced "volume,]".
my $drive_typetext = '[volume=]volume [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback] [,format=f] [,backup=yes|no] [,aio=native|threads]';

# Schema shared by all ideN options.
my $idedesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-drive',
    typetext => $drive_typetext,
    description => "Use volume as IDE hard disk or CD-ROM (n is 0 to 3).",
};
PVE::JSONSchema::register_standard_option("pve-qm-ide", $idedesc);

# Schema shared by all scsiN options.
my $scsidesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-drive',
    typetext => $drive_typetext,
    description => "Use volume as SCSI hard disk or CD-ROM (n is 0 to 13).",
};
PVE::JSONSchema::register_standard_option("pve-qm-scsi", $scsidesc);

# Schema shared by all virtioN options.
my $virtiodesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-drive',
    typetext => $drive_typetext,
    description => "Use volume as VIRTIO hard disk (n is 0 to 5).",
};
PVE::JSONSchema::register_standard_option("pve-qm-virtio", $virtiodesc);
435
# Schema shared by all usbN options; values are validated through the
# 'pve-qm-usb-device' format.
my $usbdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-usb-device',
    typetext => 'host=HOSTUSBDEVICE',
    description => <<EODESCR,
Configure an USB device (n is 0 to 4). This can be used to
pass-through usb devices to the guest. HOSTUSBDEVICE syntax is:

'bus-port(.port)*' (decimal numbers) or
'vendor_id:product_id' (hexadeciaml numbers)

You can use the 'lsusb -t' command to list existing usb devices.

Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.

EODESCR
};
PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);
454
# Schema shared by all hostpciN options; values are validated through the
# 'pve-qm-hostpci' format.
my $hostpcidesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-hostpci',
    typetext => "HOSTPCIDEVICE",
    description => <<EODESCR,
Map host pci devices. HOSTPCIDEVICE syntax is:

'bus:dev.func' (hexadecimal numbers)

You can us the 'lspci' command to list existing pci devices.

Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.

Experimental: user reported problems with this option.
EODESCR
};
PVE::JSONSchema::register_standard_option("pve-qm-hostpci", $hostpcidesc);
472
# Schema shared by all serialN options; the value must match a host
# /dev/ttyS<n> device path.
my $serialdesc = {
    optional => 1,
    type => 'string',
    pattern => '/dev/ttyS\d+',
    description => <<EODESCR,
Map host serial devices (n is 0 to 3).

Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.

Experimental: user reported problems with this option.
EODESCR
};
485
# Schema shared by all parallelN options; the value must match a host
# /dev/parport<n> device path.
my $paralleldesc= {
    optional => 1,
    type => 'string',
    pattern => '/dev/parport\d+',
    description => <<EODESCR,
Map host parallel devices (n is 0 to 2).

Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.

Experimental: user reported problems with this option.
EODESCR
};
498
# Add the per-index host device options (parallelN, serialN, hostpciN).
foreach my $i (0 .. $MAX_PARALLEL_PORTS - 1) {
    $confdesc->{"parallel$i"} = $paralleldesc;
}

foreach my $i (0 .. $MAX_SERIAL_PORTS - 1) {
    $confdesc->{"serial$i"} = $serialdesc;
}

foreach my $i (0 .. $MAX_HOSTPCI_DEVICES - 1) {
    $confdesc->{"hostpci$i"} = $hostpcidesc;
}

# Drive options are also recorded in $drivename_hash, which backs
# valid_drivename().
foreach my $i (0 .. $MAX_IDE_DISKS - 1) {
    my $name = "ide$i";
    $drivename_hash->{$name} = 1;
    $confdesc->{$name} = $idedesc;
}

foreach my $i (0 .. $MAX_SCSI_DISKS - 1) {
    my $name = "scsi$i";
    $drivename_hash->{$name} = 1;
    $confdesc->{$name} = $scsidesc;
}

foreach my $i (0 .. $MAX_VIRTIO_DISKS - 1) {
    my $name = "virtio$i";
    $drivename_hash->{$name} = 1;
    $confdesc->{$name} = $virtiodesc;
}

foreach my $i (0 .. $MAX_USB_DEVICES - 1) {
    $confdesc->{"usb$i"} = $usbdesc;
}
529
# Schema shared by all unusedN options; each one holds a volume ID.
my $unuseddesc = {
    optional => 1,
    type => 'string', format => 'pve-volume-id',
    description => "Reference to unused volumes.",
};

foreach my $i (0 .. $MAX_UNUSED_DISKS - 1) {
    $confdesc->{"unused$i"} = $unuseddesc;
}
539
# Cached result of kvm_version(); 0 means "not determined yet".
my $kvm_api_version = 0;

# Return the KVM API version obtained with the KVM_GET_API_VERSION ioctl on
# /dev/kvm. A true result is cached. Returns 0 if /dev/kvm cannot be opened
# or the ioctl yields no true value.
sub kvm_version {

    return $kvm_api_version if $kvm_api_version;

    my $fh = IO::File->new ("</dev/kvm");
    return 0 if !$fh;

    my $version = $fh->ioctl (KVM_GET_API_VERSION(), 0);
    $kvm_api_version = $version if $version;

    $fh->close();

    return $kvm_api_version;
}
557
# Cached result of kvm_user_version().
my $kvm_user_version;

# Return the version printed by `kvm -help`, or 'unknown' if the output
# does not start with the expected banner. The result is cached.
sub kvm_user_version {

    return $kvm_user_version if $kvm_user_version;

    my $help = `kvm -help 2>/dev/null`;

    if ($help =~ m/^QEMU( PC)? emulator version (\d+\.\d+\.\d+) /) {
        $kvm_user_version = $2;
    } else {
        $kvm_user_version = 'unknown';
    }

    return $kvm_user_version;
}
575
# True if /dev/vhost-net exists as a character device (checked at load time).
my $kernel_has_vhost_net = -c '/dev/vhost-net';
577
# Return the list of all drive option names: ideN first, then scsiN, then
# virtioN.
sub disknames {
    my $last_ide = $MAX_IDE_DISKS - 1;
    my $last_scsi = $MAX_SCSI_DISKS - 1;
    my $last_virtio = $MAX_VIRTIO_DISKS - 1;

    # order is important - used to autoselect boot disk
    return ((map { "ide$_" } (0 .. $last_ide)),
            (map { "scsi$_" } (0 .. $last_scsi)),
            (map { "virtio$_" } (0 .. $last_virtio)));
}
584
# Return true if $dev is one of the registered drive option names
# (ideN, scsiN, virtioN).
sub valid_drivename {
    my ($dev) = @_;

    return defined($drivename_hash->{$dev});
}
590
# Return true if $key is a known configuration option.
sub option_exists {
    my ($key) = @_;

    return defined($confdesc->{$key});
}
595
# Return a reference to the list of supported NIC model names. This is the
# module's own array, not a copy.
sub nic_models {
    return $nic_model_list;
}
599
# Return a hash reference mapping each 'ostype' value to a short
# human-readable name.
sub os_list_description {

    return {
        other => 'Other',
        wxp => 'Windows XP',
        w2k => 'Windows 2000',
        w2k3 => 'Windows 2003', # a stray comma after '=>' was removed here
        w2k8 => 'Windows 2008',
        wvista => 'Windows Vista',
        win7 => 'Windows 7',
        l24 => 'Linux 2.4',
        l26 => 'Linux 2.6',
    };
}
614
# Split an argument string into an array reference of words by letting the
# shell do the word splitting: the string is appended to a perl one-liner
# that prints each of its arguments on a separate line.
# Returns an empty array reference for an empty/false $str and dies with
# "unable to parse args" if the command fails.
# NOTE(review): $str is interpreted by the shell, so any shell syntax it
# contains is interpreted as well - only pass trusted input.
# fixme: use Text::ParseWords::shellwords() ?
sub split_args {
    my ($str) = @_;

    return [] if !$str;

    my $cmd = 'perl -e \'foreach my $a (@ARGV) { print "$a\n"; } \' -- ' . $str;

    my @words;
    my $collect = sub {
        my $line = shift;
        push @words, $line;
    };

    eval { run_command ($cmd, outfunc => $collect); };

    die "unable to parse args: $str\n" if $@;

    return \@words;
}
640
# Decompose a drive option name (ideN, scsiN, virtioN) into bus, controller
# and unit. IDE has 2 units per controller, SCSI 7; for any other bus the
# limit is 1024.
# Returns a hash reference with keys bus, desc, controller, unit and index.
# Dies if $dev does not have the expected form.
sub disk_devive_info {
    my ($dev) = @_;

    my ($bus, $index) = ($dev =~ m/^(ide|scsi|virtio)(\d+)$/)
        or die "unknown disk device format '$dev'";

    my %units_per_controller = (ide => 2, scsi => 7);
    my $maxdev = $units_per_controller{$bus} || 1024;

    my $controller = int ($index / $maxdev);
    my $unit = $index % $maxdev;

    return {
        bus => $bus,
        desc => uc($bus) . " $controller:$unit",
        controller => $controller,
        unit => $unit,
        index => $index,
    };
}
664
# Build a drive name from a drive option name and its media type.
# IDE and SCSI drives give "<bus><controller>-cd<unit>" for cdrom media and
# "<bus><controller>-hd<unit>" otherwise; every other bus gives
# "<bus><index>".
sub qemu_drive_name {
    my ($dev, $media) = @_;

    my $info = disk_devive_info ($dev);
    my $bus = $info->{bus};

    if ($bus ne 'ide' && $bus ne 'scsi') {
        return sprintf("%s%i", $bus, $info->{index});
    }

    my $mediastr = ($media eq 'cdrom') ? "-cd" : "-hd";

    return sprintf("%s%i%s%i", $bus, $info->{controller},
                   $mediastr, $info->{unit});
}
679
# Cached result of get_cdrom_path().
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1 and /dev/cdrom2 that is a
# symbolic link; a match is cached. Returns undef if none of them is a
# symlink. (Previously the sub fell off its end in that case and returned
# the value of the last file test.)
sub get_cdrom_path {

    return $cdrom_path if $cdrom_path;

    foreach my $candidate (qw(/dev/cdrom /dev/cdrom1 /dev/cdrom2)) {
        return $cdrom_path = $candidate if -l $candidate;
    }

    return undef;
}
690
# Resolve a cdrom drive's 'file' value to a path:
#   'cdrom'        -> result of get_cdrom_path()
#   'none'         -> empty string
#   absolute path  -> returned unchanged
#   anything else  -> resolved by PVE::Storage::path()
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path ($storecfg, $cdrom);
}
704
# Try to convert an old style file name to a volume ID.
# 'none', 'cdrom', /dev/ paths and values that already look like
# "storage:volume" are returned unchanged. Any other value containing a '/'
# yields undef. A plain file name becomes "local:iso/<file>" for cdrom
# media and "local:<vmid>/<file>" otherwise.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    return $file if $file eq 'none' || $file eq 'cdrom';
    return $file if $file =~ m|^/dev/.+| || $file =~ m/^([^:]+):(.+)$/;

    return undef if $file =~ m|/|;

    return "local:iso/$file" if $media && $media eq 'cdrom';

    return "local:$vmid/$file";
}
723
# Check that volume type $vtype matches the drive's media setting:
# media 'disk' expects 'image', media 'cdrom' expects 'iso'.
# Does nothing if $media is not set. Raises a parameter exception for $opt
# on a mismatch and dies with "internal error" for any other media value.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %expected_type = (disk => 'image', cdrom => 'iso');

    my $etype = $expected_type{$media};
    die "internal error" if !defined($etype);

    return if ($vtype eq $etype);

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
742
# Normalize $drive->{file} in place.
# A value that is not 'cdrom'/'none', not a /dev/ path, not a
# "storage:volume" ID and not purely numeric is treated as a filesystem
# path and converted to a volume ID with PVE::Storage::path_to_volume_id();
# a parameter exception for $opt is raised if no storage matches.
# The media type defaults to 'cdrom' for iso volumes and for the special
# values 'cdrom' and 'none'.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    my $file = $drive->{file};

    my $is_fs_path = ($file !~ m/^(cdrom|none)$/) &&
        ($file !~ m|^/dev/.+|) &&
        ($file !~ m/^([^:]+):(.+)$/) &&
        ($file !~ m/^\d+$/);

    if ($is_fs_path) {
        # try to convert filesystem paths to volume IDs
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $drive->{file});
        raise_param_exc({ $opt => "unable to associate path '$drive->{file}' to any storage"}) if !$vtype;
        $drive->{media} = 'cdrom' if !$drive->{media} && $vtype eq 'iso';
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
761
# Write a new configuration file for VM $vmid from the $settings hash
# reference, without taking any lock.
# 'name' defaults to "vm<vmid>" and 'memory' to the default from
# load_defaults(); both defaults are stored back into $settings. Keys that
# are not known options are skipped.
# Dies if the configuration file already exists.
sub create_conf_nolock {
    my ($vmid, $settings) = @_;

    my $filename = config_file ($vmid);

    die "configuration file '$filename' already exists\n" if -f $filename;

    my $defaults = load_defaults();

    $settings->{name} = "vm$vmid" if !$settings->{name};
    $settings->{memory} = $defaults->{memory} if !$settings->{memory};

    my $data = '';
    foreach my $opt (keys %$settings) {
        next if !$confdesc->{$opt};

        my $value = $settings->{$opt};
        # Skip only unset or empty values. An explicit 0 (e.g. "acpi: 0" or
        # "kvm: 0") must be written, otherwise the option silently falls
        # back to its default.
        next if !defined($value) || $value eq '';

        $data .= "$opt: $value\n";
    }

    PVE::Tools::file_set_contents($filename, $data);
}
786
# ideX = [volume=]volume-id[,media=d][,cyls=c,heads=h,secs=s[,trans=t]]
#        [,snapshot=on|off][,cache=off|none|writethrough|writeback]
#        [,format=f][,rerror=r][,werror=w][,backup=yes|no]
#        [,aio=native|threads]
#
# Parse a drive option value into a hash reference. $key is the option name
# (e.g. 'ide0') and supplies 'interface' and 'index'; it may be undef when
# the value is only being verified. Returns undef if the key or the value
# is not valid.
sub parse_drive {
    my ($key, $data) = @_;

    my $res = {};

    # $key may be undefined - used to verify JSON parameters
    if (!defined($key)) {
        $res->{interface} = 'unknown'; # should not harm when used to verify parameters
        $res->{index} = 0;
    } elsif ($key =~ m/^([^\d]+)(\d+)$/) {
        $res->{interface} = $1;
        $res->{index} = $2;
    } else {
        return undef;
    }

    foreach my $p (split (/,/, $data)) {
        next if $p =~ m/^\s*$/;

        if ($p =~ m/^(file|volume|cyls|heads|secs|trans|media|snapshot|cache|format|rerror|werror|backup|aio)=(.+)$/) {
            my ($k, $v) = ($1, $2);

            # 'volume' is an alias for 'file'
            $k = 'file' if $k eq 'volume';

            # every property may be given only once
            return undef if defined $res->{$k};

            $res->{$k} = $v;
        } elsif (!$res->{file} && $p !~ m/=/) {
            # a bare value is the volume, unless one was given already
            $res->{file} = $p;
        } else {
            return undef;
        }
    }

    return undef if !$res->{file};

    # allowed values for each property; only properties that are set are checked
    my @value_checks = (
        [ cache    => qr/^(off|none|writethrough|writeback)$/ ],
        [ snapshot => qr/^(on|off)$/ ],
        [ cyls     => qr/^\d+$/ ],
        [ heads    => qr/^\d+$/ ],
        [ secs     => qr/^\d+$/ ],
        [ media    => qr/^(disk|cdrom)$/ ],
        [ trans    => qr/^(none|lba|auto)$/ ],
        [ format   => qr/^(raw|cow|qcow|qcow2|vmdk|cloop)$/ ],
        [ rerror   => qr/^(ignore|report|stop)$/ ],
        [ werror   => qr/^(enospc|ignore|report|stop)$/ ],
        [ backup   => qr/^(yes|no)$/ ],
        [ aio      => qr/^(native|threads)$/ ],
    );

    foreach my $check (@value_checks) {
        my ($prop, $allowed) = @$check;
        return undef if $res->{$prop} && $res->{$prop} !~ $allowed;
    }

    # cdrom drives take no disk-only properties and cannot use virtio
    if ($res->{media} && ($res->{media} eq 'cdrom')) {
        return undef if $res->{snapshot} || $res->{trans} || $res->{format};
        return undef if $res->{heads} || $res->{secs} || $res->{cyls};
        return undef if $res->{interface} eq 'virtio';
    }

    # rerror does not work with scsi drives
    return undef if $res->{rerror} && $res->{interface} eq 'scsi';

    return $res;
}
856
# Drive properties emitted by print_drive() and print_drive_full(), in
# output order.
my @qemu_drive_options = qw(heads secs cyls trans media format cache snapshot rerror werror aio);

# Serialize a parsed drive into its configuration form: the file followed
# by ",key=value" for every true property, including 'backup'.
sub print_drive {
    my ($vmid, $drive) = @_;

    my @present = grep { $drive->{$_} } (@qemu_drive_options, 'backup');
    my $opts = join('', map { ",$_=$drive->{$_}" } @present);

    return "$drive->{file}$opts";
}
869
# Build the device description string for a parsed drive.
# virtio -> virtio-blk-pci; scsi -> scsi-disk on bus scsi<controller>.0
# (7 units per controller); ide -> ide-drive on bus ide.<controller>
# (2 units per controller). Any other interface, including 'usb', yields
# an empty string.
sub print_drivedevice_full {
    my ($storecfg, $vmid, $drive) = @_;

    my $interface = $drive->{interface};
    my $index = $drive->{index};

    # drive/id suffix common to all device types
    my $ids = "drive=drive-$interface$index,id=device-$interface$index";

    if ($interface eq 'virtio') {
        return "virtio-blk-pci,$ids";
    }

    if ($interface eq 'scsi') {
        my $maxdev = 7;
        my $controller = int ($index / $maxdev);
        my $unit = $index % $maxdev;

        return "scsi-disk,bus=scsi$controller.0,scsi-id=$unit,$ids";
    }

    if ($interface eq 'ide') {
        my $maxdev = 2;
        my $controller = int ($index / $maxdev);
        my $unit = $index % $maxdev;

        return "ide-drive,bus=ide.$controller,unit=$unit,$ids";
    }

    return '';
}
905
# Build the drive description string for a parsed drive:
# "[file=<path>,]if=none,id=drive-<interface><index><options>".
# The path comes from get_iso_path() for cdrom drives; for other drives it
# is the file itself when absolute, or resolved via PVE::Storage::path().
# "aio=native" is appended when the drive has no aio setting.
sub print_drive_full {
    my ($storecfg, $vmid, $drive) = @_;

    my @present = grep { $drive->{$_} } @qemu_drive_options;
    my $opts = join('', map { ",$_=$drive->{$_}" } @present);

    # use linux-aio by default (qemu default is threads)
    $opts .= ",aio=native" if !$drive->{aio};

    my $volid = $drive->{file};
    my $path;
    if (drive_is_cdrom ($drive)) {
        $path = get_iso_path ($storecfg, $vmid, $volid);
    } elsif ($volid =~ m|^/|) {
        $path = $volid;
    } else {
        $path = PVE::Storage::path ($storecfg, $volid);
    }

    # an empty path (e.g. cdrom 'none') gives a drive without file
    my $pathinfo = $path ? "file=$path," : '';

    return "${pathinfo}if=none,id=drive-$drive->{interface}$drive->{index}$opts";
}
933
934
# Return true if $drive is set and its media type is 'cdrom'.
sub drive_is_cdrom {
    my $drive = shift;

    return ($drive && $drive->{media} && $drive->{media} eq 'cdrom');
}
941
# Parse a host PCI address of the form 'bus:dev.func' (lower-case hex
# digits). Returns { pciid => $value }, or undef for an empty or malformed
# value.
sub parse_hostpci {
    my ($value) = @_;

    return undef if !$value;

    return undef if $value !~ m/^[a-f0-9]{2}:[a-f0-9]{2}\.[a-f0-9]$/;

    return { pciid => $value };
}
957
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a network option value into a hash reference with the keys model
# (lower case), macaddr (upper case; generated with random_ether_addr()
# when not given) and, if present, bridge and rate.
# Returns undef for an unknown property or when no model is given.
sub parse_net {
    my ($data) = @_;

    my $res = {};

    foreach my $kvp (split (/,/, $data)) {

        if (my ($model, undef, $mac) = $kvp =~ m/^(ne2k_pci|e1000|rtl8139|pcnet|virtio|ne2k_isa|i82551|i82557b|i82559er)(=([0-9a-f]{2}(:[0-9a-f]{2}){5}))?$/i) {
            $res->{model} = lc ($model);
            $res->{macaddr} = uc($mac) || random_ether_addr ();
            next;
        }

        if ($kvp =~ m/^bridge=(\S+)$/) {
            $res->{bridge} = $1;
            next;
        }

        if ($kvp =~ m/^rate=(\d+(\.\d+)?)$/) {
            $res->{rate} = $1;
            next;
        }

        return undef;
    }

    return undef if !$res->{model};

    return $res;
}
985
# Serialize a parsed network description:
# "<model>[=<macaddr>][,bridge=<bridge>][,rate=<rate>]".
sub print_net {
    my ($net) = @_;

    my $head = "$net->{model}";
    $head .= "=$net->{macaddr}" if $net->{macaddr};

    my @parts = ($head);
    foreach my $prop (qw(bridge rate)) {
        push @parts, "$prop=$net->{$prop}" if $net->{$prop};
    }

    return join(',', @parts);
}
996
# Rewrite every parsable netN entry of $settings in place by passing it
# through parse_net() and print_net(); parse_net() supplies a MAC address
# where none was given. Entries that fail to parse are left unchanged.
sub add_random_macs {
    my ($settings) = @_;

    my @netopts = grep { m/^net(\d+)$/ } keys %$settings;

    foreach my $opt (@netopts) {
        my $net = parse_net($settings->{$opt});
        next if !$net;
        $settings->{$opt} = print_net($net);
    }
}
1007
# Record $volid under the lowest free unusedN key. The free slot is looked
# up in $config, but the entry is written to $res.
# Does nothing if $config already lists the volume; dies if all unusedN
# slots are taken.
sub add_unused_volume {
    my ($config, $res, $volid) = @_;

    my $key;
    foreach my $ind (0 .. $MAX_UNUSED_DISKS - 1) {
        my $slot = "unused$ind";
        my $vid = $config->{$slot};

        if ($vid) {
            return if $vid eq $volid; # do not add duplicates
            next;
        }

        # remember the first (lowest) free slot only
        $key = $slot if !defined($key);
    }

    die "To many unused volume - please delete them first.\n" if !$key;

    $res->{$key} = $volid;
}
1025
# fixme: remove all those $noerr parameters?
1027
PVE::JSONSchema::register_format('pve-qm-bootdisk', \&verify_bootdisk);
# Format check for 'pve-qm-bootdisk': return $value if it is a valid drive
# name. Otherwise return undef when $noerr is set, or die.
sub verify_bootdisk {
    my ($value, $noerr) = @_;

    if (!valid_drivename($value)) {
        return undef if $noerr;
        die "invalid boot disk '$value'\n";
    }

    return $value;
}
1038
PVE::JSONSchema::register_format('pve-qm-net', \&verify_net);
# Format check for 'pve-qm-net': return $value if parse_net() accepts it.
# Otherwise return undef when $noerr is set, or die.
sub verify_net {
    my ($value, $noerr) = @_;

    if (!parse_net($value)) {
        return undef if $noerr;
        die "unable to parse network options\n";
    }

    return $value;
}
1049
PVE::JSONSchema::register_format('pve-qm-drive', \&verify_drive);
# Format check for 'pve-qm-drive': return $value if parse_drive() accepts
# it (called without an option name). Otherwise return undef when $noerr is
# set, or die.
sub verify_drive {
    my ($value, $noerr) = @_;

    if (!parse_drive (undef, $value)) {
        return undef if $noerr;
        die "unable to parse drive options\n";
    }

    return $value;
}
1060
PVE::JSONSchema::register_format('pve-qm-hostpci', \&verify_hostpci);
# Format check for 'pve-qm-hostpci': return $value if parse_hostpci()
# accepts it. Otherwise return undef when $noerr is set, or die.
sub verify_hostpci {
    my ($value, $noerr) = @_;

    if (!parse_hostpci($value)) {
        return undef if $noerr;
        die "unable to parse pci id\n";
    }

    return $value;
}
1071
PVE::JSONSchema::register_format('pve-qm-watchdog', \&verify_watchdog);
# Format check for 'pve-qm-watchdog': return $value if parse_watchdog()
# accepts it. Otherwise return undef when $noerr is set, or die.
sub verify_watchdog {
    my ($value, $noerr) = @_;

    if (!parse_watchdog($value)) {
        return undef if $noerr;
        die "unable to parse watchdog options\n";
    }

    return $value;
}
1082
# Parse a watchdog option value of the form
# "[model=]i6300esb|ib700[,[action=]reset|shutdown|poweroff|pause|debug|none]".
# Returns a hash reference with the 'model' and/or 'action' found, or undef
# for an empty value or an unknown element.
sub parse_watchdog {
    my ($value) = @_;

    return undef if !$value;

    my $res = {};

    foreach my $p (split (/,/, $value)) {
        next if $p =~ m/^\s*$/;

        if (my (undef, $model) = $p =~ m/^(model=)?(i6300esb|ib700)$/) {
            $res->{model} = $model;
            next;
        }

        if (my (undef, $action) = $p =~ m/^(action=)?(reset|shutdown|poweroff|pause|debug|none)$/) {
            $res->{action} = $action;
            next;
        }

        return undef;
    }

    return $res;
}
1104
# Parse a comma separated USB device specification.
# Accepted elements:
#   host=VVVV:PPPP  (4 hex digits each) -> vendorid / productid
#   host=BUS-PORT[.PORT...]             -> hostbus / hostport
# Returns a hash ref with the recognized fields, or undef when the value
# is empty, holds no element at all, or holds an unrecognized element.
sub parse_usb_device {
    my ($value) = @_;

    return undef if !$value;

    my $res = {};
    my $found;

    foreach my $part (split(/,/, $value)) {
        if ($part =~ m/^host=([0-9A-Fa-f]{4}):([0-9A-Fa-f]{4})$/) {
            @$res{qw(vendorid productid)} = ($1, $2);
        } elsif ($part =~ m/^host=(\d+)\-(\d+(\.\d+)*)$/) {
            @$res{qw(hostbus hostport)} = ($1, $2);
        } else {
            return undef;
        }
        $found = 1;
    }

    return undef if !$found;

    return $res;
}
1131
PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);
# Format checker for 'pve-qm-usb-device'.
# Returns $value when parse_usb_device() accepts it. Otherwise returns
# undef if $noerr is set, and dies if it is not.
sub verify_usb_device {
    my ($value, $noerr) = @_;

    if (!parse_usb_device($value)) {
        return undef if $noerr;
        die "unable to parse usb device\n";
    }

    return $value;
}
1142
# add JSON properties for create and set function
#
# Copies every entry of $confdesc into the hash referenced by $prop
# (overwriting keys of the same name) and returns $prop.
sub json_config_properties {
    my $prop = shift;

    # keys() and values() walk an unmodified hash in the same order, so
    # the slice assignment pairs each option with its own description.
    @$prop{keys %$confdesc} = values %$confdesc;

    return $prop;
}
1153
# Validate $value against the type declared for $key in $confdesc and
# return its normalized form:
#   boolean - 1 or 0 (accepts 1/0 and, case-insensitively, on/yes/true
#             and off/no/false)
#   integer - numeric value of an all-digit string
#   string  - the value itself; checked against the declared format when
#             there is one, otherwise one pair of enclosing double quotes
#             is stripped
# Dies for unknown keys, undefined values, values containing CR/LF, and
# values that do not match the declared type.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);

    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    die "internal error" if $type ne 'string';

    my $fmt = $desc->{format};
    if (!$fmt) {
        # plain string: drop one pair of surrounding double quotes
        $value =~ s/^\"(.*)\"$/$1/;
        return $value;
    }

    if ($fmt eq 'pve-qm-drive') {
        # special case - we need to pass $key to parse_drive()
        return $value if parse_drive($key, $value);
        die "unable to parse drive options\n";
    }

    PVE::JSONSchema::check_format($fmt, $value);
    return $value;
}
1193
# Run $code with @param through lock_file() on the lock file of VM $vmid,
# using a timeout argument of 10. Any error left in $@ afterwards is
# re-thrown.
sub lock_config {
    my ($vmid, $code, @param) = @_;

    my $lockfile = config_file_lock($vmid);

    lock_file($lockfile, 10, $code, @param);

    if (my $err = $@) {
        die $err;
    }
}
1203
# Build the path "nodes/<node>/qemu-server/<vmid>.conf" for a VM config.
# $node falls back to $nodename when it is not given (or false).
sub cfs_config_path {
    my ($vmid, $node) = @_;

    $node ||= $nodename;

    return "nodes/$node/qemu-server/$vmid.conf";
}
1210
# Report whether the host supports IOMMU.
# fixme: no real check is done yet, so this always returns 1. See
# http://www.linux-kvm.org/page/How_to_assign_devices_with_VT-d_in_KVM
sub check_iommu_support {
    return 1;
}
1219
# Path of the configuration file of VM $vmid: the cfs_config_path()
# result prefixed with "/etc/pve/". $node is passed through unchanged.
sub config_file {
    my ($vmid, $node) = @_;

    my $relpath = cfs_config_path($vmid, $node);

    return "/etc/pve/$relpath";
}
1226
# Name of the lock file for VM $vmid: "lock-<vmid>.conf" inside $lock_dir.
sub config_file_lock {
    my ($vmid) = @_;

    my $dir = $lock_dir;

    return "$dir/lock-$vmid.conf";
}
1232
# Set the access and modification time of the VM's config file to "now"
# (utime with undef timestamps). Returns what utime() returns.
sub touch_config {
    my ($vmid) = @_;

    my $path = config_file($vmid);

    return utime(undef, undef, $path);
}
1239
# Walk all non-cdrom drives in $settings.
#  * A file spec of the form "[STOREID:]SIZE" (SIZE may have a fraction)
#    requests a new volume: it is allocated with vdisk_alloc, and the
#    drive entry in $settings is rewritten to reference the new volume.
#    The store defaults to 'local'; the format is the drive's 'format'
#    option or the storage default.
#  * Any other file spec must resolve to an existing regular file or
#    block device.
# Returns an array ref with the volume ids that were allocated. On any
# error the volumes allocated so far are freed again (failures while
# freeing are only warned about) and the original error is re-thrown.
sub create_disks {
    my ($storecfg, $vmid, $settings) = @_;

    my $vollist = [];

    my $handle_drive = sub {
        my ($ds, $disk) = @_;

        return if drive_is_cdrom($disk);

        my $file = $disk->{file};

        if ($file =~ m/^(([^:\s]+):)?(\d+(\.\d+)?)$/) {
            my ($storeid, $size) = ($2 || 'local', $3);

            my $defformat = PVE::Storage::storage_default_format($storecfg, $storeid);
            my $fmt = $disk->{format} || $defformat;

            syslog('info', "VM $vmid creating new disk - size is $size GB");

            my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid,
                                                  $fmt, undef, $size*1024*1024);

            $disk->{file} = $volid;
            delete($disk->{format}); # no longer needed
            push @$vollist, $volid;
            $settings->{$ds} = PVE::QemuServer::print_drive($vmid, $disk);
            return;
        }

        # not a size request - the referenced image has to exist already
        my $path;
        if ($disk->{file} =~ m|^/dev/.+|) {
            $path = $disk->{file};
        } else {
            $path = PVE::Storage::path($storecfg, $disk->{file});
        }

        die "image '$path' does not exists\n" if !(-f $path || -b $path);
    };

    eval { foreach_drive($settings, $handle_drive); };

    if (my $err = $@) {
        syslog('err', "VM $vmid creating disks failed");

        # roll back everything allocated so far
        foreach my $volid (@$vollist) {
            eval { PVE::Storage::vdisk_free($storecfg, $volid); };
            warn $@ if $@;
        }

        die $err;
    }

    return $vollist;
}
1294
# Free volume $volid, then touch the VM's config file.
# Dies (without freeing anything) when $volid is an absolute path or when
# the owner reported by PVE::Storage::path() is not $vmid.
sub unlink_image {
    my ($storecfg, $vmid, $volid) = @_;

    if ($volid =~ m|^/|) {
        die "reject to unlink absolute path '$volid'";
    }

    my (undef, $owner) = PVE::Storage::path($storecfg, $volid);

    if (!$owner || ($owner != $vmid)) {
        die "reject to unlink '$volid' - not owned by this VM";
    }

    syslog('info', "VM $vmid deleting volume '$volid'");

    PVE::Storage::vdisk_free($storecfg, $volid);

    touch_config($vmid);
}
1312
# Destroy VM $vmid:
#  1. load its config and refuse to continue when the config is locked,
#  2. free every non-cdrom drive volume that is owned by this VM
#     (absolute paths and volumes of other owners are left alone),
#  3. unlink the config file,
#  4. free whatever volumes vdisk_list() still reports for this VM.
# Errors in step 4 are only warned about; errors in steps 1 and 2
# propagate to the caller.
sub destroy_vm {
    my ($storecfg, $vmid) = @_;

    my $conffile = config_file($vmid);

    my $conf = load_config($vmid);

    check_lock($conf);

    # only remove disks owned by this VM
    foreach_drive($conf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);

        # Use 'return' (not 'next') to skip a drive: we are inside a
        # callback, and leaving a sub through loop control only works by
        # accident of how the caller iterates.
        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        PVE::Storage::vdisk_free($storecfg, $volid);
    });

    unlink $conffile;

    # also remove unused disk
    eval {
        my $dl = PVE::Storage::vdisk_list($storecfg, undef, $vmid);

        PVE::Storage::foreach_volid($dl, sub {
            my ($volid, $sid, $volname, $d) = @_;
            PVE::Storage::vdisk_free($storecfg, $volid);
        });
    };
    warn $@ if $@;
}
1354
# fixme: remove?
#
# Return a hash ref mapping every drive key of $conf to its parsed drive
# hash, with a 'disksize' field added: the 'size' found for the drive's
# file divided by 1024*1024, or 0 when no size information is available.
# Sizes of /dev/... paths come from file_size_info(); all other
# non-cdrom files are looked up through vdisk_list(), whose errors are
# only warned about.
sub load_diskinfo_old {
    my ($storecfg, $vmid, $conf) = @_;

    my $sizeinfo = {};
    my $drives = {};
    my $vollist; # deliberately left undef when there is nothing to list

    foreach_drive($conf, sub {
        my ($ds, $di) = @_;

        $drives->{$ds} = $di;

        return if drive_is_cdrom($di);

        my $file = $di->{file};
        if ($file =~ m|^/dev/.+|) {
            $sizeinfo->{$file}->{size} = PVE::Storage::file_size_info($file);
        } else {
            push @$vollist, $file;
        }
    });

    eval {
        my $dl = PVE::Storage::vdisk_list($storecfg, undef, $vmid, $vollist);

        PVE::Storage::foreach_volid($dl, sub {
            my ($volid, $sid, $volname, $d) = @_;
            $sizeinfo->{$volid} = $d;
        });
    };
    warn $@ if $@;

    foreach my $di (values %$drives) {
        my $entry = $sizeinfo->{$di->{file}};
        $di->{disksize} = $entry ? $entry->{size} / (1024*1024) : 0;
    }

    return $drives;
}
1396
# Read the configuration of VM $vmid through PVE::Cluster::cfs_read_file().
# Dies with "no such VM" when no configuration is returned.
sub load_config {
    my ($vmid) = @_;

    my $conf = PVE::Cluster::cfs_read_file(cfs_config_path($vmid));

    if (!defined($conf)) {
        die "no such VM ('$vmid')\n";
    }

    return $conf;
}
1408
# Parse the raw text of a VM configuration file.
#   $filename - must end in "/qemu-server/<vmid>.conf"; the vmid is taken
#               from it (dies otherwise)
#   $raw      - file content; undef yields undef
# Returns a hash ref with the SHA1 'digest' of $raw plus one entry per
# accepted "key: value" line:
#   - 'description' is passed through PVE::Tools::decode_text()
#   - 'args' is stored verbatim
#   - every other key is validated with check_type(); drive values are
#     re-written with the volume id from filename_to_volume_id()
#   - 'cdrom' is stored as 'ide2'
#   - a legacy 'smp' value becomes 'sockets' unless 'sockets' is set
# Comment and blank lines are skipped; values that fail validation are
# warned about and dropped.
sub parse_vm_config {
    my ($filename, $raw) = @_;

    return undef if !defined($raw);

    my $res = { digest => Digest::SHA1::sha1_hex($raw) };

    my ($vmid) = $filename =~ m|/qemu-server/(\d+)\.conf$|
        or die "got strange filename '$filename'";

    # consume $raw one line at a time
    while ($raw && $raw =~ s/^(.*?)(\n|$)//) {
        my $line = $1;

        next if $line =~ m/^\#/ || $line =~ m/^\s*$/;

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $res->{description} = PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            $res->{args} = $2;
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(\S+)\s*$/) {
            my ($key, $value) = ($1, $2);

            eval { $value = check_type($key, $value); };
            if ($@) {
                warn "vm $vmid - unable to parse value of '$key' - $@";
                next;
            }

            my $fmt = $confdesc->{$key}->{format};
            if ($fmt && $fmt eq 'pve-qm-drive') {
                my $v = parse_drive($key, $value);
                my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media});
                if (!$volid) {
                    warn "vm $vmid - unable to parse value of '$key'\n";
                    next;
                }
                $v->{file} = $volid;
                $value = print_drive($vmid, $v);
            }

            my $target = ($key eq 'cdrom') ? 'ide2' : $key;
            $res->{$target} = $value;
        }
    }

    # convert old smp to sockets
    $res->{sockets} = $res->{smp} if $res->{smp} && !$res->{sockets};
    delete $res->{smp};

    return $res;
}
1474
# Apply $settings / $unset to the configuration of VM $vmid while holding
# the VM's config lock. The work is done by change_config_nolock().
sub change_config {
    my ($vmid, $settings, $unset, $skiplock) = @_;

    # Pass a code REFERENCE. A bare '&change_config_nolock' would call the
    # sub right here - outside the lock - and hand its return value to
    # lock_config() as the "code" to run.
    # $vmid is repeated in the parameter list because lock_config() only
    # forwards the arguments that follow the code ref.
    lock_config($vmid, \&change_config_nolock, $vmid, $settings, $unset, $skiplock);
}
1480
# Rewrite the configuration file of VM $vmid (the caller is expected to
# hold the config lock).
#   $settings - hash of option => new value; every value except 'digest'
#               is validated with check_type(), 'description' is encoded
#               first, and 'cdrom' is stored as 'ide2'
#   $unset    - hash whose defined keys are dropped from the file
#   $skiplock - when false, check_lock() is applied to $settings
# The existing file is copied line by line into "<file>.<pid>.tmp":
# comments and blank lines are kept, changed options are replaced in
# place, duplicate keys and 'unusedN' lines for re-added volumes are
# dropped, 'smp' is renamed to 'sockets', and new options are appended.
# The temporary file is then renamed over the original. On any error the
# temporary file is removed and the error is re-thrown.
sub change_config_nolock {
    my ($vmid, $settings, $unset, $skiplock) = @_;

    my $res = {};

    $unset->{ide2} = $unset->{cdrom} if $unset->{cdrom};

    check_lock($settings) if !$skiplock;

    # we do not use 'smp' any longer
    if (!$settings->{sockets} && $settings->{smp}) {
        $settings->{sockets} = $settings->{smp};
    }
    $unset->{smp} = 1 if $settings->{sockets};

    my $new_volids = {};

    foreach my $key (keys %$settings) {
        next if $key eq 'digest';

        my $value = $settings->{$key};
        $value = PVE::Tools::encode_text($value) if $key eq 'description';

        eval { $value = check_type($key, $value); };
        die "unable to parse value of '$key' - $@" if $@;

        my $target = ($key eq 'cdrom') ? 'ide2' : $key;
        $res->{$target} = $value;

        next if !valid_drivename($key);

        # remember volumes that are (re-)attached by this change
        my $drive = PVE::QemuServer::parse_drive($key, $value);
        $new_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
    }

    my $filename = config_file($vmid);
    my $tmpfn = "$filename.$$.tmp";

    my $fh = IO::File->new($filename, "r") ||
        die "unable to read config for VM $vmid\n";

    my $werror = "unable to write config for VM $vmid\n";

    my $out = IO::File->new($tmpfn, "w") || die $werror;

    eval {
        my $done = {};

        while (my $line = <$fh>) {

            # comments and blank lines are copied unchanged
            if (($line =~ m/^\#/) || ($line =~ m/^\s*$/)) {
                $out->print($line) or die $werror;
                next;
            }

            my ($key, $value) = $line =~ m/^([a-z][a-z_]*\d*):\s*(.*\S)\s*$/
                or die "unable to parse config file: $line\n";

            # remove 'unusedX' settings if we re-add a volume
            next if $key =~ m/^unused/ && $new_volids->{$value};

            # convert 'smp' to 'sockets'
            $key = 'sockets' if $key eq 'smp';

            # only the first occurrence of a key survives
            next if $done->{$key};
            $done->{$key} = 1;

            $value = delete $res->{$key} if defined($res->{$key});

            next if defined($unset->{$key});

            $out->print("$key: $value\n") or die $werror;
        }

        # append options that were not present in the old file
        foreach my $key (keys %$res) {
            next if defined($unset->{$key});
            $out->print("$key: $res->{$key}\n") or die $werror;
        }
    };

    my $err = $@;

    $fh->close();

    # drop the temporary file and propagate the given error
    my $abort = sub {
        my ($msg) = @_;
        unlink $tmpfn;
        die $msg;
    };

    if ($err) {
        $out->close();
        $abort->($err);
    }

    $abort->("close failed - $!\n") if !$out->close();

    $abort->("rename failed - $!\n") if !rename($tmpfn, $filename);
}
1597
# Collect default option values: every 'default' defined in $confdesc,
# with 'keyboard' overridden by the value from datacenter.cfg when that
# file sets one. Returns a hash ref.
sub load_defaults {

    my $res = {};

    # we use static defaults from our JSON schema configuration
    foreach my $key (keys %$confdesc) {
        my $default = $confdesc->{$key}->{default};
        $res->{$key} = $default if defined($default);
    }

    my $dccfg = PVE::Cluster::cfs_read_file('datacenter.cfg');
    if (my $kb = $dccfg->{keyboard}) {
        $res->{keyboard} = $kb;
    }

    return $res;
}
1614
# Return a hash ref { <vmid> => { exists => 1 } } for every entry of the
# cluster VM list whose 'node' equals $nodename. An empty hash ref is
# returned when the VM list or its 'ids' member is missing.
sub config_list {
    my $res = {};

    my $vmlist = PVE::Cluster::get_vmlist();
    return $res if !$vmlist || !$vmlist->{ids};

    my $ids = $vmlist->{ids};

    foreach my $vmid (keys %$ids) {
        my $entry = $ids->{$vmid};
        my $node = $entry->{node};

        $res->{$vmid}->{exists} = 1 if $node && $node eq $nodename;
    }

    return $res;
}
1628
# test if VM uses local resources (to prevent migration)
#
# A VM counts as using local resources when its config has a true
# 'hostusb' or 'hostpci' value (old syntax), or any key of the form
# usbN, hostpciN, serialN or parallelN.
# Returns 1 or 0. Dies with "VM uses local resources" instead of
# returning 1 unless $noerr is set.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    my $loc_res = 0;

    # NOTE(review): an unconditional 'die "implement me"' placeholder used
    # to sit here and made the checks below unreachable - confirm that the
    # checks below are the intended implementation.
    $loc_res = 1 if $conf->{hostusb}; # old syntax
    $loc_res = 1 if $conf->{hostpci}; # old syntax

    foreach my $k (keys %$conf) {
        $loc_res = 1 if $k =~ m/^(usb|hostpci|serial|parallel)\d+$/;
    }

    die "VM uses local resources\n" if $loc_res && !$noerr;

    return $loc_res;
}
1647
# Die with "VM is locked (<lock>)" when the configuration hash $conf has
# a true 'lock' entry.
sub check_lock {
    my ($conf) = @_;

    if ($conf->{lock}) {
        die "VM is locked ($conf->{lock})\n";
    }
}
1653
# Check that process $pid looks like the kvm instance that was started
# with '-pidfile $pidfile'.
# Reads /proc/$pid/cmdline; the first argument has to end in "kvm", and
# the argument following the first '-pidfile' / '--pidfile' has to equal
# $pidfile. Returns 1 on a match and a false value in every other case.
sub check_cmdline {
    my ($pidfile, $pid) = @_;

    my $fh = IO::File->new("/proc/$pid/cmdline", "r");
    return undef if !defined($fh);

    my $line = <$fh>;
    $fh->close;
    return undef if !$line;

    # arguments in /proc/<pid>/cmdline are NUL separated
    my @param = split(/\0/, $line);

    my $cmd = $param[0];
    return if !$cmd || ($cmd !~ m|kvm$|);

    foreach my $i (0 .. $#param) {
        my $arg = $param[$i];
        next if !$arg;
        next if ($arg ne '-pidfile') && ($arg ne '--pidfile');

        # only the first pidfile option is considered
        my $val = $param[$i+1];
        return ($val && ($val eq $pidfile)) ? 1 : undef;
    }

    return undef;
}
1679
# Return the pid of the running kvm process of VM $vmid, or undef when
# the VM is not running.
# Dies when the VM has no configuration file. The pid is read from the
# VM's pidfile and is only trusted when /proc/<pid> exists and
# check_cmdline() confirms that the process was started with this
# pidfile.
sub check_running {
    my ($vmid) = @_;

    my $filename = config_file($vmid);

    die "unable to find configuration file for VM $vmid - no such machine\n"
        if ! -f $filename;

    my $pidfile = pidfile_name($vmid);

    if (my $fd = IO::File->new($pidfile, "r")) {
        my $st = stat($fd);
        my $line = <$fd>;
        close($fd);

        # The timestamp that was checked belongs to the pidfile, so that
        # is the file to name in the warning.
        if ($st && $st->mtime > time()) {
            warn "file '$pidfile' modified in future\n";
        }

        # $line is undef for an empty pidfile
        if (defined($line) && $line =~ m/^(\d+)$/) {
            my $pid = $1;

            return $pid if ((-d "/proc/$pid") && check_cmdline($pidfile, $pid));
        }
    }

    return undef;
}
1709
# Return the config_list() hash, with a 'pid' entry added for every
# listed VM that has a "<vmid>.pid" file in $var_run_tmpdir and for which
# check_running() reports a pid. The plain list is returned when the
# directory cannot be opened.
sub vzlist {

    my $vzlist = config_list();

    my $dir = IO::Dir->new($var_run_tmpdir);
    return $vzlist if !$dir;

    while (defined(my $entry = $dir->read)) {
        my ($vmid) = $entry =~ m/^(\d+)\.pid$/
            or next;

        next if !defined($vzlist->{$vmid});

        my $pid = check_running($vmid);
        $vzlist->{$vmid}->{pid} = $pid if $pid;
    }

    return $vzlist;
}
1727
# Time of the last failed size query, keyed by the absolute path or by
# the scalar result of PVE::Storage::parse_volume_id().
my $storage_timeout_hash = {};

# Size information for the boot disk named by $conf->{bootdisk}.
# Returns ($size, $used) in list context and $size in scalar context.
# Returns undef when there is no usable boot disk (not set, invalid drive
# name, not configured, unparsable, a cdrom, or without a file), when the
# size query yields no format, or when the same path/storage failed less
# than 30 seconds ago.
sub disksize {
    my ($storecfg, $conf) = @_;

    my $bootdisk = $conf->{bootdisk};
    return undef if !$bootdisk
        || !valid_drivename($bootdisk)
        || !$conf->{$bootdisk};

    my $drive = parse_drive($bootdisk, $conf->{$bootdisk});
    return undef if !defined($drive) || drive_is_cdrom($drive);

    my $volid = $drive->{file};
    return undef if !$volid;

    my ($path, $timeoutid);
    if ($volid =~ m|^/|) {
        # absolute path: the path itself is the timeout key
        $timeoutid = $volid;
        $path = $volid;
    } else {
        $timeoutid = PVE::Storage::parse_volume_id($volid);
        $path = PVE::Storage::path($storecfg, $volid);
    }

    if (my $last_timeout = $storage_timeout_hash->{$timeoutid}) {
        # skip storage with errors
        return undef if (time() - $last_timeout) < 30;

        delete $storage_timeout_hash->{$timeoutid};
    }

    my ($size, $format, $used) = PVE::Storage::file_size_info($path, 1);

    if (!defined($format)) {
        # got timeout
        $storage_timeout_hash->{$timeoutid} = time();
        return undef;
    }

    return wantarray ? ($size, $used) : $size;
}
1779
# Per-pid sample (time, used, cpu, relcpu) from the previous vmstatus()
# call; needed to turn cumulative CPU counters into a usage ratio.
my $last_proc_pid_stat;

# Collect status information for all VMs listed by vzlist(), or only for
# $opt_vmid when given.
# Returns a hash ref keyed by vmid. Each entry holds: pid, status
# ('running'/'stopped'), disk, maxdisk, cpus, name, maxmem, uptime, cpu,
# relcpu, mem, netin, netout, diskread, diskwrite. The counters stay 0
# for VMs without a pid or whose /proc data cannot be read.
sub vmstatus {
    my ($opt_vmid) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my ($uptime) = PVE::ProcFSTools::read_proc_uptime();

    # pass 1: static data from the configuration
    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);

        my $cfspath = cfs_config_path($vmid);
        my $conf = PVE::Cluster::cfs_read_file($cfspath) || {};

        my $d = {};
        $d->{pid} = $list->{$vmid}->{pid};

        # fixme: better status?
        $d->{status} = $list->{$vmid}->{pid} ? 'running' : 'stopped';

        my ($size, $used) = disksize($storecfg, $conf);
        if (defined($size) && defined($used)) {
            $d->{disk} = $used;
            $d->{maxdisk} = $size;
        } else {
            $d->{disk} = 0;
            $d->{maxdisk} = 0;
        }

        $d->{cpus} = ($conf->{sockets} || 1) * ($conf->{cores} || 1);
        $d->{name} = $conf->{name} || "VM $vmid";
        $d->{maxmem} = $conf->{memory} ? $conf->{memory}*(1024*1024) : 0;

        $d->{uptime} = 0;
        $d->{cpu} = 0;
        $d->{relcpu} = 0;
        $d->{mem} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $res->{$vmid} = $d;
    }

    # pass 2: network counters of the tap<vmid>i<N> devices
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^tap([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $res->{$vmid};
        next if !$d;

        # the device's 'receive' counter feeds netout and its 'transmit'
        # counter feeds netin
        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};
    }

    my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
    my $cpucount = $cpuinfo->{cpus} || 1;
    my $ctime = gettimeofday;

    # pass 3: per-process data from /proc for VMs that have a pid
    foreach my $vmid (keys %$list) {

        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        if (my $fh = IO::File->new("/proc/$pid/io", "r")) {
            my $data = {};
            while (defined(my $line = <$fh>)) {
                if ($line =~ m/^([rw]char):\s+(\d+)$/) {
                    $data->{$1} = $2;
                }
            }
            close($fh);
            $d->{diskread} = $data->{rchar} || 0;
            $d->{diskwrite} = $data->{wchar} || 0;
        }

        my $statstr = file_read_firstline("/proc/$pid/stat");
        next if !$statstr;

        # captures used below: $2 utime, $3 stime, $6 starttime,
        # $7 vsize, $8 rss
        my ($utime, $stime, $vsize, $rss, $starttime);
        if ($statstr =~ m/^$pid \(.*\) \S (-?\d+) -?\d+ -?\d+ -?\d+ -?\d+ \d+ \d+ \d+ \d+ \d+ (\d+) (\d+) (-?\d+) (-?\d+) -?\d+ -?\d+ -?\d+ 0 (\d+) (\d+) (-?\d+) \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ -?\d+ -?\d+ \d+ \d+ \d+/) {
            ($utime, $stime, $vsize, $rss, $starttime) = ($2, $3, $7, $8 * 4096, $6);
        } else {
            next;
        }

        my $used = $utime + $stime;

        my $vcpus = $d->{cpus} > $cpucount ? $cpucount : $d->{cpus};

        # starttime is counted in clock ticks; use the system's tick rate
        # (as the cpu calculation below does) instead of assuming 100
        $d->{uptime} = int($uptime - ($starttime/$clock_ticks));

        if ($vsize) {
            $d->{mem} = int(($rss/$vsize)*$d->{maxmem});
        }

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first sample for this pid - nothing to compare against yet
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
                relcpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $clock_ticks;

        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = $dutime/$dtime;
            $d->{relcpu} = ($d->{cpu} * $cpucount) / $vcpus;
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => $d->{cpu},
                relcpu => $d->{relcpu},
            };
        } else {
            # interval too short for a new sample - report the previous one
            $d->{cpu} = $old->{cpu};
            $d->{relcpu} = $old->{relcpu};
        }
    }

    return $res;
}
1917
# Call $func->($drivename, $drive) for every key of $conf that
# valid_drivename() accepts and whose value parse_drive() can parse.
sub foreach_drive {
    my ($conf, $func) = @_;

    foreach my $ds (keys %$conf) {
        if (valid_drivename($ds)) {
            my $drive = parse_drive($ds, $conf->{$ds});
            $func->($ds, $drive) if $drive;
        }
    }
}
1930
# Translate the configuration of VM $vmid into a kvm command line.
#   $storecfg    - storage configuration (passed to the drive printers)
#   $vmid        - VM id
#   $conf        - VM configuration hash
#   $defaults    - fallback values for options that are not set in $conf
#   $migrate_uri - optional; added as '-incoming <uri>'
# Returns the command as an array ref; in list context it also returns an
# array ref with the drive files that parse as storage volume ids.
# Dies when the installed kvm reports a version below 0.14.0, on a net id
# >= $MAX_NETS, or on a too long tap interface name.
sub config_to_command {
    my ($storecfg, $vmid, $conf, $defaults, $migrate_uri) = @_;

    my $cmd = [];

    my $kvmver = kvm_user_version();
    my $vernum = 0; # unknown
    if ($kvmver =~ m/^(\d+)\.(\d+)\.(\d+)$/) {
        $vernum = $1*1000000+$2*1000+$3;
    }

    die "detected old qemu-kvm binary ($kvmver)\n" if $vernum < 14000;

    my $have_ovz = -f '/proc/vz/vestat';

    push @$cmd, '/usr/bin/kvm';

    push @$cmd, '-id', $vmid;

    my $use_virtio = 0;

    my $socket = monitor_socket($vmid);
    push @$cmd, '-chardev', "socket,id=monitor,path=$socket,server,nowait";
    push @$cmd, '-mon', "chardev=monitor,mode=readline";

    $socket = vnc_socket($vmid);
    push @$cmd, '-vnc', "unix:$socket,x509,password";

    push @$cmd, '-pidfile' , pidfile_name($vmid);

    push @$cmd, '-daemonize';

    push @$cmd, '-incoming', $migrate_uri if $migrate_uri;

    # include usb device config
    push @$cmd, '-readconfig', '/usr/share/qemu-server/pve-usb.cfg';

    # enable absolute mouse coordinates (needed by vnc)
    my $tablet = defined($conf->{tablet}) ? $conf->{tablet} : $defaults->{tablet};
    push @$cmd, '-device', 'usb-tablet,bus=ehci.0,port=6' if $tablet;

    # host pci devices
    for (my $i = 0; $i < $MAX_HOSTPCI_DEVICES; $i++) {
        my $d = parse_hostpci($conf->{"hostpci$i"});
        next if !$d;
        push @$cmd, '-device', "pci-assign,host=$d->{pciid},id=hostpci$i";
    }

    # usb devices
    for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) {
        my $d = parse_usb_device($conf->{"usb$i"});
        next if !$d;
        if ($d->{vendorid} && $d->{productid}) {
            push @$cmd, '-device', "usb-host,vendorid=$d->{vendorid},productid=$d->{productid}";
        } elsif (defined($d->{hostbus}) && defined($d->{hostport})) {
            push @$cmd, '-device', "usb-host,hostbus=$d->{hostbus},hostport=$d->{hostport}";
        }
    }

    # serial devices - only for ports that are actually configured
    for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
        my $path = $conf->{"serial$i"};
        next if !$path;
        push @$cmd, '-chardev', "tty,id=serial$i,path=$path";
        push @$cmd, '-device', "isa-serial,chardev=serial$i";
    }

    # parallel devices - only for ports that are actually configured
    for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
        my $path = $conf->{"parallel$i"};
        next if !$path;
        push @$cmd, '-chardev', "parport,id=parallel$i,path=$path";
        push @$cmd, '-device', "isa-parallel,chardev=parallel$i";
    }

    my $vmname = $conf->{name} || "vm$vmid";

    push @$cmd, '-name', $vmname;

    my $sockets = 1;
    $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer used
    $sockets = $conf->{sockets} if $conf->{sockets};

    my $cores = $conf->{cores} || 1;

    my $boot_opt;

    push @$cmd, '-smp', "sockets=$sockets,cores=$cores";

    push @$cmd, '-cpu', $conf->{cpu} if $conf->{cpu};

    $boot_opt = "menu=on";
    if ($conf->{boot}) {
        $boot_opt .= ",order=$conf->{boot}";
    }

    push @$cmd, '-nodefaults';

    push @$cmd, '-boot', $boot_opt if $boot_opt;

    push @$cmd, '-no-acpi' if defined($conf->{acpi}) && $conf->{acpi} == 0;

    push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;

    my $vga = $conf->{vga};
    if (!$vga) {
        if ($conf->{ostype} && ($conf->{ostype} eq 'win7' || $conf->{ostype} eq 'w2k8')) {
            $vga = 'std';
        } else {
            $vga = 'cirrus';
        }
    }

    push @$cmd, '-vga', $vga if $vga; # for kvm 77 and later

    # time drift fix
    my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
    push @$cmd, '-tdf' if $tdf;

    my $nokvm = defined($conf->{kvm}) && $conf->{kvm} == 0 ? 1 : 0;

    if (my $ost = $conf->{ostype}) {
        # other, wxp, w2k, w2k3, w2k8, wvista, win7, l24, l26

        if ($ost =~ m/^w/) { # windows
            push @$cmd, '-localtime' if !defined($conf->{localtime});

            # use rtc-td-hack when acpi is enabled
            if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
                push @$cmd, '-rtc-td-hack';
            }
        }

        # -tdf ?
        # -no-acpi
        # -no-kvm
        # -win2k-hack ?
    }

    push @$cmd, '-no-kvm' if $nokvm;

    push @$cmd, '-localtime' if $conf->{localtime};

    push @$cmd, '-startdate', $conf->{startdate} if $conf->{startdate};

    push @$cmd, '-S' if $conf->{freeze};

    # set keyboard layout
    my $kb = $conf->{keyboard} || $defaults->{keyboard};
    push @$cmd, '-k', $kb if $kb;

    # enable sound
    #my $soundhw = $conf->{soundhw} || $defaults->{soundhw};
    #push @$cmd, '-soundhw', 'es1370';
    #push @$cmd, '-soundhw', $soundhw if $soundhw;

    if ($conf->{watchdog}) {
        my $wdopts = parse_watchdog($conf->{watchdog});
        push @$cmd, '-watchdog', $wdopts->{model} || 'i6300esb';
        push @$cmd, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
    }

    my $vollist = [];
    my $scsicontroller = {};

    foreach_drive($conf, sub {
        my ($ds, $drive) = @_;

        eval {
            PVE::Storage::parse_volume_id($drive->{file});
            push @$vollist, $drive->{file};
        }; # ignore errors

        $use_virtio = 1 if $ds =~ m/^virtio/;
        if ($drive->{interface} eq 'scsi') {
            my $maxdev = 7;
            my $controller = int($drive->{index} / $maxdev);
            push @$cmd, '-device', "lsi,id=scsi$controller" if !$scsicontroller->{$controller};
            # Record the controller in the OUTER hash (no 'my' here),
            # otherwise the same controller is added again for every
            # further drive attached to it.
            $scsicontroller->{$controller} = 1;
        }
        my $tmp = print_drive_full($storecfg, $vmid, $drive);
        $tmp .= ",boot=on" if $conf->{bootdisk} && ($conf->{bootdisk} eq $ds);
        push @$cmd, '-drive', $tmp;
        push @$cmd, '-device', print_drivedevice_full($storecfg, $vmid, $drive);
    });

    push @$cmd, '-m', $conf->{memory} || $defaults->{memory};

    my $foundnet = 0;

    foreach my $k (sort keys %$conf) {
        next if $k !~ m/^net(\d+)$/;
        my $i = int($1);

        die "got strange net id '$i'\n" if $i >= ${MAX_NETS};

        if ($conf->{"net$i"} && (my $net = parse_net($conf->{"net$i"}))) {

            $foundnet = 1;

            my $ifname = "tap${vmid}i$i";

            # kvm uses TUNSETIFF ioctl, and that limits ifname length
            die "interface name '$ifname' is too long (max 15 character)\n"
                if length($ifname) >= 16;

            my $device = $net->{model};
            my $vhostparam = '';
            if ($net->{model} eq 'virtio') {
                $use_virtio = 1;
                $device = 'virtio-net-pci';
                $vhostparam = ',vhost=on' if $kernel_has_vhost_net;
            };

            if ($net->{bridge}) {
                push @$cmd, '-netdev', "type=tap,id=${k},ifname=${ifname},script=/var/lib/qemu-server/pve-bridge$vhostparam";
            } else {
                push @$cmd, '-netdev', "type=user,id=${k},hostname=$vmname";
            }

            # qemu > 0.15 always try to boot from network - we disable that by
            # not loading the pxe rom file
            my $extra = (!$conf->{boot} || ($conf->{boot} !~ m/n/)) ?
                "romfile=," : '';
            push @$cmd, '-device', "$device,${extra}mac=$net->{macaddr},netdev=${k}";
        }
    }

    push @$cmd, '-net', 'none' if !$foundnet;

    # hack: virtio with fairsched is unreliable, so we do not use fairsched
    # when the VM uses virtio devices.
    if (!$use_virtio && $have_ovz) {

        my $cpuunits = defined($conf->{cpuunits}) ?
            $conf->{cpuunits} : $defaults->{cpuunits};

        push @$cmd, '-cpuunits', $cpuunits if $cpuunits;

        # fixme: cpulimit is currently ignored
        #push @$cmd, '-cpulimit', $conf->{cpulimit} if $conf->{cpulimit};
    }

    # add custom args
    if ($conf->{args}) {
        my $aa = split_args($conf->{args});
        push @$cmd, @$aa;
    }

    return wantarray ? ($cmd, $vollist) : $cmd;
}
2178
# Path of the VNC unix socket of VM $vmid: "<vmid>.vnc" in $var_run_tmpdir.
sub vnc_socket {
    my $vmid = shift;

    return "${var_run_tmpdir}/$vmid.vnc";
}
2183
# Path of the monitor unix socket of VM $vmid: "<vmid>.mon" in
# $var_run_tmpdir.
sub monitor_socket {
    my $vmid = shift;

    return "${var_run_tmpdir}/$vmid.mon";
}
2188
# Path of the pidfile of VM $vmid: "<vmid>.pid" in $var_run_tmpdir.
sub pidfile_name {
    my $vmid = shift;

    return "${var_run_tmpdir}/$vmid.pid";
}
2193
# Generate a random MAC address as six colon separated, upper-case hex
# octets. The octets are the first six bytes of a SHA1 digest over
# rand() and time(); in the first octet the lowest bit is cleared
# (no multicast) and the second-lowest bit is set (locally administered).
sub random_ether_addr {

    my $digest = Digest::SHA1::sha1_hex(rand(), time());

    my @octets = map { hex(substr($digest, $_ * 2, 2)) } (0 .. 5);

    $octets[0] &= 0xfe; # clear multicast
    $octets[0] |= 2; # set local id

    return join(':', map { sprintf("%02X", $_) } @octets);
}
2216
# Return the first port in the range 60000..60009 on which a listening
# socket can be opened on localhost. The probe socket is closed again
# before returning. Dies when none of the ports is available.
sub next_migrate_port {

    foreach my $port (60000 .. 60009) {

        my $sock = IO::Socket::INET->new(Listen => 5,
                                         LocalAddr => 'localhost',
                                         LocalPort => $port,
                                         ReuseAddr => 1,
                                         Proto => 0);
        next if !$sock;

        close($sock);
        return $port;
    }

    die "unable to find free migration port";
}
2235
# Start VM $vmid while holding its config lock.
#   $statefile - optional: 'tcp' makes kvm wait for an incoming migration
#                on a free port (the port is printed); any other value is
#                a state file that is fed to kvm via "exec:cat" when it
#                exists, and is unlinked after a successful start
#   $skiplock  - skip the check for a 'lock' entry in the config
# Dies when the VM is locked or already running, when a configured host
# PCI device cannot be prepared, or when the kvm command fails. Errors of
# the monitor commands sent after the start are ignored.
sub vm_start {
    my ($storecfg, $vmid, $statefile, $skiplock) = @_;

    my $start = sub {
        my $conf = load_config($vmid);

        check_lock($conf) if !$skiplock;

        if (check_running($vmid)) {
            my $msg = "VM $vmid already running - start failed\n" ;
            syslog('err', $msg);
            die $msg;
        }
        syslog('info', "VM $vmid start");

        my $migrate_uri;
        my $migrate_port = 0;

        if ($statefile && $statefile eq 'tcp') {
            $migrate_port = next_migrate_port();
            $migrate_uri = "tcp:localhost:${migrate_port}";
        } elsif ($statefile && -f $statefile) {
            $migrate_uri = "exec:cat $statefile";
        } elsif ($statefile) {
            warn "state file '$statefile' does not exist - doing normal startup\n";
        }

        my $defaults = load_defaults();

        my ($cmd, $vollist) = config_to_command($storecfg, $vmid, $conf, $defaults, $migrate_uri);

        # host pci devices
        foreach my $i (0 .. $MAX_HOSTPCI_DEVICES - 1) {
            my $d = parse_hostpci($conf->{"hostpci$i"});
            next if !$d;

            my $pciid = $d->{pciid};
            my $info = pci_device_info("0000:$pciid");
            die "IOMMU not present\n" if !check_iommu_support();
            die "no pci device info for device '$pciid'\n" if !$info;
            die "can't unbind pci device '$pciid'\n" if !pci_dev_bind_to_stub($info);
            die "can't reset pci device '$pciid'\n" if !pci_dev_reset($info);
        }

        PVE::Storage::activate_volumes($storecfg, $vollist);

        # no timeout while waiting for migration/state data
        eval { run_command($cmd, timeout => $migrate_uri ? undef : 30); };

        if (my $err = $@) {
            my $msg = "start failed: $err";
            syslog('err', "VM $vmid $msg");
            die $msg;
        }

        if ($statefile && $statefile eq 'tcp') {
            print "migration listens on port $migrate_port\n";
        } elsif ($statefile) {
            unlink $statefile;
            # fixme: send resume - is that necessary ?
            eval { vm_monitor_command($vmid, "cont", 1) };
        }

        my $speed = $conf->{migrate_speed} || $defaults->{migrate_speed};
        if ($speed) {
            eval { vm_monitor_command($vmid, "migrate_set_speed ${speed}m", 1); };
        }

        my $downtime = $conf->{migrate_downtime} || $defaults->{migrate_downtime};
        if ($downtime) {
            eval { vm_monitor_command($vmid, "migrate_set_downtime ${downtime}", 1); };
        }
    };

    lock_config($vmid, $start);
}
2318
# Read from the monitor handle $fh until the accumulated data ends with
# the "(qemu) " prompt, or until end of file.
#   $fh      - handle supporting sysread() and usable with IO::Select
#   $timeout - seconds to wait for each chunk of data
# Returns everything read, without the trailing prompt.
# Dies with "monitor read timeout" when no data arrives in time, and with
# the system error message when sysread() fails.
sub __read_avail {
    my ($fh, $timeout) = @_;

    my $sel = IO::Select->new();
    $sel->add($fh);

    my $res = '';
    my $buf;

    my @ready;
    while (scalar(@ready = $sel->can_read($timeout))) {
        my $count = $fh->sysread($buf, 8192);

        if ($count) {
            $res .= $buf;

            # Match the prompt against everything received so far: it may
            # arrive split across two reads, and looking only at the last
            # chunk would then miss it and run into the timeout.
            if ($res =~ /^(.*)\(qemu\) $/s) {
                $res = $1;
                last;
            }
        } else {
            die "$!\n" if !defined($count);
            last; # end of file
        }
    }

    die "monitor read timeout\n" if !scalar(@ready);

    return $res;
}
2350
# Send one command line to the monitor socket of a VM and return the reply.
#
# Parameters:
#   $vmid   - VM id
#   $cmdstr - monitor command (sent followed by "\r")
#   $nolog  - when true, the command is not written to syslog
#
# Returns the reply with the first line (the echoed command) removed unless
# it starts with "unknown command"; lines are joined with "\n" and a final
# "\n" is appended. For 'q' and 'quit' no reply is read and undef is
# returned. Any error is logged to syslog and re-thrown.
sub vm_monitor_command {
    my ($vmid, $cmdstr, $nolog) = @_;

    my $res;

    syslog("info", "VM $vmid monitor command '$cmdstr'") if !$nolog;

    eval {
        die "VM not running\n" if !check_running($vmid);

        my $sname = monitor_socket($vmid);

        my $sock = IO::Socket::UNIX->new(Peer => $sname) ||
            die "unable to connect to VM $vmid socket - $!\n";

        # hack: migrate sometime blocks the monitor (when migrate_downtime
        # is set), so those commands get a one hour timeout throughout
        my $is_migrate = $cmdstr =~ m/^(info\s+migrate|migrate\s)/;

        my $timeout = $is_migrate ? 60*60 : 3;

        # read banner
        my $data = __read_avail($sock, $timeout);

        if ($data !~ m/^QEMU\s+(\S+)\s+monitor\s/) {
            die "got unexpected qemu monitor banner\n";
        }

        my $sel = IO::Select->new();
        $sel->add($sock);

        if (!scalar(my @ready = $sel->can_write($timeout))) {
            die "monitor write error - timeout\n";
        }

        my $fullcmd = "$cmdstr\r";

        # a short write counts as an error as well
        my $written = $sock->syswrite($fullcmd);
        if (!$written || ($written != length($fullcmd))) {
            die "monitor write error - $!\n";
        }

        # the monitor goes away on quit, so there is no reply to wait for
        return if ($cmdstr eq 'q') || ($cmdstr eq 'quit');

        if ($is_migrate) {
            $timeout = 60*60; # 1 hour
        } elsif ($cmdstr =~ m/^(eject|change)/) {
            $timeout = 60; # note: cdrom mount command is slow
        } else {
            $timeout = 20;
        }

        if ($res = __read_avail($sock, $timeout)) {

            my @lines = split("\r?\n", $res);

            # skip echo
            shift @lines if @lines && $lines[0] !~ m/^unknown command/;

            $res = join("\n", @lines);
            $res .= "\n";
        }
    };

    my $err = $@;

    if ($err) {
        syslog("err", "VM $vmid monitor command failed - $err");
        die $err;
    }

    return $res;
}
2424
# Return the command line for a VM as a single string: the command array
# built by config_to_command from the VM config and the defaults, joined
# with single spaces (no shell quoting is applied).
#
# Parameters:
#   $storecfg - storage configuration, passed through to config_to_command
#   $vmid     - VM id
sub vm_commandline {
    my ($storecfg, $vmid) = @_;

    my $conf = load_config($vmid);

    my $defaults = load_defaults();

    # Call in list context and take the first element, as the
    # "my ($cmd, $vollist) = config_to_command(...)" call elsewhere in this
    # file does; this does not depend on what the function returns in
    # scalar context.
    my ($cmd) = config_to_command($storecfg, $vmid, $conf, $defaults);

    return join(' ', @$cmd);
}
2436
# Send 'system_reset' to the monitor of a VM. The work runs inside
# lock_config; check_lock is skipped when $skiplock is true.
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $code = sub {
        my $conf = load_config($vmid);

        unless ($skiplock) {
            check_lock($conf);
        }

        syslog("info", "VM $vmid sending 'reset'");

        vm_monitor_command($vmid, "system_reset", 1);
    };

    lock_config($vmid, $code);
}
2451
# Send 'system_powerdown' to the monitor of a VM. The work runs inside
# lock_config; check_lock is skipped when $skiplock is true.
sub vm_shutdown {
    my ($vmid, $skiplock) = @_;

    my $code = sub {
        my $conf = load_config($vmid);

        unless ($skiplock) {
            check_lock($conf);
        }

        syslog("info", "VM $vmid sending 'shutdown'");

        vm_monitor_command($vmid, "system_powerdown", 1);
    };

    lock_config($vmid, $code);
}
2466
# Stop a running VM, escalating step by step:
#   1. send 'quit' to the monitor and wait up to 50 seconds
#   2. send SIGTERM (immediately if 'quit' failed) and wait up to 10 seconds
#   3. send SIGKILL
# Finally fairsched_rmnod is called for the VM.
#
# Parameters:
#   $vmid     - VM id
#   $skiplock - when true, check_lock is not called
#
# Returns early, doing nothing else, if check_running reports the VM as not
# running. All work runs inside lock_config.
sub vm_stop {
    my ($vmid, $skiplock) = @_;

    lock_config($vmid, sub {

        my $pid = check_running($vmid);

        if (!$pid) {
            syslog('info', "VM $vmid already stopped");
            return;
        }

        my $conf = load_config($vmid);

        check_lock($conf) if !$skiplock;

        syslog("info", "VM $vmid stopping");

        # Poll once per second for at most $timeout seconds. Returns true
        # when the VM is gone. The state is checked again after the last
        # sleep so that a VM which exits at the very end of the wait is not
        # signalled (its pid may no longer belong to it).
        my $wait_stopped = sub {
            my ($timeout) = @_;

            for (my $count = 0; $count < $timeout; $count++) {
                return 1 if !check_running($vmid);
                sleep 1;
            }

            return !check_running($vmid);
        };

        eval { vm_monitor_command($vmid, "quit", 1); };

        my $err = $@;

        if ($err) {
            syslog('info', "VM $vmid quit failed - terminating now with SIGTERM");
            kill 15, $pid;
        } elsif (!$wait_stopped->(50)) { # fixme: how long?
            syslog('info', "VM $vmid still running - terminating now with SIGTERM");
            kill 15, $pid;
        }

        # wait again
        if (!$wait_stopped->(10)) {
            syslog('info', "VM $vmid still running - terminating now with SIGKILL\n");
            kill 9, $pid;
        }

        fairsched_rmnod($vmid); # try to destroy group
    });
}
2525
# Send 'stop' to the monitor of a VM. The work runs inside lock_config;
# check_lock is skipped when $skiplock is true.
sub vm_suspend {
    my ($vmid, $skiplock) = @_;

    my $code = sub {
        my $conf = load_config($vmid);

        unless ($skiplock) {
            check_lock($conf);
        }

        syslog("info", "VM $vmid suspend");

        vm_monitor_command($vmid, "stop", 1);
    };

    lock_config($vmid, $code);
}
2540
# Send 'cont' to the monitor of a VM. The work runs inside lock_config;
# check_lock is skipped when $skiplock is true.
sub vm_resume {
    my ($vmid, $skiplock) = @_;

    my $code = sub {
        my $conf = load_config($vmid);

        unless ($skiplock) {
            check_lock($conf);
        }

        syslog("info", "VM $vmid resume");

        vm_monitor_command($vmid, "cont", 1);
    };

    lock_config($vmid, $code);
}
2555
# Send the ctrl-alt-delete key combination to a VM ('sendkey' monitor
# command). The work runs inside lock_config; check_lock is skipped when
# $skiplock is true.
sub vm_cad {
    my ($vmid, $skiplock) = @_;

    lock_config($vmid, sub {

        my $conf = load_config($vmid);

        check_lock($conf) if !$skiplock;

        # the log text names the key combination exactly as it is sent
        syslog("info", "VM $vmid sending ctrl-alt-delete");

        vm_monitor_command($vmid, "sendkey ctrl-alt-delete", 1);
    });
}
2570
# Destroy a VM: calls fairsched_rmnod and destroy_vm, but only if
# check_running reports the VM as not running; otherwise it dies with
# "VM is running\n". Failures are logged to syslog and re-thrown.
#
# Parameters:
#   $storecfg - storage configuration, passed to destroy_vm
#   $vmid     - VM id
#   $skiplock - when true, check_lock is not called
sub vm_destroy {
    my ($storecfg, $vmid, $skiplock) = @_;

    my $code = sub {
        my $conf = load_config($vmid);

        unless ($skiplock) {
            check_lock($conf);
        }

        syslog("info", "VM $vmid destroy called (removing all data)");

        eval {
            die "VM is running\n" if check_running($vmid);

            fairsched_rmnod($vmid); # try to destroy group
            destroy_vm($storecfg, $vmid);
        };

        my $err = $@;

        if ($err) {
            syslog("err", "VM $vmid destroy failed - $err");
            die $err;
        }
    };

    lock_config($vmid, $code);
}
2599
# Stop every VM that vzlist() reports with a pid, escalating in three steps:
#   1. vm_shutdown for each VM, then wait up to $timeout seconds
#   2. monitor 'quit' for each remaining VM, then wait up to 30 seconds
#   3. SIGTERM for each remaining VM
# The list of VMs is re-read every 5 seconds while waiting. The sub returns
# as soon as no VM with a pid is left.
#
# Parameters:
#   $timeout - seconds to wait after the shutdown requests (default 3*60)
sub vm_stopall {
    my ($timeout) = @_;

    $timeout = 3*60 if !$timeout;

    my $wt = 5; # seconds between two polls

    my ($vzlist, @active, $count);

    # (re)load the VM list and remember the ids which have a pid
    my $refresh = sub {
        $vzlist = vzlist();
        @active = grep { $vzlist->{$_}->{pid} } keys %$vzlist;
        $count = scalar(@active);
    };

    # poll every $wt seconds until no VM is left or $secs (rounded up to a
    # multiple of $wt) have passed
    my $wait_for_exit = sub {
        my ($secs) = @_;

        my $maxtries = int(($secs + $wt -1)/$wt);
        my $try = 0;
        while ($count && ($try < $maxtries)) {
            $try++;
            sleep $wt;
            $refresh->();
        }
    };

    $refresh->();

    if ($count) {

        my $msg = "Stopping Qemu Server - sending shutdown requests to all VMs\n";
        syslog('info', $msg);
        print STDERR $msg;

        foreach my $vmid (@active) {
            eval { vm_shutdown($vmid, 1); };
            print STDERR $@ if $@;
        }

        $wait_for_exit->($timeout);

        return if !$count;

        foreach my $vmid (@active) {
            $msg = "VM $vmid still running - sending stop now\n";
            syslog('info', $msg);
            print $msg;

            eval { vm_monitor_command($vmid, "quit", 1); };
            print STDERR $@ if $@;
        }

        $wait_for_exit->(30);

        return if !$count;

        foreach my $vmid (@active) {
            $msg = "VM $vmid still running - terminating now with SIGTERM\n";
            syslog('info', $msg);
            print $msg;
            kill 15, $vzlist->{$vmid}->{pid};
        }

        # this is called by system shutdown scripts, so remaining
        # processes get killed anyway (no need to send kill -9 here)

        $msg = "Qemu Server stopped\n";
        syslog('info', $msg);
        print STDERR $msg;
    }
}
2689
2690 # pci helpers
2691
# Write $buf to $filename (opened with mode "w").
#
# Returns a true value only if the file could be opened, written and
# closed without error. Returns undef if the file cannot be opened or if
# close fails, and the false result of print if printing fails.
sub file_write {
    my ($filename, $buf) = @_;

    my $fh = IO::File->new($filename, "w");
    return undef if !$fh;

    my $res = print $fh $buf;

    # Output is buffered, so a failing write may only be reported when the
    # handle is flushed by close; treat that as a failure too.
    $res = undef if !$fh->close();

    return $res;
}
2704
# Collect information about a PCI device from "$pcisysfs/devices/$name".
#
# $name must have the form "dddd:bb:ss.f" (lower case hex digits).
#
# Returns undef if the name does not match, if the 'irq' file does not
# contain a decimal number, or if the 'vendor' or 'device' file does not
# start with "0x". Otherwise returns a hash reference with the keys name,
# vendor, product (both without the "0x" prefix), domain, bus, slot, func,
# irq and has_fl_reset (true if a 'reset' file exists, else 0).
sub pci_device_info {
    my ($name) = @_;

    my @address = ($name =~ m/^([a-f0-9]{4}):([a-f0-9]{2}):([a-f0-9]{2})\.([a-f0-9])$/);
    return undef if !@address;

    my $devdir = "$pcisysfs/devices/$name";

    my $irq = file_read_firstline("$devdir/irq");
    return undef unless defined($irq) && $irq =~ m/^\d+$/;

    # both ids are read the same way and stored without the "0x" prefix
    my %id;
    foreach my $attr (qw(vendor device)) {
        my $value = file_read_firstline("$devdir/$attr");
        return undef unless defined($value) && $value =~ s/^0x//;
        $id{$attr} = $value;
    }

    my ($domain, $bus, $slot, $func) = @address;

    return {
        name => $name,
        vendor => $id{vendor},
        product => $id{device},
        domain => $domain,
        bus => $bus,
        slot => $slot,
        func => $func,
        irq => $irq,
        has_fl_reset => -f "$devdir/reset" || 0,
    };
}
2736
# Write "1" to the 'reset' file of a PCI device.
#
# $dev is a hash reference with a 'name' key (the structure returned by
# pci_device_info has one). Returns the result of file_write.
sub pci_dev_reset {
    my ($dev) = @_;

    my $reset_file = "$pcisysfs/devices/$dev->{name}/reset";

    return file_write($reset_file, "1");
}
2746
# Bind a PCI device to the pci-stub driver.
#
# $dev is a hash reference with the keys name, vendor and product (the
# structure returned by pci_device_info has them).
#
# Steps: write "vendor product" to the driver's new_id file, write the
# device name to the device's driver/unbind file, then write it to the
# pci-stub bind file unless the device directory already shows up below
# the driver.
#
# Returns 1 if the device was already bound, undef if one of the writes
# fails (a failed unbind only counts when the unbind file exists), and
# otherwise the result of the final directory test.
sub pci_dev_bind_to_stub {
    my ($dev) = @_;

    my $name = $dev->{name};

    my $stubdir = "$pcisysfs/drivers/pci-stub";
    my $bound = "$stubdir/$name";

    return 1 if -d $bound;

    file_write("$stubdir/new_id", "$dev->{vendor} $dev->{product}")
        or return undef;

    my $unbind = "$pcisysfs/devices/$name/driver/unbind";
    return undef if !file_write($unbind, $name) && -f $unbind;

    unless (-d $bound) {
        file_write("$stubdir/bind", $name) or return undef;
    }

    return -d $bound;
}
2770
2771 1;