]> git.proxmox.com Git - qemu-server.git/blame - PVE/QemuServer.pm
Add delay in startall to reduce load at boot time
[qemu-server.git] / PVE / QemuServer.pm
CommitLineData
1e3baf05
DM
1package PVE::QemuServer;
2
3use strict;
4use POSIX;
5use IO::Handle;
6use IO::Select;
7use IO::File;
8use IO::Dir;
9use IO::Socket::UNIX;
10use File::Basename;
11use File::Path;
12use File::stat;
13use Getopt::Long;
14use Digest::SHA1;
15use Fcntl ':flock';
16use Cwd 'abs_path';
17use IPC::Open3;
18use Fcntl;
19use PVE::SafeSyslog;
20use Storable qw(dclone);
21use PVE::Exception qw(raise raise_param_exc);
22use PVE::Storage;
23use PVE::Tools qw(run_command lock_file file_read_firstline);
24use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file cfs_lock_file);
25use PVE::INotify;
26use PVE::ProcFSTools;
27use Time::HiRes qw (gettimeofday);
28
29my $clock_ticks = POSIX::sysconf(&POSIX::_SC_CLK_TCK);
30
31# Note about locking: we use flock on the config file to protect
32# against concurrent actions.
33# Additionally, we have a 'lock' setting in the config file. This
34# can be set to 'migrate' or 'backup'. Most actions are not
35# allowed when such a lock is set. But you can ignore this kind of
36# lock with the --skiplock flag.
37
38cfs_register_file('/qemu-server/', \&parse_vm_config);
39
40#no warnings 'redefine';
41
# Define the fairsched syscall numbers and the KVM_GET_API_VERSION ioctl
# request, unless they were already defined elsewhere. The syscall numbers
# depend on the architecture; unsupported architectures abort module load.
unless (defined(&_VZSYSCALLS_H_)) {
    eval 'sub _VZSYSCALLS_H_ () {1;}' unless defined(&_VZSYSCALLS_H_);
    require 'sys/syscall.ph';
    if (defined(&__x86_64__)) {
        eval 'sub __NR_fairsched_vcpus () {499;}' unless defined(&__NR_fairsched_vcpus);
        eval 'sub __NR_fairsched_mknod () {504;}' unless defined(&__NR_fairsched_mknod);
        eval 'sub __NR_fairsched_rmnod () {505;}' unless defined(&__NR_fairsched_rmnod);
        eval 'sub __NR_fairsched_chwt () {506;}' unless defined(&__NR_fairsched_chwt);
        eval 'sub __NR_fairsched_mvpr () {507;}' unless defined(&__NR_fairsched_mvpr);
        eval 'sub __NR_fairsched_rate () {508;}' unless defined(&__NR_fairsched_rate);
        eval 'sub __NR_setluid () {501;}' unless defined(&__NR_setluid);
        eval 'sub __NR_setublimit () {502;}' unless defined(&__NR_setublimit);
    }
    elsif (defined(&__i386__)) {
        eval 'sub __NR_fairsched_mknod () {500;}' unless defined(&__NR_fairsched_mknod);
        eval 'sub __NR_fairsched_rmnod () {501;}' unless defined(&__NR_fairsched_rmnod);
        eval 'sub __NR_fairsched_chwt () {502;}' unless defined(&__NR_fairsched_chwt);
        eval 'sub __NR_fairsched_mvpr () {503;}' unless defined(&__NR_fairsched_mvpr);
        eval 'sub __NR_fairsched_rate () {504;}' unless defined(&__NR_fairsched_rate);
        eval 'sub __NR_fairsched_vcpus () {505;}' unless defined(&__NR_fairsched_vcpus);
        eval 'sub __NR_setluid () {511;}' unless defined(&__NR_setluid);
        eval 'sub __NR_setublimit () {512;}' unless defined(&__NR_setublimit);
    } else {
        die("no fairsched syscall for this arch");
    }
    require 'asm/ioctl.ph';
    # The request number is written out in full (0x00) instead of the
    # digit-less hex literal "0x", which only works by accident.
    eval 'sub KVM_GET_API_VERSION () { &_IO(0xAE, 0x00);}' unless defined(&KVM_GET_API_VERSION);
}
70
# Thin wrapper around the fairsched_mknod syscall. All three arguments
# are coerced to integers before being handed to the kernel; the raw
# syscall return value is passed back.
sub fairsched_mknod {
    my ($parent, $weight, $desired) = @_;

    my @sysargs = map { int($_) } ($parent, $weight, $desired);

    return syscall(&__NR_fairsched_mknod(), @sysargs);
}
76
# Thin wrapper around the fairsched_rmnod syscall; $id is coerced to an
# integer and the raw syscall return value is passed back.
sub fairsched_rmnod {
    my ($id) = @_;

    my $node_id = int($id);

    return syscall(&__NR_fairsched_rmnod(), $node_id);
}
82
# Thin wrapper around the fairsched_mvpr syscall; both arguments are
# coerced to integers and the raw syscall return value is passed back.
sub fairsched_mvpr {
    my ($pid, $newid) = @_;

    my @sysargs = map { int($_) } ($pid, $newid);

    return syscall(&__NR_fairsched_mvpr(), @sysargs);
}
88
# Thin wrapper around the fairsched_vcpus syscall; both arguments are
# coerced to integers and the raw syscall return value is passed back.
sub fairsched_vcpus {
    my ($id, $vcpus) = @_;

    my @sysargs = map { int($_) } ($id, $vcpus);

    return syscall(&__NR_fairsched_vcpus(), @sysargs);
}
94
# Thin wrapper around the fairsched_rate syscall. $op is one of the
# FAIRSCHED_*_RATE constants; all arguments are coerced to integers and
# the raw syscall return value is passed back.
sub fairsched_rate {
    my ($id, $op, $rate) = @_;

    my @sysargs = map { int($_) } ($id, $op, $rate);

    return syscall(&__NR_fairsched_rate(), @sysargs);
}
100
# Operation codes passed as $op to fairsched_rate().
use constant {
    FAIRSCHED_SET_RATE  => 0,
    FAIRSCHED_DROP_RATE => 1,
    FAIRSCHED_GET_RATE  => 2,
};

# Apply a CPU limit given in percent to node $id. The percentage is
# rescaled to 1/1024 units; a resulting rate of 0 drops the limit
# instead of setting one. Returns whatever fairsched_rate() returns.
sub fairsched_cpulimit {
    my ($id, $limit) = @_;

    my $rate = int($limit * 1024 / 100);

    if ($rate) {
        return fairsched_rate($id, FAIRSCHED_SET_RATE, $rate);
    }

    return fairsched_rate($id, FAIRSCHED_DROP_RATE, $rate);
}
113
# Module-level paths. The directories are created on load; mkdir
# failures (e.g. the directory already exists) are deliberately ignored.
my $nodename = PVE::INotify::nodename();

my $confdir = "/etc/pve/nodes/$nodename/qemu-server";
my $var_run_tmpdir = "/var/run/qemu-server";
my $lock_dir = "/var/lock/qemu-server";

foreach my $dir ("/etc/pve/nodes/$nodename", $confdir, $var_run_tmpdir, $lock_dir) {
    mkdir $dir;
}

my $pcisysfs = "/sys/bus/pci";

my $keymaphash = PVE::Tools::kvmkeymaps();
129
# Schema of all scalar VM configuration options. Each entry describes
# one key of the VM config file (type, default, limits, help text).
# Indexed options (netN, ideN, ...) are added to this hash further down.
my $confdesc = {
    onboot => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default => 0,
    },
    autostart => {
        optional => 1,
        type => 'boolean',
        description => "Automatic restart after crash (currently ignored).",
        default => 0,
    },
    reboot => {
        optional => 1,
        type => 'boolean',
        description => "Allow reboot. If set to '0' the VM exit on reboot.",
        default => 1,
    },
    lock => {
        optional => 1,
        type => 'string',
        description => "Lock/unlock the VM.",
        enum => [qw(migrate backup)],
    },
    cpulimit => {
        optional => 1,
        type => 'integer',
        description => "Limit of CPU usage in per cent. Note if the computer has 2 CPUs, it has total of 200% CPU time. Value '0' indicates no CPU limit.\n\nNOTE: This option is currently ignored.",
        minimum => 0,
        default => 0,
    },
    cpuunits => {
        optional => 1,
        type => 'integer',
        description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to weights of all the other running VMs.\n\nNOTE: You can disable fair-scheduler configuration by setting this to 0.",
        minimum => 0,
        maximum => 500000,
        default => 1000,
    },
    memory => {
        optional => 1,
        type => 'integer',
        description => "Amount of RAM for the VM in MB.",
        minimum => 16,
        default => 512,
    },
    keyboard => {
        optional => 1,
        type => 'string',
        description => "Keyboard layout for vnc server. Default is read from the datacenter configuration file.",
        enum => [ keys %$keymaphash ],
        default => 'en-us',
    },
    name => {
        optional => 1,
        type => 'string',
        description => "Set a name for the VM. Only used on the configuration web interface.",
    },
    description => {
        optional => 1,
        type => 'string',
        description => "Description for the VM. Only used on the configuration web interface.",
    },
    ostype => {
        optional => 1,
        type => 'string',
        enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 l24 l26)],
        description => <<EODESC,
Used to enable special optimization/features for specific
operating systems:

other => unspecified OS
wxp => Microsoft Windows XP
w2k => Microsoft Windows 2000
w2k3 => Microsoft Windows 2003
w2k8 => Microsoft Windows 2008
wvista => Microsoft Windows Vista
win7 => Microsoft Windows 7
l24 => Linux 2.4 Kernel
l26 => Linux 2.6/3.X Kernel

other|l24|l26 ... no special behaviour
wxp|w2k|w2k3|w2k8|wvista|win7 ... use --localtime switch
EODESC
    },
    boot => {
        optional => 1,
        type => 'string',
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n).",
        pattern => '[acdn]{1,4}',
        default => 'cad',
    },
    bootdisk => {
        optional => 1,
        type => 'string', format => 'pve-qm-bootdisk',
        description => "Enable booting from specified disk.",
        pattern => '(ide|scsi|virtio)\d+',
    },
    smp => {
        optional => 1,
        type => 'integer',
        description => "The number of CPUs. Please use option -sockets instead.",
        minimum => 1,
        default => 1,
    },
    sockets => {
        optional => 1,
        type => 'integer',
        description => "The number of CPU sockets.",
        minimum => 1,
        default => 1,
    },
    cores => {
        optional => 1,
        type => 'integer',
        description => "The number of cores per socket.",
        minimum => 1,
        default => 1,
    },
    acpi => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable ACPI.",
        default => 1,
    },
    kvm => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable KVM hardware virtualization.",
        default => 1,
    },
    tdf => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable time drift fix.",
        default => 1,
    },
    localtime => {
        optional => 1,
        type => 'boolean',
        description => "Set the real time clock to local time. This is enabled by default if ostype indicates a Microsoft OS.",
    },
    freeze => {
        optional => 1,
        type => 'boolean',
        description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
    },
    vga => {
        optional => 1,
        type => 'string',
        description => "Select VGA type. If you want to use high resolution modes (>= 1280x1024x16) then you should use option 'std' or 'vmware'. Default is 'std' for win7/w2k8, and 'cirrus' for other OS types",
        enum => [qw(std cirrus vmware)],
    },
    hostpci => {
        optional => 1,
        type => 'string', format => 'pve-qm-hostpci',
        typetext => "HOSTPCIDEVICE { , HOSTPCIDEVICE }",
        description => <<EODESCR,
Map host pci devices. HOSTPCIDEVICE syntax is:

'bus:dev.func' (hexadecimal numbers)

You can use the 'lspci' command to list existing pci devices.

Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.

Experimental: user reported problems with this option.
EODESCR
    },
    serial => {
        optional => 1,
        type => 'string', format => 'pve-qm-serial',
        typetext => "SERIALDEVICE { , SERIALDEVICE }",
        description => <<EODESCR,
Map host serial devices. SERIALDEVICE syntax is /dev/ttyS*

Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.

Experimental: user reported problems with this option.
EODESCR
    },
    parallel => {
        optional => 1,
        type => 'string', format => 'pve-qm-parallel',
        typetext => "PARALLELDEVICE { , PARALLELDEVICE }",
        description => <<EODESCR,
Map host parallel devices. PARALLELDEVICE syntax is /dev/parport*

Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.

Experimental: user reported problems with this option.
EODESCR
    },
    startdate => {
        optional => 1,
        type => 'string',
        typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
        description => "Set the initial date of the real time clock. Valid format for date are: 'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
        pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
        default => 'now',
    },
    args => {
        optional => 1,
        type => 'string',
        description => <<EODESCR,
Note: this option is for experts only. It allows you to pass arbitrary arguments to kvm, for example:

args: -no-reboot -no-hpet
EODESCR
    },
    tablet => {
        optional => 1,
        type => 'boolean',
        default => 1,
        description => "Enable/disable the usb tablet device. This device is usually needed to allow absolute mouse positioning. Else the mouse runs out of sync with normal vnc clients. If you're running lots of console-only guests on one host, you may consider disabling this to save some context switches.",
    },
    migrate_speed => {
        optional => 1,
        type => 'integer',
        description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
        minimum => 0,
        default => 0,
    },
    migrate_downtime => {
        optional => 1,
        type => 'integer',
        description => "Set maximum tolerated downtime (in seconds) for migrations.",
        minimum => 0,
        default => 1,
    },
    cdrom => {
        optional => 1,
        type => 'string', format => 'pve-qm-drive',
        typetext => 'volume',
        description => "This is an alias for option -ide2",
    },
    cpu => {
        optional => 1,
        description => "Emulated CPU type.",
        type => 'string',
        enum => [ qw(486 athlon pentium pentium2 pentium3 coreduo core2duo kvm32 kvm64 qemu32 qemu64 phenom host) ],
        default => 'qemu64',
    },
};
375
376# what about other qemu settings ?
377#cpu => 'string',
378#machine => 'string',
379#fda => 'file',
380#fdb => 'file',
381#mtdblock => 'file',
382#sd => 'file',
383#pflash => 'file',
384#snapshot => 'bool',
385#bootp => 'file',
386##tftp => 'dir',
387##smb => 'dir',
388#kernel => 'file',
389#append => 'string',
390#initrd => 'file',
391##soundhw => 'string',
392
# Publish every option defined so far as a reusable "pve-qm-<name>"
# standard option.
foreach my $optname (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$optname", $confdesc->{$optname});
}
396
# Number of slots generated for each indexed option family
# (index runs from 0 to the limit minus one).
my ($MAX_IDE_DISKS, $MAX_SCSI_DISKS, $MAX_VIRTIO_DISKS) = (4, 16, 16);
my ($MAX_USB_DEVICES, $MAX_NETS, $MAX_UNUSED_DISKS) = (5, 32, 8);

# NIC models accepted in netN options, and the same list as sorted,
# space separated text for the help output.
my $nic_model_list = [
    qw(rtl8139 ne2k_pci e1000 pcnet virtio ne2k_isa i82551 i82557b i82559er)
];
my $nic_model_list_txt = join(' ', sort @$nic_model_list);
407
408# fixme:
# Help text for the netN options; lists the supported NIC models.
my $net_description = <<EODESCR;
Specify network devices.

MODEL is one of: $nic_model_list_txt

XX:XX:XX:XX:XX:XX should be an unique MAC address. This is
automatically generated if not specified.

The bridge parameter can be used to automatically add the interface to a bridge device. The Proxmox VE standard bridge is called 'vmbr0'.

Option 'rate' is used to limit traffic bandwidth from and to this interface. It is specified as floating point number, unit is 'Megabytes per second'.

If you specify no bridge, we create a kvm 'user' (NATed) network device, which provides DHCP and DNS services. The following addresses are used:

10.0.2.2 Gateway
10.0.2.3 DNS Server
10.0.2.4 SMB Server

The DHCP server assign addresses to the guest starting from 10.0.2.15.

EODESCR

# Schema shared by all netN options.
my $netdesc = {
    optional => 1,
    type => 'string',
    format => 'pve-qm-net',
    typetext => "MODEL=XX:XX:XX:XX:XX:XX [,bridge=<dev>][,rate=<mbps>]",
    description => $net_description,
};
PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);

# net0 .. net($MAX_NETS - 1) all point at the same descriptor.
foreach my $idx (0 .. $MAX_NETS - 1) {
    $confdesc->{"net$idx"} = $netdesc;
}
440
# Set of valid drive option names (ide0, scsi3, ...); filled in below.
my $drivename_hash;

# Option syntax shown in the help text; identical for all three buses.
my $drive_typetext = '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback] [,format=f] [,backup=yes|no] [,aio=native|threads]';

# Schema for the ideN options.
my $idedesc = {
    optional => 1,
    type => 'string',
    format => 'pve-qm-drive',
    typetext => $drive_typetext,
    description => "Use volume as IDE hard disk or CD-ROM (n is 0 to 3).",
};
PVE::JSONSchema::register_standard_option("pve-qm-ide", $idedesc);

# Schema for the scsiN options.
my $scsidesc = {
    optional => 1,
    type => 'string',
    format => 'pve-qm-drive',
    typetext => $drive_typetext,
    description => "Use volume as SCSI hard disk or CD-ROM (n is 0 to 15).",
};
PVE::JSONSchema::register_standard_option("pve-qm-scsi", $scsidesc);

# Schema for the virtioN options.
my $virtiodesc = {
    optional => 1,
    type => 'string',
    format => 'pve-qm-drive',
    typetext => $drive_typetext,
    description => "Use volume as VIRTIO hard disk (n is 0 to 15).",
};
PVE::JSONSchema::register_standard_option("pve-qm-virtio", $virtiodesc);
466
# Schema shared by all usbN options. The index range in the help text
# matches the usb0..usb4 slots that are generated from $MAX_USB_DEVICES.
my $usbdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-usb-device',
    typetext => 'host=HOSTUSBDEVICE',
    description => <<EODESCR,
Configure a USB device (n is 0 to 4). This can be used to
pass-through usb devices to the guest. HOSTUSBDEVICE syntax is:

'bus-port(.port)*' (decimal numbers) or
'vendor_id:product_id' (hexadecimal numbers)

You can use the 'lsusb -t' command to list existing usb devices.

Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.

EODESCR
};
PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);
485
486
# Generate the indexed drive options and remember every drive name.
foreach my $idx (0 .. $MAX_IDE_DISKS - 1) {
    my $name = "ide$idx";
    $drivename_hash->{$name} = 1;
    $confdesc->{$name} = $idedesc;
}

foreach my $idx (0 .. $MAX_SCSI_DISKS - 1) {
    my $name = "scsi$idx";
    $drivename_hash->{$name} = 1;
    $confdesc->{$name} = $scsidesc;
}

foreach my $idx (0 .. $MAX_VIRTIO_DISKS - 1) {
    my $name = "virtio$idx";
    $drivename_hash->{$name} = 1;
    $confdesc->{$name} = $virtiodesc;
}

# USB devices are config options but not drives.
foreach my $idx (0 .. $MAX_USB_DEVICES - 1) {
    $confdesc->{"usb$idx"} = $usbdesc;
}
505
# Schema shared by all unusedN options.
my $unuseddesc = {
    optional => 1,
    type => 'string',
    format => 'pve-volume-id',
    description => "Reference to unused volumes.",
};

foreach my $idx (0 .. $MAX_UNUSED_DISKS - 1) {
    $confdesc->{"unused$idx"} = $unuseddesc;
}
515
# Cached result of kvm_version(); 0 means "not determined yet".
my $kvm_api_version = 0;

# Return the KVM API version reported by /dev/kvm, or 0 if the device
# cannot be opened or the ioctl yields nothing. A non-zero result is
# cached for later calls.
sub kvm_version {

    if (!$kvm_api_version) {
        my $kvm_fh = IO::File->new("</dev/kvm");
        return 0 if !$kvm_fh;

        my $version = $kvm_fh->ioctl(KVM_GET_API_VERSION(), 0);
        $kvm_api_version = $version if $version;

        $kvm_fh->close();
    }

    return $kvm_api_version;
}
533
# Cached result of kvm_user_version().
my $kvm_user_version;

# Return the version of the 'kvm' userspace binary as printed by
# 'kvm -help', or the string 'unknown' if it cannot be determined.
# The result is cached after the first call.
sub kvm_user_version {

    if (!$kvm_user_version) {
        my $help_text = `kvm -help 2>/dev/null`;

        $kvm_user_version =
            ($help_text =~ m/^QEMU( PC)? emulator version (\d+\.\d+\.\d+) /)
            ? $2
            : 'unknown';
    }

    return $kvm_user_version;
}
551
552my $kernel_has_vhost_net = -c '/dev/vhost-net';
553
# List all possible drive names: every ide slot, then every scsi slot,
# then every virtio slot.
sub disknames {
    my $last_ide = $MAX_IDE_DISKS - 1;
    my $last_scsi = $MAX_SCSI_DISKS - 1;
    my $last_virtio = $MAX_VIRTIO_DISKS - 1;

    # order is important - used to autoselect boot disk
    return (
        (map { "ide$_" } (0 .. $last_ide)),
        (map { "scsi$_" } (0 .. $last_scsi)),
        (map { "virtio$_" } (0 .. $last_virtio)),
    );
}
560
# True if $dev is one of the generated drive option names.
sub valid_drivename {
    my ($dev) = @_;

    my $known = $drivename_hash->{$dev};

    return defined($known);
}
566
# True if $key names a known VM configuration option.
sub option_exists {
    my ($key) = @_;

    my $desc = $confdesc->{$key};

    return defined($desc);
}
571
# Accessor for the list of supported NIC models. The module's own array
# reference is returned, not a copy.
sub nic_models {
    my $models = $nic_model_list;

    return $models;
}
575
# Return a fresh hash reference mapping each ostype key to a short
# human readable label.
sub os_list_description {

    my %label_for = (
        other  => 'Other',
        wxp    => 'Windows XP',
        w2k    => 'Windows 2000',
        w2k3   => 'Windows 2003',
        w2k8   => 'Windows 2008',
        wvista => 'Windows Vista',
        win7   => 'Windows 7',
        l24    => 'Linux 2.4',
        l26    => 'Linux 2.6',
    );

    return \%label_for;
}
590
# Split an argument string into an array reference of words.
# This is done the clumsy way: the string is appended to a small perl
# one-liner command line and run through run_command(), and each line
# of output becomes one word. Dies if the command fails.
# fixme: use Text::ParseWords::shellwords() ?
sub split_args {
    my ($str) = @_;

    my @words;

    return \@words if !$str;

    my $cmd = 'perl -e \'foreach my $a (@ARGV) { print "$a\n"; } \' -- ' . $str;

    eval {
        run_command($cmd, outfunc => sub { push @words, shift; });
    };

    die "unable to parse args: $str\n" if $@;

    return \@words;
}
616
# Decompose a drive name such as 'ide3' or 'scsi10' into bus, index,
# controller number and unit number. IDE has 2 units per controller,
# SCSI has 8; for virtio the divisor is 1024. Dies on any name that
# is not ide/scsi/virtio followed by digits.
sub disk_devive_info {
    my $dev = shift;

    my ($bus, $index) = ($dev =~ m/^(ide|scsi|virtio)(\d+)$/);
    die "unknown disk device format '$dev'" if !defined($bus);

    my %units_per_controller = (ide => 2, scsi => 8);
    my $maxdev = $units_per_controller{$bus} || 1024;

    my $controller = int($index / $maxdev);
    my $unit = $index % $maxdev;

    return {
        bus => $bus,
        desc => uc($bus) . " $controller:$unit",
        controller => $controller,
        unit => $unit,
        index => $index,
    };
}
640
# Build the drive name for a config drive key: ide and scsi drives get
# "<bus><controller>-cd<unit>" or "<bus><controller>-hd<unit>" depending
# on $media, every other bus gets "<bus><index>".
sub qemu_drive_name {
    my ($dev, $media) = @_;

    my $info = disk_devive_info($dev);
    my $bus = $info->{bus};

    if ($bus ne 'ide' && $bus ne 'scsi') {
        return sprintf("%s%i", $bus, $info->{index});
    }

    my $kind = ($media eq 'cdrom') ? "-cd" : "-hd";

    return sprintf("%s%i%s%i", $bus, $info->{controller}, $kind, $info->{unit});
}
655
# Cached result of get_cdrom_path().
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1, /dev/cdrom2 that exists
# as a symbolic link, caching a successful lookup. Returns undef when
# none of them exists (previously the sub fell off the end and returned
# whatever the last file test produced).
sub get_cdrom_path {

    return $cdrom_path if $cdrom_path;

    foreach my $candidate (qw(/dev/cdrom /dev/cdrom1 /dev/cdrom2)) {
        return $cdrom_path = $candidate if -l $candidate;
    }

    return undef;
}
666
# Resolve a cdrom setting to a path:
#   'cdrom'        -> whatever get_cdrom_path() returns
#   'none'         -> empty string
#   absolute path  -> returned unchanged
#   anything else  -> treated as volume ID and resolved via PVE::Storage
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
680
# Try to convert an old style file name into a volume ID.
# 'none', 'cdrom', /dev/ paths and anything that already looks like
# "storage:volume" are returned unchanged. A bare file name becomes
# "local:iso/<file>" for cdrom media and "local:<vmid>/<file>"
# otherwise. Any other name containing a '/' yields undef.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    return $file if $file eq 'none' || $file eq 'cdrom';
    return $file if $file =~ m|^/dev/.+| || $file =~ m/^([^:]+):(.+)$/;

    return undef if $file =~ m|/|;

    my $is_cdrom = $media && $media eq 'cdrom';

    return $is_cdrom ? "local:iso/$file" : "local:$vmid/$file";
}
699
# Check that the volume type $vtype fits the drive's media setting:
# media 'disk' expects 'image', media 'cdrom' expects 'iso'. Nothing is
# checked when $media is unset. A mismatch raises a parameter exception
# keyed by $opt; an unknown media value is an internal error.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %vtype_for_media = (disk => 'image', cdrom => 'iso');

    my $etype = $vtype_for_media{$media};
    die "internal error" if !defined($etype);

    return if ($vtype eq $etype);

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
718
# Normalize $drive->{file} in place: a plain filesystem path is replaced
# by the matching volume ID, and the media type is defaulted to 'cdrom'
# for iso volumes and for the special values 'cdrom' / 'none'.
# Raises a parameter exception keyed by $opt if the path belongs to no
# storage.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    my $file = $drive->{file};

    # everything that is not special, a device, a volume ID or a plain
    # number is taken to be a filesystem path
    my $needs_conversion = !(
        $file =~ m/^(cdrom|none)$/ ||
        $file =~ m|^/dev/.+| ||
        $file =~ m/^([^:]+):(.+)$/ ||
        $file =~ m/^\d+$/
    );

    if ($needs_conversion) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $file);
        raise_param_exc({ $opt => "unable to associate path '$drive->{file}' to any storage"}) if !$vtype;
        $drive->{media} = 'cdrom' if !$drive->{media} && $vtype eq 'iso';
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
737
# Write a new configuration file for VM $vmid from the $settings hash.
# Dies if the file already exists. Missing name and memory settings are
# filled in ("vm<vmid>" and the default memory size). Keys that are not
# known configuration options are ignored. The caller is expected to
# hold the config lock.
sub create_conf_nolock {
    my ($vmid, $settings) = @_;

    my $filename = config_file ($vmid);

    die "configuration file '$filename' already exists\n" if -f $filename;

    my $defaults = load_defaults();

    $settings->{name} = "vm$vmid" if !$settings->{name};
    $settings->{memory} = $defaults->{memory} if !$settings->{memory};

    my $data = '';
    foreach my $opt (keys %$settings) {
        next if !$confdesc->{$opt};

        my $value = $settings->{$opt};

        # Skip only unset/empty values. An explicit 0 must be written,
        # otherwise options whose default is 1 (acpi, kvm, tablet, ...)
        # could never be switched off at creation time.
        next if !defined($value) || $value eq '';

        $data .= "$opt: $value\n";
    }

    PVE::Tools::file_set_contents($filename, $data);
}
762
# Parse a drive option string of the form
#
# ideX = [volume=]volume-id[,media=d][,cyls=c,heads=h,secs=s[,trans=t]]
#        [,snapshot=on|off][,cache=off|none|writethrough|writeback]
#        [,format=f][,rerror=e][,werror=e][,backup=yes|no]
#        [,aio=native|threads]
#
# $key is the option name (e.g. 'ide0') and supplies interface and index.
# It may be undef when the string is only being verified.
# Returns a hash reference with the parsed settings, or undef if the
# key or any part of the string is invalid.
sub parse_drive {
    my ($key, $data) = @_;

    my $res = {};

    if (!defined($key)) {
        # verification only - placeholder values should not harm
        $res->{interface} = 'unknown';
        $res->{index} = 0;
    } else {
        my ($iface, $idx) = ($key =~ m/^([^\d]+)(\d+)$/);
        return undef if !defined($iface);
        $res->{interface} = $iface;
        $res->{index} = $idx;
    }

    foreach my $part (split (/,/, $data)) {
        next if $part =~ m/^\s*$/;

        if ($part =~ m/^(file|volume|cyls|heads|secs|trans|media|snapshot|cache|format|rerror|werror|backup|aio)=(.+)$/) {
            my ($name, $val) = ($1, $2);

            # 'volume' is an alias for 'file'
            $name = 'file' if $name eq 'volume';

            # every setting may be given only once
            return undef if defined $res->{$name};

            $res->{$name} = $val;
            next;
        }

        # a bare word is the volume, and only if none was given yet
        return undef if $res->{file} || $part =~ m/=/;

        $res->{file} = $part;
    }

    return undef if !$res->{file};

    # allowed values per setting; unset settings are not checked
    my @value_checks = (
        [ cache => qr/^(off|none|writethrough|writeback)$/ ],
        [ snapshot => qr/^(on|off)$/ ],
        [ cyls => qr/^\d+$/ ],
        [ heads => qr/^\d+$/ ],
        [ secs => qr/^\d+$/ ],
        [ media => qr/^(disk|cdrom)$/ ],
        [ trans => qr/^(none|lba|auto)$/ ],
        [ format => qr/^(raw|cow|qcow|qcow2|vmdk|cloop)$/ ],
        [ rerror => qr/^(ignore|report|stop)$/ ],
        [ werror => qr/^(enospc|ignore|report|stop)$/ ],
        [ backup => qr/^(yes|no)$/ ],
        [ aio => qr/^(native|threads)$/ ],
    );

    foreach my $check (@value_checks) {
        my ($name, $allowed) = @$check;
        return undef if $res->{$name} && $res->{$name} !~ $allowed;
    }

    if ($res->{media} && ($res->{media} eq 'cdrom')) {
        # disk-only settings make no sense for a CD-ROM
        foreach my $name (qw(snapshot trans format heads secs cyls)) {
            return undef if $res->{$name};
        }
        return undef if $res->{interface} eq 'virtio';
    }

    # rerror does not work with scsi drives
    return undef if $res->{rerror} && $res->{interface} eq 'scsi';

    return $res;
}
832
# Drive settings that are also passed on in the drive string built by
# print_drive_full(), in output order.
my @qemu_drive_options = qw(heads secs cyls trans media format cache snapshot rerror werror aio);

# Serialize a parsed drive back into config file syntax: the file
# followed by ",name=value" for every true setting (including 'backup').
sub print_drive {
    my ($vmid, $drive) = @_;

    my @present = grep { $drive->{$_} } (@qemu_drive_options, 'backup');

    return join('', "$drive->{file}", map { ",$_=$drive->{$_}" } @present);
}
845
# Build the full drive string for a parsed drive:
# "file=<path>,if=<interface>,index=<n>" followed by the drive settings.
# The path is resolved through get_iso_path() for CD-ROMs, taken as is
# for absolute paths, and resolved via PVE::Storage otherwise; the
# "file=" part is left out when the path is empty.
sub print_drive_full {
    my ($storecfg, $vmid, $drive) = @_;

    my @optparts = map { ",$_=$drive->{$_}" } grep { $drive->{$_} } @qemu_drive_options;

    # use linux-aio by default (qemu default is threads)
    push @optparts, ",aio=native" if !$drive->{aio};

    my $volid = $drive->{file};
    my $path;
    if (drive_is_cdrom ($drive)) {
        $path = get_iso_path ($storecfg, $vmid, $volid);
    } elsif ($volid =~ m|^/|) {
        $path = $volid;
    } else {
        $path = PVE::Storage::path ($storecfg, $volid);
    }

    my $pathinfo = $path ? "file=$path," : '';

    return "${pathinfo}if=$drive->{interface},index=$drive->{index}" . join('', @optparts);
}
873
874
# True if $drive is defined and its media setting is 'cdrom'.
sub drive_is_cdrom {
    my ($drive) = @_;

    return $drive if !$drive;

    my $media = $drive->{media};
    return $media if !$media;

    return ($media eq 'cdrom');
}
881
# Parse a network option string of the form
#
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Returns a hash reference with model (lower case), macaddr (upper
# case, generated via random_ether_addr() when not given) and the
# optional bridge and rate. Returns undef on any unknown part or when
# no model is given.
sub parse_net {
    my ($data) = @_;

    my $res = {};

    foreach my $kvp (split (/,/, $data)) {

        if (my @model_match = ($kvp =~ m/^(ne2k_pci|e1000|rtl8139|pcnet|virtio|ne2k_isa|i82551|i82557b|i82559er)(=([0-9a-f]{2}(:[0-9a-f]{2}){5}))?$/i)) {
            # capture 1 is the model, capture 3 the optional MAC address
            $res->{model} = lc ($model_match[0]);
            $res->{macaddr} = uc ($model_match[2]) || random_ether_addr ();
        } elsif (my ($bridge) = ($kvp =~ m/^bridge=(\S+)$/)) {
            $res->{bridge} = $bridge;
        } elsif (my ($rate) = ($kvp =~ m/^rate=(\d+(\.\d+)?)$/)) {
            $res->{rate} = $rate;
        } else {
            return undef;
        }
    }

    return undef if !$res->{model};

    return $res;
}
909
# Serialize a parsed network device back into config file syntax:
# "<model>[=<mac>][,bridge=<dev>][,rate=<rate>]".
sub print_net {
    my $net = shift;

    my @fields = ("$net->{model}" . ($net->{macaddr} ? "=$net->{macaddr}" : ''));

    push @fields, "bridge=$net->{bridge}" if $net->{bridge};
    push @fields, "rate=$net->{rate}" if $net->{rate};

    return join(',', @fields);
}
920
# Rewrite every parsable netN entry of $settings through
# parse_net()/print_net(), which fills in a MAC address where none was
# given. Entries that do not parse are left untouched.
sub add_random_macs {
    my ($settings) = @_;

    foreach my $opt (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$opt});
        $settings->{$opt} = print_net($net) if $net;
    }
}
931
# Record $volid as unused volume: store it in $res under the lowest
# free "unusedN" key. Does nothing if the volume is already listed.
# A slot counts as occupied if it is set in the existing $config OR was
# already assigned in $res, so several calls with the same $res do not
# overwrite each other. Dies when all slots are taken.
sub add_unused_volume {
    my ($config, $res, $volid) = @_;

    my $key;
    # walk downwards so that $key ends up being the lowest free slot
    for (my $ind = $MAX_UNUSED_DISKS - 1; $ind >= 0; $ind--) {
        my $test = "unused$ind";
        my @taken = grep { $_ } ($config->{$test}, $res->{$test});
        if (@taken) {
            return if grep { $_ eq $volid } @taken; # do not add duplicates
        } else {
            $key = $test;
        }
    }

    die "Too many unused volumes - please delete them first.\n" if !$key;

    $res->{$key} = $volid;
}
949
950# fixme: remove all those $noerr parameters?
951
952PVE::JSONSchema::register_format('pve-qm-bootdisk', \&verify_bootdisk);
# Format check for 'pve-qm-bootdisk': returns $value if it is a valid
# drive name. Otherwise returns undef when $noerr is set, else dies.
sub verify_bootdisk {
    my ($value, $noerr) = @_;

    if (!valid_drivename($value)) {
        return undef if $noerr;
        die "invalid boot disk '$value'\n";
    }

    return $value;
}
962
963PVE::JSONSchema::register_format('pve-qm-net', \&verify_net);
# Format check for 'pve-qm-net': returns $value if parse_net() accepts
# it. Otherwise returns undef when $noerr is set, else dies.
sub verify_net {
    my ($value, $noerr) = @_;

    if (!parse_net($value)) {
        return undef if $noerr;
        die "unable to parse network options\n";
    }

    return $value;
}
973
974PVE::JSONSchema::register_format('pve-qm-drive', \&verify_drive);
# Format check for 'pve-qm-drive': returns $value if parse_drive()
# accepts it (without a drive key). Otherwise returns undef when $noerr
# is set, else dies.
sub verify_drive {
    my ($value, $noerr) = @_;

    if (!parse_drive (undef, $value)) {
        return undef if $noerr;
        die "unable to parse drive options\n";
    }

    return $value;
}
984
985PVE::JSONSchema::register_format('pve-qm-hostpci', \&verify_hostpci);
# Format check for 'pve-qm-hostpci': $value is a comma separated list
# of 'bus:dev.func' ids in hexadecimal. Returns $value if every entry
# matches. Otherwise returns undef when $noerr is set, else dies.
sub verify_hostpci {
    my ($value, $noerr) = @_;

    my @invalid = grep { $_ !~ m/^[a-f0-9]{2}:[a-f0-9]{2}\.[a-f0-9]$/i } split (/,/, $value);

    if (@invalid) {
        return undef if $noerr;
        die "unable to parse pci id\n";
    }

    return $value;
}
998
# Parse a usb option string. Each comma separated entry must be either
# 'host=<vendor>:<product>' (four hex digits each) or
# 'host=<bus>-<port>[.<port>...]' (decimal). Returns a hash reference
# with vendorid/productid and/or hostbus/hostport, or undef if the
# string is empty or contains anything else.
sub parse_usb_device {
    my ($value) = @_;

    return undef if !$value;

    my @specs = split (/,/, $value);

    # nothing but separators
    return undef if !@specs;

    my $res = {};
    foreach my $spec (@specs) {
        if ($spec =~ m/^host=([0-9A-Fa-f]{4}):([0-9A-Fa-f]{4})$/) {
            @{$res}{qw(vendorid productid)} = ($1, $2);
        } elsif ($spec =~ m/^host=(\d+)\-(\d+(\.\d+)*)$/) {
            @{$res}{qw(hostbus hostport)} = ($1, $2);
        } else {
            return undef;
        }
    }

    return $res;
}
1025
1026PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);
# Format check for 'pve-qm-usb-device': returns $value if
# parse_usb_device() accepts it. Otherwise returns undef when $noerr is
# set, else dies.
sub verify_usb_device {
    my ($value, $noerr) = @_;

    if (!parse_usb_device($value)) {
        return undef if $noerr;
        die "unable to parse usb device\n";
    }

    return $value;
}
1036
1037PVE::JSONSchema::register_format('pve-qm-parallel', \&verify_parallel);
# Format check for 'pve-qm-parallel': $value is a comma separated list
# of /dev/parport<N> devices. Returns $value if every entry matches.
# Otherwise returns undef when $noerr is set, else dies.
sub verify_parallel {
    my ($value, $noerr) = @_;

    my @invalid = grep { $_ !~ m|^/dev/parport\d+$| } split (/,/, $value);

    if (@invalid) {
        return undef if $noerr;
        die "invalid device name\n";
    }

    return $value;
}
1050
1051PVE::JSONSchema::register_format('pve-qm-serial', \&verify_serial);
# Format check for 'pve-qm-serial': $value is a comma separated list of
# /dev/ttyS<N> devices. Returns $value if every entry matches.
# Otherwise returns undef when $noerr is set, else dies.
sub verify_serial {
    my ($value, $noerr) = @_;

    my @invalid = grep { $_ !~ m|^/dev/ttyS\d+$| } split (/,/, $value);

    if (@invalid) {
        return undef if $noerr;
        die "invalid device name\n";
    }

    return $value;
}
1064
# Add the JSON properties of all VM configuration options to $prop
# (used for the create and set functions). Existing entries with the
# same name are overwritten. Returns $prop.
sub json_config_properties {
    my $prop = shift;

    # keys and values of an unmodified hash come back in matching order
    @{$prop}{keys %$confdesc} = values %$confdesc;

    return $prop;
}
1075
# Validate $value against the declared type of config option $key and
# return the normalized value:
#   boolean - 1 for 1/on/yes/true, 0 for 0/off/no/false
#   integer - the value as a number (digits only)
#   string  - checked against the option's format, if it has one;
#             without a format, one pair of enclosing double quotes is
#             stripped
# Dies on unknown options, undefined values, values containing line
# breaks and values that fail the check.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined ($value);

    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    die "internal error" if $type ne 'string';

    my $fmt = $desc->{format};

    if (!$fmt) {
        $value =~ s/^\"(.*)\"$/$1/;
        return $value;
    }

    if ($fmt eq 'pve-qm-drive') {
        # special case - we need to pass $key to parse_drive()
        return $value if parse_drive ($key, $value);
        die "unable to parse drive options\n";
    }

    PVE::JSONSchema::check_format($fmt, $value);
    return $value;
}
1115
# Run $code (with @param as arguments) via lock_file() on the lock file
# belonging to VM $vmid, using a timeout value of 10. Whatever error is
# left in $@ afterwards is re-thrown.
sub lock_config {
    my ($vmid, $code, @param) = @_;

    lock_file(config_file_lock($vmid), 10, $code, @param);

    die $@ if $@;
}
1125
# Return the config file path of VM $vmid relative to the cluster file
# system root: nodes/<nodename>/qemu-server/<vmid>.conf
sub cfs_config_path {
    my ($vmid) = @_;

    return join('/', 'nodes', "$nodename", 'qemu-server', "$vmid.conf");
}
1131
# Return the absolute path (below /etc/pve) of the config file of
# VM $vmid.
sub config_file {
    my ($vmid) = @_;

    return '/etc/pve/' . cfs_config_path($vmid);
}
1138
# Return the name of the lock file (inside $lock_dir) that lock_config()
# uses for VM $vmid.
sub config_file_lock {
    my ($vmid) = @_;

    return $lock_dir . "/lock-$vmid.conf";
}
1144
# Set access and modification time of the config file of VM $vmid to
# the current time. Returns the result of utime().
sub touch_config {
    my ($vmid) = @_;

    return utime(undef, undef, config_file($vmid));
}
1151
# Walk over all non-cdrom drives in $settings and either allocate a new
# volume or verify that the referenced image exists.
#
# A drive whose file has the form "[<storage>:]<size>" requests a new
# disk: it is allocated with PVE::Storage::vdisk_alloc (storage defaults
# to 'local'), and $settings->{<drive>} is rewritten to reference the new
# volume. Any other file must point to an existing regular file or block
# device.
#
# Returns an array reference with the newly allocated volume IDs. On any
# error the volumes allocated so far are freed again and the error is
# re-thrown.
sub create_disks {
    my ($storecfg, $vmid, $settings) = @_;

    my $vollist = [];

    my $handle_drive = sub {
        my ($ds, $disk) = @_;

        return if drive_is_cdrom($disk);

        my $file = $disk->{file};

        my ($prefix, $store, $size) = $file =~ m/^(([^:\s]+):)?(\d+(\.\d+)?)$/;

        if (!defined($size)) {
            # not an allocation request - the image must already exist
            my $path = ($file =~ m|^/dev/.+|)
                ? $file
                : PVE::Storage::path($storecfg, $file);

            die "image '$path' does not exists\n" unless -f $path || -b $path;
            return;
        }

        my $storeid = $store || 'local';
        my $defformat = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $fmt = $disk->{format} || $defformat;

        syslog('info', "VM $vmid creating new disk - size is $size GB");

        my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid,
                                              $fmt, undef, $size*1024*1024);

        $disk->{file} = $volid;
        delete $disk->{format}; # no longer needed
        push @$vollist, $volid;
        $settings->{$ds} = PVE::QemuServer::print_drive($vmid, $disk);
    };

    eval { foreach_drive($settings, $handle_drive); };

    if (my $err = $@) {
        syslog('err', "VM $vmid creating disks failed");

        # roll back everything we allocated so far
        foreach my $volid (@$vollist) {
            eval { PVE::Storage::vdisk_free($storecfg, $volid); };
            warn $@ if $@;
        }

        die $err;
    }

    return $vollist;
}
1206
# Free the storage volume $volid of VM $vmid and touch the VM config
# file afterwards.
#
# Absolute paths are refused, and so are volumes whose owner (as
# reported by PVE::Storage::path) is not $vmid.
sub unlink_image {
    my ($storecfg, $vmid, $volid) = @_;

    if ($volid =~ m|^/|) {
        die "reject to unlink absolute path '$volid'";
    }

    my ($path, $owner) = PVE::Storage::path($storecfg, $volid);

    unless ($owner && ($owner == $vmid)) {
        die "reject to unlink '$volid' - not owned by this VM";
    }

    syslog('info', "VM $vmid deleting volume '$volid'");

    PVE::Storage::vdisk_free($storecfg, $volid);

    touch_config($vmid);
}
1224
# Remove VM $vmid: free the disks referenced by its configuration, delete
# the config file, then free the volumes that vdisk_list() still reports
# for this VM.
#
# Dies if the config cannot be loaded, if the VM is locked, or if freeing
# a configured disk fails (the config file is kept in that case). Errors
# during the final cleanup of leftover volumes are only reported with
# warn().
sub destroy_vm {
    my ($storecfg, $vmid) = @_;

    my $conffile = config_file($vmid);

    my $conf = load_config($vmid);

    check_lock($conf);

    # only remove disks owned by this VM
    foreach_drive($conf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);

        # Use 'return' to leave the callback. The previous 'next' only
        # worked by jumping out of the sub into the loop of the caller.
        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        PVE::Storage::vdisk_free($storecfg, $volid);
    });

    unlink $conffile;

    # also remove unused disk
    eval {
        my $dl = PVE::Storage::vdisk_list($storecfg, undef, $vmid);

        PVE::Storage::foreach_volid($dl, sub {
            my ($volid, $sid, $volname, $d) = @_;

            # best effort - a volume that cannot be freed must not
            # prevent the removal of the remaining ones
            eval { PVE::Storage::vdisk_free($storecfg, $volid); };
            warn $@ if $@;
        });
    };
    warn $@ if $@;
}
1266
# fixme: remove?
#
# Return a hash (drive name => parsed drive) for all drives in $conf,
# where each drive additionally carries a 'disksize' entry: the size
# reported for its file divided by 1024*1024, or 0 if no size
# information is available.
#
# Sizes of /dev/... files are read directly with file_size_info(), all
# other non-cdrom files are looked up through PVE::Storage::vdisk_list().
# Errors of that lookup are only reported with warn().
sub load_diskinfo_old {
    my ($storecfg, $vmid, $conf) = @_;

    my $drives = {};    # drive name => parsed drive
    my $sizeinfo = {};  # file / volume id => info hash containing {size}
    my $volids;         # stays undef when no storage volume is found

    foreach_drive($conf, sub {
        my ($name, $drive) = @_;

        $drives->{$name} = $drive;

        return if drive_is_cdrom($drive);

        my $file = $drive->{file};
        if ($file =~ m|^/dev/.+|) {
            $sizeinfo->{$file}->{size} = PVE::Storage::file_size_info($file);
        } else {
            push @$volids, $file;
        }
    });

    eval {
        my $dl = PVE::Storage::vdisk_list($storecfg, undef, $vmid, $volids);

        PVE::Storage::foreach_volid($dl, sub {
            my ($volid, $sid, $volname, $d) = @_;
            $sizeinfo->{$volid} = $d;
        });
    };
    warn $@ if $@;

    foreach my $drive (values %$drives) {
        my $entry = $sizeinfo->{$drive->{file}};
        $drive->{disksize} = $entry ? $entry->{size} / (1024*1024) : 0;
    }

    return $drives;
}
1308
# Read the configuration of VM $vmid through PVE::Cluster::cfs_read_file
# and return it. Dies with "no such VM" if nothing is returned.
sub load_config {
    my ($vmid) = @_;

    my $conf = PVE::Cluster::cfs_read_file(cfs_config_path($vmid));

    defined($conf) or die "no such VM ('$vmid')\n";

    return $conf;
}
1320
# Parser for VM config files (registered with cfs_register_file).
#
# $filename must end in /qemu-server/<vmid>.conf, $raw is the file
# content. Returns undef if $raw is undefined, otherwise a hash with
# the parsed settings:
#  - comment lines and empty lines are skipped
#  - 'description' is decoded with PVE::Tools::decode_text
#  - 'args' is taken verbatim
#  - all other "key: value" lines are validated with check_type();
#    invalid values are reported with warn() and dropped
#  - drive values get their file converted to a volume ID
#  - 'cdrom' is stored as 'ide2', 'smp' is converted to 'sockets'
sub parse_vm_config {
    my ($filename, $raw) = @_;

    return undef if !defined($raw);

    my ($vmid) = $filename =~ m|/qemu-server/(\d+)\.conf$|
        or die "got strange filename '$filename'";

    my $res = {};

    while ($raw && $raw =~ s/^(.*?)(\n|$)//) {
        my $line = $1;

        next if ($line =~ m/^\#/) || ($line =~ m/^\s*$/);

        if (my ($key, $text) = $line =~ m/^(description):\s*(.*\S)\s*$/) {
            $res->{$key} = PVE::Tools::decode_text($text);
            next;
        }

        if (my ($key, $args) = $line =~ m/^(args):\s*(.*\S)\s*$/) {
            $res->{$key} = $args;
            next;
        }

        my ($key, $value) = $line =~ m/^([a-z][a-z_]*\d*):\s*(\S+)\s*$/
            or next;

        $value = eval { check_type($key, $value) };
        if ($@) {
            warn "vm $vmid - unable to parse value of '$key' - $@";
            next;
        }

        my $fmt = $confdesc->{$key}->{format};
        if ($fmt && $fmt eq 'pve-qm-drive') {
            my $drive = parse_drive($key, $value);
            my $volid = filename_to_volume_id($vmid, $drive->{file}, $drive->{media});
            if (!$volid) {
                warn "vm $vmid - unable to parse value of '$key'\n";
                next;
            }
            $drive->{file} = $volid;
            $value = print_drive($vmid, $drive);
        }

        $res->{ $key eq 'cdrom' ? 'ide2' : $key } = $value;
    }

    # convert old smp to sockets
    $res->{sockets} = $res->{smp} if $res->{smp} && !$res->{sockets};
    delete $res->{smp};

    return $res;
}
1384
# Apply $settings / $unset to the configuration of VM $vmid while
# holding the config lock (see lock_config and change_config_nolock).
sub change_config {
    my ($vmid, $settings, $unset, $skiplock) = @_;

    # Pass a code reference. The former '&change_config_nolock' (without
    # backslash) called the function immediately - outside of the lock -
    # and handed its return value to lock_config() as callback.
    # lock_config() forwards only the extra parameters to the callback,
    # so $vmid has to be repeated for change_config_nolock().
    lock_config($vmid, \&change_config_nolock, $vmid, $settings, $unset, $skiplock);
}
1390
# Rewrite the config file of VM $vmid.
#
# $settings: hash of key => new value; every value is validated with
#            check_type() and the function dies on the first invalid one
# $unset:    hash whose keys name the settings to remove from the file
# $skiplock: when true, the check_lock() call is skipped
#
# The new content is written to "<config>.<pid>.tmp" and then renamed
# over the real config file. This function takes no lock itself
# (presumably the caller holds the config lock - see change_config).
1391sub change_config_nolock {
1392 my ($vmid, $settings, $unset, $skiplock) = @_;
1393
1394 my $res = {};
1395
1396 $unset->{ide2} = $unset->{cdrom} if $unset->{cdrom};
1397
 # NOTE(review): this inspects the 'lock' key of the *new* settings, not
 # of the configuration stored on disk - confirm that this is intended
1398 check_lock($settings) if !$skiplock;
1399
1400 # we do not use 'smp' any longer
1401 if ($settings->{sockets}) {
1402 $unset->{smp} = 1;
1403 } elsif ($settings->{smp}) {
1404 $settings->{sockets} = $settings->{smp};
1405 $unset->{smp} = 1;
1406 }
1407
 # files of all drives that get (re-)assigned by this call
1408 my $new_volids = {};
1409
 # validate the new values and collect them in $res ('cdrom' => 'ide2')
1410 foreach my $key (keys %$settings) {
1411 my $value = $settings->{$key};
1412 if ($key eq 'description') {
1413 $value = PVE::Tools::encode_text($value);
1414 }
1415 eval { $value = check_type($key, $value); };
1416 die "unable to parse value of '$key' - $@" if $@;
1417 if ($key eq 'cdrom') {
1418 $res->{ide2} = $value;
1419 } else {
1420 $res->{$key} = $value;
1421 }
1422 if (valid_drivename($key)) {
1423 my $drive = PVE::QemuServer::parse_drive($key, $value);
1424 $new_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
1425 }
1426 }
1427
1428 my $filename = config_file($vmid);
1429 my $tmpfn = "$filename.$$.tmp";
1430
1431 my $fh = new IO::File ($filename, "r") ||
1432 die "unable to read config for VM $vmid\n";
1433
1434 my $werror = "unable to write config for VM $vmid\n";
1435
1436 my $out = new IO::File ($tmpfn, "w") || die $werror;
1437
 # copy the old file line by line, replacing or dropping settings
1438 eval {
1439
 # keys already written - later duplicates in the file are skipped
1440 my $done;
1441
1442 while (my $line = <$fh>) {
1443
 # comments and empty lines are copied unchanged
1444 if (($line =~ m/^\#/) || ($line =~ m/^\s*$/)) {
1445 die $werror unless print $out $line;
1446 next;
1447 }
1448
1449 if ($line =~ m/^([a-z][a-z_]*\d*):\s*(.*\S)\s*$/) {
1450 my $key = $1;
1451 my $value = $2;
1452
1453 # remove 'unusedX' settings if we re-add a volume
1454 next if $key =~ m/^unused/ && $new_volids->{$value};
1455
1456 # convert 'smp' to 'sockets'
1457 $key = 'sockets' if $key eq 'smp';
1458
1459 next if $done->{$key};
1460 $done->{$key} = 1;
1461
 # a new value replaces the old one and is removed from $res, so
 # that only settings not yet in the file remain for the loop below
1462 if (defined ($res->{$key})) {
1463 $value = $res->{$key};
1464 delete $res->{$key};
1465 }
1466 if (!defined ($unset->{$key})) {
1467 die $werror unless print $out "$key: $value\n";
1468 }
1469
1470 next;
1471 }
1472
1473 die "unable to parse config file: $line\n";
1474 }
1475
 # append the settings which were not present in the old file
1476 foreach my $key (keys %$res) {
1477
1478 if (!defined ($unset->{$key})) {
1479 die $werror unless print $out "$key: $res->{$key}\n";
1480 }
1481 }
1482 };
1483
1484 my $err = $@;
1485
1486 $fh->close();
1487
 # on any error remove the temporary file and keep the old config
1488 if ($err) {
1489 $out->close();
1490 unlink $tmpfn;
1491 die $err;
1492 }
1493
1494 if (!$out->close()) {
1495 $err = "close failed - $!\n";
1496 unlink $tmpfn;
1497 die $err;
1498 }
1499
 # replace the config file with the new version
1500 if (!rename($tmpfn, $filename)) {
1501 $err = "rename failed - $!\n";
1502 unlink $tmpfn;
1503 die $err;
1504 }
1505}
1506
# Return a hash with the default value of every option that declares
# one in $confdesc (our JSON schema configuration). A 'keyboard' setting
# found in datacenter.cfg overrides the static keyboard default.
sub load_defaults {

    my %defaults =
        map { $_ => $confdesc->{$_}->{default} }
        grep { defined($confdesc->{$_}->{default}) }
        keys %$confdesc;

    my $res = \%defaults;

    my $conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
    if ($conf->{keyboard}) {
        $res->{keyboard} = $conf->{keyboard};
    }

    return $res;
}
1523
# Return a hash { <vmid> => { exists => 1 } } with one entry for each
# VM in the cluster VM list whose 'node' equals the local node name.
# The hash is empty when no VM list (or no 'ids' entry) is available.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();

    my $ids = $vmlist ? $vmlist->{ids} : undef;
    return {} if !$ids;

    my $nodename = PVE::INotify::nodename();

    my @local_ids = grep {
        my $d = $ids->{$_};
        $d->{node} && $d->{node} eq $nodename;
    } keys %$ids;

    my %res = map { ($_ => { exists => 1 }) } @local_ids;

    return \%res;
}
1538
# Die with "VM is locked (<lock>)" if the configuration $conf contains
# a true 'lock' value.
sub check_lock {
    my ($conf) = @_;

    my $lock = $conf->{lock};

    die "VM is locked ($lock)\n" if $lock;
}
1544
# Check whether process $pid is a kvm process that was started with
# "-pidfile $pidfile" (or "--pidfile $pidfile").
#
# The information is taken from /proc/$pid/cmdline. Returns 1 on a
# match and a false value (undef or the empty list) otherwise.
sub check_cmdline {
    my ($pidfile, $pid) = @_;

    my $fh = IO::File->new("/proc/$pid/cmdline", "r");
    return undef if !defined($fh);

    my $raw = <$fh>;
    $fh->close;
    return undef if !$raw;

    # arguments are separated by NUL bytes
    my @argv = split(/\0/, $raw);

    my $binary = $argv[0];
    return if !$binary || ($binary !~ m|kvm$|);

    foreach my $idx (0 .. $#argv) {
        my $arg = $argv[$idx];
        next if !$arg;
        next if ($arg ne '-pidfile') && ($arg ne '--pidfile');

        # only the first pidfile option is considered
        my $candidate = $argv[$idx + 1];
        return 1 if $candidate && ($candidate eq $pidfile);
        return undef;
    }

    return undef;
}
1570
# Return the PID of the running kvm process of VM $vmid, or undef if the
# VM is not running.
#
# The PID is read from the pidfile of the VM; it is only accepted if
# /proc/<pid> exists and check_cmdline() confirms that the process was
# started with that very pidfile.
#
# Dies if the VM has no configuration file.
sub check_running {
    my ($vmid) = @_;

    my $filename = config_file($vmid);

    die "unable to find configuration file for VM $vmid - no such machine\n"
        if ! -f $filename;

    my $pidfile = pidfile_name($vmid);

    my $fd = IO::File->new($pidfile, "r");
    return undef if !$fd;

    my $st = stat($fd);
    my $line = <$fd>;
    close($fd);

    # the timestamp belongs to the pidfile, so name that file in the
    # warning (the config file was reported here before)
    if ($st && ($st->mtime > time())) {
        warn "file '$pidfile' modified in future\n";
    }

    # an empty pidfile yields no line at all
    return undef if !defined($line);

    if ($line =~ m/^(\d+)$/) {
        my $pid = $1;

        return $pid if (-d "/proc/$pid") && check_cmdline($pidfile, $pid);
    }

    return undef;
}
1600
# Return the hash built by config_list(), extended with a 'pid' entry
# for every listed VM that has a "<vmid>.pid" file in $var_run_tmpdir
# and is confirmed as running by check_running().
sub vzlist {

    my $vzlist = config_list();

    my $dir = IO::Dir->new($var_run_tmpdir);
    return $vzlist if !$dir;

    foreach my $entry ($dir->read) {
        my ($vmid) = $entry =~ m/^(\d+)\.pid$/
            or next;

        my $info = $vzlist->{$vmid};
        next if !defined($info);

        my $pid = check_running($vmid);
        $info->{pid} = $pid if $pid;
    }

    return $vzlist;
}
1618
# timestamp of the last failed size query, per storage ID (or per path
# for absolute file names)
my $storage_timeout_hash = {};

# Query the size of the boot disk of the VM described by $conf.
#
# Returns ($size, $used) in list context and $size in scalar context.
# undef is returned when there is no usable boot disk (not configured,
# invalid drive name, unparsable, cdrom, no file) or when the size query
# reports no format, which is treated as a timeout. After such a failure
# the same storage is not queried again for 30 seconds.
sub disksize {
    my ($storecfg, $conf) = @_;

    my $bootdisk = $conf->{bootdisk};
    return undef if !$bootdisk
        || !valid_drivename($bootdisk)
        || !$conf->{$bootdisk};

    my $drive = parse_drive($bootdisk, $conf->{$bootdisk});
    return undef if !defined($drive) || drive_is_cdrom($drive);

    my $volid = $drive->{file};
    return undef if !$volid;

    my ($path, $timeoutid);
    if ($volid =~ m|^/|) {
        $timeoutid = $volid;
        $path = $volid;
    } else {
        $timeoutid = PVE::Storage::parse_volume_id($volid);
        $path = PVE::Storage::path($storecfg, $volid);
    }

    if (my $last_timeout = $storage_timeout_hash->{$timeoutid}) {
        # skip storage with errors
        return undef if (time() - $last_timeout) < 30;

        delete $storage_timeout_hash->{$timeoutid};
    }

    my ($size, $format, $used) = PVE::Storage::file_size_info($path, 1);

    if (!defined($format)) {
        # got timeout
        $storage_timeout_hash->{$timeoutid} = time();
        return undef;
    }

    return wantarray ? ($size, $used) : $size;
}
1670
# CPU accounting data of the previous vmstatus() call, indexed by PID:
# { time, used, cpu, relcpu }
1671my $last_proc_pid_stat;
1672
# Collect status information for all VMs returned by vzlist(), or only
# for $opt_vmid if that is given.
#
# Returns a hash { <vmid> => $d } where $d contains the keys pid, status
# ('running' or 'stopped'), disk, maxdisk, cpus, name, maxmem, uptime,
# cpu, relcpu, mem, netin, netout, diskread and diskwrite. The runtime
# counters stay 0 for VMs without a PID.
1673sub vmstatus {
1674 my ($opt_vmid) = @_;
1675
1676 my $res = {};
1677
1678 my $storecfg = PVE::Storage::config();
1679
1680 my $list = vzlist();
1681 my ($uptime) = PVE::ProcFSTools::read_proc_uptime();
1682
 # first pass: static data from the configuration, counters set to 0
1683 foreach my $vmid (keys %$list) {
1684 next if $opt_vmid && ($vmid ne $opt_vmid);
1685
1686 my $cfspath = cfs_config_path($vmid);
1687 my $conf = PVE::Cluster::cfs_read_file($cfspath) || {};
1688
1689 my $d = {};
1690 $d->{pid} = $list->{$vmid}->{pid};
1691
1692 # fixme: better status?
1693 $d->{status} = $list->{$vmid}->{pid} ? 'running' : 'stopped';
1694
1695 my ($size, $used) = disksize($storecfg, $conf);
1696 if (defined($size) && defined($used)) {
1697 $d->{disk} = $used;
1698 $d->{maxdisk} = $size;
1699 } else {
1700 $d->{disk} = 0;
1701 $d->{maxdisk} = 0;
1702 }
1703
1704 $d->{cpus} = ($conf->{sockets} || 1) * ($conf->{cores} || 1);
1705 $d->{name} = $conf->{name} || "VM $vmid";
1706 $d->{maxmem} = $conf->{memory} ? $conf->{memory}*(1024*1024) : 0;
1707
1708
1709 $d->{uptime} = 0;
1710 $d->{cpu} = 0;
1711 $d->{relcpu} = 0;
1712 $d->{mem} = 0;
1713
1714 $d->{netout} = 0;
1715 $d->{netin} = 0;
1716
1717 $d->{diskread} = 0;
1718 $d->{diskwrite} = 0;
1719
1720 $res->{$vmid} = $d;
1721 }
1722
 # network counters: sum up all tap<vmid>i<n> devices of a VM. Note the
 # swap - what the tap device receives is counted as 'netout' of the VM,
 # what it transmits as 'netin'.
1723 my $netdev = PVE::ProcFSTools::read_proc_net_dev();
1724 foreach my $dev (keys %$netdev) {
1725 next if $dev !~ m/^tap([1-9]\d*)i/;
1726 my $vmid = $1;
1727 my $d = $res->{$vmid};
1728 next if !$d;
1729
1730 $d->{netout} += $netdev->{$dev}->{receive};
1731 $d->{netin} += $netdev->{$dev}->{transmit};
1732 }
1733
1734 my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
1735 my $cpucount = $cpuinfo->{cpus} || 1;
1736 my $ctime = gettimeofday;
1737
 # second pass: runtime data from /proc/<pid> of the running VMs
1738 foreach my $vmid (keys %$list) {
1739
1740 my $d = $res->{$vmid};
1741 my $pid = $d->{pid};
1742 next if !$pid;
1743
 # diskread/diskwrite are the rchar/wchar counters of /proc/<pid>/io
1744 if (my $fh = IO::File->new("/proc/$pid/io", "r")) {
1745 my $data = {};
1746 while (defined (my $line = <$fh>)) {
1747 if ($line =~ m/^([rw]char):\s+(\d+)$/) {
1748 $data->{$1} = $2;
1749 }
1750 }
1751 close($fh);
1752 $d->{diskread} = $data->{rchar} || 0;
1753 $d->{diskwrite} = $data->{wchar} || 0;
1754 }
1755
1756 my $statstr = file_read_firstline("/proc/$pid/stat");
1757 next if !$statstr;
1758
 # captures used below: $2/$3 => utime/stime, $6 => starttime,
 # $7 => vsize, $8 => rss
 # NOTE(review): the rss value is multiplied by a hard coded 4096
 # (presumably the page size) - confirm
1759 my ($utime, $stime, $vsize, $rss, $starttime);
1760 if ($statstr =~ m/^$pid \(.*\) \S (-?\d+) -?\d+ -?\d+ -?\d+ -?\d+ \d+ \d+ \d+ \d+ \d+ (\d+) (\d+) (-?\d+) (-?\d+) -?\d+ -?\d+ -?\d+ 0 (\d+) (\d+) (-?\d+) \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ -?\d+ -?\d+ \d+ \d+ \d+/) {
1761 ($utime, $stime, $vsize, $rss, $starttime) = ($2, $3, $7, $8 * 4096, $6);
1762 } else {
1763 next;
1764 }
1765
1766 my $used = $utime + $stime;
1767
 # never account for more vcpus than the host has cpus
1768 my $vcpus = $d->{cpus} > $cpucount ? $cpucount : $d->{cpus};
1769
 # NOTE(review): starttime is divided by a fixed 100 here, while the
 # cpu calculation below uses $clock_ticks - confirm
1770 $d->{uptime} = int ($uptime - ($starttime/100));
1771
 # memory usage is estimated as the rss/vsize ratio applied to maxmem
1772 if ($vsize) {
1773 $d->{mem} = int (($rss/$vsize)*$d->{maxmem});
1774 }
1775
 # first time we see this PID: only remember the counters, cpu stays 0
1776 my $old = $last_proc_pid_stat->{$pid};
1777 if (!$old) {
1778 $last_proc_pid_stat->{$pid} = {
1779 time => $ctime,
1780 used => $used,
1781 cpu => 0,
1782 relcpu => 0,
1783 };
1784 next;
1785 }
1786
 # clock ticks available on all host cpus since the last sample
1787 my $dtime = ($ctime - $old->{time}) * $cpucount * $clock_ticks;
1788
 # only compute a new value if the interval is long enough, else
 # report the previously computed values again
1789 if ($dtime > 1000) {
1790 my $dutime = $used - $old->{used};
1791
1792 $d->{cpu} = $dutime/$dtime;
1793 $d->{relcpu} = ($d->{cpu} * $cpucount) / $vcpus;
1794 $last_proc_pid_stat->{$pid} = {
1795 time => $ctime,
1796 used => $used,
1797 cpu => $d->{cpu},
1798 relcpu => $d->{relcpu},
1799 };
1800 } else {
1801 $d->{cpu} = $old->{cpu};
1802 $d->{relcpu} = $old->{relcpu};
1803 }
1804 }
1805
1806 return $res;
1807}
1808
# Call $func->($name, $drive) for every key of $conf that is a valid
# drive name and whose value can be parsed by parse_drive(). $drive is
# the parsed drive.
sub foreach_drive {
    my ($conf, $func) = @_;

    # keep this a real loop: callbacks may leave via 'next'
    foreach my $name (grep { valid_drivename($_) } keys %$conf) {
        my $drive = parse_drive($name, $conf->{$name});
        $func->($name, $drive) if $drive;
    }
}
1821
# Translate the VM configuration $conf into the kvm command line.
#
# $defaults supplies fallback values (tablet, tdf, keyboard, memory,
# cpuunits); $migrate_uri, if set, is passed with '-incoming'.
#
# Returns the command as array reference; in list context additionally
# the list of drive files that PVE::Storage::parse_volume_id() accepts:
# ($cmd, $vollist).
#
# Dies if the kvm version is older than 0.14.0 (or cannot be parsed), on
# network IDs >= $MAX_NETS and on too long tap interface names.
1822sub config_to_command {
1823 my ($storecfg, $vmid, $conf, $defaults, $migrate_uri) = @_;
1824
1825 my $cmd = [];
1826
 # encode the version x.y.z as x*1000000 + y*1000 + z
1827 my $kvmver = kvm_user_version();
1828 my $vernum = 0; # unknown
1829 if ($kvmver =~ m/^(\d+)\.(\d+)\.(\d+)$/) {
1830 $vernum = $1*1000000+$2*1000+$3;
1831 }
1832
1833 die "detected old qemu-kvm binary ($kvmver)\n" if $vernum < 14000;
1834
1835 my $have_ovz = -f '/proc/vz/vestat';
1836
1837 push @$cmd, '/usr/bin/kvm';
1838
1839 push @$cmd, '-id', $vmid;
1840
 # set as soon as a virtio drive or network device is found
1841 my $use_virtio = 0;
1842
1843 my $socket = monitor_socket ($vmid);
1844 push @$cmd, '-monitor', "unix:$socket,server,nowait";
1845
1846 $socket = vnc_socket ($vmid);
1847 push @$cmd, '-vnc', "unix:$socket,x509,password";
1848
1849 push @$cmd, '-pidfile' , pidfile_name ($vmid);
1850
1851 push @$cmd, '-daemonize';
1852
1853 push @$cmd, '-incoming', $migrate_uri if $migrate_uri;
1854
1855 # include usb device config
1856 push @$cmd, '-readconfig', '/usr/share/qemu-server/pve-usb.cfg';
1857
1858 # enable absolute mouse coordinates (needed by vnc)
1859 my $tablet = defined ($conf->{tablet}) ? $conf->{tablet} : $defaults->{tablet};
1860 push @$cmd, '-device', 'usb-tablet,bus=ehci.0,port=6' if $tablet;
1861
1862 # host pci devices
1863 if (my $pcidl = $conf->{hostpci}) {
1864 my @dl = split (/,/, $pcidl);
1865 foreach my $dev (@dl) {
1866 push @$cmd, '-device', "pci-assign,host=$dev" if $dev;
1867 }
1868 }
1869
1870 # usb devices
1871 for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) {
1872 my $d = parse_usb_device($conf->{"usb$i"});
1873 next if !$d;
1874 if ($d->{vendorid} && $d->{productid}) {
1875 push @$cmd, '-device', "usb-host,vendorid=$d->{vendorid},productid=$d->{productid}";
1876 } elsif (defined($d->{hostbus}) && defined($d->{hostport})) {
1877 push @$cmd, '-device', "usb-host,hostbus=$d->{hostbus},hostport=$d->{hostport}";
1878 }
1879 }
1880
1881 if (my $usbdl = $conf->{hostusb}) {
1882 my @dl = split (/,/, $usbdl);
1883 foreach my $dev (@dl) {
1884 push @$cmd, '-usbdevice', "host:$dev" if $dev;
1885 }
1886 }
1887
1888 # serial devices
 # (devices which are not character devices on this host are skipped)
1889 if (my $serdl = $conf->{serial}) {
1890 my @dl = split (/,/, $serdl);
1891 foreach my $dev (@dl) {
1892 next if !$dev;
1893 if (-c $dev) {
1894 push @$cmd, '-serial', "$dev";
1895 }
1896 }
1897 }
1898
1899 # parallel devices
 # (devices which are not character devices on this host are skipped)
1900 if (my $pardl = $conf->{parallel}) {
1901 my @dl = split (/,/, $pardl);
1902 foreach my $dev (@dl) {
1903 next if !$dev;
1904 if (-c $dev) {
1905 push @$cmd, '-parallel', "$dev";
1906 }
1907 }
1908 }
1909
1910 my $vmname = $conf->{name} || "vm$vmid";
1911
1912 push @$cmd, '-name', $vmname;
1913
1914 my $sockets = 1;
1915 $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer used
1916 $sockets = $conf->{sockets} if $conf->{sockets};
1917
1918 my $cores = $conf->{cores} || 1;
1919
1920 my $boot_opt;
1921
1922 push @$cmd, '-smp', "sockets=$sockets,cores=$cores";
1923
1924 push @$cmd, '-cpu', $conf->{cpu} if $conf->{cpu};
1925
1926 $boot_opt = "menu=on";
1927 if ($conf->{boot}) {
1928 $boot_opt .= ",order=$conf->{boot}";
1929 }
1930
1931 push @$cmd, '-nodefaults';
1932
1933 push @$cmd, '-boot', $boot_opt if $boot_opt;
1934
1935 push @$cmd, '-no-acpi' if defined ($conf->{acpi}) && $conf->{acpi} == 0;
1936
1937 push @$cmd, '-no-reboot' if defined ($conf->{reboot}) && $conf->{reboot} == 0;
1938
 # without an explicit vga setting use 'std' for win7/w2k8 guests and
 # 'cirrus' for everything else
1939 my $vga = $conf->{vga};
1940 if (!$vga) {
1941 if ($conf->{ostype} && ($conf->{ostype} eq 'win7' || $conf->{ostype} eq 'w2k8')) {
1942 $vga = 'std';
1943 } else {
1944 $vga = 'cirrus';
1945 }
1946 }
1947
1948 push @$cmd, '-vga', $vga if $vga; # for kvm 77 and later
1949
1950 # time drift fix
1951 my $tdf = defined ($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
1952 push @$cmd, '-tdf' if $tdf;
1953
1954 my $nokvm = defined ($conf->{kvm}) && $conf->{kvm} == 0 ? 1 : 0;
1955
1956 if (my $ost = $conf->{ostype}) {
1957 # other, wxp, w2k, w2k3, w2k8, wvista, win7, l24, l26
1958
1959 if ($ost =~ m/^w/) { # windows
 # windows guests get localtime unless the setting is defined at all
1960 push @$cmd, '-localtime' if !defined ($conf->{localtime});
1961
1962 # use rtc-td-hack when acpi is enabled
1963 if (!(defined ($conf->{acpi}) && $conf->{acpi} == 0)) {
1964 push @$cmd, '-rtc-td-hack';
1965 }
1966 }
1967
1968 # -tdf ?
1969 # -no-acpi
1970 # -no-kvm
1971 # -win2k-hack ?
1972 }
1973
1974 push @$cmd, '-no-kvm' if $nokvm;
1975
1976 push @$cmd, '-localtime' if $conf->{localtime};
1977
1978 push @$cmd, '-startdate', $conf->{startdate} if $conf->{startdate};
1979
1980 push @$cmd, '-S' if $conf->{freeze};
1981
1982 # set keyboard layout
1983 my $kb = $conf->{keyboard} || $defaults->{keyboard};
1984 push @$cmd, '-k', $kb if $kb;
1985
1986 # enable sound
1987 #my $soundhw = $conf->{soundhw} || $defaults->{soundhw};
1988 #push @$cmd, '-soundhw', 'es1370';
1989 #push @$cmd, '-soundhw', $soundhw if $soundhw;
1990
1991 my $vollist = [];
1992
 # one '-drive' option per configured drive
1993 foreach_drive($conf, sub {
1994 my ($ds, $drive) = @_;
1995
 # remember the file only if it parses as a volume ID
1996 eval {
1997 PVE::Storage::parse_volume_id ($drive->{file});
1998 push @$vollist, $drive->{file};
1999 }; # ignore errors
2000
2001 $use_virtio = 1 if $ds =~ m/^virtio/;
2002 my $tmp = print_drive_full ($storecfg, $vmid, $drive);
2003 $tmp .= ",boot=on" if $conf->{bootdisk} && ($conf->{bootdisk} eq $ds);
2004 push @$cmd, '-drive', $tmp;
2005 });
2006
2007 push @$cmd, '-m', $conf->{memory} || $defaults->{memory};
2008
2009 my $foundnet = 0;
2010
 # network devices net0, net1, ... in sorted key order
2011 foreach my $k (sort keys %$conf) {
2012 next if $k !~ m/^net(\d+)$/;
2013 my $i = int ($1);
2014
2015 die "got strange net id '$i'\n" if $i >= ${MAX_NETS};
2016
2017 if ($conf->{"net$i"} && (my $net = parse_net($conf->{"net$i"}))) {
2018
2019 $foundnet = 1;
2020
2021 my $ifname = "tap${vmid}i$i";
2022
2023 # kvm uses TUNSETIFF ioctl, and that limits ifname length
2024 die "interface name '$ifname' is too long (max 15 character)\n"
2025 if length($ifname) >= 16;
2026
2027 my $device = $net->{model};
2028 my $vhostparam = '';
2029 if ($net->{model} eq 'virtio') {
2030 $use_virtio = 1;
2031 $device = 'virtio-net-pci';
2032 $vhostparam = ',vhost=on' if $kernel_has_vhost_net;
2033 };
2034
 # tap device attached by the pve-bridge script if a bridge is
 # configured, user mode networking otherwise
2035 if ($net->{bridge}) {
2036 push @$cmd, '-netdev', "type=tap,id=${k},ifname=${ifname},script=/var/lib/qemu-server/pve-bridge$vhostparam";
2037 } else {
2038 push @$cmd, '-netdev', "type=user,id=${k},hostname=$vmname";
2039 }
2040
2041 # qemu > 0.15 always try to boot from network - we disable that by
2042 # not loading the pxe rom file
2043 my $extra = (!$conf->{boot} || ($conf->{boot} !~ m/n/)) ?
2044 "romfile=," : '';
2045 push @$cmd, '-device', "$device,${extra}mac=$net->{macaddr},netdev=${k}";
2046 }
2047 }
2048
2049 push @$cmd, '-net', 'none' if !$foundnet;
2050
2051 # hack: virtio with fairsched is unreliable, so we do not use fairsched
2052 # when the VM uses virtio devices.
2053 if (!$use_virtio && $have_ovz) {
2054
2055 my $cpuunits = defined ($conf->{cpuunits}) ?
2056 $conf->{cpuunits} : $defaults->{cpuunits};
2057
2058 push @$cmd, '-cpuunits', $cpuunits if $cpuunits;
2059
2060 # fixme: cpulimit is currently ignored
2061 #push @$cmd, '-cpulimit', $conf->{cpulimit} if $conf->{cpulimit};
2062 }
2063
2064 # add custom args
2065 if ($conf->{args}) {
2066 my $aa = split_args ($conf->{args});
2067 push @$cmd, @$aa;
2068 }
2069
2070 return wantarray ? ($cmd, $vollist) : $cmd;
2071}
2072
# Return the path of the VNC unix socket of VM $vmid.
sub vnc_socket {
    my ($vmid) = @_;

    return $var_run_tmpdir . "/$vmid.vnc";
}
2077
# Return the path of the monitor unix socket of VM $vmid.
sub monitor_socket {
    my ($vmid) = @_;

    return $var_run_tmpdir . "/$vmid.mon";
}
2082
# Return the path of the pidfile of VM $vmid.
sub pidfile_name {
    my ($vmid) = @_;

    return $var_run_tmpdir . "/$vmid.pid";
}
2087
# Generate a random MAC address in the form XX:XX:XX:XX:XX:XX (upper
# case hex). The six octets are taken from the start of a SHA1 digest
# over rand() and time(); in the first octet the multicast bit is
# cleared and the "locally administered" bit is set.
sub random_ether_addr {

    my $digest = Digest::SHA1::sha1_hex(rand(), time());

    my @octets = map { hex(substr($digest, $_ * 2, 2)) } (0 .. 5);

    # clear multicast, set local id
    $octets[0] = ($octets[0] & 0xfe) | 2;

    return join(':', map { sprintf("%02X", $_) } @octets);
}
2110
# Return the first port in the range 60000..60009 on which a listening
# socket can be opened on localhost. The test socket is closed again
# before returning. Dies if none of the ports is usable.
sub next_migrate_port {

    foreach my $port (60000 .. 60009) {

        my $sock = IO::Socket::INET->new(Listen => 5,
                                         LocalAddr => 'localhost',
                                         LocalPort => $port,
                                         ReuseAddr => 1,
                                         Proto => 0);
        next if !$sock;

        close($sock);
        return $port;
    }

    die "unable to find free migration port";
}
2129
# Start VM $vmid while holding the config lock.
#
# $statefile: 'tcp' starts the VM waiting for an incoming migration on a
#             free local port (the port is printed); any other true
#             value names a state file to load - it is removed after
#             the start and a 'cont' is sent to the monitor
# $skiplock:  when true, a 'lock' entry in the config is ignored
#
# Dies if the VM is already running, if a host PCI device cannot be
# prepared, or if the kvm command fails.
sub vm_start {
    my ($storecfg, $vmid, $statefile, $skiplock) = @_;

    my $start = sub {
        my $conf = load_config($vmid);

        check_lock($conf) if !$skiplock;

        if (check_running($vmid)) {
            my $msg = "VM $vmid already running - start failed\n" ;
            syslog('err', $msg);
            die $msg;
        }
        syslog('info', "VM $vmid start");

        my $migrate_uri;
        my $migrate_port = 0;

        my $incoming_tcp = $statefile && ($statefile eq 'tcp');

        if ($incoming_tcp) {
            $migrate_port = next_migrate_port();
            $migrate_uri = "tcp:localhost:${migrate_port}";
        } elsif ($statefile) {
            if (-f $statefile) {
                $migrate_uri = "exec:cat $statefile";
            } else {
                warn "state file '$statefile' does not exist - doing normal startup\n";
            }
        }

        my $defaults = load_defaults();

        my ($cmd, $vollist) = config_to_command($storecfg, $vmid, $conf, $defaults, $migrate_uri);

        # host pci devices
        if (my $pcidl = $conf->{hostpci}) {
            foreach my $dev (map { lc($_) } split(/,/, $pcidl)) {
                my $info = pci_device_info("0000:$dev");
                die "no pci device info for device '$dev'\n" if !$info;
                die "can't unbind pci device '$dev'\n" if !pci_dev_bind_to_stub($info);
                die "can't reset pci device '$dev'\n" if !pci_dev_reset($info);
            }
        }

        PVE::Storage::activate_volumes($storecfg, $vollist);

        # no timeout when we load state or wait for a migration
        my $timeout = $migrate_uri ? undef : 30;
        eval { run_command($cmd, timeout => $timeout); };
        if (my $err = $@) {
            my $msg = "start failed: $err";
            syslog('err', "VM $vmid $msg");
            die $msg;
        }

        if ($incoming_tcp) {
            print "migration listens on port $migrate_port\n";
        } elsif ($statefile) {
            unlink $statefile;
            # fixme: send resume - is that necessary ?
            eval { vm_monitor_command($vmid, "cont", 1) };
        }

        # errors of the following monitor commands are ignored
        my $migrate_speed = $conf->{migrate_speed} || $defaults->{migrate_speed};
        if ($migrate_speed) {
            my $moncmd = "migrate_set_speed ${migrate_speed}m";
            eval { vm_monitor_command($vmid, $moncmd, 1); };
        }

        my $migrate_downtime = $conf->{migrate_downtime} || $defaults->{migrate_downtime};
        if ($migrate_downtime) {
            my $moncmd = "migrate_set_downtime ${migrate_downtime}";
            eval { vm_monitor_command($vmid, $moncmd, 1); };
        }
    };

    lock_config($vmid, $start);
}
2213
# Read from $fh until a chunk ends with the monitor prompt "(qemu) " or
# until end of file, and return everything read (without the prompt).
#
# Each wait for data is limited to $timeout seconds; the function dies
# with "monitor read timeout" when that expires and with the system
# error message when sysread() fails.
sub __read_avail {
    my ($fh, $timeout) = @_;

    my $sel = IO::Select->new($fh);

    my $res = '';

    while (1) {
        my @ready = $sel->can_read($timeout);
        die "monitor read timeout\n" if !scalar(@ready);

        my $buf;
        my $count = $fh->sysread($buf, 8192);
        die "$!\n" if !defined($count);

        last if !$count; # end of file

        if ($buf =~ /^(.*)\(qemu\) $/s) {
            $res .= $1;
            last;
        }

        $res .= $buf;
    }

    return $res;
}
2245
# Send the command $cmdstr to the monitor of VM $vmid and return the
# output (with the command echo removed, lines joined by "\n").
#
# The command is logged to syslog unless $nolog is true. For 'q' and
# 'quit' no answer is read and undef is returned. Every failure (VM not
# running, connect error, unexpected banner, timeouts, write errors) is
# logged to syslog and re-thrown.
sub vm_monitor_command {
    my ($vmid, $cmdstr, $nolog) = @_;

    my $res;

    syslog("info", "VM $vmid monitor command '$cmdstr'") if !$nolog;

    # hack: migrate sometime blocks the monitor (when migrate_downtime
    # is set), so those commands get a timeout of 1 hour
    my $is_migrate = $cmdstr =~ m/^(info\s+migrate|migrate\s)/;

    eval {
        die "VM not running\n" if !check_running($vmid);

        my $sock = IO::Socket::UNIX->new(Peer => monitor_socket($vmid)) ||
            die "unable to connect to VM $vmid socket - $!\n";

        my $timeout = $is_migrate ? 60*60 : 3;

        # read banner;
        my $banner = __read_avail($sock, $timeout);
        die "got unexpected qemu monitor banner\n"
            if $banner !~ m/^QEMU\s+(\S+)\s+monitor\s/;

        my @writable = IO::Select->new($sock)->can_write($timeout);
        die "monitor write error - timeout" if !scalar(@writable);

        my $fullcmd = "$cmdstr\r";

        my $written = $sock->syswrite($fullcmd);
        die "monitor write error - $!"
            if !$written || ($written != length($fullcmd));

        return if ($cmdstr eq 'q') || ($cmdstr eq 'quit');

        if ($is_migrate) {
            $timeout = 60*60; # 1 hour
        } elsif ($cmdstr =~ m/^(eject|change)/) {
            $timeout = 60; # note: cdrom mount command is slow
        } else {
            $timeout = 20;
        }

        $res = __read_avail($sock, $timeout);
        if ($res) {
            my @lines = split("\r?\n", $res);

            shift @lines if $lines[0] !~ m/^unknown command/; # skip echo

            $res = join("\n", @lines) . "\n";
        }
    };

    if (my $err = $@) {
        syslog("err", "VM $vmid monitor command failed - $err");
        die $err;
    }

    return $res;
}
2319
# Return the kvm command line of VM $vmid as a single string (the
# arguments computed by config_to_command() joined with spaces).
sub vm_commandline {
    my ($storecfg, $vmid) = @_;

    my $conf = load_config($vmid);
    my $defaults = load_defaults();

    my $argv = config_to_command($storecfg, $vmid, $conf, $defaults);

    return join(' ', @$argv);
}
2331
# Send 'system_reset' to the monitor of VM $vmid while holding the
# config lock. A 'lock' entry in the config aborts the operation unless
# $skiplock is true.
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $reset = sub {
        my $conf = load_config($vmid);
        check_lock($conf) unless $skiplock;

        syslog("info", "VM $vmid sending 'reset'");

        vm_monitor_command($vmid, "system_reset", 1);
    };

    lock_config($vmid, $reset);
}
2346
# Send 'system_powerdown' to the monitor of VM $vmid while holding the
# config lock. A 'lock' entry in the config aborts the operation unless
# $skiplock is true.
sub vm_shutdown {
    my ($vmid, $skiplock) = @_;

    my $shutdown = sub {
        my $conf = load_config($vmid);
        check_lock($conf) unless $skiplock;

        syslog("info", "VM $vmid sending 'shutdown'");

        vm_monitor_command($vmid, "system_powerdown", 1);
    };

    lock_config($vmid, $shutdown);
}
2361
# Stop VM $vmid while holding the config lock.
#
# Nothing is done if the VM is not running. Otherwise 'quit' is sent to
# the monitor and the process gets up to 50 seconds to exit; if it is
# still there (or if 'quit' failed) SIGTERM is sent, followed by SIGKILL
# after 10 more seconds. Finally the fairsched group is removed.
# A 'lock' entry in the config aborts the operation unless $skiplock is
# true.
sub vm_stop {
    my ($vmid, $skiplock) = @_;

    lock_config($vmid, sub {

        my $pid = check_running($vmid);

        if (!$pid) {
            syslog('info', "VM $vmid already stopped");
            return;
        }

        my $conf = load_config($vmid);

        check_lock($conf) if !$skiplock;

        syslog("info", "VM $vmid stopping");

        # poll once per second, true if the VM outlived the timeout
        my $wait_timed_out = sub {
            my ($timeout) = @_;

            my $count = 0;
            while (($count < $timeout) && check_running($vmid)) {
                $count++;
                sleep 1;
            }

            return $count >= $timeout;
        };

        eval { vm_monitor_command($vmid, "quit", 1); };

        if ($@) {
            syslog('info', "VM $vmid quit failed - terminating now with SIGTERM");
            kill 15, $pid;
        } elsif ($wait_timed_out->(50)) { # fixme: how long?
            syslog('info', "VM $vmid still running - terminating now with SIGTERM");
            kill 15, $pid;
        }

        # wait again
        if ($wait_timed_out->(10)) {
            syslog('info', "VM $vmid still running - terminating now with SIGKILL\n");
            kill 9, $pid;
        }

        fairsched_rmnod($vmid); # try to destroy group
    });
}
2420
# Send the 'stop' monitor command to VM $vmid.
#
# Parameters:
#   $vmid     - ID of the VM
#   $skiplock - if true, check_lock() is not called on the VM config
#
# All work happens inside lock_config($vmid, ...).
sub vm_suspend {
    my ($vmid, $skiplock) = @_;

    my $do_suspend = sub {
        my $cfg = load_config($vmid);

        unless ($skiplock) {
            check_lock($cfg);
        }

        syslog("info", "VM $vmid suspend");

        return vm_monitor_command($vmid, "stop", 1);
    };

    return lock_config($vmid, $do_suspend);
}
2435
# Send the 'cont' monitor command to VM $vmid.
#
# Parameters:
#   $vmid     - ID of the VM
#   $skiplock - if true, check_lock() is not called on the VM config
#
# All work happens inside lock_config($vmid, ...).
sub vm_resume {
    my ($vmid, $skiplock) = @_;

    my $do_resume = sub {
        my $cfg = load_config($vmid);

        unless ($skiplock) {
            check_lock($cfg);
        }

        syslog("info", "VM $vmid resume");

        return vm_monitor_command($vmid, "cont", 1);
    };

    return lock_config($vmid, $do_resume);
}
2450
# Send the key combination ctrl-alt-delete to VM $vmid through the
# 'sendkey' monitor command.
#
# Parameters:
#   $vmid     - ID of the VM
#   $skiplock - if true, check_lock() is not called on the VM config
#
# All work happens inside lock_config($vmid, ...).
sub vm_cad {
    my ($vmid, $skiplock) = @_;

    my $do_sendkey = sub {
        my $cfg = load_config($vmid);

        unless ($skiplock) {
            check_lock($cfg);
        }

        syslog("info", "VM $vmid sending cntl-alt-delete");

        return vm_monitor_command($vmid, "sendkey ctrl-alt-delete", 1);
    };

    return lock_config($vmid, $do_sendkey);
}
2465
# Destroy VM $vmid via destroy_vm(), refusing to do so while it is running.
#
# Parameters:
#   $storecfg - storage configuration, passed through to destroy_vm()
#   $vmid     - ID of the VM
#   $skiplock - if true, check_lock() is not called on the VM config
#
# All work happens inside lock_config($vmid, ...). Any error raised while
# destroying (including "VM is running") is logged to syslog and re-thrown.
sub vm_destroy {
    my ($storecfg, $vmid, $skiplock) = @_;

    my $do_destroy = sub {
        my $cfg = load_config($vmid);

        unless ($skiplock) {
            check_lock($cfg);
        }

        syslog("info", "VM $vmid destroy called (removing all data)");

        eval {
            die "VM is running\n" if check_running($vmid);

            fairsched_rmnod($vmid); # try to destroy group
            destroy_vm($storecfg, $vmid);
        };

        my $failure = $@;

        if ($failure) {
            syslog("err", "VM $vmid destroy failed - $failure");
            die $failure;
        }
    };

    lock_config($vmid, $do_destroy);
}
2494
# Stop all running VMs, escalating step by step:
#   1. vm_shutdown() for every running VM, then wait up to $timeout seconds
#   2. 'quit' monitor command for the remaining VMs, then wait up to 30 seconds
#   3. SIGTERM for whatever is still running
#
# Parameters:
#   $timeout - seconds to wait after the shutdown requests; a false value
#              selects the default of 3 minutes
#
# A VM counts as running when its vzlist() entry has a true {pid}. Progress
# messages go to syslog and to STDERR. Errors from individual VMs are
# printed to STDERR and do not abort the loop.
sub vm_stopall {
    my ($timeout) = @_;

    $timeout = 3*60 if !$timeout;

    my $wt = 5; # seconds between two polls of vzlist()

    # Return a hash ref (vmid => pid) of all VMs that currently have a pid.
    my $list_running = sub {
        my $vzlist = vzlist();
        my $running = {};
        foreach my $vmid (keys %$vzlist) {
            my $pid = $vzlist->{$vmid}->{pid};
            $running->{$vmid} = $pid if $pid;
        }
        return $running;
    };

    # Poll every $wt seconds for at most $secs seconds (rounded up to a
    # multiple of $wt) until no VM is running. Returns the set of VMs still
    # running; $running is returned unchanged if no poll takes place.
    my $wait_for_exit = sub {
        my ($running, $secs) = @_;

        my $maxtries = int(($secs + $wt - 1)/$wt);
        for (my $try = 0; ($try < $maxtries) && scalar(keys %$running); $try++) {
            sleep $wt;
            $running = $list_running->();
        }

        return $running;
    };

    my $running = $list_running->();
    return if !scalar(keys %$running);

    my $msg = "Stopping Qemu Server - sending shutdown requests to all VMs\n";
    syslog('info', $msg);
    print STDERR $msg;

    foreach my $vmid (keys %$running) {
        eval { vm_shutdown($vmid, 1); };
        print STDERR $@ if $@;
    }

    $running = $wait_for_exit->($running, $timeout);
    return if !scalar(keys %$running);

    foreach my $vmid (keys %$running) {
        $msg = "VM $vmid still running - sending stop now\n";
        syslog('info', $msg);
        print STDERR $msg;

        eval { vm_monitor_command($vmid, "quit", 1); };
        print STDERR $@ if $@;
    }

    $running = $wait_for_exit->($running, 30);
    return if !scalar(keys %$running);

    foreach my $vmid (keys %$running) {
        $msg = "VM $vmid still running - terminating now with SIGTERM\n";
        syslog('info', $msg);
        print STDERR $msg;
        kill 15, $running->{$vmid};
    }

    # this is called by system shutdown scripts, so remaining
    # processes get killed anyway (no need to send kill -9 here)

    $msg = "Qemu Server stopped\n";
    syslog('info', $msg);
    print STDERR $msg;
}
2584
2585# pci helpers
2586
# Write $buf to $filename, truncating or creating the file.
#
# Parameters:
#   $filename - path of the file to write
#   $buf      - data to write
#
# Returns 1 on success and undef if the file cannot be opened, the write
# fails, or close() fails.
#
# The data is written with syswrite() rather than a buffered print: with
# print the real write(2) only happens at close time, so an error reported
# by the kernel (e.g. a rejected write or ENOSPC) would go unnoticed.
sub file_write {
    my ($filename, $buf) = @_;

    my $fh = IO::File->new($filename, "w");
    return undef if !$fh;

    my $written = syswrite($fh, $buf);
    my $closed = $fh->close();

    return undef if !defined($written) || !$closed;

    return 1;
}
2599
# Collect information about the PCI device $name from sysfs.
#
# Parameters:
#   $name - device address of the form "dddd:bb:ss.f" (lowercase hex)
#
# Returns a hash ref with the keys name, vendor, product (both without the
# leading "0x"), domain, bus, slot, func, irq and has_fl_reset (true if the
# device has a 'reset' file). Returns undef if $name is malformed or if the
# irq, vendor or device files cannot be read in the expected format.
sub pci_device_info {
    my ($name) = @_;

    my ($domain, $bus, $slot, $func) =
        $name =~ m/^([a-f0-9]{4}):([a-f0-9]{2}):([a-f0-9]{2})\.([a-f0-9])$/;
    return undef if !defined($domain);

    my $devdir = "$pcisysfs/devices/$name";

    my $irq = file_read_firstline("$devdir/irq");
    return undef unless defined($irq) && $irq =~ m/^\d+$/;

    # the substitutions strip the "0x" prefix and fail if it is missing
    my $vendor = file_read_firstline("$devdir/vendor");
    return undef unless defined($vendor) && $vendor =~ s/^0x//;

    my $product = file_read_firstline("$devdir/device");
    return undef unless defined($product) && $product =~ s/^0x//;

    my $has_reset = -f "$devdir/reset" || 0;

    return {
        name => $name,
        vendor => $vendor,
        product => $product,
        domain => $domain,
        bus => $bus,
        slot => $slot,
        func => $func,
        irq => $irq,
        has_fl_reset => $has_reset,
    };
}
2631
# Write "1" to the 'reset' file of a PCI device in sysfs.
#
# Parameters:
#   $dev - hash ref with at least the key 'name' (the device address)
#
# Returns the result of file_write().
sub pci_dev_reset {
    my ($dev) = @_;

    return file_write("$pcisysfs/devices/$dev->{name}/reset", "1");
}
2641
# Bind a PCI device to the pci-stub driver.
#
# Parameters:
#   $dev - hash ref with the keys 'name', 'vendor' and 'product'
#
# Steps: register "vendor product" with pci-stub via new_id, unbind the
# device from its current driver, then bind it to pci-stub unless it
# already shows up there. A failed unbind is only an error if the unbind
# file exists.
#
# Returns 1 if the device is already bound, undef if a required write
# fails, and otherwise the result of testing for the device directory
# below the pci-stub driver.
sub pci_dev_bind_to_stub {
    my ($dev) = @_;

    my $name = $dev->{name};

    my $stubdir = "$pcisysfs/drivers/pci-stub";
    my $bound_marker = "$stubdir/$name";

    return 1 if -d $bound_marker;

    my $id = join(' ', $dev->{vendor}, $dev->{product});
    file_write("$stubdir/new_id", $id) or return undef;

    my $unbind = "$pcisysfs/devices/$name/driver/unbind";
    if (!file_write($unbind, $name) && -f $unbind) {
        return undef;
    }

    unless (-d $bound_marker) {
        file_write("$stubdir/bind", $name) or return undef;
    }

    return -d $bound_marker;
}
2665
26661;