]> git.proxmox.com Git - qemu-server.git/blame - PVE/QemuServer.pm
change print_drive sub to new device syntax
[qemu-server.git] / PVE / QemuServer.pm
CommitLineData
1e3baf05
DM
1package PVE::QemuServer;
2
3use strict;
4use POSIX;
5use IO::Handle;
6use IO::Select;
7use IO::File;
8use IO::Dir;
9use IO::Socket::UNIX;
10use File::Basename;
11use File::Path;
12use File::stat;
13use Getopt::Long;
14use Digest::SHA1;
15use Fcntl ':flock';
16use Cwd 'abs_path';
17use IPC::Open3;
18use Fcntl;
19use PVE::SafeSyslog;
20use Storable qw(dclone);
21use PVE::Exception qw(raise raise_param_exc);
22use PVE::Storage;
23use PVE::Tools qw(run_command lock_file file_read_firstline);
24use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file cfs_lock_file);
25use PVE::INotify;
26use PVE::ProcFSTools;
27use Time::HiRes qw (gettimeofday);
28
29my $clock_ticks = POSIX::sysconf(&POSIX::_SC_CLK_TCK);
30
# Note about locking: we use flock on the config file to protect
# against concurrent actions.
# Additionally, we have a 'lock' setting in the config file. This
# can be set to 'migrate' or 'backup'. Most actions are not
# allowed when such a lock is set. But you can ignore this kind of
# lock with the --skiplock flag.
37
38cfs_register_file('/qemu-server/', \&parse_vm_config);
39
40#no warnings 'redefine';
41
# Define the syscall numbers used by the fairsched_* wrappers and the
# KVM_GET_API_VERSION ioctl request, unless they are already available.
# _VZSYSCALLS_H_ acts as the include guard; every constant is created via
# string eval and only when it is not defined yet.  Dies on architectures
# other than x86_64 and i386.
unless (defined(&_VZSYSCALLS_H_)) {
    eval 'sub _VZSYSCALLS_H_ () {1;}' unless defined(&_VZSYSCALLS_H_);
    require 'sys/syscall.ph';
    if (defined(&__x86_64__)) {
        eval 'sub __NR_fairsched_vcpus () {499;}' unless defined(&__NR_fairsched_vcpus);
        eval 'sub __NR_fairsched_mknod () {504;}' unless defined(&__NR_fairsched_mknod);
        eval 'sub __NR_fairsched_rmnod () {505;}' unless defined(&__NR_fairsched_rmnod);
        eval 'sub __NR_fairsched_chwt () {506;}' unless defined(&__NR_fairsched_chwt);
        eval 'sub __NR_fairsched_mvpr () {507;}' unless defined(&__NR_fairsched_mvpr);
        eval 'sub __NR_fairsched_rate () {508;}' unless defined(&__NR_fairsched_rate);
        eval 'sub __NR_setluid () {501;}' unless defined(&__NR_setluid);
        eval 'sub __NR_setublimit () {502;}' unless defined(&__NR_setublimit);
    } elsif (defined(&__i386__)) {
        eval 'sub __NR_fairsched_mknod () {500;}' unless defined(&__NR_fairsched_mknod);
        eval 'sub __NR_fairsched_rmnod () {501;}' unless defined(&__NR_fairsched_rmnod);
        eval 'sub __NR_fairsched_chwt () {502;}' unless defined(&__NR_fairsched_chwt);
        eval 'sub __NR_fairsched_mvpr () {503;}' unless defined(&__NR_fairsched_mvpr);
        eval 'sub __NR_fairsched_rate () {504;}' unless defined(&__NR_fairsched_rate);
        eval 'sub __NR_fairsched_vcpus () {505;}' unless defined(&__NR_fairsched_vcpus);
        eval 'sub __NR_setluid () {511;}' unless defined(&__NR_setluid);
        eval 'sub __NR_setublimit () {512;}' unless defined(&__NR_setublimit);
    } else {
        die("no fairsched syscall for this arch");
    }
    require 'asm/ioctl.ph';
    # Request number 0x00 in ioctl group 0xAE; written out in full instead
    # of the digit-less hex literal '0x'.
    eval 'sub KVM_GET_API_VERSION () { &_IO(0xAE, 0x00);}' unless defined(&KVM_GET_API_VERSION);
}
70
# Invoke the syscall numbered __NR_fairsched_mknod.  All three arguments
# are truncated to integers first; the raw syscall result is returned.
sub fairsched_mknod {
    my ($parent, $weight, $desired) = @_;

    my @syscall_args = (int($parent), int($weight), int($desired));

    return syscall(__NR_fairsched_mknod(), @syscall_args);
}
76
# Invoke the syscall numbered __NR_fairsched_rmnod with the integer value
# of $id and return the raw syscall result.
sub fairsched_rmnod {
    my ($id) = @_;

    my $node_id = int($id);

    return syscall(__NR_fairsched_rmnod(), $node_id);
}
82
# Invoke the syscall numbered __NR_fairsched_mvpr with $pid and $newid
# (both truncated to integers) and return the raw syscall result.
sub fairsched_mvpr {
    my ($pid, $newid) = @_;

    my @syscall_args = (int($pid), int($newid));

    return syscall(__NR_fairsched_mvpr(), @syscall_args);
}
88
# Invoke the syscall numbered __NR_fairsched_vcpus with $id and $vcpus
# (both truncated to integers) and return the raw syscall result.
sub fairsched_vcpus {
    my ($id, $vcpus) = @_;

    my @syscall_args = (int($id), int($vcpus));

    return syscall(__NR_fairsched_vcpus(), @syscall_args);
}
94
# Invoke the syscall numbered __NR_fairsched_rate with $id, $op and $rate
# (all truncated to integers) and return the raw syscall result.
sub fairsched_rate {
    my ($id, $op, $rate) = @_;

    my @syscall_args = (int($id), int($op), int($rate));

    return syscall(__NR_fairsched_rate(), @syscall_args);
}
100
101use constant FAIRSCHED_SET_RATE => 0;
102use constant FAIRSCHED_DROP_RATE => 1;
103use constant FAIRSCHED_GET_RATE => 2;
104
# Apply a CPU limit to fairsched node $id.  $limit is rescaled by
# 1024/100 and truncated; a resulting rate of zero drops the rate
# (FAIRSCHED_DROP_RATE), any other value sets it (FAIRSCHED_SET_RATE).
# Returns whatever fairsched_rate() returns.
sub fairsched_cpulimit {
    my ($id, $limit) = @_;

    my $rate = int($limit * 1024 / 100);

    if (!$rate) {
        return fairsched_rate($id, FAIRSCHED_DROP_RATE(), $rate);
    }

    return fairsched_rate($id, FAIRSCHED_SET_RATE(), $rate);
}
113
114my $nodename = PVE::INotify::nodename();
115
116mkdir "/etc/pve/nodes/$nodename";
117my $confdir = "/etc/pve/nodes/$nodename/qemu-server";
118mkdir $confdir;
119
120my $var_run_tmpdir = "/var/run/qemu-server";
121mkdir $var_run_tmpdir;
122
123my $lock_dir = "/var/lock/qemu-server";
124mkdir $lock_dir;
125
126my $pcisysfs = "/sys/bus/pci";
127
128my $keymaphash = PVE::Tools::kvmkeymaps();
129
130my $confdesc = {
131 onboot => {
132 optional => 1,
133 type => 'boolean',
134 description => "Specifies whether a VM will be started during system bootup.",
135 default => 0,
136 },
137 autostart => {
138 optional => 1,
139 type => 'boolean',
140 description => "Automatic restart after crash (currently ignored).",
141 default => 0,
142 },
143 reboot => {
144 optional => 1,
145 type => 'boolean',
146 description => "Allow reboot. If set to '0' the VM exit on reboot.",
147 default => 1,
148 },
149 lock => {
150 optional => 1,
151 type => 'string',
152 description => "Lock/unlock the VM.",
153 enum => [qw(migrate backup)],
154 },
155 cpulimit => {
156 optional => 1,
157 type => 'integer',
158 description => "Limit of CPU usage in per cent. Note if the computer has 2 CPUs, it has total of 200% CPU time. Value '0' indicates no CPU limit.\n\nNOTE: This option is currently ignored.",
159 minimum => 0,
160 default => 0,
161 },
162 cpuunits => {
163 optional => 1,
164 type => 'integer',
165 description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to weights of all the other running VMs.\n\nNOTE: You can disable fair-scheduler configuration by setting this to 0.",
166 minimum => 0,
167 maximum => 500000,
168 default => 1000,
169 },
170 memory => {
171 optional => 1,
172 type => 'integer',
173 description => "Amount of RAM for the VM in MB.",
174 minimum => 16,
175 default => 512,
176 },
177 keyboard => {
178 optional => 1,
179 type => 'string',
180 description => "Keybord layout for vnc server. Default is read from the datacenter configuration file.",
181 enum => [ keys %$keymaphash ],
182 default => 'en-us',
183 },
184 name => {
185 optional => 1,
186 type => 'string',
187 description => "Set a name for the VM. Only used on the configuration web interface.",
188 },
189 description => {
190 optional => 1,
191 type => 'string',
192 description => "Description for the VM. Only used on the configuration web interface.",
193 },
194 ostype => {
195 optional => 1,
196 type => 'string',
197 enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 l24 l26)],
198 description => <<EODESC,
199Used to enable special optimization/features for specific
200operating systems:
201
202other => unspecified OS
203wxp => Microsoft Windows XP
204w2k => Microsoft Windows 2000
205w2k3 => Microsoft Windows 2003
206w2k8 => Microsoft Windows 2008
207wvista => Microsoft Windows Vista
208win7 => Microsoft Windows 7
209l24 => Linux 2.4 Kernel
210l26 => Linux 2.6/3.X Kernel
211
212other|l24|l26 ... no special behaviour
213wxp|w2k|w2k3|w2k8|wvista|win7 ... use --localtime switch
214EODESC
215 },
216 boot => {
217 optional => 1,
218 type => 'string',
219 description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n).",
220 pattern => '[acdn]{1,4}',
221 default => 'cad',
222 },
223 bootdisk => {
224 optional => 1,
225 type => 'string', format => 'pve-qm-bootdisk',
226 description => "Enable booting from specified disk.",
227 pattern => '(ide|scsi|virtio)\d+',
228 },
229 smp => {
230 optional => 1,
231 type => 'integer',
232 description => "The number of CPUs. Please use option -sockets instead.",
233 minimum => 1,
234 default => 1,
235 },
236 sockets => {
237 optional => 1,
238 type => 'integer',
239 description => "The number of CPU sockets.",
240 minimum => 1,
241 default => 1,
242 },
243 cores => {
244 optional => 1,
245 type => 'integer',
246 description => "The number of cores per socket.",
247 minimum => 1,
248 default => 1,
249 },
250 acpi => {
251 optional => 1,
252 type => 'boolean',
253 description => "Enable/disable ACPI.",
254 default => 1,
255 },
256 kvm => {
257 optional => 1,
258 type => 'boolean',
259 description => "Enable/disable KVM hardware virtualization.",
260 default => 1,
261 },
262 tdf => {
263 optional => 1,
264 type => 'boolean',
265 description => "Enable/disable time drift fix.",
266 default => 1,
267 },
268 localtime => {
269 optional => 1,
270 type => 'boolean',
271 description => "Set the real time clock to local time. This is enabled by default if ostype indicates a Microsoft OS.",
272 },
273 freeze => {
274 optional => 1,
275 type => 'boolean',
276 description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
277 },
278 vga => {
279 optional => 1,
280 type => 'string',
281 description => "Select VGA type. If you want to use high resolution modes (>= 1280x1024x16) then you should use option 'std' or 'vmware'. Default is 'std' for win7/w2k8, and 'cirrur' for other OS types",
282 enum => [qw(std cirrus vmware)],
283 },
284 hostpci => {
285 optional => 1,
286 type => 'string', format => 'pve-qm-hostpci',
287 typetext => "HOSTPCIDEVICE { , HOSTPCIDEVICE }",
288 description => <<EODESCR,
289Map host pci devices. HOSTPCIDEVICE syntax is:
290
291'bus:dev.func' (hexadecimal numbers)
292
293You can us the 'lspci' command to list existing pci devices.
294
295Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.
296
297Experimental: user reported problems with this option.
298EODESCR
299 },
300 serial => {
301 optional => 1,
302 type => 'string', format => 'pve-qm-serial',
303 typetext => "SERIALDEVICE { , SERIALDEVICE }",
304 description => <<EODESCR,
305Map host serial devices. SERIALDEVICE syntax is /dev/ttyS*
306
307Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.
308
309Experimental: user reported problems with this option.
310EODESCR
311 },
312 parallel => {
313 optional => 1,
314 type => 'string', format => 'pve-qm-parallel',
315 typetext => "PARALLELDEVICE { , PARALLELDEVICE }",
316 description => <<EODESCR,
317Map host parallel devices. PARALLELDEVICE syntax is /dev/parport*
318
319Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.
320
321Experimental: user reported problems with this option.
322EODESCR
323 },
324 startdate => {
325 optional => 1,
326 type => 'string',
327 typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
328 description => "Set the initial date of the real time clock. Valid format for date are: 'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
329 pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
330 default => 'now',
331 },
332 args => {
333 optional => 1,
334 type => 'string',
335 description => <<EODESCR,
336Note: this option is for experts only. It allows you to pass arbitrary arguments to kvm, for example:
337
338args: -no-reboot -no-hpet
339EODESCR
340 },
341 tablet => {
342 optional => 1,
343 type => 'boolean',
344 default => 1,
345 description => "Enable/disable the usb tablet device. This device is usually needed to allow absolute mouse positioning. Else the mouse runs out of sync with normal vnc clients. If you're running lots of console-only guests on one host, you may consider disabling this to save some context switches.",
346 },
347 migrate_speed => {
348 optional => 1,
349 type => 'integer',
350 description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
351 minimum => 0,
352 default => 0,
353 },
354 migrate_downtime => {
355 optional => 1,
356 type => 'integer',
357 description => "Set maximum tolerated downtime (in seconds) for migrations.",
358 minimum => 0,
359 default => 1,
360 },
361 cdrom => {
362 optional => 1,
363 type => 'string', format => 'pve-qm-drive',
364 typetext => 'volume',
365 description => "This is an alias for option -ide2",
366 },
367 cpu => {
368 optional => 1,
369 description => "Emulated CPU type.",
370 type => 'string',
371 enum => [ qw(486 athlon pentium pentium2 pentium3 coreduo core2duo kvm32 kvm64 qemu32 qemu64 phenom host) ],
372 default => 'qemu64',
373 },
374};
375
376# what about other qemu settings ?
377#cpu => 'string',
378#machine => 'string',
379#fda => 'file',
380#fdb => 'file',
381#mtdblock => 'file',
382#sd => 'file',
383#pflash => 'file',
384#snapshot => 'bool',
385#bootp => 'file',
386##tftp => 'dir',
387##smb => 'dir',
388#kernel => 'file',
389#append => 'string',
390#initrd => 'file',
391##soundhw => 'string',
392
393while (my ($k, $v) = each %$confdesc) {
394 PVE::JSONSchema::register_standard_option("pve-qm-$k", $v);
395}
396
397my $MAX_IDE_DISKS = 4;
f62db2a4
DA
398my $MAX_SCSI_DISKS = 14;
399my $MAX_VIRTIO_DISKS = 6;
1e3baf05 400my $MAX_USB_DEVICES = 5;
f62db2a4 401my $MAX_NETS = 6;
1e3baf05
DM
402my $MAX_UNUSED_DISKS = 8;
403
404my $nic_model_list = ['rtl8139', 'ne2k_pci', 'e1000', 'pcnet', 'virtio',
405 'ne2k_isa', 'i82551', 'i82557b', 'i82559er'];
406my $nic_model_list_txt = join (' ', sort @$nic_model_list);
407
408# fixme:
409my $netdesc = {
410 optional => 1,
411 type => 'string', format => 'pve-qm-net',
412 typetext => "MODEL=XX:XX:XX:XX:XX:XX [,bridge=<dev>][,rate=<mbps>]",
413 description => <<EODESCR,
414Specify network devices.
415
416MODEL is one of: $nic_model_list_txt
417
418XX:XX:XX:XX:XX:XX should be an unique MAC address. This is
419automatically generated if not specified.
420
421The bridge parameter can be used to automatically add the interface to a bridge device. The Proxmox VE standard bridge is called 'vmbr0'.
422
423Option 'rate' is used to limit traffic bandwidth from and to this interface. It is specified as floating point number, unit is 'Megabytes per second'.
424
425If you specify no bridge, we create a kvm 'user' (NATed) network device, which provides DHCP and DNS services. The following addresses are used:
426
42710.0.2.2 Gateway
42810.0.2.3 DNS Server
42910.0.2.4 SMB Server
430
431The DHCP server assign addresses to the guest starting from 10.0.2.15.
432
433EODESCR
434};
435PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
436
437for (my $i = 0; $i < $MAX_NETS; $i++) {
438 $confdesc->{"net$i"} = $netdesc;
439}
440
441my $drivename_hash;
442
443my $idedesc = {
444 optional => 1,
445 type => 'string', format => 'pve-qm-drive',
446 typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback] [,format=f] [,backup=yes|no] [,aio=native|threads]',
447 description => "Use volume as IDE hard disk or CD-ROM (n is 0 to 3).",
448};
449PVE::JSONSchema::register_standard_option("pve-qm-ide", $idedesc);
450
# Schema entry shared by all scsiN options.  The documented index range is
# derived from $MAX_SCSI_DISKS so it cannot drift from the real limit.
my $scsidesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-drive',
    typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback] [,format=f] [,backup=yes|no] [,aio=native|threads]',
    description => "Use volume as SCSI hard disk or CD-ROM (n is 0 to " . ($MAX_SCSI_DISKS - 1) . ").",
};
PVE::JSONSchema::register_standard_option("pve-qm-scsi", $scsidesc);
458
# Schema entry shared by all virtioN options.  The documented index range
# is derived from $MAX_VIRTIO_DISKS so it cannot drift from the real limit.
my $virtiodesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-drive',
    typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback] [,format=f] [,backup=yes|no] [,aio=native|threads]',
    description => "Use volume as VIRTIO hard disk (n is 0 to " . ($MAX_VIRTIO_DISKS - 1) . ").",
};
PVE::JSONSchema::register_standard_option("pve-qm-virtio", $virtiodesc);
466
# Schema entry shared by all usbN options.  The documented index range is
# derived from $MAX_USB_DEVICES (usb0 .. usb<MAX-1> are registered).
my $usbdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-usb-device',
    typetext => 'host=HOSTUSBDEVICE',
    description => <<EODESCR,
Configure an USB device (n is 0 to @{[ $MAX_USB_DEVICES - 1 ]}). This can be used to
pass-through usb devices to the guest. HOSTUSBDEVICE syntax is:

'bus-port(.port)*' (decimal numbers) or
'vendor_id:product_id' (hexadecimal numbers)

You can use the 'lsusb -t' command to list existing usb devices.

Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.

EODESCR
};
PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);
485
486
487for (my $i = 0; $i < $MAX_IDE_DISKS; $i++) {
488 $drivename_hash->{"ide$i"} = 1;
489 $confdesc->{"ide$i"} = $idedesc;
490}
491
492for (my $i = 0; $i < $MAX_SCSI_DISKS; $i++) {
493 $drivename_hash->{"scsi$i"} = 1;
494 $confdesc->{"scsi$i"} = $scsidesc ;
495}
496
497for (my $i = 0; $i < $MAX_VIRTIO_DISKS; $i++) {
498 $drivename_hash->{"virtio$i"} = 1;
499 $confdesc->{"virtio$i"} = $virtiodesc;
500}
501
502for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) {
503 $confdesc->{"usb$i"} = $usbdesc;
504}
505
506my $unuseddesc = {
507 optional => 1,
508 type => 'string', format => 'pve-volume-id',
509 description => "Reference to unused volumes.",
510};
511
512for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) {
513 $confdesc->{"unused$i"} = $unuseddesc;
514}
515
# Cached result of the KVM_GET_API_VERSION ioctl (0 means "not known yet").
my $kvm_api_version = 0;

# Return the KVM API version reported by /dev/kvm.  The value is queried
# once and cached; 0 is returned when /dev/kvm cannot be opened, and the
# cache stays 0 when the ioctl yields a false value.
sub kvm_version {

    return $kvm_api_version if $kvm_api_version;

    my $fh = IO::File->new("</dev/kvm");
    return 0 if !$fh;

    my $version = $fh->ioctl(KVM_GET_API_VERSION(), 0);
    $kvm_api_version = $version if $version;

    $fh->close();

    return $kvm_api_version;
}
533
# Cached version string of the 'kvm' userspace binary.
my $kvm_user_version;

# Return the version printed in the first line of `kvm -help`
# ("QEMU [PC] emulator version X.Y.Z "), or 'unknown' when the output does
# not match.  The result is computed once and cached.
sub kvm_user_version {

    return $kvm_user_version if $kvm_user_version;

    my $help_text = `kvm -help 2>/dev/null`;

    my ($version) = $help_text =~ m/^QEMU(?: PC)? emulator version (\d+\.\d+\.\d+) /;

    $kvm_user_version = defined($version) ? $version : 'unknown';

    return $kvm_user_version;
}
551
552my $kernel_has_vhost_net = -c '/dev/vhost-net';
553
# Return the list of all possible disk option names: ide*, then scsi*,
# then virtio*.  The order is significant because it is used to
# autoselect the boot disk.
sub disknames {
    # expands a bus name into "<bus>0" .. "<bus><count-1>"
    my $names_for = sub {
        my ($bus, $count) = @_;
        return map { "$bus$_" } (0 .. ($count - 1));
    };

    return ($names_for->('ide', $MAX_IDE_DISKS),
            $names_for->('scsi', $MAX_SCSI_DISKS),
            $names_for->('virtio', $MAX_VIRTIO_DISKS));
}
560
# True when $dev is one of the drive names registered in $drivename_hash.
sub valid_drivename {
    my ($dev) = @_;

    my $entry = $drivename_hash->{$dev};

    return defined($entry);
}
566
# True when $key is a configuration option described in $confdesc.
sub option_exists {
    my ($key) = @_;

    my $desc = $confdesc->{$key};

    return defined($desc);
}
571
# Return the array reference holding the supported NIC model names.
# Note that the shared list itself is returned, not a copy.
sub nic_models {
    my $models = $nic_model_list;

    return $models;
}
575
# Return a hash reference mapping each ostype identifier to a short
# human readable label.  A fresh hash is built on every call.
sub os_list_description {

    return {
        other  => 'Other',
        wxp    => 'Windows XP',
        w2k    => 'Windows 2000',
        w2k3   => 'Windows 2003',
        w2k8   => 'Windows 2008',
        wvista => 'Windows Vista',
        win7   => 'Windows 7',
        l24    => 'Linux 2.4',
        l26    => 'Linux 2.6',
    };
}
590
# Split an argument string into an array reference of words.
# This is done the clumsy way: the string is appended to a small perl
# one-liner command line that prints each element of @ARGV on its own
# line, and the output lines are collected.
# Returns an empty array reference for an empty/false $str; dies with
# "unable to parse args" when the helper command fails.
# fixme: use Text::ParseWords::shellwords() ?
# NOTE(review): $str is appended verbatim to the command string handed to
# run_command - presumably only trusted input reaches this; confirm.
sub split_args {
    my ($str) = @_;

    my @words;

    return \@words if !$str;

    my $cmd = 'perl -e \'foreach my $a (@ARGV) { print "$a\n"; } \' -- ' . $str;

    eval {
        run_command($cmd, outfunc => sub {
            my ($line) = @_;
            push @words, $line;
        });
    };

    die "unable to parse args: $str\n" if $@;

    return \@words;
}
616
# Decompose a disk option name such as 'ide2', 'scsi8' or 'virtio0'.
# Returns a hash reference with the keys bus, index, controller, unit and
# desc ("<BUS> <controller>:<unit>").  IDE uses 2 units per controller,
# SCSI 7, any other bus 1024.  Dies when $dev is not of the form
# (ide|scsi|virtio)<digits>.
sub disk_devive_info {
    my $dev = shift;

    my ($bus, $index) = $dev =~ m/^(ide|scsi|virtio)(\d+)$/
        or die "unknown disk device format '$dev'";

    my %units_per_controller = (ide => 2, scsi => 7);
    my $maxdev = $units_per_controller{$bus} || 1024;

    my $unit = $index % $maxdev;
    my $controller = int($index / $maxdev);

    return {
        bus        => $bus,
        desc       => uc($bus) . " $controller:$unit",
        controller => $controller,
        unit       => $unit,
        index      => $index,
    };
}
640
# Build the qemu drive name for disk option $dev.
# IDE and SCSI drives are named "<bus><controller>-cd<unit>" when $media
# is 'cdrom' and "<bus><controller>-hd<unit>" otherwise; every other bus
# is named "<bus><index>".
sub qemu_drive_name {
    my ($dev, $media) = @_;

    my $info = disk_devive_info($dev);
    my $bus = $info->{bus};

    if ($bus ne 'ide' && $bus ne 'scsi') {
        return sprintf("%s%i", $bus, $info->{index});
    }

    my $mediastr = ($media eq 'cdrom') ? "-cd" : "-hd";

    return sprintf("%s%i%s%i", $bus, $info->{controller},
                   $mediastr, $info->{unit});
}
655
# Cached path of the host CD-ROM device link.
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1 and /dev/cdrom2 that exists
# as a symbolic link.  A hit is cached for later calls.  Returns undef
# explicitly when none of the links exists (nothing is cached then).
sub get_cdrom_path {

    return $cdrom_path if $cdrom_path;

    foreach my $candidate (qw(/dev/cdrom /dev/cdrom1 /dev/cdrom2)) {
        return $cdrom_path = $candidate if -l $candidate;
    }

    return undef;
}
666
# Resolve the file setting of a CD-ROM drive to a path:
#   'cdrom'          -> result of get_cdrom_path()
#   'none'           -> empty string
#   absolute path    -> returned unchanged
#   anything else    -> resolved through PVE::Storage::path()
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';

    return '' if $cdrom eq 'none';

    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
680
# Try to convert an old style file name into a volume ID.
# 'none', 'cdrom', /dev/... paths and anything that already looks like
# "<storage>:<name>" are returned unchanged.  Other names containing a
# slash cannot be converted (undef).  Plain names become
# "local:iso/<file>" for cdrom media and "local:<vmid>/<file>" otherwise.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    my $keep_as_is = $file eq 'none' || $file eq 'cdrom' ||
        $file =~ m|^/dev/.+| || $file =~ m/^([^:]+):(.+)$/;

    return $file if $keep_as_is;

    return undef if $file =~ m|/|;

    return "local:iso/$file" if $media && $media eq 'cdrom';

    return "local:$vmid/$file";
}
699
# Check that volume type $vtype fits the drive's $media setting
# ('disk' expects 'image', 'cdrom' expects 'iso').  Does nothing when
# $media is not set.  Raises a parameter exception for option $opt on a
# mismatch and dies with "internal error" for any other $media value.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %expected_type_for = (disk => 'image', cdrom => 'iso');

    die "internal error" if !exists($expected_type_for{$media});

    my $etype = $expected_type_for{$media};

    return if ($vtype eq $etype);

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
718
# Normalize $drive->{file} in place: a value that is neither
# 'cdrom'/'none', a /dev/ path, a "<storage>:<name>" volume ID nor a plain
# number is treated as a filesystem path and converted to a volume ID via
# PVE::Storage::path_to_volume_id().  Raises a parameter exception for
# $opt when no storage matches or the media type does not fit.
# media defaults to 'cdrom' for iso volumes and for 'cdrom'/'none'.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    my $file = $drive->{file};

    my $already_clean = $file =~ m/^(cdrom|none)$/ ||
        $file =~ m|^/dev/.+| ||
        $file =~ m/^([^:]+):(.+)$/ ||
        $file =~ m/^\d+$/;

    if (!$already_clean) {
        # try to convert the filesystem path to a volume ID
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $drive->{file});
        raise_param_exc({ $opt => "unable to associate path '$drive->{file}' to any storage"}) if !$vtype;
        $drive->{media} = 'cdrom' if !$drive->{media} && $vtype eq 'iso';
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
737
# Write a new configuration file for VM $vmid from the $settings hash.
# Caller is expected to hold the config lock (hence "_nolock").
# Dies when the configuration file already exists.  $settings is modified:
# name defaults to "vm<vmid>" and memory to the default memory size.
# Only options known to $confdesc are written; undefined or empty values
# are skipped, but an explicit 0 is kept so that options whose default is
# 1 (for example kvm or acpi) can be switched off at creation time.
sub create_conf_nolock {
    my ($vmid, $settings) = @_;

    my $filename = config_file($vmid);

    die "configuration file '$filename' already exists\n" if -f $filename;

    my $defaults = load_defaults();

    $settings->{name} = "vm$vmid" if !$settings->{name};
    $settings->{memory} = $defaults->{memory} if !$settings->{memory};

    my $data = '';
    # sorted so the generated file has a stable option order
    foreach my $opt (sort keys %$settings) {
        next if !$confdesc->{$opt};

        my $value = $settings->{$opt};
        next if !defined($value) || $value eq '';

        $data .= "$opt: $value\n";
    }

    PVE::Tools::file_set_contents($filename, $data);
}
762
# ideX = [volume=]volume-id[,media=d][,cyls=c,heads=h,secs=s[,trans=t]]
#        [,snapshot=on|off][,cache=off|none|writethrough|writeback]
#        [,format=f][,rerror=r][,werror=w][,backup=yes|no]
#        [,aio=native|threads]
#
# Parse a drive property string.  $key is the option name (e.g. 'ide0');
# it may be undefined when only the syntax of $data is to be verified.
# Returns a hash reference with 'interface', 'index', 'file' and every
# option that was given, or undef when $key or $data is not acceptable.
sub parse_drive {
    my ($key, $data) = @_;

    # placeholder values should not harm when used to verify parameters
    my $res = { interface => 'unknown', index => 0 };

    if (defined($key)) {
        return undef if $key !~ m/^([^\d]+)(\d+)$/;
        @$res{qw(interface index)} = ($1, $2);
    }

    foreach my $part (split(/,/, $data)) {
        next if $part =~ m/^\s*$/;

        if (my ($k, $v) = $part =~ m/^(file|volume|cyls|heads|secs|trans|media|snapshot|cache|format|rerror|werror|backup|aio)=(.+)$/) {
            $k = 'file' if $k eq 'volume';

            # every option may be given only once
            return undef if defined $res->{$k};

            $res->{$k} = $v;
            next;
        }

        # a bare word is only accepted as the (first) file name
        return undef if $res->{file} || $part =~ m/=/;

        $res->{file} = $part;
    }

    return undef if !$res->{file};

    # allowed values per option; options that are unset (or false) are not checked
    my @value_checks = (
        [ cache    => qr/^(off|none|writethrough|writeback)$/ ],
        [ snapshot => qr/^(on|off)$/ ],
        [ cyls     => qr/^\d+$/ ],
        [ heads    => qr/^\d+$/ ],
        [ secs     => qr/^\d+$/ ],
        [ media    => qr/^(disk|cdrom)$/ ],
        [ trans    => qr/^(none|lba|auto)$/ ],
        [ format   => qr/^(raw|cow|qcow|qcow2|vmdk|cloop)$/ ],
        [ rerror   => qr/^(ignore|report|stop)$/ ],
        [ werror   => qr/^(enospc|ignore|report|stop)$/ ],
        [ backup   => qr/^(yes|no)$/ ],
        [ aio      => qr/^(native|threads)$/ ],
    );

    foreach my $check (@value_checks) {
        my ($opt, $allowed) = @$check;
        return undef if $res->{$opt} && $res->{$opt} !~ $allowed;
    }

    if ($res->{media} && ($res->{media} eq 'cdrom')) {
        # disk-only options are rejected for CD-ROM drives
        foreach my $opt (qw(snapshot trans format heads secs cyls)) {
            return undef if $res->{$opt};
        }
        return undef if $res->{interface} eq 'virtio';
    }

    # rerror does not work with scsi drives
    return undef if $res->{rerror} && $res->{interface} eq 'scsi';

    return $res;
}
832
# Drive options that are handed on to qemu, in output order.
my @qemu_drive_options = qw(heads secs cyls trans media format cache snapshot rerror werror aio);

# Serialize a parsed drive hash back into the property string stored in
# the VM configuration: "<file>[,<opt>=<value>...]".  Only options with a
# true value are emitted; 'backup' comes after the qemu options.
sub print_drive {
    my ($vmid, $drive) = @_;

    my @fields = map { "$_=$drive->{$_}" }
                 grep { $drive->{$_} } (@qemu_drive_options, 'backup');

    return join(',', "$drive->{file}", @fields);
}
845
# Build the value for a qemu drive definition from a parsed drive hash:
# "[file=<path>,]if=none,id=drive-<interface><index>[,<opt>=<value>...]".
# CD-ROM drives are resolved with get_iso_path(); other volumes are used
# as is when absolute, else resolved via PVE::Storage::path().
sub print_drive_full {
    my ($storecfg, $vmid, $drive) = @_;

    my @fields = map { "$_=$drive->{$_}" }
                 grep { $drive->{$_} } @qemu_drive_options;

    # use linux-aio by default (qemu default is threads)
    push @fields, "aio=native" if !$drive->{aio};

    my $volid = $drive->{file};
    my $path;
    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
    } elsif ($volid =~ m|^/|) {
        $path = $volid;
    } else {
        $path = PVE::Storage::path($storecfg, $volid);
    }

    # the file= part is left out when no path could be determined
    my $pathinfo = $path ? "file=$path," : '';

    return join(',', "${pathinfo}if=none",
                "id=drive-$drive->{interface}$drive->{index}", @fields);
}
873
874
# True when $drive is set and its media option is 'cdrom'.
sub drive_is_cdrom {
    my ($drive) = @_;

    return $drive if !$drive;

    my $media = $drive->{media};
    return $media if !$media;

    return ($media eq 'cdrom');
}
881
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a network device property string.  Returns a hash reference with
# model (lower case), macaddr (upper case; generated with
# random_ether_addr() when not given) and optionally bridge and rate.
# Returns undef on unknown parts or when no model is specified.
sub parse_net {
    my ($data) = @_;

    my $res = {};

    foreach my $kvp (split(/,/, $data)) {

        if ($kvp =~ m/^(ne2k_pci|e1000|rtl8139|pcnet|virtio|ne2k_isa|i82551|i82557b|i82559er)(?:=([0-9a-f]{2}(?::[0-9a-f]{2}){5}))?$/i) {
            my ($model, $mac) = ($1, $2);
            $res->{model} = lc($model);
            $res->{macaddr} = uc($mac) || random_ether_addr();
            next;
        }

        if ($kvp =~ m/^bridge=(\S+)$/) {
            $res->{bridge} = $1;
            next;
        }

        if ($kvp =~ m/^rate=(\d+(\.\d+)?)$/) {
            $res->{rate} = $1;
            next;
        }

        return undef;
    }

    return undef if !$res->{model};

    return $res;
}
909
# Serialize a parsed network hash into its property string:
# "<model>[=<macaddr>][,bridge=<bridge>][,rate=<rate>]".
sub print_net {
    my $net = shift;

    my $head = $net->{macaddr} ? "$net->{model}=$net->{macaddr}" : "$net->{model}";

    my @extras = map { "$_=$net->{$_}" } grep { $net->{$_} } qw(bridge rate);

    return join(',', $head, @extras);
}
920
# Rewrite every parsable netN entry of $settings in place by running it
# through parse_net() and print_net(), so that entries without a MAC
# address get the one parse_net() generates.  Entries that do not parse
# are left unchanged.
sub add_random_macs {
    my ($settings) = @_;

    my @net_opts = grep { m/^net(\d+)$/ } keys %$settings;

    foreach my $opt (@net_opts) {
        my $net = parse_net($settings->{$opt});
        if ($net) {
            $settings->{$opt} = print_net($net);
        }
    }
}
931
# Record $volid as an unused volume.  The unusedN slots of $config are
# scanned; when $volid is already listed nothing happens.  Otherwise the
# free slot with the lowest index is chosen and written to $res (not to
# $config).  Dies when all slots are occupied.
sub add_unused_volume {
    my ($config, $res, $volid) = @_;

    my $free_slot;
    # scan downwards so the lowest free index is the one that remains
    foreach my $ind (reverse(0 .. ($MAX_UNUSED_DISKS - 1))) {
        my $slot = "unused$ind";
        my $vid = $config->{$slot};
        if (!$vid) {
            $free_slot = $slot;
            next;
        }
        return if $vid eq $volid; # do not add duplicates
    }

    die "To many unused volume - please delete them first.\n" if !$free_slot;

    $res->{$free_slot} = $volid;
}
949
# fixme: remove all those $noerr parameters?
951
952PVE::JSONSchema::register_format('pve-qm-bootdisk', \&verify_bootdisk);
# Format check for 'pve-qm-bootdisk': returns $value when it is a valid
# drive name.  Otherwise returns undef if $noerr is set, else dies.
sub verify_bootdisk {
    my ($value, $noerr) = @_;

    if (!valid_drivename($value)) {
        return undef if $noerr;
        die "invalid boot disk '$value'\n";
    }

    return $value;
}
962
963PVE::JSONSchema::register_format('pve-qm-net', \&verify_net);
# Format check for 'pve-qm-net': returns $value when parse_net() accepts
# it.  Otherwise returns undef if $noerr is set, else dies.
sub verify_net {
    my ($value, $noerr) = @_;

    if (!parse_net($value)) {
        return undef if $noerr;
        die "unable to parse network options\n";
    }

    return $value;
}
973
974PVE::JSONSchema::register_format('pve-qm-drive', \&verify_drive);
# Format check for 'pve-qm-drive': returns $value when parse_drive()
# accepts it (called without an option name).  Otherwise returns undef if
# $noerr is set, else dies.
sub verify_drive {
    my ($value, $noerr) = @_;

    if (!parse_drive(undef, $value)) {
        return undef if $noerr;
        die "unable to parse drive options\n";
    }

    return $value;
}
984
985PVE::JSONSchema::register_format('pve-qm-hostpci', \&verify_hostpci);
# Format check for 'pve-qm-hostpci': $value is a comma separated list in
# which every element must look like 'bus:dev.func' (hexadecimal, e.g.
# '00:1f.2').  Returns $value when all elements match; otherwise returns
# undef if $noerr is set, else dies.
sub verify_hostpci {
    my ($value, $noerr) = @_;

    my @invalid = grep { $_ !~ m/^[a-f0-9]{2}:[a-f0-9]{2}\.[a-f0-9]$/i }
                  split(/,/, $value);

    return $value if !@invalid;

    return undef if $noerr;

    die "unable to parse pci id\n";
}
998
# Parse a usbN property string.  Every comma separated element must be
# either 'host=<vendor>:<product>' (four hex digits each; stored as
# vendorid/productid) or 'host=<bus>-<port>[.<port>...]' (decimal; stored
# as hostbus/hostport).  Returns the resulting hash reference, or undef
# for an empty value, an unknown element or when no element was found.
sub parse_usb_device {
    my ($value) = @_;

    return undef if !$value;

    my $res = {};
    my $found;

    foreach my $v (split(/,/, $value)) {
        if (my ($vendor, $product) = $v =~ m/^host=([0-9A-Fa-f]{4}):([0-9A-Fa-f]{4})$/) {
            @$res{qw(vendorid productid)} = ($vendor, $product);
        } elsif (my ($bus, $port) = $v =~ m/^host=(\d+)\-(\d+(\.\d+)*)$/) {
            @$res{qw(hostbus hostport)} = ($bus, $port);
        } else {
            return undef;
        }
        $found = 1;
    }

    return undef if !$found;

    return $res;
}
1025
1026PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);
# Format check for 'pve-qm-usb-device': returns $value when
# parse_usb_device() accepts it.  Otherwise returns undef if $noerr is
# set, else dies.
sub verify_usb_device {
    my ($value, $noerr) = @_;

    if (!parse_usb_device($value)) {
        return undef if $noerr;
        die "unable to parse usb device\n";
    }

    return $value;
}
1036
1037PVE::JSONSchema::register_format('pve-qm-parallel', \&verify_parallel);
# Format check for 'pve-qm-parallel': $value is a comma separated list in
# which every element must be of the form /dev/parport<digits>.  Returns
# $value when all elements match; otherwise returns undef if $noerr is
# set, else dies.
sub verify_parallel {
    my ($value, $noerr) = @_;

    my @invalid = grep { $_ !~ m|^/dev/parport\d+$| } split(/,/, $value);

    return $value if !@invalid;

    return undef if $noerr;

    die "invalid device name\n";
}
1050
1051PVE::JSONSchema::register_format('pve-qm-serial', \&verify_serial);
# Format check for 'pve-qm-serial': $value is a comma separated list in
# which every element must be of the form /dev/ttyS<digits>.  Returns
# $value when all elements match; otherwise returns undef if $noerr is
# set, else dies.
sub verify_serial {
    my ($value, $noerr) = @_;

    my @invalid = grep { $_ !~ m|^/dev/ttyS\d+$| } split(/,/, $value);

    return $value if !@invalid;

    return undef if $noerr;

    die "invalid device name\n";
}
1064
# Add the JSON schema properties for the create and set functions:
# every entry of $confdesc is copied into the $prop hash (existing keys
# with the same name are overwritten).  Returns $prop.
sub json_config_properties {
    my $prop = shift;

    @$prop{ keys %$confdesc } = values %$confdesc;

    return $prop;
}
1075
# Validate $value for configuration option $key according to $confdesc
# and return the normalized value:
#   boolean - 1 for 1/on/yes/true, 0 for 0/off/no/false
#   integer - the integer value of a digits-only string
#   string  - checked against the option's format, if any; without a
#             format one pair of enclosing double quotes is stripped
# Dies on unknown options, undefined values, embedded line breaks and
# values that fail the check.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);

    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    die "internal error" if $type ne 'string';

    my $fmt = $desc->{format};

    if (!$fmt) {
        $value =~ s/^\"(.*)\"$/$1/;
        return $value;
    }

    if ($fmt eq 'pve-qm-drive') {
        # special case - we need to pass $key to parse_drive()
        return $value if parse_drive($key, $value);
        die "unable to parse drive options\n";
    }

    PVE::JSONSchema::check_format($fmt, $value);

    return $value;
}
1115
# Run $code (with @param) via lock_file() on the per-VM lock file, using
# a timeout argument of 10. Any error left in $@ afterwards is re-thrown.
sub lock_config {
    my ($vmid, $code, @param) = @_;

    lock_file(config_file_lock($vmid), 10, $code, @param);

    if (my $err = $@) {
        die $err;
    }
}
1125
# Path of the VM config file relative to the cluster file system root.
sub cfs_config_path {
    my ($vmid) = @_;

    return join('/', 'nodes', $nodename, 'qemu-server', "$vmid.conf");
}
1131
# Absolute path of the VM config file below /etc/pve.
sub config_file {
    my ($vmid) = @_;

    return '/etc/pve/' . cfs_config_path($vmid);
}
1138
# Name of the lock file used to serialise actions on one VM.
sub config_file_lock {
    my ($vmid) = @_;

    return $lock_dir . "/lock-$vmid.conf";
}
1144
# Set access and modification time of the VM config file to "now".
sub touch_config {
    my ($vmid) = @_;

    utime(undef, undef, config_file($vmid));
}
1151
# Allocate new disk images for all drives in $settings whose 'file' is
# given as "[storage:]size", and verify that all other (non-cdrom)
# drives point to an existing file or block device.
#
# For newly allocated drives the entry in $settings is rewritten to
# reference the new volume. Returns an array ref with the allocated
# volume IDs. On any error the volumes allocated so far are freed again
# and the error is re-thrown.
sub create_disks {
    my ($storecfg, $vmid, $settings) = @_;

    my $vollist = [];

    my $handle_drive = sub {
        my ($ds, $disk) = @_;

        return if drive_is_cdrom($disk);

        my $file = $disk->{file};

        # "[storeid:]size" requests a new volume
        my @spec = $file =~ m/^(([^:\s]+):)?(\d+(\.\d+)?)$/;

        if (!@spec) {
            # existing image - only check that it is there
            my $path = ($file =~ m|^/dev/.+|)
                ? $file
                : PVE::Storage::path($storecfg, $file);

            die "image '$path' does not exists\n"
                if !(-f $path || -b $path);

            return;
        }

        my $storeid = $spec[1] || 'local';
        my $size = $spec[2];
        my $defformat = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $fmt = $disk->{format} || $defformat;
        syslog('info', "VM $vmid creating new disk - size is $size GB");

        my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid,
                                              $fmt, undef, $size*1024*1024);

        $disk->{file} = $volid;
        delete($disk->{format}); # no longer needed
        push @$vollist, $volid;
        $settings->{$ds} = PVE::QemuServer::print_drive($vmid, $disk);
    };

    eval { foreach_drive($settings, $handle_drive); };

    if (my $err = $@) {
        syslog('err', "VM $vmid creating disks failed");

        # roll back everything allocated so far (best effort)
        foreach my $volid (@$vollist) {
            eval { PVE::Storage::vdisk_free($storecfg, $volid); };
            warn $@ if $@;
        }

        die $err;
    }

    return $vollist;
}
1206
# Free a storage volume that is owned by VM $vmid.
# Refuses (dies) for absolute paths and for volumes whose owner, as
# reported by PVE::Storage::path(), is not this VM. Touches the VM
# config file afterwards.
sub unlink_image {
    my ($storecfg, $vmid, $volid) = @_;

    if ($volid =~ m|^/|) {
        die "reject to unlink absolute path '$volid'";
    }

    my ($path, $owner) = PVE::Storage::path($storecfg, $volid);

    unless ($owner && $owner == $vmid) {
        die "reject to unlink '$volid' - not owned by this VM";
    }

    syslog('info', "VM $vmid deleting volume '$volid'");

    PVE::Storage::vdisk_free($storecfg, $volid);

    touch_config($vmid);
}
1224
# Remove a VM: free all disks owned by it, delete its config file and
# finally free any remaining volumes the storage layer lists for $vmid.
#
# Dies if the config cannot be loaded or the VM is locked. Errors while
# removing the leftover (unused) volumes are only reported via warn.
sub destroy_vm {
    my ($storecfg, $vmid) = @_;

    my $conffile = config_file($vmid);

    my $conf = load_config($vmid);

    check_lock($conf);

    # only remove disks owned by this VM
    foreach_drive($conf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);

        my $volid = $drive->{file};
        # use 'return' (not 'next') to leave the callback - 'next' would
        # exit the subroutine through the caller's loop
        return if !$volid || $volid =~ m|^/|;

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        PVE::Storage::vdisk_free($storecfg, $volid);
    });

    unlink $conffile;

    # also remove unused disk
    eval {
        my $dl = PVE::Storage::vdisk_list($storecfg, undef, $vmid);

        PVE::Storage::foreach_volid($dl, sub {
            my ($volid, $sid, $volname, $d) = @_;
            PVE::Storage::vdisk_free($storecfg, $volid);
        });
    };
    warn $@ if $@;
}
1266
# fixme: remove?
# Return a hash (drive name => parsed drive) for all drives in $conf,
# with an additional 'disksize' entry per drive. The size is taken from
# file_size_info() for /dev/ paths and from the storage volume list
# otherwise, divided by 1024*1024; it is 0 when nothing is known.
sub load_diskinfo_old {
    my ($storecfg, $vmid, $conf) = @_;

    my $info = {};
    my $res = {};
    my $vollist;

    foreach_drive($conf, sub {
        my ($ds, $di) = @_;

        $res->{$ds} = $di;

        return if drive_is_cdrom($di);

        my $file = $di->{file};

        if ($file !~ m|^/dev/.+|) {
            # storage volume - looked up in one go below
            push @$vollist, $file;
            return;
        }

        $info->{$file}->{size} = PVE::Storage::file_size_info($file);
    });

    eval {
        my $dl = PVE::Storage::vdisk_list($storecfg, undef, $vmid, $vollist);

        PVE::Storage::foreach_volid($dl, sub {
            my ($volid, $sid, $volname, $d) = @_;
            $info->{$volid} = $d;
        });
    };
    warn $@ if $@;

    foreach my $di (values %$res) {
        my $entry = $info->{$di->{file}};
        $di->{disksize} = $entry ? $entry->{size} / (1024*1024) : 0;
    }

    return $res;
}
1308
# Read the parsed configuration of VM $vmid through the cluster file
# system layer. Dies when no configuration is returned.
sub load_config {
    my ($vmid) = @_;

    my $conf = PVE::Cluster::cfs_read_file(cfs_config_path($vmid));

    if (!defined($conf)) {
        die "no such VM ('$vmid')\n";
    }

    return $conf;
}
1320
# Parse the raw text of a VM config file into a hash reference.
#
# $filename must end in /qemu-server/<vmid>.conf (dies otherwise).
# Returns undef when $raw is undefined. The result contains a 'digest'
# (SHA1 of the raw text) plus one entry per valid "key: value" line.
# Lines with invalid values are reported via warn and skipped; lines
# matching none of the patterns are silently ignored. The legacy keys
# 'cdrom' and 'smp' are mapped to 'ide2' and 'sockets'.
sub parse_vm_config {
    my ($filename, $raw) = @_;

    return undef if !defined($raw);

    my $res = {
        digest => Digest::SHA1::sha1_hex($raw),
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    # consume $raw line by line
    while ($raw && $raw =~ s/^(.*?)(\n|$)//) {
        my $line = $1;

        # skip comments and blank lines
        next if $line =~ m/^\#/ || $line =~ m/^\s*$/;

        my ($key, $value);

        # free-text values are stored without type checking
        if (($key, $value) = $line =~ m/^(description):\s*(.*\S)\s*$/) {
            $res->{$key} = PVE::Tools::decode_text($value);
            next;
        }
        if (($key, $value) = $line =~ m/^(args):\s*(.*\S)\s*$/) {
            $res->{$key} = $value;
            next;
        }

        ($key, $value) = $line =~ m/^([a-z][a-z_]*\d*):\s*(\S+)\s*$/
            or next;

        my $checked = eval { check_type($key, $value) };
        if ($@) {
            warn "vm $vmid - unable to parse value of '$key' - $@";
            next;
        }
        $value = $checked;

        my $fmt = $confdesc->{$key}->{format};
        if ($fmt && $fmt eq 'pve-qm-drive') {
            # normalise the drive so that 'file' holds a volume ID
            my $drive = parse_drive($key, $value);
            my $volid = filename_to_volume_id($vmid, $drive->{file}, $drive->{media});
            if (!$volid) {
                warn "vm $vmid - unable to parse value of '$key'\n";
                next;
            }
            $drive->{file} = $volid;
            $value = print_drive($vmid, $drive);
        }

        my $target = $key eq 'cdrom' ? 'ide2' : $key;
        $res->{$target} = $value;
    }

    # convert old smp to sockets
    $res->{sockets} = $res->{smp} if $res->{smp} && !$res->{sockets};
    delete $res->{smp};

    return $res;
}
1386
# Apply $settings / $unset to the config of VM $vmid while holding the
# per-VM config lock.
#
# Note: the callback must be passed as a code reference. A bare
# '&change_config_nolock' (no parentheses) would call the function
# immediately with the current @_, i.e. before the lock is taken, and
# pass its return value on as the "code".
# lock_config() forwards the extra parameters to lock_file(); they are
# presumably handed to the callback unchanged, so $vmid has to be part
# of them - TODO confirm against PVE::Tools::lock_file.
sub change_config {
    my ($vmid, $settings, $unset, $skiplock) = @_;

    lock_config($vmid, \&change_config_nolock, $vmid, $settings, $unset, $skiplock);
}
1392
# Rewrite the config file of VM $vmid without taking the config lock.
#
# $settings: hash of options to set (values are validated by check_type)
# $unset:    hash of options to remove
# $skiplock: skip the check_lock() call
#
# The existing file is copied line by line into "<file>.<pid>.tmp",
# replacing or dropping options on the way; new options are appended.
# The temporary file is then renamed over the original. Dies on any
# validation, parse or I/O error (the temporary file is removed then).
sub change_config_nolock {
    my ($vmid, $settings, $unset, $skiplock) = @_;

    my $res = {};

    $unset->{ide2} = $unset->{cdrom} if $unset->{cdrom};

    # NOTE(review): this checks the 'lock' key of the new settings, not
    # of the stored configuration - confirm that this is intended
    check_lock($settings) if !$skiplock;

    # we do not use 'smp' any longer
    if (!$settings->{sockets} && $settings->{smp}) {
        $settings->{sockets} = $settings->{smp};
    }
    $unset->{smp} = 1 if $settings->{sockets};

    my $new_volids = {};

    foreach my $key (keys %$settings) {
        next if $key eq 'digest';

        my $value = $settings->{$key};
        $value = PVE::Tools::encode_text($value) if $key eq 'description';

        eval { $value = check_type($key, $value); };
        die "unable to parse value of '$key' - $@" if $@;

        my $target = $key eq 'cdrom' ? 'ide2' : $key;
        $res->{$target} = $value;

        next if !valid_drivename($key);

        # remember volumes that get (re-)attached
        my $drive = PVE::QemuServer::parse_drive($key, $value);
        $new_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
    }

    my $filename = config_file($vmid);
    my $tmpfn = "$filename.$$.tmp";

    my $fh = IO::File->new($filename, "r") ||
        die "unable to read config for VM $vmid\n";

    my $werror = "unable to write config for VM $vmid\n";

    my $out = IO::File->new($tmpfn, "w") || die $werror;

    # write one chunk of text to the temporary file or die
    my $emit = sub {
        my ($text) = @_;
        die $werror unless print $out $text;
    };

    eval {
        my $done = {};

        while (my $line = <$fh>) {

            # comments and blank lines are copied verbatim
            if ($line =~ m/^\#/ || $line =~ m/^\s*$/) {
                $emit->($line);
                next;
            }

            my ($key, $value) = $line =~ m/^([a-z][a-z_]*\d*):\s*(.*\S)\s*$/
                or die "unable to parse config file: $line\n";

            # remove 'unusedX' settings if we re-add a volume
            next if $key =~ m/^unused/ && $new_volids->{$value};

            # convert 'smp' to 'sockets'
            $key = 'sockets' if $key eq 'smp';

            # only the first occurrence of a key is kept
            next if $done->{$key};
            $done->{$key} = 1;

            $value = delete $res->{$key} if defined($res->{$key});

            $emit->("$key: $value\n") if !defined($unset->{$key});
        }

        # append options that were not present in the file yet
        foreach my $key (grep { !defined($unset->{$_}) } keys %$res) {
            $emit->("$key: $res->{$key}\n");
        }
    };

    my $err = $@;

    $fh->close();

    if ($err) {
        $out->close();
        unlink $tmpfn;
        die $err;
    }

    my $closed = $out->close();
    if (!$closed) {
        my $msg = "close failed - $!\n";
        unlink $tmpfn;
        die $msg;
    }

    if (!rename($tmpfn, $filename)) {
        my $msg = "rename failed - $!\n";
        unlink $tmpfn;
        die $msg;
    }
}
1509
# Build the hash of default option values: every 'default' declared in
# $confdesc, with 'keyboard' overridden by datacenter.cfg when set there.
sub load_defaults {

    # we use static defaults from our JSON schema configuration
    my %defaults =
        map { $_ => $confdesc->{$_}->{default} }
        grep { defined($confdesc->{$_}->{default}) }
        keys %$confdesc;

    my $res = \%defaults;

    my $conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
    if ($conf->{keyboard}) {
        $res->{keyboard} = $conf->{keyboard};
    }

    return $res;
}
1526
# Return a hash { vmid => { exists => 1 } } for all VMs that the cluster
# VM list assigns to the local node. Returns an empty hash when no list
# is available.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();
    my $res = {};

    my $ids = $vmlist ? $vmlist->{ids} : undef;
    return $res if !$ids;

    my $nodename = PVE::INotify::nodename();

    my @local_vmids = grep {
        my $node = $ids->{$_}->{node};
        $node && $node eq $nodename;
    } keys %$ids;

    $res->{$_}->{exists} = 1 foreach @local_vmids;

    return $res;
}
1541
# Die when the given configuration hash carries a true 'lock' entry.
sub check_lock {
    my ($conf) = @_;

    my $lock = $conf->{lock};

    die "VM is locked ($lock)\n" if $lock;
}
1547
# Check whether process $pid is a kvm process that was started with
# "-pidfile $pidfile" (or "--pidfile"), by inspecting /proc/$pid/cmdline.
# Returns 1 on a match and a false value otherwise.
sub check_cmdline {
    my ($pidfile, $pid) = @_;

    my $fh = IO::File->new("/proc/$pid/cmdline", "r");
    return undef if !defined($fh);

    my $line = <$fh>;
    $fh->close;
    return undef if !$line;

    # arguments are separated by NUL bytes
    my @param = split(/\0/, $line);

    my $cmd = $param[0];
    return if !$cmd || ($cmd !~ m|kvm$|);

    foreach my $i (0 .. $#param) {
        my $opt = $param[$i];
        next if !$opt;
        next if $opt ne '-pidfile' && $opt ne '--pidfile';

        # only the first pidfile option is considered
        my $arg = $param[$i+1];
        return 1 if $arg && ($arg eq $pidfile);
        return undef;
    }

    return undef;
}
1573
# Return the PID of the running kvm process of VM $vmid, or undef when
# the VM is not running.
#
# Dies when the VM has no config file. The PID is read from the pidfile
# and only accepted if /proc/<pid> exists and check_cmdline() confirms
# that the process was started with this pidfile.
sub check_running {
    my ($vmid) = @_;

    my $filename = config_file($vmid);

    die "unable to find configuration file for VM $vmid - no such machine\n"
        if ! -f $filename;

    my $pidfile = pidfile_name($vmid);

    # explicit read mode - the file name is never interpreted as a mode
    if (my $fd = IO::File->new($pidfile, "r")) {
        my $st = stat($fd);
        my $line = <$fd>;
        close($fd);

        my $mtime = $st->mtime;
        if ($mtime > time()) {
            # the timestamp checked here belongs to the pidfile
            warn "file '$pidfile' modified in future\n";
        }

        # an empty pidfile yields an undefined line
        if (defined($line) && $line =~ m/^(\d+)$/) {
            my $pid = $1;

            return $pid if ((-d "/proc/$pid") && check_cmdline($pidfile, $pid));
        }
    }

    return undef;
}
1603
# Return the local VM list from config_list(), with a 'pid' entry added
# for every VM that has a "<vmid>.pid" file in $var_run_tmpdir and is
# reported as running by check_running().
sub vzlist {

    my $vzlist = config_list();

    my $dir = IO::Dir->new($var_run_tmpdir);
    return $vzlist if !$dir;

    while (defined(my $de = $dir->read)) {
        my ($vmid) = $de =~ m/^(\d+)\.pid$/
            or next;

        # ignore pidfiles of VMs we do not know
        next if !defined($vzlist->{$vmid});

        my $pid = check_running($vmid);
        $vzlist->{$vmid}->{pid} = $pid if $pid;
    }

    return $vzlist;
}
1621
# time of the last failed size query, per storage ID (or per absolute path)
my $storage_timeout_hash = {};

# Determine the size of the boot disk of a VM configuration.
#
# Returns ($size, $used) in list context and $size in scalar context, as
# reported by PVE::Storage::file_size_info(). Returns undef when there
# is no usable boot disk, when the size query reports no format (treated
# as a timeout), or when the same storage failed less than 30 seconds ago.
sub disksize {
    my ($storecfg, $conf) = @_;

    my $bootdisk = $conf->{bootdisk};
    return undef
        if !$bootdisk || !valid_drivename($bootdisk) || !$conf->{$bootdisk};

    my $drive = parse_drive($bootdisk, $conf->{$bootdisk});
    return undef if !defined($drive) || drive_is_cdrom($drive);

    my $volid = $drive->{file};
    return undef if !$volid;

    my ($path, $timeoutid);

    if ($volid =~ m|^/|) {
        # absolute path - used directly, also as the timeout key
        $path = $volid;
        $timeoutid = $volid;
    } else {
        $timeoutid = PVE::Storage::parse_volume_id($volid);
        $path = PVE::Storage::path($storecfg, $volid);
    }

    if (my $last_timeout = $storage_timeout_hash->{$timeoutid}) {
        # skip storage with errors
        return undef if (time() - $last_timeout) < 30;

        delete $storage_timeout_hash->{$timeoutid};
    }

    my ($size, $format, $used) = PVE::Storage::file_size_info($path, 1);

    if (!defined($format)) {
        # got timeout
        $storage_timeout_hash->{$timeoutid} = time();
        return undef;
    }

    return wantarray ? ($size, $used) : $size;
}
1673
# CPU accounting of the previous vmstatus() call, keyed by PID
my $last_proc_pid_stat;

# Collect status information for all local VMs (or only $opt_vmid).
#
# Returns a hash { vmid => { pid, status, disk, maxdisk, cpus, name,
# maxmem, uptime, cpu, relcpu, mem, netin, netout, diskread,
# diskwrite } }. Counters that cannot be determined stay 0. CPU usage is
# computed relative to the values remembered from the previous call, so
# the first call for a PID reports 0.
sub vmstatus {
    my ($opt_vmid) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my ($uptime) = PVE::ProcFSTools::read_proc_uptime();

    # static information from the configuration
    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);

        my $cfspath = cfs_config_path($vmid);
        my $conf = PVE::Cluster::cfs_read_file($cfspath) || {};

        my $d = {};
        $d->{pid} = $list->{$vmid}->{pid};

        # fixme: better status?
        $d->{status} = $list->{$vmid}->{pid} ? 'running' : 'stopped';

        my ($size, $used) = disksize($storecfg, $conf);
        if (defined($size) && defined($used)) {
            $d->{disk} = $used;
            $d->{maxdisk} = $size;
        } else {
            $d->{disk} = 0;
            $d->{maxdisk} = 0;
        }

        $d->{cpus} = ($conf->{sockets} || 1) * ($conf->{cores} || 1);
        $d->{name} = $conf->{name} || "VM $vmid";
        $d->{maxmem} = $conf->{memory} ? $conf->{memory}*(1024*1024) : 0;

        $d->{uptime} = 0;
        $d->{cpu} = 0;
        $d->{relcpu} = 0;
        $d->{mem} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $res->{$vmid} = $d;
    }

    # network counters of the tap<vmid>i<n> devices; what the host
    # receives on the tap device is counted as output of the VM
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^tap([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $res->{$vmid};
        next if !$d;

        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};
    }

    my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
    my $cpucount = $cpuinfo->{cpus} || 1;
    my $ctime = gettimeofday;

    # per process information from /proc
    foreach my $vmid (keys %$list) {

        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        if (my $fh = IO::File->new("/proc/$pid/io", "r")) {
            my $data = {};
            while (defined (my $line = <$fh>)) {
                if ($line =~ m/^([rw]char):\s+(\d+)$/) {
                    $data->{$1} = $2;
                }
            }
            close($fh);
            $d->{diskread} = $data->{rchar} || 0;
            $d->{diskwrite} = $data->{wchar} || 0;
        }

        my $statstr = file_read_firstline("/proc/$pid/stat");
        next if !$statstr;

        # captures: $2 utime, $3 stime, $6 starttime, $7 vsize, $8 rss
        my ($utime, $stime, $vsize, $rss, $starttime);
        if ($statstr =~ m/^$pid \(.*\) \S (-?\d+) -?\d+ -?\d+ -?\d+ -?\d+ \d+ \d+ \d+ \d+ \d+ (\d+) (\d+) (-?\d+) (-?\d+) -?\d+ -?\d+ -?\d+ 0 (\d+) (\d+) (-?\d+) \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ -?\d+ -?\d+ \d+ \d+ \d+/) {
            ($utime, $stime, $vsize, $rss, $starttime) = ($2, $3, $7, $8 * 4096, $6);
        } else {
            next;
        }

        my $used = $utime + $stime;

        my $vcpus = $d->{cpus} > $cpucount ? $cpucount : $d->{cpus};

        # starttime is given in clock ticks - convert with the real tick
        # rate instead of assuming 100 ticks per second
        $d->{uptime} = int ($uptime - ($starttime/$clock_ticks));

        if ($vsize) {
            $d->{mem} = int (($rss/$vsize)*$d->{maxmem});
        }

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first sample for this PID - nothing to compare with yet
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
                relcpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $clock_ticks;

        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = $dutime/$dtime;
            $d->{relcpu} = ($d->{cpu} * $cpucount) / $vcpus;
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => $d->{cpu},
                relcpu => $d->{relcpu},
            };
        } else {
            # interval too short - report the previous values
            $d->{cpu} = $old->{cpu};
            $d->{relcpu} = $old->{relcpu};
        }
    }

    return $res;
}
1811
# Call $func->($drivename, $parsed_drive) for every key of $conf that is
# a valid drive name and whose value can be parsed by parse_drive().
sub foreach_drive {
    my ($conf, $func) = @_;

    foreach my $ds (grep { valid_drivename($_) } keys %$conf) {
        my $drive = parse_drive($ds, $conf->{$ds});
        next if !$drive;

        $func->($ds, $drive);
    }
}
1824
# Translate a VM configuration into the kvm command line.
#
# $conf:        parsed VM configuration
# $defaults:    default values (see load_defaults)
# $migrate_uri: optional URI passed to '-incoming'
#
# Returns the command as array ref; in list context additionally an
# array ref with the storage volume IDs used by the drives.
# Dies when the installed kvm binary is too old or a net ID is invalid.
sub config_to_command {
    my ($storecfg, $vmid, $conf, $defaults, $migrate_uri) = @_;

    my $cmd = [];

    my $kvmver = kvm_user_version();
    my $vernum = 0; # unknown
    if ($kvmver =~ m/^(\d+)\.(\d+)\.(\d+)$/) {
        $vernum = $1*1000000+$2*1000+$3;
    } elsif ($kvmver =~ m/^(\d+)\.(\d+)$/) {
        # two component versions like "1.0" are valid too
        $vernum = $1*1000000+$2*1000;
    }

    die "detected old qemu-kvm binary ($kvmver)\n" if $vernum < 14000;

    my $have_ovz = -f '/proc/vz/vestat';

    push @$cmd, '/usr/bin/kvm';

    push @$cmd, '-id', $vmid;

    my $use_virtio = 0;

    my $socket = monitor_socket ($vmid);
    push @$cmd, '-monitor', "unix:$socket,server,nowait";

    $socket = vnc_socket ($vmid);
    push @$cmd, '-vnc', "unix:$socket,x509,password";

    push @$cmd, '-pidfile' , pidfile_name ($vmid);

    push @$cmd, '-daemonize';

    push @$cmd, '-incoming', $migrate_uri if $migrate_uri;

    # include usb device config
    push @$cmd, '-readconfig', '/usr/share/qemu-server/pve-usb.cfg';

    # enable absolute mouse coordinates (needed by vnc)
    my $tablet = defined ($conf->{tablet}) ? $conf->{tablet} : $defaults->{tablet};
    push @$cmd, '-device', 'usb-tablet,bus=ehci.0,port=6' if $tablet;

    # host pci devices
    if (my $pcidl = $conf->{hostpci}) {
        my @dl = split (/,/, $pcidl);
        foreach my $dev (@dl) {
            push @$cmd, '-device', "pci-assign,host=$dev" if $dev;
        }
    }

    # usb devices
    for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) {
        my $d = parse_usb_device($conf->{"usb$i"});
        next if !$d;
        if ($d->{vendorid} && $d->{productid}) {
            push @$cmd, '-device', "usb-host,vendorid=$d->{vendorid},productid=$d->{productid}";
        } elsif (defined($d->{hostbus}) && defined($d->{hostport})) {
            push @$cmd, '-device', "usb-host,hostbus=$d->{hostbus},hostport=$d->{hostport}";
        }
    }

    if (my $usbdl = $conf->{hostusb}) {
        my @dl = split (/,/, $usbdl);
        foreach my $dev (@dl) {
            push @$cmd, '-usbdevice', "host:$dev" if $dev;
        }
    }

    # serial devices (only existing character devices are passed on)
    if (my $serdl = $conf->{serial}) {
        my @dl = split (/,/, $serdl);
        foreach my $dev (@dl) {
            next if !$dev;
            if (-c $dev) {
                push @$cmd, '-serial', "$dev";
            }
        }
    }

    # parallel devices (only existing character devices are passed on)
    if (my $pardl = $conf->{parallel}) {
        my @dl = split (/,/, $pardl);
        foreach my $dev (@dl) {
            next if !$dev;
            if (-c $dev) {
                push @$cmd, '-parallel', "$dev";
            }
        }
    }

    my $vmname = $conf->{name} || "vm$vmid";

    push @$cmd, '-name', $vmname;

    my $sockets = 1;
    $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer used
    $sockets = $conf->{sockets} if $conf->{sockets};

    my $cores = $conf->{cores} || 1;

    my $boot_opt;

    push @$cmd, '-smp', "sockets=$sockets,cores=$cores";

    push @$cmd, '-cpu', $conf->{cpu} if $conf->{cpu};

    $boot_opt = "menu=on";
    if ($conf->{boot}) {
        $boot_opt .= ",order=$conf->{boot}";
    }

    push @$cmd, '-nodefaults';

    push @$cmd, '-boot', $boot_opt if $boot_opt;

    push @$cmd, '-no-acpi' if defined ($conf->{acpi}) && $conf->{acpi} == 0;

    push @$cmd, '-no-reboot' if defined ($conf->{reboot}) && $conf->{reboot} == 0;

    my $vga = $conf->{vga};
    if (!$vga) {
        if ($conf->{ostype} && ($conf->{ostype} eq 'win7' || $conf->{ostype} eq 'w2k8')) {
            $vga = 'std';
        } else {
            $vga = 'cirrus';
        }
    }

    push @$cmd, '-vga', $vga if $vga; # for kvm 77 and later

    # time drift fix
    my $tdf = defined ($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
    push @$cmd, '-tdf' if $tdf;

    my $nokvm = defined ($conf->{kvm}) && $conf->{kvm} == 0 ? 1 : 0;

    if (my $ost = $conf->{ostype}) {
        # other, wxp, w2k, w2k3, w2k8, wvista, win7, l24, l26

        if ($ost =~ m/^w/) { # windows
            push @$cmd, '-localtime' if !defined ($conf->{localtime});

            # use rtc-td-hack when acpi is enabled
            if (!(defined ($conf->{acpi}) && $conf->{acpi} == 0)) {
                push @$cmd, '-rtc-td-hack';
            }
        }

        # -tdf ?
        # -no-acpi
        # -no-kvm
        # -win2k-hack ?
    }

    push @$cmd, '-no-kvm' if $nokvm;

    push @$cmd, '-localtime' if $conf->{localtime};

    push @$cmd, '-startdate', $conf->{startdate} if $conf->{startdate};

    push @$cmd, '-S' if $conf->{freeze};

    # set keyboard layout
    my $kb = $conf->{keyboard} || $defaults->{keyboard};
    push @$cmd, '-k', $kb if $kb;

    # enable sound
    #my $soundhw = $conf->{soundhw} || $defaults->{soundhw};
    #push @$cmd, '-soundhw', 'es1370';
    #push @$cmd, '-soundhw', $soundhw if $soundhw;

    my $vollist = [];

    foreach_drive($conf, sub {
        my ($ds, $drive) = @_;

        # collect storage volumes; plain paths fail to parse and are skipped
        eval {
            PVE::Storage::parse_volume_id ($drive->{file});
            push @$vollist, $drive->{file};
        }; # ignore errors

        $use_virtio = 1 if $ds =~ m/^virtio/;
        my $tmp = print_drive_full ($storecfg, $vmid, $drive);
        $tmp .= ",boot=on" if $conf->{bootdisk} && ($conf->{bootdisk} eq $ds);
        push @$cmd, '-drive', $tmp;
    });

    push @$cmd, '-m', $conf->{memory} || $defaults->{memory};

    my $foundnet = 0;

    foreach my $k (sort keys %$conf) {
        next if $k !~ m/^net(\d+)$/;
        my $i = int ($1);

        die "got strange net id '$i'\n" if $i >= ${MAX_NETS};

        if ($conf->{"net$i"} && (my $net = parse_net($conf->{"net$i"}))) {

            $foundnet = 1;

            my $ifname = "tap${vmid}i$i";

            # kvm uses TUNSETIFF ioctl, and that limits ifname length
            die "interface name '$ifname' is too long (max 15 character)\n"
                if length($ifname) >= 16;

            my $device = $net->{model};
            my $vhostparam = '';
            if ($net->{model} eq 'virtio') {
                $use_virtio = 1;
                $device = 'virtio-net-pci';
                $vhostparam = ',vhost=on' if $kernel_has_vhost_net;
            }

            if ($net->{bridge}) {
                push @$cmd, '-netdev', "type=tap,id=${k},ifname=${ifname},script=/var/lib/qemu-server/pve-bridge$vhostparam";
            } else {
                push @$cmd, '-netdev', "type=user,id=${k},hostname=$vmname";
            }

            # qemu > 0.15 always try to boot from network - we disable that by
            # not loading the pxe rom file
            my $extra = (!$conf->{boot} || ($conf->{boot} !~ m/n/)) ?
                "romfile=," : '';
            push @$cmd, '-device', "$device,${extra}mac=$net->{macaddr},netdev=${k}";
        }
    }

    push @$cmd, '-net', 'none' if !$foundnet;

    # hack: virtio with fairsched is unreliable, so we do not use fairsched
    # when the VM uses virtio devices.
    if (!$use_virtio && $have_ovz) {

        my $cpuunits = defined ($conf->{cpuunits}) ?
            $conf->{cpuunits} : $defaults->{cpuunits};

        push @$cmd, '-cpuunits', $cpuunits if $cpuunits;

        # fixme: cpulimit is currently ignored
        #push @$cmd, '-cpulimit', $conf->{cpulimit} if $conf->{cpulimit};
    }

    # add custom args
    if ($conf->{args}) {
        my $aa = split_args ($conf->{args});
        push @$cmd, @$aa;
    }

    return wantarray ? ($cmd, $vollist) : $cmd;
}
2075
# Path of the unix socket used for the VNC display of VM $vmid.
sub vnc_socket {
    my ($vmid) = @_;

    return $var_run_tmpdir . "/$vmid.vnc";
}
2080
# Path of the unix socket used for the qemu monitor of VM $vmid.
sub monitor_socket {
    my ($vmid) = @_;

    return $var_run_tmpdir . "/$vmid.mon";
}
2085
# Path of the file that holds the PID of the kvm process of VM $vmid.
sub pidfile_name {
    my ($vmid) = @_;

    return $var_run_tmpdir . "/$vmid.pid";
}
2090
# Generate a random MAC address in upper case "XX:XX:XX:XX:XX:XX" form.
# The first octet has the multicast bit cleared and the
# "locally administered" bit set.
sub random_ether_addr {

    my $rand = Digest::SHA1::sha1_hex(rand(), time());

    my @octets;
    foreach my $i (0 .. 5) {
        # take two hex digits of the digest per octet
        my $byte = hex(substr($rand, $i*2, 2));

        if ($i == 0) {
            $byte &= 0xfe; # clear multicast
            $byte |= 2; # set local id
        }

        push @octets, sprintf("%02X", $byte);
    }

    return join(':', @octets);
}
2113
# Find a free TCP port for incoming migration.
# Tries to open a listening socket on localhost for each port from 60000
# to 60009 and returns the first port for which that works (the probe
# socket is closed again). Dies if none of them can be bound.
sub next_migrate_port {

    foreach my $port (60000 .. 60009) {

        my $sock = IO::Socket::INET->new(
            Listen => 5,
            LocalAddr => 'localhost',
            LocalPort => $port,
            ReuseAddr => 1,
            Proto => 0,
        );

        next if !$sock;

        close($sock);
        return $port;
    }

    die "unable to find free migration port";
}
2132
# Start VM $vmid while holding the config lock.
#
# $statefile: optional; 'tcp' makes the VM wait for an incoming
#             migration on a free local port, any other value names a
#             state file to restore from (removed after the start)
# $skiplock:  skip the check_lock() call
#
# Dies when the VM is locked, already running, a configured host PCI
# device cannot be prepared, or the kvm command fails.
sub vm_start {
    my ($storecfg, $vmid, $statefile, $skiplock) = @_;

    my $start_locked = sub {
        my $conf = load_config($vmid);

        check_lock($conf) if !$skiplock;

        if (check_running($vmid)) {
            my $msg = "VM $vmid already running - start failed\n" ;
            syslog('err', $msg);
            die $msg;
        }
        syslog('info', "VM $vmid start");

        my $migrate_uri;
        my $migrate_port = 0;

        my $from_tcp = $statefile && $statefile eq 'tcp';

        if ($from_tcp) {
            $migrate_port = next_migrate_port();
            $migrate_uri = "tcp:localhost:${migrate_port}";
        } elsif ($statefile) {
            if (-f $statefile) {
                $migrate_uri = "exec:cat $statefile";
            } else {
                warn "state file '$statefile' does not exist - doing normal startup\n";
            }
        }

        my $defaults = load_defaults();

        my ($cmd, $vollist) = config_to_command($storecfg, $vmid, $conf, $defaults, $migrate_uri);

        # host pci devices
        if (my $pcidl = $conf->{hostpci}) {
            foreach my $dev (split(/,/, $pcidl)) {
                $dev = lc($dev);
                my $info = pci_device_info("0000:$dev");
                die "no pci device info for device '$dev'\n" if !$info;
                die "can't unbind pci device '$dev'\n" if !pci_dev_bind_to_stub($info);
                die "can't reset pci device '$dev'\n" if !pci_dev_reset($info);
            }
        }

        PVE::Storage::activate_volumes($storecfg, $vollist);

        # no timeout when we wait for incoming migration data
        eval { run_command($cmd, timeout => $migrate_uri ? undef : 30); };

        if (my $err = $@) {
            my $msg = "start failed: $err";
            syslog('err', "VM $vmid $msg");
            die $msg;
        }

        if ($from_tcp) {
            print "migration listens on port $migrate_port\n";
        } elsif ($statefile) {
            unlink $statefile;
            # fixme: send resume - is that necessary ?
            eval { vm_monitor_command($vmid, "cont", 1) };
        }

        # apply migration tunables (errors are ignored)
        my @tunables = (
            [ 'migrate_speed', 'migrate_set_speed', 'm' ],
            [ 'migrate_downtime', 'migrate_set_downtime', '' ],
        );

        foreach my $tunable (@tunables) {
            my ($opt, $moncmd, $suffix) = @$tunable;

            my $setting = $conf->{$opt} || $defaults->{$opt};
            next if !$setting;

            my $cmd = "$moncmd ${setting}$suffix";
            eval { vm_monitor_command($vmid, $cmd, 1); };
        }
    };

    lock_config($vmid, $start_locked);
}
2216
# Read from the qemu monitor handle $fh until the "(qemu) " prompt shows
# up or the peer closes the connection.
#
# Returns everything read, without the trailing prompt. Dies with the
# system error text when sysread fails and with "monitor read timeout"
# when no data arrives within $timeout seconds.
sub __read_avail {
    my ($fh, $timeout) = @_;

    my $sel = IO::Select->new();
    $sel->add($fh);

    my $res = '';
    my $buf;

    my @ready;
    while (scalar(@ready = $sel->can_read($timeout))) {
        my $count = $fh->sysread($buf, 8192);

        die "$!\n" if !defined($count);

        last if !$count; # end of file

        $res .= $buf;

        # test the accumulated data, so that a prompt which is split
        # across two reads is still recognised
        if ($res =~ /^(.*)\(qemu\) $/s) {
            $res = $1;
            last;
        }
    }

    die "monitor read timeout\n" if !scalar(@ready);

    return $res;
}
2248
# Send one command to the qemu monitor of VM $vmid and return its output.
#
# $nolog suppresses the syslog entry for the command. The echoed command
# line is removed from the output. For 'q'/'quit' no answer is read and
# undef is returned. Failures are logged to syslog and re-thrown.
sub vm_monitor_command {
    my ($vmid, $cmdstr, $nolog) = @_;

    my $res;

    syslog("info", "VM $vmid monitor command '$cmdstr'") if !$nolog;

    # hack: migrate sometime blocks the monitor (when migrate_downtime
    # is set)
    my $is_migrate = $cmdstr =~ m/^(info\s+migrate|migrate\s)/;

    eval {
        die "VM not running\n" if !check_running($vmid);

        my $sname = monitor_socket($vmid);

        my $sock = IO::Socket::UNIX->new(Peer => $sname) ||
            die "unable to connect to VM $vmid socket - $!\n";

        my $timeout = $is_migrate ? 60*60 : 3; # 1 hour for migrate

        # read banner;
        my $data = __read_avail($sock, $timeout);

        die "got unexpected qemu monitor banner\n"
            if $data !~ m/^QEMU\s+(\S+)\s+monitor\s/;

        my $sel = IO::Select->new();
        $sel->add($sock);

        my @writable = $sel->can_write($timeout);
        die "monitor write error - timeout" if !scalar(@writable);

        my $fullcmd = "$cmdstr\r";

        my $written = $sock->syswrite($fullcmd);
        if (!$written || ($written != length($fullcmd))) {
            die "monitor write error - $!";
        }

        # leaves the eval only - there is no answer to wait for
        return if ($cmdstr eq 'q') || ($cmdstr eq 'quit');

        if ($is_migrate) {
            $timeout = 60*60; # 1 hour
        } elsif ($cmdstr =~ m/^(eject|change)/) {
            $timeout = 60; # note: cdrom mount command is slow
        } else {
            $timeout = 20;
        }

        $res = __read_avail($sock, $timeout);
        if ($res) {
            my @lines = split("\r?\n", $res);

            shift @lines if $lines[0] !~ m/^unknown command/; # skip echo

            $res = join("\n", @lines) . "\n";
        }
    };

    if (my $err = $@) {
        syslog("err", "VM $vmid monitor command failed - $err");
        die $err;
    }

    return $res;
}
2322
# Return the kvm command line of VM $vmid as one space separated string.
sub vm_commandline {
    my ($storecfg, $vmid) = @_;

    my $conf = load_config($vmid);
    my $defaults = load_defaults();

    # scalar context - we only want the command, not the volume list
    my $argv = config_to_command($storecfg, $vmid, $conf, $defaults);

    return join(' ', @$argv);
}
2334
# Send 'system_reset' to the monitor of VM $vmid (under the config lock).
# Dies when the VM is locked, unless $skiplock is set.
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $send_reset = sub {
        my $conf = load_config($vmid);

        check_lock($conf) if !$skiplock;

        syslog("info", "VM $vmid sending 'reset'");

        vm_monitor_command($vmid, "system_reset", 1);
    };

    lock_config($vmid, $send_reset);
}
2349
# Send 'system_powerdown' to the monitor of VM $vmid (under the config
# lock). Dies when the VM is locked, unless $skiplock is set.
sub vm_shutdown {
    my ($vmid, $skiplock) = @_;

    my $send_powerdown = sub {
        my $conf = load_config($vmid);

        check_lock($conf) if !$skiplock;

        syslog("info", "VM $vmid sending 'shutdown'");

        vm_monitor_command($vmid, "system_powerdown", 1);
    };

    lock_config($vmid, $send_powerdown);
}
2364
# Stop VM $vmid (under the config lock).
#
# Sends 'quit' to the monitor and waits up to 50 seconds; if the VM is
# still running (or 'quit' failed) SIGTERM is sent. After another 10
# seconds SIGKILL is sent. Finally the fairsched group is removed.
# Does nothing but log when the VM is not running.
sub vm_stop {
    my ($vmid, $skiplock) = @_;

    lock_config($vmid, sub {

        my $pid = check_running($vmid);

        if (!$pid) {
            syslog('info', "VM $vmid already stopped");
            return;
        }

        my $conf = load_config($vmid);

        check_lock($conf) if !$skiplock;

        syslog("info", "VM $vmid stopping");

        # poll once per second; true when the VM outlived the timeout
        my $still_running_after = sub {
            my ($timeout) = @_;

            my $count = 0;
            while (($count < $timeout) && check_running($vmid)) {
                $count++;
                sleep 1;
            }

            return $count >= $timeout;
        };

        eval { vm_monitor_command($vmid, "quit", 1); };

        if ($@) {
            syslog('info', "VM $vmid quit failed - terminating now with SIGTERM");
            kill 15, $pid;
        } elsif ($still_running_after->(50)) { # fixme: how long?
            syslog('info', "VM $vmid still running - terminating now with SIGTERM");
            kill 15, $pid;
        }

        # wait again
        if ($still_running_after->(10)) {
            syslog('info', "VM $vmid still running - terminating now with SIGKILL\n");
            kill 9, $pid;
        }

        fairsched_rmnod($vmid); # try to destroy group
    });
}
2423
# Send the monitor command 'stop' to VM $vmid (logged as "suspend").
#
# Parameters:
#   $vmid     - ID of the VM
#   $skiplock - when true, check_lock() is not called on the loaded config
#
# The work is done inside the callback handed to lock_config().
sub vm_suspend {
    my ($vmid, $skiplock) = @_;

    my $code = sub {
        my $conf = load_config($vmid);

        check_lock($conf) unless $skiplock;

        syslog("info", "VM $vmid suspend");

        vm_monitor_command($vmid, "stop", 1);
    };

    lock_config($vmid, $code);
}
2438
# Send the monitor command 'cont' to VM $vmid (logged as "resume").
#
# Parameters:
#   $vmid     - ID of the VM
#   $skiplock - when true, check_lock() is not called on the loaded config
#
# The work is done inside the callback handed to lock_config().
sub vm_resume {
    my ($vmid, $skiplock) = @_;

    my $code = sub {
        my $conf = load_config($vmid);

        check_lock($conf) unless $skiplock;

        syslog("info", "VM $vmid resume");

        vm_monitor_command($vmid, "cont", 1);
    };

    lock_config($vmid, $code);
}
2453
# Send the key combination ctrl-alt-delete to VM $vmid using the
# monitor command 'sendkey'.
#
# Parameters:
#   $vmid     - ID of the VM
#   $skiplock - when true, check_lock() is not called on the loaded config
#
# The work is done inside the callback handed to lock_config().
sub vm_cad {
    my ($vmid, $skiplock) = @_;

    my $code = sub {
        my $conf = load_config($vmid);

        check_lock($conf) unless $skiplock;

        syslog("info", "VM $vmid sending cntl-alt-delete");

        vm_monitor_command($vmid, "sendkey ctrl-alt-delete", 1);
    };

    lock_config($vmid, $code);
}
2468
# Destroy VM $vmid via destroy_vm(); refuses to act on a running VM.
#
# Parameters:
#   $storecfg - storage configuration, passed through to destroy_vm()
#   $vmid     - ID of the VM
#   $skiplock - when true, check_lock() is not called on the loaded config
#
# Dies with "VM is running\n" when check_running() reports the VM as
# running. Any failure during destruction is written to syslog and then
# re-thrown unchanged. The work is done inside the callback handed to
# lock_config().
sub vm_destroy {
    my ($storecfg, $vmid, $skiplock) = @_;

    lock_config($vmid, sub {

        my $conf = load_config($vmid);

        check_lock($conf) unless $skiplock;

        syslog("info", "VM $vmid destroy called (removing all data)");

        eval {
            die "VM is running\n" if check_running($vmid);

            fairsched_rmnod($vmid); # try to destroy group
            destroy_vm($storecfg, $vmid);
        };

        if (my $err = $@) {
            syslog("err", "VM $vmid destroy failed - $err");
            die $err;
        }
    });
}
2497
# Stop every VM that vzlist() reports with a pid, escalating in three
# stages.
#
# Parameters:
#   $timeout - seconds to wait after the shutdown requests; a false
#              value selects the default of 3 minutes
#
# Stages:
#   1. vm_shutdown() for each running VM, then wait up to $timeout.
#   2. Monitor command 'quit' for each VM still running, then wait up
#      to 30 seconds.
#   3. SIGTERM to the pid of each VM still running.
# The function returns as soon as a wait ends with no VM running.
# Waiting polls vzlist() every 5 seconds.
sub vm_stopall {
    my ($timeout) = @_;

    $timeout ||= 3*60;

    my $poll_interval = 5;

    my $vzlist = vzlist();

    # VMIDs of the current $vzlist snapshot which have a pid recorded.
    my $running_vmids = sub {
        return grep { $vzlist->{$_}->{pid} } keys %$vzlist;
    };

    my $count_running = sub {
        my @vmids = $running_vmids->();
        return scalar(@vmids);
    };

    my $running = $count_running->();

    # Refresh $vzlist every $poll_interval seconds until nothing is
    # running any more or the (rounded up) number of tries is used up.
    my $wait_for_exit = sub {
        my ($secs) = @_;

        my $maxtries = int(($secs + $poll_interval - 1)/$poll_interval);

        for (my $try = 0; ($try < $maxtries) && $running; $try++) {
            sleep $poll_interval;

            $vzlist = vzlist();
            $running = $count_running->();
        }
    };

    if ($running) {

        my $msg = "Stopping Qemu Server - sending shutdown requests to all VMs\n";
        syslog('info', $msg);
        print STDERR $msg;

        foreach my $vmid ($running_vmids->()) {
            eval { vm_shutdown($vmid, 1); };
            print STDERR $@ if $@;
        }

        $wait_for_exit->($timeout);

        return if !$running;

        foreach my $vmid ($running_vmids->()) {
            $msg = "VM $vmid still running - sending stop now\n";
            syslog('info', $msg);
            print $msg;

            eval { vm_monitor_command($vmid, "quit", 1); };
            print STDERR $@ if $@;
        }

        $wait_for_exit->(30);

        return if !$running;

        foreach my $vmid ($running_vmids->()) {
            $msg = "VM $vmid still running - terminating now with SIGTERM\n";
            syslog('info', $msg);
            print $msg;
            kill 15, $vzlist->{$vmid}->{pid};
        }

        # this is called by system shutdown scripts, so remaining
        # processes get killed anyway (no need to send kill -9 here)

        $msg = "Qemu Server stopped\n";
        syslog('info', $msg);
        print STDERR $msg;
    }
}
2587
2588# pci helpers
2589
# Write $buf to $filename, truncating or creating the file.
#
# Parameters:
#   $filename - path of the file to open in "w" mode
#   $buf      - data to write
#
# Returns 1 when the data was written and the handle closed without
# error, undef otherwise (open, print or close failed).
#
# The result of close() is part of the verdict on purpose: output is
# buffered, so for short strings the actual write happens when the
# handle is flushed at close time. A write rejected by the kernel
# therefore only becomes visible as a failing close().
sub file_write {
    my ($filename, $buf) = @_;

    my $fh = IO::File->new($filename, "w");
    return undef if !$fh;

    my $printed = print $fh $buf;

    # always close, even after a failed print, so the handle is released
    my $closed = $fh->close();

    return ($printed && $closed) ? 1 : undef;
}
2602
# Collect basic information about a PCI device from sysfs.
#
# Parameters:
#   $name - device address in the form "dddd:bb:ss.f" (lower case hex)
#
# Returns undef if $name does not have that form, or if one of the
# sysfs attributes irq, vendor or device cannot be read or has an
# unexpected format. Otherwise returns a hash reference with the keys
# name, vendor, product (both without the leading "0x"), domain, bus,
# slot, func, irq and has_fl_reset (true if the device directory
# contains a 'reset' file, 0 otherwise).
sub pci_device_info {
    my ($name) = @_;

    # a failed match yields an empty list, leaving all four undefined
    my ($domain, $bus, $slot, $func) =
        $name =~ m/^([a-f0-9]{4}):([a-f0-9]{2}):([a-f0-9]{2})\.([a-f0-9])$/;
    return undef if !defined($func);

    my $devdir = "$pcisysfs/devices/$name";

    my $irq = file_read_firstline("$devdir/irq");
    return undef unless defined($irq) && $irq =~ m/^\d+$/;

    # the substitutions both validate and strip the "0x" prefix
    my $vendor = file_read_firstline("$devdir/vendor");
    return undef unless defined($vendor) && $vendor =~ s/^0x//;

    my $product = file_read_firstline("$devdir/device");
    return undef unless defined($product) && $product =~ s/^0x//;

    my $info = {
        name => $name,
        vendor => $vendor,
        product => $product,
        domain => $domain,
        bus => $bus,
        slot => $slot,
        func => $func,
        irq => $irq,
        has_fl_reset => -f "$devdir/reset" || 0,
    };

    return $info;
}
2634
# Write "1" to the sysfs 'reset' attribute of a PCI device.
#
# Parameters:
#   $dev - hash reference with at least the key 'name' (the key set
#          matches what pci_device_info() returns)
#
# Returns whatever file_write() returns for that write.
sub pci_dev_reset {
    my ($dev) = @_;

    return file_write("$pcisysfs/devices/$dev->{name}/reset", "1");
}
2644
# Bind a PCI device to the pci-stub driver through sysfs.
#
# Parameters:
#   $dev - hash reference with the keys 'name', 'vendor' and 'product'
#
# Steps:
#   1. Return 1 at once if the device already shows up below the
#      pci-stub driver directory.
#   2. Write "<vendor> <product>" to pci-stub's new_id file.
#   3. Write the device name to the 'unbind' file of its current
#      driver. A failed write is tolerated when that file does not
#      exist.
#   4. If the device is still not listed below pci-stub, write its
#      name to pci-stub's bind file.
#
# Returns undef when a required write fails, otherwise the result of
# the final directory test (true when the device is bound to pci-stub).
sub pci_dev_bind_to_stub {
    my ($dev) = @_;

    my $name = $dev->{name};

    my $stubdir = "$pcisysfs/drivers/pci-stub";
    my $bound_marker = "$stubdir/$name";

    return 1 if -d $bound_marker;

    return undef
        unless file_write("$stubdir/new_id", "$dev->{vendor} $dev->{product}");

    my $unbind = "$pcisysfs/devices/$name/driver/unbind";
    return undef if !file_write($unbind, $name) && (-f $unbind);

    return undef if (! -d $bound_marker) && !file_write("$stubdir/bind", $name);

    return -d $bound_marker;
}
2668
26691;