]> git.proxmox.com Git - qemu-server.git/blame_incremental - PVE/QemuServer.pm
change default boot order to cdn
[qemu-server.git] / PVE / QemuServer.pm
... / ...
CommitLineData
1package PVE::QemuServer;
2
3use strict;
4use POSIX;
5use IO::Handle;
6use IO::Select;
7use IO::File;
8use IO::Dir;
9use IO::Socket::UNIX;
10use File::Basename;
11use File::Path;
12use File::stat;
13use Getopt::Long;
14use Digest::SHA1;
15use Fcntl ':flock';
16use Cwd 'abs_path';
17use IPC::Open3;
18use Fcntl;
19use PVE::SafeSyslog;
20use Storable qw(dclone);
21use PVE::Exception qw(raise raise_param_exc);
22use PVE::Storage;
23use PVE::Tools qw(run_command lock_file file_read_firstline);
24use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file cfs_lock_file);
25use PVE::INotify;
26use PVE::ProcFSTools;
27use Time::HiRes qw(gettimeofday);
28
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();

# Note about locking: we use flock on the config file to protect
# against concurrent actions.
# Additionally, we have a 'lock' setting in the config file. This
# can be set to 'migrate' or 'backup'. Most actions are not
# allowed when such a lock is set. But you can ignore this kind of
# lock with the --skiplock flag.

# Files below '/qemu-server/' are parsed with parse_vm_config().
cfs_register_file('/qemu-server/', \&parse_vm_config);

# Standard option: boolean flag to ignore locks.
PVE::JSONSchema::register_standard_option(
    'skiplock',
    {
        description => "Ignore locks - only root is allowed to use this option.",
        type        => 'boolean',
        optional    => 1,
    },
);

# Standard option: location used by commands that save/restore state.
PVE::JSONSchema::register_standard_option(
    'pve-qm-stateuri',
    {
        description => "Some command save/restore state from this location.",
        type        => 'string',
        maxLength   => 128,
        optional    => 1,
    },
);
52
53#no warnings 'redefine';
54
# Define the fairsched/setluid/setublimit syscall numbers and the KVM
# ioctl request code as constant subs. Every definition is guarded so an
# already defined sub is never redefined.
unless (defined(&_VZSYSCALLS_H_)) {
    eval 'sub _VZSYSCALLS_H_ () {1;}' unless defined(&_VZSYSCALLS_H_);
    require 'sys/syscall.ph';
    if (defined(&__x86_64__)) {
        eval 'sub __NR_fairsched_vcpus () {499;}' unless defined(&__NR_fairsched_vcpus);
        eval 'sub __NR_fairsched_mknod () {504;}' unless defined(&__NR_fairsched_mknod);
        eval 'sub __NR_fairsched_rmnod () {505;}' unless defined(&__NR_fairsched_rmnod);
        eval 'sub __NR_fairsched_chwt () {506;}' unless defined(&__NR_fairsched_chwt);
        eval 'sub __NR_fairsched_mvpr () {507;}' unless defined(&__NR_fairsched_mvpr);
        eval 'sub __NR_fairsched_rate () {508;}' unless defined(&__NR_fairsched_rate);
        eval 'sub __NR_setluid () {501;}' unless defined(&__NR_setluid);
        eval 'sub __NR_setublimit () {502;}' unless defined(&__NR_setublimit);
    }
    elsif (defined(&__i386__)) {
        eval 'sub __NR_fairsched_mknod () {500;}' unless defined(&__NR_fairsched_mknod);
        eval 'sub __NR_fairsched_rmnod () {501;}' unless defined(&__NR_fairsched_rmnod);
        eval 'sub __NR_fairsched_chwt () {502;}' unless defined(&__NR_fairsched_chwt);
        eval 'sub __NR_fairsched_mvpr () {503;}' unless defined(&__NR_fairsched_mvpr);
        eval 'sub __NR_fairsched_rate () {504;}' unless defined(&__NR_fairsched_rate);
        eval 'sub __NR_fairsched_vcpus () {505;}' unless defined(&__NR_fairsched_vcpus);
        eval 'sub __NR_setluid () {511;}' unless defined(&__NR_setluid);
        eval 'sub __NR_setublimit () {512;}' unless defined(&__NR_setublimit);
    } else {
        die("no fairsched syscall for this arch");
    }
    require 'asm/ioctl.ph';
    # KVM_GET_API_VERSION is _IO(KVMIO, 0x00) with KVMIO == 0xAE; the
    # request number is spelled out as a complete hex literal.
    eval 'sub KVM_GET_API_VERSION () { &_IO(0xAE, 0x00);}' unless defined(&KVM_GET_API_VERSION);
}
83
# Thin wrapper around the fairsched_mknod syscall. All arguments are
# passed as integers; returns the raw syscall() result.
sub fairsched_mknod {
    my ($parent, $weight, $desired) = @_;

    my $syscall_nr = __NR_fairsched_mknod();

    return syscall($syscall_nr, int($parent), int($weight), int($desired));
}
89
# Thin wrapper around the fairsched_rmnod syscall for node $id.
# Returns the raw syscall() result.
sub fairsched_rmnod {
    my ($id) = @_;

    my $syscall_nr = __NR_fairsched_rmnod();

    return syscall($syscall_nr, int($id));
}
95
# Thin wrapper around the fairsched_mvpr syscall, called with
# ($pid, $newid) as integers. Returns the raw syscall() result.
sub fairsched_mvpr {
    my ($pid, $newid) = @_;

    my $syscall_nr = __NR_fairsched_mvpr();

    return syscall($syscall_nr, int($pid), int($newid));
}
101
# Thin wrapper around the fairsched_vcpus syscall, called with
# ($id, $vcpus) as integers. Returns the raw syscall() result.
sub fairsched_vcpus {
    my ($id, $vcpus) = @_;

    my $syscall_nr = __NR_fairsched_vcpus();

    return syscall($syscall_nr, int($id), int($vcpus));
}
107
# Thin wrapper around the fairsched_rate syscall, called with
# ($id, $op, $rate) as integers; $op is one of the FAIRSCHED_*_RATE
# constants. Returns the raw syscall() result.
sub fairsched_rate {
    my ($id, $op, $rate) = @_;

    my $syscall_nr = __NR_fairsched_rate();

    return syscall($syscall_nr, int($id), int($op), int($rate));
}
113
# Operation codes passed to fairsched_rate().
use constant {
    FAIRSCHED_SET_RATE  => 0,
    FAIRSCHED_DROP_RATE => 1,
    FAIRSCHED_GET_RATE  => 2,
};

# Apply a CPU limit given in percent to fairsched node $id.
# The percentage is converted to 1/1024 units; a resulting value of 0
# drops the rate limit instead of setting one.
sub fairsched_cpulimit {
    my ($id, $limit) = @_;

    my $cpulim1024 = int($limit * 1024 / 100);

    my $op = FAIRSCHED_DROP_RATE;
    $op = FAIRSCHED_SET_RATE if $cpulim1024;

    return fairsched_rate($id, $op, $cpulim1024);
}
126
my $nodename = PVE::INotify::nodename();

my $confdir = "/etc/pve/nodes/$nodename/qemu-server";
my $var_run_tmpdir = "/var/run/qemu-server";
my $lock_dir = "/var/lock/qemu-server";

# Create the working directories; the mkdir results are not checked.
foreach my $dir ("/etc/pve/nodes/$nodename", $confdir, $var_run_tmpdir, $lock_dir) {
    mkdir $dir;
}

my $pcisysfs = "/sys/bus/pci";

my $keymaphash = PVE::Tools::kvmkeymaps();
142
# JSON schema descriptions of all scalar VM configuration options.
# Per-device options (netN, ideN, ...) are added to this hash further below.
my $confdesc = {
    onboot => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default => 0,
    },
    autostart => {
        optional => 1,
        type => 'boolean',
        description => "Automatic restart after crash (currently ignored).",
        default => 0,
    },
    reboot => {
        optional => 1,
        type => 'boolean',
        description => "Allow reboot. If set to '0' the VM exit on reboot.",
        default => 1,
    },
    lock => {
        optional => 1,
        type => 'string',
        description => "Lock/unlock the VM.",
        enum => [qw(migrate backup)],
    },
    cpulimit => {
        optional => 1,
        type => 'integer',
        description => "Limit of CPU usage in per cent. Note if the computer has 2 CPUs, it has total of 200% CPU time. Value '0' indicates no CPU limit.\n\nNOTE: This option is currently ignored.",
        minimum => 0,
        default => 0,
    },
    cpuunits => {
        optional => 1,
        type => 'integer',
        description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to weights of all the other running VMs.\n\nNOTE: You can disable fair-scheduler configuration by setting this to 0.",
        minimum => 0,
        maximum => 500000,
        default => 1000,
    },
    memory => {
        optional => 1,
        type => 'integer',
        description => "Amount of RAM for the VM in MB. This is the maximum available memory when you use the balloon device.",
        minimum => 16,
        default => 512,
    },
    balloon => {
        optional => 1,
        type => 'integer',
        description => "Amount of target RAM for the VM in MB.",
        minimum => 16,
    },
    keyboard => {
        optional => 1,
        type => 'string',
        description => "Keyboard layout for vnc server. Default is read from the datacenter configuration file.",
        enum => [ keys %$keymaphash ],
        default => 'en-us',
    },
    name => {
        optional => 1,
        type => 'string',
        description => "Set a name for the VM. Only used on the configuration web interface.",
    },
    description => {
        optional => 1,
        type => 'string',
        description => "Description for the VM. Only used on the configuration web interface.",
    },
    ostype => {
        optional => 1,
        type => 'string',
        enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 l24 l26)],
        description => <<EODESC,
Used to enable special optimization/features for specific
operating systems:

other => unspecified OS
wxp => Microsoft Windows XP
w2k => Microsoft Windows 2000
w2k3 => Microsoft Windows 2003
w2k8 => Microsoft Windows 2008
wvista => Microsoft Windows Vista
win7 => Microsoft Windows 7
l24 => Linux 2.4 Kernel
l26 => Linux 2.6/3.X Kernel

other|l24|l26 ... no special behaviour
wxp|w2k|w2k3|w2k8|wvista|win7 ... use --localtime switch
EODESC
    },
    boot => {
        optional => 1,
        type => 'string',
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n).",
        pattern => '[acdn]{1,4}',
        default => 'cdn',
    },
    bootdisk => {
        optional => 1,
        type => 'string', format => 'pve-qm-bootdisk',
        description => "Enable booting from specified disk.",
        pattern => '(ide|scsi|virtio)\d+',
    },
    smp => {
        optional => 1,
        type => 'integer',
        description => "The number of CPUs. Please use option -sockets instead.",
        minimum => 1,
        default => 1,
    },
    sockets => {
        optional => 1,
        type => 'integer',
        description => "The number of CPU sockets.",
        minimum => 1,
        default => 1,
    },
    cores => {
        optional => 1,
        type => 'integer',
        description => "The number of cores per socket.",
        minimum => 1,
        default => 1,
    },
    acpi => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable ACPI.",
        default => 1,
    },
    kvm => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable KVM hardware virtualization.",
        default => 1,
    },
    tdf => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable time drift fix.",
        default => 1,
    },
    localtime => {
        optional => 1,
        type => 'boolean',
        description => "Set the real time clock to local time. This is enabled by default if ostype indicates a Microsoft OS.",
    },
    freeze => {
        optional => 1,
        type => 'boolean',
        description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
    },
    vga => {
        optional => 1,
        type => 'string',
        # the description must name the enum value 'cirrus' exactly
        description => "Select VGA type. If you want to use high resolution modes (>= 1280x1024x16) then you should use option 'std' or 'vmware'. Default is 'std' for win7/w2k8, and 'cirrus' for other OS types",
        enum => [qw(std cirrus vmware)],
    },
    watchdog => {
        optional => 1,
        type => 'string', format => 'pve-qm-watchdog',
        typetext => '[[model=]i6300esb|ib700] [,[action=]reset|shutdown|poweroff|pause|debug|none]',
        description => "Create a virtual hardware watchdog device. Once enabled (by a guest action), the watchdog must be periodically polled by an agent inside the guest or else the guest will be restarted (or execute the action specified)",
    },
    startdate => {
        optional => 1,
        type => 'string',
        typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
        description => "Set the initial date of the real time clock. Valid format for date are: 'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
        pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
        default => 'now',
    },
    args => {
        optional => 1,
        type => 'string',
        description => <<EODESCR,
Note: this option is for experts only. It allows you to pass arbitrary arguments to kvm, for example:

args: -no-reboot -no-hpet
EODESCR
    },
    tablet => {
        optional => 1,
        type => 'boolean',
        default => 1,
        description => "Enable/disable the usb tablet device. This device is usually needed to allow absolute mouse positioning. Else the mouse runs out of sync with normal vnc clients. If you're running lots of console-only guests on one host, you may consider disabling this to save some context switches.",
    },
    migrate_speed => {
        optional => 1,
        type => 'integer',
        description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
        minimum => 0,
        default => 0,
    },
    migrate_downtime => {
        optional => 1,
        type => 'integer',
        description => "Set maximum tolerated downtime (in seconds) for migrations.",
        minimum => 0,
        default => 1,
    },
    cdrom => {
        optional => 1,
        type => 'string', format => 'pve-qm-drive',
        typetext => 'volume',
        description => "This is an alias for option -ide2",
    },
    cpu => {
        optional => 1,
        description => "Emulated CPU type.",
        type => 'string',
        enum => [ qw(486 athlon pentium pentium2 pentium3 coreduo core2duo kvm32 kvm64 qemu32 qemu64 phenom host) ],
        default => 'qemu64',
    },
};
360
361# what about other qemu settings ?
362#cpu => 'string',
363#machine => 'string',
364#fda => 'file',
365#fdb => 'file',
366#mtdblock => 'file',
367#sd => 'file',
368#pflash => 'file',
369#snapshot => 'bool',
370#bootp => 'file',
371##tftp => 'dir',
372##smb => 'dir',
373#kernel => 'file',
374#append => 'string',
375#initrd => 'file',
376##soundhw => 'string',
377
# Expose every scalar option as standard option "pve-qm-<name>".
foreach my $optname (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$optname", $confdesc->{$optname});
}

# Upper bounds for the numbered per-device options.
my $MAX_IDE_DISKS = 4;
my $MAX_SCSI_DISKS = 14;
my $MAX_VIRTIO_DISKS = 6;
my $MAX_USB_DEVICES = 5;
my $MAX_NETS = 6;
my $MAX_UNUSED_DISKS = 8;
my $MAX_HOSTPCI_DEVICES = 2;
my $MAX_SERIAL_PORTS = 4;
my $MAX_PARALLEL_PORTS = 3;

# NIC models accepted in netN options, plus a sorted text form for help output.
my $nic_model_list = [
    qw(rtl8139 ne2k_pci e1000 pcnet virtio ne2k_isa i82551 i82557b i82559er)
];
my $nic_model_list_txt = join(' ', sort @$nic_model_list);
395
# fixme:
# Schema shared by all netN options.
my $netdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-net',
    typetext => "MODEL=XX:XX:XX:XX:XX:XX [,bridge=<dev>][,rate=<mbps>]",
    description => <<EODESCR,
Specify network devices.

MODEL is one of: $nic_model_list_txt

XX:XX:XX:XX:XX:XX should be an unique MAC address. This is
automatically generated if not specified.

The bridge parameter can be used to automatically add the interface to a bridge device. The Proxmox VE standard bridge is called 'vmbr0'.

Option 'rate' is used to limit traffic bandwidth from and to this interface. It is specified as floating point number, unit is 'Megabytes per second'.

If you specify no bridge, we create a kvm 'user' (NATed) network device, which provides DHCP and DNS services. The following addresses are used:

10.0.2.2 Gateway
10.0.2.3 DNS Server
10.0.2.4 SMB Server

The DHCP server assign addresses to the guest starting from 10.0.2.15.

EODESCR
};
PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);

# net0 .. net($MAX_NETS - 1) all share the same descriptor.
foreach my $i (0 .. ($MAX_NETS - 1)) {
    $confdesc->{"net$i"} = $netdesc;
}
428
# Set of valid drive option names (ide0, scsi3, ...); filled further below.
my $drivename_hash;

# Option syntax shared by all drive types. The leading volume part is
# written as "[volume=]volume" so the brackets are balanced.
my $drive_option_typetext = '[volume=]volume [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback] [,format=f] [,backup=yes|no] [,aio=native|threads]';

# The "n is 0 to N" ranges are derived from the $MAX_* limits so the help
# text cannot drift from the number of options actually created.
my $idedesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-drive',
    typetext => $drive_option_typetext,
    description => "Use volume as IDE hard disk or CD-ROM (n is 0 to " . ($MAX_IDE_DISKS - 1) . ").",
};
PVE::JSONSchema::register_standard_option("pve-qm-ide", $idedesc);

my $scsidesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-drive',
    typetext => $drive_option_typetext,
    description => "Use volume as SCSI hard disk or CD-ROM (n is 0 to " . ($MAX_SCSI_DISKS - 1) . ").",
};
PVE::JSONSchema::register_standard_option("pve-qm-scsi", $scsidesc);

my $virtiodesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-drive',
    typetext => $drive_option_typetext,
    description => "Use volume as VIRTIO hard disk (n is 0 to " . ($MAX_VIRTIO_DISKS - 1) . ").",
};
PVE::JSONSchema::register_standard_option("pve-qm-virtio", $virtiodesc);
454
# Schema shared by all usbN options.
my $usbdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-usb-device',
    typetext => 'host=HOSTUSBDEVICE',
    description => <<EODESCR,
Configure an USB device (n is 0 to 4). This can be used to
pass-through usb devices to the guest. HOSTUSBDEVICE syntax is:

'bus-port(.port)*' (decimal numbers) or
'vendor_id:product_id' (hexadecimal numbers)

You can use the 'lsusb -t' command to list existing usb devices.

Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.

EODESCR
};
PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);

# Schema shared by all hostpciN options.
my $hostpcidesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-hostpci',
    typetext => "HOSTPCIDEVICE",
    description => <<EODESCR,
Map host pci devices. HOSTPCIDEVICE syntax is:

'bus:dev.func' (hexadecimal numbers)

You can use the 'lspci' command to list existing pci devices.

Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.

Experimental: user reported problems with this option.
EODESCR
};
PVE::JSONSchema::register_standard_option("pve-qm-hostpci", $hostpcidesc);
491
# Schema shared by all serialN options.
my $serialdesc = {
    optional => 1,
    type => 'string',
    pattern => '/dev/ttyS\d+',
    description => <<EODESCR,
Map host serial devices (n is 0 to 3).

Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.

Experimental: user reported problems with this option.
EODESCR
};

# Schema shared by all parallelN options.
my $paralleldesc = {
    optional => 1,
    type => 'string',
    pattern => '/dev/parport\d+',
    description => <<EODESCR,
Map host parallel devices (n is 0 to 2).

Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.

Experimental: user reported problems with this option.
EODESCR
};

# Add the numbered per-device options to the config schema. Disk options
# are additionally recorded in $drivename_hash.
foreach my $i (0 .. ($MAX_PARALLEL_PORTS - 1)) {
    $confdesc->{"parallel$i"} = $paralleldesc;
}

foreach my $i (0 .. ($MAX_SERIAL_PORTS - 1)) {
    $confdesc->{"serial$i"} = $serialdesc;
}

foreach my $i (0 .. ($MAX_HOSTPCI_DEVICES - 1)) {
    $confdesc->{"hostpci$i"} = $hostpcidesc;
}

foreach my $i (0 .. ($MAX_IDE_DISKS - 1)) {
    my $name = "ide$i";
    $drivename_hash->{$name} = 1;
    $confdesc->{$name} = $idedesc;
}

foreach my $i (0 .. ($MAX_SCSI_DISKS - 1)) {
    my $name = "scsi$i";
    $drivename_hash->{$name} = 1;
    $confdesc->{$name} = $scsidesc;
}

foreach my $i (0 .. ($MAX_VIRTIO_DISKS - 1)) {
    my $name = "virtio$i";
    $drivename_hash->{$name} = 1;
    $confdesc->{$name} = $virtiodesc;
}

foreach my $i (0 .. ($MAX_USB_DEVICES - 1)) {
    $confdesc->{"usb$i"} = $usbdesc;
}

# Schema shared by all unusedN options.
my $unuseddesc = {
    optional => 1,
    type => 'string', format => 'pve-volume-id',
    description => "Reference to unused volumes.",
};

foreach my $i (0 .. ($MAX_UNUSED_DISKS - 1)) {
    $confdesc->{"unused$i"} = $unuseddesc;
}
558
# Cached result of kvm_version(); 0 means "not determined yet".
my $kvm_api_version = 0;

# Return the value reported by the KVM_GET_API_VERSION ioctl on /dev/kvm.
# Returns 0 when /dev/kvm cannot be opened. A true result is cached.
sub kvm_version {

    if (!$kvm_api_version) {
        my $fh = IO::File->new("</dev/kvm");
        return 0 if !$fh;

        my $v = $fh->ioctl(KVM_GET_API_VERSION(), 0);
        $kvm_api_version = $v if $v;

        $fh->close();
    }

    return $kvm_api_version;
}
576
# Cached result of kvm_user_version().
my $kvm_user_version;

# Return the version string printed by 'kvm -help', or 'unknown' when the
# output does not start with the expected version banner. The result is
# cached after the first call.
sub kvm_user_version {

    return $kvm_user_version if $kvm_user_version;

    my $help_text = `kvm -help 2>/dev/null`;

    # second capture group holds the x.y.z version
    my (undef, $version) =
        $help_text =~ m/^QEMU( PC)? emulator version (\d+\.\d+\.\d+) /;

    $kvm_user_version = defined($version) ? $version : 'unknown';

    return $kvm_user_version;
}
594
# True if /dev/vhost-net exists as a character device (checked once at load time).
my $kernel_has_vhost_net = (-c '/dev/vhost-net');
596
# Return the list of all possible disk option names: ide*, then scsi*,
# then virtio*.
sub disknames {
    # order is important - used to autoselect boot disk
    my $names_for = sub {
        my ($bus, $count) = @_;
        return map { "$bus$_" } (0 .. ($count - 1));
    };

    return ($names_for->('ide', $MAX_IDE_DISKS),
            $names_for->('scsi', $MAX_SCSI_DISKS),
            $names_for->('virtio', $MAX_VIRTIO_DISKS));
}
603
# Return true if $dev is one of the registered drive option names.
sub valid_drivename {
    my ($dev) = @_;

    my $entry = $drivename_hash->{$dev};

    return defined($entry);
}
609
# Return true if $key is a known configuration option.
sub option_exists {
    my ($key) = @_;

    my $desc = $confdesc->{$key};

    return defined($desc);
}
614
# Return the array reference holding the supported NIC model names
# (the shared list itself, not a copy).
sub nic_models {
    my $models = $nic_model_list;

    return $models;
}
618
# Return a hash reference mapping each ostype value to a display name.
sub os_list_description {

    return {
        other  => 'Other',
        wxp    => 'Windows XP',
        w2k    => 'Windows 2000',
        w2k3   => 'Windows 2003',
        w2k8   => 'Windows 2008',
        wvista => 'Windows Vista',
        win7   => 'Windows 7',
        l24    => 'Linux 2.4',
        l26    => 'Linux 2.6',
    };
}
633
# Split an argument string into an array of words, honouring shell-like
# quoting (single quotes, double quotes, backslash escapes).
#
# The string is parsed in-process with Text::ParseWords::shellwords().
# It is never handed to a shell, so no command substitution, variable
# expansion or globbing takes place and no child process is spawned.
#
# Returns an array reference (empty for an empty/false $str).
# Dies with "unable to parse args: ..." if the string cannot be parsed,
# e.g. because of an unterminated quote.
sub split_args {
    my ($str) = @_;

    return [] if !$str;

    require Text::ParseWords;

    my @words = Text::ParseWords::shellwords($str);

    # shellwords() signals a parse failure by returning an empty list;
    # a whitespace-only string legitimately yields no words.
    die "unable to parse args: $str\n" if !@words && $str =~ m/\S/;

    return \@words;
}
659
# Decompose a disk option name such as 'ide3' into bus, controller and unit.
#
# Returns a hash reference with the keys bus, desc ("IDE 1:1"),
# controller, unit and index. Units per controller: 2 for ide, 7 for
# scsi, 1024 for virtio. Dies if $dev is not ide/scsi/virtio followed by
# a number.
sub disk_devive_info {
    my ($dev) = @_;

    my ($bus, $index) = $dev =~ m/^(ide|scsi|virtio)(\d+)$/
        or die "unknown disk device format '$dev'";

    my %units_per_controller = (ide => 2, scsi => 7);
    my $maxdev = $units_per_controller{$bus} || 1024;

    my $controller = int($index / $maxdev);
    my $unit = $index % $maxdev;

    my $info = {
        bus => $bus,
        desc => uc($bus) . " $controller:$unit",
        controller => $controller,
        unit => $unit,
        index => $index,
    };

    return $info;
}
683
# Build the qemu drive name for disk option $dev.
# ide/scsi: "<bus><controller>-cd<unit>" for media 'cdrom', otherwise
# "<bus><controller>-hd<unit>"; any other bus: "<bus><index>".
sub qemu_drive_name {
    my ($dev, $media) = @_;

    my $info = disk_devive_info($dev);
    my $bus = $info->{bus};

    if ($bus ne 'ide' && $bus ne 'scsi') {
        return sprintf("%s%i", $bus, $info->{index});
    }

    my $mediastr = "-hd";
    $mediastr = "-cd" if $media eq 'cdrom';

    return sprintf("%s%i%s%i", $bus, $info->{controller},
                   $mediastr, $info->{unit});
}
698
# Cached result of get_cdrom_path().
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1, /dev/cdrom2 that is a
# symbolic link, caching the result. Returns undef explicitly when none
# of them is a symlink.
sub get_cdrom_path {

    return $cdrom_path if $cdrom_path;

    foreach my $candidate (qw(/dev/cdrom /dev/cdrom1 /dev/cdrom2)) {
        return $cdrom_path = $candidate if -l $candidate;
    }

    return undef;
}
709
# Resolve a cdrom drive's file setting to a path.
#   'cdrom'       -> result of get_cdrom_path()
#   'none'        -> empty string
#   absolute path -> returned unchanged
#   anything else -> resolved through PVE::Storage::path()
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
723
# Try to convert old style file names to volume IDs.
#
# 'none', 'cdrom', /dev/ paths and values that already look like
# "storage:volume" are returned unchanged. A bare file name becomes
# "local:iso/<file>" for media 'cdrom', otherwise "local:<vmid>/<file>".
# Any other value containing a '/' yields undef.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    my $is_keyword = ($file eq 'none' || $file eq 'cdrom');
    return $file
        if $is_keyword || $file =~ m|^/dev/.+| || $file =~ m/^([^:]+):(.+)$/;

    return undef if $file =~ m|/|;

    my $is_cdrom = $media && $media eq 'cdrom';

    return $is_cdrom ? "local:iso/$file" : "local:$vmid/$file";
}
742
# Check that volume type $vtype fits the drive media setting:
# media 'disk' expects 'image', media 'cdrom' expects 'iso'.
# Nothing is checked when $media is not set. Raises a parameter
# exception for option $opt on mismatch; dies with "internal error" for
# any other media value.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %expected_type_for = (disk => 'image', cdrom => 'iso');

    my $etype = $expected_type_for{$media};
    die "internal error" if !defined($etype);

    return if ($vtype eq $etype);

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
761
# Normalize $drive->{file} in place.
#
# A file value that is not 'cdrom'/'none', not a /dev/ path, not of the
# form "storage:volume" and not purely numeric is treated as a
# filesystem path and converted to a volume ID; a parameter exception
# for $opt is raised if no storage matches. An unset media type
# defaults to 'cdrom' for iso volumes and for 'cdrom'/'none'.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    my $file = $drive->{file};

    # try to convert filesystem paths to volume IDs
    my $needs_conversion = !($file =~ m/^(cdrom|none)$/
                             || $file =~ m|^/dev/.+|
                             || $file =~ m/^([^:]+):(.+)$/
                             || $file =~ m/^\d+$/);

    if ($needs_conversion) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $file);
        raise_param_exc({ $opt => "unable to associate path '$drive->{file}' to any storage"}) if !$vtype;
        $drive->{media} = 'cdrom' if !$drive->{media} && $vtype eq 'iso';
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
780
# Write the initial configuration file for VM $vmid (no locking is done
# here, as the name says).
#
# $settings is a hash of option => value. Missing name/memory are filled
# in ("vm<vmid>" and the default memory size). Options unknown to
# $confdesc are skipped. Dies if the configuration file already exists.
sub create_conf_nolock {
    my ($vmid, $settings) = @_;

    my $filename = config_file($vmid);

    die "configuration file '$filename' already exists\n" if -f $filename;

    my $defaults = load_defaults();

    $settings->{name} = "vm$vmid" if !$settings->{name};
    $settings->{memory} = $defaults->{memory} if !$settings->{memory};

    my $data = '';
    # sorted to get a stable, reproducible file layout
    foreach my $opt (sort keys %$settings) {
        next if !$confdesc->{$opt};

        my $value = $settings->{$opt};
        # Only skip unset/empty values. An explicit 0 must be written,
        # otherwise e.g. 'acpi => 0' is lost and the default (1) applies.
        next if !defined($value) || $value eq '';

        $data .= "$opt: $value\n";
    }

    PVE::Tools::file_set_contents($filename, $data);
}
805
# ideX = [volume=]volume-id[,media=d][,cyls=c,heads=h,secs=s[,trans=t]]
#        [,snapshot=on|off][,cache=off|none|writethrough|writeback]
#        [,format=f][,rerror=r][,werror=w][,backup=yes|no]
#        [,aio=native|threads]
#
# Parse a drive option value into a hash reference. $key is the option
# name (e.g. 'ide0'), from which 'interface' and 'index' are taken.
# Returns undef if the key or the value is not valid.
sub parse_drive {
    my ($key, $data) = @_;

    my $res = {};

    if (defined($key)) {
        return undef if $key !~ m/^([^\d]+)(\d+)$/;
        ($res->{interface}, $res->{index}) = ($1, $2);
    } else {
        # $key may be undefined - used to verify JSON parameters
        $res->{interface} = 'unknown'; # should not harm when used to verify parameters
        $res->{index} = 0;
    }

    foreach my $p (split (/,/, $data)) {
        next if $p =~ m/^\s*$/;

        my ($k, $v) = $p =~ m/^(file|volume|cyls|heads|secs|trans|media|snapshot|cache|format|rerror|werror|backup|aio)=(.+)$/;

        if (defined($k)) {
            $k = 'file' if $k eq 'volume';

            # every property may be given only once
            return undef if defined $res->{$k};

            $res->{$k} = $v;
            next;
        }

        # a bare word is accepted once, as the volume
        return undef if $res->{file} || $p =~ m/=/;

        $res->{file} = $p;
    }

    return undef if !$res->{file};

    # allowed values per property (only checked when the property is set)
    my %valid_value = (
        cache    => qr/^(off|none|writethrough|writeback)$/,
        snapshot => qr/^(on|off)$/,
        cyls     => qr/^\d+$/,
        heads    => qr/^\d+$/,
        secs     => qr/^\d+$/,
        media    => qr/^(disk|cdrom)$/,
        trans    => qr/^(none|lba|auto)$/,
        format   => qr/^(raw|cow|qcow|qcow2|vmdk|cloop)$/,
        rerror   => qr/^(ignore|report|stop)$/,
        werror   => qr/^(enospc|ignore|report|stop)$/,
        backup   => qr/^(yes|no)$/,
        aio      => qr/^(native|threads)$/,
    );

    foreach my $prop (keys %valid_value) {
        return undef if $res->{$prop} && $res->{$prop} !~ $valid_value{$prop};
    }

    if ($res->{media} && ($res->{media} eq 'cdrom')) {
        # disk-only properties are rejected for cdrom drives
        foreach my $prop (qw(snapshot trans format heads secs cyls)) {
            return undef if $res->{$prop};
        }
        return undef if $res->{interface} eq 'virtio';
    }

    # rerror does not work with scsi drives
    return undef if $res->{rerror} && $res->{interface} eq 'scsi';

    return $res;
}
875
# Drive properties that are passed on to qemu, in output order.
my @qemu_drive_options = qw(heads secs cyls trans media format cache snapshot rerror werror aio);

# Serialize a parsed drive hash back into its config file form:
# "<file>[,<opt>=<value>]...". Only options with a true value are
# emitted; 'backup' comes last.
sub print_drive {
    my ($vmid, $drive) = @_;

    my @parts = ($drive->{file});

    foreach my $o (@qemu_drive_options, 'backup') {
        my $value = $drive->{$o};
        push @parts, "$o=$value" if $value;
    }

    return join(',', @parts);
}
888
# Build the qemu device description string for a parsed drive.
# Supported interfaces are virtio, scsi (7 units per controller) and
# ide (2 units per controller); anything else yields an empty string.
# $storecfg and $vmid are accepted but not used here.
sub print_drivedevice_full {
    my ($storecfg, $vmid, $drive) = @_;

    my $interface = $drive->{interface};
    my $drive_id = "$drive->{interface}$drive->{index}";

    if ($interface eq 'virtio') {
        my $pciaddr = print_pci_addr($drive_id);
        return "virtio-blk-pci,drive=drive-${drive_id},id=device-${drive_id}$pciaddr";
    }

    if ($interface eq 'scsi') {
        my $maxdev = 7;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;

        return "scsi-disk,bus=scsi$controller.0,scsi-id=$unit,drive=drive-${drive_id},id=device-${drive_id}";
    }

    if ($interface eq 'ide') {
        my $maxdev = 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;

        return "ide-drive,bus=ide.$controller,unit=$unit,drive=drive-${drive_id},id=device-${drive_id}";
    }

    # no device string for any other interface (including 'usb')
    return '';
}
924
# Build the qemu drive option string for a parsed drive:
# "[file=<path>,]if=none,id=drive-<interface><index>[,<opt>=<value>]...".
# The path comes from get_iso_path() for cdrom drives; otherwise an
# absolute path is used as is and anything else is resolved through
# PVE::Storage::path().
sub print_drive_full {
    my ($storecfg, $vmid, $drive) = @_;

    my $opts = join('', map { ",$_=$drive->{$_}" }
                        grep { $drive->{$_} } @qemu_drive_options);

    # use linux-aio by default (qemu default is threads)
    $opts .= ",aio=native" if !$drive->{aio};

    my $volid = $drive->{file};
    my $path;
    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
    } elsif ($volid =~ m|^/|) {
        $path = $volid;
    } else {
        $path = PVE::Storage::path($storecfg, $volid);
    }

    # no file= part when there is no path (e.g. empty cdrom drive)
    my $pathinfo = '';
    $pathinfo = "file=$path," if $path;

    return "${pathinfo}if=none,id=drive-$drive->{interface}$drive->{index}$opts";
}
952
953
# Return a true value if $drive is set and its media is 'cdrom',
# otherwise a false value.
sub drive_is_cdrom {
    my ($drive) = @_;

    return $drive if !$drive;

    my $media = $drive->{media};
    return $media if !$media;

    return ($media eq 'cdrom');
}
960
# Parse a host PCI device id of the form 'bus:dev.func' (lower case hex
# digits). Returns { pciid => $value }, or undef for an empty or
# malformed value.
sub parse_hostpci {
    my ($value) = @_;

    return undef if !$value;

    return undef if $value !~ m/^[a-f0-9]{2}:[a-f0-9]{2}\.[a-f0-9]$/;

    return { pciid => $value };
}
976
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a network option value into a hash reference with the keys
# model, macaddr and optionally bridge and rate. The model is lower
# cased and the MAC address upper cased; when no MAC address is given,
# random_ether_addr() supplies one. Returns undef on unknown parts or
# when no model is given.
sub parse_net {
    my ($data) = @_;

    my $res = {};

    foreach my $kvp (split(/,/, $data)) {

        # captures: model, "=mac", mac, last mac octet
        if (my ($model, undef, $mac) = $kvp =~ m/^(ne2k_pci|e1000|rtl8139|pcnet|virtio|ne2k_isa|i82551|i82557b|i82559er)(=([0-9a-f]{2}(:[0-9a-f]{2}){5}))?$/i) {
            $res->{model} = lc($model);
            $res->{macaddr} = uc($mac) || random_ether_addr();
            next;
        }

        if ($kvp =~ m/^bridge=(\S+)$/) {
            $res->{bridge} = $1;
            next;
        }

        if ($kvp =~ m/^rate=(\d+(\.\d+)?)$/) {
            $res->{rate} = $1;
            next;
        }

        return undef;
    }

    return undef if !$res->{model};

    return $res;
}
1004
# Serialize a parsed network hash back into its config file form:
# "<model>[=<macaddr>][,bridge=<bridge>][,rate=<rate>]".
sub print_net {
    my ($net) = @_;

    my $model_part = "$net->{model}";
    $model_part = "$net->{model}=$net->{macaddr}" if $net->{macaddr};

    my @parts = ($model_part);
    push @parts, "bridge=$net->{bridge}" if $net->{bridge};
    push @parts, "rate=$net->{rate}" if $net->{rate};

    return join(',', @parts);
}
1015
# Re-serialize every parsable netN entry of $settings in place. Since
# parse_net() supplies a MAC address when none is given, every such
# entry carries an explicit MAC address afterwards. Entries that fail
# to parse are left untouched.
sub add_random_macs {
    my ($settings) = @_;

    foreach my $opt (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$opt});
        $settings->{$opt} = print_net($net) if $net;
    }
}
1026
# Record $volid in the lowest free unusedN slot.
#
# Slots are looked up in $config; the new entry is written to $res.
# Nothing is added when $volid is already referenced by any unusedN
# slot. Dies when all slots are occupied.
sub add_unused_volume {
    my ($config, $res, $volid) = @_;

    my $key;
    # scan downwards so $key ends up as the lowest free slot
    foreach my $ind (reverse(0 .. ($MAX_UNUSED_DISKS - 1))) {
        my $slot = "unused$ind";
        my $vid = $config->{$slot};

        if (!$vid) {
            $key = $slot;
            next;
        }

        return if $vid eq $volid; # do not add duplicates
    }

    die "To many unused volume - please delete them first.\n" if !$key;

    $res->{$key} = $volid;
}
1044
# fixme: remove all those $noerr parameters?
1046
PVE::JSONSchema::register_format('pve-qm-bootdisk', \&verify_bootdisk);
# Format check for 'pve-qm-bootdisk': returns $value if it is a valid
# drive name. Otherwise returns undef when $noerr is set, else dies.
sub verify_bootdisk {
    my ($value, $noerr) = @_;

    if (!valid_drivename($value)) {
        return undef if $noerr;
        die "invalid boot disk '$value'\n";
    }

    return $value;
}
1057
PVE::JSONSchema::register_format('pve-qm-net', \&verify_net);
# Format check for 'pve-qm-net': returns $value if parse_net() accepts
# it. Otherwise returns undef when $noerr is set, else dies.
sub verify_net {
    my ($value, $noerr) = @_;

    if (!parse_net($value)) {
        return undef if $noerr;
        die "unable to parse network options\n";
    }

    return $value;
}
1068
PVE::JSONSchema::register_format('pve-qm-drive', \&verify_drive);
# Format check for 'pve-qm-drive': returns $value if parse_drive()
# accepts it (parsed without an option name). Otherwise returns undef
# when $noerr is set, else dies.
sub verify_drive {
    my ($value, $noerr) = @_;

    if (!parse_drive(undef, $value)) {
        return undef if $noerr;
        die "unable to parse drive options\n";
    }

    return $value;
}
1079
PVE::JSONSchema::register_format('pve-qm-hostpci', \&verify_hostpci);
# Format check for 'pve-qm-hostpci': returns $value if parse_hostpci()
# accepts it. Otherwise returns undef when $noerr is set, else dies.
sub verify_hostpci {
    my ($value, $noerr) = @_;

    if (!parse_hostpci($value)) {
        return undef if $noerr;
        die "unable to parse pci id\n";
    }

    return $value;
}
1090
PVE::JSONSchema::register_format('pve-qm-watchdog', \&verify_watchdog);
# Format checker for 'pve-qm-watchdog': returns $value when
# parse_watchdog() accepts it. Otherwise returns undef if $noerr is set,
# else dies.
sub verify_watchdog {
    my ($value, $noerr) = @_;

    return $value if parse_watchdog($value);

    die "unable to parse watchdog options\n" if !$noerr;

    return undef;
}
1101
# Parse a comma separated watchdog option string into a hash ref with
# the optional keys 'model' and 'action'. Each part may be written with
# or without its 'model=' / 'action=' prefix; empty parts are ignored.
# Returns undef for a false $value or when any part is not recognized.
sub parse_watchdog {
    my ($value) = @_;

    return undef if !$value;

    my $res = {};

    foreach my $part (grep { $_ !~ m/^\s*$/ } split(/,/, $value)) {
        if ($part =~ m/^(model=)?(i6300esb|ib700)$/) {
            $res->{model} = $2;
        } elsif ($part =~ m/^(action=)?(reset|shutdown|poweroff|pause|debug|none)$/) {
            $res->{action} = $2;
        } else {
            return undef;
        }
    }

    return $res;
}
1123
# Parse a comma separated usb device string. Every part must be either
#   host=<vendorid>:<productid>   (4 hex digits each), or
#   host=<bus>-<port>[.<port>...]
# Returns a hash ref with vendorid/productid and/or hostbus/hostport,
# or undef when $value is false, contains an unknown part, or contains
# no device at all.
sub parse_usb_device {
    my ($value) = @_;

    return undef if !$value;

    my $res = {};

    foreach my $part (split(/,/, $value)) {
        if ($part =~ m/^host=([0-9A-Fa-f]{4}):([0-9A-Fa-f]{4})$/) {
            @$res{qw(vendorid productid)} = ($1, $2);
        } elsif ($part =~ m/^host=(\d+)\-(\d+(\.\d+)*)$/) {
            @$res{qw(hostbus hostport)} = ($1, $2);
        } else {
            return undef;
        }
    }

    # nothing matched at all (e.g. value consisted of separators only)
    return undef if !%$res;

    return $res;
}
1150
PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);
# Format checker for 'pve-qm-usb-device': returns $value when
# parse_usb_device() accepts it. Otherwise returns undef if $noerr is
# set, else dies.
sub verify_usb_device {
    my ($value, $noerr) = @_;

    if (!parse_usb_device($value)) {
        return undef if $noerr;
        die "unable to parse usb device\n";
    }

    return $value;
}
1161
# add JSON properties for create and set function
#
# Copies every option description from $confdesc into the passed
# property hash (overwriting entries with the same name) and returns it.
sub json_config_properties {
    my $prop = shift;

    $prop->{$_} = $confdesc->{$_} for keys %$confdesc;

    return $prop;
}
1172
# Validate $value against the type declared for option $key in
# $confdesc and return the normalized value:
#   boolean - 1 or 0 (accepts 1/on/yes/true and 0/off/no/false)
#   integer - the numeric value (unsigned decimal digits only)
#   string  - the value itself; checked against the declared format if
#             there is one, otherwise one pair of surrounding double
#             quotes is stripped
# Dies on unknown keys, undefined values, embedded line breaks and on
# values that do not match the declared type.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);

    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    die "internal error" if $type ne 'string';

    my $fmt = $desc->{format};
    if (!$fmt) {
        # plain string - remove enclosing quotes
        $value =~ s/^\"(.*)\"$/$1/;
        return $value;
    }

    if ($fmt eq 'pve-qm-drive') {
        # special case - we need to pass $key to parse_drive()
        return $value if parse_drive($key, $value);
        die "unable to parse drive options\n";
    }

    PVE::JSONSchema::check_format($fmt, $value);

    return $value;
}
1212
# Run $code (with @param) via lock_file() on the lock file of VM $vmid,
# using a timeout argument of 10. Re-throws whatever lock_file() left
# in $@.
sub lock_config {
    my ($vmid, $code, @param) = @_;

    lock_file(config_file_lock($vmid), 10, $code, @param);

    die $@ if $@;
}
1222
# Return the cluster file system path (relative) of the config file for
# VM $vmid on $node. $node defaults to the local node name.
sub cfs_config_path {
    my ($vmid, $node) = @_;

    $node ||= $nodename;

    return "nodes/$node/qemu-server/$vmid.conf";
}
1229
# Placeholder for an IOMMU capability check - currently always reports
# support (returns 1).
sub check_iommu_support {
    #fixme : need to check IOMMU support
    #http://www.linux-kvm.org/page/How_to_assign_devices_with_VT-d_in_KVM

    return 1;
}
1238
# Return the absolute path (below /etc/pve) of the config file for VM
# $vmid on $node (see cfs_config_path() for the $node default).
sub config_file {
    my ($vmid, $node) = @_;

    return "/etc/pve/" . cfs_config_path($vmid, $node);
}
1245
# Return the path of the lock file used to serialize config access for
# VM $vmid.
sub config_file_lock {
    my ($vmid) = @_;

    return join('/', $lock_dir, "lock-$vmid.conf");
}
1251
# Set access and modification time of the VM config file to "now".
sub touch_config {
    my ($vmid) = @_;

    utime undef, undef, config_file($vmid);
}
1258
# Walk all non-cdrom drives in $settings. A drive whose 'file' has the
# form "[<storeid>:]<size>" gets a new volume allocated (storage 'local'
# when no storeid is given) and its entry in $settings is rewritten to
# reference the new volume. Any other drive must point to an existing
# file or block device.
#
# Returns an array ref with the newly allocated volume ids. On error all
# volumes allocated so far are freed again and the error is re-thrown.
sub create_disks {
    my ($storecfg, $vmid, $settings) = @_;

    my $vollist = [];

    my $handle_drive = sub {
        my ($ds, $disk) = @_;

        return if drive_is_cdrom($disk);

        my $file = $disk->{file};

        my @spec = ($file =~ m/^(([^:\s]+):)?(\d+(\.\d+)?)$/);

        if (!@spec) {
            # existing image - just verify that it is there
            my $path = ($file =~ m|^/dev/.+|)
                ? $file
                : PVE::Storage::path($storecfg, $file);

            die "image '$path' does not exists\n" if !(-f $path || -b $path);

            return;
        }

        my $storeid = $spec[1] || 'local';
        my $size = $spec[2];

        my $defformat = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $fmt = $disk->{format} || $defformat;
        syslog('info', "VM $vmid creating new disk - size is $size GB");

        my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid,
                                              $fmt, undef, $size*1024*1024);

        $disk->{file} = $volid;
        delete $disk->{format}; # no longer needed
        push @$vollist, $volid;
        $settings->{$ds} = PVE::QemuServer::print_drive($vmid, $disk);
    };

    eval { foreach_drive($settings, $handle_drive); };

    if (my $err = $@) {
        syslog('err', "VM $vmid creating disks failed");
        # roll back - remove everything we allocated so far
        foreach my $volid (@$vollist) {
            eval { PVE::Storage::vdisk_free($storecfg, $volid); };
            warn $@ if $@;
        }
        die $err;
    }

    return $vollist;
}
1313
# Free the storage volume $volid of VM $vmid and touch the VM config.
# Refuses absolute paths and volumes whose owner (as reported by
# PVE::Storage::path) is not this VM.
sub unlink_image {
    my ($storecfg, $vmid, $volid) = @_;

    if ($volid =~ m|^/|) {
        die "reject to unlink absolute path '$volid'";
    }

    my ($path, $owner) = PVE::Storage::path($storecfg, $volid);

    my $owned = $owner && ($owner == $vmid);
    die "reject to unlink '$volid' - not owned by this VM" if !$owned;

    syslog('info', "VM $vmid deleting volume '$volid'");

    PVE::Storage::vdisk_free($storecfg, $volid);

    touch_config($vmid);
}
1331
# Destroy VM $vmid: free all configured disks owned by the VM, remove
# the config file, then free every remaining volume the storage layer
# lists for this VM (e.g. unused disks).
#
# Dies if the VM config cannot be loaded, if the VM is locked, or if
# freeing one of the configured disks fails. Errors while removing the
# remaining volumes are only reported as warnings.
sub destroy_vm {
    my ($storecfg, $vmid) = @_;

    my $conffile = config_file($vmid);

    my $conf = load_config($vmid);

    check_lock($conf);

    # only remove disks owned by this VM
    foreach_drive($conf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);

        # note: use 'return' (not 'next') to skip a drive - we are inside
        # a callback, not directly inside a loop
        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        PVE::Storage::vdisk_free($storecfg, $volid);
    });

    unlink $conffile;

    # also remove unused disk
    eval {
        my $dl = PVE::Storage::vdisk_list($storecfg, undef, $vmid);

        PVE::Storage::foreach_volid($dl, sub {
            my ($volid, $sid, $volname, $d) = @_;
            PVE::Storage::vdisk_free($storecfg, $volid);
        });
    };
    warn $@ if $@;
}
1373
# fixme: remove?
#
# Return a hash ref mapping each drive name in $conf to its parsed drive
# hash, extended by a 'disksize' entry (recorded size / (1024*1024), or
# 0 when no size information was found). Sizes of /dev/ paths come from
# PVE::Storage::file_size_info(), all others from
# PVE::Storage::vdisk_list(); errors from the latter are only warned
# about.
sub load_diskinfo_old {
    my ($storecfg, $vmid, $conf) = @_;

    my $sizeinfo = {};
    my $drives = {};
    my $vollist;

    foreach_drive($conf, sub {
        my ($ds, $di) = @_;

        $drives->{$ds} = $di;

        return if drive_is_cdrom($di);

        my $file = $di->{file};
        if ($file =~ m|^/dev/.+|) {
            $sizeinfo->{$file}->{size} = PVE::Storage::file_size_info($file);
        } else {
            push @$vollist, $file;
        }
    });

    eval {
        my $dl = PVE::Storage::vdisk_list($storecfg, undef, $vmid, $vollist);

        PVE::Storage::foreach_volid($dl, sub {
            my ($volid, $sid, $volname, $d) = @_;
            $sizeinfo->{$volid} = $d;
        });
    };
    warn $@ if $@;

    foreach my $di (values %$drives) {
        my $entry = $sizeinfo->{$di->{file}};
        $di->{disksize} = $entry ? $entry->{size} / (1024*1024) : 0;
    }

    return $drives;
}
1415
# Read and return the configuration of VM $vmid (on the local node) via
# the cluster file system. Dies if no config is available.
sub load_config {
    my ($vmid) = @_;

    my $conf = PVE::Cluster::cfs_read_file(cfs_config_path($vmid));

    if (!defined($conf)) {
        die "no such VM ('$vmid')\n";
    }

    return $conf;
}
1427
# Parser callback for VM config files (registered with
# cfs_register_file). Returns undef when there is no content, otherwise
# a hash ref with all settings plus a 'digest' (SHA1 of the raw data).
#
# - comment and empty lines are skipped
# - 'description' is run through PVE::Tools::decode_text()
# - 'args' is taken verbatim
# - any other "key: value" line is validated with check_type(); invalid
#   entries are skipped with a warning
# - drive values get their file name mapped by filename_to_volume_id()
# - the legacy 'cdrom' key is stored as 'ide2', legacy 'smp' becomes
#   'sockets' (unless 'sockets' is set) and is then dropped
sub parse_vm_config {
    my ($filename, $raw) = @_;

    return undef if !defined($raw);

    my $res = {
        digest => Digest::SHA1::sha1_hex($raw),
    };

    die "got strange filename '$filename'"
        if $filename !~ m|/qemu-server/(\d+)\.conf$|;

    my $vmid = $1;

    # consume the raw data line by line
    while ($raw && $raw =~ s/^(.*?)(\n|$)//) {
        my $line = $1;

        next if $line =~ m/^\#/;

        next if $line =~ m/^\s*$/;

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            my ($key, $text) = ($1, $2);
            $res->{$key} = PVE::Tools::decode_text($text);
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            my ($key, $text) = ($1, $2);
            $res->{$key} = $text;
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(\S+)\s*$/) {
            my ($key, $value) = ($1, $2);

            eval { $value = check_type($key, $value); };
            if ($@) {
                warn "vm $vmid - unable to parse value of '$key' - $@";
                next;
            }

            my $fmt = $confdesc->{$key}->{format};
            if ($fmt && $fmt eq 'pve-qm-drive') {
                my $drive = parse_drive($key, $value);
                my $volid = filename_to_volume_id($vmid, $drive->{file}, $drive->{media});
                if (!$volid) {
                    warn "vm $vmid - unable to parse value of '$key'\n";
                    next;
                }
                $drive->{file} = $volid;
                $value = print_drive($vmid, $drive);
            }

            # 'cdrom' is just an alias for 'ide2'
            $res->{ $key eq 'cdrom' ? 'ide2' : $key } = $value;
        }
    }

    # convert old smp to sockets
    $res->{sockets} = $res->{smp} if $res->{smp} && !$res->{sockets};
    delete $res->{smp};

    return $res;
}
1493
# Apply $settings / $unset to the config of VM $vmid while holding the
# VM config lock. See change_config_nolock() for the actual work.
sub change_config {
    my ($vmid, $settings, $unset, $skiplock) = @_;

    # pass a closure: a bare '&change_config_nolock' would call the
    # function right here (before the lock is taken) instead of handing
    # it over to lock_config()
    lock_config($vmid, sub {
        change_config_nolock($vmid, $settings, $unset, $skiplock);
    });
}
1499
# Rewrite the config file of VM $vmid without taking the config lock.
#
# $settings: hash ref of options to set (validated with check_type();
#            'digest' is ignored, 'cdrom' is stored as 'ide2', 'smp' is
#            converted to 'sockets')
# $unset:    hash ref whose keys name the options to remove
# $skiplock: skip the check_lock() call
#
# The existing file is copied line by line into a temporary file -
# replacing changed values, dropping unset keys, duplicate keys and
# 'unusedX' entries for volumes that get (re-)added - then new keys are
# appended and the temporary file is renamed over the original.
# Dies on validation, parse and I/O errors; the temporary file is
# removed in that case.
sub change_config_nolock {
    my ($vmid, $settings, $unset, $skiplock) = @_;

    my $res = {};

    $unset->{ide2} = $unset->{cdrom} if $unset->{cdrom};

    check_lock($settings) if !$skiplock;

    # we do not use 'smp' any longer
    if (!$settings->{sockets} && $settings->{smp}) {
        $settings->{sockets} = $settings->{smp};
    }
    $unset->{smp} = 1 if $settings->{sockets};

    my $new_volids = {};

    foreach my $key (grep { $_ ne 'digest' } keys %$settings) {
        my $value = $settings->{$key};
        $value = PVE::Tools::encode_text($value) if $key eq 'description';

        eval { $value = check_type($key, $value); };
        die "unable to parse value of '$key' - $@" if $@;

        $res->{ $key eq 'cdrom' ? 'ide2' : $key } = $value;

        next if !valid_drivename($key);

        my $drive = PVE::QemuServer::parse_drive($key, $value);
        $new_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
    }

    my $filename = config_file($vmid);
    my $tmpfn = "$filename.$$.tmp";

    my $fh = IO::File->new($filename, "r") ||
        die "unable to read config for VM $vmid\n";

    my $werror = "unable to write config for VM $vmid\n";

    my $out = IO::File->new($tmpfn, "w") || die $werror;

    eval {

        my %done;

        while (my $line = <$fh>) {

            # copy comments and empty lines unchanged
            if (($line =~ m/^\#/) || ($line =~ m/^\s*$/)) {
                print $out $line or die $werror;
                next;
            }

            die "unable to parse config file: $line\n"
                if $line !~ m/^([a-z][a-z_]*\d*):\s*(.*\S)\s*$/;

            my ($key, $value) = ($1, $2);

            # remove 'unusedX' settings if we re-add a volume
            next if $key =~ m/^unused/ && $new_volids->{$value};

            # convert 'smp' to 'sockets'
            $key = 'sockets' if $key eq 'smp';

            # only the first occurrence of a key is kept
            next if $done{$key}++;

            $value = delete $res->{$key} if defined($res->{$key});

            next if defined($unset->{$key});

            print $out "$key: $value\n" or die $werror;
        }

        # append settings which were not in the file before
        foreach my $key (keys %$res) {
            next if defined($unset->{$key});
            print $out "$key: $res->{$key}\n" or die $werror;
        }
    };

    my $err = $@;

    $fh->close();

    # remove the temporary file and give up
    my $abort = sub {
        my ($msg) = @_;
        unlink $tmpfn;
        die $msg;
    };

    if ($err) {
        $out->close();
        $abort->($err);
    }

    $abort->("close failed - $!\n") if !$out->close();

    $abort->("rename failed - $!\n") if !rename($tmpfn, $filename);
}
1616
# Return a hash ref with the default value of every option that
# declares one in $confdesc. The keyboard layout is overridden by the
# 'keyboard' setting from datacenter.cfg, if present.
sub load_defaults {

    my $res = {};

    # we use static defaults from our JSON schema configuration
    foreach my $key (grep { defined($confdesc->{$_}->{default}) } keys %$confdesc) {
        $res->{$key} = $confdesc->{$key}->{default};
    }

    my $dccfg = PVE::Cluster::cfs_read_file('datacenter.cfg');
    if (my $kb = $dccfg->{keyboard}) {
        $res->{keyboard} = $kb;
    }

    return $res;
}
1633
# Return a hash ref { <vmid> => { exists => 1 } } with one entry for
# every VM of type 'qemu' that the cluster VM list assigns to the local
# node.
sub config_list {
    my $res = {};

    my $vmlist = PVE::Cluster::get_vmlist();
    my $ids = $vmlist && $vmlist->{ids};
    return $res if !$ids;

    foreach my $vmid (keys %$ids) {
        my $d = $ids->{$vmid};
        next unless $d->{node} && $d->{node} eq $nodename;
        next unless $d->{type} && $d->{type} eq 'qemu';
        $res->{$vmid}->{exists} = 1;
    }

    return $res;
}
1648
# test if VM uses local resources (to prevent migration)
#
# Local resources are the old style 'hostusb' / 'hostpci' settings and
# any usbN, hostpciN, serialN or parallelN entry. Returns 1 if one is
# configured, else 0. Dies instead of returning 1 unless $noerr is set.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    # old syntax
    my $loc_res = ($conf->{hostusb} || $conf->{hostpci}) ? 1 : 0;

    $loc_res = 1 if grep { m/^(usb|hostpci|serial|parallel)\d+$/ } keys %$conf;

    die "VM uses local resources\n" if $loc_res && !$noerr;

    return $loc_res;
}
1666
# Die if the given config has a 'lock' setting.
sub check_lock {
    my ($conf) = @_;

    if ($conf->{lock}) {
        die "VM is locked ($conf->{lock})\n";
    }
}
1672
# Check /proc/$pid/cmdline: returns 1 if the process command ends in
# 'kvm' and the argument following its first '-pidfile' / '--pidfile'
# option equals $pidfile. Returns a false value in all other cases
# (including an unreadable or empty cmdline).
sub check_cmdline {
    my ($pidfile, $pid) = @_;

    my $fh = IO::File->new("/proc/$pid/cmdline", "r");
    return undef if !defined($fh);

    my $line = <$fh>;
    $fh->close;
    return undef if !$line;

    # arguments are separated by NUL bytes
    my @param = split(/\0/, $line);

    my $cmd = $param[0];
    return if !$cmd || ($cmd !~ m|kvm$|);

    foreach my $i (0 .. $#param) {
        my $arg = $param[$i];
        next if !$arg;
        next unless ($arg eq '-pidfile') || ($arg eq '--pidfile');

        my $file = $param[$i+1];
        return 1 if $file && ($file eq $pidfile);
        return undef;
    }

    return undef;
}
1698
# Return the pid of the running kvm process of VM $vmid, or undef if
# the VM is not running.
#
# Unless $nocheck is set, dies when the VM has no config file. The pid
# is read from the VM pid file and only accepted if check_cmdline()
# confirms that the process was started with that pid file and
# PVE::ProcFSTools::check_process_running() reports it as running.
sub check_running {
    my ($vmid, $nocheck) = @_;

    my $filename = config_file($vmid);

    die "unable to find configuration file for VM $vmid - no such machine\n"
        if !$nocheck && ! -f $filename;

    my $pidfile = pidfile_name($vmid);

    # pass the open mode separately instead of embedding it in the name
    my $fd = IO::File->new($pidfile, "r");
    return undef if !$fd;

    my $st = stat($fd);
    my $line = <$fd>;
    close($fd);

    # the time stamp we look at is the one of the pid file
    if ($st && $st->mtime > time()) {
        warn "file '$pidfile' modified in future\n";
    }

    # empty pid file
    return undef if !defined($line);

    if ($line =~ m/^(\d+)$/) {
        my $pid = $1;
        if (check_cmdline($pidfile, $pid)) {
            if (my $pinfo = PVE::ProcFSTools::check_process_running($pid)) {
                return $pid;
            }
        }
    }

    return undef;
}
1731
# Return the VM list from config_list(), with a 'pid' entry added for
# every VM that has a pid file in $var_run_tmpdir and passes
# check_running(). If the directory cannot be opened the plain list is
# returned.
sub vzlist {

    my $list = config_list();

    my $dir = IO::Dir->new($var_run_tmpdir);
    return $list if !$dir;

    while (defined(my $entry = $dir->read)) {
        next unless $entry =~ m/^(\d+)\.pid$/;
        my $vmid = $1;

        next unless defined($list->{$vmid});

        my $pid = check_running($vmid);
        $list->{$vmid}->{pid} = $pid if $pid;
    }

    return $list;
}
1749
# time of the last failed size query, indexed by storage id (or by path
# for absolute file names)
my $storage_timeout_hash = {};

# Return the size of the boot disk of $conf - ($size, $used) in list
# context, $size in scalar context.
#
# Returns undef if there is no usable boot disk (not set, not a valid
# drive name, not configured, unparsable, a cdrom, or without file), if
# the size query fails, or if a query for the same storage/path failed
# less than 30 seconds ago.
sub disksize {
    my ($storecfg, $conf) = @_;

    my $bootdisk = $conf->{bootdisk};
    return undef if !$bootdisk || !valid_drivename($bootdisk);

    return undef if !$conf->{$bootdisk};

    my $drive = parse_drive($bootdisk, $conf->{$bootdisk});
    return undef if !defined($drive) || drive_is_cdrom($drive);

    my $volid = $drive->{file};
    return undef if !$volid;

    my ($path, $timeoutid);

    if ($volid =~ m|^/|) {
        $timeoutid = $volid;
        $path = $volid;
    } else {
        $timeoutid = PVE::Storage::parse_volume_id($volid);
        $path = PVE::Storage::path($storecfg, $volid);
    }

    if (my $last_timeout = $storage_timeout_hash->{$timeoutid}) {
        # skip storage with errors
        return undef if (time() - $last_timeout) < 30;

        delete $storage_timeout_hash->{$timeoutid};
    }

    my ($size, $format, $used) = PVE::Storage::file_size_info($path, 1);

    if (!defined($format)) {
        # got timeout
        $storage_timeout_hash->{$timeoutid} = time();
        return undef;
    }

    return wantarray ? ($size, $used) : $size;
}
1801
# cpu usage sample of the previous vmstatus() call, indexed by pid
my $last_proc_pid_stat;

# Return a hash ref with status information for all local VMs (or only
# for $opt_vmid, if given), indexed by vmid. Each entry contains: pid,
# status, disk, maxdisk, cpus, name, maxmem, uptime, cpu, relcpu, mem,
# netin, netout, diskread and diskwrite.
#
# cpu/relcpu are computed relative to the sample stored by the previous
# call for the same pid, so they are 0 on the first call for a process.
sub vmstatus {
    my ($opt_vmid) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    # static information from the configuration
    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);

        my $conf = PVE::Cluster::cfs_read_file(cfs_config_path($vmid)) || {};

        my $pid = $list->{$vmid}->{pid};

        my ($size, $used) = disksize($storecfg, $conf);
        my $have_size = defined($size) && defined($used);

        $res->{$vmid} = {
            pid => $pid,
            # fixme: better status?
            status => $pid ? 'running' : 'stopped',
            disk => $have_size ? $used : 0,
            maxdisk => $have_size ? $size : 0,
            cpus => ($conf->{sockets} || 1) * ($conf->{cores} || 1),
            name => $conf->{name} || "VM $vmid",
            maxmem => $conf->{memory} ? $conf->{memory}*(1024*1024) : 0,
            uptime => 0,
            cpu => 0,
            relcpu => 0,
            mem => 0,
            netout => 0,
            netin => 0,
            diskread => 0,
            diskwrite => 0,
        };
    }

    # network counters of the tap devices (tap<vmid>i<n>); what the host
    # receives on the tap device is counted as 'netout' of the VM
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^tap([1-9]\d*)i/;
        my $d = $res->{$1};
        next if !$d;

        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};
    }

    my $cpucount = $cpuinfo->{cpus} || 1;
    my $ctime = gettimeofday;

    # dynamic information of running processes
    foreach my $d (values %$res) {

        my $pid = $d->{pid};
        next if !$pid;

        if (my $fh = IO::File->new("/proc/$pid/io", "r")) {
            my $io = {};
            while (defined(my $line = <$fh>)) {
                $io->{$1} = $2 if $line =~ m/^([rw]char):\s+(\d+)$/;
            }
            close($fh);
            $d->{diskread} = $io->{rchar} || 0;
            $d->{diskwrite} = $io->{wchar} || 0;
        }

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        # never assume more cpus than the host has
        my $vcpus = $d->{cpus} > $cpucount ? $cpucount : $d->{cpus};

        $d->{uptime} = int(($uptime - $pstat->{starttime})/$cpuinfo->{user_hz});

        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss}/$pstat->{vsize})*$d->{maxmem});
        }

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first sample for this process - nothing to compare with
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
                relcpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime <= 1000) {
            # interval too short - report the previous values
            $d->{cpu} = $old->{cpu};
            $d->{relcpu} = $old->{relcpu};
            next;
        }

        my $dutime = $used - $old->{used};

        $d->{cpu} = $dutime/$dtime;
        $d->{relcpu} = ($d->{cpu} * $cpucount) / $vcpus;
        $last_proc_pid_stat->{$pid} = {
            time => $ctime,
            used => $used,
            cpu => $d->{cpu},
            relcpu => $d->{relcpu},
        };
    }

    return $res;
}
1930
# Call $func->($drivename, $drive) for every entry of $conf whose key is
# a valid drive name and whose value can be parsed by parse_drive().
sub foreach_drive {
    my ($conf, $func) = @_;

    foreach my $ds (grep { valid_drivename($_) } keys %$conf) {
        my $drive = parse_drive($ds, $conf->{$ds}) || next;

        $func->($ds, $drive);
    }
}
1943
# Build the kvm command line for VM $vmid from its configuration.
#
# $storecfg:    storage configuration (used to print the drive options)
# $conf:        VM configuration hash
# $defaults:    default values (see load_defaults())
# $migrate_uri: optional, passed to kvm as '-incoming' argument
#
# Returns the command as array ref; in list context additionally an
# array ref with the storage volume ids used by the drives.
# Dies if the kvm binary is too old, if a configured serial/parallel
# device does not exist, if a net index is out of range, or if kvm is
# enabled but the cpu lacks hardware virtualization support.
sub config_to_command {
    my ($storecfg, $vmid, $conf, $defaults, $migrate_uri) = @_;

    my $cmd = [];

    my $kvmver = kvm_user_version();
    my $vernum = 0; # unknown
    if ($kvmver =~ m/^(\d+)\.(\d+)\.(\d+)$/) {
        $vernum = $1*1000000+$2*1000+$3;
    }

    die "detected old qemu-kvm binary ($kvmver)\n" if $vernum < 14000;

    my $have_ovz = -f '/proc/vz/vestat';

    push @$cmd, '/usr/bin/kvm';

    push @$cmd, '-id', $vmid;

    my $use_virtio = 0;

    my $socket = monitor_socket($vmid);
    push @$cmd, '-chardev', "socket,id=monitor,path=$socket,server,nowait";
    push @$cmd, '-mon', "chardev=monitor,mode=readline";

    $socket = vnc_socket($vmid);
    push @$cmd, '-vnc', "unix:$socket,x509,password";

    push @$cmd, '-pidfile' , pidfile_name($vmid);

    push @$cmd, '-daemonize';

    push @$cmd, '-incoming', $migrate_uri if $migrate_uri;

    # include usb device config
    push @$cmd, '-readconfig', '/usr/share/qemu-server/pve-usb.cfg';

    # enable absolute mouse coordinates (needed by vnc)
    my $tablet = defined($conf->{tablet}) ? $conf->{tablet} : $defaults->{tablet};
    push @$cmd, '-device', 'usb-tablet,bus=ehci.0,port=6' if $tablet;

    # host pci devices
    for (my $i = 0; $i < $MAX_HOSTPCI_DEVICES; $i++) {
        my $d = parse_hostpci($conf->{"hostpci$i"});
        next if !$d;
        push @$cmd, '-device', "pci-assign,host=$d->{pciid},id=hostpci$i";
    }

    # usb devices
    for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) {
        my $d = parse_usb_device($conf->{"usb$i"});
        next if !$d;
        if ($d->{vendorid} && $d->{productid}) {
            push @$cmd, '-device', "usb-host,vendorid=$d->{vendorid},productid=$d->{productid}";
        } elsif (defined($d->{hostbus}) && defined($d->{hostport})) {
            push @$cmd, '-device', "usb-host,hostbus=$d->{hostbus},hostport=$d->{hostport}";
        }
    }

    # serial devices
    for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
        if (my $path = $conf->{"serial$i"}) {
            die "no such serial device\n" if ! -c $path;
            push @$cmd, '-chardev', "tty,id=serial$i,path=$path";
            push @$cmd, '-device', "isa-serial,chardev=serial$i";
        }
    }

    # parallel devices
    for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
        if (my $path = $conf->{"parallel$i"}) {
            die "no such parallel device\n" if ! -c $path;
            push @$cmd, '-chardev', "parport,id=parallel$i,path=$path";
            push @$cmd, '-device', "isa-parallel,chardev=parallel$i";
        }
    }

    my $vmname = $conf->{name} || "vm$vmid";

    push @$cmd, '-name', $vmname;

    my $sockets = 1;
    $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
    $sockets = $conf->{sockets} if $conf->{sockets};

    my $cores = $conf->{cores} || 1;

    push @$cmd, '-smp', "sockets=$sockets,cores=$cores";

    push @$cmd, '-cpu', $conf->{cpu} if $conf->{cpu};

    push @$cmd, '-nodefaults';

    my $bootorder = $conf->{boot} || $confdesc->{boot}->{default};
    push @$cmd, '-boot', "menu=on,order=$bootorder";

    push @$cmd, '-no-acpi' if defined($conf->{acpi}) && $conf->{acpi} == 0;

    push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;

    my $vga = $conf->{vga};
    if (!$vga) {
        if ($conf->{ostype} && ($conf->{ostype} eq 'win7' || $conf->{ostype} eq 'w2k8')) {
            $vga = 'std';
        } else {
            $vga = 'cirrus';
        }
    }

    push @$cmd, '-vga', $vga if $vga; # for kvm 77 and later

    # time drift fix
    my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
    push @$cmd, '-tdf' if $tdf;

    my $nokvm = defined($conf->{kvm}) && $conf->{kvm} == 0 ? 1 : 0;

    if (my $ost = $conf->{ostype}) {
        # other, wxp, w2k, w2k3, w2k8, wvista, win7, l24, l26

        if ($ost =~ m/^w/) { # windows
            push @$cmd, '-localtime' if !defined($conf->{localtime});

            # use rtc-td-hack when acpi is enabled
            if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
                push @$cmd, '-rtc-td-hack';
            }
        }

        # -tdf ?
        # -no-acpi
        # -no-kvm
        # -win2k-hack ?
    }

    if ($nokvm) {
        push @$cmd, '-no-kvm';
    } else {
        die "No accelerator found!\n" if !$cpuinfo->{hvm};
    }

    push @$cmd, '-localtime' if $conf->{localtime};

    push @$cmd, '-startdate', $conf->{startdate} if $conf->{startdate};

    push @$cmd, '-S' if $conf->{freeze};

    # set keyboard layout
    my $kb = $conf->{keyboard} || $defaults->{keyboard};
    push @$cmd, '-k', $kb if $kb;

    # enable sound
    #my $soundhw = $conf->{soundhw} || $defaults->{soundhw};
    #push @$cmd, '-soundhw', 'es1370';
    #push @$cmd, '-soundhw', $soundhw if $soundhw;

    push @$cmd, '-device', 'virtio-balloon-pci,id=balloon0' if $conf->{balloon};

    if ($conf->{watchdog}) {
        my $wdopts = parse_watchdog($conf->{watchdog});
        push @$cmd, '-watchdog', $wdopts->{model} || 'i6300esb';
        push @$cmd, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
    }

    my $vollist = [];

    # scsi controllers we already added to the command line
    my $scsicontroller = {};

    foreach_drive($conf, sub {
        my ($ds, $drive) = @_;

        eval {
            PVE::Storage::parse_volume_id($drive->{file});
            push @$vollist, $drive->{file};
        }; # ignore errors

        $use_virtio = 1 if $ds =~ m/^virtio/;
        if ($drive->{interface} eq 'scsi') {
            my $maxdev = 7;
            my $controller = int($drive->{index} / $maxdev);
            push @$cmd, '-device', "lsi,id=scsi$controller" if !$scsicontroller->{$controller};
            # remember the controller in the shared hash (no 'my' here -
            # that would create a new variable for each drive, and we
            # would add the same controller again and again)
            $scsicontroller->{$controller} = 1;
        }
        my $tmp = print_drive_full($storecfg, $vmid, $drive);
        $tmp .= ",boot=on" if $conf->{bootdisk} && ($conf->{bootdisk} eq $ds);
        push @$cmd, '-drive', $tmp;
        push @$cmd, '-device',print_drivedevice_full($storecfg,$vmid, $drive);
    });

    push @$cmd, '-m', $conf->{memory} || $defaults->{memory};

    my $foundnet = 0;

    foreach my $k (sort keys %$conf) {
        next if $k !~ m/^net(\d+)$/;
        my $i = int($1);

        die "got strange net id '$i'\n" if $i >= ${MAX_NETS};

        if ($conf->{"net$i"} && (my $net = parse_net($conf->{"net$i"}))) {

            $foundnet = 1;

            my $ifname = "tap${vmid}i$i";

            # kvm uses TUNSETIFF ioctl, and that limits ifname length
            die "interface name '$ifname' is too long (max 15 character)\n"
                if length($ifname) >= 16;

            my $device = $net->{model};
            my $vhostparam = '';
            if ($net->{model} eq 'virtio') {
                $use_virtio = 1;
                $device = 'virtio-net-pci';
                $vhostparam = ',vhost=on' if $kernel_has_vhost_net;
            };

            if ($net->{bridge}) {
                push @$cmd, '-netdev', "type=tap,id=${k},ifname=${ifname},script=/var/lib/qemu-server/pve-bridge$vhostparam";
            } else {
                push @$cmd, '-netdev', "type=user,id=${k},hostname=$vmname";
            }

            # qemu > 0.15 always try to boot from network - we disable that by
            # not loading the pxe rom file
            my $extra = (!$conf->{boot} || ($conf->{boot} !~ m/n/)) ?
                "romfile=," : '';
            push @$cmd, '-device', "$device,${extra}mac=$net->{macaddr},netdev=${k}";
        }
    }

    push @$cmd, '-net', 'none' if !$foundnet;

    # hack: virtio with fairsched is unreliable, so we do not use fairsched
    # when the VM uses virtio devices.
    if (!$use_virtio && $have_ovz) {

        my $cpuunits = defined($conf->{cpuunits}) ?
            $conf->{cpuunits} : $defaults->{cpuunits};

        push @$cmd, '-cpuunits', $cpuunits if $cpuunits;

        # fixme: cpulimit is currently ignored
        #push @$cmd, '-cpulimit', $conf->{cpulimit} if $conf->{cpulimit};
    }

    # add custom args
    if ($conf->{args}) {
        my $aa = split_args($conf->{args});
        push @$cmd, @$aa;
    }

    return wantarray ? ($cmd, $vollist) : $cmd;
}
2199
# Return the path of the vnc unix socket of VM $vmid.
sub vnc_socket {
    my $vmid = shift;

    return $var_run_tmpdir . "/$vmid.vnc";
}
2204
# Return the path of the monitor unix socket of VM $vmid.
sub monitor_socket {
    my $vmid = shift;

    return $var_run_tmpdir . "/$vmid.mon";
}
2209
# Return the path of the pid file of VM $vmid.
sub pidfile_name {
    my $vmid = shift;

    return $var_run_tmpdir . "/$vmid.pid";
}
2214
# Generate a random ethernet address (upper case hex, colon separated).
# The first octet has the multicast bit cleared and the 'locally
# administered' bit set.
sub random_ether_addr {

    my $rand = Digest::SHA1::sha1_hex(rand(), time());

    # use the first 6 bytes of the digest
    my @octets = map { hex(substr($rand, $_*2, 2)) } (0 .. 5);

    $octets[0] &= 0xfe; # clear multicast
    $octets[0] |= 2; # set local id

    return join(':', map { sprintf("%02X", $_) } @octets);
}
2237
# Return the first port in the range 60000..60009 on which a listening
# socket can be opened on localhost. Dies if there is none.
sub next_migrate_port {

    foreach my $port (60000 .. 60009) {

        my $sock = IO::Socket::INET->new(Listen => 5,
                                         LocalAddr => 'localhost',
                                         LocalPort => $port,
                                         ReuseAddr => 1,
                                         Proto => 0);
        next if !$sock;

        # port is free - release it again for the caller
        close($sock);
        return $port;
    }

    die "unable to find free migration port";
}
2256
# Start VM $vmid while holding the config lock.
#
# $statefile: optional - 'tcp' starts the VM waiting for an incoming
#             migration on a free local port (the port is printed),
#             any other value names a state file to restore from (the
#             file is removed after a successful start)
# $skiplock:  do not check the 'lock' setting of the config
#
# Dies if the VM is locked, already running, if a host pci device can't
# be prepared, or if the kvm command fails.
sub vm_start {
    my ($storecfg, $vmid, $statefile, $skiplock) = @_;

    my $start = sub {
        my $conf = load_config($vmid);

        check_lock($conf) if !$skiplock;

        if (check_running($vmid)) {
            my $msg = "VM $vmid already running - start failed\n" ;
            syslog('err', $msg);
            die $msg;
        }

        syslog('info', "VM $vmid start");

        my $migrate_uri;
        my $migrate_port = 0;

        if (!$statefile) {
            # normal startup
        } elsif ($statefile eq 'tcp') {
            $migrate_port = next_migrate_port();
            $migrate_uri = "tcp:localhost:${migrate_port}";
        } elsif (-f $statefile) {
            # NOTE(review): the file name ends up unquoted in an 'exec:'
            # uri - presumably run through a shell by kvm, verify that
            # callers only pass safe names
            $migrate_uri = "exec:cat $statefile";
        } else {
            warn "state file '$statefile' does not exist - doing normal startup\n";
        }

        my $defaults = load_defaults();

        my ($cmd, $vollist) = config_to_command($storecfg, $vmid, $conf, $defaults, $migrate_uri);

        # host pci devices
        foreach my $i (0 .. $MAX_HOSTPCI_DEVICES - 1) {
            my $d = parse_hostpci($conf->{"hostpci$i"}) || next;

            my $info = pci_device_info("0000:$d->{pciid}");
            die "IOMMU not present\n" if !check_iommu_support();
            die "no pci device info for device '$d->{pciid}'\n" if !$info;
            die "can't unbind pci device '$d->{pciid}'\n" if !pci_dev_bind_to_stub($info);
            die "can't reset pci device '$d->{pciid}'\n" if !pci_dev_reset($info);
        }

        PVE::Storage::activate_volumes($storecfg, $vollist);

        # no timeout when we wait for incoming migration/state data
        eval { run_command($cmd, timeout => $migrate_uri ? undef : 30); };

        if (my $err = $@) {
            my $msg = "start failed: $err";
            syslog('err', "VM $vmid $msg");
            die $msg;
        }

        if ($statefile && $statefile eq 'tcp') {
            print "migration listens on port $migrate_port\n";
        } elsif ($statefile) {
            unlink $statefile;
            # fixme: send resume - is that necessary ?
            eval { vm_monitor_command($vmid, "cont", 1) };
        }

        my $migrate_speed = $conf->{migrate_speed} || $defaults->{migrate_speed};
        if ($migrate_speed) {
            my $moncmd = "migrate_set_speed ${migrate_speed}m";
            eval { vm_monitor_command($vmid, $moncmd, 1); };
        }

        my $migrate_downtime = $conf->{migrate_downtime} || $defaults->{migrate_downtime};
        if ($migrate_downtime) {
            my $moncmd = "migrate_set_downtime ${migrate_downtime}";
            eval { vm_monitor_command($vmid, $moncmd, 1); };
        }

        vm_balloonset($vmid, $conf->{balloon}) if $conf->{balloon};
    };

    lock_config($vmid, $start);
}
2341
# Read everything currently available from the monitor handle $fh until the
# qemu monitor prompt "(qemu) " shows up at the end of the data, or until EOF.
#
# Parameters:
#   $fh      - readable handle (must support ->sysread and be usable with
#              IO::Select)
#   $timeout - seconds to wait for each chunk of data (passed to can_read)
#
# Returns the collected data with the trailing prompt removed. On EOF the
# data read so far is returned as is.
#
# Dies with the system error text if sysread fails, and with
# "monitor read timeout\n" if no data arrives within $timeout.
sub __read_avail {
    my ($fh, $timeout) = @_;

    my $sel = IO::Select->new();
    $sel->add($fh);

    my $res = '';
    my $buf;

    my @ready;
    while (scalar(@ready = $sel->can_read($timeout))) {
        my $count = $fh->sysread($buf, 8192);

        die "$!\n" if !defined($count);

        last if !$count; # EOF

        $res .= $buf;

        # Test the accumulated data, not just the last chunk: the prompt
        # may be split across two reads.
        if ($res =~ /^(.*)\(qemu\) $/s) {
            $res = $1;
            last;
        }
    }

    # @ready is only empty here when can_read() ran into the timeout
    die "monitor read timeout\n" if !scalar(@ready);

    return $res;
}
2373
# Send a single command to the qemu monitor socket of VM $vmid and return
# the monitor's answer.
#
# Parameters:
#   $vmid    - VM id
#   $cmdstr  - monitor command line (sent with a trailing "\r")
#   $nolog   - if true, do not write the command to syslog
#   $nocheck - passed through to check_running()
#
# Returns the answer text with the first line (the echoed command) removed
# and lines joined by "\n". For 'q'/'quit' no answer is read and undef is
# returned.
#
# Any failure is logged to syslog and re-thrown.
sub vm_monitor_command {
    my ($vmid, $cmdstr, $nolog, $nocheck) = @_;

    my $res;

    syslog("info", "VM $vmid monitor command '$cmdstr'") unless $nolog;

    # hack: migrate sometime blocks the monitor (when migrate_downtime
    # is set), so those commands get one hour for every step
    my $is_migrate = ($cmdstr =~ m/^(info\s+migrate|migrate\s)/) ? 1 : 0;

    eval {
        die "VM not running\n" if !check_running($vmid, $nocheck);

        my $sname = monitor_socket($vmid);

        my $sock = IO::Socket::UNIX->new( Peer => $sname ) ||
            die "unable to connect to VM $vmid socket - $!\n";

        my $io_timeout = $is_migrate ? 60*60 : 3;

        # the monitor greets us with a banner followed by the prompt
        my $banner = __read_avail($sock, $io_timeout);

        die "got unexpected qemu monitor banner\n"
            if $banner !~ m/^QEMU\s+(\S+)\s+monitor\s/;

        my $sel = IO::Select->new();
        $sel->add($sock);

        my @writable = $sel->can_write($io_timeout);
        if (!scalar(@writable)) {
            die "monitor write error - timeout";
        }

        my $fullcmd = "$cmdstr\r";

        my $written = $sock->syswrite($fullcmd);
        if (!$written || ($written != length($fullcmd))) {
            die "monitor write error - $!";
        }

        # qemu exits on quit, so there is no answer to wait for
        return if ($cmdstr eq 'q') || ($cmdstr eq 'quit');

        my $reply_timeout;
        if ($is_migrate) {
            $reply_timeout = 60*60; # 1 hour
        } elsif ($cmdstr =~ m/^(eject|change)/) {
            $reply_timeout = 60; # note: cdrom mount command is slow
        } else {
            $reply_timeout = 20;
        }

        if ($res = __read_avail($sock, $reply_timeout)) {

            my @lines = split("\r?\n", $res);

            # first line is the echoed command, unless the monitor
            # complained about an unknown command
            shift @lines if $lines[0] !~ m/^unknown command/;

            $res = join("\n", @lines) . "\n";
        }
    };

    my $err = $@;

    if ($err) {
        syslog("err", "VM $vmid monitor command failed - $err");
        die $err;
    }

    return $res;
}
2447
# Return the command line that would be used to start VM $vmid, as one
# string.
#
# Parameters:
#   $storecfg - storage configuration, passed to config_to_command()
#   $vmid     - VM id
#
# The arguments are simply joined with a single space; no shell quoting is
# applied.
sub vm_commandline {
    my ($storecfg, $vmid) = @_;

    my $conf = load_config($vmid);

    my $defaults = load_defaults();

    # config_to_command() is used in list context elsewhere in this file
    # and yields ($cmd, $vollist). Pick the first element explicitly so
    # the result does not depend on how it behaves in scalar context.
    my ($cmd) = config_to_command($storecfg, $vmid, $conf, $defaults);

    return join(' ', @$cmd);
}
2459
# Reset VM $vmid by sending 'system_reset' to its monitor.
#
# Runs under lock_config(). The config lock check (check_lock) is skipped
# when $skiplock is true.
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $send_reset = sub {
        my $conf = load_config($vmid);

        unless ($skiplock) {
            check_lock($conf);
        }

        syslog("info", "VM $vmid sending 'reset'");

        return vm_monitor_command($vmid, "system_reset", 1);
    };

    return lock_config($vmid, $send_reset);
}
2474
# Ask VM $vmid to power down by sending 'system_powerdown' to its monitor.
#
# Runs under lock_config(). The config lock check (check_lock) is skipped
# when $skiplock is true.
sub vm_shutdown {
    my ($vmid, $skiplock) = @_;

    my $send_powerdown = sub {
        my $conf = load_config($vmid);

        unless ($skiplock) {
            check_lock($conf);
        }

        syslog("info", "VM $vmid sending 'shutdown'");

        return vm_monitor_command($vmid, "system_powerdown", 1);
    };

    return lock_config($vmid, $send_powerdown);
}
2489
# Note: use $nocheck to skip tests that require the VM configuration file
# to exist. We need that when migrating VMs to other nodes (files already moved)
# Stop VM $vmid: send 'quit' to the monitor, then escalate to SIGTERM and
# finally SIGKILL if the process does not go away.
#
# Parameters:
#   $vmid     - VM id
#   $skiplock - if true, do not call check_lock() on the configuration
#   $nocheck  - passed to check_running() and vm_monitor_command(); when
#               true the configuration is not loaded at all
#
# Runs under lock_config(). Returns early if the VM is not running.
sub vm_stop {
    my ($vmid, $skiplock, $nocheck) = @_;

    # Poll once per second while the VM is still running. Returns true
    # when $limit polls went by.
    my $wait_timed_out = sub {
        my ($limit) = @_;

        my $elapsed = 0;
        while (($elapsed < $limit) && check_running($vmid, $nocheck)) {
            $elapsed++;
            sleep 1;
        }

        return $elapsed >= $limit;
    };

    lock_config($vmid, sub {

        my $pid = check_running($vmid, $nocheck);

        if (!$pid) {
            syslog('info', "VM $vmid already stopped");
            return;
        }

        if (!$nocheck) {
            my $conf = load_config($vmid);
            check_lock($conf) unless $skiplock;
        }

        syslog("info", "VM $vmid stopping");

        eval { vm_monitor_command($vmid, "quit", 1, $nocheck); };

        if ($@) {
            syslog('info', "VM $vmid quit failed - terminating now with SIGTERM");
            kill 15, $pid;
        } elsif ($wait_timed_out->(50)) { # fixme: how long?
            syslog('info', "VM $vmid still running - terminating now with SIGTERM");
            kill 15, $pid;
        }

        # give the process some more time before using force
        if ($wait_timed_out->(10)) {
            syslog('info', "VM $vmid still running - terminating now with SIGKILL\n");
            kill 9, $pid;
        }

        fairsched_rmnod($vmid); # try to destroy group
    });
}
2551
# Suspend VM $vmid by sending 'stop' to its monitor.
#
# Runs under lock_config(). The config lock check (check_lock) is skipped
# when $skiplock is true.
sub vm_suspend {
    my ($vmid, $skiplock) = @_;

    my $send_stop = sub {
        my $conf = load_config($vmid);

        unless ($skiplock) {
            check_lock($conf);
        }

        syslog("info", "VM $vmid suspend");

        return vm_monitor_command($vmid, "stop", 1);
    };

    return lock_config($vmid, $send_stop);
}
2566
# Resume VM $vmid by sending 'cont' to its monitor.
#
# Runs under lock_config(). The config lock check (check_lock) is skipped
# when $skiplock is true.
sub vm_resume {
    my ($vmid, $skiplock) = @_;

    my $send_cont = sub {
        my $conf = load_config($vmid);

        unless ($skiplock) {
            check_lock($conf);
        }

        syslog("info", "VM $vmid resume");

        return vm_monitor_command($vmid, "cont", 1);
    };

    return lock_config($vmid, $send_cont);
}
2581
# Send the ctrl-alt-delete key combination to VM $vmid via the monitor
# 'sendkey' command.
#
# Runs under lock_config(). The config lock check (check_lock) is skipped
# when $skiplock is true.
sub vm_cad {
    my ($vmid, $skiplock) = @_;

    my $send_keys = sub {
        my $conf = load_config($vmid);

        unless ($skiplock) {
            check_lock($conf);
        }

        syslog("info", "VM $vmid sending cntl-alt-delete");

        return vm_monitor_command($vmid, "sendkey ctrl-alt-delete", 1);
    };

    return lock_config($vmid, $send_keys);
}
2596
# Destroy VM $vmid (calls destroy_vm() with $storecfg).
#
# Runs under lock_config(). The config lock check (check_lock) is skipped
# when $skiplock is true. Refuses to destroy a running VM. Failures are
# logged to syslog and re-thrown.
sub vm_destroy {
    my ($storecfg, $vmid, $skiplock) = @_;

    lock_config($vmid, sub {

        my $conf = load_config($vmid);

        check_lock($conf) unless $skiplock;

        syslog("info", "VM $vmid destroy called (removing all data)");

        eval {
            die "VM is running\n" if check_running($vmid);

            fairsched_rmnod($vmid); # try to destroy group
            destroy_vm($storecfg, $vmid);
        };

        my $err = $@;

        if ($err) {
            syslog("err", "VM $vmid destroy failed - $err");
            die $err;
        }
    });
}
2625
# Stop all running VMs, escalating step by step:
#   1. send a shutdown request to every VM and wait up to $timeout seconds
#   2. send 'quit' to the monitor of the remaining VMs, wait up to 30 seconds
#   3. send SIGTERM to whatever is still running
#
# $timeout defaults to 3 minutes. A VM counts as running when its vzlist()
# entry has a pid. The function returns as soon as no VM is left running.
sub vm_stopall {
    my ($timeout) = @_;

    $timeout ||= 3*60;

    my $vzlist = vzlist();

    # ids in the current $vzlist snapshot that have a pid
    my $running_ids = sub {
        return grep { $vzlist->{$_}->{pid} } keys %$vzlist;
    };

    my $count = scalar($running_ids->());

    my $wt = 5; # seconds between two polls

    # Re-read the VM list every $wt seconds until nothing is running or
    # about $secs seconds passed. Updates $vzlist and $count.
    my $wait_for_exit = sub {
        my ($secs) = @_;

        my $maxtries = int(($secs + $wt -1)/$wt);
        my $try = 0;
        while (($try < $maxtries) && $count) {
            $try++;
            sleep $wt;

            $vzlist = vzlist();
            $count = scalar($running_ids->());
            last if !$count;
        }
    };

    if ($count) {

        my $msg = "Stopping Qemu Server - sending shutdown requests to all VMs\n";
        syslog('info', $msg);
        print STDERR $msg;

        foreach my $vmid ($running_ids->()) {
            eval { vm_shutdown($vmid, 1); };
            print STDERR $@ if $@;
        }

        $wait_for_exit->($timeout);

        return if !$count;

        foreach my $vmid ($running_ids->()) {
            $msg = "VM $vmid still running - sending stop now\n";
            syslog('info', $msg);
            print $msg;

            eval { vm_monitor_command($vmid, "quit", 1); };
            print STDERR $@ if $@;
        }

        $wait_for_exit->(30);

        return if !$count;

        foreach my $vmid ($running_ids->()) {
            $msg = "VM $vmid still running - terminating now with SIGTERM\n";
            syslog('info', $msg);
            print $msg;
            kill 15, $vzlist->{$vmid}->{pid};
        }

        # this is called by system shutdown scripts, so remaining
        # processes get killed anyway (no need to send kill -9 here)

        $msg = "Qemu Server stopped\n";
        syslog('info', $msg);
        print STDERR $msg;
    }
}
2715
2716# pci helpers
2717
# Write $buf to $filename (opened with mode "w").
#
# Returns a true value only if the file could be opened AND the data was
# written AND the handle was closed without error; undef otherwise.
#
# The close() result matters: print only fills the buffer, the actual
# write happens when the handle is flushed on close. That is where a write
# rejected by the kernel (for example on a sysfs attribute) shows up.
sub file_write {
    my ($filename, $buf) = @_;

    my $fh = IO::File->new($filename, "w");
    return undef if !$fh;

    my $printed = print {$fh} $buf;

    # always close, even if print failed
    my $closed = $fh->close();

    return ($printed && $closed) ? 1 : undef;
}
2730
# Collect information about the PCI device $name from $pcisysfs.
#
# $name must have the form "dddd:bb:ss.f" (lower case hex digits).
#
# Returns undef if the name does not match, or if the irq, vendor or device
# attribute cannot be read or has an unexpected format. Otherwise returns a
# hash reference with the keys name, vendor, product (both without the
# leading "0x"), domain, bus, slot, func, irq and has_fl_reset (1 if the
# device has a 'reset' file, else 0).
sub pci_device_info {
    my ($name) = @_;

    my @addr = ($name =~ m/^([a-f0-9]{4}):([a-f0-9]{2}):([a-f0-9]{2})\.([a-f0-9])$/);
    return undef if !@addr;

    my $devdir = "$pcisysfs/devices/$name";

    my $irq = file_read_firstline("$devdir/irq");
    return undef unless defined($irq) && $irq =~ m/^\d+$/;

    # the substitution strips the "0x" prefix in place and fails when
    # the prefix is missing
    my $vendor = file_read_firstline("$devdir/vendor");
    return undef unless defined($vendor) && $vendor =~ s/^0x//;

    my $product = file_read_firstline("$devdir/device");
    return undef unless defined($product) && $product =~ s/^0x//;

    my ($domain, $bus, $slot, $func) = @addr;

    return {
        name => $name,
        vendor => $vendor,
        product => $product,
        domain => $domain,
        bus => $bus,
        slot => $slot,
        func => $func,
        irq => $irq,
        has_fl_reset => (-f "$devdir/reset") ? 1 : 0,
    };
}
2762
# Reset the PCI device described by $dev (a hash as returned by
# pci_device_info) by writing "1" to its 'reset' file below $pcisysfs.
# Returns the result of file_write().
sub pci_dev_reset {
    my ($dev) = @_;

    my $reset_file = "$pcisysfs/devices/$dev->{name}/reset";

    return file_write($reset_file, "1");
}
2772
# Bind the PCI device described by $dev (a hash as returned by
# pci_device_info) to the pci-stub driver.
#
# Steps: register the vendor/product id with pci-stub (new_id), unbind the
# device from its current driver, then bind it to pci-stub unless it
# already showed up there.
#
# Returns 1 if the device is already bound to pci-stub, undef if one of the
# required writes fails, otherwise the result of testing for the device
# directory below the pci-stub driver.
sub pci_dev_bind_to_stub {
    my ($dev) = @_;

    my $name = $dev->{name};

    my $stubdir = "$pcisysfs/drivers/pci-stub";
    my $bound_dir = "$stubdir/$name";

    return 1 if -d $bound_dir;

    my $ids = "$dev->{vendor} $dev->{product}";
    file_write("$stubdir/new_id", $ids) or return undef;

    # a failed unbind only counts as error if there is an unbind file
    my $unbind = "$pcisysfs/devices/$name/driver/unbind";
    return undef if !file_write($unbind, $name) && (-f $unbind);

    unless (-d $bound_dir) {
        file_write("$stubdir/bind", $name) or return undef;
    }

    return -d $bound_dir;
}
2796
# Return the ",bus=pci.N,addr=0xNN" suffix for the device id $id, or an
# empty string if no fixed PCI address is assigned to that id.
#
# Known ids: virtio0 .. virtio5 (bus 0, addr 10 .. 15).
sub print_pci_addr {
    my ($id) = @_;

    my %slot_for = map { ("virtio$_" => { bus => 0, addr => 10 + $_ }) } (0 .. 5);

    my $slot = $slot_for{$id};

    return '' if !$slot || !defined($slot->{bus}) || !defined($slot->{addr});

    return sprintf(",bus=pci.%s,addr=0x%x", $slot->{bus}, $slot->{addr});
}
2817
# Send the monitor command "balloon $value" to VM $vmid (not logged to
# syslog) and return the monitor's answer.
sub vm_balloonset {
    my ($vmid, $value) = @_;

    my $balloon_cmd = "balloon $value";

    return vm_monitor_command($vmid, $balloon_cmd, 1);
}
2823
28241;