]> git.proxmox.com Git - qemu-server.git/blame - PVE/QemuServer.pm
do not allow to migrate VM which uses local resources
[qemu-server.git] / PVE / QemuServer.pm
CommitLineData
1e3baf05
DM
1package PVE::QemuServer;
2
3use strict;
4use POSIX;
5use IO::Handle;
6use IO::Select;
7use IO::File;
8use IO::Dir;
9use IO::Socket::UNIX;
10use File::Basename;
11use File::Path;
12use File::stat;
13use Getopt::Long;
14use Digest::SHA1;
15use Fcntl ':flock';
16use Cwd 'abs_path';
17use IPC::Open3;
18use Fcntl;
19use PVE::SafeSyslog;
20use Storable qw(dclone);
21use PVE::Exception qw(raise raise_param_exc);
22use PVE::Storage;
23use PVE::Tools qw(run_command lock_file file_read_firstline);
24use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file cfs_lock_file);
25use PVE::INotify;
26use PVE::ProcFSTools;
27use Time::HiRes qw (gettimeofday);
28
29my $clock_ticks = POSIX::sysconf(&POSIX::_SC_CLK_TCK);
30
# Note about locking: we use flock on the config file to protect
# against concurrent actions.
# Additionally, we have a 'lock' setting in the config file. This
# can be set to 'migrate' or 'backup'. Most actions are not
# allowed when such a lock is set. But you can ignore this kind of
# lock with the --skiplock flag.
37
38cfs_register_file('/qemu-server/', \&parse_vm_config);
39
40#no warnings 'redefine';
41
42unless(defined(&_VZSYSCALLS_H_)) {
43 eval 'sub _VZSYSCALLS_H_ () {1;}' unless defined(&_VZSYSCALLS_H_);
44 require 'sys/syscall.ph';
45 if(defined(&__x86_64__)) {
46 eval 'sub __NR_fairsched_vcpus () {499;}' unless defined(&__NR_fairsched_vcpus);
47 eval 'sub __NR_fairsched_mknod () {504;}' unless defined(&__NR_fairsched_mknod);
48 eval 'sub __NR_fairsched_rmnod () {505;}' unless defined(&__NR_fairsched_rmnod);
49 eval 'sub __NR_fairsched_chwt () {506;}' unless defined(&__NR_fairsched_chwt);
50 eval 'sub __NR_fairsched_mvpr () {507;}' unless defined(&__NR_fairsched_mvpr);
51 eval 'sub __NR_fairsched_rate () {508;}' unless defined(&__NR_fairsched_rate);
52 eval 'sub __NR_setluid () {501;}' unless defined(&__NR_setluid);
53 eval 'sub __NR_setublimit () {502;}' unless defined(&__NR_setublimit);
54 }
55 elsif(defined( &__i386__) ) {
56 eval 'sub __NR_fairsched_mknod () {500;}' unless defined(&__NR_fairsched_mknod);
57 eval 'sub __NR_fairsched_rmnod () {501;}' unless defined(&__NR_fairsched_rmnod);
58 eval 'sub __NR_fairsched_chwt () {502;}' unless defined(&__NR_fairsched_chwt);
59 eval 'sub __NR_fairsched_mvpr () {503;}' unless defined(&__NR_fairsched_mvpr);
60 eval 'sub __NR_fairsched_rate () {504;}' unless defined(&__NR_fairsched_rate);
61 eval 'sub __NR_fairsched_vcpus () {505;}' unless defined(&__NR_fairsched_vcpus);
62 eval 'sub __NR_setluid () {511;}' unless defined(&__NR_setluid);
63 eval 'sub __NR_setublimit () {512;}' unless defined(&__NR_setublimit);
64 } else {
65 die("no fairsched syscall for this arch");
66 }
67 require 'asm/ioctl.ph';
68 eval 'sub KVM_GET_API_VERSION () { &_IO(0xAE, 0x);}' unless defined(&KVM_GET_API_VERSION);
69}
70
# Thin wrapper around the fairsched_mknod syscall.
# All three arguments are truncated to integers with int() before the call;
# the raw syscall() result is returned unchanged.
sub fairsched_mknod {
    my ($parent, $weight, $desired) = @_;

    my @sysargs = map { int($_) } ($parent, $weight, $desired);

    return syscall(&__NR_fairsched_mknod, @sysargs);
}
76
# Thin wrapper around the fairsched_rmnod syscall.
# $id is truncated to an integer; the raw syscall() result is returned.
sub fairsched_rmnod {
    my ($id) = @_;

    my $node = int($id);

    return syscall(&__NR_fairsched_rmnod, $node);
}
82
# Thin wrapper around the fairsched_mvpr syscall.
# Both $pid and $newid are truncated to integers; the raw syscall() result
# is returned.
sub fairsched_mvpr {
    my ($pid, $newid) = @_;

    my @sysargs = map { int($_) } ($pid, $newid);

    return syscall(&__NR_fairsched_mvpr, @sysargs);
}
88
# Thin wrapper around the fairsched_vcpus syscall.
# Both $id and $vcpus are truncated to integers; the raw syscall() result
# is returned.
sub fairsched_vcpus {
    my ($id, $vcpus) = @_;

    my @sysargs = map { int($_) } ($id, $vcpus);

    return syscall(&__NR_fairsched_vcpus, @sysargs);
}
94
# Thin wrapper around the fairsched_rate syscall.
# $id, $op and $rate are truncated to integers; the raw syscall() result is
# returned. Callers in this file pass one of the FAIRSCHED_*_RATE constants
# as $op.
sub fairsched_rate {
    my ($id, $op, $rate) = @_;

    my @sysargs = map { int($_) } ($id, $op, $rate);

    return syscall(&__NR_fairsched_rate, @sysargs);
}
100
101use constant FAIRSCHED_SET_RATE => 0;
102use constant FAIRSCHED_DROP_RATE => 1;
103use constant FAIRSCHED_GET_RATE => 2;
104
# Apply a CPU limit to fairsched node $id.
# $limit is scaled by 1024/100 and truncated to an integer. A non-zero
# result is applied with FAIRSCHED_SET_RATE, a zero result is passed with
# FAIRSCHED_DROP_RATE. Returns the result of fairsched_rate().
sub fairsched_cpulimit {
    my ($id, $limit) = @_;

    my $cpulim1024 = int ($limit * 1024 / 100);

    if ($cpulim1024) {
        return fairsched_rate ($id, FAIRSCHED_SET_RATE, $cpulim1024);
    }

    return fairsched_rate ($id, FAIRSCHED_DROP_RATE, $cpulim1024);
}
113
114my $nodename = PVE::INotify::nodename();
115
116mkdir "/etc/pve/nodes/$nodename";
117my $confdir = "/etc/pve/nodes/$nodename/qemu-server";
118mkdir $confdir;
119
120my $var_run_tmpdir = "/var/run/qemu-server";
121mkdir $var_run_tmpdir;
122
123my $lock_dir = "/var/lock/qemu-server";
124mkdir $lock_dir;
125
126my $pcisysfs = "/sys/bus/pci";
127
128my $keymaphash = PVE::Tools::kvmkeymaps();
129
130my $confdesc = {
131 onboot => {
132 optional => 1,
133 type => 'boolean',
134 description => "Specifies whether a VM will be started during system bootup.",
135 default => 0,
136 },
137 autostart => {
138 optional => 1,
139 type => 'boolean',
140 description => "Automatic restart after crash (currently ignored).",
141 default => 0,
142 },
143 reboot => {
144 optional => 1,
145 type => 'boolean',
146 description => "Allow reboot. If set to '0' the VM exit on reboot.",
147 default => 1,
148 },
149 lock => {
150 optional => 1,
151 type => 'string',
152 description => "Lock/unlock the VM.",
153 enum => [qw(migrate backup)],
154 },
155 cpulimit => {
156 optional => 1,
157 type => 'integer',
158 description => "Limit of CPU usage in per cent. Note if the computer has 2 CPUs, it has total of 200% CPU time. Value '0' indicates no CPU limit.\n\nNOTE: This option is currently ignored.",
159 minimum => 0,
160 default => 0,
161 },
162 cpuunits => {
163 optional => 1,
164 type => 'integer',
165 description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to weights of all the other running VMs.\n\nNOTE: You can disable fair-scheduler configuration by setting this to 0.",
166 minimum => 0,
167 maximum => 500000,
168 default => 1000,
169 },
170 memory => {
171 optional => 1,
172 type => 'integer',
173 description => "Amount of RAM for the VM in MB.",
174 minimum => 16,
175 default => 512,
176 },
177 keyboard => {
178 optional => 1,
179 type => 'string',
180 description => "Keybord layout for vnc server. Default is read from the datacenter configuration file.",
181 enum => [ keys %$keymaphash ],
182 default => 'en-us',
183 },
184 name => {
185 optional => 1,
186 type => 'string',
187 description => "Set a name for the VM. Only used on the configuration web interface.",
188 },
189 description => {
190 optional => 1,
191 type => 'string',
192 description => "Description for the VM. Only used on the configuration web interface.",
193 },
194 ostype => {
195 optional => 1,
196 type => 'string',
197 enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 l24 l26)],
198 description => <<EODESC,
199Used to enable special optimization/features for specific
200operating systems:
201
202other => unspecified OS
203wxp => Microsoft Windows XP
204w2k => Microsoft Windows 2000
205w2k3 => Microsoft Windows 2003
206w2k8 => Microsoft Windows 2008
207wvista => Microsoft Windows Vista
208win7 => Microsoft Windows 7
209l24 => Linux 2.4 Kernel
210l26 => Linux 2.6/3.X Kernel
211
212other|l24|l26 ... no special behaviour
213wxp|w2k|w2k3|w2k8|wvista|win7 ... use --localtime switch
214EODESC
215 },
216 boot => {
217 optional => 1,
218 type => 'string',
219 description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n).",
220 pattern => '[acdn]{1,4}',
221 default => 'cad',
222 },
223 bootdisk => {
224 optional => 1,
225 type => 'string', format => 'pve-qm-bootdisk',
226 description => "Enable booting from specified disk.",
227 pattern => '(ide|scsi|virtio)\d+',
228 },
229 smp => {
230 optional => 1,
231 type => 'integer',
232 description => "The number of CPUs. Please use option -sockets instead.",
233 minimum => 1,
234 default => 1,
235 },
236 sockets => {
237 optional => 1,
238 type => 'integer',
239 description => "The number of CPU sockets.",
240 minimum => 1,
241 default => 1,
242 },
243 cores => {
244 optional => 1,
245 type => 'integer',
246 description => "The number of cores per socket.",
247 minimum => 1,
248 default => 1,
249 },
250 acpi => {
251 optional => 1,
252 type => 'boolean',
253 description => "Enable/disable ACPI.",
254 default => 1,
255 },
256 kvm => {
257 optional => 1,
258 type => 'boolean',
259 description => "Enable/disable KVM hardware virtualization.",
260 default => 1,
261 },
262 tdf => {
263 optional => 1,
264 type => 'boolean',
265 description => "Enable/disable time drift fix.",
266 default => 1,
267 },
268 localtime => {
269 optional => 1,
270 type => 'boolean',
271 description => "Set the real time clock to local time. This is enabled by default if ostype indicates a Microsoft OS.",
272 },
273 freeze => {
274 optional => 1,
275 type => 'boolean',
276 description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
277 },
278 vga => {
279 optional => 1,
280 type => 'string',
281 description => "Select VGA type. If you want to use high resolution modes (>= 1280x1024x16) then you should use option 'std' or 'vmware'. Default is 'std' for win7/w2k8, and 'cirrur' for other OS types",
282 enum => [qw(std cirrus vmware)],
283 },
0ea9541d
DM
284 watchdog => {
285 optional => 1,
286 type => 'string', format => 'pve-qm-watchdog',
287 typetext => '[[model=]i6300esb|ib700] [,[action=]reset|shutdown|poweroff|pause|debug|none]',
288 description => "Create a virtual hardware watchdog device. Once enabled (by a guest action), the watchdog must be periodically polled by an agent inside the guest or else the guest will be restarted (or execute the action specified)",
289 },
1e3baf05
DM
290 startdate => {
291 optional => 1,
292 type => 'string',
293 typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
294 description => "Set the initial date of the real time clock. Valid format for date are: 'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
295 pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
296 default => 'now',
297 },
298 args => {
299 optional => 1,
300 type => 'string',
301 description => <<EODESCR,
302Note: this option is for experts only. It allows you to pass arbitrary arguments to kvm, for example:
303
304args: -no-reboot -no-hpet
305EODESCR
306 },
307 tablet => {
308 optional => 1,
309 type => 'boolean',
310 default => 1,
311 description => "Enable/disable the usb tablet device. This device is usually needed to allow absolute mouse positioning. Else the mouse runs out of sync with normal vnc clients. If you're running lots of console-only guests on one host, you may consider disabling this to save some context switches.",
312 },
313 migrate_speed => {
314 optional => 1,
315 type => 'integer',
316 description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
317 minimum => 0,
318 default => 0,
319 },
320 migrate_downtime => {
321 optional => 1,
322 type => 'integer',
323 description => "Set maximum tolerated downtime (in seconds) for migrations.",
324 minimum => 0,
325 default => 1,
326 },
327 cdrom => {
328 optional => 1,
329 type => 'string', format => 'pve-qm-drive',
330 typetext => 'volume',
331 description => "This is an alias for option -ide2",
332 },
333 cpu => {
334 optional => 1,
335 description => "Emulated CPU type.",
336 type => 'string',
337 enum => [ qw(486 athlon pentium pentium2 pentium3 coreduo core2duo kvm32 kvm64 qemu32 qemu64 phenom host) ],
338 default => 'qemu64',
339 },
340};
341
342# what about other qemu settings ?
343#cpu => 'string',
344#machine => 'string',
345#fda => 'file',
346#fdb => 'file',
347#mtdblock => 'file',
348#sd => 'file',
349#pflash => 'file',
350#snapshot => 'bool',
351#bootp => 'file',
352##tftp => 'dir',
353##smb => 'dir',
354#kernel => 'file',
355#append => 'string',
356#initrd => 'file',
357##soundhw => 'string',
358
359while (my ($k, $v) = each %$confdesc) {
360 PVE::JSONSchema::register_standard_option("pve-qm-$k", $v);
361}
362
363my $MAX_IDE_DISKS = 4;
f62db2a4
DA
364my $MAX_SCSI_DISKS = 14;
365my $MAX_VIRTIO_DISKS = 6;
1e3baf05 366my $MAX_USB_DEVICES = 5;
f62db2a4 367my $MAX_NETS = 6;
1e3baf05 368my $MAX_UNUSED_DISKS = 8;
040b06b7 369my $MAX_HOSTPCI_DEVICES = 2;
bae179aa 370my $MAX_SERIAL_PORTS = 4;
1989a89c 371my $MAX_PARALLEL_PORTS = 3;
1e3baf05
DM
372
373my $nic_model_list = ['rtl8139', 'ne2k_pci', 'e1000', 'pcnet', 'virtio',
374 'ne2k_isa', 'i82551', 'i82557b', 'i82559er'];
375my $nic_model_list_txt = join (' ', sort @$nic_model_list);
376
377# fixme:
378my $netdesc = {
379 optional => 1,
380 type => 'string', format => 'pve-qm-net',
381 typetext => "MODEL=XX:XX:XX:XX:XX:XX [,bridge=<dev>][,rate=<mbps>]",
382 description => <<EODESCR,
383Specify network devices.
384
385MODEL is one of: $nic_model_list_txt
386
387XX:XX:XX:XX:XX:XX should be an unique MAC address. This is
388automatically generated if not specified.
389
390The bridge parameter can be used to automatically add the interface to a bridge device. The Proxmox VE standard bridge is called 'vmbr0'.
391
392Option 'rate' is used to limit traffic bandwidth from and to this interface. It is specified as floating point number, unit is 'Megabytes per second'.
393
394If you specify no bridge, we create a kvm 'user' (NATed) network device, which provides DHCP and DNS services. The following addresses are used:
395
39610.0.2.2 Gateway
39710.0.2.3 DNS Server
39810.0.2.4 SMB Server
399
400The DHCP server assign addresses to the guest starting from 10.0.2.15.
401
402EODESCR
403};
404PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
405
406for (my $i = 0; $i < $MAX_NETS; $i++) {
407 $confdesc->{"net$i"} = $netdesc;
408}
409
410my $drivename_hash;
411
412my $idedesc = {
413 optional => 1,
414 type => 'string', format => 'pve-qm-drive',
415 typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback] [,format=f] [,backup=yes|no] [,aio=native|threads]',
416 description => "Use volume as IDE hard disk or CD-ROM (n is 0 to 3).",
417};
418PVE::JSONSchema::register_standard_option("pve-qm-ide", $idedesc);
419
420my $scsidesc = {
421 optional => 1,
422 type => 'string', format => 'pve-qm-drive',
423 typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback] [,format=f] [,backup=yes|no] [,aio=native|threads]',
2fe1a152 424 description => "Use volume as SCSI hard disk or CD-ROM (n is 0 to 13).",
1e3baf05
DM
425};
426PVE::JSONSchema::register_standard_option("pve-qm-scsi", $scsidesc);
427
428my $virtiodesc = {
429 optional => 1,
430 type => 'string', format => 'pve-qm-drive',
431 typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback] [,format=f] [,backup=yes|no] [,aio=native|threads]',
2fe1a152 432 description => "Use volume as VIRTIO hard disk (n is 0 to 5).",
1e3baf05
DM
433};
434PVE::JSONSchema::register_standard_option("pve-qm-virtio", $virtiodesc);
435
436my $usbdesc = {
437 optional => 1,
438 type => 'string', format => 'pve-qm-usb-device',
439 typetext => 'host=HOSTUSBDEVICE',
440 description => <<EODESCR,
2fe1a152 441Configure an USB device (n is 0 to 4). This can be used to
1e3baf05
DM
442pass-through usb devices to the guest. HOSTUSBDEVICE syntax is:
443
444'bus-port(.port)*' (decimal numbers) or
445'vendor_id:product_id' (hexadeciaml numbers)
446
447You can use the 'lsusb -t' command to list existing usb devices.
448
449Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.
450
451EODESCR
452};
453PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);
454
040b06b7
DA
455my $hostpcidesc = {
456 optional => 1,
457 type => 'string', format => 'pve-qm-hostpci',
458 typetext => "HOSTPCIDEVICE",
459 description => <<EODESCR,
460Map host pci devices. HOSTPCIDEVICE syntax is:
461
462'bus:dev.func' (hexadecimal numbers)
463
464You can us the 'lspci' command to list existing pci devices.
465
466Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.
467
468Experimental: user reported problems with this option.
469EODESCR
470};
471PVE::JSONSchema::register_standard_option("pve-qm-hostpci", $hostpcidesc);
472
bae179aa
DA
473my $serialdesc = {
474 optional => 1,
ca0cef26 475 type => 'string',
2fe1a152 476 pattern => '/dev/ttyS\d+',
bae179aa 477 description => <<EODESCR,
2fe1a152 478Map host serial devices (n is 0 to 3).
bae179aa
DA
479
480Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.
481
482Experimental: user reported problems with this option.
483EODESCR
484};
bae179aa 485
1989a89c
DA
486my $paralleldesc= {
487 optional => 1,
ca0cef26 488 type => 'string',
2fe1a152 489 pattern => '/dev/parport\d+',
1989a89c 490 description => <<EODESCR,
2fe1a152 491Map host parallel devices (n is 0 to 2).
1989a89c
DA
492
493Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.
494
495Experimental: user reported problems with this option.
496EODESCR
497};
1989a89c
DA
498
499for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
500 $confdesc->{"parallel$i"} = $paralleldesc;
501}
502
bae179aa
DA
503for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
504 $confdesc->{"serial$i"} = $serialdesc;
505}
506
040b06b7
DA
507for (my $i = 0; $i < $MAX_HOSTPCI_DEVICES; $i++) {
508 $confdesc->{"hostpci$i"} = $hostpcidesc;
509}
1e3baf05
DM
510
511for (my $i = 0; $i < $MAX_IDE_DISKS; $i++) {
512 $drivename_hash->{"ide$i"} = 1;
513 $confdesc->{"ide$i"} = $idedesc;
514}
515
516for (my $i = 0; $i < $MAX_SCSI_DISKS; $i++) {
517 $drivename_hash->{"scsi$i"} = 1;
518 $confdesc->{"scsi$i"} = $scsidesc ;
519}
520
521for (my $i = 0; $i < $MAX_VIRTIO_DISKS; $i++) {
522 $drivename_hash->{"virtio$i"} = 1;
523 $confdesc->{"virtio$i"} = $virtiodesc;
524}
525
526for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) {
527 $confdesc->{"usb$i"} = $usbdesc;
528}
529
530my $unuseddesc = {
531 optional => 1,
532 type => 'string', format => 'pve-volume-id',
533 description => "Reference to unused volumes.",
534};
535
536for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) {
537 $confdesc->{"unused$i"} = $unuseddesc;
538}
539
540my $kvm_api_version = 0;
541
# Return the KVM API version reported by /dev/kvm.
# A non-zero result is cached in $kvm_api_version and reused by later calls.
# Returns 0 when /dev/kvm cannot be opened; returns the (still zero) cached
# value when the ioctl yields a false result.
sub kvm_version {

    return $kvm_api_version if $kvm_api_version;

    my $fh = IO::File->new ("</dev/kvm");
    return 0 if !$fh;

    my $v = $fh->ioctl (KVM_GET_API_VERSION(), 0);
    $kvm_api_version = $v if $v;

    $fh->close();

    return $kvm_api_version;
}
557
558my $kvm_user_version;
559
# Return the version string of the kvm userspace binary.
# Runs `kvm -help` and extracts the x.y.z version from its first line.
# The result is cached in $kvm_user_version; it is 'unknown' when the
# output does not match the expected banner.
sub kvm_user_version {

    return $kvm_user_version if $kvm_user_version;

    my $help = `kvm -help 2>/dev/null`;

    if ($help =~ m/^QEMU( PC)? emulator version (\d+\.\d+\.\d+) /) {
        $kvm_user_version = $2;
    } else {
        $kvm_user_version = 'unknown';
    }

    return $kvm_user_version;
}
575
576my $kernel_has_vhost_net = -c '/dev/vhost-net';
577
# Return the list of all valid disk option names: ide*, then scsi*, then
# virtio*, each numbered from 0 up to its $MAX_*_DISKS limit minus one.
sub disknames {
    # order is important - used to autoselect boot disk
    return (
        (map("ide$_", 0 .. ($MAX_IDE_DISKS - 1))),
        (map("scsi$_", 0 .. ($MAX_SCSI_DISKS - 1))),
        (map("virtio$_", 0 .. ($MAX_VIRTIO_DISKS - 1))),
    );
}
584
# Return true when $dev is a known drive name, i.e. has a defined entry in
# $drivename_hash.
sub valid_drivename {
    my ($dev) = @_;

    my $entry = $drivename_hash->{$dev};

    return defined ($entry);
}
590
# Return true when $key names a configuration option described in $confdesc.
sub option_exists {
    my ($key) = @_;

    my $desc = $confdesc->{$key};

    return defined($desc);
}
595
# Return the array reference holding the supported NIC model names.
# The shared reference itself is returned, not a copy.
sub nic_models {

    return $nic_model_list;
}
599
# Return a hash reference mapping each ostype key to a human readable
# operating system name. A fresh hash is built on every call.
sub os_list_description {

    return {
        other => 'Other',
        wxp => 'Windows XP',
        w2k => 'Windows 2000',
        w2k3 => 'Windows 2003', # stray comma after '=>' removed
        w2k8 => 'Windows 2008',
        wvista => 'Windows Vista',
        win7 => 'Windows 7',
        l24 => 'Linux 2.4',
        l26 => 'Linux 2.6',
    };
}
614
# Split an argument string into an array reference of single arguments.
# This is a clumsy approach: the string is appended to a small perl
# one-liner that prints each element of @ARGV on its own line, and every
# output line becomes one argument.
# Returns an empty array reference for an empty/false $str and dies with
# "unable to parse args" when run_command fails.
# fixme: use Text::ParseWords::shellwords() ?
# NOTE(review): $str is concatenated into the command string unescaped -
# presumably run_command hands this to a shell; confirm callers only pass
# trusted input.
sub split_args {
    my ($str) = @_;

    return [] if !$str;

    my @parsed;

    my $cmd = 'perl -e \'foreach my $a (@ARGV) { print "$a\n"; } \' -- ' . $str;

    my $collect = sub {
        my $line = shift;
        push @parsed, $line;
    };

    eval { run_command ($cmd, outfunc => $collect); };

    die "unable to parse args: $str\n" if $@;

    return \@parsed;
}
640
# Decode a drive name such as 'ide1', 'scsi9' or 'virtio3'.
# Returns a hash reference with the keys bus, index, controller, unit and
# desc (upper-cased bus followed by "controller:unit"). IDE uses 2 units per
# controller, SCSI 7, everything else 1024. Dies on any other name format.
sub disk_devive_info {
    my $dev = shift;

    my ($bus, $index) = $dev =~ m/^(ide|scsi|virtio)(\d+)$/
        or die "unknown disk device format '$dev'";

    my %units_per_controller = (ide => 2, scsi => 7);
    my $maxdev = $units_per_controller{$bus} || 1024;

    my $controller = int ($index / $maxdev);
    my $unit = $index % $maxdev;

    return {
        bus => $bus,
        desc => uc($bus) . " $controller:$unit",
        controller => $controller,
        unit => $unit,
        index => $index,
    };
}
664
# Build the qemu drive name for drive $dev.
# For ide and scsi the name is "<bus><controller>-cd<unit>" when $media is
# 'cdrom' and "<bus><controller>-hd<unit>" otherwise; any other bus yields
# "<bus><index>". Dies (inside disk_devive_info) on an invalid $dev.
sub qemu_drive_name {
    my ($dev, $media) = @_;

    my $info = disk_devive_info ($dev);
    my $bus = $info->{bus};

    if ($bus ne 'ide' && $bus ne 'scsi') {
        return sprintf("%s%i", $bus, $info->{index});
    }

    my $mediastr = "-hd";
    $mediastr = "-cd" if $media eq 'cdrom';

    return sprintf("%s%i%s%i", $bus, $info->{controller},
                   $mediastr, $info->{unit});
}
679
680my $cdrom_path;
681
# Return the path of the host CD-ROM device link.
# Probes /dev/cdrom, /dev/cdrom1 and /dev/cdrom2 (in that order) and returns
# the first one that is a symbolic link. A hit is cached in $cdrom_path, so
# later calls skip the probing.
# Returns undef when none of the links exists. This used to fall off the
# end of the sub, so the result was whatever the last -l test produced
# (undef or ''); it is now explicit.
sub get_cdrom_path {

    return $cdrom_path if $cdrom_path;

    foreach my $candidate ("/dev/cdrom", "/dev/cdrom1", "/dev/cdrom2") {
        return $cdrom_path = $candidate if -l $candidate;
    }

    return undef;
}
690
# Resolve a CD-ROM specification to a filesystem path.
#   'cdrom'          -> result of get_cdrom_path()
#   'none'           -> empty string
#   absolute path    -> returned unchanged
#   anything else    -> passed to PVE::Storage::path() as a volume id
# $vmid is accepted but not used here.
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';

    return '' if $cdrom eq 'none';

    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path ($storecfg, $cdrom);
}
704
# Try to convert old style file names to volume IDs.
# 'none', 'cdrom', /dev/... paths and anything already of the form
# "storage:name" are returned unchanged. Any other name containing a '/'
# yields undef. A plain file name becomes "local:iso/<file>" when $media is
# 'cdrom' and "local:<vmid>/<file>" otherwise.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    my $keep_as_is = $file eq 'none' || $file eq 'cdrom' ||
        $file =~ m|^/dev/.+| || $file =~ m/^([^:]+):(.+)$/;

    return $file if $keep_as_is;

    return undef if $file =~ m|/|;

    my $is_cdrom = $media && $media eq 'cdrom';

    return $is_cdrom ? "local:iso/$file" : "local:$vmid/$file";
}
723
# Check that volume type $vtype fits the drive's $media setting.
# media 'disk' expects vtype 'image', media 'cdrom' expects vtype 'iso'.
# Returns silently when $media is unset or the types match, dies with
# "internal error" for any other $media value, and raises a parameter
# exception keyed by $opt on a mismatch.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %expected_type = (disk => 'image', cdrom => 'iso');

    my $etype = $expected_type{$media};
    die "internal error" if !defined($etype);

    if ($vtype ne $etype) {
        raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
    }

    return;
}
742
# Normalise $drive->{file} and $drive->{media} in place.
# A file that is not 'cdrom'/'none', not a /dev/ path, not already a
# "storage:name" volume id and not purely numeric is treated as a
# filesystem path and converted to a volume id via
# PVE::Storage::path_to_volume_id(); a parameter exception keyed by $opt is
# raised when no storage matches. media defaults to 'cdrom' for iso volumes
# and for the special files 'cdrom' and 'none'.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    my $file = $drive->{file};

    my $already_clean = $file =~ m/^(cdrom|none)$/ ||
        $file =~ m|^/dev/.+| ||
        $file =~ m/^([^:]+):(.+)$/ ||
        $file =~ m/^\d+$/;

    # try to convert filesystem paths to volume IDs
    if (!$already_clean) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $drive->{file});
        raise_param_exc({ $opt => "unable to associate path '$drive->{file}' to any storage"}) if !$vtype;
        $drive->{media} = 'cdrom' if !$drive->{media} && $vtype eq 'iso';
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
761
# Write the initial configuration file for VM $vmid without taking a lock.
# Dies when the configuration file already exists. A missing name defaults
# to "vm<vmid>" and a missing memory size to the value from load_defaults().
# Only options known to $confdesc are written, one "key: value" line each.
sub create_conf_nolock {
    my ($vmid, $settings) = @_;

    my $filename = config_file ($vmid);

    die "configuration file '$filename' already exists\n" if -f $filename;

    my $defaults = load_defaults();

    $settings->{name} = "vm$vmid" if !$settings->{name};
    $settings->{memory} = $defaults->{memory} if !$settings->{memory};

    my $data = '';
    foreach my $opt (keys %$settings) {
        next if !$confdesc->{$opt};

        my $value = $settings->{$opt};

        # Skip only unset or empty values. An explicit 0 must be written:
        # options like acpi, kvm, tdf, reboot and tablet default to 1, so
        # dropping the 0 would silently re-enable them.
        next if !defined($value) || $value eq '';

        $data .= "$opt: $value\n";
    }

    PVE::Tools::file_set_contents($filename, $data);
}
786
787# ideX = [volume=]volume-id[,media=d][,cyls=c,heads=h,secs=s[,trans=t]]
788# [,snapshot=on|off][,cache=on|off][,format=f][,backup=yes|no]
789# [,aio=native|threads]
790
# Parse a drive option value ($data) for the option named $key.
# $key may be undefined (used to verify JSON parameters); the result then
# carries interface 'unknown' and index 0. Otherwise $key must look like
# "<name><number>" and supplies interface and index.
# $data is a comma separated list of "key=value" settings; the first item
# without '=' is taken as the file. 'volume' is an alias for 'file'.
# Returns a hash reference with the parsed settings, or undef when the key
# or any setting is invalid, duplicated, or not allowed in combination.
sub parse_drive {
    my ($key, $data) = @_;

    my ($interface, $index);
    if (!defined($key)) {
        # should not harm when used to verify parameters
        ($interface, $index) = ('unknown', 0);
    } elsif ($key =~ m/^([^\d]+)(\d+)$/) {
        ($interface, $index) = ($1, $2);
    } else {
        return undef;
    }

    my $res = { interface => $interface, index => $index };

    foreach my $p (split (/,/, $data)) {
        next if $p =~ m/^\s*$/;

        if ($p =~ m/^(file|volume|cyls|heads|secs|trans|media|snapshot|cache|format|rerror|werror|backup|aio)=(.+)$/) {
            my ($k, $v) = ($1, $2);

            $k = 'file' if $k eq 'volume';

            # every setting may be given only once
            return undef if defined $res->{$k};

            $res->{$k} = $v;
            next;
        }

        # a bare item is only accepted as the (first) file name
        return undef if $res->{file} || $p =~ m/=/;

        $res->{file} = $p;
    }

    return undef if !$res->{file};

    # allowed values per setting; a false/unset value is never checked
    my %allowed = (
        cache => qr/^(off|none|writethrough|writeback)$/,
        snapshot => qr/^(on|off)$/,
        cyls => qr/^\d+$/,
        heads => qr/^\d+$/,
        secs => qr/^\d+$/,
        media => qr/^(disk|cdrom)$/,
        trans => qr/^(none|lba|auto)$/,
        format => qr/^(raw|cow|qcow|qcow2|vmdk|cloop)$/,
        rerror => qr/^(ignore|report|stop)$/,
        werror => qr/^(enospc|ignore|report|stop)$/,
        backup => qr/^(yes|no)$/,
        aio => qr/^(native|threads)$/,
    );

    foreach my $opt (keys %allowed) {
        my $v = $res->{$opt};
        return undef if $v && $v !~ $allowed{$opt};
    }

    if ($res->{media} && ($res->{media} eq 'cdrom')) {
        # disk-only settings are not allowed for a cdrom
        foreach my $opt (qw(snapshot trans format heads secs cyls)) {
            return undef if $res->{$opt};
        }
        return undef if $res->{interface} eq 'virtio';
    }

    # rerror does not work with scsi drives
    return undef if $res->{rerror} && $res->{interface} eq 'scsi';

    return $res;
}
856
# drive settings which are passed on to qemu
my @qemu_drive_options = qw(heads secs cyls trans media format cache snapshot rerror werror aio);

# Serialise a parsed drive back into its configuration string:
# the file followed by ",key=value" for every true setting, in the order of
# @qemu_drive_options and then 'backup'. $vmid is accepted but not used.
sub print_drive {
    my ($vmid, $drive) = @_;

    my @settings = grep { $drive->{$_} } (@qemu_drive_options, 'backup');

    my $opts = join('', map { ",$_=$drive->{$_}" } @settings);

    return "$drive->{file}$opts";
}
869
ca916ecc
DA
# Build the device specification string for $drive.
# virtio maps to virtio-blk-pci; scsi maps to scsi-disk with 7 units per
# controller; ide maps to ide-drive with 2 units per controller. Every
# device references "drive-<interface><index>" and gets the id
# "device-<interface><index>". Any other interface (including 'usb', which
# is not implemented) yields an empty string.
# $storecfg and $vmid are accepted but not used.
sub print_drivedevice_full {
    my ($storecfg, $vmid, $drive) = @_;

    my $interface = $drive->{interface};
    my $index = $drive->{index};

    my $ids = "drive=drive-$interface$index,id=device-$interface$index";

    if ($interface eq 'virtio') {
        return "virtio-blk-pci,$ids";
    }

    if ($interface eq 'scsi') {
        my $controller = int ($index / 7);
        my $unit = $index % 7;

        return "scsi-disk,bus=scsi${controller}.0,scsi-id=$unit,$ids";
    }

    if ($interface eq 'ide') {
        my $controller = int ($index / 2);
        my $unit = $index % 2;

        return "ide-drive,bus=ide.$controller,unit=$unit,$ids";
    }

    return '';
}
905
1e3baf05
DM
# Build the drive specification string for $drive.
# The result is "[file=<path>,]if=none,id=drive-<interface><index>" followed
# by ",key=value" for every true setting from @qemu_drive_options, plus
# ",aio=native" when no aio setting is present. The path comes from
# get_iso_path() for cdrom drives; otherwise an absolute file is used as is
# and anything else is resolved through PVE::Storage::path(). The file=
# part is omitted when the resolved path is empty.
sub print_drive_full {
    my ($storecfg, $vmid, $drive) = @_;

    my @settings = grep { $drive->{$_} } @qemu_drive_options;
    my $opts = join('', map { ",$_=$drive->{$_}" } @settings);

    # use linux-aio by default (qemu default is threads)
    $opts .= ",aio=native" if !$drive->{aio};

    my $volid = $drive->{file};

    my $path;
    if (drive_is_cdrom ($drive)) {
        $path = get_iso_path ($storecfg, $vmid, $volid);
    } elsif ($volid =~ m|^/|) {
        $path = $volid;
    } else {
        $path = PVE::Storage::path ($storecfg, $volid);
    }

    my $pathinfo = '';
    $pathinfo = "file=$path," if $path;

    return "${pathinfo}if=none,id=drive-$drive->{interface}$drive->{index}$opts";
}
933
934
# Return a true value when $drive is set and its media is 'cdrom'.
# Otherwise the first false value met is returned: $drive itself, its media
# setting, or the failed comparison.
sub drive_is_cdrom {
    my ($drive) = @_;

    return $drive if !$drive;

    my $media = $drive->{media};
    return $media if !$media;

    return ($media eq 'cdrom');
}
941
040b06b7
DA
# Parse a host PCI device id of the form 'bus:dev.func' (two, two and one
# lower-case hex digits). Returns { pciid => $value } on success and undef
# for an empty or malformed value.
sub parse_hostpci {
    my ($value) = @_;

    return undef if !$value;

    return undef if $value !~ m/^[a-f0-9]{2}:[a-f0-9]{2}\.[a-f0-9]$/;

    return { pciid => $value };
}
957
1e3baf05
DM
958# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
# Parse a network device specification such as
# "e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>".
# Returns a hash reference with model (lower-cased), macaddr (upper-cased,
# or taken from random_ether_addr() when none is given) and, if present,
# bridge and rate. Returns undef on an unknown item or when no model is
# specified.
sub parse_net {
    my ($data) = @_;

    my %net;

    foreach my $kvp (split (/,/, $data)) {

        if ($kvp =~ m/^(ne2k_pci|e1000|rtl8139|pcnet|virtio|ne2k_isa|i82551|i82557b|i82559er)(=([0-9a-f]{2}(:[0-9a-f]{2}){5}))?$/i) {
            my ($model, $mac) = ($1, $3);
            $net{model} = lc ($model);
            $net{macaddr} = uc($mac) || random_ether_addr ();
            next;
        }

        if ($kvp =~ m/^bridge=(\S+)$/) {
            $net{bridge} = $1;
            next;
        }

        if ($kvp =~ m/^rate=(\d+(\.\d+)?)$/) {
            $net{rate} = $1;
            next;
        }

        return undef;
    }

    return undef if !$net{model};

    return \%net;
}
985
# Serialise a parsed network device back into its configuration string:
# "<model>[=<macaddr>][,bridge=<bridge>][,rate=<rate>]". Settings with a
# false value are left out.
sub print_net {
    my $net = shift;

    my $res = "$net->{model}";

    foreach my $part (['=', 'macaddr'], [',bridge=', 'bridge'], [',rate=', 'rate']) {
        my ($prefix, $field) = @$part;
        $res .= "$prefix$net->{$field}" if $net->{$field};
    }

    return $res;
}
996
# Rewrite every netN entry of $settings in place by parsing and printing it
# again; parse_net() fills in a MAC address when the entry has none.
# Entries that do not parse are left untouched.
sub add_random_macs {
    my ($settings) = @_;

    my @netopts = grep { m/^net(\d+)$/ } keys %$settings;

    foreach my $opt (@netopts) {
        my $net = parse_net($settings->{$opt});
        $settings->{$opt} = print_net($net) if $net;
    }
}
1007
# Record $volid as an unused volume.
# Scans the unusedN slots of $config from the highest index down to 0.
# Returns without doing anything when $volid is already listed; otherwise
# stores $volid in $res under the lowest free slot name. Dies when every
# slot is taken.
sub add_unused_volume {
    my ($config, $res, $volid) = @_;

    my $free_slot;
    foreach my $ind (reverse(0 .. ($MAX_UNUSED_DISKS - 1))) {
        my $slot = "unused$ind";
        my $vid = $config->{$slot};

        if (!$vid) {
            # walking downwards, so the last free slot seen is the lowest
            $free_slot = $slot;
            next;
        }

        return if $vid eq $volid; # do not add duplicates
    }

    die "To many unused volume - please delete them first.\n" if !$free_slot;

    $res->{$free_slot} = $volid;
}
1025
# fixme: remove all those $noerr parameters?
1027
1028PVE::JSONSchema::register_format('pve-qm-bootdisk', \&verify_bootdisk);
# Format check for 'pve-qm-bootdisk'.
# Returns $value when it is a valid drive name. Otherwise returns undef if
# $noerr is set and dies with "invalid boot disk" if not.
sub verify_bootdisk {
    my ($value, $noerr) = @_;

    if (!valid_drivename($value)) {
        return undef if $noerr;

        die "invalid boot disk '$value'\n";
    }

    return $value;
}
1038
1039PVE::JSONSchema::register_format('pve-qm-net', \&verify_net);
# Format check for 'pve-qm-net'.
# Returns $value when parse_net() accepts it. Otherwise returns undef if
# $noerr is set and dies with "unable to parse network options" if not.
sub verify_net {
    my ($value, $noerr) = @_;

    if (!parse_net($value)) {
        return undef if $noerr;

        die "unable to parse network options\n";
    }

    return $value;
}
1049
1050PVE::JSONSchema::register_format('pve-qm-drive', \&verify_drive);
# Format check for 'pve-qm-drive'.
# Returns $value when parse_drive() accepts it (called with an undefined
# key). Otherwise returns undef if $noerr is set and dies with
# "unable to parse drive options" if not.
sub verify_drive {
    my ($value, $noerr) = @_;

    if (!parse_drive (undef, $value)) {
        return undef if $noerr;

        die "unable to parse drive options\n";
    }

    return $value;
}
1060
1061PVE::JSONSchema::register_format('pve-qm-hostpci', \&verify_hostpci);
# Format check for 'pve-qm-hostpci'.
# Returns $value when parse_hostpci() accepts it. Otherwise returns undef if
# $noerr is set and dies with "unable to parse pci id" if not.
sub verify_hostpci {
    my ($value, $noerr) = @_;

    if (!parse_hostpci($value)) {
        return undef if $noerr;

        die "unable to parse pci id\n";
    }

    return $value;
}
1071
0ea9541d
DM
PVE::JSONSchema::register_format('pve-qm-watchdog', \&verify_watchdog);
# Format checker for 'pve-qm-watchdog': accepts $value when
# parse_watchdog() can parse it. On failure returns undef if $noerr is
# set, otherwise dies.
sub verify_watchdog {
    my ($value, $noerr) = @_;

    if (!parse_watchdog($value)) {
        return undef if $noerr;
        die "unable to parse watchdog options\n";
    }

    return $value;
}
1082
# Parse a comma separated watchdog specification such as
# "i6300esb,action=reset" (the "model=" / "action=" prefixes are
# optional). Returns a hash ref with the keys 'model' and/or 'action',
# or undef when $value is empty or contains an unknown item.
sub parse_watchdog {
    my ($value) = @_;

    return undef if !$value;

    my %wd;

    for my $part (split(/,/, $value)) {
        next if $part =~ m/^\s*$/;

        if ($part =~ m/^(model=)?(i6300esb|ib700)$/) {
            $wd{model} = $2;
            next;
        }

        if ($part =~ m/^(action=)?(reset|shutdown|poweroff|pause|debug|none)$/) {
            $wd{action} = $2;
            next;
        }

        # anything else makes the whole specification invalid
        return undef;
    }

    return \%wd;
}
1104
1e3baf05
DM
# Parse a USB device specification. Each comma separated item must be
# either "host=VVVV:PPPP" (hex vendor/product id) or "host=BUS-PORT"
# (port may contain dotted sub-ports). Returns a hash ref with
# vendorid/productid and/or hostbus/hostport, or undef when $value is
# empty, contains no item, or contains an item of another form.
sub parse_usb_device {
    my ($value) = @_;

    return undef if !$value;

    my $dev = {};
    my $matched = 0;

    for my $item (split(/,/, $value)) {
        if ($item =~ m/^host=([0-9A-Fa-f]{4}):([0-9A-Fa-f]{4})$/) {
            @$dev{qw(vendorid productid)} = ($1, $2);
        } elsif ($item =~ m/^host=(\d+)\-(\d+(\.\d+)*)$/) {
            @$dev{qw(hostbus hostport)} = ($1, $2);
        } else {
            return undef;
        }
        $matched = 1;
    }

    return undef if !$matched;

    return $dev;
}
1131
PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);
# Format checker for 'pve-qm-usb-device': accepts $value when
# parse_usb_device() can parse it. On failure returns undef if $noerr
# is set, otherwise dies.
sub verify_usb_device {
    my ($value, $noerr) = @_;

    if (!parse_usb_device($value)) {
        return undef if $noerr;
        die "unable to parse usb device\n";
    }

    return $value;
}
1142
1e3baf05
DM
# add JSON properties for create and set function
#
# Copies every entry of $confdesc into the hash referenced by $prop
# (overwriting entries with the same key) and returns $prop.
sub json_config_properties {
    my $prop = shift;

    $prop->{$_} = $confdesc->{$_} for keys %$confdesc;

    return $prop;
}
1153
# Validate $value against the type that $confdesc declares for $key and
# return the normalised value:
#   boolean - 1/on/yes/true => 1, 0/off/no/false => 0 (case insensitive)
#   integer - non-negative decimal digits only, returned as a number
#   string  - checked against the declared format, if any; without a
#             format a single pair of enclosing double quotes is removed
# Dies on unknown keys, undefined values, values containing CR/LF and
# on any failed check.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);

    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        my $fmt = $desc->{format};

        if (!$fmt) {
            $value =~ s/^\"(.*)\"$/$1/;
            return $value;
        }

        if ($fmt eq 'pve-qm-drive') {
            # special case - we need to pass $key to parse_drive()
            return $value if parse_drive($key, $value);
            die "unable to parse drive options\n";
        }

        PVE::JSONSchema::check_format($fmt, $value);
        return $value;
    }

    die "internal error"
}
1193
# Run $code (with @param) through lock_file() on the lock file of VM
# $vmid, using a timeout argument of 10. Any error left in $@ afterwards
# is re-thrown.
sub lock_config {
    my ($vmid, $code, @param) = @_;

    lock_file(config_file_lock($vmid), 10, $code, @param);

    die $@ if $@;
}
1203
# Return the cluster file system path (relative, without leading slash)
# of the configuration file of VM $vmid on $node. $node defaults to the
# file-level $nodename.
sub cfs_config_path {
    my ($vmid, $node) = @_;

    my $target = $node ? $node : $nodename;

    return "nodes/$target/qemu-server/$vmid.conf";
}
1210
040b06b7
DA
# Placeholder IOMMU check: currently always reports support (returns 1).
sub check_iommu_support {
    #fixme : need to check IOMMU support
    #http://www.linux-kvm.org/page/How_to_assign_devices_with_VT-d_in_KVM

    return 1;
}
1219
# Return the absolute path below /etc/pve of the configuration file of
# VM $vmid on $node (see cfs_config_path() for the $node default).
sub config_file {
    my ($vmid, $node) = @_;

    return "/etc/pve/" . cfs_config_path($vmid, $node);
}
1226
# Return the path of the lock file for VM $vmid inside $lock_dir.
sub config_file_lock {
    my ($vmid) = @_;

    return join('/', $lock_dir, "lock-$vmid.conf");
}
1232
# Set access and modification time of the configuration file of VM
# $vmid to the current time.
sub touch_config {
    my ($vmid) = @_;

    utime undef, undef, config_file($vmid);
}
1239
# Allocate the disks requested in $settings.
#
# A non-cdrom drive whose 'file' has the form "[STORAGE:]SIZE" is
# allocated on STORAGE (default 'local') with SIZE*1024*1024 passed to
# vdisk_alloc(), and the drive entry in $settings is rewritten to refer
# to the new volume. Any other drive must point to an existing regular
# file or block device.
#
# Returns an array ref of the newly allocated volume ids. On error all
# volumes allocated so far are freed again and the error is re-thrown.
sub create_disks {
    my ($storecfg, $vmid, $settings) = @_;

    my $vollist = [];

    my $handle_drive = sub {
        my ($ds, $disk) = @_;

        return if drive_is_cdrom($disk);

        my $file = $disk->{file};

        if (my @m = $file =~ m/^(([^:\s]+):)?(\d+(\.\d+)?)$/) {
            my $storeid = $m[1] || 'local';
            my $size = $m[2];
            my $defformat = PVE::Storage::storage_default_format($storecfg, $storeid);
            my $fmt = $disk->{format} || $defformat;
            syslog('info', "VM $vmid creating new disk - size is $size GB");

            my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid,
                                                  $fmt, undef, $size*1024*1024);

            $disk->{file} = $volid;
            delete($disk->{format}); # no longer needed
            push @$vollist, $volid;
            $settings->{$ds} = PVE::QemuServer::print_drive($vmid, $disk);
            return;
        }

        # existing image: either a raw device path or a storage volume
        my $path;
        if ($disk->{file} =~ m|^/dev/.+|) {
            $path = $disk->{file};
        } else {
            $path = PVE::Storage::path($storecfg, $disk->{file});
        }

        die "image '$path' does not exists\n" if !(-f $path || -b $path);
    };

    eval { foreach_drive($settings, $handle_drive); };

    if (my $err = $@) {
        syslog('err', "VM $vmid creating disks failed");
        # roll back everything we allocated before the failure
        for my $volid (@$vollist) {
            eval { PVE::Storage::vdisk_free($storecfg, $volid); };
            warn $@ if $@;
        }
        die $err;
    }

    return $vollist;
}
1294
# Free the storage volume $volid, provided it is owned by VM $vmid, and
# touch the VM configuration afterwards. Dies for absolute paths and
# for volumes whose owner (as reported by PVE::Storage::path) is not
# $vmid.
sub unlink_image {
    my ($storecfg, $vmid, $volid) = @_;

    if ($volid =~ m|^/|) {
        die "reject to unlink absolute path '$volid'";
    }

    my ($path, $owner) = PVE::Storage::path($storecfg, $volid);

    unless ($owner && ($owner == $vmid)) {
        die "reject to unlink '$volid' - not owned by this VM";
    }

    syslog('info', "VM $vmid deleting volume '$volid'");

    PVE::Storage::vdisk_free($storecfg, $volid);

    touch_config($vmid);
}
1312
# Destroy VM $vmid: free all configured disks that are owned by the VM,
# remove its configuration file and finally free every remaining volume
# that vdisk_list() reports for this VM.
#
# Dies when the configuration cannot be loaded or the VM is locked.
# Errors while removing leftover volumes are only reported via warn.
sub destroy_vm {
    my ($storecfg, $vmid) = @_;

    my $conffile = config_file($vmid);

    my $conf = load_config($vmid);

    check_lock($conf);

    # only remove disks owned by this VM
    foreach_drive($conf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);

        # Use 'return' (not 'next') to skip a drive: we are inside a
        # callback, and leaving a subroutine via 'next' is a warned-about
        # misuse that depends on the caller's loop.
        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        PVE::Storage::vdisk_free($storecfg, $volid);
    });

    unlink $conffile;

    # also remove unused disk
    eval {
        my $dl = PVE::Storage::vdisk_list($storecfg, undef, $vmid);

        PVE::Storage::foreach_volid($dl, sub {
            my ($volid) = @_;
            PVE::Storage::vdisk_free($storecfg, $volid);
        });
    };
    warn $@ if $@;
}
1354
# fixme: remove?
#
# Return a hash ref mapping each drive key of $conf to its parsed drive
# hash, extended by 'disksize': the size reported for the drive's file
# divided by 1024*1024, or 0 when no size information was found.
# Sizes of /dev/... paths come from file_size_info(); all other
# non-cdrom files are looked up through vdisk_list(). Errors from the
# storage listing are only reported via warn.
sub load_diskinfo_old {
    my ($storecfg, $vmid, $conf) = @_;

    my (%sizeinfo, %drives);
    my $vollist;

    foreach_drive($conf, sub {
        my ($ds, $di) = @_;

        $drives{$ds} = $di;

        return if drive_is_cdrom($di);

        my $file = $di->{file};
        if ($file =~ m|^/dev/.+|) {
            $sizeinfo{$file}->{size} = PVE::Storage::file_size_info($file);
        } else {
            push @$vollist, $file;
        }
    });

    eval {
        my $dl = PVE::Storage::vdisk_list($storecfg, undef, $vmid, $vollist);

        PVE::Storage::foreach_volid($dl, sub {
            my ($volid, $sid, $volname, $d) = @_;
            $sizeinfo{$volid} = $d;
        });
    };
    warn $@ if $@;

    for my $di (values %drives) {
        my $entry = $sizeinfo{$di->{file}};
        $di->{disksize} = $entry ? $entry->{size} / (1024*1024) : 0;
    }

    return \%drives;
}
1396
# Read and return the parsed configuration of VM $vmid from the cluster
# file system. Dies when no configuration is available.
sub load_config {
    my ($vmid) = @_;

    my $conf = PVE::Cluster::cfs_read_file(cfs_config_path($vmid));

    defined($conf) or die "no such VM ('$vmid')\n";

    return $conf;
}
1408
# Parse the raw text $raw of the VM configuration file $filename (which
# must end in /qemu-server/<vmid>.conf) into a hash ref.
#
# The result always contains 'digest' (SHA1 hex of $raw). Comment and
# blank lines are skipped; 'description' is text-decoded, 'args' is
# taken verbatim, every other "key: value" line is validated with
# check_type(). Drive values get their file name mapped to a volume id.
# A 'cdrom' entry is stored as 'ide2', and a legacy 'smp' value is
# converted to 'sockets'. Lines that fail validation only produce a
# warning. Returns undef when $raw is undefined.
sub parse_vm_config {
    my ($filename, $raw) = @_;

    return undef if !defined($raw);

    my $res = {};
    $res->{digest} = Digest::SHA1::sha1_hex($raw);

    my ($vmid) = $filename =~ m|/qemu-server/(\d+)\.conf$|
        or die "got strange filename '$filename'";

    # consume $raw line by line
    while ($raw && $raw =~ s/^(.*?)(\n|$)//) {
        my $line = $1;

        next if $line =~ m/^\#/ || $line =~ m/^\s*$/;

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $res->{description} = PVE::Tools::decode_text($2);
            next;
        }

        if ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            $res->{args} = $2;
            next;
        }

        # lines of any other shape are ignored
        my ($key, $value) = $line =~ m/^([a-z][a-z_]*\d*):\s*(\S+)\s*$/
            or next;

        eval { $value = check_type($key, $value); };
        if ($@) {
            warn "vm $vmid - unable to parse value of '$key' - $@";
            next;
        }

        my $fmt = $confdesc->{$key}->{format};
        if ($fmt && $fmt eq 'pve-qm-drive') {
            my $v = parse_drive($key, $value);
            my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media});
            if (!$volid) {
                warn "vm $vmid - unable to parse value of '$key'\n";
                next;
            }
            $v->{file} = $volid;
            $value = print_drive($vmid, $v);
        }

        my $target = ($key eq 'cdrom') ? 'ide2' : $key;
        $res->{$target} = $value;
    }

    # convert old smp to sockets
    $res->{sockets} = $res->{smp} if $res->{smp} && !$res->{sockets};
    delete $res->{smp};

    return $res;
}
1474
# Apply $settings / $unset to the configuration of VM $vmid while
# holding the VM's config lock. See change_config_nolock() for the
# meaning of the arguments.
sub change_config {
    my ($vmid, $settings, $unset, $skiplock) = @_;

    # Pass a closure: a bare '&change_config_nolock' would call the
    # function right here (with our @_), i.e. before the lock is taken,
    # and hand its return value to lock_config() as the "callback".
    lock_config($vmid, sub {
        change_config_nolock($vmid, $settings, $unset, $skiplock);
    });
}
1480
# Rewrite the configuration file of VM $vmid without taking the config
# lock.
#
# $settings - hash ref of key => value pairs to set ('digest' is
#             ignored, 'cdrom' is stored as 'ide2', 'smp' is converted
#             to 'sockets'); every value is validated by check_type()
# $unset    - hash ref whose defined keys are removed from the file
# $skiplock - when false, check_lock() is applied to $settings
#
# Existing lines keep their order; comments and blank lines are copied.
# 'unusedN' lines whose volume is re-added as a drive are dropped. The
# new content is written to a temporary file which then replaces the
# original via rename(). Dies on validation, parse and I/O errors.
sub change_config_nolock {
    my ($vmid, $settings, $unset, $skiplock) = @_;

    my $res = {};

    $unset->{ide2} = $unset->{cdrom} if $unset->{cdrom};

    check_lock($settings) if !$skiplock;

    # we do not use 'smp' any longer
    if (!$settings->{sockets} && $settings->{smp}) {
        $settings->{sockets} = $settings->{smp};
    }
    $unset->{smp} = 1 if $settings->{sockets};

    my %new_volids;

    foreach my $key (grep { $_ ne 'digest' } keys %$settings) {
        my $value = $settings->{$key};
        $value = PVE::Tools::encode_text($value) if $key eq 'description';

        eval { $value = check_type($key, $value); };
        die "unable to parse value of '$key' - $@" if $@;

        $res->{ $key eq 'cdrom' ? 'ide2' : $key } = $value;

        next if !valid_drivename($key);

        my $drive = PVE::QemuServer::parse_drive($key, $value);
        $new_volids{$drive->{file}} = 1 if $drive && $drive->{file};
    }

    my $filename = config_file($vmid);
    my $tmpfn = "$filename.$$.tmp";

    my $fh = IO::File->new($filename, "r") ||
        die "unable to read config for VM $vmid\n";

    my $werror = "unable to write config for VM $vmid\n";

    my $out = IO::File->new($tmpfn, "w") || die $werror;

    eval {
        my %done;

        while (my $line = <$fh>) {

            if ($line =~ m/^\#/ || $line =~ m/^\s*$/) {
                print $out $line or die $werror;
                next;
            }

            my ($key, $value) = $line =~ m/^([a-z][a-z_]*\d*):\s*(.*\S)\s*$/
                or die "unable to parse config file: $line\n";

            # remove 'unusedX' settings if we re-add a volume
            next if $key =~ m/^unused/ && $new_volids{$value};

            # convert 'smp' to 'sockets'
            $key = 'sockets' if $key eq 'smp';

            # only the first occurrence of a key is kept
            next if $done{$key}++;

            $value = delete $res->{$key} if defined($res->{$key});

            next if defined($unset->{$key});

            print $out "$key: $value\n" or die $werror;
        }

        # append settings that were not present in the old file
        foreach my $key (keys %$res) {
            next if defined($unset->{$key});
            print $out "$key: $res->{$key}\n" or die $werror;
        }
    };

    my $err = $@;

    $fh->close();

    # remove the temporary file and re-throw; the message is built by
    # the caller before unlink() can change $!
    my $fail = sub {
        my ($msg) = @_;
        unlink $tmpfn;
        die $msg;
    };

    if ($err) {
        $out->close();
        $fail->($err);
    }

    if (!$out->close()) {
        $fail->("close failed - $!\n");
    }

    if (!rename($tmpfn, $filename)) {
        $fail->("rename failed - $!\n");
    }
}
1597
# Return a hash ref of default settings: every 'default' declared in
# $confdesc, with 'keyboard' overridden by the value from
# 'datacenter.cfg' when that file provides one.
sub load_defaults {

    my %defaults;

    # we use static defaults from our JSON schema configuration
    for my $key (keys %$confdesc) {
        my $default = $confdesc->{$key}->{default};
        $defaults{$key} = $default if defined($default);
    }

    my $dccfg = PVE::Cluster::cfs_read_file('datacenter.cfg');
    $defaults{keyboard} = $dccfg->{keyboard} if $dccfg->{keyboard};

    return \%defaults;
}
1614
# Return a hash ref { $vmid => { exists => 1 } } for every VM that the
# cluster VM list assigns to this node ($nodename). Returns an empty
# hash ref when no VM list is available.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();

    return {} if !$vmlist || !$vmlist->{ids};

    my $ids = $vmlist->{ids};

    my @local_ids = grep {
        my $d = $ids->{$_};
        $d->{node} && $d->{node} eq $nodename;
    } keys %$ids;

    return { map { $_ => { exists => 1 } } @local_ids };
}
1628
64e13401
DM
# test if VM uses local resources (to prevent migration)
#
# A VM counts as using local resources when its configuration $conf has
# a true 'hostusb' or 'hostpci' value (old syntax) or any key of the
# form usbN, hostpciN, serialN or parallelN.
#
# Returns 1 or 0. Unless $noerr is set, dies with
# "VM uses local resources\n" instead of returning 1.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    my $loc_res = 0;

    $loc_res = 1 if $conf->{hostusb}; # old syntax
    $loc_res = 1 if $conf->{hostpci}; # old syntax

    foreach my $k (keys %$conf) {
        $loc_res = 1 if $k =~ m/^(usb|hostpci|serial|parallel)\d+$/;
    }

    die "VM uses local resources\n" if $loc_res && !$noerr;

    return $loc_res;
}
1647
1e3baf05
DM
# Die when the configuration $conf carries a true 'lock' value.
sub check_lock {
    my ($conf) = @_;

    my $lock = $conf->{lock};

    die "VM is locked ($lock)\n" if $lock;
}
1653
# Check whether process $pid is a kvm process that was started with
# '-pidfile $pidfile' (or '--pidfile'), based on /proc/$pid/cmdline.
# Returns 1 on a match and a false value otherwise.
sub check_cmdline {
    my ($pidfile, $pid) = @_;

    my $fh = IO::File->new("/proc/$pid/cmdline", "r");
    return undef if !defined($fh);

    my $line = <$fh>;
    $fh->close;
    return undef if !$line;

    # arguments are NUL separated
    my @param = split(/\0/, $line);

    my $cmd = $param[0];
    return if !$cmd || ($cmd !~ m|kvm$|);

    for my $i (0 .. $#param) {
        my $arg = $param[$i];
        next if !$arg;
        next unless ($arg eq '-pidfile') || ($arg eq '--pidfile');

        # only the first pidfile option is considered
        my $found = $param[$i+1];
        return 1 if $found && ($found eq $pidfile);
        return undef;
    }

    return undef;
}
1679
# Return the PID of the running kvm process of VM $vmid, or undef when
# the VM is not running.
#
# The PID is read from the VM's pidfile and only accepted when
# /proc/<pid> exists and check_cmdline() confirms the process was
# started with that pidfile. Dies when the VM has no configuration file.
sub check_running {
    my ($vmid) = @_;

    my $filename = config_file($vmid);

    die "unable to find configuration file for VM $vmid - no such machine\n"
        if ! -f $filename;

    my $pidfile = pidfile_name($vmid);

    # explicit read mode instead of a mode embedded in the file name
    my $fd = IO::File->new($pidfile, "r");
    return undef if !$fd;

    my $st = stat($fd);
    my $line = <$fd>;
    close($fd);

    # the timestamp belongs to the pidfile, so that is the file to name
    if ($st && $st->mtime > time()) {
        warn "file '$pidfile' modified in future\n";
    }

    # an empty pidfile yields an undefined line
    if (defined($line) && $line =~ m/^(\d+)$/) {
        my $pid = $1;

        return $pid if ((-d "/proc/$pid") && check_cmdline($pidfile, $pid));
    }

    return undef;
}
1709
# Return the result of config_list(), with a 'pid' entry added for
# every listed VM that has a <vmid>.pid file in $var_run_tmpdir and is
# confirmed running by check_running().
sub vzlist {

    my $vms = config_list();

    my $dir = IO::Dir->new($var_run_tmpdir);
    return $vms if !$dir;

    while (defined(my $entry = $dir->read)) {
        my ($vmid) = $entry =~ m/^(\d+)\.pid$/ or next;
        next if !defined($vms->{$vmid});

        my $pid = check_running($vmid);
        $vms->{$vmid}->{pid} = $pid if $pid;
    }

    return $vms;
}
1727
# time of the last failed size query, keyed by storage id or by path
my $storage_timeout_hash = {};

# Determine the size of the boot disk configured in $conf.
#
# Returns ($size, $used) in list context and $size in scalar context,
# as reported by PVE::Storage::file_size_info(). Returns undef when no
# usable boot disk is configured, when the boot disk is a cdrom, when
# the size query yields no format, or when the same storage/path had a
# failed query less than 30 seconds ago.
sub disksize {
    my ($storecfg, $conf) = @_;

    my $bootdisk = $conf->{bootdisk};
    return undef
        if !$bootdisk || !valid_drivename($bootdisk) || !$conf->{$bootdisk};

    my $drive = parse_drive($bootdisk, $conf->{$bootdisk});
    return undef if !defined($drive) || drive_is_cdrom($drive);

    my $volid = $drive->{file};
    return undef if !$volid;

    my ($path, $timeoutid);

    if ($volid =~ m|^/|) {
        ($path, $timeoutid) = ($volid, $volid);
    } else {
        $timeoutid = PVE::Storage::parse_volume_id($volid);
        $path = PVE::Storage::path($storecfg, $volid);
    }

    if (my $last_timeout = $storage_timeout_hash->{$timeoutid}) {
        # skip storage with errors
        return undef if (time() - $last_timeout) < 30;
        delete $storage_timeout_hash->{$timeoutid};
    }

    my ($size, $format, $used) = PVE::Storage::file_size_info($path, 1);

    if (!defined($format)) {
        # got timeout
        $storage_timeout_hash->{$timeoutid} = time();
        return undef;
    }

    return wantarray ? ($size, $used) : $size;
}
1779
# CPU accounting of the previous vmstatus() call, keyed by PID
my $last_proc_pid_stat;

# Collect status information for all VMs on this node, or only for
# $opt_vmid when given.
#
# Returns a hash ref keyed by VM id. Each entry contains: pid, status
# ('running' or 'stopped'), disk, maxdisk, cpus, name, maxmem, uptime,
# cpu, relcpu, mem, netin, netout, diskread and diskwrite. Counters of
# VMs that are not running stay at 0.
#
# CPU usage is computed from the difference to the values remembered by
# the previous call, so the first call for a PID reports 0.
sub vmstatus {
    my ($opt_vmid) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my ($uptime) = PVE::ProcFSTools::read_proc_uptime();

    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);

        my $cfspath = cfs_config_path($vmid);
        my $conf = PVE::Cluster::cfs_read_file($cfspath) || {};

        my $d = {};
        $d->{pid} = $list->{$vmid}->{pid};

        # fixme: better status?
        $d->{status} = $list->{$vmid}->{pid} ? 'running' : 'stopped';

        my ($size, $used) = disksize($storecfg, $conf);
        if (defined($size) && defined($used)) {
            $d->{disk} = $used;
            $d->{maxdisk} = $size;
        } else {
            $d->{disk} = 0;
            $d->{maxdisk} = 0;
        }

        $d->{cpus} = ($conf->{sockets} || 1) * ($conf->{cores} || 1);
        $d->{name} = $conf->{name} || "VM $vmid";
        $d->{maxmem} = $conf->{memory} ? $conf->{memory}*(1024*1024) : 0;

        $d->{uptime} = 0;
        $d->{cpu} = 0;
        $d->{relcpu} = 0;
        $d->{mem} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $res->{$vmid} = $d;
    }

    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^tap([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $res->{$vmid};
        next if !$d;

        # the tap device's 'receive' counter is booked as the VM's
        # outgoing traffic and 'transmit' as its incoming traffic
        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};
    }

    my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
    my $cpucount = $cpuinfo->{cpus} || 1;
    my $ctime = gettimeofday;

    foreach my $vmid (keys %$list) {

        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        if (my $fh = IO::File->new("/proc/$pid/io", "r")) {
            my $data = {};
            while (defined(my $line = <$fh>)) {
                if ($line =~ m/^([rw]char):\s+(\d+)$/) {
                    $data->{$1} = $2;
                }
            }
            close($fh);
            $d->{diskread} = $data->{rchar} || 0;
            $d->{diskwrite} = $data->{wchar} || 0;
        }

        my $statstr = file_read_firstline("/proc/$pid/stat");
        next if !$statstr;

        # captures: $2 utime, $3 stime, $6 starttime, $7 vsize, $8 rss
        my ($utime, $stime, $vsize, $rss, $starttime);
        if ($statstr =~ m/^$pid \(.*\) \S (-?\d+) -?\d+ -?\d+ -?\d+ -?\d+ \d+ \d+ \d+ \d+ \d+ (\d+) (\d+) (-?\d+) (-?\d+) -?\d+ -?\d+ -?\d+ 0 (\d+) (\d+) (-?\d+) \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ -?\d+ -?\d+ \d+ \d+ \d+/) {
            ($utime, $stime, $vsize, $rss, $starttime) = ($2, $3, $7, $8 * 4096, $6);
        } else {
            next;
        }

        my $used = $utime + $stime;

        my $vcpus = $d->{cpus} > $cpucount ? $cpucount : $d->{cpus};

        # starttime is counted in clock ticks; convert with the system's
        # tick rate instead of assuming 100 ticks per second
        $d->{uptime} = int($uptime - ($starttime/$clock_ticks));

        if ($vsize) {
            $d->{mem} = int(($rss/$vsize)*$d->{maxmem});
        }

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first sample for this PID - nothing to compare against yet
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
                relcpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $clock_ticks;

        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = $dutime/$dtime;
            $d->{relcpu} = ($d->{cpu} * $cpucount) / $vcpus;
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => $d->{cpu},
                relcpu => $d->{relcpu},
            };
        } else {
            # interval too short for a meaningful value - reuse the last one
            $d->{cpu} = $old->{cpu};
            $d->{relcpu} = $old->{relcpu};
        }
    }

    return $res;
}
1917
# Call $func->($key, $drive) for every key of $conf that is a valid
# drive name and whose value parse_drive() can parse. $drive is the
# parsed drive hash.
sub foreach_drive {
    my ($conf, $func) = @_;

    for my $ds (grep { valid_drivename($_) } keys %$conf) {
        my $drive = parse_drive($ds, $conf->{$ds}) or next;

        $func->($ds, $drive);
    }
}
1930
# Build the kvm command line for VM $vmid.
#
# $storecfg    - storage configuration, passed on to the drive printers
# $vmid        - VM id
# $conf        - VM configuration hash ref
# $defaults    - default settings (see load_defaults())
# $migrate_uri - optional URI passed to '-incoming'
#
# Returns the command as an array ref; in list context returns
# ($cmd, $vollist) where $vollist holds the drive files that
# PVE::Storage::parse_volume_id() accepts.
# Dies when the installed kvm is too old, when a configured serial or
# parallel device is missing, or on an out-of-range net id.
sub config_to_command {
    my ($storecfg, $vmid, $conf, $defaults, $migrate_uri) = @_;

    my $cmd = [];

    my $kvmver = kvm_user_version();
    my $vernum = 0; # unknown
    if ($kvmver =~ m/^(\d+)\.(\d+)\.(\d+)$/) {
        $vernum = $1*1000000+$2*1000+$3;
    }

    die "detected old qemu-kvm binary ($kvmver)\n" if $vernum < 14000;

    my $have_ovz = -f '/proc/vz/vestat';

    push @$cmd, '/usr/bin/kvm';

    push @$cmd, '-id', $vmid;

    my $use_virtio = 0;

    my $socket = monitor_socket($vmid);
    push @$cmd, '-chardev', "socket,id=monitor,path=$socket,server,nowait";
    push @$cmd, '-mon', "chardev=monitor,mode=readline";

    $socket = vnc_socket($vmid);
    push @$cmd, '-vnc', "unix:$socket,x509,password";

    push @$cmd, '-pidfile' , pidfile_name($vmid);

    push @$cmd, '-daemonize';

    push @$cmd, '-incoming', $migrate_uri if $migrate_uri;

    # include usb device config
    push @$cmd, '-readconfig', '/usr/share/qemu-server/pve-usb.cfg';

    # enable absolute mouse coordinates (needed by vnc)
    my $tablet = defined($conf->{tablet}) ? $conf->{tablet} : $defaults->{tablet};
    push @$cmd, '-device', 'usb-tablet,bus=ehci.0,port=6' if $tablet;

    # host pci devices
    for (my $i = 0; $i < $MAX_HOSTPCI_DEVICES; $i++) {
        my $d = parse_hostpci($conf->{"hostpci$i"});
        next if !$d;
        push @$cmd, '-device', "pci-assign,host=$d->{pciid},id=hostpci$i";
    }

    # usb devices
    for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) {
        my $d = parse_usb_device($conf->{"usb$i"});
        next if !$d;
        if ($d->{vendorid} && $d->{productid}) {
            push @$cmd, '-device', "usb-host,vendorid=$d->{vendorid},productid=$d->{productid}";
        } elsif (defined($d->{hostbus}) && defined($d->{hostport})) {
            push @$cmd, '-device', "usb-host,hostbus=$d->{hostbus},hostport=$d->{hostport}";
        }
    }

    # serial devices
    for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
        if (my $path = $conf->{"serial$i"}) {
            die "no such serial device\n" if ! -c $path;
            push @$cmd, '-chardev', "tty,id=serial$i,path=$path";
            push @$cmd, '-device', "isa-serial,chardev=serial$i";
        }
    }

    # parallel devices
    for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
        if (my $path = $conf->{"parallel$i"}) {
            die "no such parallel device\n" if ! -c $path;
            push @$cmd, '-chardev', "parport,id=parallel$i,path=$path";
            push @$cmd, '-device', "isa-parallel,chardev=parallel$i";
        }
    }

    my $vmname = $conf->{name} || "vm$vmid";

    push @$cmd, '-name', $vmname;

    my $sockets = 1;
    $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer used
    $sockets = $conf->{sockets} if $conf->{sockets};

    my $cores = $conf->{cores} || 1;

    my $boot_opt;

    push @$cmd, '-smp', "sockets=$sockets,cores=$cores";

    push @$cmd, '-cpu', $conf->{cpu} if $conf->{cpu};

    $boot_opt = "menu=on";
    if ($conf->{boot}) {
        $boot_opt .= ",order=$conf->{boot}";
    }

    push @$cmd, '-nodefaults';

    push @$cmd, '-boot', $boot_opt if $boot_opt;

    push @$cmd, '-no-acpi' if defined($conf->{acpi}) && $conf->{acpi} == 0;

    push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;

    my $vga = $conf->{vga};
    if (!$vga) {
        if ($conf->{ostype} && ($conf->{ostype} eq 'win7' || $conf->{ostype} eq 'w2k8')) {
            $vga = 'std';
        } else {
            $vga = 'cirrus';
        }
    }

    push @$cmd, '-vga', $vga if $vga; # for kvm 77 and later

    # time drift fix
    my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
    push @$cmd, '-tdf' if $tdf;

    my $nokvm = defined($conf->{kvm}) && $conf->{kvm} == 0 ? 1 : 0;

    if (my $ost = $conf->{ostype}) {
        # other, wxp, w2k, w2k3, w2k8, wvista, win7, l24, l26

        if ($ost =~ m/^w/) { # windows
            push @$cmd, '-localtime' if !defined($conf->{localtime});

            # use rtc-td-hack when acpi is enabled
            if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
                push @$cmd, '-rtc-td-hack';
            }
        }

        # -tdf ?
        # -no-acpi
        # -no-kvm
        # -win2k-hack ?
    }

    push @$cmd, '-no-kvm' if $nokvm;

    push @$cmd, '-localtime' if $conf->{localtime};

    push @$cmd, '-startdate', $conf->{startdate} if $conf->{startdate};

    push @$cmd, '-S' if $conf->{freeze};

    # set keyboard layout
    my $kb = $conf->{keyboard} || $defaults->{keyboard};
    push @$cmd, '-k', $kb if $kb;

    # enable sound
    #my $soundhw = $conf->{soundhw} || $defaults->{soundhw};
    #push @$cmd, '-soundhw', 'es1370';
    #push @$cmd, '-soundhw', $soundhw if $soundhw;

    if ($conf->{watchdog}) {
        my $wdopts = parse_watchdog($conf->{watchdog});
        push @$cmd, '-watchdog', $wdopts->{model} || 'i6300esb';
        push @$cmd, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
    }

    my $vollist = [];
    my $scsicontroller = {}; # controllers already added to the command line

    foreach_drive($conf, sub {
        my ($ds, $drive) = @_;

        eval {
            PVE::Storage::parse_volume_id($drive->{file});
            push @$vollist, $drive->{file};
        }; # ignore errors

        $use_virtio = 1 if $ds =~ m/^virtio/;
        if ($drive->{interface} eq 'scsi') {
            my $maxdev = 7;
            my $controller = int($drive->{index} / $maxdev);
            # Add each controller only once. This must update the outer
            # $scsicontroller hash - declaring it with 'my' here would
            # create a fresh variable and forget the controller again.
            if (!$scsicontroller->{$controller}) {
                push @$cmd, '-device', "lsi,id=scsi$controller";
                $scsicontroller->{$controller} = 1;
            }
        }
        my $tmp = print_drive_full($storecfg, $vmid, $drive);
        $tmp .= ",boot=on" if $conf->{bootdisk} && ($conf->{bootdisk} eq $ds);
        push @$cmd, '-drive', $tmp;
        push @$cmd, '-device', print_drivedevice_full($storecfg, $vmid, $drive);
    });

    push @$cmd, '-m', $conf->{memory} || $defaults->{memory};

    my $foundnet = 0;

    foreach my $k (sort keys %$conf) {
        next if $k !~ m/^net(\d+)$/;
        my $i = int($1);

        die "got strange net id '$i'\n" if $i >= ${MAX_NETS};

        if ($conf->{"net$i"} && (my $net = parse_net($conf->{"net$i"}))) {

            $foundnet = 1;

            my $ifname = "tap${vmid}i$i";

            # kvm uses TUNSETIFF ioctl, and that limits ifname length
            die "interface name '$ifname' is too long (max 15 character)\n"
                if length($ifname) >= 16;

            my $device = $net->{model};
            my $vhostparam = '';
            if ($net->{model} eq 'virtio') {
                $use_virtio = 1;
                $device = 'virtio-net-pci';
                $vhostparam = ',vhost=on' if $kernel_has_vhost_net;
            };

            if ($net->{bridge}) {
                push @$cmd, '-netdev', "type=tap,id=${k},ifname=${ifname},script=/var/lib/qemu-server/pve-bridge$vhostparam";
            } else {
                push @$cmd, '-netdev', "type=user,id=${k},hostname=$vmname";
            }

            # qemu > 0.15 always try to boot from network - we disable that by
            # not loading the pxe rom file
            my $extra = (!$conf->{boot} || ($conf->{boot} !~ m/n/)) ?
                "romfile=," : '';
            push @$cmd, '-device', "$device,${extra}mac=$net->{macaddr},netdev=${k}";
        }
    }

    push @$cmd, '-net', 'none' if !$foundnet;

    # hack: virtio with fairsched is unreliable, so we do not use fairsched
    # when the VM uses virtio devices.
    if (!$use_virtio && $have_ovz) {

        my $cpuunits = defined($conf->{cpuunits}) ?
            $conf->{cpuunits} : $defaults->{cpuunits};

        push @$cmd, '-cpuunits', $cpuunits if $cpuunits;

        # fixme: cpulimit is currently ignored
        #push @$cmd, '-cpulimit', $conf->{cpulimit} if $conf->{cpulimit};
    }

    # add custom args
    if ($conf->{args}) {
        my $aa = split_args($conf->{args});
        push @$cmd, @$aa;
    }

    return wantarray ? ($cmd, $vollist) : $cmd;
}
2184
# Return the path of the VNC socket of VM $vmid.
sub vnc_socket {
    my ($vmid) = @_;

    return $var_run_tmpdir . "/$vmid.vnc";
}
2189
# Return the path of the monitor socket of VM $vmid.
sub monitor_socket {
    my ($vmid) = @_;

    return $var_run_tmpdir . "/$vmid.mon";
}
2194
# Return the path of the pidfile of VM $vmid.
sub pidfile_name {
    my ($vmid) = @_;

    return $var_run_tmpdir . "/$vmid.pid";
}
2199
# Return a random MAC address as six colon separated upper-case hex
# octets. The first octet has the multicast bit cleared and the
# "locally administered" bit set.
sub random_ether_addr {

    my $rand = Digest::SHA1::sha1_hex(rand(), time());

    # the first twelve hex digits of the digest give six octets
    my @octets = map { hex(substr($rand, $_*2, 2)) } 0 .. 5;

    $octets[0] &= 0xfe; # clear multicast
    $octets[0] |= 2; # set local id

    return join(':', map { sprintf("%02X", $_) } @octets);
}
2222
# Return the first port in 60000..60009 on which a listening socket can
# be opened on localhost. The probe socket is closed again before
# returning. Dies when none of the ports is available.
sub next_migrate_port {

    foreach my $port (60000 .. 60009) {

        my $sock = IO::Socket::INET->new(Listen => 5,
                                         LocalAddr => 'localhost',
                                         LocalPort => $port,
                                         ReuseAddr => 1,
                                         Proto => 0);
        next if !$sock;

        close($sock);
        return $port;
    }

    die "unable to find free migration port";
}
2241
# Start VM $vmid while holding its config lock.
#
# $statefile - optional: 'tcp' makes kvm wait for an incoming migration
#              on a free local port (the port is printed); any other
#              value names a state file to resume from, which is
#              removed after a successful start
# $skiplock  - when true, a 'lock' entry in the VM config is ignored
#
# Dies when the VM is locked, already running, a host PCI device cannot
# be prepared, or the kvm command fails.
sub vm_start {
    my ($storecfg, $vmid, $statefile, $skiplock) = @_;

    my $start = sub {
        my $conf = load_config($vmid);

        check_lock($conf) if !$skiplock;

        if (check_running($vmid)) {
            my $msg = "VM $vmid already running - start failed\n" ;
            syslog('err', $msg);
            die $msg;
        }
        syslog('info', "VM $vmid start");

        my ($migrate_uri, $migrate_port) = (undef, 0);

        if ($statefile && $statefile eq 'tcp') {
            $migrate_port = next_migrate_port();
            $migrate_uri = "tcp:localhost:${migrate_port}";
        } elsif ($statefile) {
            if (-f $statefile) {
                $migrate_uri = "exec:cat $statefile";
            } else {
                warn "state file '$statefile' does not exist - doing normal startup\n";
            }
        }

        my $defaults = load_defaults();

        my ($cmd, $vollist) = config_to_command($storecfg, $vmid, $conf, $defaults, $migrate_uri);

        # host pci devices
        for my $i (0 .. $MAX_HOSTPCI_DEVICES - 1) {
            my $d = parse_hostpci($conf->{"hostpci$i"}) or next;
            my $info = pci_device_info("0000:$d->{pciid}");
            die "IOMMU not present\n" if !check_iommu_support();
            die "no pci device info for device '$d->{pciid}'\n" if !$info;
            die "can't unbind pci device '$d->{pciid}'\n" if !pci_dev_bind_to_stub($info);
            die "can't reset pci device '$d->{pciid}'\n" if !pci_dev_reset($info);
        }

        PVE::Storage::activate_volumes($storecfg, $vollist);

        # no timeout while waiting for an incoming migration / state load
        eval { run_command($cmd, timeout => $migrate_uri ? undef : 30); };

        if (my $err = $@) {
            my $msg = "start failed: $err";
            syslog('err', "VM $vmid $msg");
            die $msg;
        }

        if ($statefile && $statefile eq 'tcp') {
            print "migration listens on port $migrate_port\n";
        } elsif ($statefile) {
            unlink $statefile;
            # fixme: send resume - is that necessary ?
            eval { vm_monitor_command($vmid, "cont", 1) };
        }

        # errors of the following monitor commands are ignored
        my $speed = $conf->{migrate_speed} || $defaults->{migrate_speed};
        if ($speed) {
            eval { vm_monitor_command($vmid, "migrate_set_speed ${speed}m", 1); };
        }

        my $downtime = $conf->{migrate_downtime} || $defaults->{migrate_downtime};
        if ($downtime) {
            eval { vm_monitor_command($vmid, "migrate_set_downtime ${downtime}", 1); };
        }
    };

    lock_config($vmid, $start);
}
2324
# Read from $fh until the qemu monitor prompt "(qemu) " terminates the
# collected output, until end of file, or until no data arrives within
# $timeout seconds.
#
# Parameters:
#   $fh      - handle to read from (must work with IO::Select and sysread)
#   $timeout - seconds to wait for each chunk of data (passed to can_read)
#
# Returns the data read so far, with the trailing prompt removed when one
# was seen.
#
# Dies with "monitor read timeout\n" when no data becomes readable in time,
# or with the system error text when sysread fails.
sub __read_avail {
    my ($fh, $timeout) = @_;

    my $sel = IO::Select->new();
    $sel->add($fh);

    my $res = '';
    my $buf;

    my @ready;
    while (scalar(@ready = $sel->can_read($timeout))) {
        my $count = $fh->sysread($buf, 8192);

        die "$!\n" if !defined($count);

        last if !$count; # end of file

        $res .= $buf;

        # Match against everything collected so far, not only the latest
        # chunk: the prompt may arrive split across two reads.
        if ($res =~ /^(.*)\(qemu\) $/s) {
            $res = $1;
            last;
        }
    }

    die "monitor read timeout\n" if !scalar(@ready);

    return $res;
}
2356
# Send one command to the qemu monitor socket of VM $vmid and return the
# monitor's reply.
#
# Parameters:
#   $vmid   - id of the VM
#   $cmdstr - monitor command line (sent followed by a carriage return)
#   $nolog  - when true, the command itself is not written to syslog
#
# Returns the reply text with the echoed command line removed and a
# trailing newline, or whatever __read_avail() returned when that is false.
# Nothing is read back for 'q'/'quit'.
#
# Any failure is logged to syslog and re-thrown.
sub vm_monitor_command {
    my ($vmid, $cmdstr, $nolog) = @_;

    my $res;

    syslog("info", "VM $vmid monitor command '$cmdstr'") if !$nolog;

    eval {
        die "VM not running\n" if !check_running($vmid);

        my $sname = monitor_socket($vmid);

        my $sock = IO::Socket::UNIX->new(Peer => $sname) ||
            die "unable to connect to VM $vmid socket - $!\n";

        # hack: migrate sometime blocks the monitor (when migrate_downtime
        # is set), so those commands get a one hour timeout everywhere
        my $is_migrate = $cmdstr =~ m/^(info\s+migrate|migrate\s)/;

        my $timeout = $is_migrate ? 60*60 : 3;

        # read banner
        my $banner = __read_avail($sock, $timeout);

        die "got unexpected qemu monitor banner\n"
            if $banner !~ m/^QEMU\s+(\S+)\s+monitor\s/;

        my $sel = IO::Select->new();
        $sel->add($sock);

        my @writable = $sel->can_write($timeout);
        die "monitor write error - timeout" if !scalar(@writable);

        my $fullcmd = "$cmdstr\r";

        my $written = $sock->syswrite($fullcmd);
        if (!$written || ($written != length($fullcmd))) {
            die "monitor write error - $!";
        }

        # leaves the eval only; no reply is read for quit
        return if ($cmdstr eq 'q') || ($cmdstr eq 'quit');

        if ($is_migrate) {
            $timeout = 60*60; # 1 hour
        } elsif ($cmdstr =~ m/^(eject|change)/) {
            $timeout = 60; # note: cdrom mount command is slow
        } else {
            $timeout = 20;
        }

        if ($res = __read_avail($sock, $timeout)) {

            my @lines = split("\r?\n", $res);

            shift @lines if $lines[0] !~ m/^unknown command/; # skip echo

            $res = join("\n", @lines) . "\n";
        }
    };

    if (my $err = $@) {
        syslog("err", "VM $vmid monitor command failed - $err");
        die $err;
    }

    return $res;
}
2430
# Build the command line that would be used to start VM $vmid and return
# it as a single space-joined string.
#
# Parameters:
#   $storecfg - storage configuration, passed to config_to_command()
#   $vmid     - id of the VM
sub vm_commandline {
    my ($storecfg, $vmid) = @_;

    my $vm_conf = load_config($vmid);
    my $vm_defaults = load_defaults();

    # NOTE(review): config_to_command() is called in scalar context here and
    # is expected to yield the command array reference - confirm.
    my $argv = config_to_command($storecfg, $vmid, $vm_conf, $vm_defaults);

    return join(' ', @{$argv});
}
2442
# Send 'system_reset' to the monitor of VM $vmid while holding the config
# lock. The config 'lock' setting is checked unless $skiplock is true.
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $code = sub {
        my $vm_conf = load_config($vmid);

        check_lock($vm_conf) if !$skiplock;

        syslog("info", "VM $vmid sending 'reset'");

        vm_monitor_command($vmid, "system_reset", 1);
    };

    return lock_config($vmid, $code);
}
2457
# Send 'system_powerdown' to the monitor of VM $vmid while holding the
# config lock. The config 'lock' setting is checked unless $skiplock is true.
sub vm_shutdown {
    my ($vmid, $skiplock) = @_;

    my $code = sub {
        my $vm_conf = load_config($vmid);

        check_lock($vm_conf) if !$skiplock;

        syslog("info", "VM $vmid sending 'shutdown'");

        vm_monitor_command($vmid, "system_powerdown", 1);
    };

    return lock_config($vmid, $code);
}
2472
# Stop VM $vmid while holding its config lock.
#
# The stop is escalated in three steps:
#   1. send 'quit' to the monitor and wait up to 50 seconds
#   2. send SIGTERM (also used right away when 'quit' failed) and wait up
#      to 10 seconds
#   3. send SIGKILL
# Afterwards fairsched_rmnod() is called for the VM.
#
# Parameters:
#   $vmid     - id of the VM
#   $skiplock - when true, the 'lock' setting in the config is not checked
#
# Returns without doing anything when the VM is not running.
sub vm_stop {
    my ($vmid, $skiplock) = @_;

    lock_config($vmid, sub {

        my $pid = check_running($vmid);

        if (!$pid) {
            syslog('info', "VM $vmid already stopped");
            return;
        }

        my $conf = load_config($vmid);

        check_lock($conf) if !$skiplock;

        syslog("info", "VM $vmid stopping");

        # Poll once per second for at most $timeout seconds. Returns true
        # when the VM is no longer running. The state is checked again
        # after the last sleep, so a VM that exits during that final second
        # is not reported as still running (and its stale pid is not
        # signalled).
        my $wait_stopped = sub {
            my ($timeout) = @_;

            for (my $count = 0; $count < $timeout; $count++) {
                return 1 if !check_running($vmid);
                sleep 1;
            }

            return !check_running($vmid);
        };

        eval { vm_monitor_command($vmid, "quit", 1); };

        if (my $err = $@) {
            syslog('info', "VM $vmid quit failed - terminating now with SIGTERM");
            kill 15, $pid;
        } elsif (!$wait_stopped->(50)) { # fixme: how long?
            syslog('info', "VM $vmid still running - terminating now with SIGTERM");
            kill 15, $pid;
        }

        # wait again
        if (!$wait_stopped->(10)) {
            syslog('info', "VM $vmid still running - terminating now with SIGKILL\n");
            kill 9, $pid;
        }

        fairsched_rmnod($vmid); # try to destroy group
    });
}
2531
# Send 'stop' to the monitor of VM $vmid while holding the config lock.
# The config 'lock' setting is checked unless $skiplock is true.
sub vm_suspend {
    my ($vmid, $skiplock) = @_;

    my $code = sub {
        my $vm_conf = load_config($vmid);

        check_lock($vm_conf) if !$skiplock;

        syslog("info", "VM $vmid suspend");

        vm_monitor_command($vmid, "stop", 1);
    };

    return lock_config($vmid, $code);
}
2546
# Send 'cont' to the monitor of VM $vmid while holding the config lock.
# The config 'lock' setting is checked unless $skiplock is true.
sub vm_resume {
    my ($vmid, $skiplock) = @_;

    my $code = sub {
        my $vm_conf = load_config($vmid);

        check_lock($vm_conf) if !$skiplock;

        syslog("info", "VM $vmid resume");

        vm_monitor_command($vmid, "cont", 1);
    };

    return lock_config($vmid, $code);
}
2561
# Send the ctrl-alt-delete key combination to VM $vmid through the monitor
# ('sendkey ctrl-alt-delete') while holding the config lock. The config
# 'lock' setting is checked unless $skiplock is true.
sub vm_cad {
    my ($vmid, $skiplock) = @_;

    my $code = sub {
        my $vm_conf = load_config($vmid);

        check_lock($vm_conf) if !$skiplock;

        syslog("info", "VM $vmid sending cntl-alt-delete");

        vm_monitor_command($vmid, "sendkey ctrl-alt-delete", 1);
    };

    return lock_config($vmid, $code);
}
2576
# Destroy VM $vmid (removing all its data) while holding the config lock.
#
# Parameters:
#   $storecfg - storage configuration, passed to destroy_vm()
#   $vmid     - id of the VM
#   $skiplock - when true, the 'lock' setting in the config is not checked
#
# Dies with "VM is running\n" when the VM is still running; any failure is
# logged to syslog and re-thrown.
sub vm_destroy {
    my ($storecfg, $vmid, $skiplock) = @_;

    my $code = sub {
        my $vm_conf = load_config($vmid);

        check_lock($vm_conf) if !$skiplock;

        syslog("info", "VM $vmid destroy called (removing all data)");

        eval {
            die "VM is running\n" if check_running($vmid);

            fairsched_rmnod($vmid); # try to destroy group
            destroy_vm($storecfg, $vmid);
        };

        if (my $err = $@) {
            syslog("err", "VM $vmid destroy failed - $err");
            die $err;
        }
    };

    return lock_config($vmid, $code);
}
2605
# Stop all running VMs, escalating step by step:
#   1. send a shutdown request to every running VM and wait up to $timeout
#      seconds (default 3 minutes)
#   2. send 'quit' to the monitor of every VM still running and wait up to
#      30 seconds
#   3. send SIGTERM to every VM still running
# A VM counts as running when its vzlist() entry has a pid. The function
# returns as soon as no running VM is left.
sub vm_stopall {
    my ($timeout) = @_;

    $timeout = 3*60 if !$timeout;

    my $vzlist = vzlist();

    # ids of all VMs in the current $vzlist snapshot that have a pid
    my $running_vmids = sub {
        return grep { $vzlist->{$_}->{pid} } keys %$vzlist;
    };

    my $count = scalar(my @initially_running = $running_vmids->());

    my $wt = 5; # seconds between two polls

    # Poll every $wt seconds, for at most $limit seconds, until no VM is
    # running any more. Refreshes $vzlist and $count on every poll.
    my $wait_for_exit = sub {
        my ($limit) = @_;

        my $maxtries = int (($limit + $wt -1)/$wt);

        for (my $try = 0; ($try < $maxtries) && $count; $try++) {
            sleep $wt;

            $vzlist = vzlist();
            $count = scalar(my @still_running = $running_vmids->());
        }
    };

    if ($count) {

        my $msg = "Stopping Qemu Server - sending shutdown requests to all VMs\n";
        syslog ('info', $msg);
        print STDERR $msg;

        foreach my $vmid ($running_vmids->()) {
            eval { vm_shutdown ($vmid, 1); };
            print STDERR $@ if $@;
        }

        $wait_for_exit->($timeout);

        return if !$count;

        foreach my $vmid ($running_vmids->()) {
            $msg = "VM $vmid still running - sending stop now\n";
            syslog ('info', $msg);
            print $msg;

            eval { vm_monitor_command ($vmid, "quit", 1); };
            print STDERR $@ if $@;
        }

        $wait_for_exit->(30);

        return if !$count;

        foreach my $vmid ($running_vmids->()) {
            $msg = "VM $vmid still running - terminating now with SIGTERM\n";
            syslog ('info', $msg);
            print $msg;
            kill 15, $vzlist->{$vmid}->{pid};
        }

        # this is called by the system shutdown scripts, so remaining
        # processes get killed anyway (no need to send kill -9 here)

        $msg = "Qemu Server stopped\n";
        syslog ('info', $msg);
        print STDERR $msg;
    }
}
2695
2696# pci helpers
2697
# Write $buf to the file $filename, truncating or creating it.
#
# Returns a true value only when the data was written AND the handle was
# closed without error; returns undef otherwise (including when the file
# cannot be opened).
#
# The result of close() matters: print only fills the I/O buffer, the
# actual write happens when the handle is flushed on close. For sysfs
# attributes this is where the kernel reports a rejected value.
sub file_write {
    my ($filename, $buf) = @_;

    my $fh = IO::File->new($filename, "w");
    return undef if !$fh;

    my $res = print $fh $buf;

    # always close the handle, but report a failed flush/close as an error
    $res = undef if !$fh->close();

    return $res;
}
2710
# Collect information about the PCI device $name from sysfs.
#
# $name must have the form "dddd:bb:ss.f" (lower case hex digits).
#
# Returns a hash reference with the keys name, vendor, product (both
# without the leading "0x"), domain, bus, slot, func, irq and has_fl_reset
# (true when the device has a 'reset' file, 0 otherwise).
# Returns undef when $name is malformed or when the irq, vendor or device
# attribute cannot be read or has an unexpected format.
sub pci_device_info {
    my ($name) = @_;

    my @addr = $name =~ m/^([a-f0-9]{4}):([a-f0-9]{2}):([a-f0-9]{2})\.([a-f0-9])$/;
    return undef if !@addr;
    my ($domain, $bus, $slot, $func) = @addr;

    my $devdir = "$pcisysfs/devices/$name";

    my $irq = file_read_firstline("$devdir/irq");
    return undef if !defined($irq) || $irq !~ m/^\d+$/;

    # the substitutions strip the "0x" prefix and fail when it is missing
    my $vendor = file_read_firstline("$devdir/vendor");
    return undef if !defined($vendor) || $vendor !~ s/^0x//;

    my $product = file_read_firstline("$devdir/device");
    return undef if !defined($product) || $product !~ s/^0x//;

    return {
        name => $name,
        vendor => $vendor,
        product => $product,
        domain => $domain,
        bus => $bus,
        slot => $slot,
        func => $func,
        irq => $irq,
        has_fl_reset => (-f "$devdir/reset") || 0,
    };
}
2742
# Reset a PCI device by writing "1" to its sysfs 'reset' file.
#
# $dev is a hash reference with at least the key 'name' (as returned by
# pci_device_info). Returns the result of file_write().
sub pci_dev_reset {
    my ($dev) = @_;

    my $reset_file = "$pcisysfs/devices/$dev->{name}/reset";

    return file_write($reset_file, "1");
}
2752
# Bind a PCI device to the pci-stub driver.
#
# $dev is a hash reference with the keys name, vendor and product (as
# returned by pci_device_info).
#
# Steps: register the vendor/product id with pci-stub (new_id), unbind the
# device from its current driver, then bind it to pci-stub unless it
# already shows up there.
#
# Returns a true value when the device is listed under the pci-stub driver
# directory afterwards, undef/false otherwise.
sub pci_dev_bind_to_stub {
    my ($dev) = @_;

    my $name = $dev->{name};

    my $stubdir = "$pcisysfs/drivers/pci-stub";
    my $bound_dir = "$stubdir/$name";

    # already bound - nothing to do
    return 1 if -d $bound_dir;

    my $id = "$dev->{vendor} $dev->{product}";
    if (!file_write("$stubdir/new_id", $id)) {
        return undef;
    }

    # a failed unbind is only an error when the unbind file exists
    my $unbind_file = "$pcisysfs/devices/$name/driver/unbind";
    if (!file_write($unbind_file, $name) && -f $unbind_file) {
        return undef;
    }

    # bind explicitly unless the device is listed under pci-stub by now
    if (!(-d $bound_dir) && !file_write("$stubdir/bind", $name)) {
        return undef;
    }

    return -d $bound_dir;
}
2776
27771;