]> git.proxmox.com Git - qemu-server.git/blame - PVE/QemuServer.pm
Hostpci rework v4
[qemu-server.git] / PVE / QemuServer.pm
CommitLineData
1e3baf05
DM
1package PVE::QemuServer;
2
3use strict;
4use POSIX;
5use IO::Handle;
6use IO::Select;
7use IO::File;
8use IO::Dir;
9use IO::Socket::UNIX;
10use File::Basename;
11use File::Path;
12use File::stat;
13use Getopt::Long;
14use Digest::SHA1;
15use Fcntl ':flock';
16use Cwd 'abs_path';
17use IPC::Open3;
18use Fcntl;
19use PVE::SafeSyslog;
20use Storable qw(dclone);
21use PVE::Exception qw(raise raise_param_exc);
22use PVE::Storage;
23use PVE::Tools qw(run_command lock_file file_read_firstline);
24use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file cfs_lock_file);
25use PVE::INotify;
26use PVE::ProcFSTools;
27use Time::HiRes qw (gettimeofday);
28
29my $clock_ticks = POSIX::sysconf(&POSIX::_SC_CLK_TCK);
30
# Note about locking: we use flock on the config file to protect
# against concurrent actions.
# Additionally, we have a 'lock' setting in the config file. This
# can be set to 'migrate' or 'backup'. Most actions are not
# allowed when such a lock is set. But you can ignore this kind of
# lock with the --skiplock flag.
37
38cfs_register_file('/qemu-server/', \&parse_vm_config);
39
40#no warnings 'redefine';
41
42unless(defined(&_VZSYSCALLS_H_)) {
43 eval 'sub _VZSYSCALLS_H_ () {1;}' unless defined(&_VZSYSCALLS_H_);
44 require 'sys/syscall.ph';
45 if(defined(&__x86_64__)) {
46 eval 'sub __NR_fairsched_vcpus () {499;}' unless defined(&__NR_fairsched_vcpus);
47 eval 'sub __NR_fairsched_mknod () {504;}' unless defined(&__NR_fairsched_mknod);
48 eval 'sub __NR_fairsched_rmnod () {505;}' unless defined(&__NR_fairsched_rmnod);
49 eval 'sub __NR_fairsched_chwt () {506;}' unless defined(&__NR_fairsched_chwt);
50 eval 'sub __NR_fairsched_mvpr () {507;}' unless defined(&__NR_fairsched_mvpr);
51 eval 'sub __NR_fairsched_rate () {508;}' unless defined(&__NR_fairsched_rate);
52 eval 'sub __NR_setluid () {501;}' unless defined(&__NR_setluid);
53 eval 'sub __NR_setublimit () {502;}' unless defined(&__NR_setublimit);
54 }
55 elsif(defined( &__i386__) ) {
56 eval 'sub __NR_fairsched_mknod () {500;}' unless defined(&__NR_fairsched_mknod);
57 eval 'sub __NR_fairsched_rmnod () {501;}' unless defined(&__NR_fairsched_rmnod);
58 eval 'sub __NR_fairsched_chwt () {502;}' unless defined(&__NR_fairsched_chwt);
59 eval 'sub __NR_fairsched_mvpr () {503;}' unless defined(&__NR_fairsched_mvpr);
60 eval 'sub __NR_fairsched_rate () {504;}' unless defined(&__NR_fairsched_rate);
61 eval 'sub __NR_fairsched_vcpus () {505;}' unless defined(&__NR_fairsched_vcpus);
62 eval 'sub __NR_setluid () {511;}' unless defined(&__NR_setluid);
63 eval 'sub __NR_setublimit () {512;}' unless defined(&__NR_setublimit);
64 } else {
65 die("no fairsched syscall for this arch");
66 }
67 require 'asm/ioctl.ph';
68 eval 'sub KVM_GET_API_VERSION () { &_IO(0xAE, 0x);}' unless defined(&KVM_GET_API_VERSION);
69}
70
# Invoke the fairsched_mknod syscall.
# All three arguments are truncated to integers before the call;
# the raw syscall() result is returned unchanged.
sub fairsched_mknod {
    my ($parent, $weight, $desired) = @_;

    my $parent_id = int($parent);
    my $node_weight = int($weight);
    my $desired_id = int($desired);

    return syscall(&__NR_fairsched_mknod, $parent_id, $node_weight, $desired_id);
}
76
# Invoke the fairsched_rmnod syscall for node $id (truncated to an
# integer). Returns the raw syscall() result.
sub fairsched_rmnod {
    my ($id) = @_;

    my $node_id = int($id);

    return syscall(&__NR_fairsched_rmnod, $node_id);
}
82
# Invoke the fairsched_mvpr syscall with $pid and $newid, both
# truncated to integers. Returns the raw syscall() result.
sub fairsched_mvpr {
    my ($pid, $newid) = @_;

    my $process_id = int($pid);
    my $node_id = int($newid);

    return syscall(&__NR_fairsched_mvpr, $process_id, $node_id);
}
88
# Invoke the fairsched_vcpus syscall with $id and $vcpus, both
# truncated to integers. Returns the raw syscall() result.
sub fairsched_vcpus {
    my ($id, $vcpus) = @_;

    my $node_id = int($id);
    my $vcpu_count = int($vcpus);

    return syscall(&__NR_fairsched_vcpus, $node_id, $vcpu_count);
}
94
# Invoke the fairsched_rate syscall. $op is one of the FAIRSCHED_*_RATE
# constants defined below; all arguments are truncated to integers.
# Returns the raw syscall() result.
sub fairsched_rate {
    my ($id, $op, $rate) = @_;

    my $node_id = int($id);
    my $operation = int($op);
    my $rate_value = int($rate);

    return syscall(&__NR_fairsched_rate, $node_id, $operation, $rate_value);
}
100
101use constant FAIRSCHED_SET_RATE => 0;
102use constant FAIRSCHED_DROP_RATE => 1;
103use constant FAIRSCHED_GET_RATE => 2;
104
# Apply a CPU limit (given in percent) to fairsched node $id.
# The percentage is rescaled to units of 1/1024 and truncated; a
# resulting rate of 0 drops the rate limit instead of setting one.
sub fairsched_cpulimit {
    my ($id, $limit) = @_;

    my $rate = int($limit * 1024 / 100);

    if ($rate) {
        return fairsched_rate($id, FAIRSCHED_SET_RATE, $rate);
    }

    return fairsched_rate($id, FAIRSCHED_DROP_RATE, $rate);
}
113
114my $nodename = PVE::INotify::nodename();
115
116mkdir "/etc/pve/nodes/$nodename";
117my $confdir = "/etc/pve/nodes/$nodename/qemu-server";
118mkdir $confdir;
119
120my $var_run_tmpdir = "/var/run/qemu-server";
121mkdir $var_run_tmpdir;
122
123my $lock_dir = "/var/lock/qemu-server";
124mkdir $lock_dir;
125
126my $pcisysfs = "/sys/bus/pci";
127
128my $keymaphash = PVE::Tools::kvmkeymaps();
129
130my $confdesc = {
131 onboot => {
132 optional => 1,
133 type => 'boolean',
134 description => "Specifies whether a VM will be started during system bootup.",
135 default => 0,
136 },
137 autostart => {
138 optional => 1,
139 type => 'boolean',
140 description => "Automatic restart after crash (currently ignored).",
141 default => 0,
142 },
143 reboot => {
144 optional => 1,
145 type => 'boolean',
146 description => "Allow reboot. If set to '0' the VM exit on reboot.",
147 default => 1,
148 },
149 lock => {
150 optional => 1,
151 type => 'string',
152 description => "Lock/unlock the VM.",
153 enum => [qw(migrate backup)],
154 },
155 cpulimit => {
156 optional => 1,
157 type => 'integer',
158 description => "Limit of CPU usage in per cent. Note if the computer has 2 CPUs, it has total of 200% CPU time. Value '0' indicates no CPU limit.\n\nNOTE: This option is currently ignored.",
159 minimum => 0,
160 default => 0,
161 },
162 cpuunits => {
163 optional => 1,
164 type => 'integer',
165 description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to weights of all the other running VMs.\n\nNOTE: You can disable fair-scheduler configuration by setting this to 0.",
166 minimum => 0,
167 maximum => 500000,
168 default => 1000,
169 },
170 memory => {
171 optional => 1,
172 type => 'integer',
173 description => "Amount of RAM for the VM in MB.",
174 minimum => 16,
175 default => 512,
176 },
177 keyboard => {
178 optional => 1,
179 type => 'string',
180 description => "Keybord layout for vnc server. Default is read from the datacenter configuration file.",
181 enum => [ keys %$keymaphash ],
182 default => 'en-us',
183 },
184 name => {
185 optional => 1,
186 type => 'string',
187 description => "Set a name for the VM. Only used on the configuration web interface.",
188 },
189 description => {
190 optional => 1,
191 type => 'string',
192 description => "Description for the VM. Only used on the configuration web interface.",
193 },
194 ostype => {
195 optional => 1,
196 type => 'string',
197 enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 l24 l26)],
198 description => <<EODESC,
199Used to enable special optimization/features for specific
200operating systems:
201
202other => unspecified OS
203wxp => Microsoft Windows XP
204w2k => Microsoft Windows 2000
205w2k3 => Microsoft Windows 2003
206w2k8 => Microsoft Windows 2008
207wvista => Microsoft Windows Vista
208win7 => Microsoft Windows 7
209l24 => Linux 2.4 Kernel
210l26 => Linux 2.6/3.X Kernel
211
212other|l24|l26 ... no special behaviour
213wxp|w2k|w2k3|w2k8|wvista|win7 ... use --localtime switch
214EODESC
215 },
216 boot => {
217 optional => 1,
218 type => 'string',
219 description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n).",
220 pattern => '[acdn]{1,4}',
221 default => 'cad',
222 },
223 bootdisk => {
224 optional => 1,
225 type => 'string', format => 'pve-qm-bootdisk',
226 description => "Enable booting from specified disk.",
227 pattern => '(ide|scsi|virtio)\d+',
228 },
229 smp => {
230 optional => 1,
231 type => 'integer',
232 description => "The number of CPUs. Please use option -sockets instead.",
233 minimum => 1,
234 default => 1,
235 },
236 sockets => {
237 optional => 1,
238 type => 'integer',
239 description => "The number of CPU sockets.",
240 minimum => 1,
241 default => 1,
242 },
243 cores => {
244 optional => 1,
245 type => 'integer',
246 description => "The number of cores per socket.",
247 minimum => 1,
248 default => 1,
249 },
250 acpi => {
251 optional => 1,
252 type => 'boolean',
253 description => "Enable/disable ACPI.",
254 default => 1,
255 },
256 kvm => {
257 optional => 1,
258 type => 'boolean',
259 description => "Enable/disable KVM hardware virtualization.",
260 default => 1,
261 },
262 tdf => {
263 optional => 1,
264 type => 'boolean',
265 description => "Enable/disable time drift fix.",
266 default => 1,
267 },
268 localtime => {
269 optional => 1,
270 type => 'boolean',
271 description => "Set the real time clock to local time. This is enabled by default if ostype indicates a Microsoft OS.",
272 },
273 freeze => {
274 optional => 1,
275 type => 'boolean',
276 description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
277 },
278 vga => {
279 optional => 1,
280 type => 'string',
281 description => "Select VGA type. If you want to use high resolution modes (>= 1280x1024x16) then you should use option 'std' or 'vmware'. Default is 'std' for win7/w2k8, and 'cirrur' for other OS types",
282 enum => [qw(std cirrus vmware)],
283 },
0ea9541d
DM
284 watchdog => {
285 optional => 1,
286 type => 'string', format => 'pve-qm-watchdog',
287 typetext => '[[model=]i6300esb|ib700] [,[action=]reset|shutdown|poweroff|pause|debug|none]',
288 description => "Create a virtual hardware watchdog device. Once enabled (by a guest action), the watchdog must be periodically polled by an agent inside the guest or else the guest will be restarted (or execute the action specified)",
289 },
1e3baf05
DM
290 serial => {
291 optional => 1,
292 type => 'string', format => 'pve-qm-serial',
293 typetext => "SERIALDEVICE { , SERIALDEVICE }",
294 description => <<EODESCR,
295Map host serial devices. SERIALDEVICE syntax is /dev/ttyS*
296
297Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.
298
299Experimental: user reported problems with this option.
300EODESCR
301 },
302 parallel => {
303 optional => 1,
304 type => 'string', format => 'pve-qm-parallel',
305 typetext => "PARALLELDEVICE { , PARALLELDEVICE }",
306 description => <<EODESCR,
307Map host parallel devices. PARALLELDEVICE syntax is /dev/parport*
308
309Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.
310
311Experimental: user reported problems with this option.
312EODESCR
313 },
314 startdate => {
315 optional => 1,
316 type => 'string',
317 typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
318 description => "Set the initial date of the real time clock. Valid format for date are: 'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
319 pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
320 default => 'now',
321 },
322 args => {
323 optional => 1,
324 type => 'string',
325 description => <<EODESCR,
326Note: this option is for experts only. It allows you to pass arbitrary arguments to kvm, for example:
327
328args: -no-reboot -no-hpet
329EODESCR
330 },
331 tablet => {
332 optional => 1,
333 type => 'boolean',
334 default => 1,
335 description => "Enable/disable the usb tablet device. This device is usually needed to allow absolute mouse positioning. Else the mouse runs out of sync with normal vnc clients. If you're running lots of console-only guests on one host, you may consider disabling this to save some context switches.",
336 },
337 migrate_speed => {
338 optional => 1,
339 type => 'integer',
340 description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
341 minimum => 0,
342 default => 0,
343 },
344 migrate_downtime => {
345 optional => 1,
346 type => 'integer',
347 description => "Set maximum tolerated downtime (in seconds) for migrations.",
348 minimum => 0,
349 default => 1,
350 },
351 cdrom => {
352 optional => 1,
353 type => 'string', format => 'pve-qm-drive',
354 typetext => 'volume',
355 description => "This is an alias for option -ide2",
356 },
357 cpu => {
358 optional => 1,
359 description => "Emulated CPU type.",
360 type => 'string',
361 enum => [ qw(486 athlon pentium pentium2 pentium3 coreduo core2duo kvm32 kvm64 qemu32 qemu64 phenom host) ],
362 default => 'qemu64',
363 },
364};
365
366# what about other qemu settings ?
367#cpu => 'string',
368#machine => 'string',
369#fda => 'file',
370#fdb => 'file',
371#mtdblock => 'file',
372#sd => 'file',
373#pflash => 'file',
374#snapshot => 'bool',
375#bootp => 'file',
376##tftp => 'dir',
377##smb => 'dir',
378#kernel => 'file',
379#append => 'string',
380#initrd => 'file',
381##soundhw => 'string',
382
383while (my ($k, $v) = each %$confdesc) {
384 PVE::JSONSchema::register_standard_option("pve-qm-$k", $v);
385}
386
387my $MAX_IDE_DISKS = 4;
f62db2a4
DA
388my $MAX_SCSI_DISKS = 14;
389my $MAX_VIRTIO_DISKS = 6;
1e3baf05 390my $MAX_USB_DEVICES = 5;
f62db2a4 391my $MAX_NETS = 6;
1e3baf05 392my $MAX_UNUSED_DISKS = 8;
040b06b7 393my $MAX_HOSTPCI_DEVICES = 2;
1e3baf05
DM
394
395my $nic_model_list = ['rtl8139', 'ne2k_pci', 'e1000', 'pcnet', 'virtio',
396 'ne2k_isa', 'i82551', 'i82557b', 'i82559er'];
397my $nic_model_list_txt = join (' ', sort @$nic_model_list);
398
399# fixme:
400my $netdesc = {
401 optional => 1,
402 type => 'string', format => 'pve-qm-net',
403 typetext => "MODEL=XX:XX:XX:XX:XX:XX [,bridge=<dev>][,rate=<mbps>]",
404 description => <<EODESCR,
405Specify network devices.
406
407MODEL is one of: $nic_model_list_txt
408
409XX:XX:XX:XX:XX:XX should be an unique MAC address. This is
410automatically generated if not specified.
411
412The bridge parameter can be used to automatically add the interface to a bridge device. The Proxmox VE standard bridge is called 'vmbr0'.
413
414Option 'rate' is used to limit traffic bandwidth from and to this interface. It is specified as floating point number, unit is 'Megabytes per second'.
415
416If you specify no bridge, we create a kvm 'user' (NATed) network device, which provides DHCP and DNS services. The following addresses are used:
417
41810.0.2.2 Gateway
41910.0.2.3 DNS Server
42010.0.2.4 SMB Server
421
422The DHCP server assign addresses to the guest starting from 10.0.2.15.
423
424EODESCR
425};
426PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
427
428for (my $i = 0; $i < $MAX_NETS; $i++) {
429 $confdesc->{"net$i"} = $netdesc;
430}
431
432my $drivename_hash;
433
# Schema entry shared by all ideN options (registered as 'pve-qm-ide').
# The typetext previously contained an unbalanced ',]' after 'volume'.
my $idedesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-drive',
    typetext => '[volume=]volume [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback] [,format=f] [,backup=yes|no] [,aio=native|threads]',
    description => "Use volume as IDE hard disk or CD-ROM (n is 0 to 3).",
};
440PVE::JSONSchema::register_standard_option("pve-qm-ide", $idedesc);
441
# Schema entry shared by all scsiN options (registered as 'pve-qm-scsi').
# The documented index range matches $MAX_SCSI_DISKS (14 => scsi0..scsi13);
# keep the two in sync when the limit changes.
my $scsidesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-drive',
    typetext => '[volume=]volume [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback] [,format=f] [,backup=yes|no] [,aio=native|threads]',
    description => "Use volume as SCSI hard disk or CD-ROM (n is 0 to 13).",
};
448PVE::JSONSchema::register_standard_option("pve-qm-scsi", $scsidesc);
449
# Schema entry shared by all virtioN options (registered as 'pve-qm-virtio').
# The documented index range matches $MAX_VIRTIO_DISKS (6 => virtio0..virtio5);
# keep the two in sync when the limit changes.
my $virtiodesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-drive',
    typetext => '[volume=]volume [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback] [,format=f] [,backup=yes|no] [,aio=native|threads]',
    description => "Use volume as VIRTIO hard disk (n is 0 to 5).",
};
456PVE::JSONSchema::register_standard_option("pve-qm-virtio", $virtiodesc);
457
# Schema entry shared by all usbN options (registered as 'pve-qm-usb').
# The documented index range matches $MAX_USB_DEVICES (5 => usb0..usb4);
# keep the two in sync when the limit changes.
my $usbdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-usb-device',
    typetext => 'host=HOSTUSBDEVICE',
    description => <<EODESCR,
Configure an USB device (n is 0 to 4). This can be used to
pass-through usb devices to the guest. HOSTUSBDEVICE syntax is:

'bus-port(.port)*' (decimal numbers) or
'vendor_id:product_id' (hexadecimal numbers)

You can use the 'lsusb -t' command to list existing usb devices.

Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.

EODESCR
};
475PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);
476
040b06b7
DA
# Schema entry shared by all hostpciN options (registered as
# 'pve-qm-hostpci'); values are validated by the 'pve-qm-hostpci' format.
my $hostpcidesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-hostpci',
    typetext => "HOSTPCIDEVICE",
    description => <<EODESCR,
Map host pci devices. HOSTPCIDEVICE syntax is:

'bus:dev.func' (hexadecimal numbers)

You can use the 'lspci' command to list existing pci devices.

Note: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.

Experimental: user reported problems with this option.
EODESCR
};
493PVE::JSONSchema::register_standard_option("pve-qm-hostpci", $hostpcidesc);
494
495for (my $i = 0; $i < $MAX_HOSTPCI_DEVICES; $i++) {
496 $confdesc->{"hostpci$i"} = $hostpcidesc;
497}
1e3baf05
DM
498
499for (my $i = 0; $i < $MAX_IDE_DISKS; $i++) {
500 $drivename_hash->{"ide$i"} = 1;
501 $confdesc->{"ide$i"} = $idedesc;
502}
503
504for (my $i = 0; $i < $MAX_SCSI_DISKS; $i++) {
505 $drivename_hash->{"scsi$i"} = 1;
506 $confdesc->{"scsi$i"} = $scsidesc ;
507}
508
509for (my $i = 0; $i < $MAX_VIRTIO_DISKS; $i++) {
510 $drivename_hash->{"virtio$i"} = 1;
511 $confdesc->{"virtio$i"} = $virtiodesc;
512}
513
514for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) {
515 $confdesc->{"usb$i"} = $usbdesc;
516}
517
518my $unuseddesc = {
519 optional => 1,
520 type => 'string', format => 'pve-volume-id',
521 description => "Reference to unused volumes.",
522};
523
524for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) {
525 $confdesc->{"unused$i"} = $unuseddesc;
526}
527
528my $kvm_api_version = 0;
529
# Return the KVM API version reported by /dev/kvm.
# The result is cached in $kvm_api_version once a true value has been
# obtained. Returns 0 when /dev/kvm cannot be opened.
sub kvm_version {

    return $kvm_api_version if $kvm_api_version;

    my $fh = IO::File->new("</dev/kvm");
    return 0 if !$fh;

    # only remember a successful (true) ioctl result
    my $v = $fh->ioctl(KVM_GET_API_VERSION(), 0);
    $kvm_api_version = $v if $v;

    $fh->close();

    return $kvm_api_version;
}
545
546my $kvm_user_version;
547
# Return the version of the userspace 'kvm' binary, parsed from the
# first line of `kvm -help`. Falls back to the string 'unknown' when
# the output does not match. The result is cached in $kvm_user_version.
sub kvm_user_version {

    return $kvm_user_version if $kvm_user_version;

    my $tmp = `kvm -help 2>/dev/null`;

    $kvm_user_version =
        ($tmp =~ m/^QEMU( PC)? emulator version (\d+\.\d+\.\d+) /) ? $2 : 'unknown';

    return $kvm_user_version;
}
563
564my $kernel_has_vhost_net = -c '/dev/vhost-net';
565
# Return the list of all valid disk option names (ide*, scsi*, virtio*).
sub disknames {
    my $last_ide = $MAX_IDE_DISKS - 1;
    my $last_scsi = $MAX_SCSI_DISKS - 1;
    my $last_virtio = $MAX_VIRTIO_DISKS - 1;

    # order is important - used to autoselect boot disk
    return ((map { "ide$_" } (0 .. $last_ide)),
            (map { "scsi$_" } (0 .. $last_scsi)),
            (map { "virtio$_" } (0 .. $last_virtio)));
}
572
# True if $dev is a key of $drivename_hash, i.e. one of the ideN, scsiN
# or virtioN names registered at module load time.
sub valid_drivename {
    my ($dev) = @_;

    my $entry = $drivename_hash->{$dev};

    return defined($entry);
}
578
# True if $key is a known configuration option (has an entry in $confdesc).
sub option_exists {
    my ($key) = @_;

    my $desc = $confdesc->{$key};

    return defined($desc);
}
583
# Return the array reference holding the supported NIC model names.
# Note: this is the module's own list, not a copy.
sub nic_models {
    return $nic_model_list;
}
587
# Return a hash reference mapping each 'ostype' value to a short,
# human readable operating system name.
sub os_list_description {

    return {
        other => 'Other',
        wxp => 'Windows XP',
        w2k => 'Windows 2000',
        w2k3 => 'Windows 2003',
        w2k8 => 'Windows 2008',
        wvista => 'Windows Vista',
        win7 => 'Windows 7',
        l24 => 'Linux 2.4',
        l26 => 'Linux 2.6',
    };
}
602
# Split an argument string into an array reference of words, using
# shell-like quoting rules (single quotes, double quotes, backslash).
#
# The string is parsed with Text::ParseWords::shellwords() and is never
# handed to a shell, so shell metacharacters (';', '|', '$(...)', globs)
# are returned as plain text instead of being executed or expanded.
#
# Returns an empty array reference for an empty/false string.
# Dies with "unable to parse args: ..." if the string cannot be parsed
# (for example because of an unterminated quote).
sub split_args {
    my ($str) = @_;

    return [] if !$str;

    require Text::ParseWords;

    my @words = Text::ParseWords::shellwords($str);

    # shellwords() signals a parse error by returning an empty list
    die "unable to parse args: $str\n" if !@words && $str =~ m/\S/;

    return [ @words ];
}
628
# Decode a disk option name such as 'ide1', 'scsi9' or 'virtio3'.
#
# Returns a hash reference with:
#   bus        - 'ide', 'scsi' or 'virtio'
#   index      - the numeric suffix of the name
#   controller - index divided by the number of units per controller
#   unit       - index modulo the number of units per controller
#   desc       - e.g. "SCSI 1:2"
# Units per controller: 2 for ide, 7 for scsi, 1024 otherwise.
# Dies if $dev does not have the expected format.
sub disk_devive_info {
    my ($dev) = @_;

    my ($bus, $index) = $dev =~ m/^(ide|scsi|virtio)(\d+)$/
        or die "unknown disk device format '$dev'";

    my %units_per_controller = (ide => 2, scsi => 7);
    my $maxdev = $units_per_controller{$bus} || 1024;

    my $controller = int($index / $maxdev);
    my $unit = $index % $maxdev;

    return {
        bus => $bus,
        desc => uc($bus) . " $controller:$unit",
        controller => $controller,
        unit => $unit,
        index => $index,
    };
}
652
# Build the qemu drive name for disk option $dev.
# ide/scsi: "<bus><controller>-cd<unit>" for cdrom media, otherwise
# "<bus><controller>-hd<unit>". All other buses: "<bus><index>".
sub qemu_drive_name {
    my ($dev, $media) = @_;

    my $info = disk_devive_info($dev);
    my $bus = $info->{bus};

    if ($bus ne 'ide' && $bus ne 'scsi') {
        return sprintf("%s%i", $bus, $info->{index});
    }

    my $mediastr = ($media eq 'cdrom') ? "-cd" : "-hd";

    return sprintf("%s%i%s%i", $bus, $info->{controller},
                   $mediastr, $info->{unit});
}
667
# cache for get_cdrom_path()
my $cdrom_path;

# Return the path of the host CD-ROM device: the first of /dev/cdrom,
# /dev/cdrom1 and /dev/cdrom2 that is a symbolic link. A found path is
# cached for later calls. Returns undef explicitly when none of the
# links exists (previously the sub fell off its end and returned
# whatever the last file test happened to evaluate to).
sub get_cdrom_path {

    return $cdrom_path if $cdrom_path;

    foreach my $candidate (qw(/dev/cdrom /dev/cdrom1 /dev/cdrom2)) {
        return $cdrom_path = $candidate if -l $candidate;
    }

    return undef;
}
678
# Resolve the 'file' value of a CD-ROM drive to a filesystem path:
#   'cdrom'        -> result of get_cdrom_path()
#   'none'         -> empty string
#   absolute path  -> returned unchanged
#   anything else  -> treated as volume ID, resolved by PVE::Storage::path()
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path ($storecfg, $cdrom);
}
692
# Try to convert old style file names to volume IDs.
#
# 'none', 'cdrom', /dev/... paths and values that already look like
# "storage:volume" are returned unchanged. Any other value containing
# a '/' cannot be converted and yields undef. A plain file name becomes
# "local:iso/<file>" for cdrom media and "local:<vmid>/<file>" otherwise.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    my $keep_as_is = $file eq 'none' || $file eq 'cdrom' ||
        $file =~ m|^/dev/.+| || $file =~ m/^([^:]+):(.+)$/;

    return $file if $keep_as_is;

    return undef if $file =~ m|/|;

    my $is_cdrom = $media && $media eq 'cdrom';

    return $is_cdrom ? "local:iso/$file" : "local:$vmid/$file";
}
711
# Check that volume type $vtype fits the drive's media setting:
# media 'disk' expects 'image', media 'cdrom' expects 'iso'.
# Does nothing when $media is not set. Raises a parameter exception
# for option $opt on mismatch; dies with "internal error" for any
# other media value.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %expected_type = (disk => 'image', cdrom => 'iso');

    my $etype = $expected_type{$media};
    die "internal error" if !defined($etype);

    return if ($vtype eq $etype);

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
730
# Normalize $drive->{file} in place.
#
# A value that is not 'cdrom'/'none', not a /dev/ path, not already of
# the form "storage:volume" and not a plain number is treated as a
# filesystem path and converted to a volume ID via
# PVE::Storage::path_to_volume_id(); a parameter exception for $opt is
# raised if no storage matches. The media defaults to 'cdrom' for iso
# volumes and for the special values 'cdrom' and 'none'.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    my $file = $drive->{file};

    # try to convert filesystem paths to volume IDs
    my $is_special = $file =~ m/^(cdrom|none)$/;
    my $is_device = $file =~ m|^/dev/.+|;
    my $is_volid = $file =~ m/^([^:]+):(.+)$/;
    my $is_number = $file =~ m/^\d+$/;

    if (!($is_special || $is_device || $is_volid || $is_number)) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $file);
        raise_param_exc({ $opt => "unable to associate path '$file' to any storage"}) if !$vtype;
        $drive->{media} = 'cdrom' if !$drive->{media} && $vtype eq 'iso';
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
749
# Write a new configuration file for VM $vmid from the $settings hash.
# The caller is responsible for locking.
#
# - dies if the configuration file already exists
# - fills in 'name' ("vm<vmid>") and 'memory' (from load_defaults())
#   when they are not set; note that this modifies $settings
# - only options known to $confdesc are written
# - undefined and empty values are skipped, but an explicit 0 is kept:
#   options such as acpi, kvm, tablet or reboot default to 1, so
#   dropping a 0 would silently re-enable them
# - options are written in sorted order so the file content is
#   deterministic
sub create_conf_nolock {
    my ($vmid, $settings) = @_;

    my $filename = config_file ($vmid);

    die "configuration file '$filename' already exists\n" if -f $filename;

    my $defaults = load_defaults();

    $settings->{name} = "vm$vmid" if !$settings->{name};
    $settings->{memory} = $defaults->{memory} if !$settings->{memory};

    my $data = '';
    foreach my $opt (sort keys %$settings) {
        next if !$confdesc->{$opt};

        my $value = $settings->{$opt};
        next if !defined($value) || $value eq '';

        $data .= "$opt: $value\n";
    }

    PVE::Tools::file_set_contents($filename, $data);
}
774
# ideX = [volume=]volume-id[,media=d][,cyls=c,heads=h,secs=s[,trans=t]]
#        [,snapshot=on|off][,cache=off|none|writethrough|writeback][,format=f]
#        [,rerror=r][,werror=w][,backup=yes|no][,aio=native|threads]
778
# Parse the value of a drive option into a hash reference.
#
# $key is the option name (e.g. 'ide0'); it is split into 'interface'
# and 'index'. $key may be undef when the value is only being verified,
# in which case the interface is 'unknown' and the index 0.
# $data is a comma separated list of key=value settings; the first
# setting without '=' is taken as the file/volume. 'volume' is an
# alias for 'file'.
#
# Returns undef when the key or any setting is malformed, a setting is
# given twice, no file is given, or settings conflict (cdrom media with
# geometry/snapshot/trans/format or on virtio; rerror on scsi).
sub parse_drive {
    my ($key, $data) = @_;

    my $res = {};

    # $key may be undefined - used to verify JSON parameters
    if (defined($key)) {
        return undef if $key !~ m/^([^\d]+)(\d+)$/;
        $res->{interface} = $1;
        $res->{index} = $2;
    } else {
        $res->{interface} = 'unknown'; # should not harm when used to verify parameters
        $res->{index} = 0;
    }

    foreach my $p (split (/,/, $data)) {
        next if $p =~ m/^\s*$/;

        my ($k, $v);
        if ($p =~ m/^(file|volume|cyls|heads|secs|trans|media|snapshot|cache|format|rerror|werror|backup|aio)=(.+)$/) {
            ($k, $v) = ($1, $2);
            $k = 'file' if $k eq 'volume';

            # each setting may be given only once
            return undef if defined $res->{$k};
        } elsif (!$res->{file} && $p !~ m/=/) {
            # bare word: the volume itself
            ($k, $v) = ('file', $p);
        } else {
            return undef;
        }

        $res->{$k} = $v;
    }

    return undef if !$res->{file};

    # allowed values per setting; only checked when the setting is true
    my @value_checks = (
        [ cache    => qr/^(off|none|writethrough|writeback)$/ ],
        [ snapshot => qr/^(on|off)$/ ],
        [ cyls     => qr/^\d+$/ ],
        [ heads    => qr/^\d+$/ ],
        [ secs     => qr/^\d+$/ ],
        [ media    => qr/^(disk|cdrom)$/ ],
        [ trans    => qr/^(none|lba|auto)$/ ],
        [ format   => qr/^(raw|cow|qcow|qcow2|vmdk|cloop)$/ ],
        [ rerror   => qr/^(ignore|report|stop)$/ ],
        [ werror   => qr/^(enospc|ignore|report|stop)$/ ],
        [ backup   => qr/^(yes|no)$/ ],
        [ aio      => qr/^(native|threads)$/ ],
    );

    foreach my $check (@value_checks) {
        my ($opt, $allowed) = @$check;
        return undef if $res->{$opt} && $res->{$opt} !~ $allowed;
    }

    if ($res->{media} && ($res->{media} eq 'cdrom')) {
        foreach my $opt (qw(snapshot trans format heads secs cyls)) {
            return undef if $res->{$opt};
        }
        return undef if $res->{interface} eq 'virtio';
    }

    # rerror does not work with scsi drives
    return undef if $res->{rerror} && $res->{interface} eq 'scsi';

    return $res;
}
844
# drive settings that are passed through to qemu, in output order
my @qemu_drive_options = qw(heads secs cyls trans media format cache snapshot rerror werror aio);

# Serialize a parsed drive hash back into its configuration file form:
# the file/volume followed by ",key=value" for every qemu drive option
# (and 'backup') that has a true value.
sub print_drive {
    my ($vmid, $drive) = @_;

    my @settings = grep { $drive->{$_} } (@qemu_drive_options, 'backup');
    my $opts = join('', map { ",$_=$drive->{$_}" } @settings);

    return "$drive->{file}$opts";
}
857
ca916ecc
DA
# Build the value of the qemu '-device' option for a parsed drive.
#
#   virtio -> virtio-blk-pci
#   scsi   -> scsi-disk, 7 units per controller
#   ide    -> ide-drive, 2 units per controller
# Any other interface (including 'usb', which is not implemented)
# yields an empty string. $storecfg and $vmid are currently unused.
sub print_drivedevice_full {
    my ($storecfg, $vmid, $drive) = @_;

    my $interface = $drive->{interface};
    my $index = $drive->{index};

    # every device refers to its drive and gets a matching id
    my $ids = "drive=drive-$interface$index,id=device-$interface$index";

    if ($interface eq 'virtio') {
        return "virtio-blk-pci,$ids";
    }

    if ($interface eq 'scsi') {
        my $maxdev = 7;
        my $controller = int ($index / $maxdev);
        my $unit = $index % $maxdev;

        return "scsi-disk,bus=scsi$controller.0,scsi-id=$unit,$ids";
    }

    if ($interface eq 'ide') {
        my $maxdev = 2;
        my $controller = int ($index / $maxdev);
        my $unit = $index % $maxdev;

        return "ide-drive,bus=ide.$controller,unit=$unit,$ids";
    }

    return '';
}
893
1e3baf05
DM
# Build the value of the qemu '-drive' option for a parsed drive.
#
# The volume is resolved to a path (get_iso_path() for CD-ROM drives,
# PVE::Storage::path() for volume IDs, absolute paths are used as-is).
# The "file=" part is omitted when no path results. The drive id is
# "drive-<interface><index>".
sub print_drive_full {
    my ($storecfg, $vmid, $drive) = @_;

    my @settings = grep { $drive->{$_} } @qemu_drive_options;
    my $opts = join('', map { ",$_=$drive->{$_}" } @settings);

    # use linux-aio by default (qemu default is threads)
    $opts .= ",aio=native" if !$drive->{aio};

    my $volid = $drive->{file};
    my $path;
    if (drive_is_cdrom ($drive)) {
        $path = get_iso_path ($storecfg, $vmid, $volid);
    } elsif ($volid =~ m|^/|) {
        $path = $volid;
    } else {
        $path = PVE::Storage::path ($storecfg, $volid);
    }

    my $pathinfo = $path ? "file=$path," : '';

    return "${pathinfo}if=none,id=drive-$drive->{interface}$drive->{index}$opts";
}
921
922
# True if $drive is set and its media is 'cdrom'.
# For a missing drive or unset media the false value itself is returned.
sub drive_is_cdrom {
    my ($drive) = @_;

    return $drive if !$drive;

    my $media = $drive->{media};
    return $media if !$media;

    return ($media eq 'cdrom');
}
929
040b06b7
DA
# Parse a host PCI address of the form 'bus:dev.func' (two, two and
# one lower case hex digits). Returns { pciid => $value } on success,
# undef for an empty or malformed value.
sub parse_hostpci {
    my ($value) = @_;

    return undef if !$value;

    return undef if $value !~ m/^[a-f0-9]{2}:[a-f0-9]{2}\.[a-f0-9]$/;

    return { pciid => $value };
}
945
1e3baf05
DM
946# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
# Parse the value of a netX option into a hash reference with the keys
# 'model', 'macaddr' and optionally 'bridge' and 'rate'.
#
# The model name is matched case-insensitively and stored in lower
# case; the MAC address is stored in upper case and generated with
# random_ether_addr() when not given. Returns undef if any setting is
# not understood or no model is given.
sub parse_net {
    my ($data) = @_;

    my $res = {};

    foreach my $kvp (split (/,/, $data)) {

        if ($kvp =~ m/^(ne2k_pci|e1000|rtl8139|pcnet|virtio|ne2k_isa|i82551|i82557b|i82559er)(=([0-9a-f]{2}(:[0-9a-f]{2}){5}))?$/i) {
            my ($model, $mac) = ($1, $3);
            $mac = uc($mac) || random_ether_addr ();
            $res->{model} = lc ($model);
            $res->{macaddr} = $mac;
            next;
        }

        if ($kvp =~ m/^bridge=(\S+)$/) {
            $res->{bridge} = $1;
            next;
        }

        if ($kvp =~ m/^rate=(\d+(\.\d+)?)$/) {
            $res->{rate} = $1;
            next;
        }

        return undef;
    }

    return undef if !$res->{model};

    return $res;
}
973
# Serialize a parsed network hash back into its configuration file
# form: "<model>[=<mac>][,bridge=<bridge>][,rate=<rate>]".
sub print_net {
    my ($net) = @_;

    my @parts = ("$net->{model}");
    push @parts, "=$net->{macaddr}" if $net->{macaddr};
    push @parts, ",bridge=$net->{bridge}" if $net->{bridge};
    push @parts, ",rate=$net->{rate}" if $net->{rate};

    return join('', @parts);
}
984
# Rewrite every netX entry of $settings through parse_net()/print_net(),
# so that entries without a MAC address get the one generated by
# parse_net(). Entries that fail to parse are left untouched.
sub add_random_macs {
    my ($settings) = @_;

    my @net_options = grep { $_ =~ m/^net(\d+)$/ } keys %$settings;

    foreach my $opt (@net_options) {
        my $net = parse_net($settings->{$opt});
        $settings->{$opt} = print_net($net) if $net;
    }
}
995
# Record $volid as an unused volume.
#
# The unusedN slots of $config are inspected; the new entry is stored
# under the lowest free slot name in $res (not in $config). Nothing is
# added if $volid is already listed in $config. Dies when all
# $MAX_UNUSED_DISKS slots are occupied.
sub add_unused_volume {
    my ($config, $res, $volid) = @_;

    my $key;
    # walk from the highest slot down, so $key ends up being the lowest free one
    for (my $ind = $MAX_UNUSED_DISKS - 1; $ind >= 0; $ind--) {
        my $test = "unused$ind";
        if (my $vid = $config->{$test}) {
            return if $vid eq $volid; # do not add duplicates
        } else {
            $key = $test;
        }
    }

    die "Too many unused volumes - please delete them first.\n" if !$key;

    $res->{$key} = $volid;
}
1013
# fixme: remove all those $noerr parameters?
1015
1016PVE::JSONSchema::register_format('pve-qm-bootdisk', \&verify_bootdisk);
# Format check for 'pve-qm-bootdisk': returns $value if it is a valid
# drive name; otherwise returns undef when $noerr is set, else dies.
sub verify_bootdisk {
    my ($value, $noerr) = @_;

    if (!valid_drivename($value)) {
        return undef if $noerr;
        die "invalid boot disk '$value'\n";
    }

    return $value;
}
1026
1027PVE::JSONSchema::register_format('pve-qm-net', \&verify_net);
# Format check for 'pve-qm-net': returns $value if parse_net() accepts
# it; otherwise returns undef when $noerr is set, else dies.
sub verify_net {
    my ($value, $noerr) = @_;

    if (!parse_net($value)) {
        return undef if $noerr;
        die "unable to parse network options\n";
    }

    return $value;
}
1037
1038PVE::JSONSchema::register_format('pve-qm-drive', \&verify_drive);
# Format check for 'pve-qm-drive': returns $value if parse_drive()
# accepts it (called without an option name); otherwise returns undef
# when $noerr is set, else dies.
sub verify_drive {
    my ($value, $noerr) = @_;

    if (!parse_drive (undef, $value)) {
        return undef if $noerr;
        die "unable to parse drive options\n";
    }

    return $value;
}
1048
1049PVE::JSONSchema::register_format('pve-qm-hostpci', \&verify_hostpci);
# Format check for 'pve-qm-hostpci': returns $value if parse_hostpci()
# accepts it; otherwise returns undef when $noerr is set, else dies.
sub verify_hostpci {
    my ($value, $noerr) = @_;

    if (!parse_hostpci($value)) {
        return undef if $noerr;
        die "unable to parse pci id\n";
    }

    return $value;
}
1059
0ea9541d
DM
1060PVE::JSONSchema::register_format('pve-qm-watchdog', \&verify_watchdog);
# Format check for 'pve-qm-watchdog': returns $value if parse_watchdog()
# accepts it; otherwise returns undef when $noerr is set, else dies.
sub verify_watchdog {
    my ($value, $noerr) = @_;

    if (!parse_watchdog($value)) {
        return undef if $noerr;
        die "unable to parse watchdog options\n";
    }

    return $value;
}
1070
# Parse a watchdog option value: a comma separated list of
# "[model=]i6300esb|ib700" and
# "[action=]reset|shutdown|poweroff|pause|debug|none".
# Returns a hash reference with the keys 'model' and/or 'action', or
# undef for an empty value or an unknown setting. Blank list items are
# ignored.
sub parse_watchdog {
    my ($value) = @_;

    return undef if !$value;

    my $res = {};

    my @settings = grep { $_ !~ m/^\s*$/ } split (/,/, $value);

    foreach my $p (@settings) {
        if ($p =~ m/^(model=)?(i6300esb|ib700)$/) {
            $res->{model} = $2;
            next;
        }

        if ($p =~ m/^(action=)?(reset|shutdown|poweroff|pause|debug|none)$/) {
            $res->{action} = $2;
            next;
        }

        return undef;
    }

    return $res;
}
1092
1e3baf05
DM
# Parse a comma separated USB device specification.
#
# Accepted items:
#   host=VVVV:PPPP   four hex digits each -> keys 'vendorid' / 'productid'
#   host=BUS-PORT    e.g. host=1-2.3      -> keys 'hostbus' / 'hostport'
#
# Returns a hash reference with the parsed keys, or undef when $value is
# false, contains no items, or contains an item matching neither form.
sub parse_usb_device {
    my ($value) = @_;

    return undef if !$value;

    my @items = split(/,/, $value);

    my %dev;
    for my $item (@items) {
        if ($item =~ m/^host=([0-9A-Fa-f]{4}):([0-9A-Fa-f]{4})$/) {
            @dev{qw(vendorid productid)} = ($1, $2);
        } elsif ($item =~ m/^host=(\d+)\-(\d+(\.\d+)*)$/) {
            @dev{qw(hostbus hostport)} = ($1, $2);
        } else {
            return undef;
        }
    }

    # every item either matched or aborted above, so "nothing found" can
    # only mean that the list was empty
    return undef if !@items;

    return \%dev;
}
1119
1120PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);
# Format checker registered for 'pve-qm-usb-device'.
# Returns $value when parse_usb_device() accepts it. Otherwise returns
# undef if $noerr is set, or dies.
sub verify_usb_device {
    my ($value, $noerr) = @_;

    if (!parse_usb_device($value)) {
        return undef if $noerr;
        die "unable to parse usb device\n";
    }

    return $value;
}
1130
1131PVE::JSONSchema::register_format('pve-qm-parallel', \&verify_parallel);
# Format checker registered for 'pve-qm-parallel'.
# $value is a comma separated list; every item must look like
# /dev/parportN. Returns $value if all items are valid. On the first
# invalid item it returns undef if $noerr is set, or dies.
sub verify_parallel {
    my ($value, $noerr) = @_;

    for my $dev (split(/,/, $value)) {
        next if $dev =~ m|^/dev/parport\d+$|;

        return undef if $noerr;
        die "invalid device name\n";
    }

    return $value;
}
1144
1145PVE::JSONSchema::register_format('pve-qm-serial', \&verify_serial);
# Format checker registered for 'pve-qm-serial'.
# $value is a comma separated list; every item must look like /dev/ttySN.
# Returns $value if all items are valid. On the first invalid item it
# returns undef if $noerr is set, or dies.
sub verify_serial {
    my ($value, $noerr) = @_;

    for my $dev (split(/,/, $value)) {
        next if $dev =~ m|^/dev/ttyS\d+$|;

        return undef if $noerr;
        die "invalid device name\n";
    }

    return $value;
}
1158
1159# add JSON properties for create and set function
# Copy every option description from $confdesc into the property hash
# $prop (existing keys with the same name are overwritten) and return $prop.
sub json_config_properties {
    my $prop = shift;

    $prop->{$_} = $confdesc->{$_} for keys %$confdesc;

    return $prop;
}
1169
# Validate $value against the type declared for option $key in $confdesc
# and return the normalized value.
#
#   boolean - 1/on/yes/true -> 1, 0/off/no/false -> 0 (case-insensitive)
#   integer - digits only, returned through int()
#   string  - checked against the declared format, if any; without a
#             format, one pair of enclosing double quotes is stripped
#
# Dies on unknown keys, undefined values, values containing CR/LF, and on
# any failed type or format check.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);

    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    die "internal error" if $type ne 'string';

    my $fmt = $desc->{format};

    if (!$fmt) {
        $value =~ s/^\"(.*)\"$/$1/;
        return $value;
    }

    if ($fmt eq 'pve-qm-drive') {
        # special case - we need to pass $key to parse_drive()
        return $value if parse_drive($key, $value);
        die "unable to parse drive options\n";
    }

    PVE::JSONSchema::check_format($fmt, $value);

    return $value;
}
1209
# Run $code (with @param) through lock_file() on the per-VM lock file,
# using a timeout argument of 10. Whatever error is left in $@ after
# lock_file() returns is re-raised.
sub lock_config {
    my ($vmid, $code, @param) = @_;

    my $lockfile = config_file_lock($vmid);

    lock_file($lockfile, 10, $code, @param);

    die $@ if $@;
}
1219
# Return the config path of VM $vmid on $node, relative to the cluster
# file system: "nodes/<node>/qemu-server/<vmid>.conf".
# $node falls back to $nodename when it is not given (or false).
sub cfs_config_path {
    my ($vmid, $node) = @_;

    $node ||= $nodename;

    return "nodes/$node/qemu-server/$vmid.conf";
}
1226
040b06b7
DA
# Placeholder: always reports IOMMU support as present (returns 1).
sub check_iommu_support {
    #fixme : need to check IOMMU support
    #http://www.linux-kvm.org/page/How_to_assign_devices_with_VT-d_in_KVM

    return 1;
}
1235
# Return the absolute path of the config file of VM $vmid (optionally on
# $node): the cfs_config_path() result below /etc/pve.
sub config_file {
    my ($vmid, $node) = @_;

    return "/etc/pve/" . cfs_config_path($vmid, $node);
}
1242
# Return the path of the lock file used for VM $vmid, located in $lock_dir.
sub config_file_lock {
    my ($vmid) = @_;

    return $lock_dir . "/lock-$vmid.conf";
}
1248
# Set access and modification time of the VM's config file to "now"
# (utime with undef timestamps).
sub touch_config {
    my ($vmid) = @_;

    my $path = config_file($vmid);

    utime(undef, undef, $path);
}
1255
# Allocate or verify the disks described in $settings.
#
# For every non-cdrom drive:
#   - a file spec of the form "[STORAGE:]SIZE" (storage defaults to
#     'local') allocates a new volume via PVE::Storage::vdisk_alloc and
#     rewrites $settings->{$ds} to reference the new volume;
#   - any other spec must resolve to an existing regular file or block
#     device, otherwise the call dies.
#
# On any error, volumes allocated so far are freed again and the error is
# re-thrown. Returns an array reference with the newly allocated volume IDs.
sub create_disks {
    my ($storecfg, $vmid, $settings) = @_;

    my $vollist = [];

    eval {
        foreach_drive($settings, sub {
            my ($ds, $disk) = @_;

            return if drive_is_cdrom($disk);

            my $file = $disk->{file};

            if ($file =~ m/^(([^:\s]+):)?(\d+(\.\d+)?)$/) {
                my ($storeid, $size) = ($2 || 'local', $3);

                my $defformat = PVE::Storage::storage_default_format($storecfg, $storeid);
                my $fmt = $disk->{format} || $defformat;

                syslog('info', "VM $vmid creating new disk - size is $size GB");

                my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid,
                                                      $fmt, undef, $size*1024*1024);

                $disk->{file} = $volid;
                delete($disk->{format}); # no longer needed
                push @$vollist, $volid;
                $settings->{$ds} = PVE::QemuServer::print_drive($vmid, $disk);

                return;
            }

            # not a size spec - the image has to exist already
            my $path;
            if ($disk->{file} =~ m|^/dev/.+|) {
                $path = $disk->{file};
            } else {
                $path = PVE::Storage::path($storecfg, $disk->{file});
            }

            die "image '$path' does not exists\n"
                unless -f $path || -b $path;
        });
    };

    my $err = $@;

    return $vollist if !$err;

    syslog('err', "VM $vmid creating disks failed");

    # roll back: free everything allocated before the failure
    foreach my $volid (@$vollist) {
        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        warn $@ if $@;
    }

    die $err;
}
1310
# Free the volume $volid of VM $vmid and touch the VM config afterwards.
# Refuses (dies) when $volid is an absolute path or when the owner
# reported by PVE::Storage::path is not $vmid.
sub unlink_image {
    my ($storecfg, $vmid, $volid) = @_;

    if ($volid =~ m|^/|) {
        die "reject to unlink absolute path '$volid'";
    }

    my ($path, $owner) = PVE::Storage::path($storecfg, $volid);

    if (!$owner || ($owner != $vmid)) {
        die "reject to unlink '$volid' - not owned by this VM";
    }

    syslog('info', "VM $vmid deleting volume '$volid'");

    PVE::Storage::vdisk_free($storecfg, $volid);

    touch_config($vmid);
}
1328
# Destroy VM $vmid: free the disks it owns, remove its config file and
# finally free any remaining volumes listed for this VM.
#
# Dies if the config cannot be loaded or the VM is locked. Errors while
# removing the leftover (unused) volumes are only reported via warn.
sub destroy_vm {
    my ($storecfg, $vmid) = @_;

    my $conffile = config_file($vmid);

    my $conf = load_config($vmid);

    check_lock($conf);

    # only remove disks owned by this VM
    foreach_drive($conf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);

        # Use 'return' to skip a drive: this is a callback, not a loop body.
        # 'next' only worked by leaving the sub and hitting the loop inside
        # foreach_drive ("Exiting subroutine via next").
        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        PVE::Storage::vdisk_free($storecfg, $volid);
    });

    unlink $conffile;

    # also remove unused disk
    eval {
        my $dl = PVE::Storage::vdisk_list($storecfg, undef, $vmid);

        eval {
            PVE::Storage::foreach_volid($dl, sub {
                my ($volid, $sid, $volname, $d) = @_;
                PVE::Storage::vdisk_free($storecfg, $volid);
            });
        };
        warn $@ if $@;

    };
    warn $@ if $@;
}
1370
1371# fixme: remove?
# Collect the drives configured in $conf together with their size.
#
# Returns a hash reference mapping drive name to the parsed drive hash,
# extended by 'disksize' (size divided by 1024*1024, or 0 if no size
# information was found). Sizes of /dev/... paths come from
# PVE::Storage::file_size_info, all others from PVE::Storage::vdisk_list;
# errors of the latter are only reported via warn.
sub load_diskinfo_old {
    my ($storecfg, $vmid, $conf) = @_;

    my $info = {};
    my $res = {};
    my $vollist;

    foreach_drive($conf, sub {
        my ($ds, $di) = @_;

        $res->{$ds} = $di;

        return if drive_is_cdrom($di);

        my $file = $di->{file};
        if ($file =~ m|^/dev/.+|) {
            $info->{$file}->{size} = PVE::Storage::file_size_info($file);
        } else {
            push @$vollist, $file;
        }
    });

    eval {
        my $dl = PVE::Storage::vdisk_list($storecfg, undef, $vmid, $vollist);

        PVE::Storage::foreach_volid($dl, sub {
            my ($volid, $sid, $volname, $d) = @_;
            $info->{$volid} = $d;
        });
    };
    warn $@ if $@;

    foreach my $di (values %$res) {
        my $entry = $info->{$di->{file}};
        $di->{disksize} = $entry ? $entry->{size} / (1024*1024) : 0;
    }

    return $res;
}
1412
# Read the parsed configuration of VM $vmid through
# PVE::Cluster::cfs_read_file. Dies if no configuration is returned.
sub load_config {
    my ($vmid) = @_;

    my $conf = PVE::Cluster::cfs_read_file(cfs_config_path($vmid));

    die "no such VM ('$vmid')\n" if !defined($conf);

    return $conf;
}
1424
# Parser for VM config files (registered via cfs_register_file).
#
# $filename must end in /qemu-server/<vmid>.conf, $raw is the file content.
# Returns undef when $raw is undefined, otherwise a hash reference with the
# parsed settings plus 'digest' (SHA1 hex digest of $raw).
#
# Comment and blank lines are skipped. Values failing check_type() and
# drives whose file cannot be mapped to a volume ID are reported via warn
# and dropped. 'cdrom' is stored as 'ide2', and a legacy 'smp' value is
# converted to 'sockets'.
sub parse_vm_config {
    my ($filename, $raw) = @_;

    return undef if !defined($raw);

    my $res = {
        digest => Digest::SHA1::sha1_hex($raw),
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    LINE:
    while ($raw && $raw =~ s/^(.*?)(\n|$)//) {
        my $line = $1;

        next LINE if $line =~ m/^\#/;

        next LINE if $line =~ m/^\s*$/;

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            my $key = $1;
            $res->{$key} = PVE::Tools::decode_text($2);
            next LINE;
        }

        if ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            $res->{$1} = $2;
            next LINE;
        }

        # anything else that does not look like "key: value" is ignored
        next LINE if $line !~ m/^([a-z][a-z_]*\d*):\s*(\S+)\s*$/;

        my ($key, $value) = ($1, $2);

        eval { $value = check_type($key, $value); };
        if ($@) {
            warn "vm $vmid - unable to parse value of '$key' - $@";
            next LINE;
        }

        my $fmt = $confdesc->{$key}->{format};
        if ($fmt && $fmt eq 'pve-qm-drive') {
            # normalize the drive so that it references a volume ID
            my $v = parse_drive($key, $value);
            my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media});
            if (!$volid) {
                warn "vm $vmid - unable to parse value of '$key'\n";
                next LINE;
            }
            $v->{file} = $volid;
            $value = print_drive($vmid, $v);
        }

        my $target = ($key eq 'cdrom') ? 'ide2' : $key;
        $res->{$target} = $value;
    }

    # convert old smp to sockets
    $res->{sockets} = $res->{smp} if $res->{smp} && !$res->{sockets};
    delete $res->{smp};

    return $res;
}
1490
# Apply $settings / $unset to the config of VM $vmid while holding the
# per-VM config lock. See change_config_nolock() for the parameters.
sub change_config {
    my ($vmid, $settings, $unset, $skiplock) = @_;

    # Pass a closure as the callback. The former '&change_config_nolock'
    # (no parentheses, no backslash) called the sub right here, outside the
    # lock, and handed its return value to lock_config() as the "code".
    lock_config($vmid, sub {
        change_config_nolock($vmid, $settings, $unset, $skiplock);
    });
}
1496
# Rewrite the config file of VM $vmid (the caller is expected to hold the
# lock, see change_config).
#
#   $settings - hash of key => new value; each value is validated with
#               check_type(), 'description' is text-encoded first, 'cdrom'
#               is stored as 'ide2', 'digest' is ignored
#   $unset    - hash of keys to drop from the file
#   $skiplock - when false, check_lock() is applied to $settings
#
# The existing file is copied line by line into "<file>.<pid>.tmp":
# comments and blank lines are kept, duplicate keys are dropped, 'smp' is
# renamed to 'sockets', and 'unusedN' entries pointing to a volume that is
# being (re-)added are removed. New keys are appended, then the temp file
# is renamed over the original. On any error the temp file is removed and
# the error is re-thrown.
sub change_config_nolock {
    my ($vmid, $settings, $unset, $skiplock) = @_;

    my $res = {};

    $unset->{ide2} = $unset->{cdrom} if $unset->{cdrom};

    check_lock($settings) if !$skiplock;

    # we do not use 'smp' any longer
    if (!$settings->{sockets} && $settings->{smp}) {
        $settings->{sockets} = $settings->{smp};
    }
    $unset->{smp} = 1 if $settings->{sockets};

    my $new_volids = {};

    foreach my $key (keys %$settings) {
        next if $key eq 'digest';

        my $value = $settings->{$key};
        $value = PVE::Tools::encode_text($value) if $key eq 'description';

        eval { $value = check_type($key, $value); };
        die "unable to parse value of '$key' - $@" if $@;

        my $target = ($key eq 'cdrom') ? 'ide2' : $key;
        $res->{$target} = $value;

        if (valid_drivename($key)) {
            # remember volumes that get (re-)added by this change
            my $drive = PVE::QemuServer::parse_drive($key, $value);
            $new_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
        }
    }

    my $filename = config_file($vmid);
    my $tmpfn = "$filename.$$.tmp";

    my $fh = IO::File->new($filename, "r") ||
        die "unable to read config for VM $vmid\n";

    my $werror = "unable to write config for VM $vmid\n";

    my $out = IO::File->new($tmpfn, "w") || die $werror;

    eval {

        my %seen;

        while (my $line = <$fh>) {

            # comments and blank lines are copied verbatim
            if (($line =~ m/^\#/) || ($line =~ m/^\s*$/)) {
                die $werror unless print $out $line;
                next;
            }

            $line =~ m/^([a-z][a-z_]*\d*):\s*(.*\S)\s*$/
                || die "unable to parse config file: $line\n";

            my ($key, $value) = ($1, $2);

            # remove 'unusedX' settings if we re-add a volume
            next if $key =~ m/^unused/ && $new_volids->{$value};

            # convert 'smp' to 'sockets'
            $key = 'sockets' if $key eq 'smp';

            # only the first occurrence of a key survives
            next if $seen{$key}++;

            $value = delete $res->{$key} if defined($res->{$key});

            next if defined($unset->{$key});

            die $werror unless print $out "$key: $value\n";
        }

        # append settings which were not present in the old file
        foreach my $key (keys %$res) {
            next if defined($unset->{$key});

            die $werror unless print $out "$key: $res->{$key}\n";
        }
    };

    my $err = $@;

    $fh->close();

    if ($err) {
        $out->close();
        unlink $tmpfn;
        die $err;
    }

    if (!$out->close()) {
        $err = "close failed - $!\n";
        unlink $tmpfn;
        die $err;
    }

    if (!rename($tmpfn, $filename)) {
        $err = "rename failed - $!\n";
        unlink $tmpfn;
        die $err;
    }
}
1613
# Build the hash of default settings: every 'default' defined in
# $confdesc, with 'keyboard' overridden by the value from datacenter.cfg
# when that is set. Returns a hash reference.
sub load_defaults {

    my $res = {};

    # we use static defaults from our JSON schema configuration
    foreach my $key (keys %$confdesc) {
        my $default = $confdesc->{$key}->{default};
        $res->{$key} = $default if defined($default);
    }

    my $dccfg = PVE::Cluster::cfs_read_file('datacenter.cfg');
    if (my $keyboard = $dccfg->{keyboard}) {
        $res->{keyboard} = $keyboard;
    }

    return $res;
}
1630
# Return a hash reference { <vmid> => { exists => 1 }, ... } for all VMs
# which PVE::Cluster::get_vmlist() assigns to the node $nodename.
sub config_list {
    my $res = {};

    my $vmlist = PVE::Cluster::get_vmlist();
    return $res if !$vmlist || !$vmlist->{ids};

    my $ids = $vmlist->{ids};

    foreach my $vmid (keys %$ids) {
        my $d = $ids->{$vmid};

        # skip entries without a node and VMs located on other nodes
        next unless $d->{node} && $d->{node} eq $nodename;

        $res->{$vmid}->{exists} = 1;
    }

    return $res;
}
1644
64e13401
DM
1645# test if VM uses local resources (to prevent migration)
# Test whether the VM config $conf references host-local resources.
#
# A VM counts as using local resources when one of the list options
# 'hostusb', 'hostpci', 'serial' or 'parallel' is set, or when it has an
# indexed 'usbN', 'pciN' or 'hostpciN' entry.
#
# Returns 1 or 0. Unless $noerr is set, dies instead of returning 1.
#
# NOTE(review): the former unconditional 'die "implement me"' placeholder
# made the whole check unreachable. It was removed, and the key pattern
# now also matches the 'hostpciN' keys used elsewhere in this file.
# Confirm that callers are ready for a working check.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    my $loc_res = 0;

    foreach my $opt (qw(hostusb hostpci serial parallel)) {
        $loc_res = 1 if $conf->{$opt};
    }

    foreach my $k (keys %$conf) {
        $loc_res = 1 if $k =~ m/^(usb|pci|hostpci)\d+$/;
    }

    die "VM uses local resources\n" if $loc_res && !$noerr;

    return $loc_res;
}
1665
1e3baf05
DM
# Die if the config $conf carries a (true) 'lock' setting; the message
# contains the lock value.
sub check_lock {
    my ($conf) = @_;

    my $lock = $conf->{lock};

    die "VM is locked ($lock)\n" if $lock;
}
1671
# Check that process $pid is a kvm process started with the given pidfile.
#
# Reads /proc/$pid/cmdline. Returns 1 when the command name ends in 'kvm'
# and the argument following the first '-pidfile' / '--pidfile' equals
# $pidfile; a false value otherwise (also when the file cannot be read).
sub check_cmdline {
    my ($pidfile, $pid) = @_;

    my $fh = IO::File->new("/proc/$pid/cmdline", "r");
    return undef if !defined($fh);

    my $line = <$fh>;
    $fh->close;
    return undef if !$line;

    # arguments are separated by NUL bytes
    my @param = split(/\0/, $line);

    my $cmd = $param[0];
    return if !$cmd || ($cmd !~ m|kvm$|);

    foreach my $i (0 .. $#param) {
        my $p = $param[$i];
        next if !$p;
        next unless ($p eq '-pidfile') || ($p eq '--pidfile');

        # only the first pidfile option is considered
        my $arg = $param[$i+1];
        return 1 if $arg && ($arg eq $pidfile);
        return undef;
    }

    return undef;
}
1697
# Return the PID of the running kvm process of VM $vmid, or undef if the
# VM is not running.
#
# Dies if the VM has no config file. The PID is read from the VM's
# pidfile and is only trusted when /proc/<pid> exists and check_cmdline()
# confirms that the process was started with this pidfile.
sub check_running {
    my ($vmid) = @_;

    my $filename = config_file($vmid);

    die "unable to find configuration file for VM $vmid - no such machine\n"
        if ! -f $filename;

    my $pidfile = pidfile_name($vmid);

    # explicit read mode instead of embedding '<' into the file name
    my $fd = IO::File->new($pidfile, "r");
    return undef if !$fd;

    my $st = stat($fd);
    my $line = <$fd>;
    close($fd);

    # the stat above is of the pidfile, so that is the file to report;
    # the old message named the config file
    if ($st && $st->mtime > time()) {
        warn "file '$pidfile' modified in future\n";
    }

    # an empty pidfile yields an undefined $line
    if (defined($line) && $line =~ m/^(\d+)$/) {
        my $pid = $1;

        return $pid if ((-d "/proc/$pid") && check_cmdline($pidfile, $pid));
    }

    return undef;
}
1727
# Return the VM list from config_list(), with a 'pid' entry added for
# every VM that has a <vmid>.pid file in $var_run_tmpdir and for which
# check_running() returns a PID. If the directory cannot be opened the
# plain list is returned.
sub vzlist {

    my $vzlist = config_list();

    my $fd = IO::Dir->new($var_run_tmpdir) || return $vzlist;

    while (defined(my $de = $fd->read)) {
        next unless $de =~ m/^(\d+)\.pid$/;
        my $vmid = $1;

        next unless defined($vzlist->{$vmid});

        my $pid = check_running($vmid);
        $vzlist->{$vmid}->{pid} = $pid if $pid;
    }

    return $vzlist;
}
1745
1746my $storage_timeout_hash = {};
1747
# Determine the size of the boot disk configured in $conf.
#
# Returns ($size, $used) in list context and $size in scalar context, as
# reported by PVE::Storage::file_size_info. Returns undef when there is no
# usable boot disk (not set, invalid name, unparsable, cdrom, no file) or
# when no size information is available.
#
# When file_size_info reports no format (treated as a timeout), the
# storage / path is remembered in $storage_timeout_hash and skipped for
# the following 30 seconds.
sub disksize {
    my ($storecfg, $conf) = @_;

    my $bootdisk = $conf->{bootdisk};
    return undef if !$bootdisk || !valid_drivename($bootdisk);

    return undef if !$conf->{$bootdisk};

    my $drive = parse_drive($bootdisk, $conf->{$bootdisk});
    return undef if !defined($drive) || drive_is_cdrom($drive);

    my $volid = $drive->{file};
    return undef if !$volid;

    my ($path, $timeoutid);

    if ($volid =~ m|^/|) {
        # absolute path: the path itself is the timeout key
        $path = $timeoutid = $volid;
    } else {
        $timeoutid = PVE::Storage::parse_volume_id($volid);
        $path = PVE::Storage::path($storecfg, $volid);
    }

    if (my $last_timeout = $storage_timeout_hash->{$timeoutid}) {
        # skip storage with errors
        return undef if (time() - $last_timeout) < 30;

        delete $storage_timeout_hash->{$timeoutid};
    }

    my ($size, $format, $used) = PVE::Storage::file_size_info($path, 1);

    if (!defined($format)) {
        # got timeout
        $storage_timeout_hash->{$timeoutid} = time();
        return undef;
    }

    return wantarray ? ($size, $used) : $size;
}
1797
1798my $last_proc_pid_stat;
1799
# Collect status information for the VMs on this node.
#
# $opt_vmid - optional; restrict the result to this VM
#
# Returns a hash reference mapping vmid to a hash with the keys pid,
# status ('running' / 'stopped'), disk, maxdisk, cpus, name, maxmem,
# uptime, cpu, relcpu, mem, netin, netout, diskread and diskwrite.
#
# Network counters are summed over the tap<vmid>i* devices. The host's
# 'receive' counter is accounted as the VM's 'netout' and 'transmit' as
# 'netin'. CPU usage is computed from the difference to the previous call
# (kept in $last_proc_pid_stat), so the first call for a PID reports 0.
sub vmstatus {
    my ($opt_vmid) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my ($uptime) = PVE::ProcFSTools::read_proc_uptime();

    # pass 1: static data from the configuration
    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);

        my $cfspath = cfs_config_path($vmid);
        my $conf = PVE::Cluster::cfs_read_file($cfspath) || {};

        my $d = {};
        $d->{pid} = $list->{$vmid}->{pid};

        # fixme: better status?
        $d->{status} = $list->{$vmid}->{pid} ? 'running' : 'stopped';

        my ($size, $used) = disksize($storecfg, $conf);
        if (defined($size) && defined($used)) {
            $d->{disk} = $used;
            $d->{maxdisk} = $size;
        } else {
            $d->{disk} = 0;
            $d->{maxdisk} = 0;
        }

        $d->{cpus} = ($conf->{sockets} || 1) * ($conf->{cores} || 1);
        $d->{name} = $conf->{name} || "VM $vmid";
        $d->{maxmem} = $conf->{memory} ? $conf->{memory}*(1024*1024) : 0;

        $d->{uptime} = 0;
        $d->{cpu} = 0;
        $d->{relcpu} = 0;
        $d->{mem} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $res->{$vmid} = $d;
    }

    # pass 2: network traffic of the tap devices
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^tap([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $res->{$vmid};
        next if !$d;

        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};
    }

    my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
    my $cpucount = $cpuinfo->{cpus} || 1;
    my $ctime = gettimeofday;

    # pass 3: per process data from /proc
    foreach my $vmid (keys %$list) {

        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        if (my $fh = IO::File->new("/proc/$pid/io", "r")) {
            my $data = {};
            while (defined(my $line = <$fh>)) {
                if ($line =~ m/^([rw]char):\s+(\d+)$/) {
                    $data->{$1} = $2;
                }
            }
            close($fh);
            $d->{diskread} = $data->{rchar} || 0;
            $d->{diskwrite} = $data->{wchar} || 0;
        }

        my $statstr = file_read_firstline("/proc/$pid/stat");
        next if !$statstr;

        # captures: $2 utime, $3 stime, $6 starttime, $7 vsize, $8 rss
        my ($utime, $stime, $vsize, $rss, $starttime);
        if ($statstr =~ m/^$pid \(.*\) \S (-?\d+) -?\d+ -?\d+ -?\d+ -?\d+ \d+ \d+ \d+ \d+ \d+ (\d+) (\d+) (-?\d+) (-?\d+) -?\d+ -?\d+ -?\d+ 0 (\d+) (\d+) (-?\d+) \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ -?\d+ -?\d+ \d+ \d+ \d+/) {
            ($utime, $stime, $vsize, $rss, $starttime) = ($2, $3, $7, $8 * 4096, $6);
        } else {
            next;
        }

        my $used = $utime + $stime;

        my $vcpus = $d->{cpus} > $cpucount ? $cpucount : $d->{cpus};

        # starttime is counted in clock ticks, so convert it with the
        # system's tick rate instead of a hard-coded 100
        $d->{uptime} = int($uptime - ($starttime/$clock_ticks));

        if ($vsize) {
            $d->{mem} = int(($rss/$vsize)*$d->{maxmem});
        }

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first sample for this PID - nothing to compare with yet
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
                relcpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $clock_ticks;

        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = $dutime/$dtime;
            $d->{relcpu} = ($d->{cpu} * $cpucount) / $vcpus;
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => $d->{cpu},
                relcpu => $d->{relcpu},
            };
        } else {
            # interval too short - report the previous values
            $d->{cpu} = $old->{cpu};
            $d->{relcpu} = $old->{relcpu};
        }
    }

    return $res;
}
1935
# Call $func->($drivename, $drive) for every key of $conf that is a valid
# drive name and whose value can be parsed by parse_drive().
sub foreach_drive {
    my ($conf, $func) = @_;

    foreach my $ds (keys %$conf) {
        next unless valid_drivename($ds);

        my $drive = parse_drive($ds, $conf->{$ds});
        next unless $drive;

        $func->($ds, $drive);
    }
}
1948
# Translate a VM configuration into the kvm command line.
#
#   $storecfg    - storage configuration (passed on to the drive helpers)
#   $vmid        - VM id
#   $conf        - parsed VM configuration
#   $defaults    - default settings (see load_defaults)
#   $migrate_uri - optional; passed to kvm as '-incoming'
#
# Returns the command as an array reference. In list context it returns
# ($cmd, $vollist), where $vollist holds the drive files that parse as
# storage volume IDs.
#
# Dies when the installed kvm reports a version below 0.14.0 (or an
# unparsable version), on net ids >= $MAX_NETS and on tap interface names
# longer than 15 characters.
sub config_to_command {
    my ($storecfg, $vmid, $conf, $defaults, $migrate_uri) = @_;

    my $cmd = [];

    my $kvmver = kvm_user_version();
    my $vernum = 0; # unknown
    if ($kvmver =~ m/^(\d+)\.(\d+)\.(\d+)$/) {
        $vernum = $1*1000000+$2*1000+$3;
    }

    die "detected old qemu-kvm binary ($kvmver)\n" if $vernum < 14000;

    my $have_ovz = -f '/proc/vz/vestat';

    push @$cmd, '/usr/bin/kvm';

    push @$cmd, '-id', $vmid;

    my $use_virtio = 0;

    my $socket = monitor_socket($vmid);
    push @$cmd, '-chardev', "socket,id=monitor,path=$socket,server,nowait";
    push @$cmd, '-mon', "chardev=monitor,mode=readline";

    $socket = vnc_socket($vmid);
    push @$cmd, '-vnc', "unix:$socket,x509,password";

    push @$cmd, '-pidfile' , pidfile_name($vmid);

    push @$cmd, '-daemonize';

    push @$cmd, '-incoming', $migrate_uri if $migrate_uri;

    # include usb device config
    push @$cmd, '-readconfig', '/usr/share/qemu-server/pve-usb.cfg';

    # enable absolute mouse coordinates (needed by vnc)
    my $tablet = defined($conf->{tablet}) ? $conf->{tablet} : $defaults->{tablet};
    push @$cmd, '-device', 'usb-tablet,bus=ehci.0,port=6' if $tablet;

    # host pci devices
    for (my $i = 0; $i < $MAX_HOSTPCI_DEVICES; $i++) {
        my $d = parse_hostpci($conf->{"hostpci$i"});
        next if !$d;
        push @$cmd, '-device', "pci-assign,host=$d->{pciid},id=hostpci$i";
    }

    # usb devices
    for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) {
        my $d = parse_usb_device($conf->{"usb$i"});
        next if !$d;
        if ($d->{vendorid} && $d->{productid}) {
            push @$cmd, '-device', "usb-host,vendorid=$d->{vendorid},productid=$d->{productid}";
        } elsif (defined($d->{hostbus}) && defined($d->{hostport})) {
            push @$cmd, '-device', "usb-host,hostbus=$d->{hostbus},hostport=$d->{hostport}";
        }
    }

    if (my $usbdl = $conf->{hostusb}) {
        my @dl = split(/,/, $usbdl);
        foreach my $dev (@dl) {
            push @$cmd, '-usbdevice', "host:$dev" if $dev;
        }
    }

    # serial devices (only existing character devices are passed on)
    if (my $serdl = $conf->{serial}) {
        my @dl = split(/,/, $serdl);
        foreach my $dev (@dl) {
            next if !$dev;
            if (-c $dev) {
                push @$cmd, '-serial', "$dev";
            }
        }
    }

    # parallel devices (only existing character devices are passed on)
    if (my $pardl = $conf->{parallel}) {
        my @dl = split(/,/, $pardl);
        foreach my $dev (@dl) {
            next if !$dev;
            if (-c $dev) {
                push @$cmd, '-parallel', "$dev";
            }
        }
    }

    my $vmname = $conf->{name} || "vm$vmid";

    push @$cmd, '-name', $vmname;

    my $sockets = 1;
    $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer used
    $sockets = $conf->{sockets} if $conf->{sockets};

    my $cores = $conf->{cores} || 1;

    my $boot_opt;

    push @$cmd, '-smp', "sockets=$sockets,cores=$cores";

    push @$cmd, '-cpu', $conf->{cpu} if $conf->{cpu};

    $boot_opt = "menu=on";
    if ($conf->{boot}) {
        $boot_opt .= ",order=$conf->{boot}";
    }

    push @$cmd, '-nodefaults';

    push @$cmd, '-boot', $boot_opt if $boot_opt;

    push @$cmd, '-no-acpi' if defined($conf->{acpi}) && $conf->{acpi} == 0;

    push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;

    my $vga = $conf->{vga};
    if (!$vga) {
        if ($conf->{ostype} && ($conf->{ostype} eq 'win7' || $conf->{ostype} eq 'w2k8')) {
            $vga = 'std';
        } else {
            $vga = 'cirrus';
        }
    }

    push @$cmd, '-vga', $vga if $vga; # for kvm 77 and later

    # time drift fix
    my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
    push @$cmd, '-tdf' if $tdf;

    my $nokvm = defined($conf->{kvm}) && $conf->{kvm} == 0 ? 1 : 0;

    if (my $ost = $conf->{ostype}) {
        # other, wxp, w2k, w2k3, w2k8, wvista, win7, l24, l26

        if ($ost =~ m/^w/) { # windows
            push @$cmd, '-localtime' if !defined($conf->{localtime});

            # use rtc-td-hack when acpi is enabled
            if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
                push @$cmd, '-rtc-td-hack';
            }
        }

        # -tdf ?
        # -no-acpi
        # -no-kvm
        # -win2k-hack ?
    }

    push @$cmd, '-no-kvm' if $nokvm;

    push @$cmd, '-localtime' if $conf->{localtime};

    push @$cmd, '-startdate', $conf->{startdate} if $conf->{startdate};

    push @$cmd, '-S' if $conf->{freeze};

    # set keyboard layout
    my $kb = $conf->{keyboard} || $defaults->{keyboard};
    push @$cmd, '-k', $kb if $kb;

    # enable sound
    #my $soundhw = $conf->{soundhw} || $defaults->{soundhw};
    #push @$cmd, '-soundhw', 'es1370';
    #push @$cmd, '-soundhw', $soundhw if $soundhw;

    if ($conf->{watchdog}) {
        my $wdopts = parse_watchdog($conf->{watchdog});
        push @$cmd, '-watchdog', $wdopts->{model} || 'i6300esb';
        push @$cmd, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
    }

    my $vollist = [];
    my $scsicontroller = {}; # SCSI controllers already added to $cmd

    foreach_drive($conf, sub {
        my ($ds, $drive) = @_;

        eval {
            PVE::Storage::parse_volume_id($drive->{file});
            push @$vollist, $drive->{file};
        }; # ignore errors

        $use_virtio = 1 if $ds =~ m/^virtio/;

        if ($drive->{interface} eq 'scsi') {
            my $maxdev = 7;
            my $controller = int($drive->{index} / $maxdev);
            push @$cmd, '-device', "lsi,id=scsi$controller" if !$scsicontroller->{$controller};
            # Record the controller in the OUTER hash. The former
            # 'my $scsicontroller->{...} = 1' declared a new lexical here,
            # so each SCSI drive added the same controller again.
            $scsicontroller->{$controller} = 1;
        }

        my $tmp = print_drive_full($storecfg, $vmid, $drive);
        $tmp .= ",boot=on" if $conf->{bootdisk} && ($conf->{bootdisk} eq $ds);
        push @$cmd, '-drive', $tmp;
        push @$cmd, '-device', print_drivedevice_full($storecfg, $vmid, $drive);
    });

    push @$cmd, '-m', $conf->{memory} || $defaults->{memory};

    my $foundnet = 0;

    foreach my $k (sort keys %$conf) {
        next if $k !~ m/^net(\d+)$/;
        my $i = int($1);

        die "got strange net id '$i'\n" if $i >= ${MAX_NETS};

        if ($conf->{"net$i"} && (my $net = parse_net($conf->{"net$i"}))) {

            $foundnet = 1;

            my $ifname = "tap${vmid}i$i";

            # kvm uses TUNSETIFF ioctl, and that limits ifname length
            die "interface name '$ifname' is too long (max 15 character)\n"
                if length($ifname) >= 16;

            my $device = $net->{model};
            my $vhostparam = '';
            if ($net->{model} eq 'virtio') {
                $use_virtio = 1;
                $device = 'virtio-net-pci';
                $vhostparam = ',vhost=on' if $kernel_has_vhost_net;
            };

            if ($net->{bridge}) {
                push @$cmd, '-netdev', "type=tap,id=${k},ifname=${ifname},script=/var/lib/qemu-server/pve-bridge$vhostparam";
            } else {
                push @$cmd, '-netdev', "type=user,id=${k},hostname=$vmname";
            }

            # qemu > 0.15 always try to boot from network - we disable that by
            # not loading the pxe rom file
            my $extra = (!$conf->{boot} || ($conf->{boot} !~ m/n/)) ?
                "romfile=," : '';
            push @$cmd, '-device', "$device,${extra}mac=$net->{macaddr},netdev=${k}";
        }
    }

    push @$cmd, '-net', 'none' if !$foundnet;

    # hack: virtio with fairsched is unreliable, so we do not use fairsched
    # when the VM uses virtio devices.
    if (!$use_virtio && $have_ovz) {

        my $cpuunits = defined($conf->{cpuunits}) ?
            $conf->{cpuunits} : $defaults->{cpuunits};

        push @$cmd, '-cpuunits', $cpuunits if $cpuunits;

        # fixme: cpulimit is currently ignored
        #push @$cmd, '-cpulimit', $conf->{cpulimit} if $conf->{cpulimit};
    }

    # add custom args
    if ($conf->{args}) {
        my $aa = split_args($conf->{args});
        push @$cmd, @$aa;
    }

    return wantarray ? ($cmd, $vollist) : $cmd;
}
2213
# Return the path of the VNC unix socket of VM $vmid in $var_run_tmpdir.
sub vnc_socket {
    my ($vmid) = @_;

    return $var_run_tmpdir . "/$vmid.vnc";
}
2218
# Return the path of the monitor unix socket of VM $vmid in $var_run_tmpdir.
sub monitor_socket {
    my ($vmid) = @_;

    return $var_run_tmpdir . "/$vmid.mon";
}
2223
# Return the path of the pidfile of VM $vmid in $var_run_tmpdir.
sub pidfile_name {
    my ($vmid) = @_;

    return $var_run_tmpdir . "/$vmid.pid";
}
2228
# Generate a random MAC address as six upper-case hex octets joined by
# ':'. The octets are taken from the SHA1 hex digest of rand() and time().
# In the first octet the multicast bit is cleared and the "locally
# administered" bit is set.
sub random_ether_addr {

    my $rand = Digest::SHA1::sha1_hex(rand(), time());

    # the first 12 hex digits of the digest form the 6 octets
    my @octets = map { hex(substr($rand, $_*2, 2)) } (0 .. 5);

    $octets[0] &= 0xfe; # clear multicast
    $octets[0] |= 2; # set local id

    return join(':', map { sprintf("%02X", $_) } @octets);
}
2251
# Find a free TCP port for migration in the range 60000..60009 by trying
# to open a listening socket on localhost. Returns the first port that can
# be bound (the probe socket is closed again); dies if none is free.
sub next_migrate_port {

    foreach my $port (60000 .. 60009) {

        my $sock = IO::Socket::INET->new(Listen => 5,
                                         LocalAddr => 'localhost',
                                         LocalPort => $port,
                                         ReuseAddr => 1,
                                         Proto => 0);

        next if !$sock;

        close($sock);
        return $port;
    }

    die "unable to find free migration port";
}
2270
# Start VM $vmid while holding its config lock.
#
#   $statefile - optional; 'tcp' makes kvm listen for an incoming
#                migration on a free port (the port is printed), any other
#                value names a state file to resume from. A missing state
#                file only causes a warning and a normal start.
#   $skiplock  - when true, a 'lock' setting in the config is ignored
#
# Dies if the VM is locked or already running, if a configured host PCI
# device cannot be prepared, or if the kvm command fails. Without a
# migration URI the command is run with a 30 second timeout.
sub vm_start {
    my ($storecfg, $vmid, $statefile, $skiplock) = @_;

    lock_config($vmid, sub {
        my $conf = load_config($vmid);

        check_lock($conf) if !$skiplock;

        if (check_running($vmid)) {
            my $msg = "VM $vmid already running - start failed\n" ;
            syslog('err', $msg);
            die $msg;
        }

        syslog('info', "VM $vmid start");

        my $migrate_uri;
        my $migrate_port = 0;

        if ($statefile && $statefile eq 'tcp') {
            $migrate_port = next_migrate_port();
            $migrate_uri = "tcp:localhost:${migrate_port}";
        } elsif ($statefile) {
            if (-f $statefile) {
                $migrate_uri = "exec:cat $statefile";
            } else {
                warn "state file '$statefile' does not exist - doing normal startup\n";
            }
        }

        my $defaults = load_defaults();

        my ($cmd, $vollist) = config_to_command($storecfg, $vmid, $conf, $defaults, $migrate_uri);

        # host pci devices
        for (my $i = 0; $i < $MAX_HOSTPCI_DEVICES; $i++) {
            my $d = parse_hostpci($conf->{"hostpci$i"});
            next if !$d;

            my $pciid = $d->{pciid};
            my $info = pci_device_info("0000:$pciid");

            die "IOMMU not present\n" if !check_iommu_support();
            die "no pci device info for device '$pciid'\n" if !$info;
            die "can't unbind pci device '$pciid'\n" if !pci_dev_bind_to_stub($info);
            die "can't reset pci device '$pciid'\n" if !pci_dev_reset($info);
        }

        PVE::Storage::activate_volumes($storecfg, $vollist);

        eval { run_command($cmd, timeout => $migrate_uri ? undef : 30); };

        if (my $err = $@) {
            my $msg = "start failed: $err";
            syslog('err', "VM $vmid $msg");
            die $msg;
        }

        if ($statefile) {

            if ($statefile eq 'tcp') {
                print "migration listens on port $migrate_port\n";
            } else {
                unlink $statefile;
                # fixme: send resume - is that necessary ?
                eval { vm_monitor_command($vmid, "cont", 1) };
            }
        }

        # the monitor settings below are best effort - errors are ignored

        my $migrate_speed = $conf->{migrate_speed} || $defaults->{migrate_speed};
        if ($migrate_speed) {
            my $moncmd = "migrate_set_speed ${migrate_speed}m";
            eval { vm_monitor_command($vmid, $moncmd, 1); };
        }

        my $migrate_downtime = $conf->{migrate_downtime} || $defaults->{migrate_downtime};
        if ($migrate_downtime) {
            my $moncmd = "migrate_set_downtime ${migrate_downtime}";
            eval { vm_monitor_command($vmid, $moncmd, 1); };
        }
    });
}
2353
# Read monitor output from $fh until the '(qemu) ' prompt terminates the
# collected data, or until the peer closes the connection.
#
# Parameters:
#   $fh      - handle to read from (must support the sysread method)
#   $timeout - passed to IO::Select::can_read for every wait
#
# Returns the collected text with the trailing prompt removed. If the
# stream ends without a prompt, everything read so far is returned.
#
# Dies with "monitor read timeout\n" if no data becomes readable in time,
# and with the system error text if sysread fails.
sub __read_avail {
    my ($fh, $timeout) = @_;

    my $sel = IO::Select->new();
    $sel->add($fh);

    my $res = '';
    my $buf;

    my @ready;
    while (scalar(@ready = $sel->can_read($timeout))) {
        my $count = $fh->sysread($buf, 8192);

        die "$!\n" if !defined($count);

        last if !$count; # EOF

        $res .= $buf;

        # Test the accumulated data, not just the last chunk: the prompt
        # may arrive split across two reads and would be missed otherwise.
        if ($res =~ m/^(.*)\(qemu\) $/s) {
            $res = $1;
            last;
        }
    }

    die "monitor read timeout\n" if !scalar(@ready);

    return $res;
}
2385
# Send a single command to the qemu monitor of VM $vmid and return its
# output.
#
# Parameters:
#   $vmid   - id of the VM
#   $cmdstr - monitor command line to send
#   $nolog  - when true, the command is not written to syslog
#
# Returns the monitor response joined with "\n" (the first response line is
# dropped as command echo unless it reports an unknown command), or undef
# for 'q'/'quit' and for an empty response.
#
# Any failure is logged to syslog and re-thrown.
sub vm_monitor_command {
    my ($vmid, $cmdstr, $nolog) = @_;

    my $res;

    syslog("info", "VM $vmid monitor command '$cmdstr'") if !$nolog;

    # hack: migrate sometime blocks the monitor (when migrate_downtime
    # is set), so those commands get a much longer timeout
    my $is_migrate_cmd = $cmdstr =~ m/^(info\s+migrate|migrate\s)/;
    my $long_timeout = 60*60; # 1 hour

    eval {
        die "VM not running\n" if !check_running($vmid);

        my $sname = monitor_socket($vmid);

        my $sock = IO::Socket::UNIX->new(Peer => $sname) ||
            die "unable to connect to VM $vmid socket - $!\n";

        my $timeout = $is_migrate_cmd ? $long_timeout : 3;

        # the monitor greets us with a banner followed by the prompt
        my $banner = __read_avail($sock, $timeout);

        die "got unexpected qemu monitor banner\n"
            if $banner !~ m/^QEMU\s+(\S+)\s+monitor\s/;

        my $sel = IO::Select->new();
        $sel->add($sock);

        my @writable = $sel->can_write($timeout);
        die "monitor write error - timeout" if !scalar(@writable);

        my $fullcmd = "$cmdstr\r";

        my $written = $sock->syswrite($fullcmd);
        if (!$written || ($written != length($fullcmd))) {
            die "monitor write error - $!";
        }

        # no response is read after asking qemu to quit
        return if ($cmdstr eq 'q') || ($cmdstr eq 'quit');

        if ($is_migrate_cmd) {
            $timeout = $long_timeout;
        } elsif ($cmdstr =~ m/^(eject|change)/) {
            $timeout = 60; # note: cdrom mount command is slow
        } else {
            $timeout = 20;
        }

        if ($res = __read_avail($sock, $timeout)) {

            my @lines = split("\r?\n", $res);

            shift @lines if $lines[0] !~ m/^unknown command/; # skip echo

            $res = join("\n", @lines) . "\n";
        }
    };

    if (my $err = $@) {
        syslog("err", "VM $vmid monitor command failed - $err");
        die $err;
    }

    return $res;
}
2459
# Return the command line that would be used to start VM $vmid as a single
# string, with the arguments joined by blanks (no shell quoting is applied).
#
# NOTE(review): config_to_command() is called in scalar context here and is
# expected to yield the command array reference in that case - confirm
# against its implementation.
sub vm_commandline {
    my ($storecfg, $vmid) = @_;

    my $conf = load_config($vmid);
    my $defaults = load_defaults();

    my $cmd = config_to_command($storecfg, $vmid, $conf, $defaults);

    return join(' ', @{$cmd});
}
2471
# Send 'system_reset' to the monitor of VM $vmid while holding the config
# lock. The 'lock' setting of the config is checked unless $skiplock is true.
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $do_reset = sub {

        my $conf = load_config($vmid);

        check_lock($conf) unless $skiplock;

        syslog("info", "VM $vmid sending 'reset'");

        # third argument: do not log the monitor command itself
        vm_monitor_command($vmid, "system_reset", 1);
    };

    lock_config($vmid, $do_reset);
}
2486
# Send 'system_powerdown' to the monitor of VM $vmid while holding the
# config lock. The 'lock' setting of the config is checked unless $skiplock
# is true. This only sends the request; it does not wait for the VM to stop.
sub vm_shutdown {
    my ($vmid, $skiplock) = @_;

    my $do_shutdown = sub {

        my $conf = load_config($vmid);

        check_lock($conf) unless $skiplock;

        syslog("info", "VM $vmid sending 'shutdown'");

        # third argument: do not log the monitor command itself
        vm_monitor_command($vmid, "system_powerdown", 1);
    };

    lock_config($vmid, $do_shutdown);
}
2501
# Stop VM $vmid, escalating step by step while holding the config lock:
#   1. monitor command 'quit', then wait up to 50 polls (one per second)
#   2. SIGTERM if the VM is still running (or 'quit' failed), then wait up
#      to 10 more polls
#   3. SIGKILL if it is still running
# Finally the fairsched node of the VM is removed.
#
# Nothing is done (besides a log entry) if the VM is not running. The 'lock'
# setting of the config is checked unless $skiplock is true.
sub vm_stop {
    my ($vmid, $skiplock) = @_;

    lock_config($vmid, sub {

        my $pid = check_running($vmid);

        if (!$pid) {
            syslog('info', "VM $vmid already stopped");
            return;
        }

        my $conf = load_config($vmid);

        check_lock($conf) unless $skiplock;

        syslog("info", "VM $vmid stopping");

        # Poll once per second for at most $seconds polls. Returns true
        # when the VM was seen running on every poll.
        my $outlives = sub {
            my ($seconds) = @_;

            my $elapsed = 0;
            while (($elapsed < $seconds) && check_running($vmid)) {
                $elapsed++;
                sleep 1;
            }

            return $elapsed >= $seconds;
        };

        eval { vm_monitor_command($vmid, "quit", 1); };

        if ($@) {
            syslog('info', "VM $vmid quit failed - terminating now with SIGTERM");
            kill 15, $pid;
        } elsif ($outlives->(50)) { # fixme: how long?
            syslog('info', "VM $vmid still running - terminating now with SIGTERM");
            kill 15, $pid;
        }

        # wait again
        if ($outlives->(10)) {
            syslog('info', "VM $vmid still running - terminating now with SIGKILL\n");
            kill 9, $pid;
        }

        fairsched_rmnod($vmid); # try to destroy group
    });
}
2560
# Send 'stop' to the monitor of VM $vmid while holding the config lock.
# The 'lock' setting of the config is checked unless $skiplock is true.
sub vm_suspend {
    my ($vmid, $skiplock) = @_;

    my $do_suspend = sub {

        my $conf = load_config($vmid);

        check_lock($conf) unless $skiplock;

        syslog("info", "VM $vmid suspend");

        # third argument: do not log the monitor command itself
        vm_monitor_command($vmid, "stop", 1);
    };

    lock_config($vmid, $do_suspend);
}
2575
# Send 'cont' to the monitor of VM $vmid while holding the config lock.
# The 'lock' setting of the config is checked unless $skiplock is true.
sub vm_resume {
    my ($vmid, $skiplock) = @_;

    my $do_resume = sub {

        my $conf = load_config($vmid);

        check_lock($conf) unless $skiplock;

        syslog("info", "VM $vmid resume");

        # third argument: do not log the monitor command itself
        vm_monitor_command($vmid, "cont", 1);
    };

    lock_config($vmid, $do_resume);
}
2590
# Send the ctrl-alt-delete key combination to VM $vmid through the monitor
# ('sendkey') while holding the config lock. The 'lock' setting of the
# config is checked unless $skiplock is true.
sub vm_cad {
    my ($vmid, $skiplock) = @_;

    my $do_cad = sub {

        my $conf = load_config($vmid);

        check_lock($conf) unless $skiplock;

        syslog("info", "VM $vmid sending cntl-alt-delete");

        # third argument: do not log the monitor command itself
        vm_monitor_command($vmid, "sendkey ctrl-alt-delete", 1);
    };

    lock_config($vmid, $do_cad);
}
2605
# Destroy VM $vmid (calls destroy_vm) while holding the config lock.
#
# Refuses to act on a running VM. The 'lock' setting of the config is
# checked unless $skiplock is true. Failures inside the destroy step are
# logged to syslog and re-thrown.
sub vm_destroy {
    my ($storecfg, $vmid, $skiplock) = @_;

    lock_config($vmid, sub {

        my $conf = load_config($vmid);

        check_lock($conf) unless $skiplock;

        syslog("info", "VM $vmid destroy called (removing all data)");

        eval {
            die "VM is running\n" if check_running($vmid);

            fairsched_rmnod($vmid); # try to destroy group
            destroy_vm($storecfg, $vmid);
        };

        if (my $err = $@) {
            syslog("err", "VM $vmid destroy failed - $err");
            die $err;
        }
    });
}
2634
# Stop all running VMs, escalating in three phases:
#   1. send a shutdown request to every running VM and wait up to $timeout
#      (default 3*60), polling every 5 (units as taken by sleep)
#   2. send the monitor command 'quit' to the VMs still running and wait up
#      to 30 more
#   3. send SIGTERM to whatever is left
# Returns as soon as no VM with a pid is listed by vzlist() any more.
sub vm_stopall {
    my ($timeout) = @_;

    $timeout = 3*60 if !$timeout;

    my $wt = 5; # poll interval
    my ($vzlist, $count);

    # ids of all VMs in the given list that have a pid
    my $running_ids = sub {
        my ($list) = @_;
        return grep { $list->{$_}->{pid} } keys %$list;
    };

    # re-read the VM list and update the number of running VMs
    my $refresh = sub {
        $vzlist = vzlist();
        my @ids = $running_ids->($vzlist);
        $count = scalar(@ids);
    };

    # poll until nothing is running any more or $limit is used up
    my $wait_for_exit = sub {
        my ($limit) = @_;

        my $maxtries = int(($limit + $wt -1)/$wt);
        my $try = 0;
        while (($try < $maxtries) && $count) {
            $try++;
            sleep $wt;

            $refresh->();
            last if !$count;
        }
    };

    $refresh->();

    if ($count) {

        my $msg = "Stopping Qemu Server - sending shutdown requests to all VMs\n";
        syslog('info', $msg);
        print STDERR $msg;

        foreach my $vmid ($running_ids->($vzlist)) {
            eval { vm_shutdown($vmid, 1); };
            print STDERR $@ if $@;
        }

        $wait_for_exit->($timeout);

        return if !$count;

        foreach my $vmid ($running_ids->($vzlist)) {
            $msg = "VM $vmid still running - sending stop now\n";
            syslog('info', $msg);
            print $msg;

            eval { vm_monitor_command($vmid, "quit", 1); };
            print STDERR $@ if $@;
        }

        $timeout = 30;
        $wait_for_exit->($timeout);

        return if !$count;

        foreach my $vmid ($running_ids->($vzlist)) {
            $msg = "VM $vmid still running - terminating now with SIGTERM\n";
            syslog('info', $msg);
            print $msg;
            kill 15, $vzlist->{$vmid}->{pid};
        }

        # this is called by system shutdown scripts, so remaining
        # processes get killed anyway (no need to send kill -9 here)

        $msg = "Qemu Server stopped\n";
        syslog('info', $msg);
        print STDERR $msg;
    }
}
2724
2725# pci helpers
2726
# Write $buf to $filename, truncating/creating the file.
#
# Returns a true value only if the file could be opened, the data was
# accepted by print AND the handle was closed without error. Returns undef
# if the file cannot be opened, and a false value on any write failure.
#
# The close result matters: output is buffered, so the data is normally
# only handed to the kernel when the handle is closed. A write the kernel
# rejects (common for sysfs attributes) is therefore only reported by close.
sub file_write {
    my ($filename, $buf) = @_;

    my $fh = IO::File->new($filename, "w");
    return undef if !$fh;

    my $printed = print {$fh} $buf;

    # always close, even when print already failed
    my $closed = $fh->close();

    return $printed && $closed;
}
2739
# Collect basic information about a PCI device from sysfs.
#
# $name is the full device address 'dddd:bb:ss.f' (lower case hex digits).
#
# Returns undef if the name is malformed or the irq, vendor or device
# attribute cannot be read/parsed. Otherwise returns a hash reference with
# the keys name, vendor, product (both without the leading '0x'), domain,
# bus, slot, func, irq and has_fl_reset (true if the device has a 'reset'
# file in sysfs, 0 otherwise).
sub pci_device_info {
    my ($name) = @_;

    my ($domain, $bus, $slot, $func) =
        $name =~ m/^([a-f0-9]{4}):([a-f0-9]{2}):([a-f0-9]{2})\.([a-f0-9])$/;
    return undef if !defined($func); # no match

    my $devdir = "$pcisysfs/devices/$name";

    my $irq = file_read_firstline("$devdir/irq");
    return undef if !defined($irq) || $irq !~ m/^\d+$/;

    # the substitution both validates and strips the '0x' prefix
    my $vendor = file_read_firstline("$devdir/vendor");
    return undef if !defined($vendor) || $vendor !~ s/^0x//;

    my $product = file_read_firstline("$devdir/device");
    return undef if !defined($product) || $product !~ s/^0x//;

    my $info = {
        name => $name,
        vendor => $vendor,
        product => $product,
        domain => $domain,
        bus => $bus,
        slot => $slot,
        func => $func,
        irq => $irq,
        has_fl_reset => -f "$devdir/reset" || 0,
    };

    return $info;
}
2771
# Trigger a reset of a PCI device by writing "1" to its sysfs 'reset' file.
#
# $dev is a device info hash as returned by pci_device_info(); only its
# 'name' entry is used. Returns the result of file_write().
sub pci_dev_reset {
    my ($dev) = @_;

    my $reset_file = "$pcisysfs/devices/$dev->{name}/reset";

    return file_write($reset_file, "1");
}
2781
# Make sure a PCI device is bound to the pci-stub driver.
#
# $dev is a device info hash as returned by pci_device_info(); the entries
# 'name', 'vendor' and 'product' are used.
#
# Steps: register the vendor/product id with pci-stub, unbind the device
# from its current driver (a failed unbind is tolerated when the device has
# no driver 'unbind' file), then bind it to pci-stub unless that already
# happened.
#
# Returns true if the device shows up below the pci-stub driver directory,
# undef/false otherwise.
sub pci_dev_bind_to_stub {
    my ($dev) = @_;

    my $name = $dev->{name};
    my $stubdir = "$pcisysfs/drivers/pci-stub";
    my $bound_marker = "$stubdir/$name";

    # already bound - nothing to do
    return 1 if -d $bound_marker;

    my $id = "$dev->{vendor} $dev->{product}";
    return undef if !file_write("$stubdir/new_id", $id);

    my $unbind = "$pcisysfs/devices/$name/driver/unbind";
    return undef if !file_write($unbind, $name) && -f $unbind;

    # only bind explicitly if the device is not bound by now
    return undef if !-d $bound_marker && !file_write("$stubdir/bind", $name);

    return -d $bound_marker;
}
2805
28061;