]> git.proxmox.com Git - pve-container.git/blob - src/PVE/LXC.pm
added 'acl' and 'ro' mountpoint options
[pve-container.git] / src / PVE / LXC.pm
1 package PVE::LXC;
2
3 use strict;
4 use warnings;
5 use POSIX qw(EINTR);
6
7 use File::Path;
8 use File::Spec;
9 use Cwd qw();
10 use Fcntl ':flock';
11
12 use PVE::Cluster qw(cfs_register_file cfs_read_file);
13 use PVE::Storage;
14 use PVE::SafeSyslog;
15 use PVE::INotify;
16 use PVE::JSONSchema qw(get_standard_option);
17 use PVE::Tools qw($IPV6RE $IPV4RE dir_glob_foreach);
18 use PVE::Network;
19 use PVE::AccessControl;
20 use PVE::ProcFSTools;
21 use Time::HiRes qw (gettimeofday);
22
23 use Data::Dumper;
24
# Name of the local node; used as the default node when building config paths.
my $nodename = PVE::INotify::nodename();

# CPU information of the host, read once when the module is loaded.
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();

# Common tar flags, exposed as a package variable.
our $COMMON_TAR_FLAGS = [
    qw(--sparse --numeric-owner --acls --xattrs),
    '--xattrs-include=user.*',
    '--xattrs-include=security.capability',
    '--warning=no-xattr-write',
];

# Register the parser and writer used for files below '/lxc/'.
cfs_register_file('/lxc/', \&parse_pct_config, \&write_pct_config);
36
# Property-string schema of the 'rootfs' option. The mountpoint schema
# ($mp_desc) is built on top of these keys.
my $rootfs_desc = {
    volume => {
        description => 'Volume, device or directory to mount into the container.',
        format_description => 'volume',
        type => 'string',
        default_key => 1,
    },
    backup => {
        description => 'Whether to include the mountpoint in backups.',
        format_description => '[1|0]',
        type => 'boolean',
        optional => 1,
    },
    size => {
        description => 'Volume size (read only value).',
        format_description => 'DiskSize',
        format => 'disk-size',
        type => 'string',
        optional => 1,
    },
    acl => {
        description => 'Explicitly enable or disable ACL support.',
        format_description => 'acl',
        type => 'boolean',
        optional => 1,
    },
    ro => {
        description => 'Read-only mountpoint (not supported with bind mounts)',
        format_description => 'ro',
        type => 'boolean',
        optional => 1,
    },
};
70
# Standard option describing the container root file system.
PVE::JSONSchema::register_standard_option(
    'pve-ct-rootfs',
    {
        description => "Use volume as container root.",
        type => 'string',
        format => $rootfs_desc,
        optional => 1,
    },
);

# Standard option for snapshot names (config id, at most 40 characters).
PVE::JSONSchema::register_standard_option(
    'pve-lxc-snapshot-name',
    {
        description => "The name of the snapshot.",
        type => 'string',
        format => 'pve-configid',
        maxLength => 40,
    },
);
82
# Schema of all top-level container config options. The netN, mpN and unusedN
# entries are added to this hash further down in the file.
my $confdesc = {
    lock => {
        type => 'string',
        enum => [qw(migrate backup snapshot rollback)],
        description => "Lock/unlock the VM.",
        optional => 1,
    },
    onboot => {
        type => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default => 0,
        optional => 1,
    },
    startup => get_standard_option('pve-startup-order'),
    template => {
        type => 'boolean',
        description => "Enable/disable Template.",
        default => 0,
        optional => 1,
    },
    arch => {
        type => 'string',
        enum => ['amd64', 'i386'],
        description => "OS architecture type.",
        default => 'amd64',
        optional => 1,
    },
    ostype => {
        type => 'string',
        enum => ['debian', 'ubuntu', 'centos', 'fedora', 'opensuse', 'archlinux'],
        description => "OS type. Corresponds to lxc setup scripts in /usr/share/lxc/config/<ostype>.common.conf.",
        optional => 1,
    },
    console => {
        type => 'boolean',
        description => "Attach a console device (/dev/console) to the container.",
        default => 1,
        optional => 1,
    },
    tty => {
        type => 'integer',
        description => "Specify the number of tty available to the container",
        minimum => 0,
        maximum => 6,
        default => 2,
        optional => 1,
    },
    cpulimit => {
        type => 'number',
        description => "Limit of CPU usage. Note if the computer has 2 CPUs, it has a total of '2' CPU time. Value '0' indicates no CPU limit.",
        minimum => 0,
        maximum => 128,
        default => 0,
        optional => 1,
    },
    cpuunits => {
        type => 'integer',
        description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to the weights of all the other running VMs.\n\nNOTE: You can disable fair-scheduler configuration by setting this to 0.",
        minimum => 0,
        maximum => 500000,
        default => 1024,
        optional => 1,
    },
    memory => {
        type => 'integer',
        description => "Amount of RAM for the VM in MB.",
        minimum => 16,
        default => 512,
        optional => 1,
    },
    swap => {
        type => 'integer',
        description => "Amount of SWAP for the VM in MB.",
        minimum => 0,
        default => 512,
        optional => 1,
    },
    hostname => {
        type => 'string', format => 'dns-name',
        description => "Set a host name for the container.",
        maxLength => 255,
        optional => 1,
    },
    description => {
        type => 'string',
        description => "Container description. Only used on the configuration web interface.",
        optional => 1,
    },
    searchdomain => {
        type => 'string', format => 'dns-name-list',
        description => "Sets DNS search domains for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
        optional => 1,
    },
    nameserver => {
        type => 'string', format => 'address-list',
        description => "Sets DNS server IP address for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
        optional => 1,
    },
    rootfs => get_standard_option('pve-ct-rootfs'),
    parent => {
        type => 'string', format => 'pve-configid',
        description => "Parent snapshot name. This is used internally, and should not be modified.",
        maxLength => 40,
        optional => 1,
    },
    snaptime => {
        type => 'integer',
        description => "Timestamp for snapshots.",
        minimum => 0,
        optional => 1,
    },
    cmode => {
        type => 'string',
        enum => ['shell', 'console', 'tty'],
        description => "Console mode. By default, the console command tries to open a connection to one of the available tty devices. By setting cmode to 'console' it tries to attach to /dev/console instead. If you set cmode to 'shell', it simply invokes a shell inside the container (no login).",
        default => 'tty',
        optional => 1,
    },
    protection => {
        type => 'boolean',
        description => "Sets the protection flag of the container. This will prevent the CT or CT's disk remove/update operation.",
        default => 0,
        optional => 1,
    },
    unprivileged => {
        type => 'boolean',
        description => "Makes the container run as unprivileged user. (Should not be modified manually.)",
        default => 0,
        optional => 1,
    },
};
214
# Raw 'lxc.*' keys recognised by the config parser.
#   value 1       - key is accepted and passed through
#   value string  - key is rejected; the string is the warning text
# Keys below 'lxc.cgroup.' are not listed; the parser matches them by prefix.
my $valid_lxc_conf_keys = {
    (map { $_ => 1 } qw(
        lxc.include
        lxc.arch
        lxc.utsname
        lxc.haltsignal
        lxc.rebootsignal
        lxc.stopsignal
        lxc.init_cmd
        lxc.network.type
        lxc.network.flags
        lxc.network.link
        lxc.network.mtu
        lxc.network.name
        lxc.network.hwaddr
        lxc.network.ipv4
        lxc.network.ipv4.gateway
        lxc.network.ipv6
        lxc.network.ipv6.gateway
        lxc.network.script.up
        lxc.network.script.down
        lxc.pts
        lxc.console.logfile
        lxc.console
        lxc.tty
        lxc.devttydir
        lxc.hook.autodev
        lxc.autodev
        lxc.kmsg
        lxc.mount
        lxc.mount.entry
        lxc.mount.auto
        lxc.rootfs.mount
        lxc.cap.drop
        lxc.cap.keep
        lxc.aa_profile
        lxc.aa_allow_incomplete
        lxc.se_context
        lxc.seccomp
        lxc.id_map
        lxc.hook.pre-start
        lxc.hook.pre-mount
        lxc.hook.mount
        lxc.hook.start
        lxc.hook.stop
        lxc.hook.post-stop
        lxc.hook.clone
        lxc.hook.destroy
        lxc.loglevel
        lxc.logfile
        lxc.start.auto
        lxc.start.delay
        lxc.start.order
        lxc.group
        lxc.environment
    )),
    'lxc.rootfs' => 'lxc.rootfs is auto generated from rootfs',
    'lxc.rootfs.options' => 'lxc.rootfs.options is not supported' .
        ', please use mountpoint options in the "rootfs" key',
};
274
# Property-string schema of a 'netN' entry. It is also registered as the
# 'pve-lxc-network' format.
my $netconf_desc = {
    type => {
        type => 'string',
        optional => 1,
        description => "Network interface type.",
        enum => [qw(veth)],
    },
    name => {
        type => 'string',
        format_description => 'String',
        description => 'Name of the network device as seen from inside the container. (lxc.network.name)',
        pattern => '[-_.\w\d]+',
    },
    bridge => {
        type => 'string',
        format_description => 'vmbr<Number>',
        description => 'Bridge to attach the network device to.',
        pattern => '[-_.\w\d]+',
        optional => 1,
    },
    hwaddr => {
        type => 'string',
        format_description => 'MAC',
        # The description used to be a copy of the 'bridge' text.
        description => 'MAC address of the network device. (lxc.network.hwaddr)',
        pattern => qr/(?:[a-f0-9]{2}:){5}[a-f0-9]{2}/i,
        optional => 1,
    },
    mtu => {
        type => 'integer',
        format_description => 'Number',
        description => 'Maximum transfer unit of the interface. (lxc.network.mtu)',
        minimum => 64, # minimum ethernet frame is 64 bytes
        optional => 1,
    },
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        description => 'IPv4 address in CIDR format.',
        optional => 1,
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        description => 'Default gateway for IPv4 traffic.',
        optional => 1,
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        description => 'IPv6 address in CIDR format.',
        optional => 1,
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        description => 'Default gateway for IPv6 traffic.',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        format_description => '[1|0]',
        description => "Controls whether this interface's firewall rules should be used.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        format_description => 'VlanNo',
        minimum => '2',
        maximum => '4094',
        description => "VLAN tag for this interface.",
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:;\d+)*/,
        format_description => 'vlanid[;vlanid...]',
        description => "VLAN ids to pass through the interface",
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-lxc-network', $netconf_desc);
360
# Add the options net0 .. net<MAX-1> to the config schema; every entry gets
# its own descriptor hash.
my $MAX_LXC_NETWORKS = 10;
foreach my $idx (0 .. $MAX_LXC_NETWORKS - 1) {
    my $net_option = {
        description => "Specifies network interfaces for the container.",
        type => 'string',
        format => $netconf_desc,
        optional => 1,
    };
    $confdesc->{"net$idx"} = $net_option;
}
369
# Mountpoint schema: all rootfs keys plus the mandatory in-container path 'mp'.
# It is also registered as the 'pve-ct-mountpoint' format.
my $mp_desc = {
    %$rootfs_desc,
    mp => {
        description => 'Path to the mountpoint as seen from inside the container.',
        format_description => 'Path',
        type => 'string',
    },
};
PVE::JSONSchema::register_format('pve-ct-mountpoint', $mp_desc);
379
# Descriptor shared by all 'unusedN' options (the same hash reference is
# stored for every index).
my $unuseddesc = {
    optional => 1,
    type => 'string', format => 'pve-volume-id',
    description => "Reference to unused volumes.",
};

# Add the options mp0 .. mp<MAX-1> to the config schema.
my $MAX_MOUNT_POINTS = 10;
for (my $i = 0; $i < $MAX_MOUNT_POINTS; $i++) {
    $confdesc->{"mp$i"} = {
        optional => 1,
        type => 'string', format => $mp_desc,
        description => "Use volume as container mount point (experimental feature).",
    };
}

# Add the options unused0 .. unused<MAX-1>. The loop is bounded by
# $MAX_UNUSED_DISKS so it stays in sync with add_unused_volume(), which
# iterates over the same constant.
my $MAX_UNUSED_DISKS = $MAX_MOUNT_POINTS;
for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) {
    $confdesc->{"unused$i"} = $unuseddesc;
}
400
# Serialize a container config hash into the text form of a config file.
#
# The top-level section is written first, followed by one "[name]" section
# per snapshot (sorted by name). Within a section the description is emitted
# as leading '#' comment lines, then the plain "key: value" pairs sorted by
# key, then the raw lxc entries in their stored order.
#
# Removes 'snapstate' from the top-level $conf. Dies if a plain value
# contains a newline. $filename is not used.
sub write_pct_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    # keys that are never written as plain "key: value" lines
    my %not_plain = map { $_ => 1 }
        qw(digest description pending snapshots snapname lxc);

    my $render_section = sub {
        my ($section) = @_;

        my $text = '';

        # the description goes to the top of the section as comment lines
        my $descr = $section->{description} || '';
        foreach my $descr_line (split(/\n/, $descr)) {
            $text .= '#' . PVE::Tools::encode_text($descr_line) . "\n";
        }

        foreach my $key (sort keys %$section) {
            next if $not_plain{$key};
            my $value = $section->{$key};
            die "detected invalid newline inside property '$key'\n" if $value =~ m/\n/;
            $text .= "$key: $value\n";
        }

        if (my $lxc_entries = $section->{lxc}) {
            foreach my $pair (@$lxc_entries) {
                my ($lxc_key, $lxc_value) = @$pair;
                $text .= "$lxc_key: $lxc_value\n";
            }
        }

        return $text;
    };

    my $raw = $render_section->($conf);

    foreach my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= $render_section->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
444
# Validate $value against the type of config option $key and return the
# normalized value: 1/0 for booleans, int() of the digits for integers, the
# unchanged value for numbers and strings.
#
# Dies for unknown keys, undefined values, values containing a line feed or
# carriage return, and values that fail the type check. Strings with a
# 'format' are passed to PVE::JSONSchema::check_format.
sub check_type {
    my ($key, $value) = @_;

    my $schema = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$schema;

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    my $type = $schema->{type};

    if ($type eq 'string') {
        my $fmt = $schema->{format};
        PVE::JSONSchema::check_format($fmt, $value) if $fmt;
        return $value;
    }

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    die "internal error"
}
480
# Parse the raw text of a container config file into a config hash.
#
# Parameters:
#   $filename - path of the config file; must end in "/lxc/<vmid>.conf"
#   $raw      - file content, or undef
#
# Returns undef if $raw is undef. Otherwise returns a hash with a 'digest'
# (SHA1 hex of $raw), the top-level options, and 'snapshots' holding one hash
# per "[name]" section. Accepted raw lxc keys are collected in the 'lxc'
# array of the current section as [key, value] pairs.
#
# Dies if the filename does not match. Unparsable lines and values that fail
# check_type only produce a warning; such a value is still stored.
sub parse_pct_config {
    my ($filename, $raw) = @_;

    return undef if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
    };

    # the dot is escaped so that only a literal ".conf" suffix matches
    $filename =~ m|/lxc/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    my $conf = $res;
    my $descr = '';
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            # a new snapshot section starts: flush the description collected
            # so far into the section that ends here
            $section = $1;
            $conf->{description} = $descr if $descr;
            $descr = '';
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)\s*$/) {
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(lxc\.[a-z0-9_\-\.]+)(:|\s*=)\s*(.*?)\s*$/) {
            my $key = $1;
            my $value = $3;
            # default to 0 so unknown keys do not trigger an
            # "uninitialized value" warning in the comparison below
            my $validity = $valid_lxc_conf_keys->{$key} // 0;
            if ($validity eq 1 || $key =~ m/^lxc\.cgroup\./) {
                push @{$conf->{lxc}}, [$key, $value];
            } elsif ($validity) {
                # a non-1 true value is the reason why the key is rejected
                warn "vm $vmid - $key: $validity\n";
            } else {
                warn "vm $vmid - unable to parse config: $line\n";
            }
        } elsif ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored like the other key patterns, so keys that merely end
            # in "snapstate" are not mistaken for it
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(\S.*)\s*$/) {
            my $key = $1;
            my $value = $2;
            eval { $value = check_type($key, $value); };
            warn "vm $vmid - unable to parse value of '$key' - $@" if $@;
            $conf->{$key} = $value;
        } else {
            warn "vm $vmid - unable to parse config: $line\n";
        }
    }

    $conf->{description} = $descr if $descr;

    delete $res->{snapstate}; # just to be sure

    return $res;
}
548
# Return a hash { vmid => { type => 'lxc' } } of all containers that the
# cluster VM list assigns to the local node. The id 0 is skipped.
sub config_list {
    my $res = {};

    my $vmlist = PVE::Cluster::get_vmlist();
    my $ids = $vmlist ? $vmlist->{ids} : undef;
    return $res if !$ids;

    foreach my $vmid (grep { $_ } keys %$ids) { # grep skips CT0
        my ($node, $type) = @{$ids->{$vmid}}{qw(node type)};
        next unless $node && $node eq $nodename;
        next unless $type && $type eq 'lxc';
        $res->{$vmid}->{type} = 'lxc';
    }

    return $res;
}
564
# Build the cluster-filesystem relative path of a container config file.
# $node defaults to the local node name.
sub cfs_config_path {
    my ($vmid, $node) = @_;

    my $owner = $node ? $node : $nodename;

    return "nodes/$owner/lxc/$vmid.conf";
}
571
# Absolute path of a container config file below /etc/pve.
sub config_file {
    my ($vmid, $node) = @_;

    return "/etc/pve/" . cfs_config_path($vmid, $node);
}
578
# Read and return the parsed config of container $vmid from node $node
# (local node if omitted). Dies if no config could be read.
sub load_config {
    my ($vmid, $node) = @_;

    # cfs_config_path() falls back to the local node for a false $node
    my $conf = PVE::Cluster::cfs_read_file(cfs_config_path($vmid, $node));

    if (!defined($conf)) {
        die "container $vmid does not exist\n";
    }

    return $conf;
}
590
# Write the config of a new container on the local node. The node's lxc
# config directory is created first; the result of mkdir is not checked.
sub create_config {
    my ($vmid, $conf) = @_;

    mkdir "/etc/pve/nodes/$nodename/lxc";

    write_config($vmid, $conf);
}
599
# Remove the config file of container $vmid on the local node.
sub destroy_config {
    my ($vmid) = @_;

    my $path = config_file($vmid, $nodename);
    unlink $path;
}
605
# Store $conf as the config of container $vmid on the local node.
sub write_config {
    my ($vmid, $conf) = @_;

    PVE::Cluster::cfs_write_file(cfs_config_path($vmid), $conf);
}
613
# flock: a single file handle per process and lock file is kept, so locking
# the same file repeatedly from one process succeeds; a reference count
# tracks how often it was acquired.

my $lock_handles = {};
my $lockdir = "/run/lock/lxc";

# Path of the lock file that guards the config of container $vmid.
sub lock_filename {
    my ($vmid) = @_;

    return $lockdir . "/pve-config-" . $vmid . ".lock";
}
625
# Acquire the exclusive config lock of container $vmid.
#
# Parameters:
#   $vmid    - container id
#   $timeout - seconds to wait for the lock (10 if false)
#
# The lock file handle is cached per process id and file name, together with
# a reference count that is incremented on every successful call.
# Dies with "can't lock file ..." if the file cannot be opened, the lock
# cannot be obtained, or the timeout expires.
sub lock_aquire {
    my ($vmid, $timeout) = @_;

    $timeout = 10 if !$timeout;
    my $mode = LOCK_EX;

    my $filename = lock_filename($vmid);

    mkdir $lockdir if !-d $lockdir;

    my $lock_func = sub {
        if (!$lock_handles->{$$}->{$filename}) {
            # open in append mode via an explicit mode argument instead of
            # the indirect-object call with a ">>" prefixed file name
            my $fh = IO::File->new($filename, 'a') ||
                die "can't open file - $!\n";
            $lock_handles->{$$}->{$filename} = { fh => $fh, refcount => 0};
        }

        # try without blocking first, so the progress message is only
        # printed when we really have to wait
        if (!flock($lock_handles->{$$}->{$filename}->{fh}, $mode |LOCK_NB)) {
            print STDERR "trying to aquire lock...";
            my $success;
            while(1) {
                $success = flock($lock_handles->{$$}->{$filename}->{fh}, $mode);
                # try again on EINTR (see bug #273)
                if ($success || ($! != EINTR)) {
                    last;
                }
            }
            if (!$success) {
                print STDERR " failed\n";
                die "can't aquire lock - $!\n";
            }

            print STDERR " OK\n";
        }

        $lock_handles->{$$}->{$filename}->{refcount}++;
    };

    eval { PVE::Tools::run_with_timeout($timeout, $lock_func); };
    my $err = $@;
    if ($err) {
        die "can't lock file '$filename' - $err";
    }
}
670
# Release one reference to the config lock of container $vmid. When the
# reference count drops to zero the cached handle is forgotten and closed,
# which releases the flock.
#
# Calling this for a lock that is not held is a no-op. In particular it must
# not autovivify an empty cache entry: lock_aquire() treats any true entry as
# "file already opened" and would then try to flock an undefined handle.
sub lock_release {
    my ($vmid) = @_;

    my $filename = lock_filename($vmid);

    # look up level by level so that no intermediate hash is created
    my $per_process = $lock_handles->{$$};
    return if !$per_process;

    my $entry = $per_process->{$filename};
    return if !$entry || !$entry->{fh};

    if (--$entry->{refcount} <= 0) {
        delete $per_process->{$filename};
        close($entry->{fh});
    }

    return;
}
684
# Run $code->(@param) while holding the config lock of container $vmid and
# return its (scalar) result. The lock is released again even if $code dies;
# the error is then re-thrown.
sub lock_container {
    my ($vmid, $timeout, $code, @param) = @_;

    lock_aquire($vmid, $timeout);

    my $res;
    eval { $res = $code->(@param) };
    my $err = $@;

    lock_release($vmid);

    die $err if $err;

    return $res;
}
699
# True if $name is a known container config option.
sub option_exists {
    my ($name) = @_;

    my $schema = $confdesc->{$name};
    return defined($schema);
}
705
# Add the JSON properties for the create and set functions: every config
# option except 'parent' and 'snaptime' is copied into $prop, unless $prop
# already has a true entry of that name. Returns $prop.
sub json_config_properties {
    my $prop = shift;

    my %internal = (parent => 1, snaptime => 1);

    foreach my $opt (grep { !$internal{$_} } keys %$confdesc) {
        $prop->{$opt} ||= $confdesc->{$opt};
    }

    return $prop;
}
718
# Like json_config_properties(), but 'rootfs' is left out as well.
sub json_config_properties_no_rootfs {
    my $prop = shift;

    my %excluded = map { $_ => 1 } qw(parent snaptime rootfs);

    foreach my $opt (grep { !$excluded{$_} } keys %$confdesc) {
        $prop->{$opt} ||= $confdesc->{$opt};
    }

    return $prop;
}
730
731 # container status helpers
732
# Return a hash { vmid => 1 } of containers that currently have a command
# socket "@/var/lib/lxc/<vmid>/command" listed in /proc/net/unix.
# An empty hash is returned if that file cannot be opened.
# (similar test is used by lxccontainer.c: list_active_containers)
sub list_active_containers {

    my $active = {};

    my $fh = IO::File->new("/proc/net/unix", "r");
    return $active if !$fh;

    while (defined(my $line = <$fh>)) {
        next if $line !~ m/^[a-f0-9]+:\s\S+\s\S+\s\S+\s\S+\s\S+\s\d+\s(\S+)$/;
        my $path = $1;
        next if $path !~ m!^@/var/lib/lxc/(\d+)/command$!;
        $active->{$1} = 1;
    }

    close($fh);

    return $active;
}
756
# Return 1 if container $vmid is listed as active, undef otherwise.
# warning: this is slow (the whole list of active containers is rebuilt)
sub check_running {
    my ($vmid) = @_;

    my $active = list_active_containers();

    return defined($active->{$vmid}) ? 1 : undef;
}
767
# Return the PVE::Tools::df result for the root directory of the process
# $pid, queried with a timeout argument of 1. $vmid is not used.
sub get_container_disk_usage {
    my ($vmid, $pid) = @_;

    my $rootdir = "/proc/$pid/root/";

    return PVE::Tools::df($rootdir, 1);
}
773
# Per-container CPU sample (time, used, cpu) remembered between vmstatus calls.
my $last_proc_vmid_stat;

# Read cpuacct.stat of container $vmid and return { utime => .., stime => .. }.
# The hash is empty if the content does not have the expected layout.
my $parse_cpuacct_stat = sub {
    my ($vmid) = @_;

    my $raw = read_cgroup_value('cpuacct', $vmid, 'cpuacct.stat', 1);

    my %stat;
    if ($raw =~ m/^user (\d+)\nsystem (\d+)\n/) {
        @stat{qw(utime stime)} = ($1, $2);
    }

    return \%stat;
};
792
# Collect status information for one container ($opt_vmid) or, if omitted,
# for all containers of the local node.
#
# Returns a hash keyed by vmid. Every entry gets: status, pid (if running),
# name, cpus, disk, maxdisk, mem, maxmem, swap, maxswap, uptime, cpu, netin,
# netout, diskread, diskwrite and template. Resource usage values are only
# filled in for running containers; stopped ones report 0.
#
# The cpu value is derived from the difference to the sample remembered from
# a previous call, so the first call for a container reports 0.
sub vmstatus {
    my ($opt_vmid) = @_;

    my $list = $opt_vmid ? { $opt_vmid => { type => 'lxc' }} : config_list();

    my $active_hash = list_active_containers();

    my $cpucount = $cpuinfo->{cpus} || 1;

    my $cdtime = gettimeofday;

    # fallback for containers without a usable rootfs size (4GB)
    my $default_maxdisk = 4*1024*1024*1024;

    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};

        eval { $d->{pid} = find_lxc_pid($vmid) if defined($active_hash->{$vmid}); };
        warn $@ if $@; # ignore errors (consider them stopped)

        $d->{status} = $d->{pid} ? 'running' : 'stopped';

        my $cfspath = cfs_config_path($vmid);
        my $conf = PVE::Cluster::cfs_read_file($cfspath) || {};

        $d->{name} = $conf->{'hostname'} || "CT$vmid";
        $d->{name} =~ s/[\s]//g;

        $d->{cpus} = $conf->{cpulimit} || $cpucount;

        if ($d->{pid}) {
            my $res = get_container_disk_usage($vmid, $d->{pid});
            $d->{disk} = $res->{used};
            $d->{maxdisk} = $res->{total};
        } else {
            $d->{disk} = 0;
            if (my $rootfs = $conf->{rootfs}) {
                my $rootinfo = parse_ct_rootfs($rootfs);
                # The size was already converted by parse_size() while
                # parsing the rootfs string, so it must not be scaled again.
                # NOTE(review): assumes parse_size() returns bytes - confirm.
                $d->{maxdisk} = $rootinfo->{size} || $default_maxdisk;
            } else {
                $d->{maxdisk} = $default_maxdisk;
            }
        }

        $d->{mem} = 0;
        $d->{swap} = 0;
        $d->{maxmem} = ($conf->{memory}||512)*1024*1024;
        $d->{maxswap} = ($conf->{swap}//0)*1024*1024;

        $d->{uptime} = 0;
        $d->{cpu} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $d->{template} = is_template($conf);
    }

    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};
        my $pid = $d->{pid};

        next if !$pid; # skip stopped CTs

        # the method lxcfs uses; keep uptime at 0 if the process directory
        # vanished in the meantime and stat() returns nothing
        my $ctime = (stat("/proc/$pid"))[10]; # 10 = ctime
        $d->{uptime} = time - $ctime if defined($ctime);

        $d->{mem} = read_cgroup_value('memory', $vmid, 'memory.usage_in_bytes');
        $d->{swap} = read_cgroup_value('memory', $vmid, 'memory.memsw.usage_in_bytes') - $d->{mem};

        my $blkio_bytes = read_cgroup_value('blkio', $vmid, 'blkio.throttle.io_service_bytes', 1);
        my @bytes = split(/\n/, $blkio_bytes);
        foreach my $byte (@bytes) {
            if (my ($key, $value) = $byte =~ /(Read|Write)\s+(\d+)/) {
                $d->{diskread} = $value if $key eq 'Read';
                $d->{diskwrite} = $value if $key eq 'Write';
            }
        }

        my $pstat = &$parse_cpuacct_stat($vmid);

        # the stat hash is empty if cpuacct.stat could not be parsed
        my $used = ($pstat->{utime} // 0) + ($pstat->{stime} // 0);

        my $old = $last_proc_vmid_stat->{$vmid};
        if (!$old) {
            # first sample for this container: nothing to compare against
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($cdtime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            # interval too short for a new sample: report the previous value
            $d->{cpu} = $old->{cpu};
        }
    }

    my $netdev = PVE::ProcFSTools::read_proc_net_dev();

    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^veth([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $list->{$vmid};

        next if !$d;

        # counters of the veth device are mapped with swapped direction
        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};

    }

    return $list;
}
921
# Classify a mountpoint volume string:
#   'device' - absolute path below /dev/
#   'bind'   - any other absolute path
#   'volume' - everything else
sub classify_mountpoint {
    my ($vol) = @_;

    return 'volume' if $vol !~ m!^/!;

    return $vol =~ m!^/dev/! ? 'device' : 'bind';
}
930
# Parse a mountpoint property string according to schema $desc.
#
# Parameters:
#   $desc  - property-string schema
#   $data  - the property string (undef is treated as '')
#   $noerr - if true, return undef instead of dying on errors
#
# Returns the parsed hash; 'size' is replaced by the result of
# PVE::JSONSchema::parse_size and 'type' is set via classify_mountpoint().
my $parse_ct_mountpoint_full = sub {
    my ($desc, $data, $noerr) = @_;

    $data //= '';

    my $res;
    eval { $res = PVE::JSONSchema::parse_property_string($desc, $data) };
    if ($@) {
        return undef if $noerr;
        die $@;
    }

    if (defined(my $raw_size = $res->{size})) {
        my $size = PVE::JSONSchema::parse_size($raw_size);
        if (!defined($size)) {
            return undef if $noerr;
            # report the original text; the parsed value is undef here
            die "invalid size: $raw_size\n";
        }
        $res->{size} = $size;
    }

    $res->{type} = classify_mountpoint($res->{volume});

    return $res;
};
956
# Parse a 'rootfs' property string. On success the mount path 'mp' of the
# result is set to '/'. With $noerr, undef is returned on parse errors.
sub parse_ct_rootfs {
    my ($data, $noerr) = @_;

    my $rootfs = $parse_ct_mountpoint_full->($rootfs_desc, $data, $noerr);
    return $rootfs if !defined($rootfs);

    $rootfs->{mp} = '/';

    return $rootfs;
}
966
# Parse an 'mpN' property string using the mountpoint schema.
# With $noerr, undef is returned on parse errors.
sub parse_ct_mountpoint {
    my ($data, $noerr) = @_;

    return $parse_ct_mountpoint_full->($mp_desc, $data, $noerr);
}
972
# Format a parsed mountpoint back into a property string. The internal
# 'type' key is always skipped, 'mp' as well if $nomp is true.
sub print_ct_mountpoint {
    my ($info, $nomp) = @_;

    my @skip = ('type', $nomp ? ('mp') : ());

    return PVE::JSONSchema::print_property_string($info, $mp_desc, \@skip);
}
979
# Format a parsed network hash back into a 'netN' property string.
sub print_lxc_network {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $netconf_desc);
}
984
# Parse a 'netN' property string. An empty hash is returned for false input.
# The type is always set to 'veth'; a random MAC address is generated if the
# string does not provide one.
sub parse_lxc_network {
    my ($data) = @_;

    return {} if !$data;

    my $net = PVE::JSONSchema::parse_property_string($netconf_desc, $data);

    $net->{type} = 'veth';
    $net->{hwaddr} ||= PVE::Tools::random_ether_addr();

    return $net;
}
999
# Read the cgroup file $name of container $vmid in controller $group.
# Returns the whole content if $full is true, otherwise only the first line.
sub read_cgroup_value {
    my ($group, $vmid, $name, $full) = @_;

    my $path = "/sys/fs/cgroup/$group/lxc/$vmid/$name";

    return $full
        ? PVE::Tools::file_get_contents($path)
        : PVE::Tools::file_read_firstline($path);
}
1009
# Write $value to the cgroup file $name of container $vmid in controller
# $group. Nothing happens if the file does not exist.
sub write_cgroup_value {
    my ($group, $vmid, $name, $value) = @_;

    my $path = "/sys/fs/cgroup/$group/lxc/$vmid/$name";

    if (-e $path) {
        PVE::ProcFSTools::write_proc_entry($path, $value);
    }
}
1017
# Scan /proc for processes whose command line is exactly
# "lxc-console -n <vmid>" (optionally with the /usr/bin/ prefix) and return
# a hash { vmid => [ pid, ... ] }.
sub find_lxc_console_pids {

    my $res = {};

    my $inspect_pid = sub {
        my ($pid) = @_;

        my $cmdline = PVE::Tools::file_read_firstline("/proc/$pid/cmdline");
        return if !$cmdline;

        # arguments in /proc/<pid>/cmdline are separated by NUL bytes
        my @args = split(/\0/, $cmdline);
        return if scalar(@args) != 3;

        my ($cmd, $flag, $vmid) = @args;
        return if $flag ne '-n';
        return if $vmid !~ m/^\d+$/;
        return if $cmd !~ m|^(/usr/bin/)?lxc-console$|;

        push @{$res->{$vmid}}, $pid;
    };

    PVE::Tools::dir_glob_foreach('/proc', '\d+', $inspect_pid);

    return $res;
}
1043
# Return the PID that "lxc-info -n <vmid> -p" prints for container $vmid.
# Dies if no PID line was found in the output.
sub find_lxc_pid {
    my ($vmid) = @_;

    my $pid;

    PVE::Tools::run_command(
        ['lxc-info', '-n', $vmid, '-p'],
        outfunc => sub {
            my ($line) = @_;
            $pid = $1 if $line =~ m/^PID:\s+(\d+)$/;
        },
    );

    if (!$pid) {
        die "unable to get PID for CT $vmid (not running?)\n";
    }

    return $pid;
}
1058
# Parse "address/prefix" with a prefix length from 8 to 32 and return
# { address => .., netmask => .. }. On failure undef is returned if $noerr
# is true, otherwise the function dies.
# Note: we cannot use Net::IP, because that only allows strict
# CIDR networks
sub parse_ipv4_cidr {
    my ($cidr, $noerr) = @_;

    if ($cidr =~ m!^($IPV4RE)(?:/(\d+))$!) {
        my ($address, $prefix) = ($1, $2);
        if ($prefix > 7 && $prefix <= 32) {
            return {
                address => $address,
                netmask => $PVE::Network::ipv4_reverse_mask->[$prefix],
            };
        }
    }

    return undef if $noerr;

    die "unable to parse ipv4 address/mask\n";
}
1072
# Die if the config carries a (true) 'lock' entry.
sub check_lock {
    my ($conf) = @_;

    my $lock = $conf->{'lock'};

    die "VM is locked ($lock)\n" if $lock;
}
1078
# Die with "$err_msg - protection mode enabled" if the config has the
# protection flag set.
sub check_protection {
    my ($vm_conf, $err_msg) = @_;

    return if !$vm_conf->{protection};

    die "$err_msg - protection mode enabled\n";
}
1086
# Generate the LXC config file /var/lib/lxc/<vmid>/config from the container
# config hash $conf and create the rootfs directory next to it.
#
# For templates the existing file is removed and nothing is written.
# Dies if 'arch' or 'ostype' is missing, if the ostype is not one of the
# supported values, or if the rootfs entry cannot be parsed.
# $storage_cfg is accepted but not used here.
sub update_lxc_config {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $dir = "/var/lib/lxc/$vmid";

    if ($conf->{template}) {

        unlink "$dir/config";

        return;
    }

    my $raw = '';

    die "missing 'arch' - internal error" if !$conf->{arch};
    $raw .= "lxc.arch = $conf->{arch}\n";

    my $unprivileged = $conf->{unprivileged};
    # number of user supplied lxc.id_map entries
    my $custom_idmap = grep { $_->[0] eq 'lxc.id_map' } @{$conf->{lxc}};

    my $ostype = $conf->{ostype} || die "missing 'ostype' - internal error";
    if ($ostype =~ /^(?:debian | ubuntu | centos | fedora | opensuse | archlinux)$/x) {
        $raw .= "lxc.include = /usr/share/lxc/config/$ostype.common.conf\n";
        if ($unprivileged || $custom_idmap) {
            $raw .= "lxc.include = /usr/share/lxc/config/$ostype.userns.conf\n"
        }
    } else {
        die "implement me (ostype $ostype)";
    }

    $raw .= "lxc.monitor.unshare = 1\n";

    # Should we read them from /etc/subuid?
    if ($unprivileged && !$custom_idmap) {
        $raw .= "lxc.id_map = u 0 100000 65536\n";
        $raw .= "lxc.id_map = g 0 100000 65536\n";
    }

    if (!has_dev_console($conf)) {
        $raw .= "lxc.console = none\n";
        $raw .= "lxc.cgroup.devices.deny = c 5:1 rwm\n";
    }

    my $ttycount = get_tty_count($conf);
    $raw .= "lxc.tty = $ttycount\n";

    # some init scripts expect a linux terminal (turnkey).
    $raw .= "lxc.environment = TERM=linux\n";

    my $utsname = $conf->{hostname} || "CT$vmid";
    $raw .= "lxc.utsname = $utsname\n";

    my $memory = $conf->{memory} || 512;
    my $swap = $conf->{swap} // 0;

    my $lxcmem = int($memory*1024*1024);
    $raw .= "lxc.cgroup.memory.limit_in_bytes = $lxcmem\n";

    # the memsw limit covers memory plus swap
    my $lxcswap = int(($memory + $swap)*1024*1024);
    $raw .= "lxc.cgroup.memory.memsw.limit_in_bytes = $lxcswap\n";

    if (my $cpulimit = $conf->{cpulimit}) {
        $raw .= "lxc.cgroup.cpu.cfs_period_us = 100000\n";
        my $value = int(100000*$cpulimit);
        $raw .= "lxc.cgroup.cpu.cfs_quota_us = $value\n";
    }

    my $shares = $conf->{cpuunits} || 1024;
    $raw .= "lxc.cgroup.cpu.shares = $shares\n";

    # The result is not needed; the call is kept so that an unparsable
    # rootfs entry still aborts before anything is written.
    parse_ct_rootfs($conf->{rootfs});

    $raw .= "lxc.rootfs = $dir/rootfs\n";

    my $netcount = 0;
    # sorted, so the generated network sections have a stable order instead
    # of following Perl's randomized hash order
    foreach my $k (sort keys %$conf) {
        next if $k !~ m/^net(\d+)$/;
        my $ind = $1;
        my $d = parse_lxc_network($conf->{$k});
        $netcount++;
        $raw .= "lxc.network.type = veth\n";
        $raw .= "lxc.network.veth.pair = veth${vmid}i${ind}\n";
        $raw .= "lxc.network.hwaddr = $d->{hwaddr}\n" if defined($d->{hwaddr});
        $raw .= "lxc.network.name = $d->{name}\n" if defined($d->{name});
        $raw .= "lxc.network.mtu = $d->{mtu}\n" if defined($d->{mtu});
    }

    if (my $lxcconf = $conf->{lxc}) {
        foreach my $entry (@$lxcconf) {
            my ($k, $v) = @$entry;
            $netcount++ if $k eq 'lxc.network.type';
            $raw .= "$k = $v\n";
        }
    }

    $raw .= "lxc.network.type = empty\n" if !$netcount;

    File::Path::mkpath("$dir/rootfs");

    PVE::Tools::file_set_contents("$dir/config", $raw);
}
1188
# Verify and clean up a nameserver list: the list is split with
# PVE::Tools::split_list, every entry is checked with pve_verify_ip, and the
# entries are returned joined by single spaces.
sub verify_nameserver_list {
    my ($nameserver_list) = @_;

    my @servers = PVE::Tools::split_list($nameserver_list);

    foreach my $server (@servers) {
        PVE::JSONSchema::pve_verify_ip($server);
    }

    return join(' ', @servers);
}
1201
# Normalize a search domain list: split it with PVE::Tools::split_list and
# return the entries joined by single spaces. No validation is done yet.
sub verify_searchdomain_list {
    my ($searchdomain_list) = @_;

    # todo: should we add checks for valid dns domains?
    my @domains = PVE::Tools::split_list($searchdomain_list);

    return join(' ', @domains);
}
1213
# Record $volid in the first free 'unusedN' slot of $config.
#
# Returns the key that was used, or nothing when the volume is already
# listed as unused. Dies when all $MAX_UNUSED_DISKS slots are taken.
sub add_unused_volume {
    my ($config, $volid) = @_;

    # Walk from the highest index down so that the lowest free slot is the
    # one remembered at the end of the scan.
    my $free_slot;
    foreach my $idx (reverse(0 .. $MAX_UNUSED_DISKS - 1)) {
        my $slot = "unused$idx";
        my $current = $config->{$slot};
        if ($current) {
            return if $current eq $volid; # do not add duplicates
        } else {
            $free_slot = $slot;
        }
    }

    die "Too many unused volumes - please delete them first.\n" if !$free_slot;

    $config->{$free_slot} = $volid;

    return $free_slot;
}
1233
# Apply a set of option deletions and option updates to a container config.
#
#   $vmid    - container id
#   $conf    - config hash, modified in place
#   $running - true when the container is running; changes are then applied
#              live where possible and the config is written after every
#              processed option
#   $param   - hash of options to set ('memory' and 'swap' are taken out of
#              it with PVE::Tools::extract_param)
#   $delete  - optional array ref of option names to remove
#
# Options that cannot be changed on a running container are collected and
# reported with a single die() at the very end, after everything else has
# been applied. Unknown options die with "implement me".
sub update_pct_config {
    my ($vmid, $conf, $running, $param, $delete) = @_;

    my @nohotplug;

    my $new_disks = 0;
    my @deleted_volumes;

    my $rootdir;
    if ($running) {
        my $pid = find_lxc_pid($vmid);
        $rootdir = "/proc/$pid/root";
    }

    # Returns true (and remembers the option names) when the container is
    # running, i.e. when the caller has to skip a non-hotpluggable option.
    my $hotplug_error = sub {
        if ($running) {
            push @nohotplug, @_;
            return 1;
        } else {
            return 0;
        }
    };

    if (defined($delete)) {
        foreach my $opt (@$delete) {
            if (!exists($conf->{$opt})) {
                warn "no such option: $opt\n";
                next;
            }

            if ($opt eq 'hostname' || $opt eq 'memory' || $opt eq 'rootfs') {
                die "unable to delete required option '$opt'\n";
            } elsif ($opt eq 'swap') {
                delete $conf->{$opt};
                # NOTE(review): written regardless of $running - confirm that
                # write_cgroup_value copes with a stopped container.
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", -1);
            } elsif ($opt eq 'description' || $opt eq 'onboot' || $opt eq 'startup') {
                delete $conf->{$opt};
            } elsif ($opt eq 'nameserver' || $opt eq 'searchdomain' ||
                     $opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
                next if $hotplug_error->($opt);
                delete $conf->{$opt};
            } elsif ($opt =~ m/^net(\d+)$/) {
                # \d+ (not \d) so that multi-digit interfaces such as net10
                # are handled exactly like in the update branch below.
                delete $conf->{$opt};
                next if !$running;
                my $netid = $1;
                PVE::Network::veth_delete("veth${vmid}i$netid");
            } elsif ($opt eq 'protection') {
                delete $conf->{$opt};
            } elsif ($opt =~ m/^unused(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                push @deleted_volumes, $conf->{$opt};
                delete $conf->{$opt};
            } elsif ($opt =~ m/^mp(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                my $mountpoint = parse_ct_mountpoint($conf->{$opt});
                # storage volumes are kept around as 'unusedN' entries
                if ($mountpoint->{type} eq 'volume') {
                    add_unused_volume($conf, $mountpoint->{volume});
                }
                delete $conf->{$opt};
            } elsif ($opt eq 'unprivileged') {
                die "unable to delete read-only option: '$opt'\n";
            } else {
                die "implement me (delete: $opt)";
            }
            write_config($vmid, $conf) if $running;
        }
    }

    # There's no separate swap size to configure, there's memory and "total"
    # memory (iow. memory+swap). This means we have to change them together.
    my $wanted_memory = PVE::Tools::extract_param($param, 'memory');
    my $wanted_swap = PVE::Tools::extract_param($param, 'swap');
    if (defined($wanted_memory) || defined($wanted_swap)) {

        $wanted_memory //= ($conf->{memory} || 512);
        $wanted_swap //= ($conf->{swap} || 0);

        my $total = $wanted_memory + $wanted_swap;
        if ($running) {
            write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
            write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
        }
        $conf->{memory} = $wanted_memory;
        $conf->{swap} = $wanted_swap;

        write_config($vmid, $conf) if $running;
    }

    foreach my $opt (keys %$param) {
        my $value = $param->{$opt};
        if ($opt eq 'hostname') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'onboot') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt eq 'startup') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
            next if $hotplug_error->($opt);
            $conf->{$opt} = $value;
        } elsif ($opt eq 'nameserver') {
            next if $hotplug_error->($opt);
            my $list = verify_nameserver_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'searchdomain') {
            next if $hotplug_error->($opt);
            my $list = verify_searchdomain_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'cpulimit') {
            next if $hotplug_error->($opt); # FIXME: hotplug
            $conf->{$opt} = $value;
        } elsif ($opt eq 'cpuunits') {
            $conf->{$opt} = $value;
            # NOTE(review): written regardless of $running - confirm that
            # write_cgroup_value copes with a stopped container.
            write_cgroup_value("cpu", $vmid, "cpu.shares", $value);
        } elsif ($opt eq 'description') {
            $conf->{$opt} = PVE::Tools::encode_text($value);
        } elsif ($opt =~ m/^net(\d+)$/) {
            my $netid = $1;
            my $net = parse_lxc_network($value);
            if (!$running) {
                $conf->{$opt} = print_lxc_network($net);
            } else {
                update_net($vmid, $conf, $opt, $net, $netid, $rootdir);
            }
        } elsif ($opt eq 'protection') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt =~ m/^mp(\d+)$/) {
            next if $hotplug_error->($opt);
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            $conf->{$opt} = $value;
            $new_disks = 1;
        } elsif ($opt eq 'rootfs') {
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            die "implement me: $opt";
        } elsif ($opt eq 'unprivileged') {
            die "unable to modify read-only option: '$opt'\n";
        } else {
            die "implement me: $opt";
        }
        write_config($vmid, $conf) if $running;
    }

    if (@deleted_volumes) {
        my $storage_cfg = PVE::Storage::config();
        foreach my $volume (@deleted_volumes) {
            delete_mountpoint_volume($storage_cfg, $vmid, $volume);
        }
    }

    if ($new_disks) {
        my $storage_cfg = PVE::Storage::config();
        create_disks($storage_cfg, $vmid, $conf, $conf);
    }

    # This should be the last thing we do here
    if ($running && scalar(@nohotplug)) {
        die "unable to modify " . join(',', @nohotplug) . " while container is running\n";
    }
}
1394
# Tell whether the container gets a /dev/console: true unless the 'console'
# option is explicitly set to a false value.
sub has_dev_console {
    my ($conf) = @_;

    my $console = $conf->{console};

    # an unset option counts as enabled
    return !defined($console) || !!$console;
}
1400
# Number of ttys for the container: the configured 'tty' value, or the
# schema default from $confdesc when the option is not set.
sub get_tty_count {
    my ($conf) = @_;

    my $tty = $conf->{tty};

    return defined($tty) ? $tty : $confdesc->{tty}->{default};
}
1406
# Console mode of the container: the configured 'cmode' value, or the
# schema default from $confdesc when the option is not set.
sub get_cmode {
    my ($conf) = @_;

    my $cmode = $conf->{cmode};

    return defined($cmode) ? $cmode : $confdesc->{cmode}->{default};
}
1412
# Build the command (as an array ref) used to open a console on container
# $vmid, depending on the console mode returned by get_cmode().
# Dies with "internal error" for any mode other than console/tty/shell.
sub get_console_command {
    my ($vmid, $conf) = @_;

    my $cmode = get_cmode($conf);

    return ['lxc-console', '-n', $vmid, '-t', 0] if $cmode eq 'console';
    return ['lxc-console', '-n', $vmid] if $cmode eq 'tty';
    return ['lxc-attach', '--clear-env', '-n', $vmid] if $cmode eq 'shell';

    die "internal error";
}
1428
# Return the pair ($ipv4, $ipv6) configured on net0.
#
# Keyword settings (dhcp/manual, and additionally auto for IPv6) yield undef
# for that family; real addresses are returned without their /prefix suffix.
# Returns undef when the container has no net0 entry at all.
sub get_primary_ips {
    my ($conf) = @_;

    # return data from net0
    my $net0 = $conf->{net0};
    return undef if !defined($net0);

    my $net = parse_lxc_network($net0);

    # Map one configured address to the value reported to the caller.
    my $plain_address = sub {
        my ($addr, $keyword_re) = @_;

        return $addr if !$addr;             # unset/empty stays untouched
        return undef if $addr =~ $keyword_re;

        $addr =~ s!/\d+$!!;                 # strip the prefix length
        return $addr;
    };

    my $ipv4 = $plain_address->($net->{ip}, qr/^(dhcp|manual)$/);
    my $ipv6 = $plain_address->($net->{ip6}, qr/^(auto|dhcp|manual)$/);

    return ($ipv4, $ipv6);
}
1456
# Free the storage volume behind a mountpoint, but only when
# classify_mountpoint() reports it as a 'volume' and the owner parsed from
# the volume name is this container.
sub delete_mountpoint_volume {
    my ($storage_cfg, $vmid, $volume) = @_;

    return if classify_mountpoint($volume) ne 'volume';

    # third element of parse_volname's result is used as the owning id
    my (undef, undef, $owner) = PVE::Storage::parse_volname($storage_cfg, $volume);

    if ($vmid == $owner) {
        PVE::Storage::vdisk_free($storage_cfg, $volume);
    }
}
1465
# Remove a container: release the volumes of all its mountpoints, remove
# its directory below /var/lib/lxc and finally destroy its configuration.
sub destroy_lxc_container {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $free_volume = sub {
        my ($ms, $mountpoint) = @_;
        delete_mountpoint_volume($storage_cfg, $vmid, $mountpoint->{volume});
    };
    foreach_mountpoint($conf, $free_volume);

    # failures of rmdir/unlink are not checked here
    my $lxc_dir = "/var/lib/lxc/$vmid";
    rmdir "$lxc_dir/rootfs";
    unlink "$lxc_dir/config";
    rmdir $lxc_dir;
    destroy_config($vmid);

    #my $cmd = ['lxc-destroy', '-n', $vmid ];
    #PVE::Tools::run_command($cmd);
}
1482
# Clean up after a container has stopped: deactivate its storage volumes
# unless $keepActive is set. Errors are downgraded to warnings.
sub vm_stop_cleanup {
    my ($storage_cfg, $vmid, $conf, $keepActive) = @_;

    eval {
        # this 'return' only leaves the eval block
        return if $keepActive;

        my $volumes = get_vm_volumes($conf);
        PVE::Storage::deactivate_volumes($storage_cfg, $volumes);
    };
    warn $@ if $@; # avoid errors - just warn
}
1495
# Numeric "not equal" that tolerates undef: two undefined values are equal,
# an undefined value differs from any defined one.
my $safe_num_ne = sub {
    my ($lhs, $rhs) = @_;

    if (defined($lhs) && defined($rhs)) {
        return $lhs != $rhs;
    }

    # at least one side is undef: different unless both are
    return (defined($lhs) || defined($rhs)) ? 1 : 0;
};
1505
# String "not equal" that tolerates undef: two undefined values are equal,
# an undefined value differs from any defined one.
my $safe_string_ne = sub {
    my ($lhs, $rhs) = @_;

    if (defined($lhs) && defined($rhs)) {
        return $lhs ne $rhs;
    }

    # at least one side is undef: different unless both are
    return (defined($lhs) || defined($rhs)) ? 1 : 0;
};
1515
# Apply a changed network interface definition to a running container.
#
# - no previous definition: the interface is hot-plugged
# - hwaddr or name changed: the veth pair is deleted and plugged again
# - bridge, tag or firewall changed: the host side is re-plugged
# In every case the IP configuration is updated afterwards.
# The config is written after each completed step.
sub update_net {
    my ($vmid, $conf, $opt, $newnet, $netid, $rootdir) = @_;

    # for when there are physical interfaces
    die "cannot update interface of type $newnet->{type}"
        if $newnet->{type} ne 'veth';

    my $veth = "veth${vmid}i${netid}";
    my $eth = $newnet->{name};
    my @bridge_keys = qw(bridge tag firewall);

    my $oldnetcfg = $conf->{$opt};
    if (!$oldnetcfg) {
        hotplug_net($vmid, $conf, $opt, $newnet, $netid);
    } else {
        my $oldnet = parse_lxc_network($oldnetcfg);

        if ($safe_string_ne->($oldnet->{hwaddr}, $newnet->{hwaddr}) ||
            $safe_string_ne->($oldnet->{name}, $newnet->{name})) {

            PVE::Network::veth_delete($veth);
            delete $conf->{$opt};
            write_config($vmid, $conf);

            hotplug_net($vmid, $conf, $opt, $newnet, $netid);

        } elsif ($safe_string_ne->($oldnet->{bridge}, $newnet->{bridge}) ||
                 $safe_num_ne->($oldnet->{tag}, $newnet->{tag}) ||
                 $safe_num_ne->($oldnet->{firewall}, $newnet->{firewall})) {

            if ($oldnet->{bridge}) {
                PVE::Network::tap_unplug($veth);
                # persist the unplugged state before plugging again
                delete @{$oldnet}{@bridge_keys};
                $conf->{$opt} = print_lxc_network($oldnet);
                write_config($vmid, $conf);
            }

            PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});
            for my $key (@bridge_keys) {
                $oldnet->{$key} = $newnet->{$key} if $newnet->{$key};
            }
            $conf->{$opt} = print_lxc_network($oldnet);
            write_config($vmid, $conf);
        }
    }

    update_ipconfig($vmid, $conf, $opt, $eth, $newnet, $rootdir);
}
1565
# Hot-plug a veth interface into a running container: create the veth pair,
# plug the host side, move the peer into the container, bring it up and
# store the link-level settings (no IP settings) in the config.
sub hotplug_net {
    my ($vmid, $conf, $opt, $newnet, $netid) = @_;

    my $veth = "veth${vmid}i${netid}";
    my $vethpeer = $veth . "p";
    my $eth = $newnet->{name};

    PVE::Network::veth_create($veth, $vethpeer, $newnet->{bridge}, $newnet->{hwaddr});
    PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});

    # attach peer in container
    PVE::Tools::run_command(['lxc-device', '-n', $vmid, 'add', $vethpeer, "$eth"]);

    # link up peer in container
    PVE::Tools::run_command(
        ['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', '/sbin/ip', 'link', 'set', $eth, 'up']);

    # only settings that are in effect by now are recorded
    my %done = (type => 'veth');
    for my $key (qw(bridge tag firewall hwaddr name)) {
        my $setting = $newnet->{$key};
        $done{$key} = $setting if $setting;
    }
    $conf->{$opt} = print_lxc_network(\%done);

    write_config($vmid, $conf);
}
1592
# Apply changed IP address / gateway settings of interface $eth inside a
# running container and persist them.
#
#   $opt     - config key of the interface (its current value in $conf is
#              taken as the old state)
#   $newnet  - parsed new interface definition
#   $rootdir - container root, handed to PVE::LXC::Setup
#
# IPv4 and IPv6 are handled one after the other. For each family: add the
# new address, replace the gateway, remove the old address, then write the
# config and let PVE::LXC::Setup rewrite the guest's network setup. A failure
# while adding the address or replacing the gateway cancels that family
# with a warning and leaves the config untouched.
sub update_ipconfig {
    my ($vmid, $conf, $opt, $eth, $newnet, $rootdir) = @_;

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);

    my $optdata = parse_lxc_network($conf->{$opt});

    # run a command inside the container's network namespace
    my $nscmd = sub {
        my $cmdargs = shift;
        PVE::Tools::run_command(['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', @_], %$cmdargs);
    };
    my $ipcmd = sub { &$nscmd({}, '/sbin/ip', @_) };

    # keywords that may appear in place of a real address
    my $ip_keyword_re = qr/^(?:auto|dhcp|manual)$/;

    my $change_ip_config = sub {
        my ($ipversion) = @_;

        my $family_opt = "-$ipversion";
        my $suffix = $ipversion == 4 ? '' : $ipversion;
        my $gw = "gw$suffix";
        my $ip = "ip$suffix";

        my $newip = $newnet->{$ip};
        my $newgw = $newnet->{$gw};
        my $oldip = $optdata->{$ip};

        my $change_ip = &$safe_string_ne($oldip, $newip);
        my $change_gw = &$safe_string_ne($optdata->{$gw}, $newgw);

        return if !$change_ip && !$change_gw;

        # step 1: add new IP, if this fails we cancel
        my $is_real_ip = ($newip && $newip !~ $ip_keyword_re);
        if ($change_ip && $is_real_ip) {
            eval { &$ipcmd($family_opt, 'addr', 'add', $newip, 'dev', $eth); };
            if (my $err = $@) {
                warn $err;
                return;
            }
        }

        # step 2: replace gateway
        # If this fails we delete the added IP and cancel.
        # If it succeeds we save the config and delete the old IP, ignoring
        # errors. The config is then saved.
        # Note: 'ip route replace' also adds the route if there is none yet.
        if ($change_gw) {
            if ($newgw) {
                eval {
                    # a gateway outside the new subnet needs a direct route
                    if ($is_real_ip && !PVE::Network::is_ip_in_cidr($newgw, $newip, $ipversion)) {
                        &$ipcmd($family_opt, 'route', 'add', $newgw, 'dev', $eth);
                    }
                    &$ipcmd($family_opt, 'route', 'replace', 'default', 'via', $newgw);
                };
                if (my $err = $@) {
                    warn $err;
                    # the route was not replaced, the old IP is still available
                    # rollback (delete new IP) and cancel
                    # Only an address that step 1 actually added is removed;
                    # keywords like 'dhcp' were never added.
                    if ($change_ip && $is_real_ip) {
                        eval { &$ipcmd($family_opt, 'addr', 'del', $newip, 'dev', $eth); };
                        warn $@ if $@; # no need to die here
                    }
                    return;
                }
            } else {
                eval { &$ipcmd($family_opt, 'route', 'del', 'default'); };
                # if the route was not deleted, the guest might have deleted it manually
                # warn and continue
                warn $@ if $@;
            }
        }

        # from this point on we save the configuration
        # step 3: delete old IP ignoring errors
        # (keywords, including 'manual', are not addresses and are skipped)
        if ($change_ip && $oldip && $oldip !~ $ip_keyword_re) {
            # We need to enable promote_secondaries, otherwise our newly added
            # address will be removed along with the old one.
            my $promote = 0;
            eval {
                if ($ipversion == 4) {
                    &$nscmd({ outfunc => sub { $promote = int(shift) } },
                            'cat', "/proc/sys/net/ipv4/conf/$eth/promote_secondaries");
                    &$nscmd({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=1");
                }
                &$ipcmd($family_opt, 'addr', 'del', $oldip, 'dev', $eth);
            };
            warn $@ if $@; # no need to die here

            # restore the previous promote_secondaries setting
            if ($ipversion == 4) {
                &$nscmd({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=$promote");
            }
        }

        foreach my $property ($ip, $gw) {
            if ($newnet->{$property}) {
                $optdata->{$property} = $newnet->{$property};
            } else {
                delete $optdata->{$property};
            }
        }
        $conf->{$opt} = print_lxc_network($optdata);
        write_config($vmid, $conf);
        $lxc_setup->setup_network($conf);
    };

    &$change_ip_config(4);
    &$change_ip_config(6);

}
1702
1703 # Internal snapshots
1704
1705 # NOTE: Snapshot create/delete involves several non-atomic
1706 # actions, and can take a long time.
1707 # So we try to avoid locking the file and use the 'lock' variable
1708 # inside the config file instead.
1709
# Copy all configuration keys from $source into $dest, leaving out the keys
# that carry snapshot bookkeeping or other per-state data.
my $snapshot_copy_config = sub {
    my ($source, $dest) = @_;

    my %skip = map { $_ => 1 }
        qw(snapshots snapstate snaptime vmstate lock digest description);

    foreach my $key (grep { !$skip{$_} } keys %$source) {
        $dest->{$key} = $source->{$key};
    }
};
1725
# First phase of taking a snapshot, done under the container lock: set the
# 'snapshot' lock in the config and add a snapshot entry (a copy of the
# current config) in state "prepare". Returns the new snapshot entry.
#
# Dies for templates, locked containers, an already used snapshot name or
# storage without snapshot support.
my $snapshot_prepare = sub {
    my ($vmid, $snapname, $comment) = @_;

    my $snap;

    my $updatefn = sub {
        my $conf = load_config($vmid);

        if (is_template($conf)) {
            die "you can't take a snapshot if it's a template\n";
        }

        check_lock($conf);

        # only persisted by the write_config below, after all checks passed
        $conf->{lock} = 'snapshot';

        if (defined($conf->{snapshots}->{$snapname})) {
            die "snapshot name '$snapname' already used\n";
        }

        my $storecfg = PVE::Storage::config();
        has_feature('snapshot', $conf, $storecfg)
            or die "snapshot feature is not available\n";

        $snap = {};
        $conf->{snapshots}->{$snapname} = $snap;

        $snapshot_copy_config->($conf, $snap);

        $snap->{snapstate} = "prepare";
        $snap->{snaptime} = time();
        $snap->{description} = $comment if $comment;

        write_config($vmid, $conf);
    };

    lock_container($vmid, 10, $updatefn);

    return $snap;
};
1764
# Final phase of taking a snapshot, done under the container lock: clear the
# "prepare" state of the snapshot entry, drop the 'snapshot' lock and make
# the snapshot the parent of the current config.
#
# Dies when the lock is missing, the snapshot entry does not exist or the
# entry is not in state "prepare".
my $snapshot_commit = sub {
    my ($vmid, $snapname) = @_;

    my $updatefn = sub {
        my $conf = load_config($vmid);

        my $lock = $conf->{lock};
        die "missing snapshot lock\n"
            unless $lock && $lock eq 'snapshot';

        my $snap = $conf->{snapshots}->{$snapname};
        die "snapshot '$snapname' does not exist\n"
            if !defined($snap);

        my $state = $snap->{snapstate};
        die "wrong snapshot state\n"
            unless $state && $state eq "prepare";

        delete $snap->{snapstate};
        delete $conf->{lock};
        $conf->{parent} = $snapname;

        write_config($vmid, $conf);
    };

    lock_container($vmid, 10, $updatefn);
};
1791
# Check whether the container's volumes support a storage feature.
# Returns 1 when supported, 0 when a volume lacks the feature.
#
# Only the rootfs is supported so far: reaching any other mountpoint while
# no missing feature has been found yet raises an error.
sub has_feature {
    my ($feature, $conf, $storecfg, $snapname) = @_;

    my $missing = 0;

    foreach_mountpoint($conf, sub {
        my ($ms, $mountpoint) = @_;

        return if $missing; # skip further test

        my $supported = PVE::Storage::volume_has_feature(
            $storecfg, $feature, $mountpoint->{volume}, $snapname);
        $missing = 1 if !$supported;

        # TODO: implement support for mountpoints
        if ($ms ne 'rootfs') {
            die "unable to handle mountpoint '$ms' - feature not implemented\n";
        }
    });

    return $missing ? 0 : 1;
}
1811
# Create snapshot $snapname of container $vmid.
#
# The snapshot is registered in the config first, then a running container
# is frozen and synced, the rootfs volume is snapshotted and the snapshot is
# committed. The container is unfrozen again in any case. On failure the
# half-created snapshot is force-deleted and the error is re-raised.
sub snapshot_create {
    my ($vmid, $snapname, $comment) = @_;

    $snapshot_prepare->($vmid, $snapname, $comment);

    my $conf = load_config($vmid);

    my $running = check_running($vmid);

    my $frozen = 0;

    eval {
        if ($running) {
            PVE::Tools::run_command(['/usr/bin/lxc-freeze', '-n', $vmid]);
            $frozen = 1;
            PVE::Tools::run_command(['/bin/sync']);
        }

        my $storecfg = PVE::Storage::config();
        my $volid = parse_ct_rootfs($conf->{rootfs})->{volume};

        PVE::Storage::volume_snapshot($storecfg, $volid, $snapname);
        $snapshot_commit->($vmid, $snapname);
    };
    my $err = $@;

    # always thaw a container we froze, even after a failure
    if ($frozen) {
        eval { PVE::Tools::run_command(['/usr/bin/lxc-unfreeze', '-n', $vmid]); };
        warn $@ if $@;
    }

    if ($err) {
        snapshot_delete($vmid, $snapname, 1);
        die "$err\n";
    }
}
1849
# Delete snapshot $snapname of container $vmid.
#
# The snapshot entry is first marked with snapstate 'delete', then the
# storage snapshot of the rootfs volume is removed and finally the entry is
# dropped from the config (re-linking 'parent' references to the deleted
# snapshot's own parent).
#
# If removing the storage snapshot fails the function always dies. With
# $force set, the config entry is removed before dying; without it the
# entry is kept in state 'delete'.
sub snapshot_delete {
    my ($vmid, $snapname, $force) = @_;

    my $snap;

    my $conf;

    my $updatefn = sub {

        $conf = load_config($vmid);

        die "you can't delete a snapshot if vm is a template\n"
            if is_template($conf);

        $snap = $conf->{snapshots}->{$snapname};

        check_lock($conf);

        die "snapshot '$snapname' does not exist\n" if !defined($snap);

        $snap->{snapstate} = 'delete';

        write_config($vmid, $conf);
    };

    lock_container($vmid, 10, $updatefn);

    my $storecfg = PVE::Storage::config();

    # Point a 'parent' reference to the deleted snapshot at its replacement,
    # or drop the reference when there is none.
    my $unlink_parent = sub {

        my ($confref, $new_parent) = @_;

        if ($confref->{parent} && $confref->{parent} eq $snapname) {
            if ($new_parent) {
                $confref->{parent} = $new_parent;
            } else {
                delete $confref->{parent};
            }
        }
    };

    # Remove the snapshot entry from the config (run under the lock).
    my $del_snap = sub {

        check_lock($conf);

        my $parent = $conf->{snapshots}->{$snapname}->{parent};
        foreach my $snapkey (keys %{$conf->{snapshots}}) {
            &$unlink_parent($conf->{snapshots}->{$snapkey}, $parent);
        }

        &$unlink_parent($conf, $parent);

        delete $conf->{snapshots}->{$snapname};

        write_config($vmid, $conf);
    };

    my $rootfs = $conf->{snapshots}->{$snapname}->{rootfs};
    my $rootinfo = parse_ct_rootfs($rootfs);
    my $volid = $rootinfo->{volume};

    eval {
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname);
    };
    if (my $err = $@) {
        # A storage failure must never go unnoticed: previously it was
        # silently dropped unless $force was set.
        lock_container($vmid, 10, $del_snap) if $force;
        die "Can't delete snapshot: $vmid $snapname $err\n";
    }

    lock_container($vmid, 10, $del_snap);
}
1924
# Roll container $vmid back to snapshot $snapname.
#
# Under the container lock the container is stopped (killed) if running,
# the 'rollback' lock is set and the snapshot's settings are copied over the
# current config. Then the rootfs volume is rolled back on the storage and
# the lock is removed again.
#
# Dies for templates, unknown or incomplete snapshots, locked containers,
# and when the container cannot be stopped.
sub snapshot_rollback {
    my ($vmid, $snapname) = @_;

    my $storecfg = PVE::Storage::config();

    my $conf = load_config($vmid);

    die "you can't rollback if vm is a template\n" if is_template($conf);

    my $snap = $conf->{snapshots}->{$snapname};

    die "snapshot '$snapname' does not exist\n" if !defined($snap);

    my $rootfs = $snap->{rootfs};
    my $rootinfo = parse_ct_rootfs($rootfs);
    my $volid = $rootinfo->{volume};

    PVE::Storage::volume_rollback_is_possible($storecfg, $volid, $snapname);

    my $updatefn = sub {

        die "unable to rollback to incomplete snapshot (snapstate = $snap->{snapstate})\n"
            if $snap->{snapstate};

        check_lock($conf);

        # list form: no shell is involved in running the command
        system('lxc-stop', '-n', $vmid, '--kill') if check_running($vmid);

        die "unable to rollback vm $vmid: vm is running\n"
            if check_running($vmid);

        $conf->{lock} = 'rollback';

        # copy snapshot config to current config
        # ($tmp_conf is the same hash as $conf, so keys that exist only in
        # the current config are kept)

        my $tmp_conf = $conf;
        &$snapshot_copy_config($tmp_conf->{snapshots}->{$snapname}, $conf);
        $conf->{snapshots} = $tmp_conf->{snapshots};
        delete $conf->{snaptime};
        delete $conf->{snapname};
        $conf->{parent} = $snapname;

        write_config($vmid, $conf);
    };

    my $unlockfn = sub {
        delete $conf->{lock};
        write_config($vmid, $conf);
    };

    lock_container($vmid, 10, $updatefn);

    PVE::Storage::volume_snapshot_rollback($storecfg, $volid, $snapname);

    lock_container($vmid, 5, $unlockfn);
}
1983
# Turn a container into a template: convert its rootfs volume into a base
# volume and store the resulting volume id in the config.
# Dies when the storage does not offer the 'template' feature for it.
sub template_create {
    my ($vmid, $conf) = @_;

    my $storecfg = PVE::Storage::config();

    my $rootinfo = parse_ct_rootfs($conf->{rootfs});
    my $volid = $rootinfo->{volume};

    PVE::Storage::volume_has_feature($storecfg, 'template', $volid)
        or die "Template feature is not available for '$volid'\n";

    PVE::Storage::activate_volumes($storecfg, [$volid]);

    # the base volume gets a new id, which replaces the old one
    $rootinfo->{volume} = PVE::Storage::vdisk_create_base($storecfg, $volid);
    $conf->{rootfs} = print_ct_mountpoint($rootinfo, 1);

    write_config($vmid, $conf);
}
2003
# True when the config marks the container as a template (template == 1).
sub is_template {
    my ($conf) = @_;

    my $flag = $conf->{template};

    return defined($flag) && $flag == 1;
}
2009
# List of all possible mountpoint config keys: 'rootfs' followed by
# mp0 .. mp($MAX_MOUNT_POINTS - 1), or the same list reversed when
# $reverse is true.
sub mountpoint_names {
    my ($reverse) = @_;

    my @names = ('rootfs', map { "mp$_" } 0 .. $MAX_MOUNT_POINTS - 1);

    return $reverse ? reverse @names : @names;
}
2021
# The container might have *different* symlinks than the host. realpath/abs_path
# use the actual filesystem to resolve links.
#
# So the path is cleaned up purely textually instead: it is made absolute,
# runs of slashes and '.' components are collapsed, and '..' components are
# removed by the substitutions below without consulting the filesystem.
# A trailing slash of the input is kept. Returns the cleaned path.
sub sanitize_mountpoint {
    my ($mp) = @_;
    $mp = '/' . $mp; # we always start with a slash
    $mp =~ s@/{2,}@/@g; # collapse sequences of slashes
    # Collapse /./ into a single slash (replacing it with nothing would glue
    # the neighbouring components together). Repeated, because adjacent
    # matches share a slash ("/././") and one /g pass skips every second one.
    1 while $mp =~ s@/\./@/@g;
    $mp =~ s@/\.(/?)$@$1@; # collapse a trailing /. or /./
    $mp =~ s@(.*)/[^/]+/\.\./@$1/@g; # collapse /../ without regard for symlinks
    $mp =~ s@/\.\.(/?)$@$1@; # collapse trailing /.. or /../ disregarding symlinks
    return $mp;
}
2034
# Call $func->($key, $mountpoint) for every mountpoint present in $conf, in
# the order given by mountpoint_names($reverse).
#
# Entries that are missing or cannot be parsed are skipped. The 'mp' path
# and, for absolute paths, the 'volume' are sanitized before the callback
# sees them.
sub foreach_mountpoint_full {
    my ($conf, $reverse, $func) = @_;

    foreach my $key (mountpoint_names($reverse)) {
        my $value = $conf->{$key};
        next if !defined($value);

        my $mountpoint;
        if ($key eq 'rootfs') {
            $mountpoint = parse_ct_rootfs($value, 1);
        } else {
            $mountpoint = parse_ct_mountpoint($value, 1);
        }
        next if !defined($mountpoint);

        $mountpoint->{mp} = sanitize_mountpoint($mountpoint->{mp});

        # only absolute host paths are sanitized, volume ids are left alone
        my $path = $mountpoint->{volume};
        if ($path =~ m|^/|) {
            $mountpoint->{volume} = sanitize_mountpoint($path);
        }

        $func->($key, $mountpoint);
    }
}
2052
# Iterate over the mountpoints of $conf in forward order (rootfs first);
# see foreach_mountpoint_full.
sub foreach_mountpoint {
    my ($conf, $func) = @_;

    my $reverse = 0;
    foreach_mountpoint_full($conf, $reverse, $func);
}
2058
# Iterate over the mountpoints of $conf in reverse order (rootfs last);
# see foreach_mountpoint_full.
sub foreach_mountpoint_reverse {
    my ($conf, $func) = @_;

    my $reverse = 1;
    foreach_mountpoint_full($conf, $reverse, $func);
}
2064
# Verify that $authuser may modify every option named in $key_list.
#
# root@pam is always allowed. For everybody else each option is mapped to
# the privilege guarding it (CPU, Disk, Memory, Network, or Options for
# everything else) and checked through $rpcenv->check_vm_perm, which is
# presumably what raises on a missing privilege - confirm against the
# RPC environment class. Returns 1 when all checks were passed.
sub check_ct_modify_config_perm {
    my ($rpcenv, $authuser, $vmid, $pool, $key_list) = @_;

    # Only the superuser may skip the checks. (The comparison used to be
    # inverted, which skipped them for everybody except root.)
    return 1 if $authuser eq 'root@pam';

    foreach my $opt (@$key_list) {

        if ($opt eq 'cpus' || $opt eq 'cpuunits' || $opt eq 'cpulimit') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.CPU']);
        } elsif ($opt eq 'rootfs' || $opt =~ /^mp\d+$/) {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk']);
        } elsif ($opt eq 'memory' || $opt eq 'swap') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Memory']);
        } elsif ($opt =~ m/^net\d+$/ || $opt eq 'nameserver' ||
                 $opt eq 'searchdomain' || $opt eq 'hostname') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Network']);
        } else {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Options']);
        }
    }

    return 1;
}
2088
# Unmount all mountpoints of container $vmid below its rootfs directory, in
# reverse order so that nested mounts go first.
#
# Mountpoints that are not currently mounted are skipped. A failing umount
# is fatal unless $noerr is set, in which case it is only reported as a
# warning and the remaining mountpoints are still processed.
# $storage_cfg is accepted for interface symmetry but not used here.
sub umount_all {
    my ($vmid, $storage_cfg, $conf, $noerr) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";

    foreach_mountpoint_reverse($conf, sub {
        my ($ms, $mountpoint) = @_;

        my $volid = $mountpoint->{volume};
        my $mount = $mountpoint->{mp};

        return if !$volid || !$mount;

        my $mount_path = "$rootdir/$mount";
        $mount_path =~ s!/+!/!g;

        return if !PVE::ProcFSTools::is_mounted($mount_path);

        eval {
            PVE::Tools::run_command(['umount', '-d', $mount_path]);
        };
        if (my $err = $@) {
            if ($noerr) {
                warn $err;
            } else {
                die $err;
            }
        }
    });
}
2120
# Mount all mountpoints of container $vmid below /var/lib/lxc/$vmid/rootfs
# and return that directory.
#
# The storage volumes are activated first. If any mount fails, everything
# is unmounted again (best effort) and the original error is re-raised.
sub mount_all {
    my ($vmid, $storage_cfg, $conf) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    File::Path::make_path($rootdir);

    my $volid_list = get_vm_volumes($conf);
    PVE::Storage::activate_volumes($storage_cfg, $volid_list);

    my $mount_one = sub {
        my ($ms, $mountpoint) = @_;

        mountpoint_mount($mountpoint, $rootdir, $storage_cfg);
    };

    eval { foreach_mountpoint($conf, $mount_one); };
    my $err = $@;
    if ($err) {
        warn "mounting container failed\n";
        umount_all($vmid, $storage_cfg, $conf, 1);
        die $err;
    }

    return $rootdir;
}
2145
2146
# Resolve the host-side path of a mountpoint without mounting anything:
# mountpoint_mount() is called with an undefined root directory.
sub mountpoint_mount_path {
    my ($mountpoint, $storage_cfg, $snapname) = @_;

    my $rootdir = undef; # no target directory: just return the path
    return mountpoint_mount($mountpoint, $rootdir, $storage_cfg, $snapname);
}
2152
# Die unless the canonical form of $path is identical to what
# Cwd::realpath resolves it to, i.e. refuse paths that pass through a
# symlink.
my $check_mount_path = sub {
    my ($path) = @_;

    my $canonical = File::Spec->canonpath($path);
    my $resolved = Cwd::realpath($canonical);

    die "mount path modified by symlink: $canonical != $resolved"
        if $resolved ne $canonical;
};
2161
# Look up the loop device associated with the file $path by parsing the
# output of 'losetup --associated'. Returns the device path (/dev/loopN) of
# the last matching output line, or undef when none matched.
sub query_loopdev {
    my ($path) = @_;

    my $loopdev;

    PVE::Tools::run_command(
        ['losetup', '--associated', $path],
        outfunc => sub {
            my ($line) = @_;
            $loopdev = $1 if $line =~ m@^(/dev/loop\d+):@;
        },
    );

    return $loopdev;
}
2175
# Mount a single mountpoint below $rootdir.
# use $rootdir = undef to just return the corresponding mount path
#
# Returns nothing when the mountpoint has no volume or no mount path.
# Otherwise returns the host-side source path; in list context a second
# value tells whether a loop device is used for it.
#
# The 'acl' and 'ro' mountpoint options are translated into mount options.
# Read-only is refused for bind mounts and for non-snapshot subvolumes.
sub mountpoint_mount {
    my ($mountpoint, $rootdir, $storage_cfg, $snapname) = @_;

    my $volid = $mountpoint->{volume};
    my $mount = $mountpoint->{mp};
    my $type = $mountpoint->{type};

    return if !$volid || !$mount;

    my $mount_path;

    if (defined($rootdir)) {
        $rootdir =~ s!/+$!!;
        $mount_path = "$rootdir/$mount";
        $mount_path =~ s!/+!/!g;
        $check_mount_path->($mount_path);
        File::Path::mkpath($mount_path);
    }

    my ($storage) = PVE::Storage::parse_volume_id($volid, 1);

    die "unknown snapshot path for '$volid'" if !$storage && defined($snapname);

    # options derived from the mountpoint definition (may be empty)
    my @mp_opts;
    if (defined($mountpoint->{acl})) {
        push @mp_opts, ($mountpoint->{acl} ? 'acl' : 'noacl');
    }
    push @mp_opts, 'ro' if $mountpoint->{ro};

    my @extra_opts = ('-o', join(',', @mp_opts));

    # --- mountpoints that are not backed by a storage volume ---
    if (!$storage) {
        if ($type eq 'device') {
            PVE::Tools::run_command(['mount', @extra_opts, $volid, $mount_path]) if $mount_path;
            return wantarray ? ($volid, 0) : $volid;
        }

        if ($type eq 'bind') {
            if ($mountpoint->{ro}) {
                die "read-only bind mounts not supported\n";
                # Theoretically we'd have to execute both:
                # mount -o bind $a $b
                # mount -o bind,remount,ro $a $b
            }
            die "directory '$volid' does not exist\n" if ! -d $volid;
            $check_mount_path->($volid);
            PVE::Tools::run_command(['mount', '-o', 'bind', @extra_opts, $volid, $mount_path]) if $mount_path;
            return wantarray ? ($volid, 0) : $volid;
        }

        die "unsupported storage";
    }

    # --- storage backed volumes ---
    my $scfg = PVE::Storage::storage_config($storage_cfg, $storage);
    my $path = PVE::Storage::path($storage_cfg, $volid, $snapname);

    my ($vtype, undef, undef, undef, undef, $isBase, $format) =
        PVE::Storage::parse_volname($storage_cfg, $volid);

    $format = 'iso' if $vtype eq 'iso'; # allow to handle iso files

    if ($format eq 'subvol') {
        if ($mount_path) {
            if (!$snapname) {
                if ($mountpoint->{ro}) {
                    die "read-only bind mounts not supported\n";
                }
                PVE::Tools::run_command(['mount', '-o', 'bind', @extra_opts, $path, $mount_path]);
            } elsif ($scfg->{type} eq 'zfspool') {
                # the source is passed to mount without leading slashes
                my $path_arg = $path;
                $path_arg =~ s!^/+!!;
                PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, '-t', 'zfs', $path_arg, $mount_path]);
            } else {
                die "cannot mount subvol snapshots for storage type '$scfg->{type}'\n";
            }
        }
        return wantarray ? ($path, 0) : $path;
    }

    if ($format ne 'raw' && $format ne 'iso') {
        die "unsupported image format '$format'\n";
    }

    my $use_loopdev = 0;
    if ($scfg->{path}) {
        # image file on a path based storage: mount through a loop device
        push @extra_opts, '-o', 'loop';
        $use_loopdev = 1;
    } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' ||
             $scfg->{type} eq 'rbd' || $scfg->{type} eq 'lvmthin') {
        # do nothing
    } else {
        die "unsupported storage type '$scfg->{type}'\n";
    }

    if ($mount_path) {
        # iso images, base volumes and snapshots are mounted read-only
        my @mode_opts;
        if ($format eq 'iso') {
            @mode_opts = ('-o', 'ro');
        } elsif ($isBase || defined($snapname)) {
            @mode_opts = ('-o', 'ro,noload');
        }
        PVE::Tools::run_command(['mount', @mode_opts, @extra_opts, $path, $mount_path]);
    }

    return wantarray ? ($path, $use_loopdev) : $path;
}
2281
# Collect the volume ids of all mountpoints of type 'volume' whose id names
# a storage. The mountpoint whose config key equals $excludes (if given) is
# left out. Returns an array reference.
sub get_vm_volumes {
    my ($conf, $excludes) = @_;

    my @volumes;

    foreach_mountpoint($conf, sub {
        my ($ms, $mountpoint) = @_;

        return if $excludes && $ms eq $excludes;

        my $volid = $mountpoint->{volume};

        return unless $volid && $mountpoint->{type} eq 'volume';

        # ids that do not parse as storage:volume are skipped
        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        push @volumes, $volid if $sid;
    });

    return \@volumes;
}
2304
# Create an ext4 filesystem on $dev with the 'mmp' feature enabled and the
# root directory owned by $rootuid:$rootgid.
sub mkfs {
    my ($dev, $rootuid, $rootgid) = @_;

    my @cmd = ('mkfs.ext4', '-O', 'mmp');
    push @cmd, '-E', "root_owner=$rootuid:$rootgid";
    push @cmd, $dev;

    PVE::Tools::run_command(\@cmd);
}
2312
# Create a fresh filesystem (via mkfs()) on a volume.
#
# $storage_cfg - storage configuration handed to the PVE::Storage calls
# $volid       - either a device path below /dev/ or a storage volume ID
# $rootuid     - owner uid for the root directory of the new filesystem
# $rootgid     - owner gid for the root directory of the new filesystem
#
# Dies if $volid is not a /dev/ path and has no storage part, or if the
# volume's format is not 'raw'.
sub format_disk {
    my ($storage_cfg, $volid, $rootuid, $rootgid) = @_;

    if ($volid =~ m!^/dev/.+!) {
        # The owner must be passed along here as well, otherwise mkfs() builds
        # its 'root_owner=' option from undefined values.
        mkfs($volid, $rootuid, $rootgid);
        return;
    }

    my ($storage) = PVE::Storage::parse_volume_id($volid, 1);

    die "cannot format volume '$volid' with no storage\n" if !$storage;

    PVE::Storage::activate_volumes($storage_cfg, [$volid]);

    my $path = PVE::Storage::path($storage_cfg, $volid);

    # only the last returned element (the format) is of interest here
    my (undef, undef, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storage_cfg, $volid);

    die "cannot format volume '$volid' (format == $format)\n"
        if $format ne 'raw';

    mkfs($path, $rootuid, $rootgid);
}
2337
# Free every volume listed in $vollist (array reference of volume IDs).
# A failure to free one volume is only reported as a warning, so the
# remaining volumes are still processed.
sub destroy_disks {
    my ($storecfg, $vollist) = @_;

    for my $volume (@$vollist) {
        my $freed = eval { PVE::Storage::vdisk_free($storecfg, $volume); 1 };
        next if $freed;
        warn $@ if $@;
    }
}
2346
# Allocate the volumes requested in $settings and store the resulting
# mountpoint definitions in $conf.
#
# A mountpoint whose volume is given as '<storage>:<size in GB>' gets a new
# volume allocated on that storage (on 'dir' and 'nfs' storages a size of 0
# yields a subvolume instead of a formatted raw image). Every other
# mountpoint is written to $conf as it is.
#
# $storecfg - storage configuration handed to the PVE::Storage calls
# $vmid     - ID of the container the volumes are allocated for
# $settings - configuration holding the requested mountpoints
# $conf     - container configuration which receives the mountpoint entries;
#             also the source of the id mapping used for the volume owner
#
# Returns an array reference with the IDs of all volumes allocated by this
# call. On error these volumes are destroyed again and the error is re-thrown.
sub create_disks {
    my ($storecfg, $vmid, $settings, $conf) = @_;

    my $vollist = [];

    eval {
        my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
        my $chown_vollist = [];

        foreach_mountpoint($settings, sub {
            my ($ms, $mountpoint) = @_;

            my $volid = $mountpoint->{volume};

            my ($storage) = PVE::Storage::parse_volume_id($volid, 1);

            if ($storage && ($volid =~ m/^([^:\s]+):(\d+(\.\d+)?)$/)) {
                my $size_gb = $2;

                my $size_kb = int(${size_gb}*1024) * 1024;

                my $scfg = PVE::Storage::storage_config($storecfg, $storage);
                # fixme: use better naming ct-$vmid-disk-X.raw?

                # Record every volume in $vollist right after its allocation,
                # so that the cleanup below also frees it when a later step
                # (e.g. formatting) fails.
                my $alloc = sub {
                    my ($fmt, $size) = @_;
                    my $new_volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, $fmt,
                                                              undef, $size);
                    push @$vollist, $new_volid;
                    return $new_volid;
                };

                if ($scfg->{type} eq 'dir' || $scfg->{type} eq 'nfs') {
                    if ($size_kb > 0) {
                        $volid = $alloc->('raw', $size_kb);
                        format_disk($storecfg, $volid, $rootuid, $rootgid);
                    } else {
                        $volid = $alloc->('subvol', 0);
                        push @$chown_vollist, $volid;
                    }
                } elsif ($scfg->{type} eq 'zfspool') {
                    $volid = $alloc->('subvol', $size_kb);
                    push @$chown_vollist, $volid;
                } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' || $scfg->{type} eq 'lvmthin') {
                    $volid = $alloc->('raw', $size_kb);
                    format_disk($storecfg, $volid, $rootuid, $rootgid);
                } elsif ($scfg->{type} eq 'rbd') {
                    die "krbd option must be enabled on storage type '$scfg->{type}'\n" if !$scfg->{krbd};
                    $volid = $alloc->('raw', $size_kb);
                    format_disk($storecfg, $volid, $rootuid, $rootgid);
                } else {
                    die "unable to create containers on storage type '$scfg->{type}'\n";
                }

                $mountpoint->{volume} = $volid;
                $mountpoint->{size} = $size_kb * 1024;
                $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            } else {
                # use specified/existing volid/dir/device
                $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            }
        });

        # Subvolumes are not formatted by us, so their directory has to be
        # handed over to the container's root user explicitly.
        PVE::Storage::activate_volumes($storecfg, $chown_vollist, undef);
        foreach my $volid (@$chown_vollist) {
            my $path = PVE::Storage::path($storecfg, $volid, undef);
            # report failures instead of silently ignoring them
            chown($rootuid, $rootgid, $path)
                or warn "unable to change owner of '$path' to $rootuid:$rootgid - $!\n";
        }
        PVE::Storage::deactivate_volumes($storecfg, $chown_vollist, undef);
    };
    # free allocated images on error
    if (my $err = $@) {
        destroy_disks($storecfg, $vollist);
        die $err;
    }
    return $vollist;
}
2424
2425 # bash completion helper
2426
# Bash completion helper: list the volume IDs offered as templates.
#
# $cmdname - name of the command being completed; 'restore' lists content of
#            type 'backup', everything else content of type 'vztmpl'
# $pname   - name of the parameter being completed (not used here)
# $cvalue  - text typed so far; a leading '<storeid>:' limits the listing to
#            that storage
#
# Returns an array reference of volume IDs.
sub complete_os_templates {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    my $storeid = ($cvalue =~ m/^([^:]+):/) ? $1 : undef;

    my $vtype = 'vztmpl';
    $vtype = 'backup' if $cmdname eq 'restore';

    my $data = PVE::Storage::template_list($cfg, $storeid, $vtype);

    my @volids;
    for my $items (values %$data) {
        for my $item (@$items) {
            next if !defined($item->{volid});
            push @volids, $item->{volid};
        }
    }

    return \@volids;
}
2450
# Shared implementation of the container ID completion helpers.
#
# $running - undef: return every ID reported by vmstatus()
#            true:  return only non-template containers listed as active
#            false: return only non-template containers not listed as active
#
# Returns an array reference of container IDs.
my $complete_ctid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();
    my $active_hash = list_active_containers();

    # without a filter every known ID qualifies, templates included
    return [ keys %$idlist ] if !defined($running);

    my $want_active = $running ? 1 : 0;

    my @matching = grep {
        my $status = $idlist->{$_};
        my $is_active = $active_hash->{$_} ? 1 : 0;
        !$status->{template} && $is_active == $want_active;
    } keys %$idlist;

    return \@matching;
};
2472
# Bash completion helper: all container IDs, without any state filter.
sub complete_ctid {
    return $complete_ctid_full->();
}
2476
# Bash completion helper: IDs of non-template containers that are not active.
sub complete_ctid_stopped {
    return $complete_ctid_full->(0);
}
2480
# Bash completion helper: IDs of non-template containers that are active.
sub complete_ctid_running {
    return $complete_ctid_full->(1);
}
2484
# Extract the user namespace id mappings from a container configuration.
#
# Every 'lxc.id_map' entry in $conf->{lxc} must have the form
# '<u|g> <container id> <host id> <length>'; anything else is a fatal error.
# If no mapping is configured but $conf->{unprivileged} is set, a default
# mapping of 65536 ids starting at host id 100000 is used.
#
# Returns a list ($id_map, $rootuid, $rootgid): an array reference of
# [type, container id, host id, length] entries, followed by the host uid and
# gid that container id 0 is mapped to (0 when there is no such mapping).
sub parse_id_maps {
    my ($conf) = @_;

    my @maps;
    my %root_of = (u => 0, g => 0);

    # work on a copy of the reference, so looping never touches $conf itself
    my $raw_entries = $conf->{lxc};
    foreach my $entry (@$raw_entries) {
        my ($key, $value) = @$entry;
        next unless $key eq 'lxc.id_map';

        die "failed to parse id_map: $value\n"
            if $value !~ /^([ug])\s+(\d+)\s+(\d+)\s+(\d+)\s*$/;

        my ($type, $ct, $host, $length) = ($1, $2, $3, $4);
        push @maps, [$type, $ct, $host, $length];

        # the mapping of container id 0 determines the owner of the root fs
        $root_of{$type} = $host if $ct == 0;
    }

    if (!@maps && $conf->{unprivileged}) {
        # Should we read them from /etc/subuid?
        @maps = (
            ['u', '0', '100000', '65536'],
            ['g', '0', '100000', '65536'],
        );
        $root_of{u} = $root_of{g} = 100000;
    }

    return (\@maps, $root_of{u}, $root_of{g});
}
2517
# Build the command prefix used to run a command inside the user namespace
# described by $id_map (array reference of mapping entries, each itself an
# array reference).
#
# Returns an array reference: 'lxc-usernsexec' followed by one
# '-m <fields joined with ":">' pair per mapping and a closing '--', or an
# empty array reference when there are no mappings.
sub userns_command {
    my ($id_map) = @_;

    return [] if !@$id_map;

    my @cmd = ('lxc-usernsexec');
    foreach my $mapping (@$id_map) {
        push @cmd, '-m', join(':', @$mapping);
    }
    push @cmd, '--';

    return \@cmd;
}
2525
2526 1;