]> git.proxmox.com Git - pve-container.git/blob - src/PVE/LXC.pm
lxc: read-only bind mounts
[pve-container.git] / src / PVE / LXC.pm
1 package PVE::LXC;
2
3 use strict;
4 use warnings;
5 use POSIX qw(EINTR);
6
7 use Socket;
8
9 use File::Path;
10 use File::Spec;
11 use Cwd qw();
12 use Fcntl qw(O_RDONLY);
13
14 use PVE::Cluster qw(cfs_register_file cfs_read_file);
15 use PVE::Storage;
16 use PVE::SafeSyslog;
17 use PVE::INotify;
18 use PVE::JSONSchema qw(get_standard_option);
19 use PVE::Tools qw($IPV6RE $IPV4RE dir_glob_foreach lock_file lock_file_full);
20 use PVE::Network;
21 use PVE::AccessControl;
22 use PVE::ProcFSTools;
23 use Time::HiRes qw (gettimeofday);
24
25 use Data::Dumper;
26
27 my $nodename = PVE::INotify::nodename();
28
29 my $cpuinfo= PVE::ProcFSTools::read_cpuinfo();
30
31 our $COMMON_TAR_FLAGS = [ '--sparse', '--numeric-owner', '--acls',
32 '--xattrs',
33 '--xattrs-include=user.*',
34 '--xattrs-include=security.capability',
35 '--warning=no-xattr-write' ];
36
37 cfs_register_file('/lxc/', \&parse_pct_config, \&write_pct_config);
38
# Property-string schema for the 'rootfs' config key. The mount point
# schema ($mp_desc) copies all of these keys and adds its own.
my $rootfs_desc = {
    # The default key: a bare value without "key=" is taken as the volume.
    volume => {
        description => 'Volume, device or directory to mount into the container.',
        type => 'string',
        format => 'pve-lxc-mp-string',
        format_description => 'volume',
        default_key => 1,
    },
    backup => {
        description => 'Whether to include the mountpoint in backups.',
        type => 'boolean',
        format_description => '[1|0]',
        optional => 1,
    },
    size => {
        description => 'Volume size (read only value).',
        type => 'string',
        format => 'disk-size',
        format_description => 'DiskSize',
        optional => 1,
    },
    acl => {
        description => 'Explicitly enable or disable ACL support.',
        type => 'boolean',
        format_description => 'acl',
        optional => 1,
    },
    ro => {
        description => 'Read-only mountpoint (not supported with bind mounts)',
        type => 'boolean',
        format_description => 'ro',
        optional => 1,
    },
    quota => {
        description => 'Enable user quotas inside the container (not supported with zfs subvolumes)',
        type => 'boolean',
        format_description => '[0|1]',
        optional => 1,
    },
};
79
# Standard option describing the container root file system; its value is a
# property string following $rootfs_desc.
PVE::JSONSchema::register_standard_option(
    'pve-ct-rootfs',
    {
        type => 'string',
        format => $rootfs_desc,
        description => "Use volume as container root.",
        optional => 1,
    },
);

# Standard option for snapshot names (a config id of at most 40 characters).
PVE::JSONSchema::register_standard_option(
    'pve-lxc-snapshot-name',
    {
        description => "The name of the snapshot.",
        type => 'string',
        format => 'pve-configid',
        maxLength => 40,
    },
);
91
# Schema of all regular container config keys. The netN, mpN and unusedN
# entries are added to this hash further down in the file.
my $confdesc = {
    lock => {
        type => 'string',
        description => "Lock/unlock the VM.",
        enum => [qw(migrate backup snapshot rollback)],
        optional => 1,
    },
    onboot => {
        type => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default => 0,
        optional => 1,
    },
    startup => get_standard_option('pve-startup-order'),
    template => {
        type => 'boolean',
        description => "Enable/disable Template.",
        default => 0,
        optional => 1,
    },
    arch => {
        type => 'string',
        description => "OS architecture type.",
        enum => [qw(amd64 i386)],
        default => 'amd64',
        optional => 1,
    },
    ostype => {
        type => 'string',
        description => "OS type. Corresponds to lxc setup scripts in /usr/share/lxc/config/<ostype>.common.conf.",
        enum => [qw(debian ubuntu centos fedora opensuse archlinux)],
        optional => 1,
    },
    console => {
        type => 'boolean',
        description => "Attach a console device (/dev/console) to the container.",
        default => 1,
        optional => 1,
    },
    tty => {
        type => 'integer',
        description => "Specify the number of tty available to the container",
        minimum => 0,
        maximum => 6,
        default => 2,
        optional => 1,
    },
    cpulimit => {
        type => 'number',
        description => "Limit of CPU usage. Note if the computer has 2 CPUs, it has a total of '2' CPU time. Value '0' indicates no CPU limit.",
        minimum => 0,
        maximum => 128,
        default => 0,
        optional => 1,
    },
    cpuunits => {
        type => 'integer',
        description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to the weights of all the other running VMs.\n\nNOTE: You can disable fair-scheduler configuration by setting this to 0.",
        minimum => 0,
        maximum => 500000,
        default => 1024,
        optional => 1,
    },
    memory => {
        type => 'integer',
        description => "Amount of RAM for the VM in MB.",
        minimum => 16,
        default => 512,
        optional => 1,
    },
    swap => {
        type => 'integer',
        description => "Amount of SWAP for the VM in MB.",
        minimum => 0,
        default => 512,
        optional => 1,
    },
    hostname => {
        type => 'string',
        format => 'dns-name',
        description => "Set a host name for the container.",
        maxLength => 255,
        optional => 1,
    },
    description => {
        type => 'string',
        description => "Container description. Only used on the configuration web interface.",
        optional => 1,
    },
    searchdomain => {
        type => 'string',
        format => 'dns-name-list',
        description => "Sets DNS search domains for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
        optional => 1,
    },
    nameserver => {
        type => 'string',
        format => 'address-list',
        description => "Sets DNS server IP address for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
        optional => 1,
    },
    rootfs => get_standard_option('pve-ct-rootfs'),
    # 'parent' and 'snaptime' are snapshot bookkeeping; json_config_properties()
    # does not expose them.
    parent => {
        type => 'string',
        format => 'pve-configid',
        description => "Parent snapshot name. This is used internally, and should not be modified.",
        maxLength => 40,
        optional => 1,
    },
    snaptime => {
        type => 'integer',
        description => "Timestamp for snapshots.",
        minimum => 0,
        optional => 1,
    },
    cmode => {
        type => 'string',
        description => "Console mode. By default, the console command tries to open a connection to one of the available tty devices. By setting cmode to 'console' it tries to attach to /dev/console instead. If you set cmode to 'shell', it simply invokes a shell inside the container (no login).",
        enum => [qw(shell console tty)],
        default => 'tty',
        optional => 1,
    },
    protection => {
        type => 'boolean',
        description => "Sets the protection flag of the container. This will prevent the CT or CT's disk remove/update operation.",
        default => 0,
        optional => 1,
    },
    unprivileged => {
        type => 'boolean',
        description => "Makes the container run as unprivileged user. (Should not be modified manually.)",
        default => 0,
        optional => 1,
    },
};
223
# Raw 'lxc.*' keys that may appear in a container config file.
#
# A value of 1 means the key is passed through as-is. Any other true value is
# a message that parse_pct_config() prints as a warning while dropping the
# entry. Keys below 'lxc.cgroup.' are not listed here; the parser accepts
# them by prefix.
my $valid_lxc_conf_keys = {
    (map { $_ => 1 } qw(
        lxc.include
        lxc.arch
        lxc.utsname
        lxc.haltsignal
        lxc.rebootsignal
        lxc.stopsignal
        lxc.init_cmd
        lxc.network.type
        lxc.network.flags
        lxc.network.link
        lxc.network.mtu
        lxc.network.name
        lxc.network.hwaddr
        lxc.network.ipv4
        lxc.network.ipv4.gateway
        lxc.network.ipv6
        lxc.network.ipv6.gateway
        lxc.network.script.up
        lxc.network.script.down
        lxc.pts
        lxc.console.logfile
        lxc.console
        lxc.tty
        lxc.devttydir
        lxc.hook.autodev
        lxc.autodev
        lxc.kmsg
        lxc.mount
        lxc.mount.entry
        lxc.mount.auto
        lxc.rootfs.mount
        lxc.cap.drop
        lxc.cap.keep
        lxc.aa_profile
        lxc.aa_allow_incomplete
        lxc.se_context
        lxc.seccomp
        lxc.id_map
        lxc.hook.pre-start
        lxc.hook.pre-mount
        lxc.hook.mount
        lxc.hook.start
        lxc.hook.stop
        lxc.hook.post-stop
        lxc.hook.clone
        lxc.hook.destroy
        lxc.loglevel
        lxc.logfile
        lxc.start.auto
        lxc.start.delay
        lxc.start.order
        lxc.group
        lxc.environment
    )),
    # Rejected keys, each with the reason shown to the user.
    'lxc.rootfs' => 'lxc.rootfs is auto generated from rootfs',
    'lxc.rootfs.options' => 'lxc.rootfs.options is not supported' .
        ', please use mountpoint options in the "rootfs" key',
};
283
# Property-string schema for the 'netN' config keys. It is registered as
# the 'pve-lxc-network' format and used by parse_lxc_network() and
# print_lxc_network().
my $netconf_desc = {
    type => {
        type => 'string',
        optional => 1,
        description => "Network interface type.",
        enum => [qw(veth)],
    },
    name => {
        type => 'string',
        format_description => 'String',
        description => 'Name of the network device as seen from inside the container. (lxc.network.name)',
        pattern => '[-_.\w\d]+',
    },
    bridge => {
        type => 'string',
        format_description => 'vmbr<Number>',
        description => 'Bridge to attach the network device to.',
        pattern => '[-_.\w\d]+',
        optional => 1,
    },
    hwaddr => {
        type => 'string',
        format_description => 'MAC',
        # The description used to be a copy of the 'bridge' text.
        description => 'MAC address of the network device. (lxc.network.hwaddr)',
        pattern => qr/(?:[a-f0-9]{2}:){5}[a-f0-9]{2}/i,
        optional => 1,
    },
    mtu => {
        type => 'integer',
        format_description => 'Number',
        description => 'Maximum transfer unit of the interface. (lxc.network.mtu)',
        minimum => 64, # minimum ethernet frame is 64 bytes
        optional => 1,
    },
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        description => 'IPv4 address in CIDR format.',
        optional => 1,
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        description => 'Default gateway for IPv4 traffic.',
        optional => 1,
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        description => 'IPv6 address in CIDR format.',
        optional => 1,
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        description => 'Default gateway for IPv6 traffic.',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        format_description => '[1|0]',
        description => "Controls whether this interface's firewall rules should be used.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        format_description => 'VlanNo',
        minimum => '2',
        maximum => '4094',
        description => "VLAN tag for this interface.",
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:;\d+)*/,
        format_description => 'vlanid[;vlanid...]',
        description => "VLAN ids to pass through the interface",
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-lxc-network', $netconf_desc);
369
# Add the net0 .. net9 config keys; every one of them shares $netconf_desc
# as its property-string format.
my $MAX_LXC_NETWORKS = 10;
foreach my $index (0 .. $MAX_LXC_NETWORKS - 1) {
    $confdesc->{"net$index"} = {
        type => 'string',
        format => $netconf_desc,
        description => "Specifies network interfaces for the container.",
        optional => 1,
    };
}

# Format check used for volume and mount point path strings.
PVE::JSONSchema::register_format('pve-lxc-mp-string', \&verify_lxc_mp_string);
# Format check for volume / mount point path strings.
#
# Rejects strings that contain relative path components:
#   - "/./" or "/../" anywhere,
#   - "/." or "/.." at the end,
#   - "../" at the beginning.
#
# Returns $mp when it is acceptable. Otherwise dies, or returns undef when
# $noerr is true.
sub verify_lxc_mp_string {
    my ($mp, $noerr) = @_;

    my @forbidden = (
        qr{/\.\.?/},
        qr{/\.\.?$},
        qr{^\.\./},
    );

    foreach my $pattern (@forbidden) {
        next if $mp !~ $pattern;
        return undef if $noerr;
        die "$mp contains illegal character sequences\n";
    }

    return $mp;
}
396
# Mount point schema: every rootfs key plus the mandatory in-container
# path 'mp'.
my $mp_desc = {
    %$rootfs_desc,
    mp => {
        description => 'Path to the mountpoint as seen from inside the container.',
        type => 'string',
        format => 'pve-lxc-mp-string',
        format_description => 'Path',
    },
};
PVE::JSONSchema::register_format('pve-ct-mountpoint', $mp_desc);

# Shared description used for every 'unusedN' config key.
my $unuseddesc = {
    type => 'string',
    format => 'pve-volume-id',
    description => "Reference to unused volumes.",
    optional => 1,
};

# Add the mp0 .. mp9 config keys.
my $MAX_MOUNT_POINTS = 10;
foreach my $index (0 .. $MAX_MOUNT_POINTS - 1) {
    $confdesc->{"mp$index"} = {
        type => 'string',
        format => $mp_desc,
        description => "Use volume as container mount point (experimental feature).",
        optional => 1,
    };
}
423
# Add the unused0 .. unusedN config keys. There are as many unused slots as
# mount points. The loop is bounded by $MAX_UNUSED_DISKS itself, so the
# number of registered keys always matches the limit that
# add_unused_volume() iterates over.
my $MAX_UNUSED_DISKS = $MAX_MOUNT_POINTS;
for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) {
    $confdesc->{"unused$i"} = $unuseddesc;
}
428
# Serialize a parsed container config back into its on-disk text form.
#
# The current config comes first, followed by one "[snapname]" section per
# snapshot (sorted by name). Within a section the description is written as
# leading '#' comment lines, regular keys follow in sorted order and raw
# lxc entries come last in their stored order.
#
# Dies if a regular property value contains a newline. $filename is unused.
sub write_pct_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    # Keys that are either bookkeeping or rendered separately.
    my %skip = map { $_ => 1 }
        qw(digest description pending snapshots snapname lxc);

    my $render_section = sub {
        my ($section) = @_;

        my $text = '';

        # the description goes to the top of the section as comment lines
        my $descr = $section->{description} || '';
        foreach my $descr_line (split(/\n/, $descr)) {
            $text .= '#' . PVE::Tools::encode_text($descr_line) . "\n";
        }

        foreach my $key (sort keys %$section) {
            next if $skip{$key};
            my $value = $section->{$key};
            die "detected invalid newline inside property '$key'\n" if $value =~ m/\n/;
            $text .= "$key: $value\n";
        }

        if (my $lxcconf = $section->{lxc}) {
            foreach my $entry (@$lxcconf) {
                $text .= "$entry->[0]: $entry->[1]\n";
            }
        }

        return $text;
    };

    my @sections = ($render_section->($conf));

    foreach my $snapname (sort keys %{$conf->{snapshots}}) {
        push @sections,
            "\n[$snapname]\n" . $render_section->($conf->{snapshots}->{$snapname});
    }

    return join('', @sections);
}
472
# Validate a single config value against the schema entry for $key and
# return its normalized form:
#   boolean - 1 or 0 (also accepts on/yes/true and off/no/false)
#   integer - the integer value (non-negative decimal digits only)
#   number  - the value unchanged (digits with an optional fraction)
#   string  - the value unchanged, after the optional format check
#
# Dies on unknown keys, undefined values, embedded line feeds and values
# that do not match the declared type.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    my $type = $desc->{type};

    if ($type eq 'string') {
        my $fmt = $desc->{format};
        PVE::JSONSchema::check_format($fmt, $value) if $fmt;
        return $value;
    }

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    die "internal error"
}
508
# Parse the raw text of a container config file.
#
# Parameters:
#   $filename - config path; must end in "/lxc/<vmid>.conf" (the vmid is
#               only used for warning messages)
#   $raw      - file content, or undef if the file does not exist
#
# Returns undef when $raw is undef, otherwise a hash with the current
# config keys plus:
#   digest    - SHA1 hex digest of $raw
#   snapshots - { snapname => config hash } for every "[snapname]" section
#   lxc       - list of [key, value] pairs for accepted raw lxc.* entries
#
# Lines that cannot be parsed only produce warnings. Dies if $filename does
# not look like a container config path.
sub parse_pct_config {
    my ($filename, $raw) = @_;

    return undef if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
    };

    # The dot is escaped so that only a real ".conf" suffix is accepted.
    $filename =~ m|/lxc/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    # $conf always points at the section currently being filled: the
    # top-level config first, then one snapshot hash per section header.
    my $conf = $res;
    my $descr = '';
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            $section = $1;
            # flush the description collected for the previous section
            $conf->{description} = $descr if $descr;
            $descr = '';
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        # comment lines carry the (encoded) description
        if ($line =~ m/^\#(.*)\s*$/) {
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(lxc\.[a-z0-9_\-\.]+)(:|\s*=)\s*(.*?)\s*$/) {
            my $key = $1;
            my $value = $3;
            # 1 = pass through, other true value = rejection message
            my $validity = $valid_lxc_conf_keys->{$key} || 0;
            if ($validity eq 1 || $key =~ m/^lxc\.cgroup\./) {
                push @{$conf->{lxc}}, [$key, $value];
            } elsif (my $errmsg = $validity) {
                warn "vm $vmid - $key: $errmsg\n";
            } else {
                warn "vm $vmid - unable to parse config: $line\n";
            }
        } elsif ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # Anchored at the line start so that only a real 'snapstate'
            # key matches, not any line that merely ends like one.
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(\S.*)\s*$/) {
            my $key = $1;
            my $value = $2;
            # On a failed check the raw value is kept and only a warning
            # is printed.
            eval { $value = check_type($key, $value); };
            warn "vm $vmid - unable to parse value of '$key' - $@" if $@;
            $conf->{$key} = $value;
        } else {
            warn "vm $vmid - unable to parse config: $line\n";
        }
    }

    $conf->{description} = $descr if $descr;

    delete $res->{snapstate}; # just to be sure

    return $res;
}
577
# Return { vmid => { type => 'lxc' } } for every LXC container that the
# cluster VM list assigns to the local node.
sub config_list {
    my $result = {};

    my $vmlist = PVE::Cluster::get_vmlist();
    return $result unless $vmlist && $vmlist->{ids};

    my $ids = $vmlist->{ids};

    foreach my $vmid (keys %$ids) {
        next unless $vmid; # skip CT0

        my $entry = $ids->{$vmid};
        next unless $entry->{node} && $entry->{node} eq $nodename;
        next unless $entry->{type} && $entry->{type} eq 'lxc';

        $result->{$vmid}->{type} = 'lxc';
    }

    return $result;
}
593
# Config path of container $vmid relative to the cluster file system root.
# $node defaults to the local node name.
sub cfs_config_path {
    my ($vmid, $node) = @_;

    $node ||= $nodename;

    return "nodes/$node/lxc/$vmid.conf";
}
600
# Absolute path (below /etc/pve) of the config file of container $vmid.
# $node is handed through to cfs_config_path().
sub config_file {
    my ($vmid, $node) = @_;

    return "/etc/pve/" . cfs_config_path($vmid, $node);
}
607
# Read and return the parsed config of container $vmid from node $node
# (default: the local node). Dies if no such config exists.
sub load_config {
    my ($vmid, $node) = @_;

    my $cfspath = cfs_config_path($vmid, $node || $nodename);

    my $conf = PVE::Cluster::cfs_read_file($cfspath);
    defined($conf) or die "container $vmid does not exist\n";

    return $conf;
}
619
# Write the initial config of container $vmid on the local node. The lxc
# config directory is created first; the result of mkdir is not checked.
sub create_config {
    my ($vmid, $conf) = @_;

    mkdir "/etc/pve/nodes/$nodename/lxc";

    write_config($vmid, $conf);
}
628
# Remove the config file of container $vmid from the local node.
sub destroy_config {
    my ($vmid) = @_;

    my $path = config_file($vmid, $nodename);

    unlink $path;
}
634
# Store $conf as the config of container $vmid on the local node.
sub write_config {
    my ($vmid, $conf) = @_;

    PVE::Cluster::cfs_write_file(cfs_config_path($vmid), $conf);
}
642
# flock: one file handle is used per process, so locking the same file
# several times from within one process succeeds.

my $lock_handles = {};
my $lockdir = "/run/lock/lxc";

# Path of the lock file that guards the config of container $vmid.
sub config_file_lock {
    my ($vmid) = @_;

    return $lockdir . "/pve-config-${vmid}.lock";
}
654
# Run $code->(@param) while holding the config lock of container $vmid,
# waiting at most $timeout for the lock. Returns the result of lock_file()
# and re-throws any error it left in $@.
sub lock_config_full {
    my ($vmid, $timeout, $code, @param) = @_;

    my $lockfile = config_file_lock($vmid);

    -d $lockdir or mkdir $lockdir;

    my $result = lock_file($lockfile, $timeout, $code, @param);
    die $@ if $@;

    return $result;
}
668
# Same as lock_config_full(), but goes through lock_file_full() and hands
# the extra $shared flag on to it.
sub lock_config_mode {
    my ($vmid, $timeout, $shared, $code, @param) = @_;

    my $lockfile = config_file_lock($vmid);

    -d $lockdir or mkdir $lockdir;

    my $result = lock_file_full($lockfile, $timeout, $shared, $code, @param);
    die $@ if $@;

    return $result;
}
682
# Run $code->(@param) under the config lock of container $vmid, using a
# fixed timeout value of 10.
sub lock_config {
    my ($vmid, $code, @param) = @_;

    my $timeout = 10;

    return lock_config_full($vmid, $timeout, $code, @param);
}
688
# True if $name is a config key known to the container schema.
sub option_exists {
    my ($name) = @_;

    my $desc = $confdesc->{$name};

    return defined($desc);
}
694
# Add the JSON schema properties for the create and set API calls.
#
# Copies every config key description into $prop, except the internal
# snapshot keys 'parent' and 'snaptime'. Entries that are already set in
# $prop are kept. Returns $prop.
sub json_config_properties {
    my $prop = shift;

    my %internal = (parent => 1, snaptime => 1);

    foreach my $opt (grep { !$internal{$_} } keys %$confdesc) {
        $prop->{$opt} = $confdesc->{$opt} unless $prop->{$opt};
    }

    return $prop;
}
707
708 # container status helpers
709
# Return { vmid => 1 } for every container that has an LXC command socket
# listed in /proc/net/unix. Returns an empty hash if that file cannot be
# opened.
sub list_active_containers {
    my $active = {};

    # The original note says LXC's own list_active_containers uses a
    # similar test.
    my $fh = IO::File->new("/proc/net/unix", "r");
    return $active if !$fh;

    while (defined(my $line = <$fh>)) {
        # the socket path is the last column
        next unless $line =~ m/^[a-f0-9]+:\s\S+\s\S+\s\S+\s\S+\s\S+\s\d+\s(\S+)$/;

        my $path = $1;
        $active->{$1} = 1 if $path =~ m!^@/var/lib/lxc/(\d+)/command$!;
    }

    close($fh);

    return $active;
}
733
# Return 1 if container $vmid is active, undef otherwise.
# warning: this is slow, it scans the complete list of active containers
sub check_running {
    my ($vmid) = @_;

    my $active = list_active_containers();

    return defined($active->{$vmid}) ? 1 : undef;
}
744
# Disk usage of a running container, taken from the root directory of the
# process $pid. $vmid is unused.
sub get_container_disk_usage {
    my ($vmid, $pid) = @_;

    my $rootpath = "/proc/$pid/root/";

    return PVE::Tools::df($rootpath, 1);
}
750
# Per-container CPU samples kept between vmstatus() calls.
my $last_proc_vmid_stat;

# Read the cpuacct.stat cgroup file of container $vmid and return
# { utime => ..., stime => ... }, or an empty hash if the content does not
# have the expected "user N / system N" layout.
my $parse_cpuacct_stat = sub {
    my ($vmid) = @_;

    my $raw = read_cgroup_value('cpuacct', $vmid, 'cpuacct.stat', 1);

    return {} unless $raw =~ m/^user (\d+)\nsystem (\d+)\n/;

    return { utime => $1, stime => $2 };
};
769
# Collect status information for containers on the local node.
#
# Parameters:
#   $opt_vmid - optional; restrict the result to this single container
#
# Returns { vmid => status hash }. Every status hash contains: status
# ('running' or 'stopped'), pid (running containers only), name, cpus,
# disk, maxdisk, mem, maxmem, swap, maxswap, uptime, cpu, netin, netout,
# diskread, diskwrite and template.
#
# The cpu value is derived from the difference to the sample stored by an
# earlier call, so the first call for a container reports 0.
sub vmstatus {
    my ($opt_vmid) = @_;

    my $list = $opt_vmid ? { $opt_vmid => { type => 'lxc' }} : config_list();

    my $active_hash = list_active_containers();

    my $cpucount = $cpuinfo->{cpus} || 1;

    my $cdtime = gettimeofday;

    # First pass: static values and defaults for every container.
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};

        eval { $d->{pid} = find_lxc_pid($vmid) if defined($active_hash->{$vmid}); };
        warn $@ if $@; # ignore errors (consider them stopped)

        $d->{status} = $d->{pid} ? 'running' : 'stopped';

        my $cfspath = cfs_config_path($vmid);
        my $conf = PVE::Cluster::cfs_read_file($cfspath) || {};

        $d->{name} = $conf->{'hostname'} || "CT$vmid";
        $d->{name} =~ s/[\s]//g;

        $d->{cpus} = $conf->{cpulimit} || $cpucount;

        if ($d->{pid}) {
            my $res = get_container_disk_usage($vmid, $d->{pid});
            $d->{disk} = $res->{used};
            $d->{maxdisk} = $res->{total};
        } else {
            $d->{disk} = 0;
            # use 4GB by default ??
            if (my $rootfs = $conf->{rootfs}) {
                my $rootinfo = parse_ct_rootfs($rootfs);
                # The parsed size has already been converted by
                # PVE::JSONSchema::parse_size() (bytes), so it must not be
                # scaled again.
                $d->{maxdisk} = $rootinfo->{size} || (4*1024*1024*1024);
            } else {
                $d->{maxdisk} = 4*1024*1024*1024;
            }
        }

        $d->{mem} = 0;
        $d->{swap} = 0;
        $d->{maxmem} = ($conf->{memory}||512)*1024*1024;
        $d->{maxswap} = ($conf->{swap}//0)*1024*1024;

        $d->{uptime} = 0;
        $d->{cpu} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $d->{template} = is_template($conf);
    }

    # Second pass: live values from /proc and the cgroups of running CTs.
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};
        my $pid = $d->{pid};

        next if !$pid; # skip stopped CTs

        my $ctime = (stat("/proc/$pid"))[10]; # 10 = ctime
        # the method lxcfs uses; keep the default of 0 if the process
        # vanished in the meantime and stat() returned nothing
        $d->{uptime} = time - $ctime if defined($ctime);

        $d->{mem} = read_cgroup_value('memory', $vmid, 'memory.usage_in_bytes');
        $d->{swap} = read_cgroup_value('memory', $vmid, 'memory.memsw.usage_in_bytes') - $d->{mem};

        my $blkio_bytes = read_cgroup_value('blkio', $vmid, 'blkio.throttle.io_service_bytes', 1);
        my @bytes = split(/\n/, $blkio_bytes);
        foreach my $byte (@bytes) {
            if (my ($key, $value) = $byte =~ /(Read|Write)\s+(\d+)/) {
                $d->{diskread} = $value if $key eq 'Read';
                $d->{diskwrite} = $value if $key eq 'Write';
            }
        }

        my $pstat = &$parse_cpuacct_stat($vmid);

        # the hash is empty if cpuacct.stat could not be parsed
        my $used = ($pstat->{utime} // 0) + ($pstat->{stime} // 0);

        my $old = $last_proc_vmid_stat->{$vmid};
        if (!$old) {
            # first sample for this container, nothing to compare with yet
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($cdtime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        # only recompute once enough time has passed, otherwise report the
        # previous value
        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            $d->{cpu} = $old->{cpu};
        }
    }

    my $netdev = PVE::ProcFSTools::read_proc_net_dev();

    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^veth([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $list->{$vmid};

        next if !$d;

        # The counters are read from the veth<vmid>i<N> device. Its
        # 'receive' counter is booked as the container's outgoing traffic
        # and its 'transmit' counter as incoming traffic.
        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};

    }

    return $list;
}
898
# Classify a mount point source string:
#   'device' - absolute path below /dev/
#   'bind'   - any other absolute path
#   'volume' - everything else
sub classify_mountpoint {
    my ($vol) = @_;

    return 'volume' if $vol !~ m!^/!;

    return $vol =~ m!^/dev/! ? 'device' : 'bind';
}
907
# Parse a mount point property string according to the schema $desc.
#
# Parameters:
#   $desc  - property-string schema ($rootfs_desc or $mp_desc)
#   $data  - the property string; undef is treated as an empty string
#   $noerr - if true, return undef on errors instead of dying
#
# Returns the parsed hash. A given 'size' is replaced by the result of
# PVE::JSONSchema::parse_size(), and 'type' is set to the result of
# classify_mountpoint() for the volume.
my $parse_ct_mountpoint_full = sub {
    my ($desc, $data, $noerr) = @_;

    $data //= '';

    my $res;
    eval { $res = PVE::JSONSchema::parse_property_string($desc, $data) };
    if ($@) {
        return undef if $noerr;
        die $@;
    }

    if (defined(my $size = $res->{size})) {
        # The parse result goes into its own variable, so the error
        # message can still show the offending input.
        my $parsed_size = PVE::JSONSchema::parse_size($size);
        if (!defined($parsed_size)) {
            return undef if $noerr;
            die "invalid size: $size\n";
        }
        $res->{size} = $parsed_size;
    }

    $res->{type} = classify_mountpoint($res->{volume});

    return $res;
};
933
# Parse a 'rootfs' property string. The result always gets mp => '/'.
# With $noerr set, undef is returned for unparsable input.
sub parse_ct_rootfs {
    my ($data, $noerr) = @_;

    my $rootfs = $parse_ct_mountpoint_full->($rootfs_desc, $data, $noerr);
    return $rootfs if !defined($rootfs);

    $rootfs->{mp} = '/';

    return $rootfs;
}
943
# Parse an 'mpN' property string using the mount point schema. With $noerr
# set, undef is returned for unparsable input.
sub parse_ct_mountpoint {
    my ($data, $noerr) = @_;

    return $parse_ct_mountpoint_full->($mp_desc, $data, $noerr);
}
949
# Turn a parsed mount point hash back into a property string. The computed
# 'type' key is never printed; 'mp' is left out as well when $nomp is true.
sub print_ct_mountpoint {
    my ($info, $nomp) = @_;

    my $skip = $nomp ? [ 'type', 'mp' ] : [ 'type' ];

    return PVE::JSONSchema::print_property_string($info, $mp_desc, $skip);
}
956
# Turn a parsed network hash back into a 'netN' property string.
sub print_lxc_network {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $netconf_desc);
}
961
# Parse a 'netN' property string. Empty input gives an empty hash.
# Otherwise the type is forced to 'veth' and a random MAC address is
# assigned when none is configured.
sub parse_lxc_network {
    my ($data) = @_;

    return {} if !$data;

    my $net = PVE::JSONSchema::parse_property_string($netconf_desc, $data);

    $net->{type} = 'veth';
    $net->{hwaddr} ||= PVE::Tools::random_ether_addr();

    return $net;
}
976
# Read the cgroup file $name of container $vmid from the controller $group.
# Returns the whole content if $full is true, else only the first line.
sub read_cgroup_value {
    my ($group, $vmid, $name, $full) = @_;

    my $path = "/sys/fs/cgroup/$group/lxc/$vmid/$name";

    return $full
        ? PVE::Tools::file_get_contents($path)
        : PVE::Tools::file_read_firstline($path);
}
986
# Write $value to the cgroup file $name of container $vmid in the
# controller $group. Nothing is written if the file does not exist.
sub write_cgroup_value {
    my ($group, $vmid, $name, $value) = @_;

    my $path = "/sys/fs/cgroup/$group/lxc/$vmid/$name";

    -e $path and PVE::ProcFSTools::write_proc_entry($path, $value);
}
994
# Scan /proc for running "lxc-console -n <vmid>" processes.
# Returns { vmid => [ pid, ... ] }.
sub find_lxc_console_pids {
    my $pids_by_vmid = {};

    my $inspect = sub {
        my ($pid) = @_;

        my $cmdline = PVE::Tools::file_read_firstline("/proc/$pid/cmdline");
        return if !$cmdline;

        # only the exact form "lxc-console -n <vmid>" counts, i.e. three
        # NUL separated arguments
        my ($prog, $flag, $vmid, @extra) = split(/\0/, $cmdline);
        return if !defined($vmid) || @extra;

        return if $flag ne '-n';
        return if $vmid !~ m/^\d+$/;
        return if $prog !~ m|^(/usr/bin/)?lxc-console$|;

        push @{$pids_by_vmid->{$vmid}}, $pid;
    };

    PVE::Tools::dir_glob_foreach('/proc', '\d+', $inspect);

    return $pids_by_vmid;
}
1020
# Return the PID that "lxc-info -n <vmid> -p" reports for container $vmid.
# Dies if the output contains no PID line.
sub find_lxc_pid {
    my ($vmid) = @_;

    my $pid;

    PVE::Tools::run_command(
        ['lxc-info', '-n', $vmid, '-p'],
        outfunc => sub {
            my ($line) = @_;
            $pid = $1 if $line =~ m/^PID:\s+(\d+)$/;
        },
    );

    die "unable to get PID for CT $vmid (not running?)\n" if !$pid;

    return $pid;
}
1035
# Parse "address/prefix" with a prefix length from 8 to 32 and return
# { address => ..., netmask => ... }. Dies on invalid input, or returns
# undef when $noerr is true.
#
# Note: Net::IP cannot be used here, because it only allows strict CIDR
# networks.
sub parse_ipv4_cidr {
    my ($cidr, $noerr) = @_;

    if (my ($address, $bits) = $cidr =~ m!^($IPV4RE)(?:/(\d+))$!) {
        if ($bits > 7 && $bits <= 32) {
            return {
                address => $address,
                netmask => $PVE::Network::ipv4_reverse_mask->[$bits],
            };
        }
    }

    return undef if $noerr;

    die "unable to parse ipv4 address/mask\n";
}
1049
# Die if the config carries a lock; the message names the lock.
sub check_lock {
    my ($conf) = @_;

    my $lock = $conf->{'lock'};

    die "VM is locked ($lock)\n" if $lock;
}
1055
# Die with "$err_msg - protection mode enabled" if the config has the
# protection flag set.
sub check_protection {
    my ($vm_conf, $err_msg) = @_;

    die "$err_msg - protection mode enabled\n" if $vm_conf->{protection};
}
1063
# Generate /var/lib/lxc/<vmid>/config from the container config $conf.
#
# For templates an existing config file is removed and nothing is written.
# Otherwise the file is rebuilt from scratch and the rootfs directory below
# /var/lib/lxc/<vmid> is created. Dies on a missing 'arch' or 'ostype', an
# unknown ostype or an unparsable rootfs. $storage_cfg is unused.
sub update_lxc_config {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $dir = "/var/lib/lxc/$vmid";

    if ($conf->{template}) {
        unlink "$dir/config";
        return;
    }

    # The generated file is collected line by line and written at the end.
    my @out;

    die "missing 'arch' - internal error" if !$conf->{arch};
    push @out, "lxc.arch = $conf->{arch}\n";

    my $unprivileged = $conf->{unprivileged};
    my $custom_idmap = grep { $_->[0] eq 'lxc.id_map' } @{$conf->{lxc}};
    my $use_userns = $unprivileged || $custom_idmap;

    my $ostype = $conf->{ostype} || die "missing 'ostype' - internal error";
    if ($ostype !~ /^(?:debian | ubuntu | centos | fedora | opensuse | archlinux)$/x) {
        die "implement me (ostype $ostype)";
    }
    push @out, "lxc.include = /usr/share/lxc/config/$ostype.common.conf\n";
    push @out, "lxc.include = /usr/share/lxc/config/$ostype.userns.conf\n"
        if $use_userns;

    # WARNING: DO NOT REMOVE this without making sure that loop device nodes
    # cannot be exposed to the container with r/w access (cgroup perms).
    # When this is enabled mounts will still remain in the monitor's namespace
    # after the container unmounted them and thus will not detach from their
    # files while the container is running!
    push @out, "lxc.monitor.unshare = 1\n";

    # Should we read them from /etc/subuid?
    if ($unprivileged && !$custom_idmap) {
        push @out,
            "lxc.id_map = u 0 100000 65536\n",
            "lxc.id_map = g 0 100000 65536\n";
    }

    if (!has_dev_console($conf)) {
        push @out,
            "lxc.console = none\n",
            "lxc.cgroup.devices.deny = c 5:1 rwm\n";
    }

    my $ttycount = get_tty_count($conf);
    push @out, "lxc.tty = $ttycount\n";

    # some init scripts expect a linux terminal (turnkey).
    push @out, "lxc.environment = TERM=linux\n";

    my $utsname = $conf->{hostname} || "CT$vmid";
    push @out, "lxc.utsname = $utsname\n";

    # memory and swap are configured in MB; the memsw limit covers both
    my $memory = $conf->{memory} || 512;
    my $swap = $conf->{swap} // 0;

    my $lxcmem = int($memory*1024*1024);
    push @out, "lxc.cgroup.memory.limit_in_bytes = $lxcmem\n";

    my $lxcswap = int(($memory + $swap)*1024*1024);
    push @out, "lxc.cgroup.memory.memsw.limit_in_bytes = $lxcswap\n";

    if (my $cpulimit = $conf->{cpulimit}) {
        my $quota = int(100000*$cpulimit);
        push @out,
            "lxc.cgroup.cpu.cfs_period_us = 100000\n",
            "lxc.cgroup.cpu.cfs_quota_us = $quota\n";
    }

    my $shares = $conf->{cpuunits} || 1024;
    push @out, "lxc.cgroup.cpu.shares = $shares\n";

    # The parse result is not needed here; the call only makes sure that
    # the rootfs entry is valid (it dies otherwise).
    parse_ct_rootfs($conf->{rootfs});

    push @out, "lxc.rootfs = $dir/rootfs\n";

    my $netcount = 0;
    foreach my $key (keys %$conf) {
        next if $key !~ m/^net(\d+)$/;
        my $index = $1;
        my $net = parse_lxc_network($conf->{$key});
        $netcount++;
        push @out, "lxc.network.type = veth\n";
        push @out, "lxc.network.veth.pair = veth${vmid}i${index}\n";
        push @out, "lxc.network.hwaddr = $net->{hwaddr}\n" if defined($net->{hwaddr});
        push @out, "lxc.network.name = $net->{name}\n" if defined($net->{name});
        push @out, "lxc.network.mtu = $net->{mtu}\n" if defined($net->{mtu});
    }

    # raw lxc.* entries from the config are appended verbatim
    if (my $lxcconf = $conf->{lxc}) {
        foreach my $entry (@$lxcconf) {
            my ($k, $v) = @$entry;
            $netcount++ if $k eq 'lxc.network.type';
            push @out, "$k = $v\n";
        }
    }

    push @out, "lxc.network.type = empty\n" if !$netcount;

    File::Path::mkpath("$dir/rootfs");

    PVE::Tools::file_set_contents("$dir/config", join('', @out));
}
1170
# Verify and clean up a nameserver list: every entry has to pass
# PVE::JSONSchema::pve_verify_ip(), and the entries are returned joined by
# single spaces.
sub verify_nameserver_list {
    my ($nameserver_list) = @_;

    my @servers = PVE::Tools::split_list($nameserver_list);

    foreach my $server (@servers) {
        PVE::JSONSchema::pve_verify_ip($server);
    }

    return join(' ', @servers);
}
1183
# Clean up a search domain list: split it and return the entries joined by
# single spaces. The entries themselves are not validated.
sub verify_searchdomain_list {
    my ($searchdomain_list) = @_;

    # todo: should we add checks for valid dns domains?
    my @domains = PVE::Tools::split_list($searchdomain_list);

    return join(' ', @domains);
}
1195
# Record $volid in the lowest-numbered free 'unusedN' slot of $config.
#
# Returns the key that was used. Returns nothing (and changes nothing) if the
# volume is already listed in one of the unused slots. Dies if all
# $MAX_UNUSED_DISKS slots are occupied.
sub add_unused_volume {
    my ($config, $volid) = @_;

    my $free_key;
    # Walk from the highest index down, so that the last free slot remembered
    # is the one with the lowest index.
    foreach my $index (reverse(0 .. $MAX_UNUSED_DISKS - 1)) {
        my $candidate = "unused$index";
        my $current = $config->{$candidate};
        if (!$current) {
            $free_key = $candidate;
            next;
        }
        return if $current eq $volid; # do not add duplicates
    }

    die "Too many unused volumes - please delete them first.\n" if !$free_key;

    $config->{$free_key} = $volid;

    return $free_key;
}
1215
# Apply option changes and deletions to a container configuration.
#
# Parameters:
#   $vmid    - container ID
#   $conf    - configuration hash, modified in place
#   $running - true if the container is currently running
#   $param   - hash ref of options to set ('memory' and 'swap' are extracted
#              from it, i.e. removed from the caller's hash)
#   $delete  - optional array ref of option names to delete
#
# While the container is running, the configuration is written after every
# processed option. Options that cannot be changed at runtime are skipped,
# collected, and reported with a single die at the very end, after everything
# else has been applied.
sub update_pct_config {
    my ($vmid, $conf, $running, $param, $delete) = @_;

    my @nohotplug;

    my $new_disks = 0;
    my @deleted_volumes;

    my $rootdir;
    if ($running) {
        my $pid = find_lxc_pid($vmid);
        $rootdir = "/proc/$pid/root";
    }

    # Remember options which cannot be hot-plugged. Returns true when the
    # caller has to skip the option (container running), false otherwise.
    my $hotplug_error = sub {
        if ($running) {
            push @nohotplug, @_;
            return 1;
        } else {
            return 0;
        }
    };

    if (defined($delete)) {
        foreach my $opt (@$delete) {
            if (!exists($conf->{$opt})) {
                warn "no such option: $opt\n";
                next;
            }

            if ($opt eq 'hostname' || $opt eq 'memory' || $opt eq 'rootfs') {
                die "unable to delete required option '$opt'\n";
            } elsif ($opt eq 'swap') {
                delete $conf->{$opt};
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", -1);
            } elsif ($opt eq 'description' || $opt eq 'onboot' || $opt eq 'startup') {
                delete $conf->{$opt};
            } elsif ($opt eq 'nameserver' || $opt eq 'searchdomain' ||
                     $opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
                next if $hotplug_error->($opt);
                delete $conf->{$opt};
            } elsif ($opt =~ m/^net(\d+)$/) {
                # \d+ (not \d): interfaces with a multi-digit index must be
                # deletable too, matching the pattern used when setting netN.
                my $netid = $1;
                delete $conf->{$opt};
                next if !$running;
                PVE::Network::veth_delete("veth${vmid}i$netid");
            } elsif ($opt eq 'protection') {
                delete $conf->{$opt};
            } elsif ($opt =~ m/^unused(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                # the volume itself is freed further below
                push @deleted_volumes, $conf->{$opt};
                delete $conf->{$opt};
            } elsif ($opt =~ m/^mp(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                my $mountpoint = parse_ct_mountpoint($conf->{$opt});
                if ($mountpoint->{type} eq 'volume') {
                    # keep the volume around as 'unusedN' instead of freeing it
                    add_unused_volume($conf, $mountpoint->{volume})
                }
                delete $conf->{$opt};
            } elsif ($opt eq 'unprivileged') {
                die "unable to delete read-only option: '$opt'\n";
            } else {
                die "implement me (delete: $opt)"
            }
            write_config($vmid, $conf) if $running;
        }
    }

    # There's no separate swap size to configure, there's memory and "total"
    # memory (iow. memory+swap). This means we have to change them together.
    my $wanted_memory = PVE::Tools::extract_param($param, 'memory');
    my $wanted_swap = PVE::Tools::extract_param($param, 'swap');
    if (defined($wanted_memory) || defined($wanted_swap)) {

        my $old_memory = ($conf->{memory} || 512);
        my $old_swap = ($conf->{swap} || 0);

        $wanted_memory //= $old_memory;
        $wanted_swap //= $old_swap;

        my $total = $wanted_memory + $wanted_swap;
        if ($running) {
            my $old_total = $old_memory + $old_swap;
            # When growing, the memory+swap limit is written first; when
            # shrinking, the memory limit is written first.
            if ($total > $old_total) {
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
                write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
            } else {
                write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
            }
        }
        $conf->{memory} = $wanted_memory;
        $conf->{swap} = $wanted_swap;

        write_config($vmid, $conf) if $running;
    }

    foreach my $opt (keys %$param) {
        my $value = $param->{$opt};
        if ($opt eq 'hostname') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'onboot') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt eq 'startup') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
            next if $hotplug_error->($opt);
            $conf->{$opt} = $value;
        } elsif ($opt eq 'nameserver') {
            next if $hotplug_error->($opt);
            my $list = verify_nameserver_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'searchdomain') {
            next if $hotplug_error->($opt);
            my $list = verify_searchdomain_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'cpulimit') {
            next if $hotplug_error->($opt); # FIXME: hotplug
            $conf->{$opt} = $value;
        } elsif ($opt eq 'cpuunits') {
            $conf->{$opt} = $value;
            write_cgroup_value("cpu", $vmid, "cpu.shares", $value);
        } elsif ($opt eq 'description') {
            $conf->{$opt} = PVE::Tools::encode_text($value);
        } elsif ($opt =~ m/^net(\d+)$/) {
            my $netid = $1;
            my $net = parse_lxc_network($value);
            if (!$running) {
                $conf->{$opt} = print_lxc_network($net);
            } else {
                update_net($vmid, $conf, $opt, $net, $netid, $rootdir);
            }
        } elsif ($opt eq 'protection') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt =~ m/^mp(\d+)$/) {
            next if $hotplug_error->($opt);
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            $conf->{$opt} = $value;
            $new_disks = 1;
        } elsif ($opt eq 'rootfs') {
            next if $hotplug_error->($opt);
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            $conf->{$opt} = $value;
        } elsif ($opt eq 'unprivileged') {
            die "unable to modify read-only option: '$opt'\n";
        } else {
            die "implement me: $opt";
        }
        write_config($vmid, $conf) if $running;
    }

    if (@deleted_volumes) {
        my $storage_cfg = PVE::Storage::config();
        foreach my $volume (@deleted_volumes) {
            delete_mountpoint_volume($storage_cfg, $vmid, $volume);
        }
    }

    if ($new_disks) {
        my $storage_cfg = PVE::Storage::config();
        create_disks($storage_cfg, $vmid, $conf, $conf);
    }

    # This should be the last thing we do here
    if ($running && scalar(@nohotplug)) {
        die "unable to modify " . join(',', @nohotplug) . " while container is running\n";
    }
}
1386
# Tell whether the container gets a /dev/console.
#
# True when the 'console' option is unset or set to a true value; false only
# when it is explicitly set to a false value.
sub has_dev_console {
    my ($conf) = @_;

    my $console = $conf->{console};

    return !defined($console) || !!$console;
}
1392
# Return the configured 'tty' value, falling back to the default declared for
# 'tty' in $confdesc when the option is not defined.
sub get_tty_count {
    my ($conf) = @_;

    my $tty = $conf->{tty};

    return defined($tty) ? $tty : $confdesc->{tty}->{default};
}
1398
# Return the configured 'cmode' value, falling back to the default declared
# for 'cmode' in $confdesc when the option is not defined.
sub get_cmode {
    my ($conf) = @_;

    my $cmode = $conf->{cmode};

    return defined($cmode) ? $cmode : $confdesc->{cmode}->{default};
}
1404
# Build the command (as an array ref) used to open a console for a container,
# depending on its console mode:
#   console - lxc-console attached to tty 0
#   tty     - lxc-console without an explicit tty
#   shell   - lxc-attach with a cleared environment
# Dies with "internal error" for any other mode.
sub get_console_command {
    my ($vmid, $conf) = @_;

    my $cmode = get_cmode($conf);

    return ['lxc-console', '-n', $vmid, '-t', 0] if $cmode eq 'console';
    return ['lxc-console', '-n', $vmid] if $cmode eq 'tty';
    return ['lxc-attach', '--clear-env', '-n', $vmid] if $cmode eq 'shell';

    die "internal error";
}
1420
# Return the (IPv4, IPv6) addresses configured on net0.
#
# Returns undef if net0 is not configured. Keyword values ('dhcp'/'manual'
# for IPv4, 'auto'/'dhcp'/'manual' for IPv6) are reported as undef; for real
# addresses a trailing "/<digits>" prefix length is stripped.
sub get_primary_ips {
    my ($conf) = @_;

    # return data from net0

    return undef if !defined($conf->{net0});
    my $net = parse_lxc_network($conf->{net0});

    # Shared clean-up: empty values are passed through untouched, keywords
    # become undef, anything else loses its prefix length.
    my $plain_address = sub {
        my ($address, $keywords) = @_;

        return $address if !$address;
        return undef if $address =~ $keywords;

        $address =~ s!/\d+$!!;
        return $address;
    };

    my $ipv4 = $plain_address->($net->{ip}, qr/^(dhcp|manual)$/);
    my $ipv6 = $plain_address->($net->{ip6}, qr/^(auto|dhcp|manual)$/);

    return ($ipv4, $ipv6);
}
1448
# Free the storage volume behind a mountpoint.
#
# Nothing is done unless classify_mountpoint() reports 'volume' for $volume,
# and the volume is only freed when its owner (as reported by
# PVE::Storage::parse_volname) is $vmid.
sub delete_mountpoint_volume {
    my ($storage_cfg, $vmid, $volume) = @_;

    my $kind = classify_mountpoint($volume);
    return if $kind ne 'volume';

    my (undef, undef, $owner) = PVE::Storage::parse_volname($storage_cfg, $volume);
    PVE::Storage::vdisk_free($storage_cfg, $volume) if $vmid == $owner;
}
1457
# Destroy a container: free the volumes of all its mountpoints, remove its
# directory below /var/lib/lxc and finally destroy its configuration.
sub destroy_lxc_container {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $free_volume = sub {
        my ($ms, $mountpoint) = @_;
        delete_mountpoint_volume($storage_cfg, $vmid, $mountpoint->{volume});
    };
    foreach_mountpoint($conf, $free_volume);

    # Results are deliberately not checked: the files may not exist.
    my $lxc_dir = "/var/lib/lxc/$vmid";
    rmdir "$lxc_dir/rootfs";
    unlink "$lxc_dir/config";
    rmdir $lxc_dir;
    destroy_config($vmid);

    #my $cmd = ['lxc-destroy', '-n', $vmid ];
    #PVE::Tools::run_command($cmd);
}
1474
# Clean up after a container was stopped: deactivate all of its volumes,
# unless $keepActive is set. Failures are only warned about.
sub vm_stop_cleanup {
    my ($storage_cfg, $vmid, $conf, $keepActive) = @_;

    eval {
        if (!$keepActive) {
            my $volumes = get_vm_volumes($conf);
            PVE::Storage::deactivate_volumes($storage_cfg, $volumes);
        }
    };
    warn $@ if $@; # avoid errors - just warn
}
1487
# Numeric "not equal" which tolerates undefined operands: two undefined values
# are equal, an undefined and a defined value differ.
my $safe_num_ne = sub {
    my ($left, $right) = @_;

    if (defined($left) && defined($right)) {
        return $left != $right;
    }

    return (defined($left) || defined($right)) ? 1 : 0;
};
1497
# String "not equal" which tolerates undefined operands: two undefined values
# are equal, an undefined and a defined value differ.
my $safe_string_ne = sub {
    my ($left, $right) = @_;

    if (defined($left) && defined($right)) {
        return $left ne $right;
    }

    return (defined($left) || defined($right)) ? 1 : 0;
};
1507
# Apply a new network configuration ($newnet) to interface $opt of a running
# container.
#
# - No previous configuration: the interface is hot-plugged.
# - hwaddr or name changed: the veth device is deleted and hot-plugged again.
# - bridge, tag or firewall changed: the veth device is re-plugged.
# Afterwards the IP configuration is updated via update_ipconfig().
# The configuration is written after each intermediate step.
sub update_net {
    my ($vmid, $conf, $opt, $newnet, $netid, $rootdir) = @_;

    # for when there are physical interfaces
    die "cannot update interface of type $newnet->{type}"
        if $newnet->{type} ne 'veth';

    my $veth = "veth${vmid}i${netid}";
    my $eth = $newnet->{name};

    my $oldnetcfg = $conf->{$opt};
    if (!$oldnetcfg) {
        hotplug_net($vmid, $conf, $opt, $newnet, $netid);
    } else {
        my $oldnet = parse_lxc_network($oldnetcfg);

        if ($safe_string_ne->($oldnet->{hwaddr}, $newnet->{hwaddr}) ||
            $safe_string_ne->($oldnet->{name}, $newnet->{name})) {

            # the device itself changes: drop it and create it from scratch
            PVE::Network::veth_delete($veth);
            delete $conf->{$opt};
            write_config($vmid, $conf);

            hotplug_net($vmid, $conf, $opt, $newnet, $netid);

        } elsif ($safe_string_ne->($oldnet->{bridge}, $newnet->{bridge}) ||
                 $safe_num_ne->($oldnet->{tag}, $newnet->{tag}) ||
                 $safe_num_ne->($oldnet->{firewall}, $newnet->{firewall})) {

            my @plug_properties = qw(bridge tag firewall);

            if ($oldnet->{bridge}) {
                PVE::Network::tap_unplug($veth);
                # record the unplugged state before plugging again
                foreach my $property (@plug_properties) {
                    delete $oldnet->{$property};
                }
                $conf->{$opt} = print_lxc_network($oldnet);
                write_config($vmid, $conf);
            }

            PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});
            foreach my $property (@plug_properties) {
                $oldnet->{$property} = $newnet->{$property} if $newnet->{$property};
            }
            $conf->{$opt} = print_lxc_network($oldnet);
            write_config($vmid, $conf);
        }
    }

    update_ipconfig($vmid, $conf, $opt, $eth, $newnet, $rootdir);
}
1557
# Hot-plug a veth interface into a running container.
#
# Creates the veth pair, plugs the host side, hands the peer to the container
# under the name $newnet->{name}, sets the link up inside the container, and
# stores the applied (non-IP) properties in $conf->{$opt}.
sub hotplug_net {
    my ($vmid, $conf, $opt, $newnet, $netid) = @_;

    my $veth = "veth${vmid}i${netid}";
    my $vethpeer = $veth . "p";
    my $eth = $newnet->{name};

    PVE::Network::veth_create($veth, $vethpeer, $newnet->{bridge}, $newnet->{hwaddr});
    PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});

    # attach peer in container
    PVE::Tools::run_command(['lxc-device', '-n', $vmid, 'add', $vethpeer, "$eth" ]);

    # link up peer in container
    PVE::Tools::run_command(
        ['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', '/sbin/ip', 'link', 'set', $eth, 'up']);

    # Only the properties handled here are recorded; the IP settings are not
    # part of the stored value at this point.
    my $done = { type => 'veth' };
    foreach my $property (qw(bridge tag firewall hwaddr name)) {
        my $value = $newnet->{$property};
        $done->{$property} = $value if $value;
    }
    $conf->{$opt} = print_lxc_network($done);

    write_config($vmid, $conf);
}
1584
# Apply changed IP/gateway settings of interface $opt to a running container.
#
# Parameters:
#   $vmid    - container ID
#   $conf    - configuration hash; $conf->{$opt} holds the currently applied
#              settings and is updated after a successful change
#   $opt     - name of the network option
#   $eth     - interface name inside the container
#   $newnet  - parsed network settings to apply
#   $rootdir - container root directory, passed to PVE::LXC::Setup
#
# IPv4 and IPv6 are handled one after the other. For each family the new
# address is added first, then the gateway is replaced, and only then the old
# address is removed, so a failure leaves the previous setup in place.
sub update_ipconfig {
    my ($vmid, $conf, $opt, $eth, $newnet, $rootdir) = @_;

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);

    my $optdata = parse_lxc_network($conf->{$opt});
    my $deleted = [];
    my $added = [];
    # run a command inside the network namespace of the container
    my $nscmd = sub {
        my $cmdargs = shift;
        PVE::Tools::run_command(['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', @_], %$cmdargs);
    };
    my $ipcmd = sub { &$nscmd({}, '/sbin/ip', @_) };

    my $change_ip_config = sub {
        my ($ipversion) = @_;

        my $family_opt = "-$ipversion";
        # the IPv4 keys are 'ip'/'gw', the IPv6 keys are 'ip6'/'gw6'
        my $suffix = $ipversion == 4 ? '' : $ipversion;
        my $gw= "gw$suffix";
        my $ip= "ip$suffix";

        my $newip = $newnet->{$ip};
        my $newgw = $newnet->{$gw};
        my $oldip = $optdata->{$ip};

        my $change_ip = &$safe_string_ne($oldip, $newip);
        my $change_gw = &$safe_string_ne($optdata->{$gw}, $newgw);

        return if !$change_ip && !$change_gw;

        # step 1: add new IP, if this fails we cancel
        my $is_real_ip = ($newip && $newip !~ /^(?:auto|dhcp|manual)$/);
        if ($change_ip && $is_real_ip) {
            eval { &$ipcmd($family_opt, 'addr', 'add', $newip, 'dev', $eth); };
            if (my $err = $@) {
                warn $err;
                return;
            }
        }

        # step 2: replace gateway
        # If this fails we delete the added IP and cancel.
        # If it succeeds we save the config and delete the old IP, ignoring
        # errors. The config is then saved.
        # Note: 'ip route replace' can add the route if it does not exist yet.
        if ($change_gw) {
            if ($newgw) {
                eval {
                    if ($is_real_ip && !PVE::Network::is_ip_in_cidr($newgw, $newip, $ipversion)) {
                        &$ipcmd($family_opt, 'route', 'add', $newgw, 'dev', $eth);
                    }
                    &$ipcmd($family_opt, 'route', 'replace', 'default', 'via', $newgw);
                };
                if (my $err = $@) {
                    warn $err;
                    # the route was not replaced, the old IP is still available
                    # rollback (delete new IP) and cancel
                    # Only an address which step 1 actually added can be
                    # removed again; keywords or an unset IP were never added.
                    if ($change_ip && $is_real_ip) {
                        eval { &$ipcmd($family_opt, 'addr', 'del', $newip, 'dev', $eth); };
                        warn $@ if $@; # no need to die here
                    }
                    return;
                }
            } else {
                eval { &$ipcmd($family_opt, 'route', 'del', 'default'); };
                # if the route was not deleted, the guest might have deleted it manually
                # warn and continue
                warn $@ if $@;
            }
        }

        # from this point on we save the configuration
        # step 3: delete old IP ignoring errors
        # 'manual' is a keyword like 'auto' and 'dhcp', not an address, so
        # there is nothing to remove for it either.
        if ($change_ip && $oldip && $oldip !~ /^(?:auto|dhcp|manual)$/) {
            # We need to enable promote_secondaries, otherwise our newly added
            # address will be removed along with the old one.
            my $promote = 0;
            eval {
                if ($ipversion == 4) {
                    &$nscmd({ outfunc => sub { $promote = int(shift) } },
                            'cat', "/proc/sys/net/ipv4/conf/$eth/promote_secondaries");
                    &$nscmd({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=1");
                }
                &$ipcmd($family_opt, 'addr', 'del', $oldip, 'dev', $eth);
            };
            warn $@ if $@; # no need to die here

            # restore the previous promote_secondaries setting
            if ($ipversion == 4) {
                &$nscmd({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=$promote");
            }
        }

        foreach my $property ($ip, $gw) {
            if ($newnet->{$property}) {
                $optdata->{$property} = $newnet->{$property};
            } else {
                delete $optdata->{$property};
            }
        }
        $conf->{$opt} = print_lxc_network($optdata);
        write_config($vmid, $conf);
        $lxc_setup->setup_network($conf);
    };

    &$change_ip_config(4);
    &$change_ip_config(6);

}
1694
1695 # Internal snapshots
1696
1697 # NOTE: Snapshot create/delete involves several non-atomic
1698 # actions, and can take a long time.
1699 # So we try to avoid locking the file and use the 'lock' variable
1700 # inside the config file instead.
1701
# Copy all configuration keys from $source to $dest, except for the keys
# which describe snapshot/lock state or are otherwise not part of the
# snapshotted settings (see the list below). Keys already present in $dest
# are overwritten; other keys of $dest are left alone.
my $snapshot_copy_config = sub {
    my ($source, $dest) = @_;

    my %skip = map { $_ => 1 }
        qw(snapshots snapstate snaptime vmstate lock digest description);

    foreach my $key (keys %$source) {
        next if $skip{$key};

        $dest->{$key} = $source->{$key};
    }
};
1717
# First phase of creating a snapshot, executed under the config lock.
#
# Marks the configuration with lock 'snapshot', creates the snapshot section
# $snapname as a copy of the current settings in state 'prepare' and writes
# the configuration. Returns the new snapshot section.
#
# Dies if the container is a template, is locked, already has a snapshot of
# that name, or if the storage lacks the required feature.
my $snapshot_prepare = sub {
    my ($vmid, $snapname, $comment) = @_;

    my $snap;

    my $prepare = sub {
        my $conf = load_config($vmid);

        die "you can't take a snapshot if it's a template\n"
            if is_template($conf);

        check_lock($conf);

        $conf->{lock} = 'snapshot';

        die "snapshot name '$snapname' already used\n"
            if defined($conf->{snapshots}->{$snapname});

        my $storecfg = PVE::Storage::config();
        # the 'vzdump' snapshot is checked with the vzdump rules of has_feature()
        my $feature = ($snapname eq 'vzdump') ? 'vzdump' : 'snapshot';
        if (!has_feature($feature, $conf, $storecfg)) {
            die "snapshot feature is not available\n";
        }

        $snap = {};
        $conf->{snapshots}->{$snapname} = $snap;

        $snapshot_copy_config->($conf, $snap);

        $snap->{snapstate} = "prepare";
        $snap->{snaptime} = time();
        $snap->{description} = $comment if $comment;
        $conf->{snapshots}->{$snapname} = $snap;

        write_config($vmid, $conf);
    };

    lock_config($vmid, $prepare);

    return $snap;
};
1757
# Final phase of creating a snapshot, executed under the config lock.
#
# Verifies that the configuration carries the 'snapshot' lock and that the
# snapshot exists in state 'prepare', then clears both markers, makes the
# snapshot the new parent and writes the configuration.
my $snapshot_commit = sub {
    my ($vmid, $snapname) = @_;

    my $commit = sub {
        my $conf = load_config($vmid);

        my $lock = $conf->{lock};
        die "missing snapshot lock\n"
            if !$lock || $lock ne 'snapshot';

        my $snap = $conf->{snapshots}->{$snapname};
        die "snapshot '$snapname' does not exist\n"
            if !defined($snap);

        my $state = $snap->{snapstate};
        die "wrong snapshot state\n"
            if !$state || $state ne "prepare";

        delete $snap->{snapstate};
        delete $conf->{lock};
        $conf->{parent} = $snapname;

        write_config($vmid, $conf);
    };

    lock_config($vmid, $commit);
};
1784
# Check whether the volumes of a container support a storage feature.
#
# The pseudo feature 'vzdump' is checked as 'snapshot', but skips mountpoints
# (other than rootfs) which are not flagged for backup. Returns 1 if all
# checked volumes support the feature, 0 otherwise. Dies for any checked
# mountpoint other than rootfs, as those are not implemented yet.
sub has_feature {
    my ($feature, $conf, $storecfg, $snapname) = @_;

    my $is_vzdump = $feature eq 'vzdump';
    $feature = 'snapshot' if $is_vzdump;

    my $unsupported;
    my $check_volume = sub {
        my ($ms, $mountpoint) = @_;

        return if $unsupported; # skip further test

        my $is_rootfs = $ms eq 'rootfs';
        return if $is_vzdump && !$is_rootfs && !$mountpoint->{backup};

        if (!PVE::Storage::volume_has_feature($storecfg, $feature, $mountpoint->{volume}, $snapname)) {
            $unsupported = 1;
        }

        # TODO: implement support for mountpoints
        die "unable to handle mountpoint '$ms' - feature not implemented\n"
            if !$is_rootfs;
    };
    foreach_mountpoint($conf, $check_volume);

    return $unsupported ? 0 : 1;
}
1807
# Move the current process into namespace $which (a name below /proc/PID/ns)
# of process $pid. $type is passed on to PVE::Tools::setns(). $vmid is only
# used for error messages. Dies if the namespace cannot be opened or entered.
my $enter_namespace = sub {
    my ($vmid, $pid, $which, $type) = @_;

    my $ns_path = "/proc/$pid/ns/$which";
    sysopen(my $fd, $ns_path, O_RDONLY)
        or die "failed to open $which namespace of container $vmid: $!\n";

    PVE::Tools::setns(fileno($fd), $type)
        or die "failed to enter $which namespace of container $vmid: $!\n";

    close $fd;
};
1816
# Child side of sync_container_namespace(): enter the mount namespace of the
# container process $pid, announce that over $socket, read the mount table
# sent back by the parent and sync every listed mountpoint (lxcfs mounts
# excepted). Sync failures are only warned about.
my $do_syncfs = sub {
    my ($vmid, $pid, $socket) = @_;

    $enter_namespace->($vmid, $pid, 'mnt', PVE::Tools::CLONE_NEWNS);

    # Tell the parent process to start reading our /proc/mounts
    print {$socket} "go\n";
    $socket->flush();

    # Receive /proc/self/mounts
    my $mountdata = do {
        local $/ = undef;
        <$socket>;
    };
    close $socket;

    # Now sync all mountpoints...
    my $mounts = PVE::ProcFSTools::parse_mounts($mountdata);
    foreach my $entry (@$mounts) {
        my (undef, $dir, $fstype) = @$entry;
        next if $fstype eq 'fuse.lxcfs';

        eval { PVE::Tools::sync_mountpoint($dir); };
        warn $@ if $@;
    }
};
1839
# Sync all filesystems mounted inside the mount namespace of a container.
#
# A child process is forked which enters the mount namespace of the container
# and reports back with "go\n". The parent then reads the mount table of the
# child from /proc/<child>/mounts, sends it over a socket pair, and the child
# syncs each mountpoint.
#
# Dies if the child could not enter the namespace, if the mount table could
# not be read, or if the child exited with a non-zero status. The child is
# always reaped before this function returns or dies.
sub sync_container_namespace {
    my ($vmid) = @_;
    my $pid = find_lxc_pid($vmid);

    # SOCK_DGRAM is nicer for barriers but cannot be slurped
    socketpair my $pfd, my $cfd, AF_UNIX, SOCK_STREAM, PF_UNSPEC
        or die "failed to create socketpair: $!\n";

    my $child = fork();
    die "fork failed: $!\n" if !defined($child);

    if (!$child) {
        eval {
            close $pfd;
            &$do_syncfs($vmid, $pid, $cfd);
        };
        if (my $err = $@) {
            warn $err;
            POSIX::_exit(1);
        }
        POSIX::_exit(0);
    }
    close $cfd;

    # Errors are collected instead of thrown right away, so that the socket
    # is closed and the child is reaped on every path.
    my $err;

    # $go is undefined if the child died before sending anything.
    my $go = <$pfd>;
    if (!defined($go) || $go ne "go\n") {
        $err = "failed to enter container namespace\n";
    } elsif (open(my $mounts, '<', "/proc/$child/mounts")) {
        my $mountdata = do { local $/ = undef; <$mounts> };
        close $mounts;
        print {$pfd} $mountdata;
    } else {
        $err = "failed to open container's /proc/mounts: $!\n";
    }
    # closing our end signals end-of-data to the child
    close $pfd;

    while (waitpid($child, 0) != $child) {}
    die $err if $err;
    die "failed to sync container namespace\n" if $? != 0;
}
1876
# Create snapshot $snapname of a container.
#
# The snapshot is prepared in the configuration, a running container is
# frozen and its filesystems are synced, then the root volume is snapshotted.
# The container is unfrozen again in any case. On failure the prepared
# snapshot is removed again (forced) and the error is re-thrown; on success
# the snapshot is committed.
sub snapshot_create {
    my ($vmid, $snapname, $comment) = @_;

    my $snap = $snapshot_prepare->($vmid, $snapname, $comment);

    my $conf = load_config($vmid);

    my $running = check_running($vmid);

    my $frozen = 0;

    # records which volumes were snapshotted, for the cleanup on error
    my $drivehash = {};

    eval {
        if ($running) {
            # set before freezing, so a failed freeze is followed by an unfreeze too
            $frozen = 1;
            PVE::Tools::run_command(['/usr/bin/lxc-freeze', '-n', $vmid]);
            sync_container_namespace($vmid);
        }

        my $storecfg = PVE::Storage::config();
        my $rootinfo = parse_ct_rootfs($conf->{rootfs});
        my $volid = $rootinfo->{volume};

        PVE::Storage::volume_snapshot($storecfg, $volid, $snapname);
        $drivehash->{rootfs} = 1;
    };
    my $err = $@;

    if ($frozen) {
        eval { PVE::Tools::run_command(['/usr/bin/lxc-unfreeze', '-n', $vmid]); };
        warn $@ if $@;
    }

    if ($err) {
        eval { snapshot_delete($vmid, $snapname, 1, $drivehash); };
        warn "$@\n" if $@;
        die "$err\n";
    }

    $snapshot_commit->($vmid, $snapname);
}
1919
# Delete snapshot $snapname of a container.
#
# Parameters:
#   $vmid      - container ID
#   $snapname  - name of the snapshot
#   $force     - remove the snapshot from the configuration even if deleting
#                the volume snapshot failed
#   $drivehash - Note: $drivehash is only set when called from
#                snapshot_create. In that case the lock check is skipped and
#                the lock is removed together with the snapshot.
#
# The snapshot is first marked with state 'delete', then the volume snapshot
# of the root filesystem is removed, and finally the snapshot section is
# dropped from the configuration (re-linking 'parent' references).
#
# Dies if the volume snapshot could not be deleted. Without $force the
# configuration entry is kept (in state 'delete') in that case.
sub snapshot_delete {
    my ($vmid, $snapname, $force, $drivehash) = @_;

    my $snap;

    my $conf;

    my $updatefn = sub {

        $conf = load_config($vmid);

        die "you can't delete a snapshot if vm is a template\n"
            if is_template($conf);

        $snap = $conf->{snapshots}->{$snapname};

        if (!$drivehash) {
            check_lock($conf);
        }

        die "snapshot '$snapname' does not exist\n" if !defined($snap);

        $snap->{snapstate} = 'delete';

        write_config($vmid, $conf);
    };

    lock_config($vmid, $updatefn);

    my $storecfg = PVE::Storage::config();

    # Replace references to the deleted snapshot by its own parent, or drop
    # them when it has none.
    my $unlink_parent = sub {

        my ($confref, $new_parent) = @_;

        if ($confref->{parent} && $confref->{parent} eq $snapname) {
            if ($new_parent) {
                $confref->{parent} = $new_parent;
            } else {
                delete $confref->{parent};
            }
        }
    };

    my $del_snap = sub {

        $conf = load_config($vmid);

        if ($drivehash) {
            delete $conf->{lock};
        } else {
            check_lock($conf);
        }

        my $parent = $conf->{snapshots}->{$snapname}->{parent};
        foreach my $snapkey (keys %{$conf->{snapshots}}) {
            &$unlink_parent($conf->{snapshots}->{$snapkey}, $parent);
        }

        &$unlink_parent($conf, $parent);

        delete $conf->{snapshots}->{$snapname};

        write_config($vmid, $conf);
    };

    my $rootfs = $conf->{snapshots}->{$snapname}->{rootfs};
    my $rootinfo = parse_ct_rootfs($rootfs);
    my $volid = $rootinfo->{volume};

    eval {
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname);
    };
    my $err = $@;

    # Without $force a storage failure must not pass silently: report it and
    # leave the snapshot entry in place.
    die "Can't delete snapshot: $vmid $snapname $err\n" if $err && !$force;

    lock_config($vmid, $del_snap);

    # With $force the entry is gone now, but the failure is still reported.
    die "Can't delete snapshot: $vmid $snapname $err\n" if $err;
}
2003
# Roll a container back to snapshot $snapname.
#
# Under the config lock the container is stopped (killed) if it is running,
# the configuration is marked with lock 'rollback' and the settings stored in
# the snapshot are copied over the current ones. Then the root volume is
# rolled back and the lock is removed again.
#
# Dies if the container is a template, the snapshot does not exist or is
# incomplete, the configuration is locked, or the container is still running
# after the stop attempt.
sub snapshot_rollback {
    my ($vmid, $snapname) = @_;

    my $storecfg = PVE::Storage::config();

    my $conf = load_config($vmid);

    die "you can't rollback if vm is a template\n" if is_template($conf);

    my $snap = $conf->{snapshots}->{$snapname};

    die "snapshot '$snapname' does not exist\n" if !defined($snap);

    my $rootinfo = parse_ct_rootfs($snap->{rootfs});
    my $volid = $rootinfo->{volume};

    PVE::Storage::volume_rollback_is_possible($storecfg, $volid, $snapname);

    my $apply_snapshot_config = sub {
        die "unable to rollback to incomplete snapshot (snapstate = $snap->{snapstate})\n"
            if $snap->{snapstate};

        check_lock($conf);

        if (check_running($vmid)) {
            system("lxc-stop -n $vmid --kill");
        }

        die "unable to rollback vm $vmid: vm is running\n"
            if check_running($vmid);

        $conf->{lock} = 'rollback';

        # copy snapshot config to current config
        # NOTE(review): this only overwrites keys present in the snapshot;
        # keys which exist only in the current config are kept - confirm
        # this is intended.
        $snapshot_copy_config->($snap, $conf);
        delete $conf->{snaptime};
        delete $conf->{snapname};
        $conf->{parent} = $snapname;

        write_config($vmid, $conf);
    };

    my $release_lock = sub {
        delete $conf->{lock};
        write_config($vmid, $conf);
    };

    lock_config($vmid, $apply_snapshot_config);

    PVE::Storage::volume_snapshot_rollback($storecfg, $volid, $snapname);

    lock_config($vmid, $release_lock);
}
2062
# Convert the root volume of a container into a base (template) volume and
# store the resulting volume ID as the new rootfs in the configuration.
# Dies if the storage does not offer the 'template' feature for the volume.
sub template_create {
    my ($vmid, $conf) = @_;

    my $storecfg = PVE::Storage::config();

    my $rootinfo = parse_ct_rootfs($conf->{rootfs});
    my $volid = $rootinfo->{volume};

    if (!PVE::Storage::volume_has_feature($storecfg, 'template', $volid)) {
        die "Template feature is not available for '$volid'\n";
    }

    PVE::Storage::activate_volumes($storecfg, [$volid]);

    $rootinfo->{volume} = PVE::Storage::vdisk_create_base($storecfg, $volid);
    $conf->{rootfs} = print_ct_mountpoint($rootinfo, 1);

    write_config($vmid, $conf);
}
2082
# Returns 1 if the configuration has 'template' set to 1, a false value
# otherwise.
sub is_template {
    my ($conf) = @_;

    my $template = $conf->{template};

    return 1 if defined($template) && $template == 1;
}
2088
# Return the list of all possible mountpoint option names: 'rootfs' followed
# by 'mp0' .. 'mp<$MAX_MOUNT_POINTS - 1>'. With $reverse set, the list is
# returned in reverse order.
sub mountpoint_names {
    my ($reverse) = @_;

    my @names = ('rootfs', map { "mp$_" } 0 .. $MAX_MOUNT_POINTS - 1);

    return $reverse ? reverse @names : @names;
}
2100
2101
# Call $func->($key, $mountpoint) for every mountpoint option (rootfs, mpN)
# defined in $conf, in the order given by mountpoint_names($reverse).
# Options whose value cannot be parsed (the parser returns undef) are skipped.
sub foreach_mountpoint_full {
    my ($conf, $reverse, $func) = @_;

    foreach my $key (mountpoint_names($reverse)) {
        my $value = $conf->{$key};
        next if !defined($value);

        my $parser = ($key eq 'rootfs') ? \&parse_ct_rootfs : \&parse_ct_mountpoint;
        my $mountpoint = $parser->($value, 1);
        next if !defined($mountpoint);

        $func->($key, $mountpoint);
    }
}
2114
# Iterate over the mountpoints of $conf in forward order (rootfs first).
# See foreach_mountpoint_full().
sub foreach_mountpoint {
    my ($conf, $func) = @_;

    my $reverse = 0;

    return foreach_mountpoint_full($conf, $reverse, $func);
}
2120
# Iterate over the mountpoints of $conf in reverse order (rootfs last).
# See foreach_mountpoint_full().
sub foreach_mountpoint_reverse {
    my ($conf, $func) = @_;

    my $reverse = 1;

    return foreach_mountpoint_full($conf, $reverse, $func);
}
2126
# Check that $authuser is allowed to modify each option in $key_list.
#
# Parameters:
#   $rpcenv   - object providing check_vm_perm($authuser, $vmid, $pool, $privs)
#   $authuser - user requesting the change
#   $vmid     - container ID
#   $pool     - pool passed through to check_vm_perm
#   $key_list - array ref of option names to be modified
#
# Each option is mapped to the privilege guarding it (CPU, Disk, Memory,
# Network, or Options for everything else) and checked via check_vm_perm.
# Returns 1; the outcome of a failed check is up to check_vm_perm.
sub check_ct_modify_config_perm {
    my ($rpcenv, $authuser, $vmid, $pool, $key_list) = @_;

    # Only the superuser may skip the checks. (This test was inverted before,
    # which skipped the checks for everybody except root.)
    return 1 if $authuser eq 'root@pam';

    foreach my $opt (@$key_list) {

        if ($opt eq 'cpus' || $opt eq 'cpuunits' || $opt eq 'cpulimit') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.CPU']);
        } elsif ($opt eq 'rootfs' || $opt =~ /^mp\d+$/) {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk']);
        } elsif ($opt eq 'memory' || $opt eq 'swap') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Memory']);
        } elsif ($opt =~ m/^net\d+$/ || $opt eq 'nameserver' ||
                 $opt eq 'searchdomain' || $opt eq 'hostname') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Network']);
        } else {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Options']);
        }
    }

    return 1;
}
2150
# Unmount all mountpoints of a container below /var/lib/lxc/$vmid/rootfs, in
# reverse order. Mountpoints which are not mounted are skipped. An unmount
# error is fatal unless $noerr is set, in which case it is only warned about.
sub umount_all {
    my ($vmid, $storage_cfg, $conf, $noerr) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    my $volid_list = get_vm_volumes($conf);

    my $unmount = sub {
        my ($ms, $mountpoint) = @_;

        my $volid = $mountpoint->{volume};
        my $mount = $mountpoint->{mp};

        return if !$volid || !$mount;

        # collapse repeated slashes, so the path can be compared with the mount table
        my $mount_path = "$rootdir/$mount";
        $mount_path =~ s!/+!/!g;

        return if !PVE::ProcFSTools::is_mounted($mount_path);

        eval {
            PVE::Tools::run_command(['umount', '-d', $mount_path]);
        };
        my $err = $@;
        return if !$err;

        die $err if !$noerr;
        warn $err;
    };

    foreach_mountpoint_reverse($conf, $unmount);
}
2182
# Activate all volumes of a container and mount all of its mountpoints below
# /var/lib/lxc/$vmid/rootfs. Returns that root directory.
#
# If any mount fails, everything is unmounted again (ignoring errors) and the
# original error is re-thrown.
sub mount_all {
    my ($vmid, $storage_cfg, $conf) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    File::Path::make_path($rootdir);

    my $volid_list = get_vm_volumes($conf);
    PVE::Storage::activate_volumes($storage_cfg, $volid_list);

    my $mount = sub {
        my ($ms, $mountpoint) = @_;

        mountpoint_mount($mountpoint, $rootdir, $storage_cfg);
    };

    eval { foreach_mountpoint($conf, $mount); };
    my $err = $@;
    if ($err) {
        warn "mounting container failed\n";
        umount_all($vmid, $storage_cfg, $conf, 1);
        die $err;
    }

    return $rootdir;
}
2207
2208
# Call mountpoint_mount() without a root directory, which makes it only
# return the path of the mountpoint's volume instead of mounting it.
sub mountpoint_mount_path {
    my ($mountpoint, $storage_cfg, $snapname) = @_;

    # The call stays directly in the return statement, so the caller's
    # context is passed through to mountpoint_mount().
    my $no_rootdir = undef;
    return mountpoint_mount($mountpoint, $no_rootdir, $storage_cfg, $snapname);
}
2214
# Die if resolving $path (after canonicalization) yields a different path,
# i.e. if a symlink is involved in the mount path.
my $check_mount_path = sub {
    my ($path) = @_;

    my $canonical = File::Spec->canonpath($path);
    my $resolved = Cwd::realpath($canonical);

    die "mount path modified by symlink: $canonical != $resolved"
        if $resolved ne $canonical;
};
2223
# Return the loop device associated with the file $path, as reported by
# 'losetup --associated', or undef if none is reported. If several devices
# are reported, the last one wins.
sub query_loopdev {
    my ($path) = @_;

    my $loopdev;
    my $collect = sub {
        my ($line) = @_;
        $loopdev = $1 if $line =~ m@^(/dev/loop\d+):@;
    };

    PVE::Tools::run_command(['losetup', '--associated', $path], outfunc => $collect);

    return $loopdev;
}
2237
# Run a function with a file attached to a loop device.
# The function is called with the loop device as its only argument.
# The loop device is always detached afterwards (or set to autoclear),
# whether the function died or not; its error is re-thrown after detaching.
# Returns the loop device.
sub run_with_loopdev {
    my ($func, $file) = @_;

    my $loopdev;
    my $capture = sub {
        my ($line) = @_;
        $loopdev = $1 if $line =~ m@^(/dev/loop\d+)$@;
    };

    PVE::Tools::run_command(['losetup', '--show', '-f', $file], outfunc => $capture);
    die "failed to setup loop device for $file\n" if !$loopdev;

    eval { $func->($loopdev); };
    my $err = $@;

    PVE::Tools::run_command(['losetup', '-d', $loopdev]);
    die $err if $err;

    return $loopdev;
}
2258
# Bind mount $dir onto $dest, passing @extra_opts to mount. With $ro set, the
# bind mount is remounted read-only afterwards; if that fails, the mount is
# removed again and the error is re-thrown.
sub bindmount {
    my ($dir, $dest, $ro, @extra_opts) = @_;

    PVE::Tools::run_command(['mount', '-o', 'bind', @extra_opts, $dir, $dest]);
    return if !$ro;

    # The read-only flag is applied with a separate remount of the bind mount.
    eval { PVE::Tools::run_command(['mount', '-o', 'bind,remount,ro', $dest]); };
    my $err = $@;
    return if !$err;

    warn "bindmount error\n";
    # don't leave writable bind-mounts behind...
    PVE::Tools::run_command(['umount', $dest]);
    die $err;
}
2272
# Mount a single container mount point below $rootdir.
#
# Parameters:
#   $mountpoint  - mount point hash; the keys read here are volume, mp, type,
#                  ro, quota and acl
#   $rootdir     - directory below which the mount point is mounted;
#                  use $rootdir = undef to just return the corresponding mount
#                  path (no mount command is run in that case)
#   $storage_cfg - storage configuration passed on to the PVE::Storage helpers
#   $snapname    - optional snapshot name; snapshots are mounted read-only and
#                  never get quota options
#
# Returns nothing if the mount point has no volume or no target path.
# Otherwise returns the source path in scalar context and the list
# ($path, $use_loopdev, $mounted_dev) in list context.
#
# Dies for unsupported storage types or image formats, for snapshots of
# volumes without a storage, and for bind mount sources that are not
# directories.
sub mountpoint_mount {
    my ($mountpoint, $rootdir, $storage_cfg, $snapname) = @_;

    my $volid = $mountpoint->{volume};
    my $mount = $mountpoint->{mp};
    my $type = $mountpoint->{type};
    my $quota = !$snapname && !$mountpoint->{ro} && $mountpoint->{quota};
    my $mounted_dev;

    return if !$volid || !$mount;

    my $mount_path;

    if (defined($rootdir)) {
        $rootdir =~ s!/+$!!;
        $mount_path = "$rootdir/$mount";
        $mount_path =~ s!/+!/!g;
        $check_mount_path->($mount_path);
        File::Path::mkpath($mount_path);
    }

    my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1);

    die "unknown snapshot path for '$volid'" if !$storage && defined($snapname);

    my $optstring = '';
    if (defined($mountpoint->{acl})) {
        $optstring .= ($mountpoint->{acl} ? 'acl' : 'noacl');
    }
    my $readonly = $mountpoint->{ro};

    # Only hand an '-o' argument to mount when there is an option to pass;
    # an unconditional ('-o', '') would put an empty option string on every
    # mount command line.
    my @extra_opts;
    @extra_opts = ('-o', $optstring) if $optstring ne '';

    if ($storage) {

        my $scfg = PVE::Storage::storage_config($storage_cfg, $storage);
        my $path = PVE::Storage::path($storage_cfg, $volid, $snapname);

        my ($vtype, undef, undef, undef, undef, $isBase, $format) =
            PVE::Storage::parse_volname($storage_cfg, $volid);

        $format = 'iso' if $vtype eq 'iso'; # allow to handle iso files

        if ($format eq 'subvol') {
            if ($mount_path) {
                if ($snapname) {
                    if ($scfg->{type} eq 'zfspool') {
                        # the zfs mount helper gets the path without leading slashes
                        my $path_arg = $path;
                        $path_arg =~ s!^/+!!;
                        PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, '-t', 'zfs', $path_arg, $mount_path]);
                    } else {
                        die "cannot mount subvol snapshots for storage type '$scfg->{type}'\n";
                    }
                } else {
                    bindmount($path, $mount_path, $readonly, @extra_opts);
                    warn "cannot enable quota control for bind mounted subvolumes\n" if $quota;
                }
            }
            return wantarray ? ($path, 0, $mounted_dev) : $path;
        } elsif ($format eq 'raw' || $format eq 'iso') {
            # Mounts the given block device (or does nothing without a
            # mount path); also used as callback for run_with_loopdev().
            my $domount = sub {
                my ($path) = @_;
                if ($mount_path) {
                    if ($format eq 'iso') {
                        PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, $path, $mount_path]);
                    } elsif ($isBase || defined($snapname)) {
                        PVE::Tools::run_command(['mount', '-o', 'ro,noload', @extra_opts, $path, $mount_path]);
                    } else {
                        if ($quota) {
                            push @extra_opts, '-o', 'usrjquota=aquota.user,grpjquota=aquota.group,jqfmt=vfsv0';
                        }
                        push @extra_opts, '-o', 'ro' if $readonly;
                        PVE::Tools::run_command(['mount', @extra_opts, $path, $mount_path]);
                    }
                }
            };
            my $use_loopdev = 0;
            if ($scfg->{path}) {
                # file based storage: the image has to go through a loop device
                $mounted_dev = run_with_loopdev($domount, $path);
                $use_loopdev = 1;
            } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' ||
                     $scfg->{type} eq 'rbd' || $scfg->{type} eq 'lvmthin') {
                $mounted_dev = $path;
                $domount->($path);
            } else {
                die "unsupported storage type '$scfg->{type}'\n";
            }
            return wantarray ? ($path, $use_loopdev, $mounted_dev) : $path;
        } else {
            die "unsupported image format '$format'\n";
        }
    } elsif ($type eq 'device') {
        push @extra_opts, '-o', 'ro' if $readonly;
        PVE::Tools::run_command(['mount', @extra_opts, $volid, $mount_path]) if $mount_path;
        return wantarray ? ($volid, 0, $volid) : $volid;
    } elsif ($type eq 'bind') {
        die "directory '$volid' does not exist\n" if ! -d $volid;
        $check_mount_path->($volid);
        bindmount($volid, $mount_path, $readonly, @extra_opts) if $mount_path;
        warn "cannot enable quota control for bind mounts\n" if $quota;
        return wantarray ? ($volid, 0, undef) : $volid;
    }

    die "unsupported storage";
}
2379
# Collect the volume IDs of all storage backed mount points of a container.
#
# Parameters:
#   $conf     - container configuration, iterated with foreach_mountpoint()
#   $excludes - optional mount point key (as passed to the callback) to skip
#
# Only mount points of type 'volume' whose volume ID parses to a storage ID
# are included. Returns an array reference of volume IDs.
sub get_vm_volumes {
    my ($conf, $excludes) = @_;

    my @volids;

    my $collect = sub {
        my ($ms, $mountpoint) = @_;

        # leave out the mount point the caller asked to exclude
        return if $excludes && $ms eq $excludes;

        my $volid = $mountpoint->{volume};
        return if !$volid;
        return if $mountpoint->{type} ne 'volume';

        # volume IDs that do not name a storage are skipped
        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        push @volids, $volid;
    };

    foreach_mountpoint($conf, $collect);

    return \@volids;
}
2402
# Create an ext4 file system (with the 'mmp' feature) on $dev.
#
# Parameters:
#   $dev      - block device or image file passed to mkfs.ext4
#   $rootuid  - owner uid of the file system root directory (default: 0)
#   $rootgid  - owner gid of the file system root directory (default: 0)
#
# Dies if the mkfs.ext4 command fails.
sub mkfs {
    my ($dev, $rootuid, $rootgid) = @_;

    # Callers may omit the owner; default to 0:0 explicitly instead of
    # interpolating undef into the option string (which only happens to
    # produce 'root_owner=:' plus "uninitialized value" warnings).
    $rootuid = 0 if !defined($rootuid);
    $rootgid = 0 if !defined($rootgid);

    PVE::Tools::run_command(['mkfs.ext4', '-O', 'mmp',
                             '-E', "root_owner=$rootuid:$rootgid",
                             $dev]);
}
2410
# Create a file system on a container volume.
#
# Parameters:
#   $storage_cfg - storage configuration passed to the PVE::Storage helpers
#   $volid       - either a device path below /dev/ or a storage volume ID
#   $rootuid     - owner uid for the root directory of the new file system
#   $rootgid     - owner gid for the root directory of the new file system
#
# Device paths are formatted directly. Storage volumes are activated first
# and must be in 'raw' format.
#
# Dies if the volume ID names no storage or the volume is not 'raw'.
sub format_disk {
    my ($storage_cfg, $volid, $rootuid, $rootgid) = @_;

    if ($volid =~ m!^/dev/.+!) {
        # pass the owner on, exactly like for storage backed volumes below
        mkfs($volid, $rootuid, $rootgid);
        return;
    }

    my ($storage) = PVE::Storage::parse_volume_id($volid, 1);

    die "cannot format volume '$volid' with no storage\n" if !$storage;

    PVE::Storage::activate_volumes($storage_cfg, [$volid]);

    my $path = PVE::Storage::path($storage_cfg, $volid);

    # only the image format (7th return value) is of interest here
    my (undef, undef, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storage_cfg, $volid);

    die "cannot format volume '$volid' (format == $format)\n"
        if $format ne 'raw';

    mkfs($path, $rootuid, $rootgid);
}
2435
# Free all volumes in $vollist.
#
# Parameters:
#   $storecfg - storage configuration passed to PVE::Storage::vdisk_free()
#   $vollist  - array reference of volume IDs
#
# Best effort: a volume that cannot be freed only produces a warning, the
# remaining volumes are still processed.
sub destroy_disks {
    my ($storecfg, $vollist) = @_;

    for my $volid (@{$vollist}) {
        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        my $err = $@;
        next if !$err;
        warn $err;
    }
}
2444
# Allocate (and format, where needed) the volumes requested in $settings.
#
# Parameters:
#   $storecfg - storage configuration passed to the PVE::Storage helpers
#   $vmid     - container ID the new volumes are allocated for
#   $settings - mount point settings, iterated with foreach_mountpoint();
#               a volume of the form "<storage>:<size in GB>" requests a new
#               volume, anything else is used as specified
#   $conf     - container configuration; the resulting mount point strings
#               are stored in it, and the id maps are read from it
#
# Returns an array reference with the IDs of all newly allocated volumes.
# On error every volume allocated so far is freed again and the error is
# re-raised.
sub create_disks {
    my ($storecfg, $vmid, $settings, $conf) = @_;

    my $vollist = [];

    eval {
        my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
        my $chown_vollist = [];

        # Allocate a volume and record it immediately, so that it is also
        # freed when a later step for the same volume (e.g. formatting) dies.
        my $alloc = sub {
            my ($storage, $fmt, $size_kb) = @_;
            my $new_volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, $fmt,
                                                      undef, $size_kb);
            push @$vollist, $new_volid;
            return $new_volid;
        };

        foreach_mountpoint($settings, sub {
            my ($ms, $mountpoint) = @_;

            my $volid = $mountpoint->{volume};

            my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1);

            if ($storage && ($volid =~ m/^([^:\s]+):(\d+(\.\d+)?)$/)) {
                my $size_gb = $2;

                my $size_kb = int(${size_gb}*1024) * 1024;

                my $scfg = PVE::Storage::storage_config($storecfg, $storage);
                # fixme: use better naming ct-$vmid-disk-X.raw?

                if ($scfg->{type} eq 'dir' || $scfg->{type} eq 'nfs') {
                    if ($size_kb > 0) {
                        $volid = $alloc->($storage, 'raw', $size_kb);
                        format_disk($storecfg, $volid, $rootuid, $rootgid);
                    } else {
                        # size 0 requests a subvol instead of a raw image
                        $volid = $alloc->($storage, 'subvol', 0);
                        push @$chown_vollist, $volid;
                    }
                } elsif ($scfg->{type} eq 'zfspool') {

                    $volid = $alloc->($storage, 'subvol', $size_kb);
                    push @$chown_vollist, $volid;
                } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' || $scfg->{type} eq 'lvmthin') {

                    $volid = $alloc->($storage, 'raw', $size_kb);
                    format_disk($storecfg, $volid, $rootuid, $rootgid);

                } elsif ($scfg->{type} eq 'rbd') {

                    die "krbd option must be enabled on storage type '$scfg->{type}'\n" if !$scfg->{krbd};
                    $volid = $alloc->($storage, 'raw', $size_kb);
                    format_disk($storecfg, $volid, $rootuid, $rootgid);
                } else {
                    die "unable to create containers on storage type '$scfg->{type}'\n";
                }
                $mountpoint->{volume} = $volid;
                $mountpoint->{size} = $size_kb * 1024;
                $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            } else {
                # use specified/existing volid/dir/device
                $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            }
        });

        # subvols are not formatted, so their root directory has to be
        # handed over to the container's root user explicitly
        PVE::Storage::activate_volumes($storecfg, $chown_vollist, undef);
        foreach my $volid (@$chown_vollist) {
            my $path = PVE::Storage::path($storecfg, $volid, undef);
            chown($rootuid, $rootgid, $path)
                or warn "unable to change owner of '$path' to $rootuid:$rootgid - $!\n";
        }
        PVE::Storage::deactivate_volumes($storecfg, $chown_vollist, undef);
    };
    # free allocated images on error
    if (my $err = $@) {
        destroy_disks($storecfg, $vollist);
        die $err;
    }
    return $vollist;
}
2522
2523 # bash completion helper
2524
# Completion helper returning volume IDs of templates or backups.
#
# Parameters:
#   $cmdname - command being completed; 'restore' lists content of type
#              'backup', every other command lists 'vztmpl'
#   $pname   - parameter name (not used)
#   $cvalue  - value typed so far; a leading "<storage>:" restricts the
#              listing to that storage
#
# Returns an array reference of volume IDs.
sub complete_os_templates {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    # everything before the first colon is taken as the storage ID
    my $storeid;
    $storeid = $1 if $cvalue =~ m/^([^:]+):/;

    my $vtype = ($cmdname eq 'restore') ? 'backup' : 'vztmpl';
    my $data = PVE::Storage::template_list($cfg, $storeid, $vtype);

    # flatten the per-storage item lists and keep the defined volume IDs
    my @volids = grep { defined($_) }
                 map  { $_->{volid} }
                 map  { @$_ } values %$data;

    return \@volids;
}
2548
# Shared implementation of the container ID completion helpers.
#
# Parameters:
#   $running - undef: return every ID reported by vmstatus();
#              true:  only non-template containers that are active;
#              false (but defined): only non-template containers that are
#              not active
#
# Returns an array reference of container IDs.
my $complete_ctid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();
    my $active_hash = list_active_containers();

    my $selected = sub {
        my ($id) = @_;

        my $d = $idlist->{$id};

        # without a state filter every container qualifies
        return 1 if !defined($running);

        return 0 if $d->{template};

        my $active = $active_hash->{$id};
        return $running ? !!$active : !$active;
    };

    my @ids = grep { $selected->($_) } keys %$idlist;

    return \@ids;
};
2570
# Completion helper: container IDs without any state filter.
sub complete_ctid {
    return $complete_ctid_full->();
}
2574
# Completion helper: IDs of non-template containers that are not active.
sub complete_ctid_stopped {
    return $complete_ctid_full->(0);
}
2578
# Completion helper: IDs of non-template containers that are active.
sub complete_ctid_running {
    return $complete_ctid_full->(1);
}
2582
# Extract the uid/gid mappings from a container configuration.
#
# Parameters:
#   $conf - container configuration; the 'lxc' key holds a list of
#           [key, value] pairs, of which only 'lxc.id_map' entries are used
#
# Returns the list ($id_map, $rootuid, $rootgid): $id_map is an array
# reference of [type, container id, host id, length] entries, $rootuid and
# $rootgid are the host IDs that container ID 0 is mapped to (0 if there is
# no such mapping). Unprivileged containers without explicit maps get a
# default mapping starting at host ID 100000.
#
# Dies if an 'lxc.id_map' value cannot be parsed.
sub parse_id_maps {
    my ($conf) = @_;

    my @maps;
    my %root_id = (u => 0, g => 0);

    my $entries = $conf->{lxc};
    for my $entry (@$entries) {
        my ($key, $value) = @$entry;
        next if $key ne 'lxc.id_map';

        my ($type, $ct, $host, $length) =
            $value =~ /^([ug])\s+(\d+)\s+(\d+)\s+(\d+)\s*$/
            or die "failed to parse id_map: $value\n";

        push @maps, [$type, $ct, $host, $length];

        # a range starting at container ID 0 determines the root owner
        $root_id{$type} = $host if $ct == 0;
    }

    if (!@maps && $conf->{unprivileged}) {
        # Should we read them from /etc/subuid?
        @maps = (
            ['u', '0', '100000', '65536'],
            ['g', '0', '100000', '65536'],
        );
        $root_id{u} = $root_id{g} = 100000;
    }

    return (\@maps, $root_id{u}, $root_id{g});
}
2615
# Build the command prefix used to run a command with the given id maps.
#
# Parameters:
#   $id_map - array reference of id map entries (array references); the
#             fields of each entry are joined with ':' for one '-m' option
#
# Returns an array reference: the 'lxc-usernsexec' call terminated by '--',
# or an empty list if there are no mappings.
sub userns_command {
    my ($id_map) = @_;

    return [] if !@$id_map;

    my @cmd = ('lxc-usernsexec');
    for my $mapping (@$id_map) {
        push @cmd, '-m', join(':', @$mapping);
    }
    push @cmd, '--';

    return \@cmd;
}
2623
2624 1;