]> git.proxmox.com Git - pve-container.git/blob - src/PVE/LXC.pm
Adjust snapshot_create and snapshot_prepare signatures
[pve-container.git] / src / PVE / LXC.pm
1 package PVE::LXC;
2
3 use strict;
4 use warnings;
5 use POSIX qw(EINTR);
6
7 use Socket;
8
9 use File::Path;
10 use File::Spec;
11 use Cwd qw();
12 use Fcntl qw(O_RDONLY);
13
14 use PVE::Cluster qw(cfs_register_file cfs_read_file);
15 use PVE::Storage;
16 use PVE::SafeSyslog;
17 use PVE::INotify;
18 use PVE::JSONSchema qw(get_standard_option);
19 use PVE::Tools qw($IPV6RE $IPV4RE dir_glob_foreach lock_file lock_file_full);
20 use PVE::Network;
21 use PVE::AccessControl;
22 use PVE::ProcFSTools;
23 use Time::HiRes qw (gettimeofday);
24
25 use Data::Dumper;
26
# Name of the local node, resolved once when the module is loaded.
my $nodename = PVE::INotify::nodename();

# CPU information read once at load time; vmstatus() reads its 'cpus' and
# 'user_hz' fields.
my $cpuinfo= PVE::ProcFSTools::read_cpuinfo();

# List of tar command line flags. Declared with 'our', i.e. it is a package
# variable reachable as $PVE::LXC::COMMON_TAR_FLAGS.
our $COMMON_TAR_FLAGS = [ '--sparse', '--numeric-owner', '--acls',
                          '--xattrs',
                          '--xattrs-include=user.*',
                          '--xattrs-include=security.capability',
                          '--warning=no-xattr-write' ];

# Register the parser/writer pair for files matching '/lxc/' with the
# cluster file system layer.
cfs_register_file('/lxc/', \&parse_pct_config, \&write_pct_config);
38
# Property-string schema for the 'rootfs' option. It is also the base of the
# mount point schema ($mp_desc merges it in and adds an 'mp' property).
my $rootfs_desc = {
    # default_key: the value given without a 'key=' prefix is the volume
    volume => {
        type => 'string',
        default_key => 1,
        format => 'pve-lxc-mp-string',
        format_description => 'volume',
        description => 'Volume, device or directory to mount into the container.',
    },
    backup => {
        type => 'boolean',
        format_description => '[1|0]',
        description => 'Whether to include the mountpoint in backups.',
        optional => 1,
    },
    size => {
        type => 'string',
        format => 'disk-size',
        format_description => 'DiskSize',
        description => 'Volume size (read only value).',
        optional => 1,
    },
    acl => {
        type => 'boolean',
        format_description => 'acl',
        description => 'Explicitly enable or disable ACL support.',
        optional => 1,
    },
    ro => {
        type => 'boolean',
        format_description => 'ro',
        description => 'Read-only mountpoint (not supported with bind mounts)',
        optional => 1,
    },
    quota => {
        type => 'boolean',
        format_description => '[0|1]',
        description => 'Enable user quotas inside the container (not supported with zfs subvolumes)',
        optional => 1,
    },
};

# Standard option wrapping the rootfs schema; $confdesc->{rootfs} fetches it
# through get_standard_option('pve-ct-rootfs').
PVE::JSONSchema::register_standard_option('pve-ct-rootfs', {
    type => 'string', format => $rootfs_desc,
    description => "Use volume as container root.",
    optional => 1,
});

# Standard option describing a snapshot name (config id, at most 40 characters).
PVE::JSONSchema::register_standard_option('pve-lxc-snapshot-name', {
    description => "The name of the snapshot.",
    type => 'string', format => 'pve-configid',
    maxLength => 40,
});
91
# Schema of all regular container config options, keyed by option name.
# check_type() validates parsed values against the 'type'/'format' entries,
# option_exists() and json_config_properties() expose the table to callers.
# The netN, mpN and unusedN entries are added by loops further down the file.
my $confdesc = {
    lock => {
        optional => 1,
        type => 'string',
        description => "Lock/unlock the VM.",
        enum => [qw(migrate backup snapshot rollback)],
    },
    onboot => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default => 0,
    },
    startup => get_standard_option('pve-startup-order'),
    template => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable Template.",
        default => 0,
    },
    arch => {
        optional => 1,
        type => 'string',
        enum => ['amd64', 'i386'],
        description => "OS architecture type.",
        default => 'amd64',
    },
    # keep this enum in sync with the ostype check in update_lxc_config()
    ostype => {
        optional => 1,
        type => 'string',
        enum => ['debian', 'ubuntu', 'centos', 'fedora', 'opensuse', 'archlinux', 'alpine'],
        description => "OS type. Corresponds to lxc setup scripts in /usr/share/lxc/config/<ostype>.common.conf.",
    },
    console => {
        optional => 1,
        type => 'boolean',
        description => "Attach a console device (/dev/console) to the container.",
        default => 1,
    },
    tty => {
        optional => 1,
        type => 'integer',
        description => "Specify the number of tty available to the container",
        minimum => 0,
        maximum => 6,
        default => 2,
    },
    cpulimit => {
        optional => 1,
        type => 'number',
        description => "Limit of CPU usage. Note if the computer has 2 CPUs, it has a total of '2' CPU time. Value '0' indicates no CPU limit.",
        minimum => 0,
        maximum => 128,
        default => 0,
    },
    cpuunits => {
        optional => 1,
        type => 'integer',
        description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to the weights of all the other running VMs.\n\nNOTE: You can disable fair-scheduler configuration by setting this to 0.",
        minimum => 0,
        maximum => 500000,
        default => 1024,
    },
    memory => {
        optional => 1,
        type => 'integer',
        description => "Amount of RAM for the VM in MB.",
        minimum => 16,
        default => 512,
    },
    swap => {
        optional => 1,
        type => 'integer',
        description => "Amount of SWAP for the VM in MB.",
        minimum => 0,
        default => 512,
    },
    hostname => {
        optional => 1,
        description => "Set a host name for the container.",
        type => 'string', format => 'dns-name',
        maxLength => 255,
    },
    # stored as '#' comment lines at the top of the config file, see write_pct_config()
    description => {
        optional => 1,
        type => 'string',
        description => "Container description. Only used on the configuration web interface.",
    },
    searchdomain => {
        optional => 1,
        type => 'string', format => 'dns-name-list',
        description => "Sets DNS search domains for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        description => "Sets DNS server IP address for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
    },
    rootfs => get_standard_option('pve-ct-rootfs'),
    # 'parent' and 'snaptime' are skipped by json_config_properties()
    parent => {
        optional => 1,
        type => 'string', format => 'pve-configid',
        maxLength => 40,
        description => "Parent snapshot name. This is used internally, and should not be modified.",
    },
    snaptime => {
        optional => 1,
        description => "Timestamp for snapshots.",
        type => 'integer',
        minimum => 0,
    },
    cmode => {
        optional => 1,
        description => "Console mode. By default, the console command tries to open a connection to one of the available tty devices. By setting cmode to 'console' it tries to attach to /dev/console instead. If you set cmode to 'shell', it simply invokes a shell inside the container (no login).",
        type => 'string',
        enum => ['shell', 'console', 'tty'],
        default => 'tty',
    },
    protection => {
        optional => 1,
        type => 'boolean',
        description => "Sets the protection flag of the container. This will prevent the CT or CT's disk remove/update operation.",
        default => 0,
    },
    unprivileged => {
        optional => 1,
        type => 'boolean',
        description => "Makes the container run as unprivileged user. (Should not be modified manually.)",
        default => 0,
    },
};
223
# Raw 'lxc.*' keys that parse_pct_config() knows about.
# A value of 1 means the key is accepted as is. Any other true value is the
# warning text printed when such a key is found (the entry is then dropped).
# Keys below 'lxc.cgroup.' are not listed here; the parser accepts them by
# prefix.
my $valid_lxc_conf_keys = {
    (map { $_ => 1 } qw(
        lxc.include
        lxc.arch
        lxc.utsname
        lxc.haltsignal
        lxc.rebootsignal
        lxc.stopsignal
        lxc.init_cmd
        lxc.network.type
        lxc.network.flags
        lxc.network.link
        lxc.network.mtu
        lxc.network.name
        lxc.network.hwaddr
        lxc.network.ipv4
        lxc.network.ipv4.gateway
        lxc.network.ipv6
        lxc.network.ipv6.gateway
        lxc.network.script.up
        lxc.network.script.down
        lxc.pts
        lxc.console.logfile
        lxc.console
        lxc.tty
        lxc.devttydir
        lxc.hook.autodev
        lxc.autodev
        lxc.kmsg
        lxc.mount
        lxc.mount.entry
        lxc.mount.auto
        lxc.rootfs.mount
        lxc.cap.drop
        lxc.cap.keep
        lxc.aa_profile
        lxc.aa_allow_incomplete
        lxc.se_context
        lxc.seccomp
        lxc.id_map
        lxc.hook.pre-start
        lxc.hook.pre-mount
        lxc.hook.mount
        lxc.hook.start
        lxc.hook.stop
        lxc.hook.post-stop
        lxc.hook.clone
        lxc.hook.destroy
        lxc.loglevel
        lxc.logfile
        lxc.start.auto
        lxc.start.delay
        lxc.start.order
        lxc.group
        lxc.environment
    )),
    'lxc.rootfs' => 'lxc.rootfs is auto generated from rootfs',
    'lxc.rootfs.options' => 'lxc.rootfs.options is not supported' .
        ', please use mountpoint options in the "rootfs" key',
};
283
# Property-string schema of a 'netN' option. It is used both as the format
# of the netN config entries and by parse_lxc_network()/print_lxc_network().
# Descriptions mention the raw lxc key in parentheses where one is named.
my $netconf_desc = {
    type => {
        type => 'string',
        optional => 1,
        description => "Network interface type.",
        enum => [qw(veth)],
    },
    name => {
        type => 'string',
        format_description => 'String',
        description => 'Name of the network device as seen from inside the container. (lxc.network.name)',
        pattern => '[-_.\w\d]+',
    },
    bridge => {
        type => 'string',
        format_description => 'vmbr<Number>',
        description => 'Bridge to attach the network device to.',
        pattern => '[-_.\w\d]+',
        optional => 1,
    },
    hwaddr => {
        type => 'string',
        format_description => 'MAC',
        # was a copy of the 'bridge' description
        description => 'MAC address of the network device. (lxc.network.hwaddr)',
        pattern => qr/(?:[a-f0-9]{2}:){5}[a-f0-9]{2}/i,
        optional => 1,
    },
    mtu => {
        type => 'integer',
        format_description => 'Number',
        description => 'Maximum transfer unit of the interface. (lxc.network.mtu)',
        minimum => 64, # minimum ethernet frame is 64 bytes
        optional => 1,
    },
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        description => 'IPv4 address in CIDR format.',
        optional => 1,
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        description => 'Default gateway for IPv4 traffic.',
        optional => 1,
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        description => 'IPv6 address in CIDR format.',
        optional => 1,
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        description => 'Default gateway for IPv6 traffic.',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        format_description => '[1|0]',
        description => "Controls whether this interface's firewall rules should be used.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        format_description => 'VlanNo',
        minimum => '2',
        maximum => '4094',
        description => "VLAN tag for this interface.",
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:;\d+)*/,
        format_description => 'vlanid[;vlanid...]',
        description => "VLAN ids to pass through the interface",
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-lxc-network', $netconf_desc);
369
# Add one optional 'netN' entry (N = 0 .. $MAX_LXC_NETWORKS - 1) to the
# config schema; each one is a property string following $netconf_desc.
my $MAX_LXC_NETWORKS = 10;
foreach my $idx (0 .. $MAX_LXC_NETWORKS - 1) {
    $confdesc->{"net$idx"} = {
        optional => 1,
        type => 'string', format => $netconf_desc,
        description => "Specifies network interfaces for the container.",
    };
}

# Format checker referenced by the 'volume' and 'mp' properties.
PVE::JSONSchema::register_format('pve-lxc-mp-string', \&verify_lxc_mp_string);
# Format check for volume / mount point strings.
#
# Rejects strings that contain a '/./' or '/../' component, that end in
# '/.' or '/..', or that start with '../'.
#
# Returns the unchanged string when it is acceptable. Otherwise dies, or
# returns undef when $noerr is set.
sub verify_lxc_mp_string{
    my ($mp, $noerr) = @_;

    # a '.' or '..' component that follows a slash, either in the middle
    # of the string or at its very end
    my $dot_component = $mp =~ m@/\.\.?(?:/|$)@;

    # a relative path that climbs out right away
    my $leading_parent = $mp =~ m@^\.\./@;

    return $mp if !$dot_component && !$leading_parent;

    return undef if $noerr;

    die "$mp contains illegal character sequences\n";
}
396
# Property-string schema of a mount point: everything a rootfs has, plus
# the target path 'mp'.
my $mp_desc = {
    %$rootfs_desc,
    mp => {
        type => 'string',
        format => 'pve-lxc-mp-string',
        format_description => 'Path',
        description => 'Path to the mountpoint as seen from inside the container.',
    },
};
PVE::JSONSchema::register_format('pve-ct-mountpoint', $mp_desc);

# Schema shared by all 'unusedN' options.
my $unuseddesc = {
    optional => 1,
    type => 'string', format => 'pve-volume-id',
    description => "Reference to unused volumes.",
};

# Add the optional 'mpN' entries to the config schema.
my $MAX_MOUNT_POINTS = 10;
for (my $i = 0; $i < $MAX_MOUNT_POINTS; $i++) {
    $confdesc->{"mp$i"} = {
        optional => 1,
        type => 'string', format => $mp_desc,
        description => "Use volume as container mount point (experimental feature).",
    };
}

# Add the 'unusedN' entries. The loop is bounded by $MAX_UNUSED_DISKS, the
# same limit add_unused_volume() iterates over.
my $MAX_UNUSED_DISKS = $MAX_MOUNT_POINTS;
for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) {
    $confdesc->{"unused$i"} = $unuseddesc;
}
428
# Serialize a container config hash into the raw text of its config file.
#
# $filename is not used by the code below; $conf is the config hash.
# The result starts with the main section, followed by one "[name]"
# section per entry of $conf->{snapshots} (sorted by name).
#
# Side effect: the top level 'snapstate' key is removed from $conf.
# Dies when a property value contains a newline.
sub write_pct_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    # keys that never show up as a plain "key: value" line
    my %not_a_property = map { $_ => 1 }
        qw(digest description pending snapshots snapname lxc);

    my $render_section = sub {
        my ($section) = @_;

        my @out;

        # the description becomes '#' comment lines at the top of the section
        my $text = $section->{description} || '';
        foreach my $descr_line (split(/\n/, $text)) {
            push @out, '#' . PVE::Tools::encode_text($descr_line) . "\n";
        }

        foreach my $name (sort keys %$section) {
            next if $not_a_property{$name};
            my $val = $section->{$name};
            die "detected invalid newline inside property '$name'\n" if $val =~ m/\n/;
            push @out, "$name: $val\n";
        }

        # raw lxc.* entries come last and keep their stored order
        if (my $raw_entries = $section->{lxc}) {
            foreach my $pair (@$raw_entries) {
                my ($k, $v) = @$pair;
                push @out, "$k: $v\n";
            }
        }

        return join('', @out);
    };

    my $text = $render_section->($conf);

    foreach my $snapname (sort keys %{$conf->{snapshots}}) {
        $text .= "\n[$snapname]\n" . $render_section->($conf->{snapshots}->{$snapname});
    }

    return $text;
}
472
# Validate a single config value against the schema entry of its key.
#
# Returns the normalized value: 1/0 for booleans, an integer for integers,
# the unchanged string otherwise.
# Dies for unknown keys, undefined values, values containing CR/LF, and
# values that do not match the declared type. For strings with a 'format'
# the check is delegated to PVE::JSONSchema::check_format().
sub check_type {
    my ($key, $value) = @_;

    my $schema = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$schema;

    die "got undefined value\n" if !defined($value);

    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    my $type = $schema->{type};

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        my $fmt = $schema->{format};
        PVE::JSONSchema::check_format($fmt, $value) if $fmt;
        return $value;
    }

    die "internal error"
}
508
# Parse the raw text of a container config file.
#
# $filename must end in '/lxc/<vmid>.conf'; the vmid is only used in
# warning messages. $raw is the file content.
#
# Returns undef when $raw is undefined, otherwise a hash with
#   digest    - SHA1 hex digest of $raw
#   snapshots - one hash per "[name]" section
#   lxc       - list of [key, value] pairs for accepted raw lxc.* lines
#   plus one entry per regular "key: value" line of the main section.
# Snapshot sections have the same layout (without digest/snapshots).
#
# Lines that cannot be parsed only produce a warning. A value failing
# check_type() also only warns and is stored unconverted.
# Dies when $filename has an unexpected form.
sub parse_pct_config {
    my ($filename, $raw) = @_;

    return undef if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
    };

    # the dot is escaped so that only a real '.conf' suffix is accepted
    $filename =~ m|/lxc/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    # $conf always points at the section currently being filled
    my $conf = $res;
    my $descr = '';
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            $section = $1;
            # flush the description collected for the previous section
            $conf->{description} = $descr if $descr;
            $descr = '';
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)\s*$/) {
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(lxc\.[a-z0-9_\-\.]+)(:|\s*=)\s*(.*?)\s*$/) {
            my $key = $1;
            my $value = $3;
            my $validity = $valid_lxc_conf_keys->{$key} || 0;
            if ($validity eq 1 || $key =~ m/^lxc\.cgroup\./) {
                push @{$conf->{lxc}}, [$key, $value];
            } elsif (my $errmsg = $validity) {
                warn "vm $vmid - $key: $errmsg\n";
            } else {
                warn "vm $vmid - unable to parse config: $line\n";
            }
        } elsif ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored like every other key, so a line that merely ends in
            # "snapstate: ..." is not mistaken for the snapstate property
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(\S.*)\s*$/) {
            my $key = $1;
            my $value = $2;
            eval { $value = check_type($key, $value); };
            warn "vm $vmid - unable to parse value of '$key' - $@" if $@;
            $conf->{$key} = $value;
        } else {
            warn "vm $vmid - unable to parse config: $line\n";
        }
    }

    $conf->{description} = $descr if $descr;

    delete $res->{snapstate}; # just to be sure

    return $res;
}
577
# Return a hash { vmid => { type => 'lxc' } } with all guests from the
# cluster vmlist that are of type 'lxc' and located on the local node.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();

    my %local_cts;
    return \%local_cts if !$vmlist || !$vmlist->{ids};

    my $guests = $vmlist->{ids};
    foreach my $vmid (keys %$guests) {
        next if !$vmid; # skip CT0

        my $entry = $guests->{$vmid};
        my $on_this_node = $entry->{node} && $entry->{node} eq $nodename;
        my $is_container = $entry->{type} && $entry->{type} eq 'lxc';
        next if !$on_this_node || !$is_container;

        $local_cts{$vmid} = { type => 'lxc' };
    }

    return \%local_cts;
}
593
# Path of a container config file relative to the cluster file system
# root. $node defaults to the local node name.
sub cfs_config_path {
    my ($vmid, $node) = @_;

    $node ||= $nodename;

    return "nodes/$node/lxc/$vmid.conf";
}
600
# Absolute path (below /etc/pve) of a container config file.
# $node is passed on to cfs_config_path().
sub config_file {
    my ($vmid, $node) = @_;

    return '/etc/pve/' . cfs_config_path($vmid, $node);
}
607
# Read the config of container $vmid through the cluster file system
# layer. $node defaults to the local node.
# Dies when no config is returned for the container.
sub load_config {
    my ($vmid, $node) = @_;

    my $path = cfs_config_path($vmid, $node || $nodename);

    my $conf = PVE::Cluster::cfs_read_file($path);
    defined($conf) or die "container $vmid does not exist\n";

    return $conf;
}
619
# Write the config of a new container on the local node. The node's lxc
# config directory is created first; the result of mkdir is not checked.
sub create_config {
    my ($vmid, $conf) = @_;

    mkdir "/etc/pve/nodes/$nodename/lxc";

    return write_config($vmid, $conf);
}
628
# Remove the config file of container $vmid from the local node.
# Returns what unlink returns.
sub destroy_config {
    my ($vmid) = @_;

    my $path = config_file($vmid, $nodename);

    return unlink $path;
}
634
# Store $conf as the config of container $vmid on the local node, using
# the cluster file system layer.
sub write_config {
    my ($vmid, $conf) = @_;

    return PVE::Cluster::cfs_write_file(cfs_config_path($vmid), $conf);
}
642
# flock: we use one file handle per process, so the lock functions can be
# called multiple times and will succeed for the same process.

# NOTE(review): $lock_handles is not referenced in the part of the file
# reviewed here - confirm it is still needed.
my $lock_handles = {};
# Directory holding the per-container lock files; created on demand by
# lock_config_full() and lock_config_mode().
my $lockdir = "/run/lock/lxc";
648
# Path of the lock file guarding the config of container $vmid.
sub config_file_lock {
    my ($vmid) = @_;

    return $lockdir . "/pve-config-${vmid}.lock";
}
654
# Run $code->(@param) through PVE::Tools::lock_file() using the config lock
# file of container $vmid, with $timeout passed on as the lock timeout.
# Returns the value delivered by lock_file(); an error left in $@ is
# re-thrown.
sub lock_config_full {
    my ($vmid, $timeout, $code, @param) = @_;

    my $lockfile = config_file_lock($vmid);

    -d $lockdir or mkdir $lockdir;

    my $result = lock_file($lockfile, $timeout, $code, @param);

    die $@ if $@;

    return $result;
}
668
# Same as lock_config_full(), but goes through PVE::Tools::lock_file_full()
# and additionally passes the $shared flag on to it.
# Returns the value delivered by lock_file_full(); an error left in $@ is
# re-thrown.
sub lock_config_mode {
    my ($vmid, $timeout, $shared, $code, @param) = @_;

    my $lockfile = config_file_lock($vmid);

    -d $lockdir or mkdir $lockdir;

    my $result = lock_file_full($lockfile, $timeout, $shared, $code, @param);

    die $@ if $@;

    return $result;
}
682
# Convenience wrapper around lock_config_full() with a fixed timeout of 10.
sub lock_config {
    my ($vmid, $code, @param) = @_;

    my $default_timeout = 10;

    return lock_config_full($vmid, $default_timeout, $code, @param);
}
688
# True when $name is a key of the container config schema.
sub option_exists {
    my ($name) = @_;

    my $schema = $confdesc->{$name};

    return defined($schema);
}
694
# Add the JSON properties for the create and set functions.
#
# Copies every config schema entry except 'parent' and 'snaptime' into
# $prop, unless $prop already holds a true value under that name.
# Modifies and returns $prop.
sub json_config_properties {
    my $prop = shift;

    my %internal_only = (parent => 1, snaptime => 1);

    foreach my $name (grep { !$internal_only{$_} } keys %$confdesc) {
        $prop->{$name} ||= $confdesc->{$name};
    }

    return $prop;
}
707
708 # container status helpers
709
# Return a hash { vmid => 1 } of containers that have an open command
# socket, found by scanning /proc/net/unix for abstract socket names of
# the form '@/var/lib/lxc/<vmid>/command'.
#
# Returns an empty hash when /proc/net/unix cannot be opened.
sub list_active_containers {

    my $filename = "/proc/net/unix";

    # similar test is used by lxccontainer.c: list_active_containers
    my $res = {};

    # plain 3-arg open: no dependency on IO::File being loaded elsewhere
    open(my $fh, '<', $filename) or return $res;

    while (defined(my $line = <$fh>)) {
        # the socket path is the last of the whitespace separated columns
        if ($line =~ m/^[a-f0-9]+:\s\S+\s\S+\s\S+\s\S+\s\S+\s\d+\s(\S+)$/) {
            my $path = $1;
            if ($path =~ m!^@/var/lib/lxc/(\d+)/command$!) {
                $res->{$1} = 1;
            }
        }
    }

    close($fh);

    return $res;
}
733
# Returns 1 when container $vmid shows up in list_active_containers(),
# undef otherwise.
# warning: this is slow (the whole list is built for every call)
sub check_running {
    my ($vmid) = @_;

    my $active = list_active_containers();

    return defined($active->{$vmid}) ? 1 : undef;
}
744
# Disk usage as reported by PVE::Tools::df() for the root directory of
# process $pid (/proc/$pid/root/). $vmid is not used.
sub get_container_disk_usage {
    my ($vmid, $pid) = @_;

    my $ct_root = "/proc/$pid/root/";

    return PVE::Tools::df($ct_root, 1);
}
750
# Per-vmid CPU sample remembered between vmstatus() calls
# (keys: time, used, cpu).
my $last_proc_vmid_stat;

# Read cpuacct.stat of container $vmid and return { utime => ..., stime => ... }
# taken from its 'user' and 'system' lines. The hash is empty when the
# content does not start with those two lines.
my $parse_cpuacct_stat = sub {
    my ($vmid) = @_;

    my $content = read_cgroup_value('cpuacct', $vmid, 'cpuacct.stat', 1);

    my %ticks;
    if ($content =~ m/^user (\d+)\nsystem (\d+)\n/) {
        @ticks{qw(utime stime)} = ($1, $2);
    }

    return \%ticks;
};
769
# Collect status information for containers on the local node.
#
# With $opt_vmid only that container is examined, otherwise all containers
# returned by config_list().
#
# Returns a hash keyed by vmid. Each entry holds: type, pid (running
# containers only), status ('running'/'stopped'), name, cpus, disk,
# maxdisk, mem, maxmem, swap, maxswap, uptime, cpu, netin, netout,
# diskread, diskwrite and template.
#
# The cpu value is derived from the difference to the sample stored by the
# previous call, so the first call for a container reports 0.
sub vmstatus {
    my ($opt_vmid) = @_;

    my $list = $opt_vmid ? { $opt_vmid => { type => 'lxc' }} : config_list();

    my $active_hash = list_active_containers();

    my $cpucount = $cpuinfo->{cpus} || 1;

    my $cdtime = gettimeofday;

    # first pass: static data from the config plus defaults for all counters
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};

        eval { $d->{pid} = find_lxc_pid($vmid) if defined($active_hash->{$vmid}); };
        warn $@ if $@; # ignore errors (consider them stopped)

        $d->{status} = $d->{pid} ? 'running' : 'stopped';

        my $cfspath = cfs_config_path($vmid);
        my $conf = PVE::Cluster::cfs_read_file($cfspath) || {};

        $d->{name} = $conf->{'hostname'} || "CT$vmid";
        $d->{name} =~ s/[\s]//g;

        $d->{cpus} = $conf->{cpulimit} || $cpucount;

        if ($d->{pid}) {
            my $res = get_container_disk_usage($vmid, $d->{pid});
            $d->{disk} = $res->{used};
            $d->{maxdisk} = $res->{total};
        } else {
            $d->{disk} = 0;
            # use 4GB by default ??
            if (my $rootfs = $conf->{rootfs}) {
                my $rootinfo = parse_ct_rootfs($rootfs);
                $d->{maxdisk} = int(($rootinfo->{size} || 4)*1024*1024)*1024;
            } else {
                $d->{maxdisk} = 4*1024*1024*1024;
            }
        }

        $d->{mem} = 0;
        $d->{swap} = 0;
        $d->{maxmem} = ($conf->{memory}||512)*1024*1024;
        $d->{maxswap} = ($conf->{swap}//0)*1024*1024;

        $d->{uptime} = 0;
        $d->{cpu} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $d->{template} = is_template($conf);
    }

    # second pass: live counters of running containers
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};
        my $pid = $d->{pid};

        next if !$pid; # skip stopped CTs

        my $ctime = (stat("/proc/$pid"))[10]; # 10 = ctime
        # the method lxcfs uses; stat fails when the process is already
        # gone, in which case uptime keeps its default of 0
        $d->{uptime} = time - $ctime if defined($ctime);

        $d->{mem} = read_cgroup_value('memory', $vmid, 'memory.usage_in_bytes');
        $d->{swap} = read_cgroup_value('memory', $vmid, 'memory.memsw.usage_in_bytes') - $d->{mem};

        my $blkio_bytes = read_cgroup_value('blkio', $vmid, 'blkio.throttle.io_service_bytes', 1);
        my @bytes = split(/\n/, $blkio_bytes);
        foreach my $byte (@bytes) {
            # the file may contain one Read/Write line per block device,
            # so the values are added up instead of keeping the last one
            if (my ($key, $value) = $byte =~ /(Read|Write)\s+(\d+)/) {
                $d->{diskread} += $value if $key eq 'Read';
                $d->{diskwrite} += $value if $key eq 'Write';
            }
        }

        my $pstat = &$parse_cpuacct_stat($vmid);

        # the hash is empty when cpuacct.stat could not be parsed
        my $used = ($pstat->{utime} // 0) + ($pstat->{stime} // 0);

        my $old = $last_proc_vmid_stat->{$vmid};
        if (!$old) {
            # first sample for this container: nothing to compare against
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($cdtime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            # interval too short for a new value, report the previous one
            $d->{cpu} = $old->{cpu};
        }
    }

    my $netdev = PVE::ProcFSTools::read_proc_net_dev();

    foreach my $dev (keys %$netdev) {
        # devices are named veth<vmid>i<index>, see update_lxc_config()
        next if $dev !~ m/^veth([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $list->{$vmid};

        next if !$d;

        # NOTE(review): receive/transmit are swapped on purpose, presumably
        # because the counters belong to the host side of the veth pair -
        # confirm
        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};

    }

    return $list;
}
898
# Classify the 'volume' part of a mount point:
#   'device' - path below /dev/
#   'bind'   - any other path starting with '/'
#   'volume' - everything else
sub classify_mountpoint {
    my ($vol) = @_;

    return 'volume' if $vol !~ m!^/!;

    return $vol =~ m!^/dev/! ? 'device' : 'bind';
}
907
# Parse a rootfs / mount point property string.
#
# $desc is the property-string schema to parse against, $data the string
# (undef is treated as ''). With $noerr set, failures return undef instead
# of dying.
#
# In the returned hash 'size' is replaced by the result of
# PVE::JSONSchema::parse_size() and 'type' is set from
# classify_mountpoint().
my $parse_ct_mountpoint_full = sub {
    my ($desc, $data, $noerr) = @_;

    $data //= '';

    my $res;
    eval { $res = PVE::JSONSchema::parse_property_string($desc, $data) };
    if ($@) {
        return undef if $noerr;
        die $@;
    }

    if (defined(my $raw_size = $res->{size})) {
        # keep the original text around for the error message
        my $size = PVE::JSONSchema::parse_size($raw_size);
        if (!defined($size)) {
            return undef if $noerr;
            die "invalid size: $raw_size\n";
        }
        $res->{size} = $size;
    }

    $res->{type} = classify_mountpoint($res->{volume});

    return $res;
};
933
# Parse a 'rootfs' property string. On success the result additionally
# carries mp => '/'. With $noerr set, undef is returned on failure.
sub parse_ct_rootfs {
    my ($data, $noerr) = @_;

    my $rootfs = $parse_ct_mountpoint_full->($rootfs_desc, $data, $noerr);

    if (defined($rootfs)) {
        $rootfs->{mp} = '/';
    }

    return $rootfs;
}
943
# Parse an 'mpN' property string against the mount point schema.
# With $noerr set, undef is returned on failure.
sub parse_ct_mountpoint {
    my ($data, $noerr) = @_;

    return $parse_ct_mountpoint_full->($mp_desc, $data, $noerr);
}
949
# Turn a parsed mount point hash back into a property string.
# The 'type' key is always left out; 'mp' is left out too when $nomp is set.
sub print_ct_mountpoint {
    my ($info, $nomp) = @_;

    my @skipped = ('type');
    push @skipped, 'mp' if $nomp;

    return PVE::JSONSchema::print_property_string($info, $mp_desc, \@skipped);
}
956
# Turn a parsed network hash back into a 'netN' property string.
sub print_lxc_network {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $netconf_desc);
}
961
# Parse a 'netN' property string.
#
# Returns an empty hash for empty input. Otherwise 'type' is forced to
# 'veth' and a random MAC address is filled in when 'hwaddr' is missing.
sub parse_lxc_network {
    my ($data) = @_;

    if (!$data) {
        my %nothing;
        return \%nothing;
    }

    my $net = PVE::JSONSchema::parse_property_string($netconf_desc, $data);

    $net->{type} = 'veth';
    $net->{hwaddr} ||= PVE::Tools::random_ether_addr();

    return $net;
}
976
# Read file $name of container $vmid below /sys/fs/cgroup/$group/lxc/.
# Returns the whole content when $full is set, else only the first line.
sub read_cgroup_value {
    my ($group, $vmid, $name, $full) = @_;

    my $path = "/sys/fs/cgroup/$group/lxc/$vmid/$name";

    return $full
        ? PVE::Tools::file_get_contents($path)
        : PVE::Tools::file_read_firstline($path);
}
986
# Write $value to file $name of container $vmid below
# /sys/fs/cgroup/$group/lxc/. Nothing happens when the file does not exist.
sub write_cgroup_value {
    my ($group, $vmid, $name, $value) = @_;

    my $path = "/sys/fs/cgroup/$group/lxc/$vmid/$name";

    -e $path and PVE::ProcFSTools::write_proc_entry($path, $value);
}
994
# Scan /proc for processes whose command line is exactly
# 'lxc-console -n <vmid>' (optionally with a /usr/bin/ prefix).
# Returns a hash { vmid => [ pid, ... ] }.
sub find_lxc_console_pids {

    my %pids_of;

    PVE::Tools::dir_glob_foreach('/proc', '\d+', sub {
        my ($pid) = @_;

        my $cmdline = PVE::Tools::file_read_firstline("/proc/$pid/cmdline");
        return if !$cmdline;

        # arguments are separated by NUL bytes
        my @argv = split(/\0/, $cmdline);
        return if @argv != 3;

        my ($prog, $flag, $vmid) = @argv;

        # search for lxc-console -n <vmid>
        return if $flag ne '-n';
        return if $vmid !~ m/^\d+$/;
        return if $prog !~ m|^(/usr/bin/)?lxc-console$|;

        push @{$pids_of{$vmid}}, $pid;
    });

    return \%pids_of;
}
1020
# Ask 'lxc-info -n <vmid> -p' for the PID of container $vmid.
# Dies when the output contains no (non-zero) PID line.
sub find_lxc_pid {
    my ($vmid) = @_;

    my $pid;

    PVE::Tools::run_command(
        ['lxc-info', '-n', $vmid, '-p'],
        outfunc => sub {
            my ($line) = @_;
            if ($line =~ m/^PID:\s+(\d+)$/) {
                $pid = $1;
            }
        },
    );

    die "unable to get PID for CT $vmid (not running?)\n" if !$pid;

    return $pid;
}
1035
# Parse an IPv4 'address/prefix' string with a prefix length from 8 to 32.
# Returns { address => ..., netmask => ... }; the netmask is looked up in
# $PVE::Network::ipv4_reverse_mask. On failure it dies, or returns undef
# when $noerr is set.
#
# Note: we cannot use Net::IP, because that only allows strict
# CIDR networks
sub parse_ipv4_cidr {
    my ($cidr, $noerr) = @_;

    if ($cidr =~ m!^($IPV4RE)(?:/(\d+))$!) {
        my ($address, $bits) = ($1, $2);
        if (($bits > 7) && ($bits <= 32)) {
            return { address => $address, netmask => $PVE::Network::ipv4_reverse_mask->[$bits] };
        }
    }

    return undef if $noerr;

    die "unable to parse ipv4 address/mask\n";
}
1049
# Die when the config carries a (true) 'lock' value.
sub check_lock {
    my ($conf) = @_;

    my $lock = $conf->{'lock'};

    die "VM is locked ($lock)\n" if $lock;
}
1055
# Die with "$err_msg - protection mode enabled" when the config has its
# 'protection' flag set.
sub check_protection {
    my ($vm_conf, $err_msg) = @_;

    die "$err_msg - protection mode enabled\n" if $vm_conf->{protection};
}
1063
# Generate /var/lib/lxc/$vmid/config from the container config $conf.
#
# For templates the file is removed instead. Otherwise the raw lxc
# settings are derived from the config options, followed by the custom
# 'lxc.*' entries of $conf->{lxc}, and written together with the (empty)
# rootfs mount directory. $storage_cfg is not used here.
#
# Dies when 'arch' or 'ostype' is missing, when the ostype is unknown, or
# when the rootfs property string cannot be parsed.
sub update_lxc_config {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $dir = "/var/lib/lxc/$vmid";

    if ($conf->{template}) {

        unlink "$dir/config";

        return;
    }

    my $raw = '';

    die "missing 'arch' - internal error" if !$conf->{arch};
    $raw .= "lxc.arch = $conf->{arch}\n";

    my $unprivileged = $conf->{unprivileged};
    my $custom_idmap = grep { $_->[0] eq 'lxc.id_map' } @{$conf->{lxc}};

    my $ostype = $conf->{ostype} || die "missing 'ostype' - internal error";
    if ($ostype =~ /^(?:debian | ubuntu | centos | fedora | opensuse | archlinux | alpine)$/x) {
        # prefer the distribution specific include, fall back to the generic one
        my $inc ="/usr/share/lxc/config/$ostype.common.conf";
        $inc ="/usr/share/lxc/config/common.conf" if !-f $inc;
        $raw .= "lxc.include = $inc\n";
        if ($unprivileged || $custom_idmap) {
            $inc = "/usr/share/lxc/config/$ostype.userns.conf";
            $inc = "/usr/share/lxc/config/userns.conf" if !-f $inc;
            $raw .= "lxc.include = $inc\n"
        }
    } else {
        die "implement me (ostype $ostype)";
    }

    # WARNING: DO NOT REMOVE this without making sure that loop device nodes
    # cannot be exposed to the container with r/w access (cgroup perms).
    # When this is enabled mounts will still remain in the monitor's namespace
    # after the container unmounted them and thus will not detach from their
    # files while the container is running!
    $raw .= "lxc.monitor.unshare = 1\n";

    # Should we read them from /etc/subuid?
    if ($unprivileged && !$custom_idmap) {
        $raw .= "lxc.id_map = u 0 100000 65536\n";
        $raw .= "lxc.id_map = g 0 100000 65536\n";
    }

    if (!has_dev_console($conf)) {
        $raw .= "lxc.console = none\n";
        $raw .= "lxc.cgroup.devices.deny = c 5:1 rwm\n";
    }

    my $ttycount = get_tty_count($conf);
    $raw .= "lxc.tty = $ttycount\n";

    # some init scripts expect a linux terminal (turnkey).
    $raw .= "lxc.environment = TERM=linux\n";

    my $utsname = $conf->{hostname} || "CT$vmid";
    $raw .= "lxc.utsname = $utsname\n";

    my $memory = $conf->{memory} || 512;
    my $swap = $conf->{swap} // 0;

    my $lxcmem = int($memory*1024*1024);
    $raw .= "lxc.cgroup.memory.limit_in_bytes = $lxcmem\n";

    # the memsw limit covers memory plus swap
    my $lxcswap = int(($memory + $swap)*1024*1024);
    $raw .= "lxc.cgroup.memory.memsw.limit_in_bytes = $lxcswap\n";

    if (my $cpulimit = $conf->{cpulimit}) {
        $raw .= "lxc.cgroup.cpu.cfs_period_us = 100000\n";
        my $value = int(100000*$cpulimit);
        $raw .= "lxc.cgroup.cpu.cfs_quota_us = $value\n";
    }

    my $shares = $conf->{cpuunits} || 1024;
    $raw .= "lxc.cgroup.cpu.shares = $shares\n";

    # the result is not needed; the call is kept because it dies on a
    # rootfs string that cannot be parsed
    parse_ct_rootfs($conf->{rootfs});

    $raw .= "lxc.rootfs = $dir/rootfs\n";

    my $netcount = 0;
    # sorted, so that the generated file does not depend on hash order
    foreach my $k (sort keys %$conf) {
        next if $k !~ m/^net(\d+)$/;
        my $ind = $1;
        my $d = parse_lxc_network($conf->{$k});
        $netcount++;
        $raw .= "lxc.network.type = veth\n";
        $raw .= "lxc.network.veth.pair = veth${vmid}i${ind}\n";
        $raw .= "lxc.network.hwaddr = $d->{hwaddr}\n" if defined($d->{hwaddr});
        $raw .= "lxc.network.name = $d->{name}\n" if defined($d->{name});
        $raw .= "lxc.network.mtu = $d->{mtu}\n" if defined($d->{mtu});
    }

    # custom raw entries go last and may define further networks
    if (my $lxcconf = $conf->{lxc}) {
        foreach my $entry (@$lxcconf) {
            my ($k, $v) = @$entry;
            $netcount++ if $k eq 'lxc.network.type';
            $raw .= "$k = $v\n";
        }
    }

    $raw .= "lxc.network.type = empty\n" if !$netcount;

    File::Path::mkpath("$dir/rootfs");

    PVE::Tools::file_set_contents("$dir/config", $raw);
}
1174
# verify and cleanup nameserver list (replace \0 with ' ')
#
# Splits the list with PVE::Tools::split_list(), checks every entry with
# PVE::JSONSchema::pve_verify_ip() and returns the entries joined by a
# single space.
sub verify_nameserver_list {
    my ($nameserver_list) = @_;

    my @servers = PVE::Tools::split_list($nameserver_list);

    foreach my $server (@servers) {
        PVE::JSONSchema::pve_verify_ip($server);
    }

    return join(' ', @servers);
}
1187
# Normalize a search domain list: split it with PVE::Tools::split_list and
# return the entries joined by a single space. No per-entry validation is done.
sub verify_searchdomain_list {
    my ($searchdomain_list) = @_;

    # todo: should we add checks for valid dns domains?
    my @domains = PVE::Tools::split_list($searchdomain_list);

    return join(' ', @domains);
}
1199
# Return 1 if any mountpoint of $config is of type 'volume' and refers to
# $volid, otherwise 0.
sub is_volume_in_use {
    my ($config, $volid) = @_;

    my $in_use = 0;

    my $check = sub {
        my ($ms, $mountpoint) = @_;

        return if $in_use; # already found, nothing more to test
        return if $mountpoint->{type} ne 'volume';

        $in_use = 1 if $mountpoint->{volume} eq $volid;
    };
    foreach_mountpoint($config, $check);

    return $in_use;
}
1214
# Record $volid under a free 'unusedN' key of $config.
# Returns the key that was used, or nothing if the volume is already listed
# as unused. Dies when no free slot is left.
sub add_unused_volume {
    my ($config, $volid) = @_;

    # Walk the slots from the highest index downwards so that the last free
    # slot remembered is the one with the lowest index.
    my $free_key;
    foreach my $ind (reverse(0 .. $MAX_UNUSED_DISKS - 1)) {
        my $slot = "unused$ind";
        my $vid = $config->{$slot};
        if (!$vid) {
            $free_key = $slot;
            next;
        }
        return if $vid eq $volid; # do not add duplicates
    }

    die "Too many unused volumes - please delete them first.\n" if !$free_key;

    $config->{$free_key} = $volid;

    return $free_key;
}
1234
# Apply configuration changes to a container.
#
# Parameters:
#   $vmid    - container ID
#   $conf    - current configuration hash; modified in place
#   $running - true if the container is running; changes are then written
#              out after every processed option and hotplugged where possible
#   $param   - hash of options to set ('memory' and 'swap' are extracted
#              from it and handled together)
#   $delete  - optional array reference of option names to delete
#
# Options that cannot be changed while the container is running are
# collected and reported with a single die at the very end, after all other
# changes have been applied.
sub update_pct_config {
    my ($vmid, $conf, $running, $param, $delete) = @_;

    my @nohotplug;

    my $new_disks = 0;
    my @deleted_volumes;

    my $rootdir;
    if ($running) {
        my $pid = find_lxc_pid($vmid);
        $rootdir = "/proc/$pid/root";
    }

    # Returns 1 (and remembers the option names) if the container is running,
    # i.e. the caller has to skip the option; returns 0 otherwise.
    my $hotplug_error = sub {
        if ($running) {
            push @nohotplug, @_;
            return 1;
        } else {
            return 0;
        }
    };

    if (defined($delete)) {
        foreach my $opt (@$delete) {
            if (!exists($conf->{$opt})) {
                warn "no such option: $opt\n";
                next;
            }

            if ($opt eq 'hostname' || $opt eq 'memory' || $opt eq 'rootfs') {
                die "unable to delete required option '$opt'\n";
            } elsif ($opt eq 'swap') {
                delete $conf->{$opt};
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", -1);
            } elsif ($opt eq 'description' || $opt eq 'onboot' || $opt eq 'startup') {
                delete $conf->{$opt};
            } elsif ($opt eq 'nameserver' || $opt eq 'searchdomain' ||
                     $opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
                next if $hotplug_error->($opt);
                delete $conf->{$opt};
            } elsif ($opt =~ m/^net(\d+)$/) {
                # Match multi-digit indices as well, consistent with the
                # 'set' branch below (net10 and above).
                my $netid = $1;
                delete $conf->{$opt};
                next if !$running;
                PVE::Network::veth_delete("veth${vmid}i$netid");
            } elsif ($opt eq 'protection') {
                delete $conf->{$opt};
            } elsif ($opt =~ m/^unused(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                push @deleted_volumes, $conf->{$opt};
                delete $conf->{$opt};
            } elsif ($opt =~ m/^mp(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                my $mp = parse_ct_mountpoint($conf->{$opt});
                delete $conf->{$opt};
                # keep track of the volume unless another mountpoint still uses it
                if ($mp->{type} eq 'volume' && !is_volume_in_use($conf, $mp->{volume})) {
                    add_unused_volume($conf, $mp->{volume});
                }
            } elsif ($opt eq 'unprivileged') {
                die "unable to delete read-only option: '$opt'\n";
            } else {
                die "implement me (delete: $opt)"
            }
            write_config($vmid, $conf) if $running;
        }
    }

    # There's no separate swap size to configure, there's memory and "total"
    # memory (iow. memory+swap). This means we have to change them together.
    my $wanted_memory = PVE::Tools::extract_param($param, 'memory');
    my $wanted_swap = PVE::Tools::extract_param($param, 'swap');
    if (defined($wanted_memory) || defined($wanted_swap)) {

        my $old_memory = ($conf->{memory} || 512);
        my $old_swap = ($conf->{swap} || 0);

        $wanted_memory //= $old_memory;
        $wanted_swap //= $old_swap;

        my $total = $wanted_memory + $wanted_swap;
        if ($running) {
            my $old_total = $old_memory + $old_swap;
            # The order of the two writes depends on whether the total grows
            # or shrinks.
            if ($total > $old_total) {
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
                write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
            } else {
                write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
            }
        }
        $conf->{memory} = $wanted_memory;
        $conf->{swap} = $wanted_swap;

        write_config($vmid, $conf) if $running;
    }

    foreach my $opt (keys %$param) {
        my $value = $param->{$opt};
        if ($opt eq 'hostname') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'onboot') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt eq 'startup') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
            next if $hotplug_error->($opt);
            $conf->{$opt} = $value;
        } elsif ($opt eq 'nameserver') {
            next if $hotplug_error->($opt);
            my $list = verify_nameserver_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'searchdomain') {
            next if $hotplug_error->($opt);
            my $list = verify_searchdomain_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'cpulimit') {
            next if $hotplug_error->($opt); # FIXME: hotplug
            $conf->{$opt} = $value;
        } elsif ($opt eq 'cpuunits') {
            $conf->{$opt} = $value;
            write_cgroup_value("cpu", $vmid, "cpu.shares", $value);
        } elsif ($opt eq 'description') {
            $conf->{$opt} = PVE::Tools::encode_text($value);
        } elsif ($opt =~ m/^net(\d+)$/) {
            my $netid = $1;
            my $net = parse_lxc_network($value);
            if (!$running) {
                $conf->{$opt} = print_lxc_network($net);
            } else {
                update_net($vmid, $conf, $opt, $net, $netid, $rootdir);
            }
        } elsif ($opt eq 'protection') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt =~ m/^mp(\d+)$/) {
            next if $hotplug_error->($opt);
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            my $old = $conf->{$opt};
            $conf->{$opt} = $value;
            if (defined($old)) {
                my $mp = parse_ct_mountpoint($old);
                if ($mp->{type} eq 'volume' && !is_volume_in_use($conf, $mp->{volume})) {
                    add_unused_volume($conf, $mp->{volume});
                }
            }
            $new_disks = 1;
        } elsif ($opt eq 'rootfs') {
            next if $hotplug_error->($opt);
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            my $old = $conf->{$opt};
            $conf->{$opt} = $value;
            if (defined($old)) {
                my $mp = parse_ct_rootfs($old);
                if ($mp->{type} eq 'volume' && !is_volume_in_use($conf, $mp->{volume})) {
                    add_unused_volume($conf, $mp->{volume});
                }
            }
        } elsif ($opt eq 'unprivileged') {
            die "unable to modify read-only option: '$opt'\n";
        } else {
            die "implement me: $opt";
        }
        write_config($vmid, $conf) if $running;
    }

    if (@deleted_volumes) {
        my $storage_cfg = PVE::Storage::config();
        foreach my $volume (@deleted_volumes) {
            delete_mountpoint_volume($storage_cfg, $vmid, $volume);
        }
    }

    if ($new_disks) {
        my $storage_cfg = PVE::Storage::config();
        create_disks($storage_cfg, $vmid, $conf, $conf);
    }

    # This should be the last thing we do here
    if ($running && scalar(@nohotplug)) {
        die "unable to modify " . join(',', @nohotplug) . " while container is running\n";
    }
}
1419
# Tell whether the container gets a /dev/console.
# True (1) unless 'console' is explicitly set to a false value in $conf.
sub has_dev_console {
    my ($conf) = @_;

    my $console = $conf->{console};
    return 1 if !defined($console); # not configured: enabled

    return $console ? 1 : '';
}
1425
# Return the configured 'tty' value, falling back to the schema default
# from $confdesc when it is not defined in $conf.
sub get_tty_count {
    my ($conf) = @_;

    my $tty = $conf->{tty};

    return defined($tty) ? $tty : $confdesc->{tty}->{default};
}
1431
# Return the configured 'cmode' value, falling back to the schema default
# from $confdesc when it is not defined in $conf.
sub get_cmode {
    my ($conf) = @_;

    my $cmode = $conf->{cmode};

    return defined($cmode) ? $cmode : $confdesc->{cmode}->{default};
}
1437
# Build the command (as an array reference) used to open a console for the
# container, depending on its console mode. Dies on an unknown mode.
sub get_console_command {
    my ($vmid, $conf) = @_;

    my $cmode = get_cmode($conf);

    # one command line per supported console mode
    my %command_for = (
        console => ['lxc-console', '-n', $vmid, '-t', 0],
        tty     => ['lxc-console', '-n', $vmid],
        shell   => ['lxc-attach', '--clear-env', '-n', $vmid],
    );

    my $cmd = $command_for{$cmode};
    die "internal error" if !$cmd;

    return $cmd;
}
1453
# Return the primary IPv4 and IPv6 address of the container as a two element
# list, taken from net0. Keyword values (dhcp, manual, and auto for IPv6)
# yield undef; a trailing /prefix-length is stripped from real addresses.
# Returns undef if net0 is not configured.
sub get_primary_ips {
    my ($conf) = @_;

    # return data from net0
    my $net0 = $conf->{net0};
    return undef if !defined($net0);

    my $net = parse_lxc_network($net0);

    # Reduce one configured address to a bare IP, or undef for keywords.
    my $plain_address = sub {
        my ($addr, $keywords) = @_;

        return $addr if !$addr;
        return undef if $addr =~ $keywords;

        $addr =~ s!/\d+$!!;
        return $addr;
    };

    my $ipv4 = $plain_address->($net->{ip}, qr/^(dhcp|manual)$/);
    my $ipv6 = $plain_address->($net->{ip6}, qr/^(auto|dhcp|manual)$/);

    return ($ipv4, $ipv6);
}
1481
# Free the storage volume behind a mountpoint, but only if the mountpoint is
# classified as a 'volume' and the volume's owner equals $vmid.
sub delete_mountpoint_volume {
    my ($storage_cfg, $vmid, $volume) = @_;

    my $class = classify_mountpoint($volume);
    return if $class ne 'volume';

    my (undef, undef, $owner) = PVE::Storage::parse_volname($storage_cfg, $volume);
    PVE::Storage::vdisk_free($storage_cfg, $volume) if $vmid == $owner;
}
1490
# Destroy a container: free the volumes of all its mountpoints, remove its
# directory below /var/lib/lxc and finally destroy its configuration.
sub destroy_lxc_container {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $free_volume = sub {
        my ($ms, $mountpoint) = @_;
        delete_mountpoint_volume($storage_cfg, $vmid, $mountpoint->{volume});
    };
    foreach_mountpoint($conf, $free_volume);

    my $lxc_dir = "/var/lib/lxc/$vmid";
    rmdir "$lxc_dir/rootfs";
    unlink "$lxc_dir/config";
    rmdir $lxc_dir;
    destroy_config($vmid);

    #my $cmd = ['lxc-destroy', '-n', $vmid ];
    #PVE::Tools::run_command($cmd);
}
1507
# Clean up after a container has stopped: deactivate its volumes unless
# $keepActive is set. Errors are only reported as warnings.
sub vm_stop_cleanup {
    my ($storage_cfg, $vmid, $conf, $keepActive) = @_;

    eval {
        return if $keepActive; # leaves the eval block only

        my $vollist = get_vm_volumes($conf);
        PVE::Storage::deactivate_volumes($storage_cfg, $vollist);
    };
    warn $@ if $@; # avoid errors - just warn
}
1520
# Numeric "not equal" that tolerates undef: two undefined values are equal,
# an undefined value differs from any defined one.
my $safe_num_ne = sub {
    my ($left, $right) = @_;

    my $left_set = defined($left);
    my $right_set = defined($right);

    return 0 if !$left_set && !$right_set;
    return 1 if !$left_set || !$right_set;

    return $left != $right;
};
1530
# String "not equal" that tolerates undef: two undefined values are equal,
# an undefined value differs from any defined one.
my $safe_string_ne = sub {
    my ($left, $right) = @_;

    my $left_set = defined($left);
    my $right_set = defined($right);

    return 0 if !$left_set && !$right_set;
    return 1 if !$left_set || !$right_set;

    return $left ne $right;
};
1540
# Apply a changed network interface definition to a running container.
#
# Depending on what differs from the stored definition the veth device is
# either recreated (hwaddr or name changed) or re-plugged into its bridge
# (bridge, tag or firewall changed). An interface with no stored definition
# is hotplugged. The IP configuration is updated afterwards in all cases.
sub update_net {
    my ($vmid, $conf, $opt, $newnet, $netid, $rootdir) = @_;

    # for when there are physical interfaces
    die "cannot update interface of type $newnet->{type}"
        if $newnet->{type} ne 'veth';

    my $veth = "veth${vmid}i${netid}";
    my $eth = $newnet->{name};

    my $oldnetcfg = $conf->{$opt};
    if (!$oldnetcfg) {
        hotplug_net($vmid, $conf, $opt, $newnet, $netid);
    } else {
        my $oldnet = parse_lxc_network($oldnetcfg);

        my $recreate = $safe_string_ne->($oldnet->{hwaddr}, $newnet->{hwaddr})
            || $safe_string_ne->($oldnet->{name}, $newnet->{name});

        my $replug = !$recreate && (
            $safe_string_ne->($oldnet->{bridge}, $newnet->{bridge})
            || $safe_num_ne->($oldnet->{tag}, $newnet->{tag})
            || $safe_num_ne->($oldnet->{firewall}, $newnet->{firewall})
        );

        if ($recreate) {
            # drop the old device and its config entry, then plug a new one
            PVE::Network::veth_delete($veth);
            delete $conf->{$opt};
            write_config($vmid, $conf);

            hotplug_net($vmid, $conf, $opt, $newnet, $netid);
        } elsif ($replug) {
            my @bridge_keys = qw(bridge tag firewall);

            if ($oldnet->{bridge}) {
                # record the unplugged state before attaching again
                PVE::Network::tap_unplug($veth);
                delete $oldnet->{$_} for @bridge_keys;
                $conf->{$opt} = print_lxc_network($oldnet);
                write_config($vmid, $conf);
            }

            PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});
            for my $key (@bridge_keys) {
                $oldnet->{$key} = $newnet->{$key} if $newnet->{$key};
            }
            $conf->{$opt} = print_lxc_network($oldnet);
            write_config($vmid, $conf);
        }
    }

    update_ipconfig($vmid, $conf, $opt, $eth, $newnet, $rootdir);
}
1590
# Create a veth pair for a running container, plug the host side into the
# bridge, move the peer into the container and bring it up. The resulting
# interface definition (without IP settings) is stored in $conf->{$opt} and
# written out.
sub hotplug_net {
    my ($vmid, $conf, $opt, $newnet, $netid) = @_;

    my $veth = "veth${vmid}i${netid}";
    my $vethpeer = "${veth}p";
    my $eth = $newnet->{name};

    PVE::Network::veth_create($veth, $vethpeer, $newnet->{bridge}, $newnet->{hwaddr});
    PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});

    # attach peer in container
    PVE::Tools::run_command(['lxc-device', '-n', $vmid, 'add', $vethpeer, "$eth" ]);

    # link up peer in container
    PVE::Tools::run_command(
        ['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', '/sbin/ip', 'link', 'set', $eth ,'up' ]);

    # only the properties that are in effect now are recorded
    my $done = { type => 'veth' };
    for my $key (qw(bridge tag firewall hwaddr name)) {
        my $value = $newnet->{$key};
        $done->{$key} = $value if $value;
    }
    $conf->{$opt} = print_lxc_network($done);

    write_config($vmid, $conf);
}
1617
# Update the IP address and gateway of interface $eth inside a running
# container, for IPv4 and then IPv6, and persist the result.
#
# Parameters:
#   $vmid    - container ID
#   $conf    - container configuration hash; $conf->{$opt} is rewritten
#   $opt     - name of the network option (e.g. 'net0')
#   $eth     - interface name inside the container
#   $newnet  - parsed network definition holding the wanted ip/gw/ip6/gw6
#   $rootdir - root directory passed to PVE::LXC::Setup->new
#
# Per address family: add the new address, replace the gateway, then remove
# the old address. If adding the address or replacing the gateway fails the
# family is left unchanged (with a warning) and nothing is saved for it.
sub update_ipconfig {
    my ($vmid, $conf, $opt, $eth, $newnet, $rootdir) = @_;

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);

    my $optdata = parse_lxc_network($conf->{$opt});

    # run a command inside the network namespace of the container
    my $nscmd = sub {
        my $cmdargs = shift;
        PVE::Tools::run_command(['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', @_], %$cmdargs);
    };
    my $ipcmd = sub { &$nscmd({}, '/sbin/ip', @_) };

    # values that are configuration keywords rather than addresses
    my $keyword_re = qr/^(?:auto|dhcp|manual)$/;

    my $change_ip_config = sub {
        my ($ipversion) = @_;

        my $family_opt = "-$ipversion";
        my $suffix = $ipversion == 4 ? '' : $ipversion;
        my $gw= "gw$suffix";
        my $ip= "ip$suffix";

        my $newip = $newnet->{$ip};
        my $newgw = $newnet->{$gw};
        my $oldip = $optdata->{$ip};

        my $change_ip = &$safe_string_ne($oldip, $newip);
        my $change_gw = &$safe_string_ne($optdata->{$gw}, $newgw);

        return if !$change_ip && !$change_gw;

        # step 1: add new IP, if this fails we cancel
        my $is_real_ip = ($newip && $newip !~ $keyword_re);
        if ($change_ip && $is_real_ip) {
            eval { &$ipcmd($family_opt, 'addr', 'add', $newip, 'dev', $eth); };
            if (my $err = $@) {
                warn $err;
                return;
            }
        }

        # step 2: replace gateway
        # If this fails we delete the added IP and cancel.
        # If it succeeds we save the config and delete the old IP, ignoring
        # errors. The config is then saved.
        # Note: 'ip route replace' can add
        if ($change_gw) {
            if ($newgw) {
                eval {
                    if ($is_real_ip && !PVE::Network::is_ip_in_cidr($newgw, $newip, $ipversion)) {
                        &$ipcmd($family_opt, 'route', 'add', $newgw, 'dev', $eth);
                    }
                    &$ipcmd($family_opt, 'route', 'replace', 'default', 'via', $newgw);
                };
                if (my $err = $@) {
                    warn $err;
                    # the route was not replaced, the old IP is still available
                    # rollback (delete new IP) and cancel
                    # Only undo what step 1 actually did: nothing was added
                    # when the new value is a keyword or empty.
                    if ($change_ip && $is_real_ip) {
                        eval { &$ipcmd($family_opt, 'addr', 'del', $newip, 'dev', $eth); };
                        warn $@ if $@; # no need to die here
                    }
                    return;
                }
            } else {
                eval { &$ipcmd($family_opt, 'route', 'del', 'default'); };
                # if the route was not deleted, the guest might have deleted it manually
                # warn and continue
                warn $@ if $@;
            }
        }

        # from this point on we save the configuration
        # step 3: delete old IP ignoring errors
        # Keyword values (including 'manual') are not addresses and cannot
        # be removed from the interface.
        if ($change_ip && $oldip && $oldip !~ $keyword_re) {
            # We need to enable promote_secondaries, otherwise our newly added
            # address will be removed along with the old one.
            my $promote = 0;
            eval {
                if ($ipversion == 4) {
                    &$nscmd({ outfunc => sub { $promote = int(shift) } },
                            'cat', "/proc/sys/net/ipv4/conf/$eth/promote_secondaries");
                    &$nscmd({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=1");
                }
                &$ipcmd($family_opt, 'addr', 'del', $oldip, 'dev', $eth);
            };
            warn $@ if $@; # no need to die here

            # restore the previous promote_secondaries setting
            if ($ipversion == 4) {
                &$nscmd({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=$promote");
            }
        }

        foreach my $property ($ip, $gw) {
            if ($newnet->{$property}) {
                $optdata->{$property} = $newnet->{$property};
            } else {
                delete $optdata->{$property};
            }
        }
        $conf->{$opt} = print_lxc_network($optdata);
        write_config($vmid, $conf);
        $lxc_setup->setup_network($conf);
    };

    &$change_ip_config(4);
    &$change_ip_config(6);

}
1727
1728 # Internal snapshots
1729
1730 # NOTE: Snapshot create/delete involves several non-atomic
1731 # actions, and can take a long time.
1732 # So we try to avoid locking the file and use the 'lock' variable
1733 # inside the config file instead.
1734
# Copy all configuration entries from $source to $dest, except for the
# snapshot bookkeeping keys listed below.
my $snapshot_copy_config = sub {
    my ($source, $dest) = @_;

    my %skip = map { $_ => 1 }
        qw(snapshots snapstate snaptime vmstate lock digest description);

    foreach my $key (keys %$source) {
        next if $skip{$key};

        $dest->{$key} = $source->{$key};
    }
};
1750
# First phase of taking a snapshot: under the config lock, set the
# 'snapshot' lock in the configuration and add a snapshot entry in state
# 'prepare' that holds a copy of the current settings.
# Returns the new snapshot entry.
sub snapshot_prepare {
    my ($vmid, $snapname, $save_vmstate, $comment) = @_;

    my $snap;

    my $prepare = sub {
        my $conf = load_config($vmid);

        if (is_template($conf)) {
            die "you can't take a snapshot if it's a template\n";
        }

        check_lock($conf);

        $conf->{lock} = 'snapshot';

        if (defined($conf->{snapshots}->{$snapname})) {
            die "snapshot name '$snapname' already used\n";
        }

        my $storecfg = PVE::Storage::config();
        my $feature = 'snapshot';
        $feature = 'vzdump' if $snapname eq 'vzdump';
        if (!has_feature($feature, $conf, $storecfg)) {
            die "snapshot feature is not available\n";
        }

        # build the entry from the current settings, then attach it
        $snap = {};
        $snapshot_copy_config->($conf, $snap);

        $snap->{'snapstate'} = "prepare";
        $snap->{'snaptime'} = time();
        $snap->{'description'} = $comment if $comment;
        $conf->{snapshots}->{$snapname} = $snap;

        write_config($vmid, $conf);
    };

    lock_config($vmid, $prepare);

    return $snap;
}
1790
# Final phase of taking a snapshot: under the config lock, verify that the
# snapshot is in state 'prepare', then clear that state and the 'snapshot'
# lock and make the snapshot the current parent.
sub snapshot_commit {
    my ($vmid, $snapname) = @_;

    my $commit = sub {
        my $conf = load_config($vmid);

        my $lock = $conf->{lock};
        die "missing snapshot lock\n"
            if !$lock || $lock ne 'snapshot';

        my $snap = $conf->{snapshots}->{$snapname};
        die "snapshot '$snapname' does not exist\n"
            if !defined($snap);

        my $state = $snap->{'snapstate'};
        die "wrong snapshot state\n"
            if !$state || $state ne "prepare";

        delete $snap->{'snapstate'};
        delete $conf->{lock};
        $conf->{parent} = $snapname;

        write_config($vmid, $conf);
    };

    lock_config($vmid, $commit);
}
1817
# Check whether the storage of the container's mountpoints supports
# $feature. The pseudo feature 'vzdump' is checked as 'snapshot', but only
# for the rootfs and for mountpoints flagged for backup.
# Returns 1 if the feature is available, 0 otherwise. Dies for any checked
# mountpoint other than the rootfs (not implemented).
sub has_feature {
    my ($feature, $conf, $storecfg, $snapname) = @_;

    my $is_vzdump = $feature eq 'vzdump';
    $feature = 'snapshot' if $is_vzdump;

    my $missing;

    my $check = sub {
        my ($ms, $mountpoint) = @_;

        return if $missing; # skip further test
        return if $is_vzdump && $ms ne 'rootfs' && !$mountpoint->{backup};

        if (!PVE::Storage::volume_has_feature($storecfg, $feature, $mountpoint->{volume}, $snapname)) {
            $missing = 1;
        }

        # TODO: implement support for mountpoints
        die "unable to handle mountpoint '$ms' - feature not implemented\n"
            if $ms ne 'rootfs';
    };
    foreach_mountpoint($conf, $check);

    return $missing ? 0 : 1;
}
1840
# Enter namespace $which (of kind $type) of process $pid by opening its
# /proc/<pid>/ns entry and calling setns on it. Dies on failure.
my $enter_namespace = sub {
    my ($vmid, $pid, $which, $type) = @_;

    my $ns_path = "/proc/$pid/ns/$which";
    sysopen(my $fd, $ns_path, O_RDONLY)
        or die "failed to open $which namespace of container $vmid: $!\n";

    if (!PVE::Tools::setns(fileno($fd), $type)) {
        die "failed to enter $which namespace of container $vmid: $!\n";
    }

    close $fd;
};
1849
# Child side of sync_container_namespace: enter the mount namespace of the
# container, signal the parent over $socket, read the mount table the parent
# sends back and sync every listed mountpoint except lxcfs ones.
my $do_syncfs = sub {
    my ($vmid, $pid, $socket) = @_;

    $enter_namespace->($vmid, $pid, 'mnt', PVE::Tools::CLONE_NEWNS);

    # Tell the parent process to start reading our /proc/mounts
    print {$socket} "go\n";
    $socket->flush();

    # Receive /proc/self/mounts
    my $mountdata = do { local $/ = undef; <$socket> };
    close $socket;

    # Now sync all mountpoints...
    my $mounts = PVE::ProcFSTools::parse_mounts($mountdata);
    foreach my $entry (@$mounts) {
        my (undef, $dir, $fs) = @$entry;
        next if $fs eq 'fuse.lxcfs';

        # a failing mountpoint must not stop the others from being synced
        eval { PVE::Tools::sync_mountpoint($dir); };
        warn $@ if $@;
    }
};
1872
# Sync all filesystems mounted inside the mount namespace of a running
# container.
#
# A child process is forked which enters the container's mount namespace.
# Once it reports "go" over a socket pair, the parent reads the child's
# /proc/<pid>/mounts, sends it to the child and waits for the child to sync
# every mountpoint. Dies if any of these steps fails; in that case the child
# is reaped first so that it is not left behind as a zombie.
sub sync_container_namespace {
    my ($vmid) = @_;
    my $pid = find_lxc_pid($vmid);

    # SOCK_DGRAM is nicer for barriers but cannot be slurped
    socketpair my $pfd, my $cfd, AF_UNIX, SOCK_STREAM, PF_UNSPEC
        or die "failed to create socketpair: $!\n";

    my $child = fork();
    die "fork failed: $!\n" if !defined($child);

    if (!$child) {
        eval {
            close $pfd;
            &$do_syncfs($vmid, $pid, $cfd);
        };
        if (my $err = $@) {
            warn $err;
            POSIX::_exit(1);
        }
        POSIX::_exit(0);
    }
    close $cfd;

    # Give up: closing our end makes a child that still waits for the mount
    # table see EOF and terminate, then collect it before raising the error.
    my $abort = sub {
        my ($msg) = @_;
        close $pfd;
        waitpid($child, 0);
        die $msg;
    };

    # $go is undef if the child exited before reporting back
    my $go = <$pfd>;
    $abort->("failed to enter container namespace\n")
        if !defined($go) || $go ne "go\n";

    open my $mounts, '<', "/proc/$child/mounts"
        or $abort->("failed to open container's /proc/mounts: $!\n");
    my $mountdata = do { local $/ = undef; <$mounts> };
    close $mounts;
    print {$pfd} $mountdata;
    close $pfd;

    while (waitpid($child, 0) != $child) {}
    die "failed to sync container namespace\n" if $? != 0;
}
1909
# Take a snapshot of the container's rootfs volume.
#
# The snapshot is prepared in the configuration first. A running container
# is frozen and its filesystems are synced before the storage snapshot is
# taken, and it is unfrozen again afterwards. On failure the prepared
# snapshot is removed again and the error is re-raised; on success the
# snapshot is committed.
sub snapshot_create {
    my ($vmid, $snapname, $save_vmstate, $comment) = @_;

    my $snap = snapshot_prepare($vmid, $snapname, $save_vmstate, $comment);

    my $conf = load_config($vmid);
    my $running = check_running($vmid);

    my $frozen = 0;
    my $drivehash = {};

    eval {
        if ($running) {
            # set the flag first so that we unfreeze even if freezing
            # itself reported an error
            $frozen = 1;
            PVE::Tools::run_command(['/usr/bin/lxc-freeze', '-n', $vmid]);
            sync_container_namespace($vmid);
        }

        my $storecfg = PVE::Storage::config();
        my $volid = parse_ct_rootfs($conf->{rootfs})->{volume};

        PVE::Storage::volume_snapshot($storecfg, $volid, $snapname);
        $drivehash->{rootfs} = 1;
    };
    my $err = $@;

    if ($frozen) {
        eval { PVE::Tools::run_command(['/usr/bin/lxc-unfreeze', '-n', $vmid]); };
        warn $@ if $@;
    }

    if ($err) {
        # roll back the prepared snapshot, then report the original error
        eval { snapshot_delete($vmid, $snapname, 1, $drivehash); };
        warn "$@\n" if $@;
        die "$err\n";
    }

    snapshot_commit($vmid, $snapname);
}
1952
# Delete a snapshot of the container.
#
# Parameters:
#   $vmid      - container ID
#   $snapname  - name of the snapshot to delete
#   $force     - if true, the snapshot is removed from the configuration
#                even when deleting the storage snapshot failed
#   $drivehash - Note: $drivehash is only set when called from
#                snapshot_create. Lock checks are skipped then and the
#                config lock is cleared together with the snapshot entry.
#
# Dies if the storage snapshot could not be deleted, with or without $force.
sub snapshot_delete {
    my ($vmid, $snapname, $force, $drivehash) = @_;

    my $snap;

    my $conf;

    # phase 1: mark the snapshot as being deleted
    my $updatefn = sub {

        $conf = load_config($vmid);

        die "you can't delete a snapshot if vm is a template\n"
            if is_template($conf);

        $snap = $conf->{snapshots}->{$snapname};

        if (!$drivehash) {
            check_lock($conf);
        }

        die "snapshot '$snapname' does not exist\n" if !defined($snap);

        $snap->{snapstate} = 'delete';

        write_config($vmid, $conf);
    };

    lock_config($vmid, $updatefn);

    my $storecfg = PVE::Storage::config();

    # re-point (or drop) a parent reference to the snapshot being deleted
    my $unlink_parent = sub {

        my ($confref, $new_parent) = @_;

        if ($confref->{parent} && $confref->{parent} eq $snapname) {
            if ($new_parent) {
                $confref->{parent} = $new_parent;
            } else {
                delete $confref->{parent};
            }
        }
    };

    # phase 3: remove the snapshot entry from the configuration
    my $del_snap = sub {

        $conf = load_config($vmid);

        if ($drivehash) {
            delete $conf->{lock};
        } else {
            check_lock($conf);
        }

        my $parent = $conf->{snapshots}->{$snapname}->{parent};
        foreach my $snapkey (keys %{$conf->{snapshots}}) {
            &$unlink_parent($conf->{snapshots}->{$snapkey}, $parent);
        }

        &$unlink_parent($conf, $parent);

        delete $conf->{snapshots}->{$snapname};

        write_config($vmid, $conf);
    };

    my $rootfs = $conf->{snapshots}->{$snapname}->{rootfs};
    my $rootinfo = parse_ct_rootfs($rootfs);
    my $volid = $rootinfo->{volume};

    # phase 2: delete the snapshot on the storage
    eval {
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname);
    };
    my $err = $@;

    if (!$err || $force) {
        lock_config($vmid, $del_snap);
    }

    # Report a storage failure in every case. Without $force the snapshot
    # entry is kept (in state 'delete') so that the deletion can be retried.
    die "Can't delete snapshot: $vmid $snapname $err\n" if $err;
}
2036
# Roll the container back to snapshot $snapname.
#
# The container is stopped if it is running, the settings stored in the
# snapshot are copied over the current configuration under a 'rollback'
# lock, the rootfs volume is rolled back on the storage and the lock is
# removed again.
sub snapshot_rollback {
    my ($vmid, $snapname) = @_;

    my $storecfg = PVE::Storage::config();

    my $conf = load_config($vmid);

    if (is_template($conf)) {
        die "you can't rollback if vm is a template\n";
    }

    my $snap = $conf->{snapshots}->{$snapname};
    if (!defined($snap)) {
        die "snapshot '$snapname' does not exist\n";
    }

    my $volid = parse_ct_rootfs($snap->{rootfs})->{volume};

    PVE::Storage::volume_rollback_is_possible($storecfg, $volid, $snapname);

    my $apply_snapshot_config = sub {
        die "unable to rollback to incomplete snapshot (snapstate = $snap->{snapstate})\n"
            if $snap->{snapstate};

        check_lock($conf);

        system("lxc-stop -n $vmid --kill") if check_running($vmid);

        die "unable to rollback vm $vmid: vm is running\n"
            if check_running($vmid);

        $conf->{lock} = 'rollback';

        # copy snapshot config to current config
        # The copy is made into the existing hash, so current settings that
        # are not present in the snapshot stay in place.
        $snapshot_copy_config->($snap, $conf);
        delete $conf->{snaptime};
        delete $conf->{snapname};
        $conf->{parent} = $snapname;

        write_config($vmid, $conf);
    };

    my $release_lock = sub {
        delete $conf->{lock};
        write_config($vmid, $conf);
    };

    lock_config($vmid, $apply_snapshot_config);

    PVE::Storage::volume_snapshot_rollback($storecfg, $volid, $snapname);

    lock_config($vmid, $release_lock);
}
2095
# Convert the container's rootfs volume into a base (template) volume and
# store the resulting volume ID in the configuration.
# Dies if the storage does not offer the 'template' feature for the volume.
sub template_create {
    my ($vmid, $conf) = @_;

    my $storecfg = PVE::Storage::config();

    my $rootinfo = parse_ct_rootfs($conf->{rootfs});
    my $volid = $rootinfo->{volume};

    if (!PVE::Storage::volume_has_feature($storecfg, 'template', $volid)) {
        die "Template feature is not available for '$volid'\n";
    }

    PVE::Storage::activate_volumes($storecfg, [$volid]);

    # the rootfs entry has to point to the newly created base volume
    $rootinfo->{volume} = PVE::Storage::vdisk_create_base($storecfg, $volid);
    $conf->{rootfs} = print_ct_mountpoint($rootinfo, 1);

    write_config($vmid, $conf);
}
2115
# Return 1 if the configuration marks the container as a template
# ('template' is defined and numerically equal to 1), a false value otherwise.
sub is_template {
    my ($conf) = @_;

    my $flag = $conf->{template};
    return 1 if defined($flag) && $flag == 1;

    return '';
}
2121
# Return the list of all possible mountpoint keys: 'rootfs' followed by
# 'mp0' up to the configured maximum, or that list reversed if $reverse
# is true.
sub mountpoint_names {
    my ($reverse) = @_;

    my @names = ('rootfs', map { "mp$_" } 0 .. $MAX_MOUNT_POINTS - 1);

    return $reverse ? reverse @names : @names;
}
2133
2134
# Call $func->($key, $mountpoint) for every mountpoint defined in $conf,
# in the order given by mountpoint_names($reverse). Entries that are not
# defined, or for which the parser returns undef, are skipped.
sub foreach_mountpoint_full {
    my ($conf, $reverse, $func) = @_;

    foreach my $key (mountpoint_names($reverse)) {
        my $value = $conf->{$key};
        next if !defined($value);

        # the rootfs has its own parser
        my $parse = $key eq 'rootfs' ? \&parse_ct_rootfs : \&parse_ct_mountpoint;
        my $mountpoint = $parse->($value, 1);
        next if !defined($mountpoint);

        $func->($key, $mountpoint);
    }
}
2147
# Call $func for every mountpoint of $conf in forward order.
sub foreach_mountpoint {
    my ($conf, $func) = @_;

    my $reverse = 0;
    foreach_mountpoint_full($conf, $reverse, $func);
}
2153
# Call $func for every mountpoint of $conf in reverse order.
sub foreach_mountpoint_reverse {
    my ($conf, $func) = @_;

    my $reverse = 1;
    foreach_mountpoint_full($conf, $reverse, $func);
}
2159
# Check that $authuser may modify every option listed in $key_list.
#
# Parameters:
#   $rpcenv   - object providing check_vm_perm($authuser, $vmid, $pool, $privs)
#   $authuser - ID of the authenticated user
#   $vmid     - container ID
#   $pool     - pool passed through to check_vm_perm
#   $key_list - array reference of option names that are to be modified
#
# 'root@pam' is allowed to change everything. For all other users the
# privilege matching each option is checked through $rpcenv->check_vm_perm.
# Returns 1 when all checks were passed.
sub check_ct_modify_config_perm {
    my ($rpcenv, $authuser, $vmid, $pool, $key_list) = @_;

    # only the superuser may skip the per-option checks
    return 1 if $authuser eq 'root@pam';

    foreach my $opt (@$key_list) {

        if ($opt eq 'cpus' || $opt eq 'cpuunits' || $opt eq 'cpulimit') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.CPU']);
        } elsif ($opt eq 'rootfs' || $opt =~ /^mp\d+$/) {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk']);
        } elsif ($opt eq 'memory' || $opt eq 'swap') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Memory']);
        } elsif ($opt =~ m/^net\d+$/ || $opt eq 'nameserver' ||
                 $opt eq 'searchdomain' || $opt eq 'hostname') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Network']);
        } else {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Options']);
        }
    }

    return 1;
}
2183
# Unmount all mountpoints of the container below /var/lib/lxc/$vmid/rootfs,
# in reverse order. With $noerr set, unmount errors are only reported as
# warnings; otherwise the first error is fatal.
sub umount_all {
    my ($vmid, $storage_cfg, $conf, $noerr) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    my $volid_list = get_vm_volumes($conf);

    my $unmount = sub {
        my (undef, $mountpoint) = @_;

        my ($volid, $mount) = ($mountpoint->{volume}, $mountpoint->{mp});
        return if !($volid && $mount);

        # collapse repeated slashes in the resulting path
        (my $mount_path = "$rootdir/$mount") =~ s!/+!/!g;

        return if !PVE::ProcFSTools::is_mounted($mount_path);

        eval {
            PVE::Tools::run_command(['umount', '-d', $mount_path]);
        };
        my $err = $@;
        return if !$err;

        die $err if !$noerr;
        warn $err;
    };

    foreach_mountpoint_reverse($conf, $unmount);
}
2215
# Activate the container's volumes and mount all of its mountpoints below
# /var/lib/lxc/$vmid/rootfs. If mounting fails, everything is unmounted
# again before the error is re-raised.
# Returns the root directory.
sub mount_all {
    my ($vmid, $storage_cfg, $conf) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    File::Path::make_path($rootdir);

    my $volid_list = get_vm_volumes($conf);
    PVE::Storage::activate_volumes($storage_cfg, $volid_list);

    my $mount_one = sub {
        my (undef, $mountpoint) = @_;
        mountpoint_mount($mountpoint, $rootdir, $storage_cfg);
    };

    eval { foreach_mountpoint($conf, $mount_one); };
    my $err = $@;
    if ($err) {
        warn "mounting container failed\n";
        # best effort cleanup; unmount errors are only warned about
        umount_all($vmid, $storage_cfg, $conf, 1);
        die $err;
    }

    return $rootdir;
}
2240
2241
# Call mountpoint_mount without a root directory and return its result.
sub mountpoint_mount_path {
    my ($mountpoint, $storage_cfg, $snapname) = @_;

    my $rootdir = undef; # no root directory is passed on
    return mountpoint_mount($mountpoint, $rootdir, $storage_cfg, $snapname);
}
2247
# Die if the canonical form of $path differs from its fully resolved real
# path, i.e. if a symlink is part of the path.
my $check_mount_path = sub {
    my ($path) = @_;

    my $canonical = File::Spec->canonpath($path);
    my $resolved = Cwd::realpath($canonical);

    die "mount path modified by symlink: $canonical != $resolved"
        if $resolved ne $canonical;
};
2256
# Return the loop device reported by 'losetup --associated' for $path, or
# undef if the output names none. If several devices are listed, the last
# one is returned.
sub query_loopdev {
    my ($path) = @_;

    my $loopdev;
    my $scan_line = sub {
        my ($line) = @_;
        $loopdev = $1 if $line =~ m@^(/dev/loop\d+):@;
    };

    PVE::Tools::run_command(['losetup', '--associated', $path], outfunc => $scan_line);

    return $loopdev;
}
2270
# Run a function with a file attached to a loop device.
# The function is called with the device path as its only argument.
# The loop device is always detached afterwards (or set to autoclear).
# Returns the loop device.
sub run_with_loopdev {
    my ($func, $file) = @_;

    my $device;
    my $capture = sub {
        my ($line) = @_;
        $device = $1 if $line =~ m@^(/dev/loop\d+)$@;
    };

    PVE::Tools::run_command(['losetup', '--show', '-f', $file], outfunc => $capture);
    die "failed to setup loop device for $file\n" if !$device;

    # remember a failure of $func so the device can be detached first
    eval { $func->($device); };
    my $err = $@;

    PVE::Tools::run_command(['losetup', '-d', $device]);
    die $err if $err;

    return $device;
}
2291
# Bind mount $dir onto $dest, passing @extra_opts on to mount. With $ro set
# the bind mount is remounted read-only afterwards; if that fails the mount
# is removed again and the error is re-raised.
sub bindmount {
    my ($dir, $dest, $ro, @extra_opts) = @_;

    my @bind_cmd = ('mount', '-o', 'bind', @extra_opts, $dir, $dest);
    PVE::Tools::run_command(\@bind_cmd);

    if ($ro) {
        eval { PVE::Tools::run_command(['mount', '-o', 'bind,remount,ro', $dest]); };
        my $err = $@;
        if ($err) {
            warn "bindmount error\n";
            # don't leave writable bind-mounts behind...
            PVE::Tools::run_command(['umount', $dest]);
            die $err;
        }
    }
}
2305
# Mount a single mountpoint below $rootdir and/or resolve its backing path.
#
# use $rootdir = undef to just return the corresponding mount path
#
# Parameters:
#   $mountpoint  - mountpoint hash; the keys read here are volume, mp, type,
#                  ro, quota and acl
#   $rootdir     - directory below which $mountpoint->{mp} gets mounted
#   $storage_cfg - storage configuration handed on to PVE::Storage
#   $snapname    - optional snapshot name
#
# Returns nothing if the mountpoint has no volume or no mount target.
# Otherwise returns the path in scalar context, or the list
# ($path, $use_loopdev, $mounted_dev) in list context.
#
# Dies for unsupported storage types and image formats, for a snapshot
# request on a volume without storage, for a missing bind mount source, and
# when a checked path is modified by a symlink.
sub mountpoint_mount {
    my ($mountpoint, $rootdir, $storage_cfg, $snapname) = @_;

    my $volid = $mountpoint->{volume};
    my $mount = $mountpoint->{mp};
    my $type = $mountpoint->{type};
    # quota is only considered for writable, non-snapshot mounts
    my $quota = !$snapname && !$mountpoint->{ro} && $mountpoint->{quota};
    my $mounted_dev;

    return if !$volid || !$mount;

    my $mount_path;

    if (defined($rootdir)) {
        $rootdir =~ s!/+$!!;
        $mount_path = "$rootdir/$mount";
        $mount_path =~ s!/+!/!g;
        &$check_mount_path($mount_path);
        File::Path::mkpath($mount_path);
    }

    my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1);

    die "unknown snapshot path for '$volid'" if !$storage && defined($snapname);

    my $optstring = '';
    if (defined($mountpoint->{acl})) {
        $optstring .= ($mountpoint->{acl} ? 'acl' : 'noacl');
    }
    my $readonly = $mountpoint->{ro};

    # Only hand '-o <options>' to mount when there really are options;
    # otherwise every mount call would carry an empty "-o ''" argument.
    my @extra_opts;
    @extra_opts = ('-o', $optstring) if $optstring;

    if ($storage) {

        my $scfg = PVE::Storage::storage_config($storage_cfg, $storage);
        my $path = PVE::Storage::path($storage_cfg, $volid, $snapname);

        my ($vtype, undef, undef, undef, undef, $isBase, $format) =
            PVE::Storage::parse_volname($storage_cfg, $volid);

        $format = 'iso' if $vtype eq 'iso'; # allow to handle iso files

        if ($format eq 'subvol') {
            if ($mount_path) {
                if ($snapname) {
                    if ($scfg->{type} eq 'zfspool') {
                        # mount -t zfs wants the path without leading slashes
                        my $path_arg = $path;
                        $path_arg =~ s!^/+!!;
                        PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, '-t', 'zfs', $path_arg, $mount_path]);
                    } else {
                        die "cannot mount subvol snapshots for storage type '$scfg->{type}'\n";
                    }
                } else {
                    bindmount($path, $mount_path, $readonly, @extra_opts);
                    warn "cannot enable quota control for bind mounted subvolumes\n" if $quota;
                }
            }
            return wantarray ? ($path, 0, $mounted_dev) : $path;
        } elsif ($format eq 'raw' || $format eq 'iso') {
            # Performs the actual mount; $path is either the image's block
            # device or the loop device the image got attached to.
            my $domount = sub {
                my ($path) = @_;
                if ($mount_path) {
                    if ($format eq 'iso') {
                        PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, $path, $mount_path]);
                    } elsif ($isBase || defined($snapname)) {
                        PVE::Tools::run_command(['mount', '-o', 'ro,noload', @extra_opts, $path, $mount_path]);
                    } else {
                        if ($quota) {
                            push @extra_opts, '-o', 'usrjquota=aquota.user,grpjquota=aquota.group,jqfmt=vfsv0';
                        }
                        push @extra_opts, '-o', 'ro' if $readonly;
                        PVE::Tools::run_command(['mount', @extra_opts, $path, $mount_path]);
                    }
                }
            };
            my $use_loopdev = 0;
            if ($scfg->{path}) {
                # storages with a 'path' property are mounted via loop device
                $mounted_dev = run_with_loopdev($domount, $path);
                $use_loopdev = 1;
            } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' ||
                     $scfg->{type} eq 'rbd' || $scfg->{type} eq 'lvmthin') {
                $mounted_dev = $path;
                &$domount($path);
            } else {
                die "unsupported storage type '$scfg->{type}'\n";
            }
            return wantarray ? ($path, $use_loopdev, $mounted_dev) : $path;
        } else {
            die "unsupported image format '$format'\n";
        }
    } elsif ($type eq 'device') {
        push @extra_opts, '-o', 'ro' if $readonly;
        PVE::Tools::run_command(['mount', @extra_opts, $volid, $mount_path]) if $mount_path;
        return wantarray ? ($volid, 0, $volid) : $volid;
    } elsif ($type eq 'bind') {
        die "directory '$volid' does not exist\n" if ! -d $volid;
        &$check_mount_path($volid);
        bindmount($volid, $mount_path, $readonly, @extra_opts) if $mount_path;
        warn "cannot enable quota control for bind mounts\n" if $quota;
        return wantarray ? ($volid, 0, undef) : $volid;
    }

    die "unsupported storage";
}
2412
# Collect the volume IDs of all storage-backed volumes of a container.
#
# Walks the configuration with foreach_mountpoint(). An entry is skipped when
# its key equals $excludes (if given), when it has no volume, when its type
# is not 'volume', or when its volume ID does not yield a storage ID.
# Returns an array reference with the remaining volume IDs.
sub get_vm_volumes {
    my ($conf, $excludes) = @_;

    my @volumes;

    my $collect = sub {
        my ($key, $entry) = @_;

        return if $excludes && $key eq $excludes;

        my $volid = $entry->{volume};
        return unless $volid && $entry->{type} eq 'volume';

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return unless $storeid;

        push @volumes, $volid;
    };

    foreach_mountpoint($conf, $collect);

    return \@volumes;
}
2435
# Create an ext4 file system (with the 'mmp' feature) on $dev.
#
# Parameters:
#   $dev      - device or image path handed to mkfs.ext4
#   $rootuid  - owner uid of the new file system's root directory
#   $rootgid  - owner gid of the new file system's root directory
#
# Both ids are optional and default to 0. Without the default, an omitted id
# would be interpolated as an empty string (producing "root_owner=:") and
# trigger "uninitialized value" warnings.
sub mkfs {
    my ($dev, $rootuid, $rootgid) = @_;

    $rootuid = 0 if !defined($rootuid);
    $rootgid = 0 if !defined($rootgid);

    PVE::Tools::run_command(['mkfs.ext4', '-O', 'mmp',
                             '-E', "root_owner=$rootuid:$rootgid",
                             $dev]);
}
2443
# Put a file system on a volume so that it can be used by a container.
#
# Parameters:
#   $storage_cfg - storage configuration handed on to PVE::Storage
#   $volid       - either a '/dev/...' device path or a storage volume ID
#   $rootuid     - owner uid for the root directory of the new file system
#   $rootgid     - owner gid for the root directory of the new file system
#
# Device paths are formatted directly. Storage volumes are activated first
# and must be in 'raw' format.
#
# Dies if $volid has no storage part (and is not a device path) or if the
# volume format is not 'raw'.
sub format_disk {
    my ($storage_cfg, $volid, $rootuid, $rootgid) = @_;

    if ($volid =~ m!^/dev/.+!) {
        # pass the owner ids on, just like for storage backed volumes below
        mkfs($volid, $rootuid, $rootgid);
        return;
    }

    my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1);

    die "cannot format volume '$volid' with no storage\n" if !$storage;

    PVE::Storage::activate_volumes($storage_cfg, [$volid]);

    my $path = PVE::Storage::path($storage_cfg, $volid);

    my ($vtype, undef, undef, undef, undef, $isBase, $format) =
        PVE::Storage::parse_volname($storage_cfg, $volid);

    die "cannot format volume '$volid' (format == $format)\n"
        if $format ne 'raw';

    mkfs($path, $rootuid, $rootgid);
}
2468
# Free every volume in $vollist (array reference of volume IDs).
#
# Each volume is released on its own; a failure is reported with warn and
# does not stop the remaining volumes from being processed.
sub destroy_disks {
    my ($storecfg, $vollist) = @_;

    for my $volume (@$vollist) {
        eval { PVE::Storage::vdisk_free($storecfg, $volume); };
        if (my $err = $@) {
            warn $err;
        }
    }
}
2477
# Allocate the volumes requested in $settings and record them in $conf.
#
# Parameters:
#   $storecfg - storage configuration handed on to PVE::Storage
#   $vmid     - container ID the new volumes are allocated for
#   $settings - configuration whose mountpoints are walked; a volume of the
#               form '<storage>:<size in GB>' requests a new allocation,
#               anything else is taken over as is
#   $conf     - container configuration; receives the resulting mountpoint
#               strings and is the source of the id mapping
#
# Returns an array reference with the IDs of all newly allocated volumes.
#
# On any error all volumes allocated so far are freed again and the error is
# re-thrown. A new volume is registered for that cleanup right after its
# allocation, so it is also freed when formatting it fails.
sub create_disks {
    my ($storecfg, $vmid, $settings, $conf) = @_;

    my $vollist = [];

    eval {
        my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
        my $chown_vollist = [];

        foreach_mountpoint($settings, sub {
            my ($ms, $mountpoint) = @_;

            my $volid = $mountpoint->{volume};

            my ($storage) = PVE::Storage::parse_volume_id($volid, 1);

            if ($storage && ($volid =~ m/^([^:\s]+):(\d+(\.\d+)?)$/)) {
                my $size_gb = $2;

                my $size_kb = int(${size_gb}*1024) * 1024;

                my $scfg = PVE::Storage::storage_config($storecfg, $storage);
                # fixme: use better naming ct-$vmid-disk-X.raw?

                # Decide on image format and allocation size per storage type.
                my ($format, $alloc_kb);
                if ($scfg->{type} eq 'dir' || $scfg->{type} eq 'nfs') {
                    if ($size_kb > 0) {
                        ($format, $alloc_kb) = ('raw', $size_kb);
                    } else {
                        # size 0 requests a plain sub-directory volume
                        ($format, $alloc_kb) = ('subvol', 0);
                    }
                } elsif ($scfg->{type} eq 'zfspool') {
                    ($format, $alloc_kb) = ('subvol', $size_kb);
                } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' || $scfg->{type} eq 'lvmthin') {
                    ($format, $alloc_kb) = ('raw', $size_kb);
                } elsif ($scfg->{type} eq 'rbd') {
                    die "krbd option must be enabled on storage type '$scfg->{type}'\n" if !$scfg->{krbd};
                    ($format, $alloc_kb) = ('raw', $size_kb);
                } else {
                    die "unable to create containers on storage type '$scfg->{type}'\n";
                }

                $volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, $format,
                                                   undef, $alloc_kb);
                # Register immediately: if format_disk() dies below, the error
                # handler must still be able to free this volume.
                push @$vollist, $volid;

                if ($format eq 'raw') {
                    format_disk($storecfg, $volid, $rootuid, $rootgid);
                } else {
                    push @$chown_vollist, $volid;
                }

                $mountpoint->{volume} = $volid;
                $mountpoint->{size} = $size_kb * 1024;
                $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            } else {
                # use specified/existing volid/dir/device
                $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            }
        });

        # Sub-directory volumes get their ownership set to the container's root.
        PVE::Storage::activate_volumes($storecfg, $chown_vollist, undef);
        foreach my $volid (@$chown_vollist) {
            my $path = PVE::Storage::path($storecfg, $volid, undef);
            chown($rootuid, $rootgid, $path)
                or warn "unable to change owner of '$path' to $rootuid:$rootgid - $!\n";
        }
        PVE::Storage::deactivate_volumes($storecfg, $chown_vollist, undef);
    };
    # free allocated images on error
    if (my $err = $@) {
        destroy_disks($storecfg, $vollist);
        die $err;
    }
    return $vollist;
}
2555
2556 # bash completion helper
2557
# Completion helper: list volume IDs of templates (or of backups when the
# command name is 'restore').
#
# If the value typed so far starts with '<storage>:', only that storage is
# queried. $pname is not used. Returns an array reference of volume IDs.
sub complete_os_templates {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    # undef unless the current value already names a storage
    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $content_type = 'vztmpl';
    $content_type = 'backup' if $cmdname eq 'restore';

    my $listing = PVE::Storage::template_list($cfg, $storeid, $content_type);

    my @volids;
    for my $items (values %$listing) {
        for my $entry (@$items) {
            my $candidate = $entry->{volid};
            push @volids, $candidate if defined($candidate);
        }
    }

    return \@volids;
}
2581
# Completion helper returning an array reference of container IDs.
#
#   $running undefined - all IDs reported by vmstatus()
#   $running true      - only non-template containers that are active
#   $running false     - only non-template containers that are not active
my $complete_ctid_full = sub {
    my ($running) = @_;

    my $status = vmstatus();
    my $active = list_active_containers();

    my $wanted = sub {
        my ($ctid) = @_;

        return 1 if !defined($running);

        my $info = $status->{$ctid};
        return 0 if $info->{template};

        # keep the ID only if its activity matches the requested state
        my $is_active = $active->{$ctid} ? 1 : 0;
        my $want_active = $running ? 1 : 0;
        return $is_active == $want_active;
    };

    my @ctids = grep { $wanted->($_) } keys %$status;

    return \@ctids;
};
2603
# Completion helper: every container ID, regardless of state.
# Arguments passed by the caller are ignored.
sub complete_ctid {
    return $complete_ctid_full->();
}
2607
# Completion helper: IDs of non-template containers that are not active.
sub complete_ctid_stopped {
    my $only_running = 0;
    return $complete_ctid_full->($only_running);
}
2611
# Completion helper: IDs of non-template containers that are active.
sub complete_ctid_running {
    my $only_running = 1;
    return $complete_ctid_full->($only_running);
}
2615
# Extract the uid/gid mappings from the raw 'lxc.id_map' entries of a config.
#
# $conf->{lxc} is read as a list of [key, value] pairs; only pairs with the
# key 'lxc.id_map' are looked at. Each value must have the form
# "<u|g> <container id> <host id> <length>", otherwise we die.
#
# Returns the list ($id_map, $rootuid, $rootgid):
#   $id_map  - array reference of [type, container id, host id, length]
#   $rootuid - host id that container uid 0 is mapped to (0 if unmapped)
#   $rootgid - host id that container gid 0 is mapped to (0 if unmapped)
#
# If no mapping is configured but $conf->{unprivileged} is set, a default
# mapping of 65536 ids starting at host id 100000 is returned.
sub parse_id_maps {
    my ($conf) = @_;

    my @mappings;
    my %root_id = (u => 0, g => 0);

    my $raw_entries = $conf->{lxc};
    for my $entry (@$raw_entries) {
        my ($key, $value) = @$entry;
        next unless $key eq 'lxc.id_map';

        my ($type, $ct, $host, $length) =
            $value =~ /^([ug])\s+(\d+)\s+(\d+)\s+(\d+)\s*$/
            or die "failed to parse id_map: $value\n";

        push @mappings, [$type, $ct, $host, $length];

        # a mapping that starts at container id 0 defines the root id
        $root_id{$type} = $host if $ct == 0;
    }

    if (!@mappings && $conf->{unprivileged}) {
        # Should we read them from /etc/subuid?
        @mappings = (
            ['u', '0', '100000', '65536'],
            ['g', '0', '100000', '65536'],
        );
        $root_id{u} = $root_id{g} = 100000;
    }

    return (\@mappings, $root_id{u}, $root_id{g});
}
2648
# Build the command prefix that runs a command via lxc-usernsexec.
#
# $id_map is an array reference of mappings, each itself an array reference
# whose elements get joined with ':' and passed as one '-m' argument. The
# prefix ends with '--'. Returns an empty array reference when there are no
# mappings.
sub userns_command {
    my ($id_map) = @_;

    return [] unless @$id_map;

    my @prefix = ('lxc-usernsexec');
    for my $mapping (@$id_map) {
        push @prefix, '-m', join(':', @$mapping);
    }
    push @prefix, '--';

    return \@prefix;
}
2656
2657 1;