]> git.proxmox.com Git - pve-container.git/blob - src/PVE/LXC.pm
Add snapshot_apply_config, cleanup prepare and commit
[pve-container.git] / src / PVE / LXC.pm
1 package PVE::LXC;
2
3 use strict;
4 use warnings;
5 use POSIX qw(EINTR);
6
7 use Socket;
8
9 use File::Path;
10 use File::Spec;
11 use Cwd qw();
12 use Fcntl qw(O_RDONLY);
13
14 use PVE::Cluster qw(cfs_register_file cfs_read_file);
15 use PVE::Storage;
16 use PVE::SafeSyslog;
17 use PVE::INotify;
18 use PVE::JSONSchema qw(get_standard_option);
19 use PVE::Tools qw($IPV6RE $IPV4RE dir_glob_foreach lock_file lock_file_full);
20 use PVE::Network;
21 use PVE::AccessControl;
22 use PVE::ProcFSTools;
23 use Time::HiRes qw (gettimeofday);
24
25 use Data::Dumper;
26
# Name of the local node; used as the default node when building config paths.
my $nodename = PVE::INotify::nodename();

# Host CPU information (vmstatus reads its 'cpus' and 'user_hz' fields).
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();

# Common tar flags, exposed as a package variable.
our $COMMON_TAR_FLAGS = [
    qw(--sparse --numeric-owner --acls --xattrs),
    '--xattrs-include=user.*',
    '--xattrs-include=security.capability',
    '--warning=no-xattr-write',
];

# Register the parser/writer pair for files below '/lxc/' with the cluster
# file system layer.
cfs_register_file('/lxc/', \&parse_pct_config, \&write_pct_config);
38
# Property-string schema of the container root filesystem ('rootfs' option).
# The mount point schema ($mp_desc) is built on top of these properties.
my $rootfs_desc = {
    volume => {
        type               => 'string',
        default_key        => 1,
        format             => 'pve-lxc-mp-string',
        format_description => 'volume',
        description        => 'Volume, device or directory to mount into the container.',
    },
    backup => {
        type               => 'boolean',
        format_description => '[1|0]',
        description        => 'Whether to include the mountpoint in backups.',
        optional           => 1,
    },
    size => {
        type               => 'string',
        format             => 'disk-size',
        format_description => 'DiskSize',
        description        => 'Volume size (read only value).',
        optional           => 1,
    },
    acl => {
        type               => 'boolean',
        format_description => 'acl',
        description        => 'Explicitly enable or disable ACL support.',
        optional           => 1,
    },
    ro => {
        type               => 'boolean',
        format_description => 'ro',
        description        => 'Read-only mountpoint (not supported with bind mounts)',
        optional           => 1,
    },
    quota => {
        type               => 'boolean',
        format_description => '[0|1]',
        description        => 'Enable user quotas inside the container (not supported with zfs subvolumes)',
        optional           => 1,
    },
};
79
# Standard option describing the container root filesystem; its value is a
# property string following $rootfs_desc.
PVE::JSONSchema::register_standard_option(
    'pve-ct-rootfs',
    {
        type        => 'string',
        format      => $rootfs_desc,
        description => "Use volume as container root.",
        optional    => 1,
    },
);

# Standard option for snapshot names (config id format, at most 40 characters).
PVE::JSONSchema::register_standard_option(
    'pve-lxc-snapshot-name',
    {
        description => "The name of the snapshot.",
        type        => 'string',
        format      => 'pve-configid',
        maxLength   => 40,
    },
);
91
# Schema of all top-level container configuration options. The netN, mpN and
# unusedN entries are added to this hash further down in the file.
my $confdesc = {
    lock => {
        optional    => 1,
        type        => 'string',
        description => "Lock/unlock the VM.",
        enum        => [qw(migrate backup snapshot rollback)],
    },
    onboot => {
        optional    => 1,
        type        => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default     => 0,
    },
    startup  => get_standard_option('pve-startup-order'),
    template => {
        optional    => 1,
        type        => 'boolean',
        description => "Enable/disable Template.",
        default     => 0,
    },
    arch => {
        optional    => 1,
        type        => 'string',
        enum        => [qw(amd64 i386)],
        description => "OS architecture type.",
        default     => 'amd64',
    },
    ostype => {
        optional    => 1,
        type        => 'string',
        enum        => [qw(debian ubuntu centos fedora opensuse archlinux alpine)],
        description => "OS type. Corresponds to lxc setup scripts in /usr/share/lxc/config/<ostype>.common.conf.",
    },
    console => {
        optional    => 1,
        type        => 'boolean',
        description => "Attach a console device (/dev/console) to the container.",
        default     => 1,
    },
    tty => {
        optional    => 1,
        type        => 'integer',
        description => "Specify the number of tty available to the container",
        minimum     => 0,
        maximum     => 6,
        default     => 2,
    },
    cpulimit => {
        optional    => 1,
        type        => 'number',
        description => "Limit of CPU usage. Note if the computer has 2 CPUs, it has a total of '2' CPU time. Value '0' indicates no CPU limit.",
        minimum     => 0,
        maximum     => 128,
        default     => 0,
    },
    cpuunits => {
        optional    => 1,
        type        => 'integer',
        description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to the weights of all the other running VMs.\n\nNOTE: You can disable fair-scheduler configuration by setting this to 0.",
        minimum     => 0,
        maximum     => 500000,
        default     => 1024,
    },
    memory => {
        optional    => 1,
        type        => 'integer',
        description => "Amount of RAM for the VM in MB.",
        minimum     => 16,
        default     => 512,
    },
    swap => {
        optional    => 1,
        type        => 'integer',
        description => "Amount of SWAP for the VM in MB.",
        minimum     => 0,
        default     => 512,
    },
    hostname => {
        optional    => 1,
        description => "Set a host name for the container.",
        type        => 'string',
        format      => 'dns-name',
        maxLength   => 255,
    },
    description => {
        optional    => 1,
        type        => 'string',
        description => "Container description. Only used on the configuration web interface.",
    },
    searchdomain => {
        optional    => 1,
        type        => 'string',
        format      => 'dns-name-list',
        description => "Sets DNS search domains for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
    },
    nameserver => {
        optional    => 1,
        type        => 'string',
        format      => 'address-list',
        description => "Sets DNS server IP address for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
    },
    rootfs => get_standard_option('pve-ct-rootfs'),
    parent => {
        optional    => 1,
        type        => 'string',
        format      => 'pve-configid',
        maxLength   => 40,
        description => "Parent snapshot name. This is used internally, and should not be modified.",
    },
    snaptime => {
        optional    => 1,
        description => "Timestamp for snapshots.",
        type        => 'integer',
        minimum     => 0,
    },
    cmode => {
        optional    => 1,
        description => "Console mode. By default, the console command tries to open a connection to one of the available tty devices. By setting cmode to 'console' it tries to attach to /dev/console instead. If you set cmode to 'shell', it simply invokes a shell inside the container (no login).",
        type        => 'string',
        enum        => [qw(shell console tty)],
        default     => 'tty',
    },
    protection => {
        optional    => 1,
        type        => 'boolean',
        description => "Sets the protection flag of the container. This will prevent the CT or CT's disk remove/update operation.",
        default     => 0,
    },
    unprivileged => {
        optional    => 1,
        type        => 'boolean',
        description => "Makes the container run as unprivileged user. (Should not be modified manually.)",
        default     => 0,
    },
};
223
# Raw 'lxc.*' keys accepted in a container config file.
# A value of 1 means the key is passed through as-is; a string value is the
# warning emitted by parse_pct_config when the key is rejected.
# Keys below 'lxc.cgroup.' are not listed here; parse_pct_config accepts them
# by prefix.
my $valid_lxc_conf_keys = {
    (map { ($_ => 1) } qw(
        lxc.include
        lxc.arch
        lxc.utsname
        lxc.haltsignal
        lxc.rebootsignal
        lxc.stopsignal
        lxc.init_cmd
        lxc.network.type
        lxc.network.flags
        lxc.network.link
        lxc.network.mtu
        lxc.network.name
        lxc.network.hwaddr
        lxc.network.ipv4
        lxc.network.ipv4.gateway
        lxc.network.ipv6
        lxc.network.ipv6.gateway
        lxc.network.script.up
        lxc.network.script.down
        lxc.pts
        lxc.console.logfile
        lxc.console
        lxc.tty
        lxc.devttydir
        lxc.hook.autodev
        lxc.autodev
        lxc.kmsg
        lxc.mount
        lxc.mount.entry
        lxc.mount.auto
        lxc.rootfs.mount
        lxc.cap.drop
        lxc.cap.keep
        lxc.aa_profile
        lxc.aa_allow_incomplete
        lxc.se_context
        lxc.seccomp
        lxc.id_map
        lxc.hook.pre-start
        lxc.hook.pre-mount
        lxc.hook.mount
        lxc.hook.start
        lxc.hook.stop
        lxc.hook.post-stop
        lxc.hook.clone
        lxc.hook.destroy
        lxc.loglevel
        lxc.logfile
        lxc.start.auto
        lxc.start.delay
        lxc.start.order
        lxc.group
        lxc.environment
    )),
    # rejected keys, with the reason reported to the user
    'lxc.rootfs' => 'lxc.rootfs is auto generated from rootfs',
    'lxc.rootfs.options' => 'lxc.rootfs.options is not supported' .
        ', please use mountpoint options in the "rootfs" key',
};
283
# Property-string schema of a container network interface ('netN' options).
# Registered below as the 'pve-lxc-network' format.
my $netconf_desc = {
    type => {
        type => 'string',
        optional => 1,
        description => "Network interface type.",
        enum => [qw(veth)],
    },
    name => {
        type => 'string',
        format_description => 'String',
        description => 'Name of the network device as seen from inside the container. (lxc.network.name)',
        pattern => '[-_.\w\d]+',
    },
    bridge => {
        type => 'string',
        format_description => 'vmbr<Number>',
        description => 'Bridge to attach the network device to.',
        pattern => '[-_.\w\d]+',
        optional => 1,
    },
    hwaddr => {
        type => 'string',
        format_description => 'MAC',
        # This description used to be a copy of the 'bridge' text.
        description => 'MAC address of the network device. (lxc.network.hwaddr)',
        pattern => qr/(?:[a-f0-9]{2}:){5}[a-f0-9]{2}/i,
        optional => 1,
    },
    mtu => {
        type => 'integer',
        format_description => 'Number',
        description => 'Maximum transfer unit of the interface. (lxc.network.mtu)',
        minimum => 64, # minimum ethernet frame is 64 bytes
        optional => 1,
    },
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        description => 'IPv4 address in CIDR format.',
        optional => 1,
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        description => 'Default gateway for IPv4 traffic.',
        optional => 1,
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        description => 'IPv6 address in CIDR format.',
        optional => 1,
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        description => 'Default gateway for IPv6 traffic.',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        format_description => '[1|0]',
        description => "Controls whether this interface's firewall rules should be used.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        format_description => 'VlanNo',
        minimum => '2',
        maximum => '4094',
        description => "VLAN tag for this interface.",
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:;\d+)*/,
        format_description => 'vlanid[;vlanid...]',
        description => "VLAN ids to pass through the interface",
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-lxc-network', $netconf_desc);
369
# Add the net0 .. net<MAX-1> options to the container config schema.
my $MAX_LXC_NETWORKS = 10;
foreach my $index (0 .. $MAX_LXC_NETWORKS - 1) {
    $confdesc->{"net$index"} = {
        optional    => 1,
        type        => 'string',
        format      => $netconf_desc,
        description => "Specifies network interfaces for the container.",
    };
}

# Format used for volume strings and mount point paths.
PVE::JSONSchema::register_format('pve-lxc-mp-string', \&verify_lxc_mp_string);
# Format verifier for 'pve-lxc-mp-string'.
#
# Rejects strings that contain a '.' or '..' path component:
#   - '/./' or '/../' anywhere
#   - '/.' or '/..' at the end
#   - '../' at the beginning
#
# Returns $mp when it is acceptable. Otherwise returns undef if $noerr is
# set, or dies with an error message.
sub verify_lxc_mp_string {
    my ($mp, $noerr) = @_;

    my $has_dot_component = $mp =~ m{/\.\.?(?:/|$)|^\.\./};
    return $mp if !$has_dot_component;

    return undef if $noerr;
    die "$mp contains illegal character sequences\n";
}
396
# Mount point schema: every rootfs property plus the mandatory 'mp' path.
my $mp_desc = {
    %$rootfs_desc,
    mp => {
        type               => 'string',
        format             => 'pve-lxc-mp-string',
        format_description => 'Path',
        description        => 'Path to the mountpoint as seen from inside the container.',
    },
};
PVE::JSONSchema::register_format('pve-ct-mountpoint', $mp_desc);

# Schema shared by all 'unusedN' options.
my $unuseddesc = {
    optional    => 1,
    type        => 'string',
    format      => 'pve-volume-id',
    description => "Reference to unused volumes.",
};
413
# Add the mp0 .. mp<MAX-1> mount point options to the container config schema.
# (The 'optional' key used to be listed twice in this hash literal.)
my $MAX_MOUNT_POINTS = 10;
for (my $i = 0; $i < $MAX_MOUNT_POINTS; $i++) {
    $confdesc->{"mp$i"} = {
        optional => 1,
        type => 'string', format => $mp_desc,
        description => "Use volume as container mount point (experimental feature).",
    };
}
423
# Add the unused0 .. unused<MAX-1> options to the container config schema.
# The loop is bounded by $MAX_UNUSED_DISKS, the same limit add_unused_volume
# iterates over, so both stay in sync if the limits ever diverge.
my $MAX_UNUSED_DISKS = $MAX_MOUNT_POINTS;
for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) {
    $confdesc->{"unused$i"} = $unuseddesc;
}
428
# Serialize a container config hash into the text of its config file.
#
# Parameters:
#   $filename - unused here; part of the cfs writer callback signature
#   $conf     - config hash; may hold a 'snapshots' hash of per-snapshot
#               configs and an 'lxc' list of [key, value] pairs
#
# The top-level 'snapstate' key is removed from $conf. Each section starts
# with the description as '#' comment lines, followed by the sorted
# "key: value" options and finally the raw lxc entries. Snapshot sections
# follow, each introduced by "[name]".
#
# Dies if an option value contains a newline.
sub write_pct_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    # keys that are never written as plain "key: value" lines
    my %not_an_option = map { ($_ => 1) }
        qw(digest description pending snapshots snapname lxc);

    my $render_section = sub {
        my ($section) = @_;

        my @lines;

        # add description as comment to top of file
        my $descr = $section->{description} || '';
        foreach my $descr_line (split(/\n/, $descr)) {
            push @lines, '#' . PVE::Tools::encode_text($descr_line);
        }

        foreach my $key (sort keys %$section) {
            next if $not_an_option{$key};
            my $value = $section->{$key};
            die "detected invalid newline inside property '$key'\n" if $value =~ m/\n/;
            push @lines, "$key: $value";
        }

        if (my $lxcconf = $section->{lxc}) {
            foreach my $entry (@$lxcconf) {
                my ($lxc_key, $lxc_value) = @$entry;
                push @lines, "$lxc_key: $lxc_value";
            }
        }

        return join('', map { "$_\n" } @lines);
    };

    my $raw = $render_section->($conf);

    foreach my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n" . $render_section->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
472
# Validate and normalize the value of a single config option.
#
# Parameters:
#   $key   - option name; must exist in $confdesc
#   $value - raw string value
#
# Returns the normalized value: 1/0 for booleans, an integer for integers,
# and the unchanged string for numbers and strings (strings with a 'format'
# are checked via PVE::JSONSchema::check_format first).
#
# Dies on unknown keys, undefined values, values containing CR/LF, and
# values that do not match the declared type.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if $value eq '1' || $value =~ m/^(on|yes|true)$/i;
        return 0 if $value eq '0' || $value =~ m/^(off|no|false)$/i;
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        my $fmt = $desc->{format};
        PVE::JSONSchema::check_format($fmt, $value) if $fmt;
        return $value;
    }

    die "internal error"
}
508
# Parse the raw text of a container config file into a config hash.
#
# Parameters:
#   $filename - name of the config file; must end in "/lxc/<vmid>.conf"
#   $raw      - file content, or undef
#
# Returns undef if $raw is undefined. Otherwise returns a hash ref with the
# parsed options plus:
#   digest    - SHA1 hex digest of $raw
#   snapshots - hash of snapshot name => config hash, one per "[name]" section
#   lxc       - list of [key, value] pairs for accepted raw 'lxc.*' lines
#
# Dies if $filename has an unexpected form. Lines that cannot be parsed only
# produce a warning. A value failing check_type is warned about but still
# stored unvalidated.
sub parse_pct_config {
    my ($filename, $raw) = @_;

    return undef if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
    };

    # the dot is escaped so that only a literal ".conf" suffix is accepted
    $filename =~ m|/lxc/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    my $conf = $res;    # section currently being filled
    my $descr = '';
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            # new snapshot section: flush the description collected so far
            $section = $1;
            $conf->{description} = $descr if $descr;
            $descr = '';
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)\s*$/) {
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(lxc\.[a-z0-9_\-\.]+)(:|\s*=)\s*(.*?)\s*$/) {
            my $key = $1;
            my $value = $3;
            my $validity = $valid_lxc_conf_keys->{$key} || 0;
            if ($validity eq 1 || $key =~ m/^lxc\.cgroup\./) {
                push @{$conf->{lxc}}, [$key, $value];
            } elsif (my $errmsg = $validity) {
                warn "vm $vmid - $key: $errmsg\n";
            } else {
                warn "vm $vmid - unable to parse config: $line\n";
            }
        } elsif ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored like the other branches, so only a real 'snapstate'
            # key matches, not any key that merely ends in that word
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(\S.*)\s*$/) {
            my $key = $1;
            my $value = $2;
            eval { $value = check_type($key, $value); };
            warn "vm $vmid - unable to parse value of '$key' - $@" if $@;
            $conf->{$key} = $value;
        } else {
            warn "vm $vmid - unable to parse config: $line\n";
        }
    }

    $conf->{description} = $descr if $descr;

    delete $res->{snapstate}; # just to be sure

    return $res;
}
577
# List the containers located on the local node.
#
# Returns a hash ref mapping vmid => { type => 'lxc' } for every entry of the
# cluster VM list whose node is the local node and whose type is 'lxc'.
sub config_list {
    my $res = {};

    my $vmlist = PVE::Cluster::get_vmlist();
    return $res if !$vmlist || !$vmlist->{ids};

    my $ids = $vmlist->{ids};

    my @local_cts = grep {
        my $entry = $ids->{$_};
        $_  # skip CT0
            && $entry->{node} && $entry->{node} eq $nodename
            && $entry->{type} && $entry->{type} eq 'lxc';
    } keys %$ids;

    $res->{$_}->{type} = 'lxc' foreach @local_cts;

    return $res;
}
593
# Build the config file path of container $vmid relative to the cluster file
# system root. $node defaults to the local node.
sub cfs_config_path {
    my ($vmid, $node) = @_;

    $node ||= $nodename;

    return join('/', 'nodes', $node, 'lxc', "$vmid.conf");
}
600
# Absolute path (below /etc/pve) of the config file of container $vmid.
# $node is passed through to cfs_config_path.
sub config_file {
    my ($vmid, $node) = @_;

    return "/etc/pve/" . cfs_config_path($vmid, $node);
}
607
# Read and return the parsed config of container $vmid ($node defaults to the
# local node). Dies if the cluster file system returns no config.
sub load_config {
    my ($vmid, $node) = @_;

    $node ||= $nodename;

    my $conf = PVE::Cluster::cfs_read_file(cfs_config_path($vmid, $node));
    if (!defined($conf)) {
        die "container $vmid does not exist\n";
    }

    return $conf;
}
619
# Write the config of a container on the local node, creating the node's lxc
# config directory first. The result of mkdir is not checked.
sub create_config {
    my ($vmid, $conf) = @_;

    mkdir "/etc/pve/nodes/$nodename/lxc";

    write_config($vmid, $conf);
}
628
# Remove the config file of container $vmid on the local node.
sub destroy_config {
    my ($vmid) = @_;

    my $path = config_file($vmid, $nodename);
    unlink $path;
}
634
# Store $conf as the config of container $vmid on the local node through the
# cluster file system layer.
sub write_config {
    my ($vmid, $conf) = @_;

    PVE::Cluster::cfs_write_file(cfs_config_path($vmid), $conf);
}
642
# flock: we use one file handle per process, so lock file
# can be called multiple times and will succeed for the same process.

my $lock_handles = {};
my $lockdir = "/run/lock/lxc";

# Path of the lock file guarding the config of container $vmid.
sub config_file_lock {
    my ($vmid) = @_;

    return join('/', $lockdir, "pve-config-${vmid}.lock");
}
654
# Run $code->(@param) while holding the config lock of container $vmid.
#
# $timeout is passed on to lock_file. The lock directory is created when it
# does not exist yet. Any error left in $@ by lock_file is re-thrown;
# otherwise the result of lock_file is returned.
sub lock_config_full {
    my ($vmid, $timeout, $code, @param) = @_;

    my $lockfile = config_file_lock($vmid);

    mkdir $lockdir unless -d $lockdir;

    my $result = lock_file($lockfile, $timeout, $code, @param);
    if ($@) {
        die $@;
    }

    return $result;
}
668
# Like lock_config_full, but goes through lock_file_full and passes the
# additional $shared flag on to it.
#
# Any error left in $@ is re-thrown; otherwise the result of lock_file_full
# is returned.
sub lock_config_mode {
    my ($vmid, $timeout, $shared, $code, @param) = @_;

    my $lockfile = config_file_lock($vmid);

    mkdir $lockdir unless -d $lockdir;

    my $result = lock_file_full($lockfile, $timeout, $shared, $code, @param);
    if ($@) {
        die $@;
    }

    return $result;
}
682
# Run $code->(@param) under the config lock of container $vmid, using a
# timeout value of 10.
sub lock_config {
    my ($vmid, $code, @param) = @_;

    my $timeout = 10;
    return lock_config_full($vmid, $timeout, $code, @param);
}
688
# True if $name is a known container config option.
sub option_exists {
    my ($name) = @_;

    my $desc = $confdesc->{$name};
    return defined($desc);
}
694
# add JSON properties for create and set function
#
# Copies every config option schema into $prop, except the internal
# 'parent' and 'snaptime' options and entries $prop already defines.
# Returns $prop.
sub json_config_properties {
    my $prop = shift;

    my %internal = map { ($_ => 1) } qw(parent snaptime);

    foreach my $opt (grep { !$internal{$_} } keys %$confdesc) {
        $prop->{$opt} ||= $confdesc->{$opt};
    }

    return $prop;
}
707
708 # container status helpers
709
# Find the active containers by scanning /proc/net/unix for the abstract
# command sockets named '@/var/lib/lxc/<vmid>/command'.
#
# Returns a hash ref mapping vmid => 1; the hash is empty when the file
# cannot be opened.
sub list_active_containers {

    # similar test is used by lxccontainer.c: list_active_containers
    my $active = {};

    open(my $fh, '<', "/proc/net/unix") or return $active;

    while (defined(my $line = <$fh>)) {
        next if $line !~ m/^[a-f0-9]+:\s\S+\s\S+\s\S+\s\S+\s\S+\s\d+\s(\S+)$/;
        my $socket_path = $1;
        next if $socket_path !~ m!^@/var/lib/lxc/(\d+)/command$!;
        $active->{$1} = 1;
    }

    close($fh);

    return $active;
}
733
# warning: this is slow
#
# Returns 1 if container $vmid is among the active containers, undef
# otherwise.
sub check_running {
    my ($vmid) = @_;

    my $active = list_active_containers();

    return defined($active->{$vmid}) ? 1 : undef;
}
744
# Return the PVE::Tools::df result for the root directory of process $pid
# (/proc/<pid>/root/). $vmid is not used.
sub get_container_disk_usage {
    my ($vmid, $pid) = @_;

    my $rootdir = "/proc/$pid/root/";
    return PVE::Tools::df($rootdir, 1);
}
750
# Per-vmid CPU accounting sample remembered between vmstatus calls.
my $last_proc_vmid_stat;

# Read cpuacct.stat of container $vmid and return { utime, stime }.
# The returned hash is empty when the file content has an unexpected form.
my $parse_cpuacct_stat = sub {
    my ($vmid) = @_;

    my $raw = read_cgroup_value('cpuacct', $vmid, 'cpuacct.stat', 1);

    my %stat;
    if ($raw =~ m/^user (\d+)\nsystem (\d+)\n/) {
        @stat{qw(utime stime)} = ($1, $2);
    }

    return \%stat;
};
769
# Collect status information about containers.
#
# Parameters:
#   $opt_vmid - optional; restrict the result to this container. Without it
#               all containers returned by config_list() are reported.
#
# Returns a hash ref mapping vmid => status hash with the keys: type, pid
# (running containers only), status, name, cpus, disk, maxdisk, mem, maxmem,
# swap, maxswap, uptime, cpu, netin, netout, diskread, diskwrite, template.
#
# CPU usage is derived from the difference to the sample stored by the
# previous call, so the first call for a container reports a cpu value of 0.
sub vmstatus {
    my ($opt_vmid) = @_;

    my $list = $opt_vmid ? { $opt_vmid => { type => 'lxc' }} : config_list();

    my $active_hash = list_active_containers();

    my $cpucount = $cpuinfo->{cpus} || 1;

    my $cdtime = gettimeofday;

    # first pass: static information and defaults for every container
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};

        eval { $d->{pid} = find_lxc_pid($vmid) if defined($active_hash->{$vmid}); };
        warn $@ if $@; # ignore errors (consider them stopped)

        $d->{status} = $d->{pid} ? 'running' : 'stopped';

        my $cfspath = cfs_config_path($vmid);
        my $conf = PVE::Cluster::cfs_read_file($cfspath) || {};

        $d->{name} = $conf->{'hostname'} || "CT$vmid";
        $d->{name} =~ s/[\s]//g;

        $d->{cpus} = $conf->{cpulimit} || $cpucount;

        if ($d->{pid}) {
            my $res = get_container_disk_usage($vmid, $d->{pid});
            $d->{disk} = $res->{used};
            $d->{maxdisk} = $res->{total};
        } else {
            $d->{disk} = 0;
            # use 4GB by default ??
            if (my $rootfs = $conf->{rootfs}) {
                my $rootinfo = parse_ct_rootfs($rootfs);
                $d->{maxdisk} = int(($rootinfo->{size} || 4)*1024*1024)*1024;
            } else {
                $d->{maxdisk} = 4*1024*1024*1024;
            }
        }

        $d->{mem} = 0;
        $d->{swap} = 0;
        $d->{maxmem} = ($conf->{memory}||512)*1024*1024;
        $d->{maxswap} = ($conf->{swap}//0)*1024*1024;

        $d->{uptime} = 0;
        $d->{cpu} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $d->{template} = is_template($conf);
    }

    # second pass: runtime counters of running containers
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};
        my $pid = $d->{pid};

        next if !$pid; # skip stopped CTs

        my $ctime = (stat("/proc/$pid"))[10]; # 10 = ctime
        # the method lxcfs uses; stat fails if the process vanished meanwhile,
        # in which case the uptime stays 0
        $d->{uptime} = time - $ctime if defined($ctime);

        $d->{mem} = read_cgroup_value('memory', $vmid, 'memory.usage_in_bytes');
        $d->{swap} = read_cgroup_value('memory', $vmid, 'memory.memsw.usage_in_bytes') - $d->{mem};

        # Sum every Read/Write line of the file, so that when it contains
        # several of them the last one no longer overwrites the others.
        my $blkio_bytes = read_cgroup_value('blkio', $vmid, 'blkio.throttle.io_service_bytes', 1);
        foreach my $byte (split(/\n/, $blkio_bytes)) {
            if (my ($key, $value) = $byte =~ /(Read|Write)\s+(\d+)/) {
                $d->{diskread} += $value if $key eq 'Read';
                $d->{diskwrite} += $value if $key eq 'Write';
            }
        }

        my $pstat = &$parse_cpuacct_stat($vmid);

        my $used = $pstat->{utime} + $pstat->{stime};

        my $old = $last_proc_vmid_stat->{$vmid};
        if (!$old) {
            # first sample for this container: nothing to compare against yet
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($cdtime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            # too little time elapsed for a meaningful value; reuse the old one
            $d->{cpu} = $old->{cpu};
        }
    }

    my $netdev = PVE::ProcFSTools::read_proc_net_dev();

    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^veth([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $list->{$vmid};

        next if !$d;

        # the device's 'receive' counter is booked as container output and
        # its 'transmit' counter as container input
        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};

    }

    return $list;
}
898
# Classify a mount point volume string:
#   'device' - absolute path below /dev/
#   'bind'   - any other absolute path
#   'volume' - everything else
sub classify_mountpoint {
    my ($vol) = @_;

    return 'volume' if $vol !~ m!^/!;

    return $vol =~ m!^/dev/! ? 'device' : 'bind';
}
907
# Parse a mount point property string according to schema $desc.
#
# Parameters:
#   $desc  - property-string schema ($rootfs_desc or $mp_desc)
#   $data  - property string; undef is treated as ''
#   $noerr - if true, return undef instead of dying on errors
#
# Returns the parsed hash with 'size' replaced by the result of
# PVE::JSONSchema::parse_size and 'type' set by classify_mountpoint.
my $parse_ct_mountpoint_full = sub {
    my ($desc, $data, $noerr) = @_;

    $data //= '';

    my $res;
    eval { $res = PVE::JSONSchema::parse_property_string($desc, $data) };
    if ($@) {
        return undef if $noerr;
        die $@;
    }

    if (defined(my $size = $res->{size})) {
        # keep the original string, so that the error message can show the
        # rejected value rather than the undef returned by parse_size
        my $parsed = PVE::JSONSchema::parse_size($size);
        if (!defined($parsed)) {
            return undef if $noerr;
            die "invalid size: $size\n";
        }
        $res->{size} = $parsed;
    }

    $res->{type} = classify_mountpoint($res->{volume});

    return $res;
};
933
# Parse a 'rootfs' property string. On success the result additionally gets
# 'mp' set to '/'. With $noerr set, errors yield undef instead of dying.
sub parse_ct_rootfs {
    my ($data, $noerr) = @_;

    my $rootfs = $parse_ct_mountpoint_full->($rootfs_desc, $data, $noerr);
    return $rootfs if !defined($rootfs);

    $rootfs->{mp} = '/';

    return $rootfs;
}
943
# Parse an 'mpN' property string according to the mount point schema.
# With $noerr set, errors yield undef instead of dying.
sub parse_ct_mountpoint {
    my ($data, $noerr) = @_;

    return $parse_ct_mountpoint_full->($mp_desc, $data, $noerr);
}
949
# Turn a parsed mount point hash back into a property string.
# The 'type' key is always skipped; 'mp' is skipped as well when $nomp is set.
sub print_ct_mountpoint {
    my ($info, $nomp) = @_;

    my @skip = ('type');
    push @skip, 'mp' if $nomp;

    return PVE::JSONSchema::print_property_string($info, $mp_desc, \@skip);
}
956
# Turn a parsed network hash back into a 'netN' property string.
sub print_lxc_network {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $netconf_desc);
}
961
# Parse a 'netN' property string.
#
# Returns an empty hash for empty input. Otherwise the parsed hash with
# 'type' forced to 'veth' and a random 'hwaddr' filled in when none is set.
sub parse_lxc_network {
    my ($data) = @_;

    return {} if !$data;

    my $net = PVE::JSONSchema::parse_property_string($netconf_desc, $data);

    $net->{type} = 'veth';
    if (!$net->{hwaddr}) {
        $net->{hwaddr} = PVE::Tools::random_ether_addr();
    }

    return $net;
}
976
# Read the cgroup file $name of container $vmid in controller $group.
# Returns the whole file content if $full is set, else only the first line.
sub read_cgroup_value {
    my ($group, $vmid, $name, $full) = @_;

    my $path = join('/', '/sys/fs/cgroup', $group, 'lxc', $vmid, $name);

    return $full
        ? PVE::Tools::file_get_contents($path)
        : PVE::Tools::file_read_firstline($path);
}
986
# Write $value to the cgroup file $name of container $vmid in controller
# $group. Nothing is written when the file does not exist.
sub write_cgroup_value {
    my ($group, $vmid, $name, $value) = @_;

    my $path = join('/', '/sys/fs/cgroup', $group, 'lxc', $vmid, $name);

    PVE::ProcFSTools::write_proc_entry($path, $value) if -e $path;
}
994
# Scan /proc for processes whose command line is exactly
# "lxc-console -n <vmid>" (optionally with the /usr/bin/ prefix).
#
# Returns a hash ref mapping vmid => list of matching PIDs.
sub find_lxc_console_pids {

    my $pids_by_vmid = {};

    my $inspect = sub {
        my ($pid) = @_;

        my $cmdline = PVE::Tools::file_read_firstline("/proc/$pid/cmdline");
        return if !$cmdline;

        # search for lxc-console -n <vmid>
        my @argv = split(/\0/, $cmdline);
        return if scalar(@argv) != 3;

        my ($program, $flag, $vmid) = @argv;
        return if $flag ne '-n';
        return if $vmid !~ m/^\d+$/;
        return if $program !~ m|^(/usr/bin/)?lxc-console$|;

        push @{$pids_by_vmid->{$vmid}}, $pid;
    };

    PVE::Tools::dir_glob_foreach('/proc', '\d+', $inspect);

    return $pids_by_vmid;
}
1020
# Ask 'lxc-info -n <vmid> -p' for the PID of container $vmid.
# Dies if the output contains no "PID: <n>" line.
sub find_lxc_pid {
    my ($vmid) = @_;

    my $pid;
    my $cmd = ['lxc-info', '-n', $vmid, '-p'];

    PVE::Tools::run_command($cmd, outfunc => sub {
        my ($line) = @_;
        if ($line =~ m/^PID:\s+(\d+)$/) {
            $pid = $1;
        }
    });

    die "unable to get PID for CT $vmid (not running?)\n" if !$pid;

    return $pid;
}
1035
# Note: we cannot use Net::IP, because that only allows strict
# CIDR networks
#
# Split "address/prefix" into { address, netmask }. The prefix length must be
# between 8 and 32. On failure returns undef if $noerr is set, else dies.
sub parse_ipv4_cidr {
    my ($cidr, $noerr) = @_;

    if ($cidr =~ m!^($IPV4RE)(?:/(\d+))$!) {
        my ($address, $prefix) = ($1, $2);
        if ($prefix > 7 && $prefix <= 32) {
            return {
                address => $address,
                netmask => $PVE::Network::ipv4_reverse_mask->[$prefix],
            };
        }
    }

    return undef if $noerr;

    die "unable to parse ipv4 address/mask\n";
}
1049
# Die if the config carries a (true) 'lock' entry.
sub check_lock {
    my ($conf) = @_;

    if (my $lock = $conf->{'lock'}) {
        die "VM is locked ($lock)\n";
    }
}
1055
# Die with "$err_msg - protection mode enabled" if the config has its
# protection flag set.
sub check_protection {
    my ($vm_conf, $err_msg) = @_;

    die "$err_msg - protection mode enabled\n" if $vm_conf->{protection};
}
1063
# Generate /var/lib/lxc/<vmid>/config from the container config.
#
# Parameters:
#   $storage_cfg - accepted but not used by this function
#   $vmid        - container id
#   $conf        - parsed container config
#
# For templates the generated config file is removed and nothing is written.
# Otherwise the rootfs directory is created and the config file rewritten.
#
# Dies if 'arch' or 'ostype' is missing, or if 'ostype' is not supported.
sub update_lxc_config {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $dir = "/var/lib/lxc/$vmid";

    if ($conf->{template}) {

        unlink "$dir/config";

        return;
    }

    my $raw = '';

    die "missing 'arch' - internal error" if !$conf->{arch};
    $raw .= "lxc.arch = $conf->{arch}\n";

    my $unprivileged = $conf->{unprivileged};
    my $custom_idmap = grep { $_->[0] eq 'lxc.id_map' } @{$conf->{lxc}};

    my $ostype = $conf->{ostype} || die "missing 'ostype' - internal error";
    if ($ostype =~ /^(?:debian | ubuntu | centos | fedora | opensuse | archlinux | alpine)$/x) {
        # prefer the distribution specific include, fall back to the generic one
        my $inc ="/usr/share/lxc/config/$ostype.common.conf";
        $inc ="/usr/share/lxc/config/common.conf" if !-f $inc;
        $raw .= "lxc.include = $inc\n";
        if ($unprivileged || $custom_idmap) {
            $inc = "/usr/share/lxc/config/$ostype.userns.conf";
            $inc = "/usr/share/lxc/config/userns.conf" if !-f $inc;
            $raw .= "lxc.include = $inc\n"
        }
    } else {
        die "implement me (ostype $ostype)";
    }

    # WARNING: DO NOT REMOVE this without making sure that loop device nodes
    # cannot be exposed to the container with r/w access (cgroup perms).
    # When this is enabled mounts will still remain in the monitor's namespace
    # after the container unmounted them and thus will not detach from their
    # files while the container is running!
    $raw .= "lxc.monitor.unshare = 1\n";

    # Should we read them from /etc/subuid?
    if ($unprivileged && !$custom_idmap) {
        $raw .= "lxc.id_map = u 0 100000 65536\n";
        $raw .= "lxc.id_map = g 0 100000 65536\n";
    }

    if (!has_dev_console($conf)) {
        $raw .= "lxc.console = none\n";
        $raw .= "lxc.cgroup.devices.deny = c 5:1 rwm\n";
    }

    my $ttycount = get_tty_count($conf);
    $raw .= "lxc.tty = $ttycount\n";

    # some init scripts expect a linux terminal (turnkey).
    $raw .= "lxc.environment = TERM=linux\n";

    my $utsname = $conf->{hostname} || "CT$vmid";
    $raw .= "lxc.utsname = $utsname\n";

    my $memory = $conf->{memory} || 512;
    my $swap = $conf->{swap} // 0;

    my $lxcmem = int($memory*1024*1024);
    $raw .= "lxc.cgroup.memory.limit_in_bytes = $lxcmem\n";

    # the memsw limit covers memory plus swap
    my $lxcswap = int(($memory + $swap)*1024*1024);
    $raw .= "lxc.cgroup.memory.memsw.limit_in_bytes = $lxcswap\n";

    if (my $cpulimit = $conf->{cpulimit}) {
        $raw .= "lxc.cgroup.cpu.cfs_period_us = 100000\n";
        my $value = int(100000*$cpulimit);
        $raw .= "lxc.cgroup.cpu.cfs_quota_us = $value\n";
    }

    my $shares = $conf->{cpuunits} || 1024;
    $raw .= "lxc.cgroup.cpu.shares = $shares\n";

    # the parsed result is not used below
    my $mountpoint = parse_ct_rootfs($conf->{rootfs});

    $raw .= "lxc.rootfs = $dir/rootfs\n";

    # Iterate in sorted key order so that the network sections are always
    # emitted in the same order (net0, net1, ...) instead of hash order.
    my $netcount = 0;
    foreach my $k (sort keys %$conf) {
        next if $k !~ m/^net(\d+)$/;
        my $ind = $1;
        my $d = parse_lxc_network($conf->{$k});
        $netcount++;
        $raw .= "lxc.network.type = veth\n";
        $raw .= "lxc.network.veth.pair = veth${vmid}i${ind}\n";
        $raw .= "lxc.network.hwaddr = $d->{hwaddr}\n" if defined($d->{hwaddr});
        $raw .= "lxc.network.name = $d->{name}\n" if defined($d->{name});
        $raw .= "lxc.network.mtu = $d->{mtu}\n" if defined($d->{mtu});
    }

    # raw lxc.* entries are appended last
    if (my $lxcconf = $conf->{lxc}) {
        foreach my $entry (@$lxcconf) {
            my ($k, $v) = @$entry;
            $netcount++ if $k eq 'lxc.network.type';
            $raw .= "$k = $v\n";
        }
    }

    $raw .= "lxc.network.type = empty\n" if !$netcount;

    File::Path::mkpath("$dir/rootfs");

    PVE::Tools::file_set_contents("$dir/config", $raw);
}
1174
# verify and cleanup nameserver list (replace \0 with ' ')
#
# Every entry is checked with PVE::JSONSchema::pve_verify_ip; the entries are
# returned joined by single spaces.
sub verify_nameserver_list {
    my ($nameserver_list) = @_;

    my @servers;
    foreach my $server (PVE::Tools::split_list($nameserver_list)) {
        PVE::JSONSchema::pve_verify_ip($server);
        push @servers, $server;
    }

    return join(' ', @servers);
}
1187
# Normalise a search domain list: split it with PVE::Tools::split_list and
# return the entries joined by a single space. No validation is done yet.
sub verify_searchdomain_list {
    my ($searchdomain_list) = @_;

    # todo: should we add checks for valid dns domains?
    my @domains = PVE::Tools::split_list($searchdomain_list);

    return join(' ', @domains);
}
1199
# Return 1 if any mountpoint of $config (as visited by foreach_mountpoint)
# is of type 'volume' and refers to $volid, otherwise 0.
sub is_volume_in_use {
    my ($config, $volid) = @_;

    my $found = 0;

    my $match_volume = sub {
        my ($ms, $mountpoint) = @_;

        # nothing left to do once a match was seen
        return if $found;
        return if $mountpoint->{type} ne 'volume';

        $found = 1 if $mountpoint->{volume} eq $volid;
    };

    foreach_mountpoint($config, $match_volume);

    return $found;
}
1214
# Record $volid under the lowest free "unusedN" key of $config.
#
# Returns the key that was used. Returns nothing (and changes nothing) when
# the volume is already listed as unused. Dies when all $MAX_UNUSED_DISKS
# slots are occupied.
sub add_unused_volume {
    my ($config, $volid) = @_;

    my $free_slot;
    # Walk from the highest index down so the last free slot we remember is
    # the lowest one.
    foreach my $idx (reverse(0 .. $MAX_UNUSED_DISKS - 1)) {
        my $slot = "unused$idx";
        my $existing = $config->{$slot};

        if (!$existing) {
            $free_slot = $slot;
            next;
        }

        return if $existing eq $volid; # do not add duplicates
    }

    die "Too many unused volumes - please delete them first.\n" if !$free_slot;

    $config->{$free_slot} = $volid;

    return $free_slot;
}
1234
# Apply option changes to a container configuration.
#
# Parameters:
#   $vmid    - container ID
#   $conf    - current configuration hash, modified in place
#   $running - true if the container is currently running
#   $param   - hash ref of options to set ('memory' and 'swap' are taken out
#              of it via PVE::Tools::extract_param)
#   $delete  - optional array ref of option names to delete
#
# While the container is running, the configuration is written after every
# processed option. Options that cannot be changed on a running container
# are collected and reported with a single die at the very end, after all
# other changes were applied.
#
# Dies on required/read-only options and on unknown options.
sub update_pct_config {
    my ($vmid, $conf, $running, $param, $delete) = @_;

    my @nohotplug;

    my $new_disks = 0;
    my @deleted_volumes;

    my $rootdir;
    if ($running) {
        my $pid = find_lxc_pid($vmid);
        $rootdir = "/proc/$pid/root";
    }

    # Returns 1 (and remembers the option names) if the container is running,
    # i.e. if the caller has to skip a change that cannot be hot-plugged.
    my $hotplug_error = sub {
        if ($running) {
            push @nohotplug, @_;
            return 1;
        } else {
            return 0;
        }
    };

    if (defined($delete)) {
        foreach my $opt (@$delete) {
            if (!exists($conf->{$opt})) {
                warn "no such option: $opt\n";
                next;
            }

            if ($opt eq 'hostname' || $opt eq 'memory' || $opt eq 'rootfs') {
                die "unable to delete required option '$opt'\n";
            } elsif ($opt eq 'swap') {
                delete $conf->{$opt};
                # NOTE(review): this is written even when the container is not
                # running - confirm write_cgroup_value tolerates that.
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", -1);
            } elsif ($opt eq 'description' || $opt eq 'onboot' || $opt eq 'startup') {
                delete $conf->{$opt};
            } elsif ($opt eq 'nameserver' || $opt eq 'searchdomain' ||
                     $opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
                next if $hotplug_error->($opt);
                delete $conf->{$opt};
            } elsif ($opt =~ m/^net(\d+)$/) {
                # \d+ (not \d) so that multi-digit interfaces such as net10
                # can be deleted, matching the pattern used when setting netN.
                my $netid = $1;
                delete $conf->{$opt};
                next if !$running;
                PVE::Network::veth_delete("veth${vmid}i$netid");
            } elsif ($opt eq 'protection') {
                delete $conf->{$opt};
            } elsif ($opt =~ m/^unused(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                # the volume itself is freed further below
                push @deleted_volumes, $conf->{$opt};
                delete $conf->{$opt};
            } elsif ($opt =~ m/^mp(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                my $mp = parse_ct_mountpoint($conf->{$opt});
                delete $conf->{$opt};
                # keep track of the detached volume unless something else
                # still references it
                if ($mp->{type} eq 'volume' && !is_volume_in_use($conf, $mp->{volume})) {
                    add_unused_volume($conf, $mp->{volume});
                }
            } elsif ($opt eq 'unprivileged') {
                die "unable to delete read-only option: '$opt'\n";
            } else {
                die "implement me (delete: $opt)"
            }
            write_config($vmid, $conf) if $running;
        }
    }

    # There's no separate swap size to configure, there's memory and "total"
    # memory (iow. memory+swap). This means we have to change them together.
    my $wanted_memory = PVE::Tools::extract_param($param, 'memory');
    my $wanted_swap = PVE::Tools::extract_param($param, 'swap');
    if (defined($wanted_memory) || defined($wanted_swap)) {

        my $old_memory = ($conf->{memory} || 512);
        my $old_swap = ($conf->{swap} || 0);

        $wanted_memory //= $old_memory;
        $wanted_swap //= $old_swap;

        my $total = $wanted_memory + $wanted_swap;
        if ($running) {
            my $old_total = $old_memory + $old_swap;
            # The order of the two writes depends on whether the total grows
            # or shrinks.
            if ($total > $old_total) {
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
                write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
            } else {
                write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
            }
        }
        $conf->{memory} = $wanted_memory;
        $conf->{swap} = $wanted_swap;

        write_config($vmid, $conf) if $running;
    }

    foreach my $opt (keys %$param) {
        my $value = $param->{$opt};
        if ($opt eq 'hostname') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'onboot') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt eq 'startup') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
            next if $hotplug_error->($opt);
            $conf->{$opt} = $value;
        } elsif ($opt eq 'nameserver') {
            next if $hotplug_error->($opt);
            my $list = verify_nameserver_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'searchdomain') {
            next if $hotplug_error->($opt);
            my $list = verify_searchdomain_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'cpulimit') {
            next if $hotplug_error->($opt); # FIXME: hotplug
            $conf->{$opt} = $value;
        } elsif ($opt eq 'cpuunits') {
            $conf->{$opt} = $value;
            write_cgroup_value("cpu", $vmid, "cpu.shares", $value);
        } elsif ($opt eq 'description') {
            $conf->{$opt} = PVE::Tools::encode_text($value);
        } elsif ($opt =~ m/^net(\d+)$/) {
            my $netid = $1;
            my $net = parse_lxc_network($value);
            if (!$running) {
                $conf->{$opt} = print_lxc_network($net);
            } else {
                # update_net applies the change to the live container and
                # updates $conf itself
                update_net($vmid, $conf, $opt, $net, $netid, $rootdir);
            }
        } elsif ($opt eq 'protection') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt =~ m/^mp(\d+)$/) {
            next if $hotplug_error->($opt);
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            my $old = $conf->{$opt};
            $conf->{$opt} = $value;
            if (defined($old)) {
                my $mp = parse_ct_mountpoint($old);
                if ($mp->{type} eq 'volume' && !is_volume_in_use($conf, $mp->{volume})) {
                    add_unused_volume($conf, $mp->{volume});
                }
            }
            $new_disks = 1;
        } elsif ($opt eq 'rootfs') {
            next if $hotplug_error->($opt);
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            my $old = $conf->{$opt};
            $conf->{$opt} = $value;
            if (defined($old)) {
                my $mp = parse_ct_rootfs($old);
                if ($mp->{type} eq 'volume' && !is_volume_in_use($conf, $mp->{volume})) {
                    add_unused_volume($conf, $mp->{volume});
                }
            }
        } elsif ($opt eq 'unprivileged') {
            die "unable to modify read-only option: '$opt'\n";
        } else {
            die "implement me: $opt";
        }
        write_config($vmid, $conf) if $running;
    }

    if (@deleted_volumes) {
        my $storage_cfg = PVE::Storage::config();
        foreach my $volume (@deleted_volumes) {
            delete_mountpoint_volume($storage_cfg, $vmid, $volume);
        }
    }

    if ($new_disks) {
        my $storage_cfg = PVE::Storage::config();
        create_disks($storage_cfg, $vmid, $conf, $conf);
    }

    # This should be the last thing we do here
    if ($running && scalar(@nohotplug)) {
        die "unable to modify " . join(',', @nohotplug) . " while container is running\n";
    }
}
1419
# True unless the 'console' option is explicitly set to a false value.
sub has_dev_console {
    my ($conf) = @_;

    my $console = $conf->{console};

    # unset means enabled; otherwise follow the configured value
    return !defined($console) || !!$console;
}
1425
# Return the configured 'tty' value, or the schema default from $confdesc
# when it is not set.
sub get_tty_count {
    my ($conf) = @_;

    my $tty = $conf->{tty};

    return defined($tty) ? $tty : $confdesc->{tty}->{default};
}
1431
# Return the configured 'cmode' value, or the schema default from $confdesc
# when it is not set.
sub get_cmode {
    my ($conf) = @_;

    my $cmode = $conf->{cmode};

    return defined($cmode) ? $cmode : $confdesc->{cmode}->{default};
}
1437
# Build the command (as an array ref) used to open a console for container
# $vmid, depending on the console mode returned by get_cmode.
# Dies with "internal error" on an unknown mode.
sub get_console_command {
    my ($vmid, $conf) = @_;

    my $cmode = get_cmode($conf);

    return ['lxc-console', '-n', $vmid, '-t', 0] if $cmode eq 'console';
    return ['lxc-console', '-n', $vmid] if $cmode eq 'tty';
    return ['lxc-attach', '--clear-env', '-n', $vmid] if $cmode eq 'shell';

    die "internal error";
}
1453
# Return ($ipv4, $ipv6) taken from the net0 entry of $conf.
#
# Symbolic values (dhcp/manual for IPv4, auto/dhcp/manual for IPv6) are
# reported as undef, and a trailing "/<digits>" is stripped from real
# addresses. Returns undef when net0 is not configured.
sub get_primary_ips {
    my ($conf) = @_;

    # return data from net0
    my $net0 = $conf->{net0};
    return undef if !defined($net0);

    my $net = parse_lxc_network($net0);

    my $ipv4 = $net->{ip};
    if ($ipv4 && $ipv4 =~ /^(dhcp|manual)$/) {
        $ipv4 = undef;
    } elsif ($ipv4) {
        $ipv4 =~ s!/\d+$!!;
    }

    my $ipv6 = $net->{ip6};
    if ($ipv6 && $ipv6 =~ /^(auto|dhcp|manual)$/) {
        $ipv6 = undef;
    } elsif ($ipv6) {
        $ipv6 =~ s!/\d+$!!;
    }

    return ($ipv4, $ipv6);
}
1481
# Free the storage volume behind a mountpoint, but only if
# classify_mountpoint reports it as a 'volume' and the owner parsed from
# the volume name equals $vmid.
sub delete_mountpoint_volume {
    my ($storage_cfg, $vmid, $volume) = @_;

    my $kind = classify_mountpoint($volume);
    return if $kind ne 'volume';

    # only the third element (the owner) of the parsed volume name is needed
    my (undef, undef, $owner) = PVE::Storage::parse_volname($storage_cfg, $volume);

    PVE::Storage::vdisk_free($storage_cfg, $volume) if $vmid == $owner;
}
1490
# Destroy a container: release the volumes of all its mountpoints, remove
# its directory below /var/lib/lxc and finally drop its configuration.
sub destroy_lxc_container {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $release_volume = sub {
        my ($ms, $mountpoint) = @_;
        delete_mountpoint_volume($storage_cfg, $vmid, $mountpoint->{volume});
    };
    foreach_mountpoint($conf, $release_volume);

    # Return values are deliberately not checked here.
    rmdir "/var/lib/lxc/$vmid/rootfs";
    unlink "/var/lib/lxc/$vmid/config";
    rmdir "/var/lib/lxc/$vmid";
    destroy_config($vmid);

    # 'lxc-destroy -n $vmid' is intentionally not run; the original call is
    # kept disabled:
    #my $cmd = ['lxc-destroy', '-n', $vmid ];
    #PVE::Tools::run_command($cmd);
}
1507
# Clean up after a container was stopped: deactivate all of its volumes
# unless $keepActive is set. Failures are only reported as warnings.
sub vm_stop_cleanup {
    my ($storage_cfg, $vmid, $conf, $keepActive) = @_;

    eval {
        # leaves the eval block only, not the function
        return if $keepActive;

        my $vollist = get_vm_volumes($conf);
        PVE::Storage::deactivate_volumes($storage_cfg, $vollist);
    };
    if (my $err = $@) {
        warn $err; # avoid errors - just warn
    }
}
1520
# Numeric "not equal" that tolerates undef: two undefs are equal, exactly
# one undef is unequal, otherwise the values are compared with '!='.
my $safe_num_ne = sub {
    my ($lhs, $rhs) = @_;

    my $lhs_set = defined($lhs);
    my $rhs_set = defined($rhs);

    return 0 if !$lhs_set && !$rhs_set;
    return 1 if !$lhs_set || !$rhs_set;

    return $lhs != $rhs;
};
1530
# String "not equal" that tolerates undef: two undefs are equal, exactly
# one undef is unequal, otherwise the values are compared with 'ne'.
my $safe_string_ne = sub {
    my ($lhs, $rhs) = @_;

    my $lhs_set = defined($lhs);
    my $rhs_set = defined($rhs);

    return 0 if !$lhs_set && !$rhs_set;
    return 1 if !$lhs_set || !$rhs_set;

    return $lhs ne $rhs;
};
1540
# Apply a changed netN option to a running container.
#
# - No previous entry: the interface is hot-plugged.
# - hwaddr or name changed: the veth device is deleted, the option removed
#   from the config and the interface hot-plugged again.
# - bridge, tag or firewall changed: the host side is unplugged (if it was
#   on a bridge) and plugged again with the new settings.
# The config is written after each intermediate step. Afterwards the
# IP configuration is updated via update_ipconfig.
sub update_net {
    my ($vmid, $conf, $opt, $newnet, $netid, $rootdir) = @_;

    # for when there are physical interfaces
    die "cannot update interface of type $newnet->{type}"
        if $newnet->{type} ne 'veth';

    my $veth = "veth${vmid}i${netid}";
    my $eth = $newnet->{name};

    my $oldnetcfg = $conf->{$opt};
    if (!$oldnetcfg) {
        hotplug_net($vmid, $conf, $opt, $newnet, $netid);
    } else {
        my $oldnet = parse_lxc_network($oldnetcfg);

        if ($safe_string_ne->($oldnet->{hwaddr}, $newnet->{hwaddr}) ||
            $safe_string_ne->($oldnet->{name}, $newnet->{name})) {

            # the device itself changed: recreate it from scratch
            PVE::Network::veth_delete($veth);
            delete $conf->{$opt};
            write_config($vmid, $conf);

            hotplug_net($vmid, $conf, $opt, $newnet, $netid);

        } elsif ($safe_string_ne->($oldnet->{bridge}, $newnet->{bridge}) ||
                 $safe_num_ne->($oldnet->{tag}, $newnet->{tag}) ||
                 $safe_num_ne->($oldnet->{firewall}, $newnet->{firewall})) {

            my @bridge_keys = qw(bridge tag firewall);

            if ($oldnet->{bridge}) {
                PVE::Network::tap_unplug($veth);
                # record the unplugged state before plugging again
                delete $oldnet->{$_} for @bridge_keys;
                $conf->{$opt} = print_lxc_network($oldnet);
                write_config($vmid, $conf);
            }

            PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});
            foreach my $key (@bridge_keys) {
                $oldnet->{$key} = $newnet->{$key} if $newnet->{$key};
            }
            $conf->{$opt} = print_lxc_network($oldnet);
            write_config($vmid, $conf);
        }
    }

    update_ipconfig($vmid, $conf, $opt, $eth, $newnet, $rootdir);
}
1590
# Hot-plug a veth interface into a running container: create the veth pair,
# plug the host side, hand the peer to the container with lxc-device, bring
# it up, and store the applied link settings (bridge, tag, firewall, hwaddr,
# name) in the config. IP settings are not stored here.
sub hotplug_net {
    my ($vmid, $conf, $opt, $newnet, $netid) = @_;

    my $veth = "veth${vmid}i${netid}";
    my $vethpeer = $veth . "p";
    my $eth = $newnet->{name};

    PVE::Network::veth_create($veth, $vethpeer, $newnet->{bridge}, $newnet->{hwaddr});
    PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});

    # attach peer in container
    PVE::Tools::run_command(['lxc-device', '-n', $vmid, 'add', $vethpeer, "$eth" ]);

    # link up peer in container
    PVE::Tools::run_command(
        ['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', '/sbin/ip', 'link', 'set', $eth ,'up' ]);

    my $done = { type => 'veth' };
    foreach my $key (qw(bridge tag firewall hwaddr name)) {
        my $setting = $newnet->{$key};
        $done->{$key} = $setting if $setting;
    }
    $conf->{$opt} = print_lxc_network($done);

    write_config($vmid, $conf);
}
1617
# Apply changed IPv4/IPv6 address and gateway settings of interface $eth
# inside a running container, then persist them.
#
# Parameters:
#   $vmid    - container ID
#   $conf    - configuration hash, modified in place
#   $opt     - name of the network option being updated (e.g. 'net0')
#   $eth     - interface name inside the container
#   $newnet  - parsed network settings that should become active
#   $rootdir - root directory handed to PVE::LXC::Setup->new
#
# For each IP version the new address is added first, then the gateway is
# replaced, and only then the old address is removed, so that a failure in
# an early step leaves the previous setup in place. Failures of the first
# two steps are reported with warn and abort the change for that IP version.
sub update_ipconfig {
    my ($vmid, $conf, $opt, $eth, $newnet, $rootdir) = @_;

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);

    my $optdata = parse_lxc_network($conf->{$opt});

    # run a command inside the network namespace of the container
    my $nscmd = sub {
        my $cmdargs = shift;
        PVE::Tools::run_command(['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', @_], %$cmdargs);
    };
    my $ipcmd = sub { &$nscmd({}, '/sbin/ip', @_) };

    # values of ip/ip6 that are keywords rather than addresses
    my $symbolic_ip_re = qr/^(?:auto|dhcp|manual)$/;

    my $change_ip_config = sub {
        my ($ipversion) = @_;

        my $family_opt = "-$ipversion";
        my $suffix = $ipversion == 4 ? '' : $ipversion;
        my $gw= "gw$suffix";
        my $ip= "ip$suffix";

        my $newip = $newnet->{$ip};
        my $newgw = $newnet->{$gw};
        my $oldip = $optdata->{$ip};

        my $change_ip = &$safe_string_ne($oldip, $newip);
        my $change_gw = &$safe_string_ne($optdata->{$gw}, $newgw);

        return if !$change_ip && !$change_gw;

        # step 1: add new IP, if this fails we cancel
        my $is_real_ip = ($newip && $newip !~ $symbolic_ip_re);
        if ($change_ip && $is_real_ip) {
            eval { &$ipcmd($family_opt, 'addr', 'add', $newip, 'dev', $eth); };
            if (my $err = $@) {
                warn $err;
                return;
            }
        }

        # step 2: replace gateway
        # If this fails we delete the added IP and cancel.
        # If it succeeds we save the config and delete the old IP, ignoring
        # errors. The config is then saved.
        # Note: 'ip route replace' can also add the route if none exists yet.
        if ($change_gw) {
            if ($newgw) {
                eval {
                    # a gateway outside of the new subnet needs its own
                    # device route first
                    if ($is_real_ip && !PVE::Network::is_ip_in_cidr($newgw, $newip, $ipversion)) {
                        &$ipcmd($family_opt, 'route', 'add', $newgw, 'dev', $eth);
                    }
                    &$ipcmd($family_opt, 'route', 'replace', 'default', 'via', $newgw);
                };
                if (my $err = $@) {
                    warn $err;
                    # the route was not replaced, the old IP is still available
                    # rollback (delete new IP) and cancel
                    # Only roll back an address that step 1 actually added.
                    if ($change_ip && $is_real_ip) {
                        eval { &$ipcmd($family_opt, 'addr', 'del', $newip, 'dev', $eth); };
                        warn $@ if $@; # no need to die here
                    }
                    return;
                }
            } else {
                eval { &$ipcmd($family_opt, 'route', 'del', 'default'); };
                # if the route was not deleted, the guest might have deleted it manually
                # warn and continue
                warn $@ if $@;
            }
        }

        # from this point on we save the configuration
        # step 3: delete old IP ignoring errors
        # 'manual' is skipped like 'auto' and 'dhcp': it is not an address
        # that could be removed.
        if ($change_ip && $oldip && $oldip !~ $symbolic_ip_re) {
            # We need to enable promote_secondaries, otherwise our newly added
            # address will be removed along with the old one.
            my $promote = 0;
            eval {
                if ($ipversion == 4) {
                    &$nscmd({ outfunc => sub { $promote = int(shift) } },
                            'cat', "/proc/sys/net/ipv4/conf/$eth/promote_secondaries");
                    &$nscmd({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=1");
                }
                &$ipcmd($family_opt, 'addr', 'del', $oldip, 'dev', $eth);
            };
            warn $@ if $@; # no need to die here

            # restore the previous promote_secondaries value
            if ($ipversion == 4) {
                &$nscmd({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=$promote");
            }
        }

        foreach my $property ($ip, $gw) {
            if ($newnet->{$property}) {
                $optdata->{$property} = $newnet->{$property};
            } else {
                delete $optdata->{$property};
            }
        }
        $conf->{$opt} = print_lxc_network($optdata);
        write_config($vmid, $conf);
        $lxc_setup->setup_network($conf);
    };

    &$change_ip_config(4);
    &$change_ip_config(6);

}
1727
1728 # Internal snapshots
1729
1730 # NOTE: Snapshot create/delete involves several non-atomic
1731 # actions, and can take a long time.
1732 # So we try to avoid locking the file and use the 'lock' variable
1733 # inside the config file instead.
1734
# Copy all settings from $source into $dest, except snapshot bookkeeping
# (snapshots, snapstate, snaptime, vmstate), lock, digest, description and
# the unusedN entries.
my $snapshot_copy_config = sub {
    my ($source, $dest) = @_;

    my %skip = map { $_ => 1 }
        qw(snapshots snapstate snaptime vmstate lock digest description);

    foreach my $key (keys %$source) {
        next if $skip{$key};
        next if $key =~ m/^unused\d+$/;

        $dest->{$key} = $source->{$key};
    }
};
1751
# Build a new configuration from snapshot $snap: it keeps the snapshot
# list, the description and the unusedN entries of the current $conf and
# takes everything else from the snapshot (via $snapshot_copy_config).
# Returns the new hash; $conf itself is not modified.
my $snapshot_apply_config = sub {
    my ($conf, $snap) = @_;

    my $newconf = {};

    # copy snapshot list
    $newconf->{snapshots} = $conf->{snapshots};

    # keep description and list of unused disks
    my @kept = grep { $_ eq 'description' || $_ =~ m/^unused\d+$/ } keys %$conf;
    $newconf->{$_} = $conf->{$_} for @kept;

    $snapshot_copy_config->($snap, $newconf);

    return $newconf;
};
1770
# Placeholder: saving the runtime state of a container is not implemented,
# so this always dies.
my $snapshot_save_vmstate = sub { die "implement me - snapshot_save_vmstate\n" };
1774
# First phase of taking a snapshot, run under the config lock:
# checks that the container is neither a template nor locked, that the name
# is free and that the storage supports the feature, then sets the
# 'snapshot' lock and stores a copy of the current settings as snapshot
# $snapname in state "prepare".
# Returns the new snapshot hash.
sub snapshot_prepare {
    my ($vmid, $snapname, $save_vmstate, $comment) = @_;

    my $snap;

    my $prepare = sub {
        my $conf = load_config($vmid);

        if (is_template($conf)) {
            die "you can't take a snapshot if it's a template\n";
        }

        check_lock($conf);

        # only persisted by the write_config call at the end
        $conf->{lock} = 'snapshot';

        if (defined($conf->{snapshots}->{$snapname})) {
            die "snapshot name '$snapname' already used\n";
        }

        my $storecfg = PVE::Storage::config();

        # workaround until mp snapshots are implemented
        my $feature = 'snapshot';
        $feature = 'vzdump' if $snapname eq 'vzdump';
        if (!has_feature($feature, $conf, $storecfg)) {
            die "snapshot feature is not available\n";
        }

        $snap = {};
        $conf->{snapshots}->{$snapname} = $snap;

        if ($save_vmstate && check_running($vmid)) {
            $snapshot_save_vmstate->($vmid, $conf, $snapname, $storecfg);
        }

        $snapshot_copy_config->($conf, $snap);

        $snap->{snapstate} = "prepare";
        $snap->{snaptime} = time();
        $snap->{description} = $comment if $comment;

        write_config($vmid, $conf);
    };

    lock_config($vmid, $prepare);

    return $snap;
}
1819
# Final phase of taking a snapshot, run under the config lock: verifies
# that the 'snapshot' lock is held and the snapshot is in state "prepare",
# clears both, and writes a config built by $snapshot_apply_config with
# 'parent' set to the new snapshot.
sub snapshot_commit {
    my ($vmid, $snapname) = @_;

    my $commit = sub {
        my $conf = load_config($vmid);

        my $lock = $conf->{lock};
        die "missing snapshot lock\n"
            if !($lock && $lock eq 'snapshot');

        my $snap = $conf->{snapshots}->{$snapname};
        die "snapshot '$snapname' does not exist\n" if !defined($snap);

        my $state = $snap->{snapstate};
        die "wrong snapshot state\n"
            if !($state && $state eq "prepare");

        delete $snap->{snapstate};
        delete $conf->{lock};

        my $newconf = $snapshot_apply_config->($conf, $snap);
        $newconf->{parent} = $snapname;

        write_config($vmid, $newconf);
    };

    lock_config($vmid, $commit);
}
1848
# Return 1 if every relevant mountpoint volume of $conf supports $feature
# (optionally for snapshot $snapname), otherwise 0.
#
# 'vzdump' is checked as 'snapshot', but mountpoints other than rootfs are
# then only considered when their 'backup' flag is set.
# Dies for any considered mountpoint other than rootfs (not implemented).
sub has_feature {
    my ($feature, $conf, $storecfg, $snapname) = @_;

    my $vzdump = $feature eq 'vzdump';
    $feature = 'snapshot' if $vzdump;

    my $unsupported;

    my $check_mountpoint = sub {
        my ($ms, $mountpoint) = @_;

        return if $unsupported; # skip further test
        return if $vzdump && $ms ne 'rootfs' && !$mountpoint->{backup};

        if (!PVE::Storage::volume_has_feature($storecfg, $feature, $mountpoint->{volume}, $snapname)) {
            $unsupported = 1;
        }

        # TODO: implement support for mountpoints
        if ($ms ne 'rootfs') {
            die "unable to handle mountpoint '$ms' - feature not implemented\n";
        }
    };

    foreach_mountpoint($conf, $check_mountpoint);

    return $unsupported ? 0 : 1;
}
1871
# Move the current process into namespace $which (a name below
# /proc/$pid/ns) of process $pid; $type is passed on to PVE::Tools::setns.
# $vmid is only used for error messages. Dies on failure.
my $enter_namespace = sub {
    my ($vmid, $pid, $which, $type) = @_;

    my $opened = sysopen(my $fd, "/proc/$pid/ns/$which", O_RDONLY);
    die "failed to open $which namespace of container $vmid: $!\n" if !$opened;

    PVE::Tools::setns(fileno($fd), $type)
        or die "failed to enter $which namespace of container $vmid: $!\n";

    close $fd;
};
1880
# Child side of sync_container_namespace: enter the mount namespace of
# $pid, signal the parent with "go", read the mount table the parent sends
# back over $socket, and sync every listed mountpoint except fuse.lxcfs.
# Sync errors are only warned about.
my $do_syncfs = sub {
    my ($vmid, $pid, $socket) = @_;

    $enter_namespace->($vmid, $pid, 'mnt', PVE::Tools::CLONE_NEWNS);

    # Tell the parent process to start reading our /proc/mounts
    print {$socket} "go\n";
    $socket->flush();

    # Receive /proc/self/mounts
    my $mountdata = do { local $/ = undef; <$socket> };
    close $socket;

    # Now sync all mountpoints...
    my $mounts = PVE::ProcFSTools::parse_mounts($mountdata);
    foreach my $entry (@$mounts) {
        my (undef, $dir, $fstype) = @$entry;
        next if $fstype eq 'fuse.lxcfs';

        eval { PVE::Tools::sync_mountpoint($dir); };
        if (my $err = $@) {
            warn $err;
        }
    }
};
1903
# Sync all filesystems mounted inside container $vmid.
#
# A forked child enters the container's mount namespace and reports "go";
# the parent then reads /proc/<child>/mounts and sends it to the child over
# a socket pair, and the child syncs the listed mountpoints.
# Dies if the child cannot enter the namespace or exits with a non-zero
# status.
sub sync_container_namespace {
    my ($vmid) = @_;
    my $pid = find_lxc_pid($vmid);

    # SOCK_DGRAM is nicer for barriers but cannot be slurped
    socketpair(my $pfd, my $cfd, AF_UNIX, SOCK_STREAM, PF_UNSPEC)
        or die "failed to create socketpair: $!\n";

    my $child = fork();
    die "fork failed: $!\n" if !defined($child);

    if ($child == 0) {
        # child process: never returns from this branch
        eval {
            close $pfd;
            $do_syncfs->($vmid, $pid, $cfd);
        };
        my $err = $@;
        if ($err) {
            warn $err;
            POSIX::_exit(1);
        }
        POSIX::_exit(0);
    }

    close $cfd;

    # wait until the child sits inside the container's mount namespace
    my $go = <$pfd>;
    die "failed to enter container namespace\n" if $go ne "go\n";

    open(my $mounts, '<', "/proc/$child/mounts")
        or die "failed to open container's /proc/mounts: $!\n";
    my $mountdata = do { local $/ = undef; <$mounts> };
    close $mounts;

    print {$pfd} $mountdata;
    # closing our end lets the child's slurp-read finish
    close $pfd;

    1 while waitpid($child, 0) != $child;
    die "failed to sync container namespace\n" if $? != 0;
}
1940
# Create snapshot $snapname of container $vmid.
#
# After snapshot_prepare, a running container is frozen and its
# filesystems synced, then the rootfs volume is snapshotted. The container
# is unfrozen again in any case. On error the half-created snapshot is
# removed (forced) and the error re-thrown; otherwise the snapshot is
# committed.
sub snapshot_create {
    my ($vmid, $snapname, $save_vmstate, $comment) = @_;

    my $snap = snapshot_prepare($vmid, $snapname, $save_vmstate, $comment);

    my $conf = load_config($vmid);
    my $running = check_running($vmid);

    my $frozen = 0;
    my $drivehash = {};

    eval {
        if ($running) {
            # set before freezing, so we also unfreeze if the freeze
            # command itself fails
            $frozen = 1;
            PVE::Tools::run_command(['/usr/bin/lxc-freeze', '-n', $vmid]);
            sync_container_namespace($vmid);
        }

        my $storecfg = PVE::Storage::config();
        my $rootinfo = parse_ct_rootfs($conf->{rootfs});
        my $volid = $rootinfo->{volume};

        PVE::Storage::volume_snapshot($storecfg, $volid, $snapname);
        $drivehash->{rootfs} = 1;
    };
    my $err = $@;

    if ($frozen) {
        eval { PVE::Tools::run_command(['/usr/bin/lxc-unfreeze', '-n', $vmid]); };
        warn $@ if $@;
    }

    if (!$err) {
        snapshot_commit($vmid, $snapname);
        return;
    }

    eval { snapshot_delete($vmid, $snapname, 1, $drivehash); };
    warn "$@\n" if $@;
    die "$err\n";
}
1983
# Delete snapshot $snapname of container $vmid.
#
# The snapshot is first marked with snapstate 'delete', then the storage
# snapshot of its rootfs volume is removed, and finally the snapshot entry
# is dropped from the config (re-linking 'parent' references to the
# deleted snapshot's own parent).
#
# Parameters:
#   $force     - remove the config entry even if the storage operation
#                failed
#   $drivehash - Note: only set when called from snapshot_create. In that
#                case the config lock is not checked but removed.
#
# Dies if the storage snapshot could not be deleted. Without $force the
# snapshot entry then stays in the config (in state 'delete').
sub snapshot_delete {
    my ($vmid, $snapname, $force, $drivehash) = @_;

    my $snap;

    my $conf;

    my $updatefn = sub {

        $conf = load_config($vmid);

        die "you can't delete a snapshot if vm is a template\n"
            if is_template($conf);

        $snap = $conf->{snapshots}->{$snapname};

        if (!$drivehash) {
            check_lock($conf);
        }

        die "snapshot '$snapname' does not exist\n" if !defined($snap);

        $snap->{snapstate} = 'delete';

        write_config($vmid, $conf);
    };

    lock_config($vmid, $updatefn);

    my $storecfg = PVE::Storage::config();

    # Re-point (or drop) a 'parent' reference to the snapshot being deleted.
    my $unlink_parent = sub {

        my ($confref, $new_parent) = @_;

        if ($confref->{parent} && $confref->{parent} eq $snapname) {
            if ($new_parent) {
                $confref->{parent} = $new_parent;
            } else {
                delete $confref->{parent};
            }
        }
    };

    my $del_snap = sub {

        $conf = load_config($vmid);

        if ($drivehash) {
            delete $conf->{lock};
        } else {
            check_lock($conf);
        }

        my $parent = $conf->{snapshots}->{$snapname}->{parent};
        foreach my $snapkey (keys %{$conf->{snapshots}}) {
            &$unlink_parent($conf->{snapshots}->{$snapkey}, $parent);
        }

        &$unlink_parent($conf, $parent);

        delete $conf->{snapshots}->{$snapname};

        write_config($vmid, $conf);
    };

    my $rootfs = $conf->{snapshots}->{$snapname}->{rootfs};
    my $rootinfo = parse_ct_rootfs($rootfs);
    my $volid = $rootinfo->{volume};

    eval {
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname);
    };
    my $err = $@;

    # Drop the config entry on success, or on failure when forced.
    if (!$err || $force) {
        lock_config($vmid, $del_snap);
    }

    # Always report a storage failure; previously it was silently dropped
    # when $force was not set.
    if ($err) {
        die "Can't delete snapshot: $vmid $snapname $err\n";
    }
}
2067
# Roll container $vmid back to snapshot $snapname.
#
# Under the config lock the container is stopped (killed) if running, the
# 'rollback' lock is set and the snapshot settings are copied over the
# current config. Then the rootfs volume is rolled back on the storage and
# the lock removed again.
#
# Dies if the container is a template, the snapshot does not exist or is
# incomplete, the config is locked, or the container is still running
# after the stop attempt.
#
# NOTE(review): the config is loaded before the lock is taken and the same
# hash is reused inside both locked sections - confirm this cannot race
# with concurrent config changes.
sub snapshot_rollback {
    my ($vmid, $snapname) = @_;

    my $storecfg = PVE::Storage::config();

    my $conf = load_config($vmid);

    die "you can't rollback if vm is a template\n" if is_template($conf);

    my $snap = $conf->{snapshots}->{$snapname};

    die "snapshot '$snapname' does not exist\n" if !defined($snap);

    my $rootfs = $snap->{rootfs};
    my $rootinfo = parse_ct_rootfs($rootfs);
    my $volid = $rootinfo->{volume};

    PVE::Storage::volume_rollback_is_possible($storecfg, $volid, $snapname);

    my $updatefn = sub {

        die "unable to rollback to incomplete snapshot (snapstate = $snap->{snapstate})\n"
            if $snap->{snapstate};

        check_lock($conf);

        # list form: no shell involved, $vmid is passed as a plain argument
        system('lxc-stop', '-n', $vmid, '--kill') if check_running($vmid);

        die "unable to rollback vm $vmid: vm is running\n"
            if check_running($vmid);

        $conf->{lock} = 'rollback';

        # copy snapshot config to current config
        # ($snapshot_copy_config leaves the snapshot list, description and
        # unusedN entries of the current config untouched)
        &$snapshot_copy_config($snap, $conf);
        delete $conf->{snaptime};
        delete $conf->{snapname};
        $conf->{parent} = $snapname;

        write_config($vmid, $conf);
    };

    my $unlockfn = sub {
        delete $conf->{lock};
        write_config($vmid, $conf);
    };

    lock_config($vmid, $updatefn);

    PVE::Storage::volume_snapshot_rollback($storecfg, $volid, $snapname);

    lock_config($vmid, $unlockfn);
}
2126
# Turn the rootfs volume of container $vmid into a base volume and store
# the resulting volume id in the config. Dies if the storage does not offer
# the 'template' feature for the volume.
sub template_create {
    my ($vmid, $conf) = @_;

    my $storecfg = PVE::Storage::config();

    my $rootinfo = parse_ct_rootfs($conf->{rootfs});
    my $volid = $rootinfo->{volume};

    if (!PVE::Storage::volume_has_feature($storecfg, 'template', $volid)) {
        die "Template feature is not available for '$volid'\n";
    }

    PVE::Storage::activate_volumes($storecfg, [$volid]);

    # replace the volume reference with the newly created base volume
    $rootinfo->{volume} = PVE::Storage::vdisk_create_base($storecfg, $volid);
    $conf->{rootfs} = print_ct_mountpoint($rootinfo, 1);

    write_config($vmid, $conf);
}
2146
# Return 1 if the 'template' option of $conf is defined and equals 1;
# otherwise a false value.
sub is_template {
    my ($conf) = @_;

    my $flag = $conf->{template};

    return 1 if defined($flag) && $flag == 1;
}
2152
# Return the list of possible mountpoint keys: 'rootfs' followed by
# mp0 .. mp($MAX_MOUNT_POINTS - 1), or the same list reversed when
# $reverse is true.
sub mountpoint_names {
    my ($reverse) = @_;

    my @names = ('rootfs', map { "mp$_" } 0 .. $MAX_MOUNT_POINTS - 1);

    return $reverse ? reverse @names : @names;
}
2164
2165
# Call $func->($key, $mountpoint) for every configured mountpoint of $conf,
# in the order given by mountpoint_names($reverse). Entries that are not
# set, or for which the parser returns undef, are skipped.
sub foreach_mountpoint_full {
    my ($conf, $reverse, $func) = @_;

    foreach my $key (mountpoint_names($reverse)) {
        my $value = $conf->{$key};
        next if !defined($value);

        my $mountpoint;
        if ($key eq 'rootfs') {
            $mountpoint = parse_ct_rootfs($value, 1);
        } else {
            $mountpoint = parse_ct_mountpoint($value, 1);
        }
        next if !defined($mountpoint);

        $func->($key, $mountpoint);
    }
}
2178
# Visit all configured mountpoints in forward order (rootfs first).
sub foreach_mountpoint {
    my ($conf, $func) = @_;

    my $reverse = 0;
    foreach_mountpoint_full($conf, $reverse, $func);
}
2184
# Visit all configured mountpoints in reverse order (rootfs last).
sub foreach_mountpoint_reverse {
    my ($conf, $func) = @_;

    my $reverse = 1;
    foreach_mountpoint_full($conf, $reverse, $func);
}
2190
# Check that $authuser may modify every option in $key_list of container
# $vmid (in $pool).
#
# 'root@pam' is always allowed. For everybody else each option is mapped to
# a privilege and checked with $rpcenv->check_vm_perm:
#   cpus, cpuunits, cpulimit                      -> VM.Config.CPU
#   rootfs, mpN                                   -> VM.Config.Disk
#   memory, swap                                  -> VM.Config.Memory
#   netN, nameserver, searchdomain, hostname      -> VM.Config.Network
#   anything else                                 -> VM.Config.Options
#
# Returns 1. NOTE(review): check_vm_perm presumably dies on a missing
# privilege - confirm against PVE::RPCEnvironment.
sub check_ct_modify_config_perm {
    my ($rpcenv, $authuser, $vmid, $pool, $key_list) = @_;

    # Only the superuser may skip the checks. (This test used to be
    # inverted, which exempted every user except root.)
    return 1 if $authuser eq 'root@pam';

    foreach my $opt (@$key_list) {

        if ($opt eq 'cpus' || $opt eq 'cpuunits' || $opt eq 'cpulimit') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.CPU']);
        } elsif ($opt eq 'rootfs' || $opt =~ /^mp\d+$/) {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk']);
        } elsif ($opt eq 'memory' || $opt eq 'swap') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Memory']);
        } elsif ($opt =~ m/^net\d+$/ || $opt eq 'nameserver' ||
                 $opt eq 'searchdomain' || $opt eq 'hostname') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Network']);
        } else {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Options']);
        }
    }

    return 1;
}
2214
# Unmount all mountpoints of container $vmid below its rootfs directory,
# in reverse order. Mountpoints that are not currently mounted are skipped.
# A failing umount dies, unless $noerr is set, in which case it only warns.
sub umount_all {
    my ($vmid, $storage_cfg, $conf, $noerr) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    my $volid_list = get_vm_volumes($conf);

    my $unmount = sub {
        my ($ms, $mountpoint) = @_;

        my ($volid, $mount) = @{$mountpoint}{qw(volume mp)};
        return if !$volid || !$mount;

        # collapse duplicate slashes before comparing against the mount table
        my $mount_path = "$rootdir/$mount";
        $mount_path =~ s!/+!/!g;

        return if !PVE::ProcFSTools::is_mounted($mount_path);

        eval { PVE::Tools::run_command(['umount', '-d', $mount_path]); };
        my $err = $@;
        return if !$err;

        die $err if !$noerr;
        warn $err;
    };

    foreach_mountpoint_reverse($conf, $unmount);
}
2246
# Activate all volumes of container $vmid and mount every mountpoint below
# /var/lib/lxc/$vmid/rootfs. If a mount fails, everything is unmounted
# again (errors there only warn) and the original error is re-thrown.
# Returns the rootfs directory.
sub mount_all {
    my ($vmid, $storage_cfg, $conf) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    File::Path::make_path($rootdir);

    my $volid_list = get_vm_volumes($conf);
    PVE::Storage::activate_volumes($storage_cfg, $volid_list);

    my $mount_one = sub {
        my ($ms, $mountpoint) = @_;
        mountpoint_mount($mountpoint, $rootdir, $storage_cfg);
    };

    eval { foreach_mountpoint($conf, $mount_one); };
    my $err = $@;
    if ($err) {
        warn "mounting container failed\n";
        umount_all($vmid, $storage_cfg, $conf, 1);
        die $err;
    }

    return $rootdir;
}
2271
2272
# Call mountpoint_mount without a root directory, which makes it only
# return the corresponding mount path.
sub mountpoint_mount_path {
    my ($mountpoint, $storage_cfg, $snapname) = @_;

    my $rootdir = undef;

    return mountpoint_mount($mountpoint, $rootdir, $storage_cfg, $snapname);
}
2278
# Die if resolving $path with Cwd::realpath yields something different
# from its canonical form, i.e. if a symlink (or '..') redirects it.
my $check_mount_path = sub {
    my ($path) = @_;

    $path = File::Spec->canonpath($path);
    my $resolved = Cwd::realpath($path);

    die "mount path modified by symlink: $path != $resolved"
        if $resolved ne $path;
};
2287
# Return the loop device that 'losetup --associated' reports for $path, or
# undef if none is reported. If several lines match, the last one wins.
sub query_loopdev {
    my ($path) = @_;

    my $loopdev;
    my $parse_line = sub {
        my ($line) = @_;
        $loopdev = $1 if $line =~ m@^(/dev/loop\d+):@;
    };

    PVE::Tools::run_command(['losetup', '--associated', $path], outfunc => $parse_line);

    return $loopdev;
}
2301
# Run a function with a file attached to a loop device.
# $func is called with the loop device path as its only argument.
# The loop device is always detached afterwards (or set to autoclear),
# also when $func dies; its error is re-thrown after detaching.
# Returns the loop device.
sub run_with_loopdev {
    my ($func, $file) = @_;

    my $device;
    my $capture_device = sub {
        my ($line) = @_;
        $device = $1 if $line =~ m@^(/dev/loop\d+)$@;
    };

    PVE::Tools::run_command(['losetup', '--show', '-f', $file], outfunc => $capture_device);
    die "failed to setup loop device for $file\n" if !$device;

    eval { $func->($device); };
    my $err = $@;

    PVE::Tools::run_command(['losetup', '-d', $device]);

    die $err if $err;

    return $device;
}
2322
# Bind mount $dir onto $dest, passing @extra_opts through to mount.
# With $ro set, the bind mount is remounted read-only afterwards; should
# that remount fail, $dest is unmounted again and the error is re-thrown.
sub bindmount {
    my ($dir, $dest, $ro, @extra_opts) = @_;

    my @bind_cmd = ('mount', '-o', 'bind', @extra_opts, $dir, $dest);
    PVE::Tools::run_command(\@bind_cmd);

    return if !$ro;

    eval { PVE::Tools::run_command(['mount', '-o', 'bind,remount,ro', $dest]); };
    my $remount_err = $@;
    return if !$remount_err;

    warn "bindmount error\n";
    # don't leave writable bind-mounts behind...
    PVE::Tools::run_command(['umount', $dest]);
    die $remount_err;
}
2336
# Mount a single mountpoint below $rootdir.
# use $rootdir = undef to just return the corresponding mount path
#
# Parameters:
#   $mountpoint  - parsed mountpoint hash; the keys read here are volume, mp,
#                  type, ro, quota and acl
#   $rootdir     - container root directory, or undef to only resolve the path
#   $storage_cfg - storage configuration handed on to PVE::Storage
#   $snapname    - optional snapshot name (only valid for storage volumes)
#
# Returns nothing if the mountpoint has no volume or no mount path.
# Otherwise returns ($path, $use_loopdev, $mounted_dev) in list context and
# just $path in scalar context.
# Dies on unsupported storage types or image formats and on mount errors.
sub mountpoint_mount {
    my ($mountpoint, $rootdir, $storage_cfg, $snapname) = @_;

    my $volid = $mountpoint->{volume};
    my $mount = $mountpoint->{mp};
    my $type = $mountpoint->{type};
    # quota is only considered for writable, non-snapshot mounts
    my $quota = !$snapname && !$mountpoint->{ro} && $mountpoint->{quota};
    my $mounted_dev;

    return if !$volid || !$mount;

    my $mount_path;

    if (defined($rootdir)) {
        $rootdir =~ s!/+$!!;
        $mount_path = "$rootdir/$mount";
        $mount_path =~ s!/+!/!g;
        &$check_mount_path($mount_path);
        File::Path::mkpath($mount_path);
    }

    my ($storage) = PVE::Storage::parse_volume_id($volid, 1);

    die "unknown snapshot path for '$volid'" if !$storage && defined($snapname);

    my $optstring = '';
    if (defined($mountpoint->{acl})) {
        $optstring .= ($mountpoint->{acl} ? 'acl' : 'noacl');
    }
    my $readonly = $mountpoint->{ro};

    # Only hand '-o' to mount when there is something to pass; an empty
    # option string would otherwise end up as a bare "-o ''" argument.
    my @extra_opts;
    @extra_opts = ('-o', $optstring) if $optstring;

    if ($storage) {

        my $scfg = PVE::Storage::storage_config($storage_cfg, $storage);
        my $path = PVE::Storage::path($storage_cfg, $volid, $snapname);

        my ($vtype, undef, undef, undef, undef, $isBase, $format) =
            PVE::Storage::parse_volname($storage_cfg, $volid);

        $format = 'iso' if $vtype eq 'iso'; # allow to handle iso files

        if ($format eq 'subvol') {
            if ($mount_path) {
                if ($snapname) {
                    if ($scfg->{type} eq 'zfspool') {
                        # the zfs mount helper gets the path without its
                        # leading slash(es)
                        my $path_arg = $path;
                        $path_arg =~ s!^/+!!;
                        PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, '-t', 'zfs', $path_arg, $mount_path]);
                    } else {
                        die "cannot mount subvol snapshots for storage type '$scfg->{type}'\n";
                    }
                } else {
                    bindmount($path, $mount_path, $readonly, @extra_opts);
                    warn "cannot enable quota control for bind mounted subvolumes\n" if $quota;
                }
            }
            return wantarray ? ($path, 0, $mounted_dev) : $path;
        } elsif ($format eq 'raw' || $format eq 'iso') {
            # performs the actual mount of the block device / loop device
            my $domount = sub {
                my ($path) = @_;
                if ($mount_path) {
                    if ($format eq 'iso') {
                        PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, $path, $mount_path]);
                    } elsif ($isBase || defined($snapname)) {
                        PVE::Tools::run_command(['mount', '-o', 'ro,noload', @extra_opts, $path, $mount_path]);
                    } else {
                        if ($quota) {
                            push @extra_opts, '-o', 'usrjquota=aquota.user,grpjquota=aquota.group,jqfmt=vfsv0';
                        }
                        push @extra_opts, '-o', 'ro' if $readonly;
                        PVE::Tools::run_command(['mount', @extra_opts, $path, $mount_path]);
                    }
                }
            };
            my $use_loopdev = 0;
            if ($scfg->{path}) {
                # storages with a 'path' setting are mounted via a loop device
                $mounted_dev = run_with_loopdev($domount, $path);
                $use_loopdev = 1;
            } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' ||
                     $scfg->{type} eq 'rbd' || $scfg->{type} eq 'lvmthin') {
                $mounted_dev = $path;
                &$domount($path);
            } else {
                die "unsupported storage type '$scfg->{type}'\n";
            }
            return wantarray ? ($path, $use_loopdev, $mounted_dev) : $path;
        } else {
            die "unsupported image format '$format'\n";
        }
    } elsif ($type eq 'device') {
        push @extra_opts, '-o', 'ro' if $readonly;
        PVE::Tools::run_command(['mount', @extra_opts, $volid, $mount_path]) if $mount_path;
        return wantarray ? ($volid, 0, $volid) : $volid;
    } elsif ($type eq 'bind') {
        die "directory '$volid' does not exist\n" if ! -d $volid;
        &$check_mount_path($volid);
        bindmount($volid, $mount_path, $readonly, @extra_opts) if $mount_path;
        warn "cannot enable quota control for bind mounts\n" if $quota;
        return wantarray ? ($volid, 0, undef) : $volid;
    }

    die "unsupported storage";
}
2443
# Collect the volume IDs of all mountpoints of type 'volume' whose volume ID
# parses to a storage ID.
# $excludes optionally names one mountpoint key that is left out.
# Returns an array reference of volume IDs.
sub get_vm_volumes {
    my ($conf, $excludes) = @_;

    my @volids;

    my $collect = sub {
        my ($ms, $mountpoint) = @_;

        return if $excludes && $ms eq $excludes;

        my $volid = $mountpoint->{volume};
        return if !$volid;
        return if $mountpoint->{type} ne 'volume';

        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        push @volids, $volid if $sid;
    };
    foreach_mountpoint($conf, $collect);

    return \@volids;
}
2466
# Create an ext4 filesystem with the 'mmp' feature on $dev and set the owner
# of its root directory to $rootuid:$rootgid.
# Either id defaults to 0 when it is not given, so that callers passing only
# a device do not produce an invalid "root_owner=:" option.
sub mkfs {
    my ($dev, $rootuid, $rootgid) = @_;

    $rootuid = 0 if !defined($rootuid);
    $rootgid = 0 if !defined($rootgid);

    PVE::Tools::run_command(['mkfs.ext4', '-O', 'mmp',
                             '-E', "root_owner=$rootuid:$rootgid",
                             $dev]);
}
2474
# Create a filesystem on the volume $volid via mkfs(), with its root
# directory owned by $rootuid:$rootgid.
# $volid is either a /dev/ path, which is formatted directly, or a storage
# volume, which is activated first and must be in 'raw' format.
# Dies if the volume has no storage or is not a raw image.
sub format_disk {
    my ($storage_cfg, $volid, $rootuid, $rootgid) = @_;

    if ($volid =~ m!^/dev/.+!) {
        # pass the owner on here as well; mkfs() interpolates both ids into
        # its root_owner option
        mkfs($volid, $rootuid, $rootgid);
        return;
    }

    my ($storage) = PVE::Storage::parse_volume_id($volid, 1);

    die "cannot format volume '$volid' with no storage\n" if !$storage;

    PVE::Storage::activate_volumes($storage_cfg, [$volid]);

    my $path = PVE::Storage::path($storage_cfg, $volid);

    my ($vtype, undef, undef, undef, undef, $isBase, $format) =
        PVE::Storage::parse_volname($storage_cfg, $volid);

    die "cannot format volume '$volid' (format == $format)\n"
        if $format ne 'raw';

    mkfs($path, $rootuid, $rootgid);
}
2499
# Free every volume in $vollist (array reference of volume IDs).
# A failure to free one volume is only warned about, so the remaining
# volumes are still processed.
sub destroy_disks {
    my ($storecfg, $vollist) = @_;

    for my $volid (@$vollist) {
        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        my $free_err = $@;
        warn $free_err if $free_err;
    }
}
2508
# Allocate (and format, where needed) the volumes requested in $settings.
#
# Mountpoints whose volume has the form "<storage>:<size in GB>" get a new
# volume allocated on that storage; all others are taken over as specified.
# The resulting mountpoint strings are written to $conf. Newly created
# subvolumes are chown'ed to the container's root uid/gid as determined by
# parse_id_maps().
#
# Returns an array reference with the IDs of all newly allocated volumes.
# On error every volume allocated so far is freed again and the error is
# re-thrown.
sub create_disks {
    my ($storecfg, $vmid, $settings, $conf) = @_;

    my $vollist = [];

    eval {
        my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
        my $chown_vollist = [];

        # Allocate a volume and record it right away, so that it is freed
        # again even if a later step (e.g. formatting) fails.
        my $alloc = sub {
            my ($storage, $format, $size_kb) = @_;
            my $volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, $format,
                                                   undef, $size_kb);
            push @$vollist, $volid;
            return $volid;
        };

        foreach_mountpoint($settings, sub {
            my ($ms, $mountpoint) = @_;

            my $volid = $mountpoint->{volume};

            my ($storage) = PVE::Storage::parse_volume_id($volid, 1);

            if ($storage && ($volid =~ m/^([^:\s]+):(\d+(\.\d+)?)$/)) {
                my $size_gb = $2;

                my $size_kb = int(${size_gb}*1024) * 1024;

                my $scfg = PVE::Storage::storage_config($storecfg, $storage);
                # fixme: use better naming ct-$vmid-disk-X.raw?

                if ($scfg->{type} eq 'dir' || $scfg->{type} eq 'nfs') {
                    if ($size_kb > 0) {
                        $volid = $alloc->($storage, 'raw', $size_kb);
                        format_disk($storecfg, $volid, $rootuid, $rootgid);
                    } else {
                        # size 0 requests a plain subvolume
                        $volid = $alloc->($storage, 'subvol', 0);
                        push @$chown_vollist, $volid;
                    }
                } elsif ($scfg->{type} eq 'zfspool') {

                    $volid = $alloc->($storage, 'subvol', $size_kb);
                    push @$chown_vollist, $volid;
                } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' || $scfg->{type} eq 'lvmthin') {

                    $volid = $alloc->($storage, 'raw', $size_kb);
                    format_disk($storecfg, $volid, $rootuid, $rootgid);

                } elsif ($scfg->{type} eq 'rbd') {

                    die "krbd option must be enabled on storage type '$scfg->{type}'\n" if !$scfg->{krbd};
                    $volid = $alloc->($storage, 'raw', $size_kb);
                    format_disk($storecfg, $volid, $rootuid, $rootgid);
                } else {
                    die "unable to create containers on storage type '$scfg->{type}'\n";
                }
                $mountpoint->{volume} = $volid;
                $mountpoint->{size} = $size_kb * 1024;
                $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            } else {
                # use specified/existing volid/dir/device
                $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            }
        });

        PVE::Storage::activate_volumes($storecfg, $chown_vollist, undef);
        foreach my $volid (@$chown_vollist) {
            my $path = PVE::Storage::path($storecfg, $volid, undef);
            # report a failed ownership change instead of silently ignoring it
            chown($rootuid, $rootgid, $path)
                or warn "failed to change ownership of '$path': $!\n";
        }
        PVE::Storage::deactivate_volumes($storecfg, $chown_vollist, undef);
    };
    # free allocated images on error
    if (my $err = $@) {
        destroy_disks($storecfg, $vollist);
        die $err;
    }
    return $vollist;
}
2586
2587 # bash completion helper
2588
# Completion callback listing volume IDs of templates, or of backups when
# the command being completed is 'restore'.
# If $cvalue already starts with "<storeid>:", that storage ID is passed to
# the template listing.
# Returns an array reference of volume IDs.
sub complete_os_templates {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $vtype = 'vztmpl';
    $vtype = 'backup' if $cmdname eq 'restore';

    my $data = PVE::Storage::template_list($cfg, $storeid, $vtype);

    my @volids;
    for my $items (values %$data) {
        for my $item (@$items) {
            my $volid = $item->{volid};
            push @volids, $volid if defined($volid);
        }
    }

    return \@volids;
}
2612
# Shared implementation of the container ID completion helpers.
# With $running undefined every ID reported by vmstatus() is returned.
# Otherwise templates are skipped and only containers whose presence in
# list_active_containers() matches the truth value of $running are kept.
# Returns an array reference of container IDs.
my $complete_ctid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();
    my $active_hash = list_active_containers();

    my $wanted = sub {
        my ($id) = @_;
        return 1 if !defined($running);

        my $d = $idlist->{$id};
        return 0 if $d->{template};

        my $is_active = $active_hash->{$id} ? 1 : 0;
        return $running ? $is_active : !$is_active;
    };

    return [ grep { $wanted->($_) } keys %$idlist ];
};
2634
# Completion helper: all container IDs, without any filtering.
sub complete_ctid {
    return $complete_ctid_full->();
}
2638
# Completion helper: IDs of non-template containers that are not active.
sub complete_ctid_stopped {
    my $running = 0;
    return $complete_ctid_full->($running);
}
2642
# Completion helper: IDs of non-template containers that are active.
sub complete_ctid_running {
    my $running = 1;
    return $complete_ctid_full->($running);
}
2646
# Extract the id mappings from the 'lxc.id_map' entries of $conf->{lxc}.
#
# Returns ($id_map, $rootuid, $rootgid): $id_map is an array reference of
# [type, container id, host id, length] entries; $rootuid/$rootgid are the
# host ids that container id 0 maps to (0 if there is no such mapping).
# Without any explicit mapping, an unprivileged container gets a default
# mapping of 65536 ids starting at host id 100000.
# Dies on an id_map value that cannot be parsed.
sub parse_id_maps {
    my ($conf) = @_;

    my @id_map;
    my %root_id = (u => 0, g => 0);

    for my $entry (@{ $conf->{lxc} || [] }) {
        my ($key, $value) = @$entry;
        next if $key ne 'lxc.id_map';

        my ($type, $ct, $host, $length) =
            $value =~ /^([ug])\s+(\d+)\s+(\d+)\s+(\d+)\s*$/
            or die "failed to parse id_map: $value\n";

        push @id_map, [$type, $ct, $host, $length];
        # a mapping for container id 0 determines the root owner on the host
        $root_id{$type} = $host if $ct == 0;
    }

    if (!@id_map && $conf->{unprivileged}) {
        # Should we read them from /etc/subuid?
        @id_map = (['u', '0', '100000', '65536'],
                   ['g', '0', '100000', '65536']);
        $root_id{u} = $root_id{g} = 100000;
    }

    return (\@id_map, $root_id{u}, $root_id{g});
}
2679
# Build the lxc-usernsexec command prefix for the given id mappings.
# Every mapping becomes one "-m <fields joined by ':'>" argument pair and
# the prefix is terminated with '--'.
# Returns an array reference; it is empty when $id_map has no entries.
sub userns_command {
    my ($id_map) = @_;

    return [] if !@$id_map;

    my @cmd = ('lxc-usernsexec');
    for my $mapping (@$id_map) {
        push @cmd, '-m', join(':', @$mapping);
    }
    push @cmd, '--';

    return \@cmd;
}
2687
2688 1;