]> git.proxmox.com Git - pve-container.git/blob - src/PVE/LXC.pm
Refactor snapshot_rollback
[pve-container.git] / src / PVE / LXC.pm
1 package PVE::LXC;
2
3 use strict;
4 use warnings;
5 use POSIX qw(EINTR);
6
7 use Socket;
8
9 use File::Path;
10 use File::Spec;
11 use Cwd qw();
12 use Fcntl qw(O_RDONLY);
13
14 use PVE::Cluster qw(cfs_register_file cfs_read_file);
15 use PVE::Storage;
16 use PVE::SafeSyslog;
17 use PVE::INotify;
18 use PVE::JSONSchema qw(get_standard_option);
19 use PVE::Tools qw($IPV6RE $IPV4RE dir_glob_foreach lock_file lock_file_full);
20 use PVE::Network;
21 use PVE::AccessControl;
22 use PVE::ProcFSTools;
23 use Time::HiRes qw (gettimeofday);
24
25 use Data::Dumper;
26
# Name of the local node, looked up once when the module is loaded.
my $nodename = PVE::INotify::nodename();

# Host CPU information, read once when the module is loaded.
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();

# Common tar options, exposed as a package variable.
our $COMMON_TAR_FLAGS = [
    qw(
        --sparse
        --numeric-owner
        --acls
        --xattrs
        --xattrs-include=user.*
        --xattrs-include=security.capability
        --warning=no-xattr-write
    )
];

# Register the parser and writer used for files below '/lxc/'.
cfs_register_file('/lxc/', \&parse_pct_config, \&write_pct_config);
38
# Property-string schema for the 'rootfs' option. 'volume' is the default
# key; all other properties are optional.
my $rootfs_desc = {
    volume => {
        type               => 'string',
        format             => 'pve-lxc-mp-string',
        format_description => 'volume',
        default_key        => 1,
        description        => 'Volume, device or directory to mount into the container.',
    },
    backup => {
        type               => 'boolean',
        optional           => 1,
        format_description => '[1|0]',
        description        => 'Whether to include the mountpoint in backups.',
    },
    size => {
        type               => 'string',
        optional           => 1,
        format             => 'disk-size',
        format_description => 'DiskSize',
        description        => 'Volume size (read only value).',
    },
    acl => {
        type               => 'boolean',
        optional           => 1,
        format_description => 'acl',
        description        => 'Explicitly enable or disable ACL support.',
    },
    ro => {
        type               => 'boolean',
        optional           => 1,
        format_description => 'ro',
        description        => 'Read-only mountpoint (not supported with bind mounts)',
    },
    quota => {
        type               => 'boolean',
        optional           => 1,
        format_description => '[0|1]',
        description        => 'Enable user quotas inside the container (not supported with zfs subvolumes)',
    },
};
79
# Standard option describing the container root file system
# (a property string following $rootfs_desc).
PVE::JSONSchema::register_standard_option(
    'pve-ct-rootfs',
    {
        type        => 'string',
        format      => $rootfs_desc,
        optional    => 1,
        description => "Use volume as container root.",
    },
);

# Standard option for snapshot names (config id, at most 40 characters).
PVE::JSONSchema::register_standard_option(
    'pve-lxc-snapshot-name',
    {
        type        => 'string',
        format      => 'pve-configid',
        maxLength   => 40,
        description => "The name of the snapshot.",
    },
);
91
# Schema of all regular container config keys. check_type() validates
# values against it; the netN, mpN and unusedN entries are added by the
# loops that follow this table.
my $confdesc = {
    lock => {
        type        => 'string',
        optional    => 1,
        enum        => ['migrate', 'backup', 'snapshot', 'rollback'],
        description => "Lock/unlock the VM.",
    },
    onboot => {
        type        => 'boolean',
        optional    => 1,
        default     => 0,
        description => "Specifies whether a VM will be started during system bootup.",
    },
    startup => get_standard_option('pve-startup-order'),
    template => {
        type        => 'boolean',
        optional    => 1,
        default     => 0,
        description => "Enable/disable Template.",
    },
    arch => {
        type        => 'string',
        optional    => 1,
        enum        => [qw(amd64 i386)],
        default     => 'amd64',
        description => "OS architecture type.",
    },
    ostype => {
        type        => 'string',
        optional    => 1,
        enum        => [qw(debian ubuntu centos fedora opensuse archlinux alpine)],
        description => "OS type. Corresponds to lxc setup scripts in /usr/share/lxc/config/<ostype>.common.conf.",
    },
    console => {
        type        => 'boolean',
        optional    => 1,
        default     => 1,
        description => "Attach a console device (/dev/console) to the container.",
    },
    tty => {
        type        => 'integer',
        optional    => 1,
        minimum     => 0,
        maximum     => 6,
        default     => 2,
        description => "Specify the number of tty available to the container",
    },
    cpulimit => {
        type        => 'number',
        optional    => 1,
        minimum     => 0,
        maximum     => 128,
        default     => 0,
        description => "Limit of CPU usage. Note if the computer has 2 CPUs, it has a total of '2' CPU time. Value '0' indicates no CPU limit.",
    },
    cpuunits => {
        type        => 'integer',
        optional    => 1,
        minimum     => 0,
        maximum     => 500000,
        default     => 1024,
        description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to the weights of all the other running VMs.\n\nNOTE: You can disable fair-scheduler configuration by setting this to 0.",
    },
    memory => {
        type        => 'integer',
        optional    => 1,
        minimum     => 16,
        default     => 512,
        description => "Amount of RAM for the VM in MB.",
    },
    swap => {
        type        => 'integer',
        optional    => 1,
        minimum     => 0,
        default     => 512,
        description => "Amount of SWAP for the VM in MB.",
    },
    hostname => {
        type        => 'string',
        format      => 'dns-name',
        optional    => 1,
        maxLength   => 255,
        description => "Set a host name for the container.",
    },
    description => {
        type        => 'string',
        optional    => 1,
        description => "Container description. Only used on the configuration web interface.",
    },
    searchdomain => {
        type        => 'string',
        format      => 'dns-name-list',
        optional    => 1,
        description => "Sets DNS search domains for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
    },
    nameserver => {
        type        => 'string',
        format      => 'address-list',
        optional    => 1,
        description => "Sets DNS server IP address for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
    },
    rootfs => get_standard_option('pve-ct-rootfs'),
    parent => {
        type        => 'string',
        format      => 'pve-configid',
        optional    => 1,
        maxLength   => 40,
        description => "Parent snapshot name. This is used internally, and should not be modified.",
    },
    snaptime => {
        type        => 'integer',
        optional    => 1,
        minimum     => 0,
        description => "Timestamp for snapshots.",
    },
    cmode => {
        type        => 'string',
        optional    => 1,
        enum        => [qw(shell console tty)],
        default     => 'tty',
        description => "Console mode. By default, the console command tries to open a connection to one of the available tty devices. By setting cmode to 'console' it tries to attach to /dev/console instead. If you set cmode to 'shell', it simply invokes a shell inside the container (no login).",
    },
    protection => {
        type        => 'boolean',
        optional    => 1,
        default     => 0,
        description => "Sets the protection flag of the container. This will prevent the CT or CT's disk remove/update operation.",
    },
    unprivileged => {
        type        => 'boolean',
        optional    => 1,
        default     => 0,
        description => "Makes the container run as unprivileged user. (Should not be modified manually.)",
    },
};
223
# Raw 'lxc.*' keys recognised by parse_pct_config(). A value of 1 means the
# key is accepted as is; any other value is the warning text printed when
# the key is encountered (the key is then dropped).
# Keys starting with 'lxc.cgroup.' are not listed: parse_pct_config()
# accepts them through a pattern match.
my $valid_lxc_conf_keys = {
    (
        map { $_ => 1 } qw(
            lxc.include
            lxc.arch
            lxc.utsname
            lxc.haltsignal
            lxc.rebootsignal
            lxc.stopsignal
            lxc.init_cmd
            lxc.network.type
            lxc.network.flags
            lxc.network.link
            lxc.network.mtu
            lxc.network.name
            lxc.network.hwaddr
            lxc.network.ipv4
            lxc.network.ipv4.gateway
            lxc.network.ipv6
            lxc.network.ipv6.gateway
            lxc.network.script.up
            lxc.network.script.down
            lxc.pts
            lxc.console.logfile
            lxc.console
            lxc.tty
            lxc.devttydir
            lxc.hook.autodev
            lxc.autodev
            lxc.kmsg
            lxc.mount
            lxc.mount.entry
            lxc.mount.auto
            lxc.rootfs.mount
            lxc.cap.drop
            lxc.cap.keep
            lxc.aa_profile
            lxc.aa_allow_incomplete
            lxc.se_context
            lxc.seccomp
            lxc.id_map
            lxc.hook.pre-start
            lxc.hook.pre-mount
            lxc.hook.mount
            lxc.hook.start
            lxc.hook.stop
            lxc.hook.post-stop
            lxc.hook.clone
            lxc.hook.destroy
            lxc.loglevel
            lxc.logfile
            lxc.start.auto
            lxc.start.delay
            lxc.start.order
            lxc.group
            lxc.environment
        )
    ),
    'lxc.rootfs'         => 'lxc.rootfs is auto generated from rootfs',
    'lxc.rootfs.options' => 'lxc.rootfs.options is not supported, please use mountpoint options in the "rootfs" key',
};
283
# Property-string schema for the 'netN' options, also registered as the
# 'pve-lxc-network' format.
#
# Changes against the previous revision:
#  * 'hwaddr' carried the description of 'bridge' (copy-paste slip); it now
#    describes the MAC address.
#  * the VLAN tag limits are plain numbers, like every other numeric limit
#    in this file, instead of quoted strings.
my $netconf_desc = {
    type => {
        type => 'string',
        optional => 1,
        description => "Network interface type.",
        enum => [qw(veth)],
    },
    name => {
        type => 'string',
        format_description => 'String',
        description => 'Name of the network device as seen from inside the container. (lxc.network.name)',
        pattern => '[-_.\w\d]+',
    },
    bridge => {
        type => 'string',
        format_description => 'vmbr<Number>',
        description => 'Bridge to attach the network device to.',
        pattern => '[-_.\w\d]+',
        optional => 1,
    },
    hwaddr => {
        type => 'string',
        format_description => 'MAC',
        description => 'MAC address of the network device. (lxc.network.hwaddr)',
        pattern => qr/(?:[a-f0-9]{2}:){5}[a-f0-9]{2}/i,
        optional => 1,
    },
    mtu => {
        type => 'integer',
        format_description => 'Number',
        description => 'Maximum transfer unit of the interface. (lxc.network.mtu)',
        minimum => 64, # minimum ethernet frame is 64 bytes
        optional => 1,
    },
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        description => 'IPv4 address in CIDR format.',
        optional => 1,
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        description => 'Default gateway for IPv4 traffic.',
        optional => 1,
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        description => 'IPv6 address in CIDR format.',
        optional => 1,
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        description => 'Default gateway for IPv6 traffic.',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        format_description => '[1|0]',
        description => "Controls whether this interface's firewall rules should be used.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        format_description => 'VlanNo',
        minimum => 2,
        maximum => 4094,
        description => "VLAN tag for this interface.",
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:;\d+)*/,
        format_description => 'vlanid[;vlanid...]',
        description => "VLAN ids to pass through the interface",
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-lxc-network', $netconf_desc);
369
# Add the config keys net0 .. net<MAX-1>; each key gets its own schema hash.
my $MAX_LXC_NETWORKS = 10;
for my $idx (0 .. $MAX_LXC_NETWORKS - 1) {
    $confdesc->{"net$idx"} = {
        type        => 'string',
        format      => $netconf_desc,
        optional    => 1,
        description => "Specifies network interfaces for the container.",
    };
}

# Register the validator for mount point path strings.
PVE::JSONSchema::register_format('pve-lxc-mp-string', \&verify_lxc_mp_string);
# Validate a mount point string.
#
# Rejected are strings that contain a "/./" or "/../" component, end in
# "/." or "/..", or start with "../".
#
# Returns $mp unchanged when it is acceptable. Otherwise returns undef if
# $noerr is true and dies if it is not.
sub verify_lxc_mp_string {
    my ($mp, $noerr) = @_;

    my $has_dot_component =
           $mp =~ m{/\.\.?/}    # "/./" or "/../" anywhere
        || $mp =~ m{/\.\.?$}    # "/." or "/.." at the end
        || $mp =~ m{^\.\./};    # "../" at the beginning

    return $mp if !$has_dot_component;

    return undef if $noerr;
    die "$mp contains illegal character sequences\n";
}
396
# Property-string schema for the 'mpN' options: every property of
# $rootfs_desc plus the mandatory 'mp' path. Also registered as the
# 'pve-ct-mountpoint' format.
my $mp_desc = {
    %$rootfs_desc,
    mp => {
        type               => 'string',
        format             => 'pve-lxc-mp-string',
        format_description => 'Path',
        description        => 'Path to the mountpoint as seen from inside the container.',
    },
};
PVE::JSONSchema::register_format('pve-ct-mountpoint', $mp_desc);
407
# Schema shared by all 'unusedN' config keys.
my $unuseddesc = {
    optional => 1,
    type => 'string', format => 'pve-volume-id',
    description => "Reference to unused volumes.",
};

# Add the config keys mp0 .. mp<MAX-1>; each key gets its own schema hash.
# (The hash previously listed 'optional' twice.)
my $MAX_MOUNT_POINTS = 10;
for (my $i = 0; $i < $MAX_MOUNT_POINTS; $i++) {
    $confdesc->{"mp$i"} = {
        optional => 1,
        type => 'string', format => $mp_desc,
        description => "Use volume as container mount point (experimental feature).",
    };
}

# Add the config keys unused0 .. unused<MAX-1>; they all share $unuseddesc.
# The loop is bounded by $MAX_UNUSED_DISKS - the limit add_unused_volume()
# uses - so both stay in sync should the two limits ever differ.
my $MAX_UNUSED_DISKS = $MAX_MOUNT_POINTS;
for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) {
    $confdesc->{"unused$i"} = $unuseddesc;
}
428
# Serialise a parsed container config into its textual file format.
#
# $filename is accepted but not used. $conf is the config hash; its
# 'snapstate' key is deleted as a side effect.
#
# The top-level section is emitted first, followed by one "[name]" section
# per snapshot in sorted order. Dies if a regular property value contains a
# newline. Returns the resulting text.
sub write_pct_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    # keys that are never written as plain "key: value" lines
    my %skip = map { $_ => 1 } qw(digest description pending snapshots snapname lxc);

    my $render_section = sub {
        my ($section) = @_;

        # the description goes to the top of the section as comment lines
        my $text = join('',
            map { '#' . PVE::Tools::encode_text($_) . "\n" }
            split(/\n/, $section->{description} || ''));

        for my $name (sort keys %$section) {
            next if $skip{$name};
            my $val = $section->{$name};
            die "detected invalid newline inside property '$name'\n" if $val =~ m/\n/;
            $text .= "$name: $val\n";
        }

        # raw lxc entries keep their stored order and come last
        if (my $lxc_entries = $section->{lxc}) {
            for my $pair (@$lxc_entries) {
                $text .= "$pair->[0]: $pair->[1]\n";
            }
        }

        return $text;
    };

    my $raw = $render_section->($conf);

    for my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n" . $render_section->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
472
# Validate $value for config key $key against $confdesc.
#
# Dies for unknown keys, undefined values, values containing CR/LF and
# values that fail the type check. Returns the normalised value: booleans
# become 0/1, integers are passed through int(); numbers and strings are
# returned unchanged (strings after an optional format check).
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        my $fmt = $desc->{format};
        PVE::JSONSchema::check_format($fmt, $value) if $fmt;
        return $value;
    }

    die "internal error"
}
508
# Parse the raw text of a container config file.
#
# $filename must end in "/lxc/<vmid>.conf"; the vmid is only used in
# warnings. Returns undef when $raw is undefined, otherwise a hash with:
#   digest    - SHA1 hex digest of $raw
#   snapshots - one sub-hash per "[name]" section
#   lxc       - list of [key, value] pairs for accepted raw lxc.* lines
#   ...       - all other "key: value" pairs
# Lines starting with '#' are collected into the 'description' of the
# section they belong to. Unparsable lines and invalid values only produce
# warnings.
#
# Fixes against the previous revision:
#  * the 'snapstate' pattern is anchored at the start of the line, so it no
#    longer matches lines that merely end in "snapstate: prepare|delete";
#  * the dot in the file name pattern is escaped.
sub parse_pct_config {
    my ($filename, $raw) = @_;

    return undef if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
    };

    $filename =~ m|/lxc/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    my $conf = $res;    # section currently being filled
    my $descr = '';     # description collected for the current section
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            # new snapshot section: finish the previous one first
            $section = $1;
            $conf->{description} = $descr if $descr;
            $descr = '';
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)\s*$/) {
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(lxc\.[a-z0-9_\-\.]+)(:|\s*=)\s*(.*?)\s*$/) {
            my $key = $1;
            my $value = $3;
            # 1 => accepted, any other true value => warning text to print
            my $validity = $valid_lxc_conf_keys->{$key} || 0;
            if ($validity eq 1 || $key =~ m/^lxc\.cgroup\./) {
                push @{$conf->{lxc}}, [$key, $value];
            } elsif (my $errmsg = $validity) {
                warn "vm $vmid - $key: $errmsg\n";
            } else {
                warn "vm $vmid - unable to parse config: $line\n";
            }
        } elsif ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(\S.*)\s*$/) {
            my $key = $1;
            my $value = $2;
            # a value that fails validation is reported but still stored
            eval { $value = check_type($key, $value); };
            warn "vm $vmid - unable to parse value of '$key' - $@" if $@;
            $conf->{$key} = $value;
        } else {
            warn "vm $vmid - unable to parse config: $line\n";
        }
    }

    $conf->{description} = $descr if $descr;

    delete $res->{snapstate}; # just to be sure

    return $res;
}
577
# Return a hash { vmid => { type => 'lxc' } } with every entry of the
# cluster VM list that is of type 'lxc' and located on the local node.
sub config_list {
    my $res = {};

    my $vmlist = PVE::Cluster::get_vmlist();
    return $res if !$vmlist || !$vmlist->{ids};

    my $ids = $vmlist->{ids};
    for my $vmid (keys %$ids) {
        next if !$vmid; # skip CT0

        my $entry = $ids->{$vmid};
        my $is_local = $entry->{node} && $entry->{node} eq $nodename;
        my $is_lxc = $entry->{type} && $entry->{type} eq 'lxc';

        $res->{$vmid}->{type} = 'lxc' if $is_local && $is_lxc;
    }

    return $res;
}
593
# Return the config path "nodes/<node>/lxc/<vmid>.conf" for a container.
# $node defaults to the local node name.
sub cfs_config_path {
    my ($vmid, $node) = @_;

    $node ||= $nodename;

    return "nodes/$node/lxc/$vmid.conf";
}
600
# Return the absolute path of a container's config file below /etc/pve.
# $node is handed on to cfs_config_path().
sub config_file {
    my ($vmid, $node) = @_;

    return "/etc/pve/" . cfs_config_path($vmid, $node);
}
607
# Read and return the parsed config of container $vmid on $node (local
# node when omitted). Dies if no config could be read.
sub load_config {
    my ($vmid, $node) = @_;

    my $cfspath = cfs_config_path($vmid, $node || $nodename);

    my $conf = PVE::Cluster::cfs_read_file($cfspath);
    defined($conf) or die "container $vmid does not exist\n";

    return $conf;
}
619
# Write $conf as the config of container $vmid on the local node, creating
# the node's lxc config directory first. The result of mkdir is not checked.
sub create_config {
    my ($vmid, $conf) = @_;

    mkdir "/etc/pve/nodes/$nodename/lxc";

    write_config($vmid, $conf);
}
628
# Remove the config file of container $vmid on the local node.
sub destroy_config {
    my ($vmid) = @_;

    my $path = config_file($vmid, $nodename);
    unlink $path;
}
634
# Store $conf as the config of container $vmid on the local node.
sub write_config {
    my ($vmid, $conf) = @_;

    PVE::Cluster::cfs_write_file(cfs_config_path($vmid), $conf);
}
642
# flock: one file handle is used per process, so locking the same file
# again from within the same process succeeds.
# NOTE(review): $lock_handles is not referenced in this part of the file -
# confirm whether it is still needed.

my $lock_handles = {};

# directory that holds the per-container config lock files
my $lockdir = "/run/lock/lxc";
648
# Return the path of the lock file that guards the config of $vmid.
sub config_file_lock {
    my ($vmid) = @_;

    return $lockdir . "/pve-config-" . $vmid . ".lock";
}
654
# Run $code->(@param) while holding the config lock of $vmid, passing
# $timeout on to lock_file(). Creates the lock directory if missing.
# Re-throws the error left in $@; otherwise returns lock_file()'s result.
sub lock_config_full {
    my ($vmid, $timeout, $code, @param) = @_;

    mkdir $lockdir if !-d $lockdir;

    my $res = lock_file(config_file_lock($vmid), $timeout, $code, @param);
    die $@ if $@;

    return $res;
}
668
# Like lock_config_full(), but goes through lock_file_full() and passes the
# additional $shared argument on to it.
sub lock_config_mode {
    my ($vmid, $timeout, $shared, $code, @param) = @_;

    mkdir $lockdir if !-d $lockdir;

    my $res = lock_file_full(config_file_lock($vmid), $timeout, $shared, $code, @param);
    die $@ if $@;

    return $res;
}
682
# Run $code->(@param) under the config lock of $vmid with a timeout of 10.
sub lock_config {
    my ($vmid, $code, @param) = @_;

    my $timeout = 10;

    return lock_config_full($vmid, $timeout, $code, @param);
}
688
# Tell whether $name is a config key known to $confdesc.
sub option_exists {
    my ($name) = @_;

    my $desc = $confdesc->{$name};

    return defined($desc);
}
694
# Add the JSON properties for the create and set functions: copy every
# option of $confdesc except 'parent' and 'snaptime' into $prop, leaving
# entries that already hold a true value untouched. Returns $prop.
sub json_config_properties {
    my ($prop) = @_;

    my @options = grep { $_ ne 'parent' && $_ ne 'snaptime' } keys %$confdesc;

    for my $opt (@options) {
        $prop->{$opt} = $confdesc->{$opt} if !$prop->{$opt};
    }

    return $prop;
}
707
708 # container status helpers
709
# Return a hash { vmid => 1 } for every container that has a socket
# "@/var/lib/lxc/<vmid>/command" listed in /proc/net/unix. Returns an empty
# hash if that file cannot be opened.
sub list_active_containers {

    # a similar test is used by list_active_containers in lxccontainer.c
    my $res = {};

    my $fh = IO::File->new("/proc/net/unix", "r");
    return $res if !$fh;

    while (defined(my $line = <$fh>)) {
        # the last column holds the socket path
        next if $line !~ m/^[a-f0-9]+:\s\S+\s\S+\s\S+\s\S+\s\S+\s\d+\s(\S+)$/;
        my $socket = $1;

        if ($socket =~ m!^@/var/lib/lxc/(\d+)/command$!) {
            $res->{$1} = 1;
        }
    }

    close($fh);

    return $res;
}
733
# warning: this is slow
# Returns 1 if $vmid is among the active containers, undef otherwise.
sub check_running {
    my ($vmid) = @_;

    my $active = list_active_containers();

    return defined($active->{$vmid}) ? 1 : undef;
}
744
# Return the result of PVE::Tools::df() for the root directory of process
# $pid (second argument 1). $vmid is accepted but not used.
sub get_container_disk_usage {
    my ($vmid, $pid) = @_;

    my $rootdir = "/proc/$pid/root/";

    return PVE::Tools::df($rootdir, 1);
}
750
# Per-container CPU sample (time, used, cpu) kept between vmstatus() calls.
my $last_proc_vmid_stat;

# Read cpuacct.stat of container $vmid and return { utime, stime } taken
# from its "user" and "system" lines. Returns an empty hash if the content
# does not have the expected layout.
my $parse_cpuacct_stat = sub {
    my ($vmid) = @_;

    my $raw = read_cgroup_value('cpuacct', $vmid, 'cpuacct.stat', 1);

    my %stat;
    @stat{qw(utime stime)} = ($1, $2) if $raw =~ m/^user (\d+)\nsystem (\d+)\n/;

    return \%stat;
};
769
# Collect status information for containers.
#
# With $opt_vmid only that container is examined, otherwise all containers
# returned by config_list(). Returns a hash keyed by vmid; each entry holds
# type, pid (if running), status, name, cpus, disk/maxdisk, mem/maxmem,
# swap/maxswap, uptime, cpu, netin/netout, diskread/diskwrite and template.
#
# CPU usage is derived from the difference to the sample remembered in
# $last_proc_vmid_stat, so the first call for a container reports 0.
#
# Fixes against the previous revision:
#  * if stat() on /proc/<pid> fails (the process went away), uptime stays 0
#    instead of becoming the current epoch time;
#  * missing cpuacct values count as 0 instead of being used undefined;
#  * the blkio loop uses its captured value rather than $2.
sub vmstatus {
    my ($opt_vmid) = @_;

    my $list = $opt_vmid ? { $opt_vmid => { type => 'lxc' }} : config_list();

    my $active_hash = list_active_containers();

    my $cpucount = $cpuinfo->{cpus} || 1;

    my $cdtime = gettimeofday;

    my $uptime = (PVE::ProcFSTools::read_proc_uptime(1))[0];

    # first pass: static data from the config plus defaults for all counters
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};

        eval { $d->{pid} = find_lxc_pid($vmid) if defined($active_hash->{$vmid}); };
        warn $@ if $@; # ignore errors (consider them stopped)

        $d->{status} = $d->{pid} ? 'running' : 'stopped';

        my $cfspath = cfs_config_path($vmid);
        my $conf = PVE::Cluster::cfs_read_file($cfspath) || {};

        $d->{name} = $conf->{'hostname'} || "CT$vmid";
        $d->{name} =~ s/[\s]//g;

        $d->{cpus} = $conf->{cpulimit} || $cpucount;

        if ($d->{pid}) {
            my $res = get_container_disk_usage($vmid, $d->{pid});
            $d->{disk} = $res->{used};
            $d->{maxdisk} = $res->{total};
        } else {
            $d->{disk} = 0;
            # use 4GB by default ??
            if (my $rootfs = $conf->{rootfs}) {
                my $rootinfo = parse_ct_rootfs($rootfs);
                $d->{maxdisk} = int(($rootinfo->{size} || 4)*1024*1024)*1024;
            } else {
                $d->{maxdisk} = 4*1024*1024*1024;
            }
        }

        $d->{mem} = 0;
        $d->{swap} = 0;
        $d->{maxmem} = ($conf->{memory}||512)*1024*1024;
        $d->{maxswap} = ($conf->{swap}//0)*1024*1024;

        $d->{uptime} = 0;
        $d->{cpu} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $d->{template} = is_template($conf);
    }

    # second pass: live values for running containers
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};
        my $pid = $d->{pid};

        next if !$pid; # skip stopped CTs

        my $ctime = (stat("/proc/$pid"))[10]; # 10 = ctime
        # the method lxcfs uses; keep 0 if the process vanished meanwhile
        $d->{uptime} = time - $ctime if defined($ctime);

        $d->{mem} = read_cgroup_value('memory', $vmid, 'memory.usage_in_bytes');
        $d->{swap} = read_cgroup_value('memory', $vmid, 'memory.memsw.usage_in_bytes') - $d->{mem};

        my $blkio_bytes = read_cgroup_value('blkio', $vmid, 'blkio.throttle.io_service_bytes', 1);
        my @bytes = split(/\n/, $blkio_bytes);
        foreach my $byte (@bytes) {
            if (my ($key, $value) = $byte =~ /(Read|Write)\s+(\d+)/) {
                $d->{diskread} = $value if $key eq 'Read';
                $d->{diskwrite} = $value if $key eq 'Write';
            }
        }

        my $pstat = &$parse_cpuacct_stat($vmid);

        my $used = ($pstat->{utime} // 0) + ($pstat->{stime} // 0);

        my $old = $last_proc_vmid_stat->{$vmid};
        if (!$old) {
            # first sample for this container: nothing to compare against
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($cdtime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            # interval too short: report the previous value again
            $d->{cpu} = $old->{cpu};
        }
    }

    my $netdev = PVE::ProcFSTools::read_proc_net_dev();

    foreach my $dev (keys %$netdev) {
        # devices are named veth<vmid>i<index>
        next if $dev !~ m/^veth([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $list->{$vmid};

        next if !$d;

        # the device's receive counter is added to netout, transmit to netin
        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};

    }

    return $list;
}
898
# Classify a mount point source string:
#   'device' - starts with "/dev/"
#   'bind'   - any other string starting with "/"
#   'volume' - everything else
sub classify_mountpoint {
    my ($vol) = @_;

    return 'volume' if $vol !~ m!^/!;

    return $vol =~ m!^/dev/! ? 'device' : 'bind';
}
907
# Parse a mount point property string $data against schema $desc.
#
# An undefined $data is treated as ''. A 'size' property is replaced by the
# result of PVE::JSONSchema::parse_size(), and 'type' is set from
# classify_mountpoint(). On a parse error or an invalid size the function
# returns undef when $noerr is true and dies otherwise.
#
# Fix against the previous revision: the "invalid size" message used to
# interpolate the variable that had just been overwritten with undef; it
# now shows the offending value.
my $parse_ct_mountpoint_full = sub {
    my ($desc, $data, $noerr) = @_;

    $data //= '';

    my $res;
    eval { $res = PVE::JSONSchema::parse_property_string($desc, $data) };
    if ($@) {
        return undef if $noerr;
        die $@;
    }

    if (defined(my $size = $res->{size})) {
        my $parsed = PVE::JSONSchema::parse_size($size);
        if (!defined($parsed)) {
            return undef if $noerr;
            die "invalid size: $size\n";
        }
        $res->{size} = $parsed;
    }

    $res->{type} = classify_mountpoint($res->{volume});

    return $res;
};
933
# Parse a 'rootfs' property string. A successful result gets mp => '/'.
# $noerr is handed on to the parser, which then returns undef on errors.
sub parse_ct_rootfs {
    my ($data, $noerr) = @_;

    my $rootfs = $parse_ct_mountpoint_full->($rootfs_desc, $data, $noerr);

    if (defined($rootfs)) {
        $rootfs->{mp} = '/';
    }

    return $rootfs;
}
943
# Parse an 'mpN' property string against $mp_desc.
# $noerr is handed on to the parser, which then returns undef on errors.
sub parse_ct_mountpoint {
    my ($data, $noerr) = @_;

    return $parse_ct_mountpoint_full->($mp_desc, $data, $noerr);
}
949
# Turn a parsed mount point back into a property string. The 'type' key is
# always skipped; 'mp' is skipped as well when $nomp is true.
sub print_ct_mountpoint {
    my ($info, $nomp) = @_;

    my $skip = $nomp ? [ 'type', 'mp' ] : [ 'type' ];

    return PVE::JSONSchema::print_property_string($info, $mp_desc, $skip);
}
956
# Turn a parsed network hash back into a 'netN' property string.
sub print_lxc_network {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $netconf_desc);
}
961
# Parse a 'netN' property string. Returns an empty hash for false $data.
# Otherwise 'type' is forced to 'veth' and a missing 'hwaddr' is filled
# with a random address.
sub parse_lxc_network {
    my ($data) = @_;

    return {} if !$data;

    my $net = PVE::JSONSchema::parse_property_string($netconf_desc, $data);

    $net->{type} = 'veth';
    $net->{hwaddr} ||= PVE::Tools::random_ether_addr();

    return $net;
}
976
# Read /sys/fs/cgroup/<group>/lxc/<vmid>/<name>. Returns the whole file
# content when $full is true, otherwise only its first line.
sub read_cgroup_value {
    my ($group, $vmid, $name, $full) = @_;

    my $path = "/sys/fs/cgroup/$group/lxc/$vmid/$name";

    return $full
        ? PVE::Tools::file_get_contents($path)
        : PVE::Tools::file_read_firstline($path);
}
986
# Write $value to /sys/fs/cgroup/<group>/lxc/<vmid>/<name>. Nothing is
# written when that path does not exist.
sub write_cgroup_value {
    my ($group, $vmid, $name, $value) = @_;

    my $path = "/sys/fs/cgroup/$group/lxc/$vmid/$name";

    if (-e $path) {
        PVE::ProcFSTools::write_proc_entry($path, $value);
    }
}
994
# Scan /proc for processes whose command line is exactly
# "lxc-console -n <vmid>" (optionally with the /usr/bin/ prefix).
# Returns a hash { vmid => [ pid, ... ] }.
sub find_lxc_console_pids {

    my $res = {};

    PVE::Tools::dir_glob_foreach('/proc', '\d+', sub {
        my ($pid) = @_;

        my $cmdline = PVE::Tools::file_read_firstline("/proc/$pid/cmdline");
        return if !$cmdline;

        # arguments are separated by NUL bytes
        my @argv = split(/\0/, $cmdline);

        # search for lxc-console -n <vmid>
        my $is_console = scalar(@argv) == 3
            && $argv[1] eq '-n'
            && $argv[2] =~ m/^\d+$/
            && $argv[0] =~ m|^(/usr/bin/)?lxc-console$|;
        return if !$is_console;

        push @{$res->{$argv[2]}}, $pid;
    });

    return $res;
}
1020
# Return the PID that "lxc-info -n <vmid> -p" prints on its "PID:" line.
# Dies if no such line was found.
sub find_lxc_pid {
    my ($vmid) = @_;

    my $pid;

    PVE::Tools::run_command(
        ['lxc-info', '-n', $vmid, '-p'],
        outfunc => sub {
            my ($line) = @_;
            $pid = $1 if $line =~ m/^PID:\s+(\d+)$/;
        },
    );

    $pid or die "unable to get PID for CT $vmid (not running?)\n";

    return $pid;
}
1035
# Note: we cannot use Net::IP, because that only allows strict
# CIDR networks
#
# Parse "<ipv4 address>/<prefix length>" with a prefix length of 8 to 32.
# Returns { address, netmask }. On failure returns undef when $noerr is
# true and dies otherwise.
sub parse_ipv4_cidr {
    my ($cidr, $noerr) = @_;

    if ($cidr =~ m!^($IPV4RE)(?:/(\d+))$!) {
        my ($address, $prefix) = ($1, $2);

        if ($prefix > 7 && $prefix <= 32) {
            return {
                address => $address,
                netmask => $PVE::Network::ipv4_reverse_mask->[$prefix],
            };
        }
    }

    return undef if $noerr;

    die "unable to parse ipv4 address/mask\n";
}
1049
# Die if the config carries a true 'lock' value.
sub check_lock {
    my ($conf) = @_;

    my $lock = $conf->{'lock'};

    die "VM is locked ($lock)\n" if $lock;
}
1055
# Die with "$err_msg - protection mode enabled" if the config has a true
# 'protection' value.
sub check_protection {
    my ($vm_conf, $err_msg) = @_;

    die "$err_msg - protection mode enabled\n" if $vm_conf->{protection};
}
1063
# Generate /var/lib/lxc/<vmid>/config from the container config $conf.
#
# For templates only an existing config file is removed. Otherwise the
# file content is built line by line, the directory
# /var/lib/lxc/<vmid>/rootfs is created and the config file is written.
# Dies when 'arch' or 'ostype' is missing or the ostype is not supported.
# $storage_cfg is accepted but not used.
sub update_lxc_config {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $dir = "/var/lib/lxc/$vmid";

    if ($conf->{template}) {
        unlink "$dir/config";
        return;
    }

    # config lines without the trailing newline; joined at the end
    my @out;

    die "missing 'arch' - internal error" if !$conf->{arch};
    push @out, "lxc.arch = $conf->{arch}";

    my $unprivileged = $conf->{unprivileged};
    my $custom_idmap = grep { $_->[0] eq 'lxc.id_map' } @{$conf->{lxc}};

    my $ostype = $conf->{ostype} || die "missing 'ostype' - internal error";

    # prefer the ostype specific include file, fall back to the generic one
    my $pick_include = sub {
        my ($basename) = @_;
        my $specific = "/usr/share/lxc/config/$ostype.$basename";
        return -f $specific ? $specific : "/usr/share/lxc/config/$basename";
    };

    if ($ostype !~ /^(?:debian | ubuntu | centos | fedora | opensuse | archlinux | alpine)$/x) {
        die "implement me (ostype $ostype)";
    }
    push @out, "lxc.include = " . $pick_include->('common.conf');
    if ($unprivileged || $custom_idmap) {
        push @out, "lxc.include = " . $pick_include->('userns.conf');
    }

    # WARNING: DO NOT REMOVE this without making sure that loop device nodes
    # cannot be exposed to the container with r/w access (cgroup perms).
    # When this is enabled mounts will still remain in the monitor's namespace
    # after the container unmounted them and thus will not detach from their
    # files while the container is running!
    push @out, "lxc.monitor.unshare = 1";

    # Should we read them from /etc/subuid?
    if ($unprivileged && !$custom_idmap) {
        push @out, "lxc.id_map = u 0 100000 65536";
        push @out, "lxc.id_map = g 0 100000 65536";
    }

    if (!has_dev_console($conf)) {
        push @out, "lxc.console = none";
        push @out, "lxc.cgroup.devices.deny = c 5:1 rwm";
    }

    my $ttycount = get_tty_count($conf);
    push @out, "lxc.tty = $ttycount";

    # some init scripts expect a linux terminal (turnkey).
    push @out, "lxc.environment = TERM=linux";

    my $utsname = $conf->{hostname} || "CT$vmid";
    push @out, "lxc.utsname = $utsname";

    my $memory = $conf->{memory} || 512;
    my $swap = $conf->{swap} // 0;

    # the memsw limit covers memory plus swap
    my $lxcmem = int($memory*1024*1024);
    my $lxcswap = int(($memory + $swap)*1024*1024);
    push @out, "lxc.cgroup.memory.limit_in_bytes = $lxcmem";
    push @out, "lxc.cgroup.memory.memsw.limit_in_bytes = $lxcswap";

    if (my $cpulimit = $conf->{cpulimit}) {
        my $quota = int(100000*$cpulimit);
        push @out, "lxc.cgroup.cpu.cfs_period_us = 100000";
        push @out, "lxc.cgroup.cpu.cfs_quota_us = $quota";
    }

    my $shares = $conf->{cpuunits} || 1024;
    push @out, "lxc.cgroup.cpu.shares = $shares";

    # the parsed result is not used; an invalid rootfs string still dies here
    my $mountpoint = parse_ct_rootfs($conf->{rootfs});

    push @out, "lxc.rootfs = $dir/rootfs";

    my $netcount = 0;
    for my $opt (keys %$conf) {
        next if $opt !~ m/^net(\d+)$/;
        my $ind = $1;
        my $net = parse_lxc_network($conf->{$opt});
        $netcount++;
        push @out, "lxc.network.type = veth";
        push @out, "lxc.network.veth.pair = veth${vmid}i${ind}";
        push @out, "lxc.network.hwaddr = $net->{hwaddr}" if defined($net->{hwaddr});
        push @out, "lxc.network.name = $net->{name}" if defined($net->{name});
        push @out, "lxc.network.mtu = $net->{mtu}" if defined($net->{mtu});
    }

    # raw lxc.* entries are appended verbatim
    if (my $lxcconf = $conf->{lxc}) {
        for my $entry (@$lxcconf) {
            my ($k, $v) = @$entry;
            $netcount++ if $k eq 'lxc.network.type';
            push @out, "$k = $v";
        }
    }

    push @out, "lxc.network.type = empty" if !$netcount;

    my $raw = join('', map { "$_\n" } @out);

    File::Path::mkpath("$dir/rootfs");

    PVE::Tools::file_set_contents("$dir/config", $raw);
}
1174
# verify and cleanup nameserver list (replace \0 with ' ')
# Every entry of the split list is passed to pve_verify_ip(); the entries
# are returned joined by single spaces.
sub verify_nameserver_list {
    my ($nameserver_list) = @_;

    my @servers = PVE::Tools::split_list($nameserver_list);

    for my $server (@servers) {
        PVE::JSONSchema::pve_verify_ip($server);
    }

    return join(' ', @servers);
}
1187
# Normalize a search domain list: split it with PVE::Tools::split_list()
# and return the entries joined by single spaces.
sub verify_searchdomain_list {
    my ($searchdomain_list) = @_;

    # todo: should we add checks for valid dns domains?
    my @domains = PVE::Tools::split_list($searchdomain_list);

    return join(' ', @domains);
}
1199
# Return 1 if any mountpoint of $config is of type 'volume' and refers to
# $volid, 0 otherwise.
sub is_volume_in_use {
    my ($config, $volid) = @_;

    my $found = 0;

    my $check = sub {
        my ($ms, $mountpoint) = @_;

        return if $found; # already decided, skip remaining mountpoints
        return if $mountpoint->{type} ne 'volume';

        $found = 1 if $mountpoint->{volume} eq $volid;
    };

    foreach_mountpoint($config, $check);

    return $found;
}
1214
# Record $volid in a free 'unusedN' slot of $config.
# Returns the key that was used, or nothing if the volume is already
# listed as unused. Dies if no slot is free.
sub add_unused_volume {
    my ($config, $volid) = @_;

    my $free_slot;

    # Walk from the highest index down, so the last free slot seen (and
    # therefore the one used) is the lowest free index.
    foreach my $idx (reverse(0 .. $MAX_UNUSED_DISKS - 1)) {
        my $slot = "unused$idx";
        my $current = $config->{$slot};

        if ($current) {
            return if $current eq $volid; # do not add duplicates
        } else {
            $free_slot = $slot;
        }
    }

    die "Too many unused volumes - please delete them first.\n" if !$free_slot;

    $config->{$free_slot} = $volid;

    return $free_slot;
}
1234
# Apply option deletions and updates to a container configuration.
#
# Parameters:
#   $vmid    - container ID
#   $conf    - configuration hash, modified in place
#   $running - true if the container is running; the config is then
#              written after every processed option and supported
#              changes are applied to the live container
#   $param   - hash of option => new value; 'memory' and 'swap' are taken
#              out of it with PVE::Tools::extract_param()
#   $delete  - optional array ref with names of options to delete
#
# Options that cannot be changed on a running container are collected and
# reported with one die() at the very end, after everything else has been
# processed. Unknown options die with "implement me".
sub update_pct_config {
    my ($vmid, $conf, $running, $param, $delete) = @_;

    my @nohotplug;

    my $new_disks = 0;
    my @deleted_volumes;

    my $rootdir;
    if ($running) {
        my $pid = find_lxc_pid($vmid);
        $rootdir = "/proc/$pid/root";
    }

    # Returns 1 (and remembers the option names) when the container is
    # running, so callers can skip options that cannot be hotplugged.
    my $hotplug_error = sub {
        if ($running) {
            push @nohotplug, @_;
            return 1;
        } else {
            return 0;
        }
    };

    if (defined($delete)) {
        foreach my $opt (@$delete) {
            if (!exists($conf->{$opt})) {
                warn "no such option: $opt\n";
                next;
            }

            if ($opt eq 'hostname' || $opt eq 'memory' || $opt eq 'rootfs') {
                die "unable to delete required option '$opt'\n";
            } elsif ($opt eq 'swap') {
                delete $conf->{$opt};
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", -1);
            } elsif ($opt eq 'description' || $opt eq 'onboot' || $opt eq 'startup') {
                delete $conf->{$opt};
            } elsif ($opt eq 'nameserver' || $opt eq 'searchdomain' ||
                     $opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
                next if $hotplug_error->($opt);
                delete $conf->{$opt};
            } elsif ($opt =~ m/^net(\d+)$/) {
                # \d+ (not \d) so that multi-digit interfaces such as
                # net10 can be deleted, matching the update branch below
                delete $conf->{$opt};
                next if !$running;
                my $netid = $1;
                PVE::Network::veth_delete("veth${vmid}i$netid");
            } elsif ($opt eq 'protection') {
                delete $conf->{$opt};
            } elsif ($opt =~ m/^unused(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                push @deleted_volumes, $conf->{$opt};
                delete $conf->{$opt};
            } elsif ($opt =~ m/^mp(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                my $mp = parse_ct_mountpoint($conf->{$opt});
                delete $conf->{$opt};
                # keep the volume around as 'unusedN' unless another
                # mountpoint still references it
                if ($mp->{type} eq 'volume' && !is_volume_in_use($conf, $mp->{volume})) {
                    add_unused_volume($conf, $mp->{volume});
                }
            } elsif ($opt eq 'unprivileged') {
                die "unable to delete read-only option: '$opt'\n";
            } else {
                die "implement me (delete: $opt)"
            }
            write_config($vmid, $conf) if $running;
        }
    }

    # There's no separate swap size to configure, there's memory and "total"
    # memory (iow. memory+swap). This means we have to change them together.
    my $wanted_memory = PVE::Tools::extract_param($param, 'memory');
    my $wanted_swap = PVE::Tools::extract_param($param, 'swap');
    if (defined($wanted_memory) || defined($wanted_swap)) {

        my $old_memory = ($conf->{memory} || 512);
        my $old_swap = ($conf->{swap} || 0);

        $wanted_memory //= $old_memory;
        $wanted_swap //= $old_swap;

        my $total = $wanted_memory + $wanted_swap;
        if ($running) {
            my $old_total = $old_memory + $old_swap;
            # The write order depends on whether the total grows or
            # shrinks: when growing, memsw is raised first; otherwise the
            # memory limit is written first.
            if ($total > $old_total) {
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
                write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
            } else {
                write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
            }
        }
        $conf->{memory} = $wanted_memory;
        $conf->{swap} = $wanted_swap;

        write_config($vmid, $conf) if $running;
    }

    foreach my $opt (keys %$param) {
        my $value = $param->{$opt};
        if ($opt eq 'hostname') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'onboot') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt eq 'startup') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
            next if $hotplug_error->($opt);
            $conf->{$opt} = $value;
        } elsif ($opt eq 'nameserver') {
            next if $hotplug_error->($opt);
            my $list = verify_nameserver_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'searchdomain') {
            next if $hotplug_error->($opt);
            my $list = verify_searchdomain_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'cpulimit') {
            next if $hotplug_error->($opt); # FIXME: hotplug
            $conf->{$opt} = $value;
        } elsif ($opt eq 'cpuunits') {
            $conf->{$opt} = $value;
            write_cgroup_value("cpu", $vmid, "cpu.shares", $value);
        } elsif ($opt eq 'description') {
            $conf->{$opt} = PVE::Tools::encode_text($value);
        } elsif ($opt =~ m/^net(\d+)$/) {
            my $netid = $1;
            my $net = parse_lxc_network($value);
            if (!$running) {
                $conf->{$opt} = print_lxc_network($net);
            } else {
                update_net($vmid, $conf, $opt, $net, $netid, $rootdir);
            }
        } elsif ($opt eq 'protection') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt =~ m/^mp(\d+)$/) {
            next if $hotplug_error->($opt);
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            my $old = $conf->{$opt};
            $conf->{$opt} = $value;
            if (defined($old)) {
                my $mp = parse_ct_mountpoint($old);
                if ($mp->{type} eq 'volume' && !is_volume_in_use($conf, $mp->{volume})) {
                    add_unused_volume($conf, $mp->{volume});
                }
            }
            $new_disks = 1;
        } elsif ($opt eq 'rootfs') {
            next if $hotplug_error->($opt);
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            my $old = $conf->{$opt};
            $conf->{$opt} = $value;
            if (defined($old)) {
                my $mp = parse_ct_rootfs($old);
                if ($mp->{type} eq 'volume' && !is_volume_in_use($conf, $mp->{volume})) {
                    add_unused_volume($conf, $mp->{volume});
                }
            }
        } elsif ($opt eq 'unprivileged') {
            die "unable to modify read-only option: '$opt'\n";
        } else {
            die "implement me: $opt";
        }
        write_config($vmid, $conf) if $running;
    }

    if (@deleted_volumes) {
        my $storage_cfg = PVE::Storage::config();
        foreach my $volume (@deleted_volumes) {
            delete_mountpoint_volume($storage_cfg, $vmid, $volume);
        }
    }

    if ($new_disks) {
        my $storage_cfg = PVE::Storage::config();
        create_disks($storage_cfg, $vmid, $conf, $conf);
    }

    # This should be the last thing we do here
    if ($running && scalar(@nohotplug)) {
        die "unable to modify " . join(',', @nohotplug) . " while container is running\n";
    }
}
1419
# True unless the 'console' option is explicitly set to a false value.
sub has_dev_console {
    my ($conf) = @_;

    # De Morgan form of: !(defined(console) && !console)
    return !defined($conf->{console}) || !!$conf->{console};
}
1425
# Return the configured 'tty' value, falling back to the default from
# $confdesc when it is not defined.
sub get_tty_count {
    my ($conf) = @_;

    my $tty = $conf->{tty};
    return $tty if defined($tty);

    return $confdesc->{tty}->{default};
}
1431
# Return the configured 'cmode' value, falling back to the default from
# $confdesc when it is not defined.
sub get_cmode {
    my ($conf) = @_;

    my $cmode = $conf->{cmode};
    return $cmode if defined($cmode);

    return $confdesc->{cmode}->{default};
}
1437
# Build the command (as an array ref) used to open a console for the
# container, depending on its console mode. Dies on an unknown mode.
sub get_console_command {
    my ($vmid, $conf) = @_;

    my $cmode = get_cmode($conf);

    return ['lxc-console', '-n', $vmid, '-t', 0] if $cmode eq 'console';
    return ['lxc-console', '-n', $vmid] if $cmode eq 'tty';
    return ['lxc-attach', '--clear-env', '-n', $vmid] if $cmode eq 'shell';

    die "internal error";
}
1453
# Return ($ipv4, $ipv6) taken from the 'net0' entry, with any trailing
# "/<prefix length>" removed. Keyword values (dhcp/manual for IPv4,
# auto/dhcp/manual for IPv6) are reported as undef.
# Returns undef if there is no 'net0' entry.
sub get_primary_ips {
    my ($conf) = @_;

    # return data from net0
    my $net0 = $conf->{net0};
    return undef if !defined($net0);

    my $net = parse_lxc_network($net0);

    # Strip the prefix length from a real address; map keywords to undef.
    # False values (unset/empty) are passed through unchanged.
    my $plain_address = sub {
        my ($addr, $keyword_re) = @_;

        return $addr if !$addr;
        return undef if $addr =~ $keyword_re;

        $addr =~ s!/\d+$!!;
        return $addr;
    };

    my $ipv4 = $plain_address->($net->{ip}, qr/^(dhcp|manual)$/);
    my $ipv6 = $plain_address->($net->{ip6}, qr/^(auto|dhcp|manual)$/);

    return ($ipv4, $ipv6);
}
1481
# Free the storage volume behind a mountpoint, but only if it is a
# storage-managed volume (classify_mountpoint() returns 'volume') that is
# owned by container $vmid. Anything else is left untouched.
sub delete_mountpoint_volume {
    my ($storage_cfg, $vmid, $volume) = @_;

    return if classify_mountpoint($volume) ne 'volume';

    my ($vtype, $name, $owner) = PVE::Storage::parse_volname($storage_cfg, $volume);

    # $owner may be undefined if the storage reports no owner for the
    # volume; never free such volumes (and avoid a numeric comparison
    # against undef).
    PVE::Storage::vdisk_free($storage_cfg, $volume)
        if defined($owner) && $vmid == $owner;
}
1490
# Remove a container: free the volumes of all its mountpoints, remove its
# directory under /var/lib/lxc and destroy its configuration.
sub destroy_lxc_container {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $free_volume = sub {
        my ($ms, $mountpoint) = @_;
        delete_mountpoint_volume($storage_cfg, $vmid, $mountpoint->{volume});
    };
    foreach_mountpoint($conf, $free_volume);

    my $ctdir = "/var/lib/lxc/$vmid";

    # results are deliberately not checked
    rmdir "$ctdir/rootfs";
    unlink "$ctdir/config";
    rmdir $ctdir;

    destroy_config($vmid);

    # lxc-destroy is intentionally not used here:
    #my $cmd = ['lxc-destroy', '-n', $vmid ];
    #PVE::Tools::run_command($cmd);
}
1507
# Deactivate the container's volumes after it was stopped, unless
# $keepActive is set. Errors are only reported as warnings.
sub vm_stop_cleanup {
    my ($storage_cfg, $vmid, $conf, $keepActive) = @_;

    eval {
        return if $keepActive; # leaves the eval block only

        my $volumes = get_vm_volumes($conf);
        PVE::Storage::deactivate_volumes($storage_cfg, $volumes);
    };
    warn $@ if $@; # avoid errors - just warn
}
1520
# Numeric "not equal" that tolerates undefined operands: two undefined
# values are equal, a defined and an undefined value differ.
my $safe_num_ne = sub {
    my ($lhs, $rhs) = @_;

    my $lhs_set = defined($lhs);
    my $rhs_set = defined($rhs);

    return 0 if !$lhs_set && !$rhs_set;
    return 1 if !$lhs_set || !$rhs_set;

    return $lhs != $rhs;
};
1530
# String "not equal" that tolerates undefined operands: two undefined
# values are equal, a defined and an undefined value differ.
my $safe_string_ne = sub {
    my ($lhs, $rhs) = @_;

    my $lhs_set = defined($lhs);
    my $rhs_set = defined($rhs);

    return 0 if !$lhs_set && !$rhs_set;
    return 1 if !$lhs_set || !$rhs_set;

    return $lhs ne $rhs;
};
1540
# Apply a changed network device definition to a running container.
#
# - no previous definition: the interface is hotplugged
# - hwaddr or name changed: the veth pair is deleted and re-created
# - bridge, tag or firewall changed: the veth is re-plugged
# Afterwards the IP configuration is updated via update_ipconfig().
# The config is written after every intermediate step.
sub update_net {
    my ($vmid, $conf, $opt, $newnet, $netid, $rootdir) = @_;

    # for when there are physical interfaces
    die "cannot update interface of type $newnet->{type}"
        if $newnet->{type} ne 'veth';

    my $veth = "veth${vmid}i${netid}";
    my $eth = $newnet->{name};

    my $oldnetcfg = $conf->{$opt};

    if (!$oldnetcfg) {
        hotplug_net($vmid, $conf, $opt, $newnet, $netid);
    } else {
        my $oldnet = parse_lxc_network($oldnetcfg);
        my @plug_props = qw(bridge tag firewall);

        if ($safe_string_ne->($oldnet->{hwaddr}, $newnet->{hwaddr}) ||
            $safe_string_ne->($oldnet->{name}, $newnet->{name})) {

            PVE::Network::veth_delete($veth);
            delete $conf->{$opt};
            write_config($vmid, $conf);

            hotplug_net($vmid, $conf, $opt, $newnet, $netid);

        } elsif ($safe_string_ne->($oldnet->{bridge}, $newnet->{bridge}) ||
                 $safe_num_ne->($oldnet->{tag}, $newnet->{tag}) ||
                 $safe_num_ne->($oldnet->{firewall}, $newnet->{firewall})) {

            if ($oldnet->{bridge}) {
                PVE::Network::tap_unplug($veth);
                # record the unplugged state before plugging again
                delete @{$oldnet}{@plug_props};
                $conf->{$opt} = print_lxc_network($oldnet);
                write_config($vmid, $conf);
            }

            PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});
            foreach my $prop (@plug_props) {
                $oldnet->{$prop} = $newnet->{$prop} if $newnet->{$prop};
            }
            $conf->{$opt} = print_lxc_network($oldnet);
            write_config($vmid, $conf);
        }
    }

    update_ipconfig($vmid, $conf, $opt, $eth, $newnet, $rootdir);
}
1590
# Create a veth pair for network device $netid, plug the host side,
# move the peer into the running container and bring it up. Finally the
# link-level properties (no IP settings) are stored in the config and
# the config is written.
sub hotplug_net {
    my ($vmid, $conf, $opt, $newnet, $netid) = @_;

    my $veth = "veth${vmid}i${netid}";
    my $vethpeer = "${veth}p";
    my $eth = $newnet->{name};

    PVE::Network::veth_create($veth, $vethpeer, $newnet->{bridge}, $newnet->{hwaddr});
    PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});

    # attach peer in container
    PVE::Tools::run_command(['lxc-device', '-n', $vmid, 'add', $vethpeer, "$eth" ]);

    # link up peer in container
    PVE::Tools::run_command(
        ['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', '/sbin/ip', 'link', 'set', $eth ,'up' ]);

    my $done = { type => 'veth' };
    foreach my $prop (qw(bridge tag firewall hwaddr name)) {
        my $value = $newnet->{$prop};
        $done->{$prop} = $value if $value;
    }
    $conf->{$opt} = print_lxc_network($done);

    write_config($vmid, $conf);
}
1617
# Apply changed IP address / gateway settings of a network device to a
# running container and persist them.
#
# Parameters:
#   $vmid    - container ID
#   $conf    - configuration hash, modified in place
#   $opt     - config key of the network device (e.g. 'net0')
#   $eth     - interface name inside the container
#   $newnet  - parsed new network definition
#   $rootdir - root directory handed to PVE::LXC::Setup->new()
#
# IPv4 and IPv6 are handled one after the other. For each family the new
# address is added first, then the gateway is replaced, then the old
# address is removed. Failures in the first two steps cancel the change
# for that family (with a warning) without touching the config.
sub update_ipconfig {
    my ($vmid, $conf, $opt, $eth, $newnet, $rootdir) = @_;

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);

    my $optdata = parse_lxc_network($conf->{$opt});
    my $deleted = [];
    my $added = [];
    # run a command inside the container's network namespace
    my $nscmd = sub {
        my $cmdargs = shift;
        PVE::Tools::run_command(['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', @_], %$cmdargs);
    };
    my $ipcmd = sub { &$nscmd({}, '/sbin/ip', @_) };

    my $change_ip_config = sub {
        my ($ipversion) = @_;

        my $family_opt = "-$ipversion";
        my $suffix = $ipversion == 4 ? '' : $ipversion;
        my $gw= "gw$suffix";
        my $ip= "ip$suffix";

        my $newip = $newnet->{$ip};
        my $newgw = $newnet->{$gw};
        my $oldip = $optdata->{$ip};

        my $change_ip = &$safe_string_ne($oldip, $newip);
        my $change_gw = &$safe_string_ne($optdata->{$gw}, $newgw);

        return if !$change_ip && !$change_gw;

        # step 1: add new IP, if this fails we cancel
        my $is_real_ip = ($newip && $newip !~ /^(?:auto|dhcp|manual)$/);
        if ($change_ip && $is_real_ip) {
            eval { &$ipcmd($family_opt, 'addr', 'add', $newip, 'dev', $eth); };
            if (my $err = $@) {
                warn $err;
                return;
            }
        }

        # step 2: replace gateway
        # If this fails we delete the added IP and cancel.
        # If it succeeds we save the config and delete the old IP, ignoring
        # errors. The config is then saved.
        # Note: 'ip route replace' also adds the route if it does not
        # exist yet.
        if ($change_gw) {
            if ($newgw) {
                eval {
                    if ($is_real_ip && !PVE::Network::is_ip_in_cidr($newgw, $newip, $ipversion)) {
                        &$ipcmd($family_opt, 'route', 'add', $newgw, 'dev', $eth);
                    }
                    &$ipcmd($family_opt, 'route', 'replace', 'default', 'via', $newgw);
                };
                if (my $err = $@) {
                    warn $err;
                    # the route was not replaced, the old IP is still available
                    # rollback (delete new IP) and cancel
                    if ($change_ip) {
                        eval { &$ipcmd($family_opt, 'addr', 'del', $newip, 'dev', $eth); };
                        warn $@ if $@; # no need to die here
                    }
                    return;
                }
            } else {
                eval { &$ipcmd($family_opt, 'route', 'del', 'default'); };
                # if the route was not deleted, the guest might have deleted it manually
                # warn and continue
                warn $@ if $@;
            }
        }

        # from this point on we save the configuration
        # step 3: delete old IP ignoring errors
        # Keyword values (auto/dhcp/manual) are not addresses and must not
        # be handed to 'ip addr del'; this is the same keyword set step 1
        # uses for the new IP.
        if ($change_ip && $oldip && $oldip !~ /^(?:auto|dhcp|manual)$/) {
            # We need to enable promote_secondaries, otherwise our newly added
            # address will be removed along with the old one.
            my $promote = 0;
            eval {
                if ($ipversion == 4) {
                    &$nscmd({ outfunc => sub { $promote = int(shift) } },
                            'cat', "/proc/sys/net/ipv4/conf/$eth/promote_secondaries");
                    &$nscmd({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=1");
                }
                &$ipcmd($family_opt, 'addr', 'del', $oldip, 'dev', $eth);
            };
            warn $@ if $@; # no need to die here

            if ($ipversion == 4) {
                # Restoring the previous sysctl value is best effort as
                # well: the addresses were already changed, so the config
                # below must still be saved.
                eval { &$nscmd({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=$promote"); };
                warn $@ if $@;
            }
        }

        foreach my $property ($ip, $gw) {
            if ($newnet->{$property}) {
                $optdata->{$property} = $newnet->{$property};
            } else {
                delete $optdata->{$property};
            }
        }
        $conf->{$opt} = print_lxc_network($optdata);
        write_config($vmid, $conf);
        $lxc_setup->setup_network($conf);
    };

    &$change_ip_config(4);
    &$change_ip_config(6);

}
1727
1728 # Internal snapshots
1729
1730 # NOTE: Snapshot create/delete involves several non-atomic
1731 # actions, and can take a long time.
1732 # So we try to avoid locking the file and use the 'lock' variable
1733 # inside the config file instead.
1734
# Copy all config entries from $source to $dest, except snapshot
# bookkeeping keys, lock, digest, description and 'unusedN' entries.
my $snapshot_copy_config = sub {
    my ($source, $dest) = @_;

    my %skip = map { $_ => 1 }
        qw(snapshots snapstate snaptime vmstate lock digest description);

    foreach my $key (keys %$source) {
        next if $skip{$key};
        next if $key =~ m/^unused\d+$/;

        $dest->{$key} = $source->{$key};
    }
};
1751
# Build a new config from snapshot $snap: the snapshot list, description
# and 'unusedN' entries are taken from the current $conf, everything else
# is copied from the snapshot via $snapshot_copy_config.
my $snapshot_apply_config = sub {
    my ($conf, $snap) = @_;

    # copy snapshot list
    my $newconf = { snapshots => $conf->{snapshots} };

    # keep description and list of unused disks
    my @kept = grep { $_ eq 'description' || $_ =~ m/^unused\d+$/ } keys %$conf;
    foreach my $key (@kept) {
        $newconf->{$key} = $conf->{$key};
    }

    $snapshot_copy_config->($snap, $newconf);

    return $newconf;
};
1770
# Placeholder: saving the VM state is not implemented, always dies.
my $snapshot_save_vmstate = sub { die "implement me - snapshot_save_vmstate\n" };
1774
# First phase of taking a snapshot: under the config lock, set the
# 'snapshot' lock in the config and add a snapshot entry in state
# "prepare" holding a copy of the current configuration.
# Returns the new snapshot entry.
sub snapshot_prepare {
    my ($vmid, $snapname, $save_vmstate, $comment) = @_;

    my $snap;

    my $prepare_fn = sub {
        my $conf = load_config($vmid);

        if (is_template($conf)) {
            die "you can't take a snapshot if it's a template\n";
        }

        check_lock($conf);

        $conf->{lock} = 'snapshot';

        if (defined($conf->{snapshots}->{$snapname})) {
            die "snapshot name '$snapname' already used\n";
        }

        my $storecfg = PVE::Storage::config();

        # workaround until mp snapshots are implemented
        my $feature = ($snapname eq 'vzdump') ? 'vzdump' : 'snapshot';
        if (!has_feature($feature, $conf, $storecfg)) {
            die "snapshot feature is not available\n";
        }

        $snap = {};
        $conf->{snapshots}->{$snapname} = $snap;

        if ($save_vmstate && check_running($vmid)) {
            $snapshot_save_vmstate->($vmid, $conf, $snapname, $storecfg);
        }

        $snapshot_copy_config->($conf, $snap);

        $snap->{snapstate} = "prepare";
        $snap->{snaptime} = time();
        $snap->{description} = $comment if $comment;

        write_config($vmid, $conf);
    };

    lock_config($vmid, $prepare_fn);

    return $snap;
}
1819
# Final phase of taking a snapshot: under the config lock, verify that
# the 'snapshot' lock and a snapshot entry in state "prepare" exist,
# clear both markers and write the config with the snapshot as parent.
sub snapshot_commit {
    my ($vmid, $snapname) = @_;

    my $commit_fn = sub {
        my $conf = load_config($vmid);

        my $lock = $conf->{lock};
        die "missing snapshot lock\n"
            if !($lock && $lock eq 'snapshot');

        my $snap = $conf->{snapshots}->{$snapname};
        die "snapshot '$snapname' does not exist\n" if !defined($snap);

        my $state = $snap->{snapstate};
        die "wrong snapshot state\n"
            if !($state && $state eq "prepare");

        delete $snap->{snapstate};
        delete $conf->{lock};

        my $newconf = $snapshot_apply_config->($conf, $snap);
        $newconf->{parent} = $snapname;

        write_config($vmid, $newconf);
    };

    lock_config($vmid, $commit_fn);
}
1848
# Check whether the storage of the container's mountpoints supports
# $feature. 'vzdump' is checked as 'snapshot', but skips mountpoints
# other than rootfs that are not flagged for backup.
# Returns 1 if supported, 0 otherwise. Dies for any checked mountpoint
# other than rootfs, as those are not implemented yet.
sub has_feature {
    my ($feature, $conf, $storecfg, $snapname) = @_;

    my $is_vzdump = $feature eq 'vzdump';
    $feature = 'snapshot' if $is_vzdump;

    my $unsupported;

    my $check = sub {
        my ($ms, $mountpoint) = @_;

        return if $unsupported; # skip further test
        return if $is_vzdump && $ms ne 'rootfs' && !$mountpoint->{backup};

        my $supported = PVE::Storage::volume_has_feature(
            $storecfg, $feature, $mountpoint->{volume}, $snapname);
        $unsupported = 1 if !$supported;

        # TODO: implement support for mountpoints
        die "unable to handle mountpoint '$ms' - feature not implemented\n"
            if $ms ne 'rootfs';
    };

    foreach_mountpoint($conf, $check);

    return $unsupported ? 0 : 1;
}
1871
# Enter namespace $which (e.g. 'mnt') of process $pid by opening
# /proc/$pid/ns/$which and calling PVE::Tools::setns() with $type.
# Dies if the namespace cannot be opened or entered.
my $enter_namespace = sub {
    my ($vmid, $pid, $which, $type) = @_;

    my $ns_path = "/proc/$pid/ns/$which";

    sysopen(my $ns_fh, $ns_path, O_RDONLY)
        or die "failed to open $which namespace of container $vmid: $!\n";

    PVE::Tools::setns(fileno($ns_fh), $type)
        or die "failed to enter $which namespace of container $vmid: $!\n";

    close $ns_fh;
};
1880
# Child-side worker of sync_container_namespace(): enter the container's
# mount namespace, signal the parent with "go", read the mount table the
# parent sends back over $socket and sync every listed mountpoint except
# lxcfs. Sync errors are only reported as warnings.
my $do_syncfs = sub {
    my ($vmid, $pid, $socket) = @_;

    $enter_namespace->($vmid, $pid, 'mnt', PVE::Tools::CLONE_NEWNS);

    # Tell the parent process to start reading our /proc/mounts
    print {$socket} "go\n";
    $socket->flush();

    # Receive /proc/self/mounts
    my $mountdata = do { local $/ = undef; <$socket> };
    close $socket;

    # Now sync all mountpoints...
    my $mounts = PVE::ProcFSTools::parse_mounts($mountdata);
    foreach my $entry (@$mounts) {
        my ($what, $dir, $fs) = @$entry;
        next if $fs eq 'fuse.lxcfs';

        eval { PVE::Tools::sync_mountpoint($dir); };
        warn $@ if $@;
    }
};
1903
# Sync all filesystems mounted inside the container's mount namespace.
#
# A child process is forked which enters the container's mount namespace.
# Once it reports "go", the parent reads /proc/<child>/mounts and sends
# the content back over a socket pair; the child then syncs every
# mountpoint. Dies if the child cannot enter the namespace or exits with
# a non-zero status. The child is always reaped, also on error paths.
sub sync_container_namespace {
    my ($vmid) = @_;
    my $pid = find_lxc_pid($vmid);

    # SOCK_DGRAM is nicer for barriers but cannot be slurped
    socketpair my $pfd, my $cfd, AF_UNIX, SOCK_STREAM, PF_UNSPEC
        or die "failed to create socketpair: $!\n";

    my $child = fork();
    die "fork failed: $!\n" if !defined($child);

    if (!$child) {
        eval {
            close $pfd;
            &$do_syncfs($vmid, $pid, $cfd);
        };
        if (my $err = $@) {
            warn $err;
            POSIX::_exit(1);
        }
        POSIX::_exit(0);
    }
    close $cfd;

    # Wait for the child; returns 1 if it was reaped, 0 on a waitpid
    # failure other than EINTR (retrying those would loop forever).
    my $reap_child = sub {
        while (1) {
            my $res = waitpid($child, 0);
            return 1 if $res == $child;
            return 0 if !($res == -1 && $! == EINTR);
        }
    };

    # Close our socket end (the child then sees EOF and terminates),
    # reap the child and die with the given message.
    my $abort = sub {
        my ($msg) = @_;
        close $pfd;
        $reap_child->();
        die $msg;
    };

    # $go is undef if the child died before it could report back
    my $go = <$pfd>;
    $abort->("failed to enter container namespace\n")
        if !defined($go) || $go ne "go\n";

    open my $mounts, '<', "/proc/$child/mounts"
        or $abort->("failed to open container's /proc/mounts: $!\n");
    my $mountdata = do { local $/ = undef; <$mounts> };
    close $mounts;
    print {$pfd} $mountdata;
    close $pfd;

    die "failed to sync container namespace\n"
        if !$reap_child->() || $? != 0;
}
1940
# Create a snapshot of the container's rootfs volume.
#
# The snapshot is prepared in the config, a running container is frozen
# and synced while the volume snapshot is taken, and the container is
# unfrozen again in any case. On error the half-created snapshot is
# removed again and the error is re-thrown; otherwise the snapshot is
# committed.
sub snapshot_create {
    my ($vmid, $snapname, $save_vmstate, $comment) = @_;

    snapshot_prepare($vmid, $snapname, $save_vmstate, $comment);

    my $conf = load_config($vmid);
    my $running = check_running($vmid);

    my $frozen = 0;
    my $drivehash = {};

    eval {
        if ($running) {
            # set before freezing, so a failed freeze is undone as well
            $frozen = 1;
            PVE::Tools::run_command(['/usr/bin/lxc-freeze', '-n', $vmid]);
            sync_container_namespace($vmid);
        }

        my $storecfg = PVE::Storage::config();
        my $rootinfo = parse_ct_rootfs($conf->{rootfs});

        PVE::Storage::volume_snapshot($storecfg, $rootinfo->{volume}, $snapname);
        $drivehash->{rootfs} = 1;
    };
    my $err = $@;

    if ($frozen) {
        eval { PVE::Tools::run_command(['/usr/bin/lxc-unfreeze', '-n', $vmid]); };
        warn $@ if $@;
    }

    if ($err) {
        eval { snapshot_delete($vmid, $snapname, 1, $drivehash); };
        warn "$@\n" if $@;
        die "$err\n";
    }

    snapshot_commit($vmid, $snapname);
}
1983
# Delete a snapshot in two config passes around the storage operation:
# first the snapshot entry is marked with snapstate 'delete', then the
# rootfs volume snapshot is removed, finally the entry and all parent
# references to it are dropped from the config.
# With $force, a failing volume snapshot removal only causes a warning.
#
# Note: $drivehash is only set when called from snapshot_create. In that
# case lock and template checks are skipped and the config lock is
# removed in the final pass.
sub snapshot_delete {
    my ($vmid, $snapname, $force, $drivehash) = @_;

    # selects the first (mark) or second (cleanup) pass of $update_fn
    my $prepare = 1;

    my $snap;

    # Re-point (or drop) a 'parent' reference to the deleted snapshot.
    my $unlink_parent = sub {
        my ($confref, $new_parent) = @_;

        my $parent = $confref->{parent};
        return if !($parent && $parent eq $snapname);

        if ($new_parent) {
            $confref->{parent} = $new_parent;
        } else {
            delete $confref->{parent};
        }
    };

    my $update_fn = sub {
        my ($remove_drive) = @_;

        my $conf = load_config($vmid);

        if (!$drivehash) {
            check_lock($conf);
            die "you can't delete a snapshot if vm is a template\n"
                if is_template($conf);
        }

        $snap = $conf->{snapshots}->{$snapname};

        die "snapshot '$snapname' does not exist\n" if !defined($snap);

        # remove parent refs
        if (!$prepare) {
            $unlink_parent->($conf, $snap->{parent});
            foreach my $other (keys %{$conf->{snapshots}}) {
                next if $other eq $snapname;
                $unlink_parent->($conf->{snapshots}->{$other}, $snap->{parent});
            }
        }

        if ($remove_drive) {
            die "implement me - saving vmstate\n" if $remove_drive eq 'vmstate';
            die "implement me - remove drive\n";
        }

        if ($prepare) {
            $snap->{snapstate} = 'delete';
        } else {
            delete $conf->{snapshots}->{$snapname};
            delete $conf->{lock} if $drivehash;
        }

        write_config($vmid, $conf);
    };

    lock_config($vmid, $update_fn);

    # now remove vmstate file
    # never set for LXC!
    my $storecfg = PVE::Storage::config();

    die "implement me - saving vmstate\n" if $snap->{vmstate};

    # now remove all volume snapshots
    # only rootfs for now!
    eval {
        my $rootinfo = parse_ct_rootfs($snap->{rootfs});
        PVE::Storage::volume_snapshot_delete($storecfg, $rootinfo->{volume}, $snapname);
    };
    if (my $err = $@) {
        die $err if !$force;
        warn $err;
    }

    # now cleanup config
    $prepare = 0;
    lock_config($vmid, $update_fn);
}
2073
# Roll the container back to snapshot $snapname.
#
# Works in two config passes around the storage rollback: the first pass
# stops a running container and sets the 'rollback' lock, then the rootfs
# volume is rolled back, and the second pass replaces the current config
# with the snapshot's config and removes the lock.
# Dies if the container is a template, the snapshot does not exist or is
# incomplete, or the container is still running after the stop attempt.
sub snapshot_rollback {
    my ($vmid, $snapname) = @_;

    # selects the first (lock) or second (apply) pass of $updatefn
    my $prepare = 1;

    my $storecfg = PVE::Storage::config();

    my $conf = load_config($vmid);

    # looks the snapshot up in whatever $conf currently refers to
    my $get_snapshot_config = sub {

        die "you can't rollback if vm is a template\n" if is_template($conf);

        my $res = $conf->{snapshots}->{$snapname};

        die "snapshot '$snapname' does not exist\n" if !defined($res);

        return $res;
    };

    my $snap = &$get_snapshot_config();

    # only for rootfs for now!
    my $rootfs = $snap->{rootfs};
    my $rootinfo = parse_ct_rootfs($rootfs);
    my $volid = $rootinfo->{volume};

    PVE::Storage::volume_rollback_is_possible($storecfg, $volid, $snapname);

    my $updatefn = sub {

        $conf = load_config($vmid);

        $snap = &$get_snapshot_config();

        die "unable to rollback to incomplete snapshot (snapstate = $snap->{snapstate})\n"
            if $snap->{snapstate};

        if ($prepare) {
            check_lock($conf);
            # list form: no shell is involved in running lxc-stop; the
            # result is verified by the check_running() call below
            system('lxc-stop', '-n', $vmid, '--kill') if check_running($vmid);
        }

        die "unable to rollback vm $vmid: vm is running\n"
            if check_running($vmid);

        if ($prepare) {
            $conf->{lock} = 'rollback';
        } else {
            die "got wrong lock\n" if !($conf->{lock} && $conf->{lock} eq 'rollback');
            delete $conf->{lock};
        }

        if (!$prepare) {
            # copy snapshot config to current config
            $conf = &$snapshot_apply_config($conf, $snap);
            $conf->{parent} = $snapname;
        }

        write_config($vmid, $conf);

        if (!$prepare && $snap->{vmstate}) {
            die "implement me - save vmstate";
        }
    };

    lock_config($vmid, $updatefn);

    # only rootfs for now!
    PVE::Storage::volume_snapshot_rollback($storecfg, $volid, $snapname);

    $prepare = 0;
    lock_config($vmid, $updatefn);
}
2150
# Convert the container's rootfs volume into a base (template) volume
# and store the new volume ID in the config. Dies if the storage does
# not offer the 'template' feature for the volume.
sub template_create {
    my ($vmid, $conf) = @_;

    my $storecfg = PVE::Storage::config();

    my $rootinfo = parse_ct_rootfs($conf->{rootfs});
    my $volid = $rootinfo->{volume};

    if (!PVE::Storage::volume_has_feature($storecfg, 'template', $volid)) {
        die "Template feature is not available for '$volid'\n";
    }

    PVE::Storage::activate_volumes($storecfg, [$volid]);

    $rootinfo->{volume} = PVE::Storage::vdisk_create_base($storecfg, $volid);
    $conf->{rootfs} = print_ct_mountpoint($rootinfo, 1);

    write_config($vmid, $conf);
}
2170
# Returns 1 if the 'template' option is set to 1, a false value otherwise.
sub is_template {
    my ($conf) = @_;

    my $flag = $conf->{template};

    return 1 if defined($flag) && $flag == 1;
}
2176
# Return the list of mountpoint config keys: 'rootfs' followed by
# 'mp0' .. 'mp<N-1>' with N = $MAX_MOUNT_POINTS, or the same list in
# reverse order if $reverse is set.
sub mountpoint_names {
    my ($reverse) = @_;

    my @names = ('rootfs', map { "mp$_" } 0 .. $MAX_MOUNT_POINTS - 1);

    return $reverse ? reverse @names : @names;
}
2188
2189
# Call $func->($key, $mountpoint) for every configured mountpoint, in
# the order given by mountpoint_names($reverse). Entries that are not
# set, or for which the parser returns undef, are skipped.
sub foreach_mountpoint_full {
    my ($conf, $reverse, $func) = @_;

    foreach my $key (mountpoint_names($reverse)) {
        my $raw = $conf->{$key};
        next if !defined($raw);

        my $mountpoint;
        if ($key eq 'rootfs') {
            $mountpoint = parse_ct_rootfs($raw, 1);
        } else {
            $mountpoint = parse_ct_mountpoint($raw, 1);
        }
        next if !defined($mountpoint);

        $func->($key, $mountpoint);
    }
}
2202
# Call $func for every configured mountpoint in forward order
# (rootfs first).
sub foreach_mountpoint {
    my ($conf, $func) = @_;

    my $reverse = 0;
    foreach_mountpoint_full($conf, $reverse, $func);
}
2208
# Call $func for every configured mountpoint in reverse order
# (rootfs last).
sub foreach_mountpoint_reverse {
    my ($conf, $func) = @_;

    my $reverse = 1;
    foreach_mountpoint_full($conf, $reverse, $func);
}
2214
# Check that $authuser may modify every option named in $key_list.
#
# Parameters:
#   $rpcenv   - object providing check_vm_perm($authuser, $vmid, $pool,
#               $privs)
#   $authuser - authenticated user ID
#   $vmid     - container ID
#   $pool     - pool handed on to check_vm_perm()
#   $key_list - array ref of option names to be modified
#
# Each option is mapped to the privilege guarding it (CPU, Disk, Memory,
# Network, or Options for everything else) and checked through
# $rpcenv->check_vm_perm(). Returns 1 if nothing objected.
sub check_ct_modify_config_perm {
    my ($rpcenv, $authuser, $vmid, $pool, $key_list) = @_;

    # Only root@pam may skip the checks. (This was inverted before, which
    # let every other user bypass all permission checks.)
    return 1 if $authuser eq 'root@pam';

    foreach my $opt (@$key_list) {

        if ($opt eq 'cpus' || $opt eq 'cpuunits' || $opt eq 'cpulimit') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.CPU']);
        } elsif ($opt eq 'rootfs' || $opt =~ /^mp\d+$/) {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk']);
        } elsif ($opt eq 'memory' || $opt eq 'swap') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Memory']);
        } elsif ($opt =~ m/^net\d+$/ || $opt eq 'nameserver' ||
                 $opt eq 'searchdomain' || $opt eq 'hostname') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Network']);
        } else {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Options']);
        }
    }

    return 1;
}
2238
# Unmount all mountpoints of the container below /var/lib/lxc/$vmid/rootfs
# in reverse order. Mountpoints that are not mounted are skipped. With
# $noerr, umount failures are only reported as warnings; otherwise the
# first failure is fatal.
sub umount_all {
    my ($vmid, $storage_cfg, $conf, $noerr) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    my $volid_list = get_vm_volumes($conf);

    my $umount_one = sub {
        my ($ms, $mountpoint) = @_;

        my ($volid, $mount) = @{$mountpoint}{qw(volume mp)};

        return if !$volid || !$mount;

        # collapse repeated slashes
        (my $mount_path = "$rootdir/$mount") =~ s!/+!/!g;

        return if !PVE::ProcFSTools::is_mounted($mount_path);

        eval {
            PVE::Tools::run_command(['umount', '-d', $mount_path]);
        };
        my $err = $@;
        return if !$err;

        die $err if !$noerr;
        warn $err;
    };

    foreach_mountpoint_reverse($conf, $umount_one);
}
2270
# Activate the container's volumes and mount all of its mountpoints below
# /var/lib/lxc/$vmid/rootfs. If mounting fails, everything is unmounted
# again (best effort) and the error is re-thrown.
# Returns the root directory.
sub mount_all {
    my ($vmid, $storage_cfg, $conf) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    File::Path::make_path($rootdir);

    my $volid_list = get_vm_volumes($conf);
    PVE::Storage::activate_volumes($storage_cfg, $volid_list);

    my $mount_one = sub {
        my ($ms, $mountpoint) = @_;

        mountpoint_mount($mountpoint, $rootdir, $storage_cfg);
    };

    eval { foreach_mountpoint($conf, $mount_one); };
    my $err = $@;
    if ($err) {
        warn "mounting container failed\n";
        umount_all($vmid, $storage_cfg, $conf, 1);
        die $err;
    }

    return $rootdir;
}
2295
2296
# Call mountpoint_mount() without a root directory (undef) and return
# its result.
sub mountpoint_mount_path {
    my ($mountpoint, $storage_cfg, $snapname) = @_;

    my $rootdir = undef;

    return mountpoint_mount($mountpoint, $rootdir, $storage_cfg, $snapname);
}
2302
# Die if the canonical form of $path differs from its real path, i.e. if
# resolving the path changes it (for example through a symlink).
my $check_mount_path = sub {
    my ($path) = @_;

    my $canonical = File::Spec->canonpath($path);
    my $resolved = Cwd::realpath($canonical);

    die "mount path modified by symlink: $canonical != $resolved"
        if $resolved ne $canonical;
};
2311
# Return the loop device (/dev/loopN) that 'losetup --associated' reports
# for $path, or undef if none is reported. If several lines match, the
# last one wins.
sub query_loopdev {
    my ($path) = @_;

    my $loopdev;

    PVE::Tools::run_command(
        ['losetup', '--associated', $path],
        outfunc => sub {
            my ($line) = @_;
            $loopdev = $1 if $line =~ m@^(/dev/loop\d+):@;
        },
    );

    return $loopdev;
}
2325
# Run a function with a file attached to a loop device.
# The loop device is always detached afterwards (or set to autoclear).
# Returns the loop device.
#
# $func is called with the loop device path as its only argument. If $func
# dies, that error is re-thrown after the detach attempt; a detach failure
# is then only reported via warn() so it cannot hide the original error.
# If $func succeeded, a detach failure is fatal.
sub run_with_loopdev {
    my ($func, $file) = @_;
    my $device;
    my $parser = sub {
        my $line = shift;
        if ($line =~ m@^(/dev/loop\d+)$@) {
            $device = $1;
        }
    };
    PVE::Tools::run_command(['losetup', '--show', '-f', $file], outfunc => $parser);
    die "failed to setup loop device for $file\n" if !$device;

    eval { &$func($device); };
    my $err = $@;

    eval { PVE::Tools::run_command(['losetup', '-d', $device]); };
    if (my $detach_err = $@) {
        die $detach_err if !$err;
        warn $detach_err;
    }

    die $err if $err;
    return $device;
}
2346
# Bind-mount $dir onto $dest, passing @extra_opts through to mount.
# If $ro is true the bind mount is remounted read-only afterwards; when
# that remount fails the bind mount is removed again and the remount error
# is re-thrown. A failure of the cleanup umount is only warned about so
# that it cannot replace the original error.
sub bindmount {
    my ($dir, $dest, $ro, @extra_opts) = @_;
    PVE::Tools::run_command(['mount', '-o', 'bind', @extra_opts, $dir, $dest]);
    if ($ro) {
        eval { PVE::Tools::run_command(['mount', '-o', 'bind,remount,ro', $dest]); };
        if (my $err = $@) {
            warn "bindmount error\n";
            # don't leave writable bind-mounts behind...
            eval { PVE::Tools::run_command(['umount', $dest]); };
            warn $@ if $@;
            die $err;
        }
    }
}
2360
# Mount a single mountpoint below $rootdir.
# use $rootdir = undef to just return the corresponding mount path
#
# Parameters:
#   $mountpoint  - hash with (at least) the keys volume, mp and type; the
#                  keys ro, quota and acl are honoured when present
#   $rootdir     - directory the mountpoint is mounted below, or undef
#   $storage_cfg - storage configuration handed to PVE::Storage
#   $snapname    - optional snapshot name; snapshots are mounted read-only
#
# Returns nothing if the mountpoint has no volume or no mount path.
# Otherwise returns the source path in scalar context, or the list
# ($path, $use_loopdev, $mounted_dev) in list context.
# Dies on unsupported storage types/formats and on mount errors.
sub mountpoint_mount {
    my ($mountpoint, $rootdir, $storage_cfg, $snapname) = @_;

    my $volid = $mountpoint->{volume};
    my $mount = $mountpoint->{mp};
    my $type = $mountpoint->{type};
    # quota is only considered for writable, non-snapshot mounts
    my $quota = !$snapname && !$mountpoint->{ro} && $mountpoint->{quota};
    my $mounted_dev;

    return if !$volid || !$mount;

    my $mount_path;

    if (defined($rootdir)) {
        $rootdir =~ s!/+$!!;
        $mount_path = "$rootdir/$mount";
        $mount_path =~ s!/+!/!g;
        &$check_mount_path($mount_path);
        File::Path::mkpath($mount_path);
    }

    my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1);

    die "unknown snapshot path for '$volid'" if !$storage && defined($snapname);

    my $optstring = '';
    if (defined($mountpoint->{acl})) {
        $optstring .= ($mountpoint->{acl} ? 'acl' : 'noacl');
    }
    my $readonly = $mountpoint->{ro};

    # only pass '-o' when there are options, never an empty option string
    my @extra_opts;
    @extra_opts = ('-o', $optstring) if $optstring;

    if ($storage) {

        my $scfg = PVE::Storage::storage_config($storage_cfg, $storage);
        my $path = PVE::Storage::path($storage_cfg, $volid, $snapname);

        my ($vtype, undef, undef, undef, undef, $isBase, $format) =
            PVE::Storage::parse_volname($storage_cfg, $volid);

        $format = 'iso' if $vtype eq 'iso'; # allow to handle iso files

        if ($format eq 'subvol') {
            if ($mount_path) {
                if ($snapname) {
                    if ($scfg->{type} eq 'zfspool') {
                        my $path_arg = $path;
                        $path_arg =~ s!^/+!!; # strip leading slashes for mount -t zfs
                        PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, '-t', 'zfs', $path_arg, $mount_path]);
                    } else {
                        die "cannot mount subvol snapshots for storage type '$scfg->{type}'\n";
                    }
                } else {
                    bindmount($path, $mount_path, $readonly, @extra_opts);
                    warn "cannot enable quota control for bind mounted subvolumes\n" if $quota;
                }
            }
            return wantarray ? ($path, 0, $mounted_dev) : $path;
        } elsif ($format eq 'raw' || $format eq 'iso') {
            # performs the actual mount; $path is either the volume path or
            # the loop device set up by run_with_loopdev()
            my $domount = sub {
                my ($path) = @_;
                if ($mount_path) {
                    if ($format eq 'iso') {
                        PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, $path, $mount_path]);
                    } elsif ($isBase || defined($snapname)) {
                        PVE::Tools::run_command(['mount', '-o', 'ro,noload', @extra_opts, $path, $mount_path]);
                    } else {
                        if ($quota) {
                            push @extra_opts, '-o', 'usrjquota=aquota.user,grpjquota=aquota.group,jqfmt=vfsv0';
                        }
                        push @extra_opts, '-o', 'ro' if $readonly;
                        PVE::Tools::run_command(['mount', @extra_opts, $path, $mount_path]);
                    }
                }
            };
            my $use_loopdev = 0;
            if ($scfg->{path}) {
                $mounted_dev = run_with_loopdev($domount, $path);
                $use_loopdev = 1;
            } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' ||
                     $scfg->{type} eq 'rbd' || $scfg->{type} eq 'lvmthin') {
                $mounted_dev = $path;
                &$domount($path);
            } else {
                die "unsupported storage type '$scfg->{type}'\n";
            }
            return wantarray ? ($path, $use_loopdev, $mounted_dev) : $path;
        } else {
            die "unsupported image format '$format'\n";
        }
    } elsif ($type eq 'device') {
        push @extra_opts, '-o', 'ro' if $readonly;
        PVE::Tools::run_command(['mount', @extra_opts, $volid, $mount_path]) if $mount_path;
        return wantarray ? ($volid, 0, $volid) : $volid;
    } elsif ($type eq 'bind') {
        die "directory '$volid' does not exist\n" if ! -d $volid;
        &$check_mount_path($volid);
        bindmount($volid, $mount_path, $readonly, @extra_opts) if $mount_path;
        warn "cannot enable quota control for bind mounts\n" if $quota;
        return wantarray ? ($volid, 0, undef) : $volid;
    }

    die "unsupported storage";
}
2467
# Collect the volume IDs of all storage-backed mountpoints in $conf.
#
# A mountpoint is skipped when its key equals $excludes (if given), when it
# has no volume, when its type is not 'volume', or when
# PVE::Storage::parse_volume_id() yields no storage ID for it.
# Returns an array reference of volume IDs.
sub get_vm_volumes {
    my ($conf, $excludes) = @_;

    my @volumes;

    my $collect = sub {
        my ($ms, $mountpoint) = @_;

        return if $excludes && $ms eq $excludes;

        my $volid = $mountpoint->{volume};
        return if !$volid;
        return if $mountpoint->{type} ne 'volume';

        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        push @volumes, $volid;
    };

    foreach_mountpoint($conf, $collect);

    return \@volumes;
}
2490
# Create an ext4 file system (with the 'mmp' feature) on $dev and set the
# owner of its root directory to $rootuid:$rootgid.
# Undefined IDs default to 0 so that callers passing only $dev do not
# interpolate undef into the 'root_owner' option.
sub mkfs {
    my ($dev, $rootuid, $rootgid) = @_;

    $rootuid = 0 if !defined($rootuid);
    $rootgid = 0 if !defined($rootgid);

    PVE::Tools::run_command(['mkfs.ext4', '-O', 'mmp',
                             '-E', "root_owner=$rootuid:$rootgid",
                             $dev]);
}
2498
# Format a volume with mkfs(), making $rootuid:$rootgid the owner of the
# new file system's root directory.
#
# $volid is either a /dev/... path, which is formatted directly, or a
# storage volume ID, which is activated and resolved to a path first.
# Dies if the volume has no storage or its format is not 'raw'.
sub format_disk {
    my ($storage_cfg, $volid, $rootuid, $rootgid) = @_;

    if ($volid =~ m!^/dev/.+!) {
        # hand over the owner IDs here as well, like for storage volumes
        mkfs($volid, $rootuid, $rootgid);
        return;
    }

    my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1);

    die "cannot format volume '$volid' with no storage\n" if !$storage;

    PVE::Storage::activate_volumes($storage_cfg, [$volid]);

    my $path = PVE::Storage::path($storage_cfg, $volid);

    my ($vtype, undef, undef, undef, undef, $isBase, $format) =
        PVE::Storage::parse_volname($storage_cfg, $volid);

    die "cannot format volume '$volid' (format == $format)\n"
        if $format ne 'raw';

    mkfs($path, $rootuid, $rootgid);
}
2523
# Free every volume listed in the array reference $vollist.
# Failures are reported with warn() and do not stop the remaining
# volumes from being freed.
sub destroy_disks {
    my ($storecfg, $vollist) = @_;

    for my $doomed (@$vollist) {
        eval { PVE::Storage::vdisk_free($storecfg, $doomed); };
        if (my $err = $@) {
            warn $err;
        }
    }
}
2532
# Allocate (and where needed format) the volumes requested in $settings and
# store the resulting mountpoint strings in $conf.
#
# A mountpoint whose volume has the form "<storage>:<size in GB>" gets a new
# volume allocated on that storage; every other mountpoint is taken over
# unchanged. Newly created subvolumes are chown'ed to the container's root
# uid/gid as returned by parse_id_maps().
#
# Returns an array reference with the IDs of all newly allocated volumes.
# On any error the volumes allocated so far are freed again and the error is
# re-thrown.
sub create_disks {
    my ($storecfg, $vmid, $settings, $conf) = @_;

    my $vollist = [];

    eval {
        my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
        my $chown_vollist = [];

        foreach_mountpoint($settings, sub {
            my ($ms, $mountpoint) = @_;

            my $volid = $mountpoint->{volume};

            my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1);

            if ($storage && ($volid =~ m/^([^:\s]+):(\d+(\.\d+)?)$/)) {
                my $size_gb = $2;

                my $size_kb = int(${size_gb}*1024) * 1024;

                my $scfg = PVE::Storage::storage_config($storecfg, $storage);
                # fixme: use better naming ct-$vmid-disk-X.raw?

                # Register each volume right after allocation, so that it is
                # freed by the error handler even if formatting it fails.
                my $alloc = sub {
                    my ($format, $size) = @_;
                    my $new_volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, $format,
                                                              undef, $size);
                    push @$vollist, $new_volid;
                    return $new_volid;
                };

                if ($scfg->{type} eq 'dir' || $scfg->{type} eq 'nfs') {
                    if ($size_kb > 0) {
                        $volid = $alloc->('raw', $size_kb);
                        format_disk($storecfg, $volid, $rootuid, $rootgid);
                    } else {
                        $volid = $alloc->('subvol', 0);
                        push @$chown_vollist, $volid;
                    }
                } elsif ($scfg->{type} eq 'zfspool') {

                    $volid = $alloc->('subvol', $size_kb);
                    push @$chown_vollist, $volid;
                } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' || $scfg->{type} eq 'lvmthin') {

                    $volid = $alloc->('raw', $size_kb);
                    format_disk($storecfg, $volid, $rootuid, $rootgid);

                } elsif ($scfg->{type} eq 'rbd') {

                    die "krbd option must be enabled on storage type '$scfg->{type}'\n" if !$scfg->{krbd};
                    $volid = $alloc->('raw', $size_kb);
                    format_disk($storecfg, $volid, $rootuid, $rootgid);
                } else {
                    die "unable to create containers on storage type '$scfg->{type}'\n";
                }
                $mountpoint->{volume} = $volid;
                $mountpoint->{size} = $size_kb * 1024;
                $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            } else {
                # use specified/existing volid/dir/device
                $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            }
        });

        PVE::Storage::activate_volumes($storecfg, $chown_vollist, undef);
        foreach my $volid (@$chown_vollist) {
            my $path = PVE::Storage::path($storecfg, $volid, undef);
            # chown returns the number of files changed, 0 means failure
            chown($rootuid, $rootgid, $path)
                or warn "unable to change owner of '$path' to $rootuid:$rootgid - $!\n";
        }
        PVE::Storage::deactivate_volumes($storecfg, $chown_vollist, undef);
    };
    # free allocated images on error
    if (my $err = $@) {
        destroy_disks($storecfg, $vollist);
        die $err;
    }
    return $vollist;
}
2610
2611 # bash completion helper
2612
# Completion helper listing volume IDs of templates, or of backups when
# $cmdname is 'restore'. If $cvalue starts with "<storage>:", the lookup is
# restricted to that storage ID.
# Returns an array reference of volume IDs.
sub complete_os_templates {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    my $storeid;
    $storeid = $1 if $cvalue =~ m/^([^:]+):/;

    my $vtype = 'vztmpl';
    $vtype = 'backup' if $cmdname eq 'restore';

    my $data = PVE::Storage::template_list($cfg, $storeid, $vtype);

    my @volids;
    for my $sid (keys %$data) {
        for my $entry (@{$data->{$sid}}) {
            my $volid = $entry->{volid};
            next if !defined($volid);
            push @volids, $volid;
        }
    }

    return \@volids;
}
2636
# Completion helper returning an array reference of container IDs taken
# from vmstatus().
#
# With $running undefined every ID is returned. Otherwise templates are
# left out and only containers whose presence in list_active_containers()
# matches the truth value of $running are kept.
my $complete_ctid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();
    my $active_hash = list_active_containers();

    my $wanted = sub {
        my ($id) = @_;

        my $d = $idlist->{$id};
        return 1 if !defined($running);

        return 0 if $d->{template};
        # keep only IDs whose active state equals the requested one
        return 0 if ($running xor $active_hash->{$id});
        return 1;
    };

    return [ grep { $wanted->($_) } keys %$idlist ];
};
2658
# Completion helper: all container IDs, without any state filter.
sub complete_ctid {
    return $complete_ctid_full->();
}
2662
# Completion helper: IDs of non-template containers that are not active.
sub complete_ctid_stopped {
    return $complete_ctid_full->(0);
}
2666
# Completion helper: IDs of non-template containers that are active.
sub complete_ctid_running {
    return $complete_ctid_full->(1);
}
2670
# Extract the 'lxc.id_map' entries from $conf->{lxc}.
#
# Returns the list ($id_map, $rootuid, $rootgid): $id_map is an array
# reference of [type, container id, host id, length] entries, $rootuid and
# $rootgid are the host IDs that container ID 0 is mapped to (0 if there is
# no such mapping). If no map is configured and $conf->{unprivileged} is
# set, a default mapping starting at host ID 100000 is returned.
# Dies if an id_map value cannot be parsed.
sub parse_id_maps {
    my ($conf) = @_;

    my @maps;
    my %root_of = (u => 0, g => 0);

    my $entries = $conf->{lxc};
    for my $entry (@$entries) {
        my ($key, $value) = @$entry;
        next unless $key eq 'lxc.id_map';

        my ($type, $ct, $host, $length) =
            $value =~ /^([ug])\s+(\d+)\s+(\d+)\s+(\d+)\s*$/
            or die "failed to parse id_map: $value\n";

        push @maps, [$type, $ct, $host, $length];
        # remember which host ID the container's root is mapped to
        $root_of{$type} = $host if $ct == 0;
    }

    if (!@maps && $conf->{unprivileged}) {
        # Should we read them from /etc/subuid?
        @maps = (
            ['u', '0', '100000', '65536'],
            ['g', '0', '100000', '65536'],
        );
        $root_of{u} = $root_of{g} = 100000;
    }

    return (\@maps, $root_of{u}, $root_of{g});
}
2703
# Build the 'lxc-usernsexec' command prefix for the given id map: one
# '-m' option per entry (its fields joined with ':'), followed by '--'.
# Returns an empty array reference when the id map is empty.
sub userns_command {
    my ($id_map) = @_;

    return [] if !@$id_map;

    my @cmd = ('lxc-usernsexec');
    for my $mapping (@$id_map) {
        push @cmd, '-m', join(':', @$mapping);
    }
    push @cmd, '--';

    return \@cmd;
}
2711
2712 1;