]> git.proxmox.com Git - pve-container.git/blob - src/PVE/LXC.pm
42f6cc3ba7a7d2d055b5180ac4fea51259a92189
[pve-container.git] / src / PVE / LXC.pm
1 package PVE::LXC;
2
3 use strict;
4 use warnings;
5 use POSIX qw(EINTR);
6
7 use Socket;
8
9 use File::Path;
10 use File::Spec;
11 use Cwd qw();
12 use Fcntl qw(O_RDONLY);
13
14 use PVE::Cluster qw(cfs_register_file cfs_read_file);
15 use PVE::Storage;
16 use PVE::SafeSyslog;
17 use PVE::INotify;
18 use PVE::JSONSchema qw(get_standard_option);
19 use PVE::Tools qw($IPV6RE $IPV4RE dir_glob_foreach lock_file lock_file_full);
20 use PVE::Network;
21 use PVE::AccessControl;
22 use PVE::ProcFSTools;
23 use Time::HiRes qw (gettimeofday);
24
25 use Data::Dumper;
26
# Name of the local node, looked up once when the module is loaded.
my $nodename = PVE::INotify::nodename();

# Host CPU information, read once when the module is loaded.
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();

# Common tar flags, exposed as a package variable.
our $COMMON_TAR_FLAGS = [
    '--sparse',
    '--numeric-owner',
    '--acls',
    '--xattrs',
    '--xattrs-include=user.*',
    '--xattrs-include=security.capability',
    '--warning=no-xattr-write',
];

# Files below '/lxc/' are parsed and serialized with the functions of this module.
cfs_register_file('/lxc/', \&parse_pct_config, \&write_pct_config);
38
# Property-string schema of the 'rootfs' option. It is also the base of the
# mount point schema, which adds the 'mp' property.
my $rootfs_desc = {
    volume => {
        type               => 'string',
        default_key        => 1,
        format             => 'pve-lxc-mp-string',
        format_description => 'volume',
        description        => 'Volume, device or directory to mount into the container.',
    },
    backup => {
        type               => 'boolean',
        format_description => '[1|0]',
        description        => 'Whether to include the mountpoint in backups.',
        optional           => 1,
    },
    size => {
        type               => 'string',
        format             => 'disk-size',
        format_description => 'DiskSize',
        description        => 'Volume size (read only value).',
        optional           => 1,
    },
    acl => {
        type               => 'boolean',
        format_description => 'acl',
        description        => 'Explicitly enable or disable ACL support.',
        optional           => 1,
    },
    ro => {
        type               => 'boolean',
        format_description => 'ro',
        description        => 'Read-only mountpoint (not supported with bind mounts)',
        optional           => 1,
    },
    quota => {
        type               => 'boolean',
        format_description => '[0|1]',
        description        => 'Enable user quotas inside the container (not supported with zfs subvolumes)',
        optional           => 1,
    },
};
79
# Standard option describing the container root file system.
PVE::JSONSchema::register_standard_option(
    'pve-ct-rootfs',
    {
        type        => 'string',
        format      => $rootfs_desc,
        description => "Use volume as container root.",
        optional    => 1,
    },
);

# Standard option describing a snapshot name.
PVE::JSONSchema::register_standard_option(
    'pve-lxc-snapshot-name',
    {
        description => "The name of the snapshot.",
        type        => 'string',
        format      => 'pve-configid',
        maxLength   => 40,
    },
);
91
# Schema of all plain container options. The netN, mpN and unusedN entries
# are added to this hash further down in the file.
my $confdesc = {
    lock => {
        optional    => 1,
        type        => 'string',
        description => "Lock/unlock the VM.",
        enum        => [ 'migrate', 'backup', 'snapshot', 'rollback' ],
    },
    onboot => {
        optional    => 1,
        type        => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default     => 0,
    },
    startup  => get_standard_option('pve-startup-order'),
    template => {
        optional    => 1,
        type        => 'boolean',
        description => "Enable/disable Template.",
        default     => 0,
    },
    arch => {
        optional    => 1,
        type        => 'string',
        enum        => [ 'amd64', 'i386' ],
        description => "OS architecture type.",
        default     => 'amd64',
    },
    ostype => {
        optional    => 1,
        type        => 'string',
        enum        => [ 'debian', 'ubuntu', 'centos', 'fedora', 'opensuse', 'archlinux', 'alpine' ],
        description => "OS type. Corresponds to lxc setup scripts in /usr/share/lxc/config/<ostype>.common.conf.",
    },
    console => {
        optional    => 1,
        type        => 'boolean',
        description => "Attach a console device (/dev/console) to the container.",
        default     => 1,
    },
    tty => {
        optional    => 1,
        type        => 'integer',
        description => "Specify the number of tty available to the container",
        minimum     => 0,
        maximum     => 6,
        default     => 2,
    },
    cpulimit => {
        optional    => 1,
        type        => 'number',
        description => "Limit of CPU usage. Note if the computer has 2 CPUs, it has a total of '2' CPU time. Value '0' indicates no CPU limit.",
        minimum     => 0,
        maximum     => 128,
        default     => 0,
    },
    cpuunits => {
        optional    => 1,
        type        => 'integer',
        description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to the weights of all the other running VMs.\n\nNOTE: You can disable fair-scheduler configuration by setting this to 0.",
        minimum     => 0,
        maximum     => 500000,
        default     => 1024,
    },
    memory => {
        optional    => 1,
        type        => 'integer',
        description => "Amount of RAM for the VM in MB.",
        minimum     => 16,
        default     => 512,
    },
    swap => {
        optional    => 1,
        type        => 'integer',
        description => "Amount of SWAP for the VM in MB.",
        minimum     => 0,
        default     => 512,
    },
    hostname => {
        optional    => 1,
        description => "Set a host name for the container.",
        type        => 'string',
        format      => 'dns-name',
        maxLength   => 255,
    },
    description => {
        optional    => 1,
        type        => 'string',
        description => "Container description. Only used on the configuration web interface.",
    },
    searchdomain => {
        optional    => 1,
        type        => 'string',
        format      => 'dns-name-list',
        description => "Sets DNS search domains for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
    },
    nameserver => {
        optional    => 1,
        type        => 'string',
        format      => 'address-list',
        description => "Sets DNS server IP address for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
    },
    rootfs => get_standard_option('pve-ct-rootfs'),
    parent => {
        optional    => 1,
        type        => 'string',
        format      => 'pve-configid',
        maxLength   => 40,
        description => "Parent snapshot name. This is used internally, and should not be modified.",
    },
    snaptime => {
        optional    => 1,
        description => "Timestamp for snapshots.",
        type        => 'integer',
        minimum     => 0,
    },
    cmode => {
        optional    => 1,
        description => "Console mode. By default, the console command tries to open a connection to one of the available tty devices. By setting cmode to 'console' it tries to attach to /dev/console instead. If you set cmode to 'shell', it simply invokes a shell inside the container (no login).",
        type        => 'string',
        enum        => [ 'shell', 'console', 'tty' ],
        default     => 'tty',
    },
    protection => {
        optional    => 1,
        type        => 'boolean',
        description => "Sets the protection flag of the container. This will prevent the CT or CT's disk remove/update operation.",
        default     => 0,
    },
    unprivileged => {
        optional    => 1,
        type        => 'boolean',
        description => "Makes the container run as unprivileged user. (Should not be modified manually.)",
        default     => 0,
    },
};
223
# Raw "lxc.*" keys known to the config parser. A value of 1 means the key is
# passed through; any other true value is the warning printed instead.
# Keys below "lxc.cgroup." are accepted by the parser without being listed.
my $valid_lxc_conf_keys = {
    (
        map { $_ => 1 } qw(
            lxc.include
            lxc.arch
            lxc.utsname
            lxc.haltsignal
            lxc.rebootsignal
            lxc.stopsignal
            lxc.init_cmd
            lxc.network.type
            lxc.network.flags
            lxc.network.link
            lxc.network.mtu
            lxc.network.name
            lxc.network.hwaddr
            lxc.network.ipv4
            lxc.network.ipv4.gateway
            lxc.network.ipv6
            lxc.network.ipv6.gateway
            lxc.network.script.up
            lxc.network.script.down
            lxc.pts
            lxc.console.logfile
            lxc.console
            lxc.tty
            lxc.devttydir
            lxc.hook.autodev
            lxc.autodev
            lxc.kmsg
            lxc.mount
            lxc.mount.entry
            lxc.mount.auto
            lxc.rootfs.mount
            lxc.cap.drop
            lxc.cap.keep
            lxc.aa_profile
            lxc.aa_allow_incomplete
            lxc.se_context
            lxc.seccomp
            lxc.id_map
            lxc.hook.pre-start
            lxc.hook.pre-mount
            lxc.hook.mount
            lxc.hook.start
            lxc.hook.stop
            lxc.hook.post-stop
            lxc.hook.clone
            lxc.hook.destroy
            lxc.loglevel
            lxc.logfile
            lxc.start.auto
            lxc.start.delay
            lxc.start.order
            lxc.group
            lxc.environment
        )
    ),
    'lxc.rootfs'         => 'lxc.rootfs is auto generated from rootfs',
    'lxc.rootfs.options' => 'lxc.rootfs.options is not supported'
        . ', please use mountpoint options in the "rootfs" key',
};
283
# Property-string schema of the netN options; registered below as the
# 'pve-lxc-network' format.
my $netconf_desc = {
    type => {
        type => 'string',
        optional => 1,
        description => "Network interface type.",
        enum => [qw(veth)],
    },
    name => {
        type => 'string',
        format_description => 'String',
        description => 'Name of the network device as seen from inside the container. (lxc.network.name)',
        pattern => '[-_.\w\d]+',
    },
    bridge => {
        type => 'string',
        format_description => 'vmbr<Number>',
        description => 'Bridge to attach the network device to.',
        pattern => '[-_.\w\d]+',
        optional => 1,
    },
    hwaddr => {
        type => 'string',
        format_description => 'MAC',
        # this text used to be a copy of the 'bridge' description
        description => 'MAC address of the network device. (lxc.network.hwaddr)',
        pattern => qr/(?:[a-f0-9]{2}:){5}[a-f0-9]{2}/i,
        optional => 1,
    },
    mtu => {
        type => 'integer',
        format_description => 'Number',
        description => 'Maximum transfer unit of the interface. (lxc.network.mtu)',
        minimum => 64, # minimum ethernet frame is 64 bytes
        optional => 1,
    },
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        description => 'IPv4 address in CIDR format.',
        optional => 1,
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        description => 'Default gateway for IPv4 traffic.',
        optional => 1,
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        description => 'IPv6 address in CIDR format.',
        optional => 1,
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        description => 'Default gateway for IPv6 traffic.',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        format_description => '[1|0]',
        description => "Controls whether this interface's firewall rules should be used.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        format_description => 'VlanNo',
        minimum => '2',
        maximum => '4094',
        description => "VLAN tag for this interface.",
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:;\d+)*/,
        format_description => 'vlanid[;vlanid...]',
        description => "VLAN ids to pass through the interface",
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-lxc-network', $netconf_desc);
369
# Number of netN options (net0 .. net9) added to the option schema.
my $MAX_LXC_NETWORKS = 10;
foreach my $index (0 .. $MAX_LXC_NETWORKS - 1) {
    $confdesc->{"net$index"} = {
        optional    => 1,
        type        => 'string',
        format      => $netconf_desc,
        description => "Specifies network interfaces for the container.",
    };
}

# Format used by the 'volume' and 'mp' properties of root fs and mount points.
PVE::JSONSchema::register_format('pve-lxc-mp-string', \&verify_lxc_mp_string);
# Check a mount point string for relative path components.
#
# Rejected are "/./" and "/../" anywhere, "/." and "/.." at the end, and
# "../" at the beginning. Returns the string when it is acceptable;
# otherwise dies, or returns undef when $noerr is set.
sub verify_lxc_mp_string {
    my ($mp, $noerr) = @_;

    my @forbidden = (
        qr@/\.\.?/@,    # "/./" or "/../"
        qr@/\.\.?$@,    # "/." or "/.." at the end
        qr@^\.\./@,     # "../" at the beginning
    );

    return $mp if !grep { $mp =~ $_ } @forbidden;

    return undef if $noerr;
    die "$mp contains illegal character sequences\n";
}
396
# Mount point schema: every root fs property plus the mandatory 'mp' path.
my $mp_desc = {
    %{$rootfs_desc},
    mp => {
        type               => 'string',
        format             => 'pve-lxc-mp-string',
        format_description => 'Path',
        description        => 'Path to the mountpoint as seen from inside the container.',
    },
};
PVE::JSONSchema::register_format('pve-ct-mountpoint', $mp_desc);
407
# Schema shared by all unusedN options.
my $unuseddesc = {
    optional => 1,
    type => 'string', format => 'pve-volume-id',
    description => "Reference to unused volumes.",
};

# Number of mpN options (mp0 .. mp9) added to the option schema.
my $MAX_MOUNT_POINTS = 10;
for (my $i = 0; $i < $MAX_MOUNT_POINTS; $i++) {
    $confdesc->{"mp$i"} = {
        optional => 1,
        type => 'string', format => $mp_desc,
        description => "Use volume as container mount point (experimental feature).",
    };
}

# Number of unusedN options; bounded by its own constant so that the schema
# always agrees with the slots scanned by add_unused_volume().
my $MAX_UNUSED_DISKS = $MAX_MOUNT_POINTS;
for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) {
    $confdesc->{"unused$i"} = $unuseddesc;
}
428
# Serialize a container configuration hash into the text of its config file.
#
# The description is written as leading '#' comment lines, followed by the
# sorted "key: value" options and the raw lxc entries. Each snapshot is
# written the same way below a "[name]" header. A top-level 'snapstate' is
# removed from $conf before writing. Dies when an option value contains a
# newline. $filename is not used.
sub write_pct_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    # keys that are never written as plain "key: value" lines
    my %skip = map { $_ => 1 } qw(digest description pending snapshots snapname lxc);

    my $render_section = sub {
        my ($section) = @_;

        # add description as comment to top of the section
        my $descr = $section->{description} || '';
        my $text = join('', map { '#' . PVE::Tools::encode_text($_) . "\n" } split(/\n/, $descr));

        foreach my $key (sort keys %$section) {
            next if $skip{$key};
            my $value = $section->{$key};
            die "detected invalid newline inside property '$key'\n" if $value =~ m/\n/;
            $text .= "$key: $value\n";
        }

        foreach my $entry (@{ $section->{lxc} || [] }) {
            my ($lxc_key, $lxc_value) = @$entry;
            $text .= "$lxc_key: $lxc_value\n";
        }

        return $text;
    };

    my $output = $render_section->($conf);

    foreach my $snapname (sort keys %{$conf->{snapshots}}) {
        $output .= "\n[$snapname]\n" . $render_section->($conf->{snapshots}->{$snapname});
    }

    return $output;
}
472
# Validate $value against the schema type of option $key.
#
# Returns the normalized value: 1/0 for booleans, an integer for integers,
# and the unchanged value for numbers and strings. Strings with a format
# are additionally passed to PVE::JSONSchema::check_format. Dies for unknown
# options, undefined values, embedded line breaks and type mismatches.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if $value eq '1' || $value =~ m/^(on|yes|true)$/i;
        return 0 if $value eq '0' || $value =~ m/^(off|no|false)$/i;
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    die "internal error" if $type ne 'string';

    my $fmt = $desc->{format};
    PVE::JSONSchema::check_format($fmt, $value) if $fmt;

    return $value;
}
508
# Parse the raw text of a container configuration file.
#
# Parameters:
#   $filename - path of the file; must end in "/lxc/<vmid>.conf"
#   $raw      - file content, or undef
#
# Returns undef when $raw is undef. Otherwise returns a hash reference with
# the parsed options, 'digest' (SHA1 hex digest of $raw) and 'snapshots'
# (one hash per "[name]" section). Raw "lxc.*" lines are collected as
# [key, value] pairs below 'lxc'. Lines that cannot be parsed only cause a
# warning. Dies when $filename does not match the expected pattern.
sub parse_pct_config {
    my ($filename, $raw) = @_;

    return undef if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
    };

    # the dot is escaped so that only a literal ".conf" suffix is accepted
    $filename =~ m|/lxc/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    # $conf points to the section currently being filled
    my $conf = $res;
    my $descr = '';
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            $section = $1;
            # the description collected so far belongs to the previous section
            $conf->{description} = $descr if $descr;
            $descr = '';
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)\s*$/) {
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(lxc\.[a-z0-9_\-\.]+)(:|\s*=)\s*(.*?)\s*$/) {
            my $key = $1;
            my $value = $3;
            my $validity = $valid_lxc_conf_keys->{$key} || 0;
            if ($validity eq 1 || $key =~ m/^lxc\.cgroup\./) {
                push @{$conf->{lxc}}, [$key, $value];
            } elsif (my $errmsg = $validity) {
                warn "vm $vmid - $key: $errmsg\n";
            } else {
                warn "vm $vmid - unable to parse config: $line\n";
            }
        } elsif ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored, so only a real 'snapstate' key matches and not any
            # line that merely ends in "snapstate: <state>"
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(\S.*)\s*$/) {
            my $key = $1;
            my $value = $2;
            eval { $value = check_type($key, $value); };
            warn "vm $vmid - unable to parse value of '$key' - $@" if $@;
            # the value is stored even when the type check failed
            $conf->{$key} = $value;
        } else {
            warn "vm $vmid - unable to parse config: $line\n";
        }
    }

    $conf->{description} = $descr if $descr;

    delete $res->{snapstate}; # just to be sure

    return $res;
}
577
# Return a hash reference { vmid => { type => 'lxc' } } with all entries of
# the cluster VM list that are of type 'lxc' and located on the local node.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();
    my $ids = $vmlist ? $vmlist->{ids} : undef;

    my $res = {};
    return $res if !$ids;

    foreach my $vmid (grep { $_ } keys %$ids) { # the grep skips CT0
        my $d = $ids->{$vmid};
        my $is_local = $d->{node} && $d->{node} eq $nodename;
        my $is_lxc = $d->{type} && $d->{type} eq 'lxc';
        $res->{$vmid}->{type} = 'lxc' if $is_local && $is_lxc;
    }

    return $res;
}
593
# Return the config file path of container $vmid relative to the cluster
# file system. $node defaults to the local node.
sub cfs_config_path {
    my ($vmid, $node) = @_;

    $node ||= $nodename;

    return "nodes/$node/lxc/$vmid.conf";
}
600
# Return the absolute path of the config file of container $vmid below
# /etc/pve. $node is handed through to cfs_config_path().
sub config_file {
    my ($vmid, $node) = @_;

    return '/etc/pve/' . cfs_config_path($vmid, $node);
}
607
# Read and return the parsed configuration of container $vmid. $node
# defaults to the local node. Dies when no configuration is found.
sub load_config {
    my ($vmid, $node) = @_;

    my $cfspath = cfs_config_path($vmid, $node || $nodename);
    my $conf = PVE::Cluster::cfs_read_file($cfspath);

    defined($conf) or die "container $vmid does not exist\n";

    return $conf;
}
619
# Write the configuration of a new container on the local node. The lxc
# config directory is created first; the mkdir result is not checked.
sub create_config {
    my ($vmid, $conf) = @_;

    mkdir "/etc/pve/nodes/$nodename/lxc";

    return write_config($vmid, $conf);
}
628
# Remove the config file of container $vmid on the local node.
sub destroy_config {
    my ($vmid) = @_;

    my $path = config_file($vmid, $nodename);

    return unlink($path);
}
634
# Store $conf as the configuration of container $vmid on the local node.
sub write_config {
    my ($vmid, $conf) = @_;

    return PVE::Cluster::cfs_write_file(cfs_config_path($vmid), $conf);
}
642
# flock: we use one file handle per process, so lock file
# can be called multiple times and will succeed for the same process.

my $lock_handles = {};
my $lockdir = "/run/lock/lxc";

# Return the path of the lock file guarding the config of container $vmid.
sub config_file_lock {
    my ($vmid) = @_;

    return $lockdir . "/pve-config-${vmid}.lock";
}
654
# Run $code->(@param) while holding the config lock of container $vmid,
# using $timeout for the lock. Returns the result of lock_file() and
# re-throws any error left in $@.
sub lock_config_full {
    my ($vmid, $timeout, $code, @param) = @_;

    my $lockfile = config_file_lock($vmid);

    -d $lockdir || mkdir $lockdir;

    my $result = lock_file($lockfile, $timeout, $code, @param);
    if (my $err = $@) {
        die $err;
    }

    return $result;
}
668
# Same as lock_config_full(), but passes the $shared flag on to
# lock_file_full(). Returns its result and re-throws any error left in $@.
sub lock_config_mode {
    my ($vmid, $timeout, $shared, $code, @param) = @_;

    my $lockfile = config_file_lock($vmid);

    -d $lockdir || mkdir $lockdir;

    my $result = lock_file_full($lockfile, $timeout, $shared, $code, @param);
    if (my $err = $@) {
        die $err;
    }

    return $result;
}
682
# Run $code->(@param) under the config lock of container $vmid, with a
# timeout value of 10.
sub lock_config {
    my ($vmid, $code, @param) = @_;

    my $timeout = 10;

    return lock_config_full($vmid, $timeout, $code, @param);
}
688
# Return true when $name is a known container option.
sub option_exists {
    my ($name) = @_;

    my $desc = $confdesc->{$name};

    return defined($desc);
}
694
# Add JSON properties for the create and set functions: every container
# option except 'parent' and 'snaptime' is copied into $prop unless $prop
# already holds a true value for it. Returns $prop.
sub json_config_properties {
    my $prop = shift;

    my @options = grep { $_ ne 'parent' && $_ ne 'snaptime' } keys %$confdesc;

    $prop->{$_} ||= $confdesc->{$_} for @options;

    return $prop;
}
707
708 # container status helpers
709
# Return a hash reference { vmid => 1 } for every container that has an
# abstract command socket "@/var/lib/lxc/<vmid>/command" in /proc/net/unix.
# An empty hash is returned when that file cannot be opened.
# (A similar test is used by list_active_containers in the lxc sources.)
sub list_active_containers {
    my $filename = "/proc/net/unix";

    my $res = {};

    open(my $fh, '<', $filename) or return $res;

    while (defined(my $line = <$fh>)) {
        # the socket path is the last column of the line
        next if $line !~ m/^[a-f0-9]+:\s\S+\s\S+\s\S+\s\S+\s\S+\s\d+\s(\S+)$/;
        my $path = $1;
        $res->{$1} = 1 if $path =~ m!^@/var/lib/lxc/(\d+)/command$!;
    }

    close($fh);

    return $res;
}
733
# Return 1 when container $vmid is listed as active, undef otherwise.
# warning: this is slow
sub check_running {
    my ($vmid) = @_;

    my $active = list_active_containers();

    return defined($active->{$vmid}) ? 1 : undef;
}
744
# Return the result of PVE::Tools::df for the root directory of process
# $pid. $vmid is accepted but not used.
sub get_container_disk_usage {
    my ($vmid, $pid) = @_;

    my $rootdir = "/proc/$pid/root/";

    return PVE::Tools::df($rootdir, 1);
}
750
# Previous CPU sample per container, kept between calls of vmstatus().
my $last_proc_vmid_stat;

# Read cpuacct.stat of container $vmid and return { utime, stime }.
# The hash is empty when the file content has an unexpected layout.
my $parse_cpuacct_stat = sub {
    my ($vmid) = @_;

    my $raw = read_cgroup_value('cpuacct', $vmid, 'cpuacct.stat', 1);

    my %stat;
    @stat{qw(utime stime)} = ($1, $2) if $raw =~ m/^user (\d+)\nsystem (\d+)\n/;

    return \%stat;
};
769
# Collect status information about containers.
#
# Parameters:
#   $opt_vmid - optional; restrict the result to this container. Without it
#               all containers returned by config_list() are reported.
#
# Returns a hash reference { vmid => { ... } }. Every entry holds status,
# name, cpus, disk/maxdisk, mem/maxmem, swap/maxswap, uptime, cpu,
# netin/netout, diskread/diskwrite and template; running containers
# additionally have 'pid'.
sub vmstatus {
    my ($opt_vmid) = @_;

    my $list = $opt_vmid ? { $opt_vmid => { type => 'lxc' }} : config_list();

    my $active_hash = list_active_containers();

    my $cpucount = $cpuinfo->{cpus} || 1;

    my $cdtime = gettimeofday;

    # first pass: configuration based values and defaults for all containers
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};

        eval { $d->{pid} = find_lxc_pid($vmid) if defined($active_hash->{$vmid}); };
        warn $@ if $@; # ignore errors (consider them stopped)

        $d->{status} = $d->{pid} ? 'running' : 'stopped';

        my $cfspath = cfs_config_path($vmid);
        my $conf = PVE::Cluster::cfs_read_file($cfspath) || {};

        $d->{name} = $conf->{'hostname'} || "CT$vmid";
        $d->{name} =~ s/[\s]//g;

        $d->{cpus} = $conf->{cpulimit} || $cpucount;

        if ($d->{pid}) {
            my $res = get_container_disk_usage($vmid, $d->{pid});
            $d->{disk} = $res->{used};
            $d->{maxdisk} = $res->{total};
        } else {
            $d->{disk} = 0;
            # use 4GB by default ??
            if (my $rootfs = $conf->{rootfs}) {
                my $rootinfo = parse_ct_rootfs($rootfs);
                $d->{maxdisk} = int(($rootinfo->{size} || 4)*1024*1024)*1024;
            } else {
                $d->{maxdisk} = 4*1024*1024*1024;
            }
        }

        $d->{mem} = 0;
        $d->{swap} = 0;
        $d->{maxmem} = ($conf->{memory}||512)*1024*1024;
        $d->{maxswap} = ($conf->{swap}//0)*1024*1024;

        $d->{uptime} = 0;
        $d->{cpu} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $d->{template} = is_template($conf);
    }

    # second pass: runtime values of running containers
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};
        my $pid = $d->{pid};

        next if !$pid; # skip stopped CTs

        my $ctime = (stat("/proc/$pid"))[10]; # 10 = ctime
        # stat() returns nothing when the process is already gone; keep the
        # default uptime of 0 then instead of reporting the current time
        $d->{uptime} = time - $ctime if defined($ctime); # the method lxcfs uses

        $d->{mem} = read_cgroup_value('memory', $vmid, 'memory.usage_in_bytes');
        $d->{swap} = read_cgroup_value('memory', $vmid, 'memory.memsw.usage_in_bytes') - $d->{mem};

        my $blkio_bytes = read_cgroup_value('blkio', $vmid, 'blkio.throttle.io_service_bytes', 1);
        foreach my $byte (split(/\n/, $blkio_bytes)) {
            if (my ($key, $value) = $byte =~ /(Read|Write)\s+(\d+)/) {
                # add up all matching lines, so that a container using
                # several block devices is not reported with the counters
                # of the last listed device only
                $d->{diskread} += $value if $key eq 'Read';
                $d->{diskwrite} += $value if $key eq 'Write';
            }
        }

        my $pstat = &$parse_cpuacct_stat($vmid);

        my $used = $pstat->{utime} + $pstat->{stime};

        my $old = $last_proc_vmid_stat->{$vmid};
        if (!$old) {
            # first sample: nothing to compare against yet
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($cdtime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            # too little time has passed; reuse the previous value
            $d->{cpu} = $old->{cpu};
        }
    }

    my $netdev = PVE::ProcFSTools::read_proc_net_dev();

    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^veth([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $list->{$vmid};

        next if !$d;

        # 'receive' of the device counts as container output, 'transmit'
        # as container input
        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};

    }

    return $list;
}
898
# Classify a mount point source: 'device' for paths below /dev/, 'bind' for
# any other absolute path, 'volume' for everything else.
sub classify_mountpoint {
    my ($vol) = @_;

    return 'volume' if $vol !~ m!^/!;

    return $vol =~ m!^/dev/! ? 'device' : 'bind';
}
907
# Parse a mount point property string.
#
# Parameters:
#   $desc  - property-string schema to parse with
#   $data  - the property string; undef is treated as ''
#   $noerr - when true, return undef on errors instead of dying
#
# Returns the parsed hash with 'size' replaced by the result of
# PVE::JSONSchema::parse_size and 'type' set by classify_mountpoint().
my $parse_ct_mountpoint_full = sub {
    my ($desc, $data, $noerr) = @_;

    $data //= '';

    my $res;
    eval { $res = PVE::JSONSchema::parse_property_string($desc, $data) };
    if ($@) {
        return undef if $noerr;
        die $@;
    }

    if (defined(my $size = $res->{size})) {
        # keep the original text in $size, so that the error message can
        # show what failed to parse
        my $parsed = PVE::JSONSchema::parse_size($size);
        if (!defined($parsed)) {
            return undef if $noerr;
            die "invalid size: $size\n";
        }
        $res->{size} = $parsed;
    }

    $res->{type} = classify_mountpoint($res->{volume});

    return $res;
};
933
# Parse a 'rootfs' property string. The result always has 'mp' set to '/'.
# With $noerr set, undef is returned on parse errors.
sub parse_ct_rootfs {
    my ($data, $noerr) = @_;

    my $rootfs = $parse_ct_mountpoint_full->($rootfs_desc, $data, $noerr);
    return $rootfs if !defined($rootfs);

    $rootfs->{mp} = '/';

    return $rootfs;
}
943
# Parse an mpN property string with the mount point schema. With $noerr
# set, undef is returned on parse errors.
sub parse_ct_mountpoint {
    my ($data, $noerr) = @_;

    return $parse_ct_mountpoint_full->($mp_desc, $data, $noerr);
}
949
# Format a parsed mount point as a property string. The 'type' key is
# always skipped; 'mp' is skipped as well when $nomp is set.
sub print_ct_mountpoint {
    my ($info, $nomp) = @_;

    my @skip = ('type', $nomp ? ('mp') : ());

    return PVE::JSONSchema::print_property_string($info, $mp_desc, \@skip);
}
956
# Format a parsed network configuration as a property string.
sub print_lxc_network {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $netconf_desc);
}
961
# Parse a netN property string. The result always has type 'veth' and gets
# a random MAC address when none is configured. An empty hash is returned
# for empty input.
sub parse_lxc_network {
    my ($data) = @_;

    my $net = {};

    if ($data) {
        $net = PVE::JSONSchema::parse_property_string($netconf_desc, $data);

        $net->{type} = 'veth';
        $net->{hwaddr} ||= PVE::Tools::random_ether_addr();
    }

    return $net;
}
976
# Read the cgroup file $name of controller $group for container $vmid.
# Returns the whole file content when $full is set, else only the first line.
sub read_cgroup_value {
    my ($group, $vmid, $name, $full) = @_;

    my $path = "/sys/fs/cgroup/$group/lxc/$vmid/$name";

    return $full
        ? PVE::Tools::file_get_contents($path)
        : PVE::Tools::file_read_firstline($path);
}
986
# Write $value to the cgroup file $name of controller $group for container
# $vmid. Nothing is written when the file does not exist.
sub write_cgroup_value {
    my ($group, $vmid, $name, $value) = @_;

    my $cgroup_file = "/sys/fs/cgroup/$group/lxc/$vmid/$name";

    PVE::ProcFSTools::write_proc_entry($cgroup_file, $value) if -e $cgroup_file;
}
994
# Scan /proc for processes whose command line is exactly
# "lxc-console -n <vmid>" (optionally with the /usr/bin/ prefix).
# Returns a hash reference { vmid => [pid, ...] }.
sub find_lxc_console_pids {
    my $res = {};

    my $inspect = sub {
        my ($pid) = @_;

        my $cmdline = PVE::Tools::file_read_firstline("/proc/$pid/cmdline");
        return if !$cmdline;

        # exactly three NUL separated arguments are expected
        my ($cmd, $flag, $vmid, @extra) = split(/\0/, $cmdline);
        return if !defined($vmid) || @extra;

        return if $flag ne '-n';
        return if $vmid !~ m/^\d+$/;
        return if $cmd !~ m|^(/usr/bin/)?lxc-console$|;

        push @{$res->{$vmid}}, $pid;
    };

    PVE::Tools::dir_glob_foreach('/proc', '\d+', $inspect);

    return $res;
}
1020
# Return the PID printed by "lxc-info -n <vmid> -p". Dies when the output
# contains no PID line.
sub find_lxc_pid {
    my ($vmid) = @_;

    my $pid;
    my $cmd = ['lxc-info', '-n', $vmid, '-p'];

    PVE::Tools::run_command($cmd, outfunc => sub {
        my ($line) = @_;
        $pid = $1 if $line =~ m/^PID:\s+(\d+)$/;
    });

    $pid or die "unable to get PID for CT $vmid (not running?)\n";

    return $pid;
}
1035
# Split an IPv4 "address/prefix" string into { address, netmask }.
# Only prefix lengths from 8 to 32 are accepted. On failure the function
# dies, or returns undef when $noerr is set.
# Note: we cannot use Net::IP, because that only allows strict
# CIDR networks
sub parse_ipv4_cidr {
    my ($cidr, $noerr) = @_;

    my ($address, $bits) = $cidr =~ m!^($IPV4RE)(?:/(\d+))$!;

    if (defined($bits) && $bits > 7 && $bits <= 32) {
        my $netmask = $PVE::Network::ipv4_reverse_mask->[$bits];
        return { address => $address, netmask => $netmask };
    }

    return undef if $noerr;

    die "unable to parse ipv4 address/mask\n";
}
1049
# Die when the configuration carries a lock.
sub check_lock {
    my ($conf) = @_;

    my $lock = $conf->{'lock'};

    die "VM is locked ($lock)\n" if $lock;
}
1055
# Die with "$err_msg - protection mode enabled" when the protection flag
# is set in the given configuration.
sub check_protection {
    my ($vm_conf, $err_msg) = @_;

    die "$err_msg - protection mode enabled\n" if $vm_conf->{protection};
}
1063
# Generate the LXC config file /var/lib/lxc/<vmid>/config from the
# container configuration.
#
# Parameters:
#   $storage_cfg - not used by this function
#   $vmid        - container ID
#   $conf        - parsed container configuration
#
# For templates the config file is removed and nothing is written.
# Dies when 'arch' or 'ostype' is missing, when the ostype is not
# supported, or when 'rootfs' cannot be parsed.
sub update_lxc_config {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $dir = "/var/lib/lxc/$vmid";

    if ($conf->{template}) {

        unlink "$dir/config";

        return;
    }

    my $raw = '';

    die "missing 'arch' - internal error" if !$conf->{arch};
    $raw .= "lxc.arch = $conf->{arch}\n";

    my $unprivileged = $conf->{unprivileged};
    my $custom_idmap = grep { $_->[0] eq 'lxc.id_map' } @{$conf->{lxc}};

    my $ostype = $conf->{ostype} || die "missing 'ostype' - internal error";
    if ($ostype =~ /^(?:debian | ubuntu | centos | fedora | opensuse | archlinux | alpine)$/x) {
        # prefer the distribution specific include and fall back to the
        # generic one
        my $inc ="/usr/share/lxc/config/$ostype.common.conf";
        $inc ="/usr/share/lxc/config/common.conf" if !-f $inc;
        $raw .= "lxc.include = $inc\n";
        if ($unprivileged || $custom_idmap) {
            $inc = "/usr/share/lxc/config/$ostype.userns.conf";
            $inc = "/usr/share/lxc/config/userns.conf" if !-f $inc;
            $raw .= "lxc.include = $inc\n"
        }
    } else {
        die "implement me (ostype $ostype)";
    }

    # WARNING: DO NOT REMOVE this without making sure that loop device nodes
    # cannot be exposed to the container with r/w access (cgroup perms).
    # When this is enabled mounts will still remain in the monitor's namespace
    # after the container unmounted them and thus will not detach from their
    # files while the container is running!
    $raw .= "lxc.monitor.unshare = 1\n";

    # Should we read them from /etc/subuid?
    if ($unprivileged && !$custom_idmap) {
        $raw .= "lxc.id_map = u 0 100000 65536\n";
        $raw .= "lxc.id_map = g 0 100000 65536\n";
    }

    if (!has_dev_console($conf)) {
        $raw .= "lxc.console = none\n";
        $raw .= "lxc.cgroup.devices.deny = c 5:1 rwm\n";
    }

    my $ttycount = get_tty_count($conf);
    $raw .= "lxc.tty = $ttycount\n";

    # some init scripts expect a linux terminal (turnkey).
    $raw .= "lxc.environment = TERM=linux\n";

    my $utsname = $conf->{hostname} || "CT$vmid";
    $raw .= "lxc.utsname = $utsname\n";

    my $memory = $conf->{memory} || 512;
    my $swap = $conf->{swap} // 0;

    my $lxcmem = int($memory*1024*1024);
    $raw .= "lxc.cgroup.memory.limit_in_bytes = $lxcmem\n";

    # the memsw limit covers memory plus swap
    my $lxcswap = int(($memory + $swap)*1024*1024);
    $raw .= "lxc.cgroup.memory.memsw.limit_in_bytes = $lxcswap\n";

    if (my $cpulimit = $conf->{cpulimit}) {
        $raw .= "lxc.cgroup.cpu.cfs_period_us = 100000\n";
        my $value = int(100000*$cpulimit);
        $raw .= "lxc.cgroup.cpu.cfs_quota_us = $value\n";
    }

    my $shares = $conf->{cpuunits} || 1024;
    $raw .= "lxc.cgroup.cpu.shares = $shares\n";

    # the result is not used, but parsing dies on an invalid rootfs value
    my $mountpoint = parse_ct_rootfs($conf->{rootfs});

    $raw .= "lxc.rootfs = $dir/rootfs\n";

    # emit the interfaces ordered by index, so that the same configuration
    # always yields the same file instead of following random hash order
    my @net_indices = sort { $a <=> $b } map { m/^net(\d+)$/ ? $1 : () } keys %$conf;

    my $netcount = 0;
    foreach my $ind (@net_indices) {
        my $d = parse_lxc_network($conf->{"net$ind"});
        $netcount++;
        $raw .= "lxc.network.type = veth\n";
        $raw .= "lxc.network.veth.pair = veth${vmid}i${ind}\n";
        $raw .= "lxc.network.hwaddr = $d->{hwaddr}\n" if defined($d->{hwaddr});
        $raw .= "lxc.network.name = $d->{name}\n" if defined($d->{name});
        $raw .= "lxc.network.mtu = $d->{mtu}\n" if defined($d->{mtu});
    }

    if (my $lxcconf = $conf->{lxc}) {
        foreach my $entry (@$lxcconf) {
            my ($k, $v) = @$entry;
            $netcount++ if $k eq 'lxc.network.type';
            $raw .= "$k = $v\n";
        }
    }

    $raw .= "lxc.network.type = empty\n" if !$netcount;

    File::Path::mkpath("$dir/rootfs");

    PVE::Tools::file_set_contents("$dir/config", $raw);
}
1174
# verify and cleanup nameserver list (replace \0 with ' ')
# Every entry is checked with PVE::JSONSchema::pve_verify_ip; the entries
# are returned joined by single spaces.
sub verify_nameserver_list {
    my ($nameserver_list) = @_;

    my @servers = PVE::Tools::split_list($nameserver_list);

    PVE::JSONSchema::pve_verify_ip($_) for @servers;

    return join(' ', @servers);
}
1187
# Normalise a search domain list: split it with PVE::Tools::split_list() and
# return the entries joined by single spaces. No validation is performed.
sub verify_searchdomain_list {
    my ($searchdomain_list) = @_;

    # todo: should we add checks for valid dns domains?
    my @domains = PVE::Tools::split_list($searchdomain_list);

    return join(' ', @domains);
}
1199
# Record $volid in the first free 'unusedN' slot of $config.
# Returns the key that was used, or nothing when the volume is already listed
# in a slot that was inspected. Dies when no slot is free.
sub add_unused_volume {
    my ($config, $volid) = @_;

    my $free_slot;
    # Walk from the highest index down, so the last free slot remembered is
    # the one with the lowest index.
    for my $idx (reverse 0 .. $MAX_UNUSED_DISKS - 1) {
        my $slot = "unused$idx";
        my $current = $config->{$slot};
        if (!$current) {
            $free_slot = $slot;
            next;
        }
        return if $current eq $volid; # do not add duplicates
    }

    die "Too many unused volumes - please delete them first.\n" if !$free_slot;

    $config->{$free_slot} = $volid;

    return $free_slot;
}
1219
# Apply option changes and deletions to a container configuration.
#
# Parameters:
#   $vmid    - container ID
#   $conf    - configuration hash, modified in place
#   $running - true when the container is running; every change is then
#              written out immediately via write_config(), and options that
#              cannot be changed live are collected and reported at the end
#   $param   - hash ref of options to set ('memory' and 'swap' are extracted
#              from it and handled together)
#   $delete  - optional array ref of option names to delete
#
# Dies on required, read-only or unknown options, and - as the very last
# step - when non-hotpluggable options were requested for a running container.
sub update_pct_config {
    my ($vmid, $conf, $running, $param, $delete) = @_;

    my @nohotplug;

    my $new_disks = 0;
    my @deleted_volumes;

    my $rootdir;
    if ($running) {
        my $pid = find_lxc_pid($vmid);
        $rootdir = "/proc/$pid/root";
    }

    # Returns true (and remembers the option names) when the container is
    # running, i.e. when the caller has to skip the change.
    my $hotplug_error = sub {
        if ($running) {
            push @nohotplug, @_;
            return 1;
        } else {
            return 0;
        }
    };

    if (defined($delete)) {
        foreach my $opt (@$delete) {
            if (!exists($conf->{$opt})) {
                warn "no such option: $opt\n";
                next;
            }

            if ($opt eq 'hostname' || $opt eq 'memory' || $opt eq 'rootfs') {
                die "unable to delete required option '$opt'\n";
            } elsif ($opt eq 'swap') {
                delete $conf->{$opt};
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", -1);
            } elsif ($opt eq 'description' || $opt eq 'onboot' || $opt eq 'startup') {
                delete $conf->{$opt};
            } elsif ($opt eq 'nameserver' || $opt eq 'searchdomain' ||
                     $opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
                next if $hotplug_error->($opt);
                delete $conf->{$opt};
            } elsif ($opt =~ m/^net(\d+)$/) {
                # \d+ (not \d) so that net10 and above can be deleted too,
                # matching the pattern used when setting netN below.
                my $netid = $1;
                delete $conf->{$opt};
                next if !$running;
                PVE::Network::veth_delete("veth${vmid}i$netid");
            } elsif ($opt eq 'protection') {
                delete $conf->{$opt};
            } elsif ($opt =~ m/^unused(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                push @deleted_volumes, $conf->{$opt};
                delete $conf->{$opt};
            } elsif ($opt =~ m/^mp(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                my $mountpoint = parse_ct_mountpoint($conf->{$opt});
                if ($mountpoint->{type} eq 'volume') {
                    add_unused_volume($conf, $mountpoint->{volume});
                }
                delete $conf->{$opt};
            } elsif ($opt eq 'unprivileged') {
                die "unable to delete read-only option: '$opt'\n";
            } else {
                die "implement me (delete: $opt)";
            }
            write_config($vmid, $conf) if $running;
        }
    }

    # There's no separate swap size to configure, there's memory and "total"
    # memory (iow. memory+swap). This means we have to change them together.
    my $wanted_memory = PVE::Tools::extract_param($param, 'memory');
    my $wanted_swap = PVE::Tools::extract_param($param, 'swap');
    if (defined($wanted_memory) || defined($wanted_swap)) {

        my $old_memory = ($conf->{memory} || 512);
        my $old_swap = ($conf->{swap} || 0);

        $wanted_memory //= $old_memory;
        $wanted_swap //= $old_swap;

        my $total = $wanted_memory + $wanted_swap;
        if ($running) {
            my $old_total = $old_memory + $old_swap;
            # The write order depends on whether the total grows or shrinks:
            # when growing the memsw limit is raised first, otherwise the
            # memory limit is lowered first.
            if ($total > $old_total) {
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
                write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
            } else {
                write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
            }
        }
        $conf->{memory} = $wanted_memory;
        $conf->{swap} = $wanted_swap;

        write_config($vmid, $conf) if $running;
    }

    foreach my $opt (keys %$param) {
        my $value = $param->{$opt};
        if ($opt eq 'hostname') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'onboot') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt eq 'startup') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
            next if $hotplug_error->($opt);
            $conf->{$opt} = $value;
        } elsif ($opt eq 'nameserver') {
            next if $hotplug_error->($opt);
            my $list = verify_nameserver_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'searchdomain') {
            next if $hotplug_error->($opt);
            my $list = verify_searchdomain_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'cpulimit') {
            next if $hotplug_error->($opt); # FIXME: hotplug
            $conf->{$opt} = $value;
        } elsif ($opt eq 'cpuunits') {
            $conf->{$opt} = $value;
            write_cgroup_value("cpu", $vmid, "cpu.shares", $value);
        } elsif ($opt eq 'description') {
            $conf->{$opt} = PVE::Tools::encode_text($value);
        } elsif ($opt =~ m/^net(\d+)$/) {
            my $netid = $1;
            my $net = parse_lxc_network($value);
            if (!$running) {
                $conf->{$opt} = print_lxc_network($net);
            } else {
                update_net($vmid, $conf, $opt, $net, $netid, $rootdir);
            }
        } elsif ($opt eq 'protection') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt =~ m/^mp(\d+)$/) {
            next if $hotplug_error->($opt);
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            $conf->{$opt} = $value;
            $new_disks = 1;
        } elsif ($opt eq 'rootfs') {
            next if $hotplug_error->($opt);
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            $conf->{$opt} = $value;
        } elsif ($opt eq 'unprivileged') {
            die "unable to modify read-only option: '$opt'\n";
        } else {
            die "implement me: $opt";
        }
        write_config($vmid, $conf) if $running;
    }

    if (@deleted_volumes) {
        my $storage_cfg = PVE::Storage::config();
        foreach my $volume (@deleted_volumes) {
            delete_mountpoint_volume($storage_cfg, $vmid, $volume);
        }
    }

    if ($new_disks) {
        my $storage_cfg = PVE::Storage::config();
        create_disks($storage_cfg, $vmid, $conf, $conf);
    }

    # This should be the last thing we do here
    if ($running && scalar(@nohotplug)) {
        die "unable to modify " . join(',', @nohotplug) . " while container is running\n";
    }
}
1390
# True unless the 'console' option is explicitly set to a false value.
sub has_dev_console {
    my ($conf) = @_;

    my $console = $conf->{console};

    return !defined($console) || !!$console;
}
1396
# Configured 'tty' value, falling back to the default declared in $confdesc.
sub get_tty_count {
    my ($conf) = @_;

    my $tty = $conf->{tty};

    return defined($tty) ? $tty : $confdesc->{tty}->{default};
}
1402
# Configured 'cmode' value, falling back to the default declared in $confdesc.
sub get_cmode {
    my ($conf) = @_;

    my $cmode = $conf->{cmode};

    return defined($cmode) ? $cmode : $confdesc->{cmode}->{default};
}
1408
# Build the command (array ref) used to open a console for container $vmid,
# chosen by the console mode returned from get_cmode(). Dies on an unknown mode.
sub get_console_command {
    my ($vmid, $conf) = @_;

    my %command_for = (
        console => ['lxc-console', '-n', $vmid, '-t', 0],
        tty     => ['lxc-console', '-n', $vmid],
        shell   => ['lxc-attach', '--clear-env', '-n', $vmid],
    );

    my $cmode = get_cmode($conf);
    my $cmd = $command_for{$cmode};
    die "internal error" if !$cmd;

    return $cmd;
}
1424
# Return ($ipv4, $ipv6) taken from the net0 entry, with any trailing
# "/<digits>" prefix length removed. Keyword values (dhcp/manual for IPv4,
# auto/dhcp/manual for IPv6) yield undef. Returns undef when net0 is not set.
sub get_primary_ips {
    my ($conf) = @_;

    # return data from net0
    my $net0 = $conf->{net0};
    return undef if !defined($net0);
    my $net = parse_lxc_network($net0);

    # Empty values pass through untouched, keywords become undef and real
    # addresses lose their prefix length.
    my $plain_address = sub {
        my ($addr, $keyword_re) = @_;
        return $addr if !$addr;
        return undef if $addr =~ $keyword_re;
        $addr =~ s!/\d+$!!;
        return $addr;
    };

    return (
        $plain_address->($net->{ip}, qr/^(dhcp|manual)$/),
        $plain_address->($net->{ip6}, qr/^(auto|dhcp|manual)$/),
    );
}
1452
# Free the storage volume behind a mount point, but only when it is classified
# as 'volume' and the owner reported by PVE::Storage::parse_volname() is $vmid.
sub delete_mountpoint_volume {
    my ($storage_cfg, $vmid, $volume) = @_;

    my $kind = classify_mountpoint($volume);
    return if $kind ne 'volume';

    # parse_volname() returns (vtype, name, owner, ...); only the owner matters.
    my @parsed = PVE::Storage::parse_volname($storage_cfg, $volume);
    my $owner = $parsed[2];
    PVE::Storage::vdisk_free($storage_cfg, $volume) if $vmid == $owner;
}
1461
# Remove a container: free the volumes of all its mount points, delete the
# directory under /var/lib/lxc and finally drop the configuration.
sub destroy_lxc_container {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $free_volume = sub {
        my ($ms, $mountpoint) = @_;
        delete_mountpoint_volume($storage_cfg, $vmid, $mountpoint->{volume});
    };
    foreach_mountpoint($conf, $free_volume);

    # Failures of rmdir/unlink are not checked.
    rmdir "/var/lib/lxc/$vmid/rootfs";
    unlink "/var/lib/lxc/$vmid/config";
    rmdir "/var/lib/lxc/$vmid";
    destroy_config($vmid);

    #my $cmd = ['lxc-destroy', '-n', $vmid ];
    #PVE::Tools::run_command($cmd);
}
1478
# Clean up after a container stopped: unless $keepActive is set, deactivate
# all of its volumes. Errors are reported as warnings only.
sub vm_stop_cleanup {
    my ($storage_cfg, $vmid, $conf, $keepActive) = @_;

    eval {
        unless ($keepActive) {
            my $vollist = get_vm_volumes($conf);
            PVE::Storage::deactivate_volumes($storage_cfg, $vollist);
        }
    };
    my $cleanup_error = $@;
    warn $cleanup_error if $cleanup_error; # avoid errors - just warn
}
1491
# Numeric "not equal" that tolerates undef: two undefs are equal, one undef
# differs from any defined value.
my $safe_num_ne = sub {
    my ($lhs, $rhs) = @_;

    my $missing = grep { !defined($_) } ($lhs, $rhs);
    return 0 if $missing == 2;
    return 1 if $missing;

    return $lhs != $rhs;
};
1501
# String "not equal" that tolerates undef: two undefs are equal, one undef
# differs from any defined value.
my $safe_string_ne = sub {
    my ($lhs, $rhs) = @_;

    my $missing = grep { !defined($_) } ($lhs, $rhs);
    return 0 if $missing == 2;
    return 1 if $missing;

    return $lhs ne $rhs;
};
1511
# Apply a changed netN setting to a running container.
# - No previous entry: the interface is hot-plugged.
# - hwaddr or name changed: the veth device is removed and plugged again.
# - bridge, tag or firewall changed: the device is re-plugged on the bridge.
# The config is written after each step; afterwards the IP settings are
# handed to update_ipconfig(). Only interfaces of type 'veth' are supported.
sub update_net {
    my ($vmid, $conf, $opt, $newnet, $netid, $rootdir) = @_;

    # for when there are physical interfaces
    die "cannot update interface of type $newnet->{type}"
        if $newnet->{type} ne 'veth';

    my $veth = "veth${vmid}i${netid}";
    my $eth = $newnet->{name};
    my @bridge_props = qw(bridge tag firewall);

    my $oldnetcfg = $conf->{$opt};
    if (!$oldnetcfg) {
        hotplug_net($vmid, $conf, $opt, $newnet, $netid);
    } else {
        my $oldnet = parse_lxc_network($oldnetcfg);

        if ($safe_string_ne->($oldnet->{hwaddr}, $newnet->{hwaddr}) ||
            $safe_string_ne->($oldnet->{name}, $newnet->{name})) {

            PVE::Network::veth_delete($veth);
            delete $conf->{$opt};
            write_config($vmid, $conf);

            hotplug_net($vmid, $conf, $opt, $newnet, $netid);

        } elsif ($safe_string_ne->($oldnet->{bridge}, $newnet->{bridge}) ||
                 $safe_num_ne->($oldnet->{tag}, $newnet->{tag}) ||
                 $safe_num_ne->($oldnet->{firewall}, $newnet->{firewall})) {

            if ($oldnet->{bridge}) {
                # Record the unplugged state first, so the stored config
                # matches reality if plugging fails below.
                PVE::Network::tap_unplug($veth);
                delete $oldnet->{$_} for @bridge_props;
                $conf->{$opt} = print_lxc_network($oldnet);
                write_config($vmid, $conf);
            }

            PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});
            for my $prop (@bridge_props) {
                $oldnet->{$prop} = $newnet->{$prop} if $newnet->{$prop};
            }
            $conf->{$opt} = print_lxc_network($oldnet);
            write_config($vmid, $conf);
        }
    }

    update_ipconfig($vmid, $conf, $opt, $eth, $newnet, $rootdir);
}
1561
# Create a veth pair for netN, plug the host side, move the peer into the
# running container and bring it up there. On success the bridge, tag,
# firewall, hwaddr and name settings are stored in the config (IP settings
# are not written here).
sub hotplug_net {
    my ($vmid, $conf, $opt, $newnet, $netid) = @_;

    my $veth = "veth${vmid}i${netid}";
    my $vethpeer = $veth . "p";
    my $eth = $newnet->{name};

    PVE::Network::veth_create($veth, $vethpeer, $newnet->{bridge}, $newnet->{hwaddr});
    PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});

    # attach peer in container
    PVE::Tools::run_command(['lxc-device', '-n', $vmid, 'add', $vethpeer, "$eth" ]);

    # link up peer in container
    PVE::Tools::run_command(
        ['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', '/sbin/ip', 'link', 'set', $eth ,'up' ]);

    my $done = { type => 'veth' };
    for my $prop (qw(bridge tag firewall hwaddr name)) {
        my $setting = $newnet->{$prop};
        $done->{$prop} = $setting if $setting;
    }
    $conf->{$opt} = print_lxc_network($done);

    write_config($vmid, $conf);
}
1588
# Apply changed IP/gateway settings of a netN entry inside a running container.
#
# Parameters:
#   $vmid    - container ID
#   $conf    - configuration hash; $conf->{$opt} is updated and written out
#   $opt     - name of the network option (e.g. 'net0')
#   $eth     - interface name inside the container
#   $newnet  - parsed new network settings
#   $rootdir - passed on to PVE::LXC::Setup->new()
#
# IPv4 and IPv6 are processed one after the other. For each family the new
# address is added first, then the default route is replaced, and only then
# is the old address removed, so a failure in an early step leaves the old
# setup in place.
sub update_ipconfig {
    my ($vmid, $conf, $opt, $eth, $newnet, $rootdir) = @_;

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);

    my $optdata = parse_lxc_network($conf->{$opt});

    # Run a command inside the container's network namespace; the first
    # argument is a hash ref of extra run_command() options.
    my $nscmd = sub {
        my $cmdargs = shift;
        PVE::Tools::run_command(['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', @_], %$cmdargs);
    };
    my $ipcmd = sub { $nscmd->({}, '/sbin/ip', @_) };

    # Keywords that may appear in place of an address.
    my $keyword_re = qr/^(?:auto|dhcp|manual)$/;

    my $change_ip_config = sub {
        my ($ipversion) = @_;

        my $family_opt = "-$ipversion";
        my $suffix = $ipversion == 4 ? '' : $ipversion;
        my $gw = "gw$suffix";
        my $ip = "ip$suffix";

        my $newip = $newnet->{$ip};
        my $newgw = $newnet->{$gw};
        my $oldip = $optdata->{$ip};

        my $change_ip = $safe_string_ne->($oldip, $newip);
        my $change_gw = $safe_string_ne->($optdata->{$gw}, $newgw);

        return if !$change_ip && !$change_gw;

        # step 1: add new IP, if this fails we cancel
        my $is_real_ip = ($newip && $newip !~ $keyword_re);
        my $added_ip = 0;
        if ($change_ip && $is_real_ip) {
            eval { $ipcmd->($family_opt, 'addr', 'add', $newip, 'dev', $eth); };
            if (my $err = $@) {
                warn $err;
                return;
            }
            $added_ip = 1;
        }

        # step 2: replace gateway
        # If this fails we delete the added IP and cancel.
        # If it succeeds we delete the old IP, ignoring errors, and the
        # config is then saved.
        # Note: 'ip route replace' also adds the route if none exists yet.
        if ($change_gw) {
            if ($newgw) {
                eval {
                    if ($is_real_ip && !PVE::Network::is_ip_in_cidr($newgw, $newip, $ipversion)) {
                        $ipcmd->($family_opt, 'route', 'add', $newgw, 'dev', $eth);
                    }
                    $ipcmd->($family_opt, 'route', 'replace', 'default', 'via', $newgw);
                };
                if (my $err = $@) {
                    warn $err;
                    # the route was not replaced, the old IP is still available
                    # rollback (delete new IP) and cancel
                    # Only roll back an address that step 1 actually added;
                    # keywords such as 'dhcp' were never configured here.
                    if ($added_ip) {
                        eval { $ipcmd->($family_opt, 'addr', 'del', $newip, 'dev', $eth); };
                        warn $@ if $@; # no need to die here
                    }
                    return;
                }
            } else {
                eval { $ipcmd->($family_opt, 'route', 'del', 'default'); };
                # if the route was not deleted, the guest might have deleted it manually
                # warn and continue
                warn $@ if $@;
            }
        }

        # from this point on we save the configuration
        # step 3: delete old IP ignoring errors
        # 'manual' is a keyword like 'auto' and 'dhcp', not an address that
        # could be deleted.
        if ($change_ip && $oldip && $oldip !~ $keyword_re) {
            # We need to enable promote_secondaries, otherwise our newly added
            # address will be removed along with the old one.
            my $promote = 0;
            eval {
                if ($ipversion == 4) {
                    $nscmd->({ outfunc => sub { $promote = int(shift) } },
                             'cat', "/proc/sys/net/ipv4/conf/$eth/promote_secondaries");
                    $nscmd->({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=1");
                }
                $ipcmd->($family_opt, 'addr', 'del', $oldip, 'dev', $eth);
            };
            warn $@ if $@; # no need to die here

            # Restore the previous promote_secondaries value.
            if ($ipversion == 4) {
                $nscmd->({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=$promote");
            }
        }

        foreach my $property ($ip, $gw) {
            if ($newnet->{$property}) {
                $optdata->{$property} = $newnet->{$property};
            } else {
                delete $optdata->{$property};
            }
        }
        $conf->{$opt} = print_lxc_network($optdata);
        write_config($vmid, $conf);
        $lxc_setup->setup_network($conf);
    };

    $change_ip_config->(4);
    $change_ip_config->(6);

}
1698
1699 # Internal snapshots
1700
1701 # NOTE: Snapshot create/delete involves several non-atomic
1702 # actions, and can take a long time.
1703 # So we try to avoid locking the file and use the 'lock' variable
1704 # inside the config file instead.
1705
# Copy every key of $source into $dest, except snapshot bookkeeping and
# per-config metadata (snapshots, snapstate, snaptime, vmstate, lock, digest,
# description).
my $snapshot_copy_config = sub {
    my ($source, $dest) = @_;

    my %skip = map { $_ => 1 }
        qw(snapshots snapstate snaptime vmstate lock digest description);

    for my $key (grep { !$skip{$_} } keys %$source) {
        $dest->{$key} = $source->{$key};
    }
};
1721
# First phase of snapshot creation, run under the config lock: set
# lock = 'snapshot', store a copy of the current settings as snapshot
# $snapname with snapstate 'prepare', and write the config.
# Returns the new snapshot hash. Dies for templates, for an already used
# snapshot name, or when has_feature() reports the feature as unavailable.
my $snapshot_prepare = sub {
    my ($vmid, $snapname, $comment) = @_;

    my $snap;

    lock_config($vmid, sub {
        my $conf = load_config($vmid);

        if (is_template($conf)) {
            die "you can't take a snapshot if it's a template\n";
        }

        check_lock($conf);

        $conf->{lock} = 'snapshot';

        if (defined($conf->{snapshots}->{$snapname})) {
            die "snapshot name '$snapname' already used\n";
        }

        my $storecfg = PVE::Storage::config();
        my $feature = ($snapname eq 'vzdump') ? 'vzdump' : 'snapshot';
        if (!has_feature($feature, $conf, $storecfg)) {
            die "snapshot feature is not available\n";
        }

        $snap = {};
        $conf->{snapshots}->{$snapname} = $snap;

        $snapshot_copy_config->($conf, $snap);

        $snap->{'snapstate'} = "prepare";
        $snap->{'snaptime'} = time();
        $snap->{'description'} = $comment if $comment;

        write_config($vmid, $conf);
    });

    return $snap;
};
1761
# Final phase of snapshot creation, run under the config lock: check that the
# config holds the 'snapshot' lock and that the snapshot is in state
# 'prepare', then clear both markers and make the snapshot the new parent.
my $snapshot_commit = sub {
    my ($vmid, $snapname) = @_;

    lock_config($vmid, sub {
        my $conf = load_config($vmid);

        my $lock = $conf->{lock};
        die "missing snapshot lock\n"
            if !$lock || $lock ne 'snapshot';

        my $snap = $conf->{snapshots}->{$snapname};
        die "snapshot '$snapname' does not exist\n"
            if !defined($snap);

        my $state = $snap->{'snapstate'};
        die "wrong snapshot state\n"
            if !$state || $state ne "prepare";

        delete $snap->{'snapstate'};
        delete $conf->{lock};
        $conf->{parent} = $snapname;

        write_config($vmid, $conf);
    });
};
1788
# Check whether the storage volumes of a container support $feature.
# 'vzdump' is checked as 'snapshot', but then skips non-rootfs mount points
# that are not flagged for backup. Returns 1 when every inspected volume
# supports the feature and 0 otherwise; dies for any inspected mount point
# other than rootfs (not implemented yet).
sub has_feature {
    my ($feature, $conf, $storecfg, $snapname) = @_;

    my $for_vzdump = ($feature eq 'vzdump');
    $feature = 'snapshot' if $for_vzdump;

    my $unsupported;
    my $inspect = sub {
        my ($ms, $mountpoint) = @_;

        return if $unsupported; # skip further test
        return if $for_vzdump && $ms ne 'rootfs' && !$mountpoint->{backup};

        my $volid = $mountpoint->{volume};
        if (!PVE::Storage::volume_has_feature($storecfg, $feature, $volid, $snapname)) {
            $unsupported = 1;
        }

        # TODO: implement support for mountpoints
        die "unable to handle mountpoint '$ms' - feature not implemented\n"
            if $ms ne 'rootfs';
    };
    foreach_mountpoint($conf, $inspect);

    return $unsupported ? 0 : 1;
}
1811
# Move the current process into namespace $which (a name under
# /proc/$pid/ns) of process $pid using PVE::Tools::setns(); $type is the
# matching CLONE_* flag. $vmid only appears in error messages.
my $enter_namespace = sub {
    my ($vmid, $pid, $which, $type) = @_;

    my $ns_path = "/proc/$pid/ns/$which";
    sysopen(my $fd, $ns_path, O_RDONLY)
        or die "failed to open $which namespace of container $vmid: $!\n";

    PVE::Tools::setns(fileno($fd), $type)
        or die "failed to enter $which namespace of container $vmid: $!\n";

    close $fd;
};
1820
# Child side of sync_container_namespace(): enter the container's mount
# namespace, signal the parent with "go", read the mount table the parent
# sends back over $socket and sync every listed mount point except lxcfs
# ones. Sync errors are only warned about.
my $do_syncfs = sub {
    my ($vmid, $pid, $socket) = @_;

    $enter_namespace->($vmid, $pid, 'mnt', PVE::Tools::CLONE_NEWNS);

    # Tell the parent process to start reading our /proc/mounts
    print {$socket} "go\n";
    $socket->flush();

    # Receive /proc/self/mounts
    my $mountdata = do { local $/ = undef; <$socket> };
    close $socket;

    # Now sync all mountpoints...
    my $mounts = PVE::ProcFSTools::parse_mounts($mountdata);
    for my $entry (@$mounts) {
        my $dir = $entry->[1];
        my $fs = $entry->[2];
        next if $fs eq 'fuse.lxcfs';

        eval { PVE::Tools::sync_mountpoint($dir); };
        warn $@ if $@;
    }
};
1843
# Sync all file systems mounted inside a running container.
#
# A child process is forked which enters the container's mount namespace.
# Once it reports "go", the parent reads the child's mount table from
# /proc/<child>/mounts and sends it back over the socket, and the child then
# syncs every mount point. Dies when the child cannot enter the namespace or
# exits with a non-zero status.
sub sync_container_namespace {
    my ($vmid) = @_;
    my $pid = find_lxc_pid($vmid);

    # SOCK_DGRAM is nicer for barriers but cannot be slurped
    socketpair my $pfd, my $cfd, AF_UNIX, SOCK_STREAM, PF_UNSPEC
        or die "failed to create socketpair: $!\n";

    my $child = fork();
    die "fork failed: $!\n" if !defined($child);

    if (!$child) {
        eval {
            close $pfd;
            &$do_syncfs($vmid, $pid, $cfd);
        };
        if (my $err = $@) {
            warn $err;
            POSIX::_exit(1);
        }
        POSIX::_exit(0);
    }
    close $cfd;
    my $go = <$pfd>;
    if (!defined($go) || $go ne "go\n") {
        # The child failed before signalling (EOF reads as undef). Close our
        # end and reap the child, so that no zombie is left behind.
        close $pfd;
        while (waitpid($child, 0) != $child) {}
        die "failed to enter container namespace\n";
    }

    open my $mounts, '<', "/proc/$child/mounts"
        or die "failed to open container's /proc/mounts: $!\n";
    my $mountdata = do { local $/ = undef; <$mounts> };
    close $mounts;
    print {$pfd} $mountdata;
    # Closing our end signals EOF to the child, which slurps the socket.
    close $pfd;

    while (waitpid($child, 0) != $child) {}
    die "failed to sync container namespace\n" if $? != 0;
}
1880
# Create snapshot $snapname of container $vmid.
# The config is prepared first; a running container is frozen and its file
# systems are synced while the rootfs volume snapshot is taken, and it is
# unfrozen again afterwards. On failure the half-created snapshot is removed
# (forced) and the error is re-raised; on success the snapshot is committed.
sub snapshot_create {
    my ($vmid, $snapname, $comment) = @_;

    my $snap = $snapshot_prepare->($vmid, $snapname, $comment);

    my $conf = load_config($vmid);
    my $running = check_running($vmid);

    my $frozen = 0;
    my $drivehash = {};

    eval {
        if ($running) {
            # Set the flag before freezing, so we also unfreeze when the
            # freeze command itself fails half-way.
            $frozen = 1;
            PVE::Tools::run_command(['/usr/bin/lxc-freeze', '-n', $vmid]);
            sync_container_namespace($vmid);
        }

        my $storecfg = PVE::Storage::config();
        my $rootinfo = parse_ct_rootfs($conf->{rootfs});
        my $volid = $rootinfo->{volume};

        PVE::Storage::volume_snapshot($storecfg, $volid, $snapname);
        $drivehash->{rootfs} = 1;
    };
    my $err = $@;

    if ($frozen) {
        eval { PVE::Tools::run_command(['/usr/bin/lxc-unfreeze', '-n', $vmid]); };
        warn $@ if $@;
    }

    if ($err) {
        eval { snapshot_delete($vmid, $snapname, 1, $drivehash); };
        warn "$@\n" if $@;
        die "$err\n";
    }

    $snapshot_commit->($vmid, $snapname);
}
1923
# Delete snapshot $snapname of container $vmid.
#
# Parameters:
#   $force     - when true, the snapshot is removed from the config even if
#                deleting the volume snapshot failed
#   $drivehash - only set when called from snapshot_create; in that case the
#                config lock is not checked and is cleared on removal
#
# The snapshot is first marked with snapstate 'delete', then the rootfs
# volume snapshot is removed, and finally the entry is dropped from the
# config, re-linking the 'parent' of everything that pointed to it.
# Dies when the volume snapshot cannot be deleted; with $force the config
# entry is removed before dying.
sub snapshot_delete {
    my ($vmid, $snapname, $force, $drivehash) = @_;

    my $snap;

    my $conf;

    my $updatefn = sub {

        $conf = load_config($vmid);

        die "you can't delete a snapshot if vm is a template\n"
            if is_template($conf);

        $snap = $conf->{snapshots}->{$snapname};

        if (!$drivehash) {
            check_lock($conf);
        }

        die "snapshot '$snapname' does not exist\n" if !defined($snap);

        $snap->{snapstate} = 'delete';

        write_config($vmid, $conf);
    };

    lock_config($vmid, $updatefn);

    my $storecfg = PVE::Storage::config();

    # Point a config section whose parent is the deleted snapshot at that
    # snapshot's own parent (or drop the link when there is none).
    my $unlink_parent = sub {

        my ($confref, $new_parent) = @_;

        if ($confref->{parent} && $confref->{parent} eq $snapname) {
            if ($new_parent) {
                $confref->{parent} = $new_parent;
            } else {
                delete $confref->{parent};
            }
        }
    };

    my $del_snap = sub {

        $conf = load_config($vmid);

        if ($drivehash) {
            delete $conf->{lock};
        } else {
            check_lock($conf);
        }

        my $parent = $conf->{snapshots}->{$snapname}->{parent};
        foreach my $snapkey (keys %{$conf->{snapshots}}) {
            &$unlink_parent($conf->{snapshots}->{$snapkey}, $parent);
        }

        &$unlink_parent($conf, $parent);

        delete $conf->{snapshots}->{$snapname};

        write_config($vmid, $conf);
    };

    my $rootfs = $conf->{snapshots}->{$snapname}->{rootfs};
    my $rootinfo = parse_ct_rootfs($rootfs);
    my $volid = $rootinfo->{volume};

    eval {
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname);
    };
    my $err = $@;

    # Without $force a storage error must not go unnoticed: report it and
    # keep the config entry (still marked 'delete') for a later retry.
    die "Can't delete snapshot: $vmid $snapname $err\n" if $err && !$force;

    lock_config($vmid, $del_snap);

    # Forced removal: the config entry is gone, but still report the error.
    die "Can't delete snapshot: $vmid $snapname $err\n" if $err;
}
2007
# Roll container $vmid back to snapshot $snapname.
#
# Under the config lock a running container is stopped (lxc-stop --kill),
# the 'rollback' lock is set and the snapshot's settings are copied over the
# current config. Then the rootfs volume is rolled back and the lock is
# released. Dies for templates, unknown or incomplete snapshots, or when the
# container cannot be stopped.
sub snapshot_rollback {
    my ($vmid, $snapname) = @_;

    my $storecfg = PVE::Storage::config();

    my $conf = load_config($vmid);

    die "you can't rollback if vm is a template\n" if is_template($conf);

    my $snap = $conf->{snapshots}->{$snapname};

    die "snapshot '$snapname' does not exist\n" if !defined($snap);

    my $rootfs = $snap->{rootfs};
    my $rootinfo = parse_ct_rootfs($rootfs);
    my $volid = $rootinfo->{volume};

    PVE::Storage::volume_rollback_is_possible($storecfg, $volid, $snapname);

    my $updatefn = sub {

        die "unable to rollback to incomplete snapshot (snapstate = $snap->{snapstate})\n"
            if $snap->{snapstate};

        check_lock($conf);

        # List form: the arguments are passed to lxc-stop directly, without
        # going through a shell.
        system('lxc-stop', '-n', $vmid, '--kill') if check_running($vmid);

        die "unable to rollback vm $vmid: vm is running\n"
            if check_running($vmid);

        $conf->{lock} = 'rollback';

        # copy snapshot config to current config
        # ($snap is the entry stored in $conf->{snapshots}; the snapshot list
        # itself is left untouched)
        &$snapshot_copy_config($snap, $conf);
        delete $conf->{snaptime};
        delete $conf->{snapname};
        $conf->{parent} = $snapname;

        write_config($vmid, $conf);
    };

    my $unlockfn = sub {
        delete $conf->{lock};
        write_config($vmid, $conf);
    };

    lock_config($vmid, $updatefn);

    PVE::Storage::volume_snapshot_rollback($storecfg, $volid, $snapname);

    lock_config($vmid, $unlockfn);
}
2066
# Convert the rootfs volume of a container into a base (template) volume and
# store the new volume ID in the config. Dies when the storage does not
# offer the 'template' feature for that volume.
sub template_create {
    my ($vmid, $conf) = @_;

    my $storecfg = PVE::Storage::config();

    my $rootinfo = parse_ct_rootfs($conf->{rootfs});
    my $volid = $rootinfo->{volume};

    if (!PVE::Storage::volume_has_feature($storecfg, 'template', $volid)) {
        die "Template feature is not available for '$volid'\n";
    }

    PVE::Storage::activate_volumes($storecfg, [$volid]);

    $rootinfo->{volume} = PVE::Storage::vdisk_create_base($storecfg, $volid);
    $conf->{rootfs} = print_ct_mountpoint($rootinfo, 1);

    write_config($vmid, $conf);
}
2086
# Returns 1 when the config has 'template' set to 1, a false value otherwise.
sub is_template {
    my ($conf) = @_;

    my $flag = $conf->{template};

    return (defined($flag) && $flag == 1) ? 1 : !!0;
}
2092
# List of all possible mount point keys: 'rootfs' followed by mp0 ..
# mp<$MAX_MOUNT_POINTS - 1>, in reverse order when $reverse is true.
sub mountpoint_names {
    my ($reverse) = @_;

    my @names = ('rootfs', map { "mp$_" } 0 .. $MAX_MOUNT_POINTS - 1);

    return $reverse ? reverse @names : @names;
}
2104
2105
# Call $func->($key, $mountpoint) for every mount point key present in $conf,
# in the order given by mountpoint_names($reverse). Entries for which the
# parser returns undef are skipped.
sub foreach_mountpoint_full {
    my ($conf, $reverse, $func) = @_;

    for my $key (mountpoint_names($reverse)) {
        my $value = $conf->{$key};
        next if !defined($value);

        my $parser = ($key eq 'rootfs') ? \&parse_ct_rootfs : \&parse_ct_mountpoint;
        my $mountpoint = $parser->($value, 1);
        next if !defined($mountpoint);

        $func->($key, $mountpoint);
    }
}
2118
# Run $func for each mount point of $conf in forward order (rootfs first).
sub foreach_mountpoint {
    my ($conf, $func) = @_;

    my $reverse = 0;
    return foreach_mountpoint_full($conf, $reverse, $func);
}
2124
# Run $func for each mount point of $conf in reverse order (rootfs last).
sub foreach_mountpoint_reverse {
    my ($conf, $func) = @_;

    my $reverse = 1;
    return foreach_mountpoint_full($conf, $reverse, $func);
}
2130
# Check that $authuser may modify every option named in $key_list.
#
# Parameters:
#   $rpcenv   - object providing check_vm_perm()
#   $authuser - authenticated user ID
#   $vmid     - container ID
#   $pool     - pool, passed through to check_vm_perm()
#   $key_list - array ref of option names about to be changed
#
# root@pam may change everything. For any other user each option is mapped to
# the matching VM.Config.* privilege and checked through
# $rpcenv->check_vm_perm(), which is expected to raise on a missing
# privilege - confirm against the RPC environment class. Returns 1.
sub check_ct_modify_config_perm {
    my ($rpcenv, $authuser, $vmid, $pool, $key_list) = @_;

    # Only the superuser skips the checks; with 'ne' here every other user
    # would bypass them.
    return 1 if $authuser eq 'root@pam';

    foreach my $opt (@$key_list) {

        if ($opt eq 'cpus' || $opt eq 'cpuunits' || $opt eq 'cpulimit') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.CPU']);
        } elsif ($opt eq 'rootfs' || $opt =~ /^mp\d+$/) {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk']);
        } elsif ($opt eq 'memory' || $opt eq 'swap') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Memory']);
        } elsif ($opt =~ m/^net\d+$/ || $opt eq 'nameserver' ||
                 $opt eq 'searchdomain' || $opt eq 'hostname') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Network']);
        } else {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Options']);
        }
    }

    return 1;
}
2154
# Unmount all mount points of a container below /var/lib/lxc/$vmid/rootfs,
# in reverse order. Mount points that are not currently mounted are skipped.
# A failing umount dies, unless $noerr is set, in which case it only warns.
sub umount_all {
    my ($vmid, $storage_cfg, $conf, $noerr) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    my $volid_list = get_vm_volumes($conf);

    my $unmount_one = sub {
        my ($ms, $mountpoint) = @_;

        my ($volid, $mount) = @{$mountpoint}{qw(volume mp)};
        return if !($volid && $mount);

        # Collapse repeated slashes, so the path can be matched by is_mounted().
        (my $mount_path = "$rootdir/$mount") =~ s!/+!/!g;

        return if !PVE::ProcFSTools::is_mounted($mount_path);

        eval {
            PVE::Tools::run_command(['umount', '-d', $mount_path]);
        };
        my $err = $@;
        return if !$err;

        die $err if !$noerr;
        warn $err;
    };

    foreach_mountpoint_reverse($conf, $unmount_one);
}
2186
# Activate all volumes of a container and mount every mount point below
# /var/lib/lxc/$vmid/rootfs. If any mount fails, everything mounted so far is
# unmounted again (errors during that only warn) and the error is re-raised.
# Returns the root directory.
sub mount_all {
    my ($vmid, $storage_cfg, $conf) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    File::Path::make_path($rootdir);

    my $volid_list = get_vm_volumes($conf);
    PVE::Storage::activate_volumes($storage_cfg, $volid_list);

    my $mount_one = sub {
        my ($ms, $mountpoint) = @_;

        mountpoint_mount($mountpoint, $rootdir, $storage_cfg);
    };

    eval { foreach_mountpoint($conf, $mount_one); };
    my $err = $@;
    if ($err) {
        warn "mounting container failed\n";
        umount_all($vmid, $storage_cfg, $conf, 1);
        die $err;
    }

    return $rootdir;
}
2211
2212
# Resolve the path of a mount point without mounting it: calls
# mountpoint_mount() with an undefined root directory.
sub mountpoint_mount_path {
    my ($mountpoint, $storage_cfg, $snapname) = @_;

    my $rootdir = undef; # no root directory: only return the mount path

    return mountpoint_mount($mountpoint, $rootdir, $storage_cfg, $snapname);
}
2218
# Die when the canonical form of $path differs from what Cwd::realpath()
# resolves it to, i.e. when a symlink redirects the mount path.
my $check_mount_path = sub {
    my ($path) = @_;

    my $canonical = File::Spec->canonpath($path);
    my $resolved = Cwd::realpath($canonical);

    die "mount path modified by symlink: $canonical != $resolved"
        if $resolved ne $canonical;
};
2227
# Ask 'losetup --associated' which loop device is attached to $path.
# Returns the device path (/dev/loopN) from the last matching output line, or
# undef when none matched.
sub query_loopdev {
    my ($path) = @_;

    my $loopdev;
    my $scan_line = sub {
        my ($line) = @_;
        $loopdev = $1 if $line =~ m@^(/dev/loop\d+):@;
    };

    PVE::Tools::run_command(['losetup', '--associated', $path], outfunc => $scan_line);

    return $loopdev;
}
2241
# Run a function with a file attached to a loop device.
# $func is called with the loop device path as its only argument.
# The loop device is always detached afterwards (or set to autoclear).
# An error raised by $func is re-raised after detaching.
# Returns the loop device.
sub run_with_loopdev {
    my ($func, $file) = @_;

    my $loopdev;
    my $scan_line = sub {
        my ($line) = @_;
        $loopdev = $1 if $line =~ m@^(/dev/loop\d+)$@;
    };

    PVE::Tools::run_command(['losetup', '--show', '-f', $file], outfunc => $scan_line);
    die "failed to setup loop device for $file\n" if !$loopdev;

    eval { $func->($loopdev); };
    my $err = $@;

    # Detach in both the success and the failure case.
    PVE::Tools::run_command(['losetup', '-d', $loopdev]);
    die $err if $err;

    return $loopdev;
}
2262
# Bind-mount $dir onto $dest, passing @extra_opts to mount. With $ro set the
# bind mount is remounted read-only; if that fails the mount is removed
# again and the error re-raised.
sub bindmount {
    my ($dir, $dest, $ro, @extra_opts) = @_;

    my @mount_cmd = ('mount', '-o', 'bind', @extra_opts, $dir, $dest);
    PVE::Tools::run_command(\@mount_cmd);

    if ($ro) {
        eval { PVE::Tools::run_command(['mount', '-o', 'bind,remount,ro', $dest]); };
        my $err = $@;
        if ($err) {
            warn "bindmount error\n";
            # don't leave writable bind-mounts behind...
            PVE::Tools::run_command(['umount', $dest]);
            die $err;
        }
    }
}
2276
# Mount a single mountpoint below $rootdir.
#
# use $rootdir = undef to just return the corresponding mount path
#
# Parameters:
#   $mountpoint  - hash with the keys read here: volume, mp, type, ro, quota, acl
#   $rootdir     - directory the mountpoint path is resolved against (or undef)
#   $storage_cfg - storage configuration, handed on to PVE::Storage
#   $snapname    - optional snapshot name
#
# Returns nothing when the mountpoint has no volume or no mount path.
# Otherwise returns, in list context, (path, loop-device flag, mounted device)
# and in scalar context just the path.
sub mountpoint_mount {
    my ($mountpoint, $rootdir, $storage_cfg, $snapname) = @_;

    my $volid = $mountpoint->{volume};
    my $mount = $mountpoint->{mp};
    my $type = $mountpoint->{type};
    my $readonly = $mountpoint->{ro};
    # quota is only considered for writable mounts that are not snapshots
    my $quota = !$snapname && !$readonly && $mountpoint->{quota};

    return if !$volid || !$mount;

    my $mount_path;
    if (defined($rootdir)) {
        $rootdir =~ s!/+$!!;
        ($mount_path = "$rootdir/$mount") =~ s!/+!/!g;
        $check_mount_path->($mount_path);
        File::Path::mkpath($mount_path);
    }

    my ($storage) = PVE::Storage::parse_volume_id($volid, 1);

    die "unknown snapshot path for '$volid'" if !$storage && defined($snapname);

    my $acl = $mountpoint->{acl};
    my $optstring = defined($acl) ? ($acl ? 'acl' : 'noacl') : '';
    my @extra_opts = ('-o', $optstring);

    # Volumes that are not managed by a storage: plain devices and bind mounts.
    if (!$storage) {
        if ($type eq 'device') {
            push @extra_opts, '-o', 'ro' if $readonly;
            PVE::Tools::run_command(['mount', @extra_opts, $volid, $mount_path]) if $mount_path;
            return wantarray ? ($volid, 0, $volid) : $volid;
        }
        if ($type eq 'bind') {
            die "directory '$volid' does not exist\n" if ! -d $volid;
            $check_mount_path->($volid);
            bindmount($volid, $mount_path, $readonly, @extra_opts) if $mount_path;
            warn "cannot enable quota control for bind mounts\n" if $quota;
            return wantarray ? ($volid, 0, undef) : $volid;
        }
        die "unsupported storage";
    }

    my $scfg = PVE::Storage::storage_config($storage_cfg, $storage);
    my $path = PVE::Storage::path($storage_cfg, $volid, $snapname);

    my ($vtype, undef, undef, undef, undef, $isBase, $format) =
        PVE::Storage::parse_volname($storage_cfg, $volid);

    $format = 'iso' if $vtype eq 'iso'; # allow to handle iso files

    if ($format eq 'subvol') {
        if ($mount_path) {
            if (!$snapname) {
                bindmount($path, $mount_path, $readonly, @extra_opts);
                warn "cannot enable quota control for bind mounted subvolumes\n" if $quota;
            } elsif ($scfg->{type} eq 'zfspool') {
                # the path is passed to 'mount -t zfs' without leading slashes
                (my $path_arg = $path) =~ s!^/+!!;
                PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, '-t', 'zfs', $path_arg, $mount_path]);
            } else {
                die "cannot mount subvol snapshots for storage type '$scfg->{type}'\n";
            }
        }
        return wantarray ? ($path, 0, undef) : $path;
    }

    die "unsupported image format '$format'\n" if $format ne 'raw' && $format ne 'iso';

    # Mounts the given block device or image; does nothing without a mount path.
    my $domount = sub {
        my ($source) = @_;
        return if !$mount_path;

        if ($format eq 'iso') {
            PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, $source, $mount_path]);
        } elsif ($isBase || defined($snapname)) {
            PVE::Tools::run_command(['mount', '-o', 'ro,noload', @extra_opts, $source, $mount_path]);
        } else {
            push @extra_opts, '-o', 'usrjquota=aquota.user,grpjquota=aquota.group,jqfmt=vfsv0'
                if $quota;
            push @extra_opts, '-o', 'ro' if $readonly;
            PVE::Tools::run_command(['mount', @extra_opts, $source, $mount_path]);
        }
    };

    if ($scfg->{path}) {
        # storages with a 'path' setting: attach the volume to a loop device first
        my $loopdev = run_with_loopdev($domount, $path);
        return wantarray ? ($path, 1, $loopdev) : $path;
    }

    my $stype = $scfg->{type};
    if (grep { $stype eq $_ } qw(drbd lvm rbd lvmthin)) {
        $domount->($path);
        return wantarray ? ($path, 0, $path) : $path;
    }

    die "unsupported storage type '$scfg->{type}'\n";
}
2383
# Collect the volume IDs of all mountpoints in $conf that have type 'volume'
# and whose volume ID parses to a storage ID.
#
# $excludes is an optional mountpoint key (as passed by foreach_mountpoint)
# to leave out. Returns an array reference of volume IDs.
sub get_vm_volumes {
    my ($conf, $excludes) = @_;

    my @volids;

    my $collect = sub {
        my ($ms, $mountpoint) = @_;

        return if $excludes && $ms eq $excludes;

        my $volid = $mountpoint->{volume};
        return unless $volid && $mountpoint->{type} eq 'volume';

        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        push @volids, $volid if $sid;
    };

    foreach_mountpoint($conf, $collect);

    return \@volids;
}
2406
# Create an ext4 filesystem on $dev with the 'mmp' feature enabled and the
# filesystem root directory owned by $rootuid:$rootgid.
#
# $rootuid and $rootgid are optional and default to 0, so callers that omit
# them get an explicit 'root_owner=0:0' instead of an incomplete
# 'root_owner=:' option built from undefined values.
sub mkfs {
    my ($dev, $rootuid, $rootgid) = @_;

    $rootuid = 0 if !defined($rootuid);
    $rootgid = 0 if !defined($rootgid);

    PVE::Tools::run_command(['mkfs.ext4', '-O', 'mmp',
                             '-E', "root_owner=$rootuid:$rootgid",
                             $dev]);
}
2414
# Create a filesystem (see mkfs) on the given volume.
#
# Parameters:
#   $storage_cfg - storage configuration, handed on to PVE::Storage
#   $volid       - either a device path below /dev/ or a storage volume ID
#   $rootuid,
#   $rootgid     - owner for the root directory of the new filesystem
#
# Device paths are formatted directly. Storage volumes are activated first
# and must be in 'raw' format, otherwise the function dies.
sub format_disk {
    my ($storage_cfg, $volid, $rootuid, $rootgid) = @_;

    if ($volid =~ m!^/dev/.+!) {
        # hand the owner on here too, so that devices get the same root
        # directory ownership as storage backed volumes
        mkfs($volid, $rootuid, $rootgid);
        return;
    }

    my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1);

    die "cannot format volume '$volid' with no storage\n" if !$storage;

    PVE::Storage::activate_volumes($storage_cfg, [$volid]);

    my $path = PVE::Storage::path($storage_cfg, $volid);

    my ($vtype, undef, undef, undef, undef, $isBase, $format) =
        PVE::Storage::parse_volname($storage_cfg, $volid);

    die "cannot format volume '$volid' (format == $format)\n"
        if $format ne 'raw';

    mkfs($path, $rootuid, $rootgid);
}
2439
# Free every volume listed in $vollist (array reference of volume IDs).
# A failure to free one volume is reported as a warning and does not stop
# the remaining volumes from being freed.
sub destroy_disks {
    my ($storecfg, $vollist) = @_;

    for my $volid (@$vollist) {
        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        if (my $free_err = $@) {
            warn $free_err;
        }
    }
}
2448
# Allocate and prepare the volumes requested in $settings for container $vmid.
#
# Mountpoints whose volume has the form 'STORAGE:SIZE' (SIZE in GB, may be
# fractional) get a new volume allocated, depending on the storage type:
#   dir, nfs            - 'raw' image formatted via format_disk(), or a
#                         'subvol' if the size is 0
#   zfspool             - 'subvol'
#   drbd, lvm, lvmthin  - 'raw' volume, formatted
#   rbd                 - 'raw' volume, formatted (needs the krbd option)
# The resulting mountpoint string is stored in $conf. All other mountpoints
# are written to $conf as specified.
#
# Newly created subvols are chowned to the container's root uid/gid as
# returned by parse_id_maps().
#
# Returns an array reference with the IDs of all newly allocated volumes.
# On error every volume allocated so far is freed again before the error is
# rethrown.
sub create_disks {
    my ($storecfg, $vmid, $settings, $conf) = @_;

    my $vollist = [];

    eval {
        my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
        my $chown_vollist = [];

        foreach_mountpoint($settings, sub {
            my ($ms, $mountpoint) = @_;

            my $volid = $mountpoint->{volume};

            my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1);

            if ($storage && ($volid =~ m/^([^:\s]+):(\d+(\.\d+)?)$/)) {
                my $size_gb = $2;

                my $size_kb = int(${size_gb}*1024) * 1024;

                my $scfg = PVE::Storage::storage_config($storecfg, $storage);
                # fixme: use better naming ct-$vmid-disk-X.raw?

                # Register each volume right after allocation, so that it is
                # also freed by the error handler below when a later step
                # (e.g. formatting) fails.
                my $alloc = sub {
                    my ($format, $size) = @_;
                    my $new_volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, $format,
                                                              undef, $size);
                    push @$vollist, $new_volid;
                    return $new_volid;
                };

                if ($scfg->{type} eq 'dir' || $scfg->{type} eq 'nfs') {
                    if ($size_kb > 0) {
                        $volid = $alloc->('raw', $size_kb);
                        format_disk($storecfg, $volid, $rootuid, $rootgid);
                    } else {
                        $volid = $alloc->('subvol', 0);
                        push @$chown_vollist, $volid;
                    }
                } elsif ($scfg->{type} eq 'zfspool') {

                    $volid = $alloc->('subvol', $size_kb);
                    push @$chown_vollist, $volid;
                } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' || $scfg->{type} eq 'lvmthin') {

                    $volid = $alloc->('raw', $size_kb);
                    format_disk($storecfg, $volid, $rootuid, $rootgid);

                } elsif ($scfg->{type} eq 'rbd') {

                    die "krbd option must be enabled on storage type '$scfg->{type}'\n" if !$scfg->{krbd};
                    $volid = $alloc->('raw', $size_kb);
                    format_disk($storecfg, $volid, $rootuid, $rootgid);
                } else {
                    die "unable to create containers on storage type '$scfg->{type}'\n";
                }
                $mountpoint->{volume} = $volid;
                $mountpoint->{size} = $size_kb * 1024;
                $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            } else {
                # use specified/existing volid/dir/device
                $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            }
        });

        PVE::Storage::activate_volumes($storecfg, $chown_vollist, undef);
        foreach my $volid (@$chown_vollist) {
            my $path = PVE::Storage::path($storecfg, $volid, undef);
            # report a failed chown instead of ignoring it silently
            chown($rootuid, $rootgid, $path)
                or warn "unable to change owner of '$path' to $rootuid:$rootgid - $!\n";
        }
        PVE::Storage::deactivate_volumes($storecfg, $chown_vollist, undef);
    };
    # free allocated images on error
    if (my $err = $@) {
        destroy_disks($storecfg, $vollist);
        die $err;
    }
    return $vollist;
}
2526
2527 # bash completion helper
2528
# Completion helper: list the volume IDs offered for an OS template argument.
#
# For the 'restore' command content of type 'backup' is listed, for every
# other command content of type 'vztmpl'. If the current value $cvalue
# already starts with 'STORAGE:', only that storage is queried.
# Returns an array reference of volume IDs.
sub complete_os_templates {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    # stays undef when no storage prefix has been typed yet
    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $vtype = ($cmdname eq 'restore') ? 'backup' : 'vztmpl';
    my $data = PVE::Storage::template_list($cfg, $storeid, $vtype);

    my @volids;
    for my $items (values %$data) {
        for my $item (@$items) {
            my $volid = $item->{volid};
            push @volids, $volid if defined($volid);
        }
    }

    return \@volids;
}
2552
# Shared implementation of the container ID completion helpers.
#
# $running undef: return all IDs reported by vmstatus().
# $running false: only non-template containers that are not active.
# $running true:  only non-template containers that are active.
# Returns an array reference of container IDs.
my $complete_ctid_full = sub {
    my ($running) = @_;

    my $status_of = vmstatus();
    my $is_active = list_active_containers();

    my @ids;
    ID: for my $id (keys %$status_of) {
        if (defined($running)) {
            my $d = $status_of->{$id};
            next ID if $d->{template};
            # skip containers whose active state differs from the requested one
            next ID if ($running xor $is_active->{$id});
        }
        push @ids, $id;
    }

    return \@ids;
};
2574
# Completion helper: all container IDs, regardless of their state.
sub complete_ctid {
    return $complete_ctid_full->();
}
2578
# Completion helper: IDs of non-template containers that are not active.
sub complete_ctid_stopped {
    return $complete_ctid_full->(0);
}
2582
# Completion helper: IDs of non-template containers that are active.
sub complete_ctid_running {
    return $complete_ctid_full->(1);
}
2586
# Extract the id mappings from the 'lxc.id_map' entries in $conf->{lxc}.
#
# Each value must look like '<u|g> <ct-id> <host-id> <length>', otherwise
# the function dies. Without any explicit mapping, containers flagged as
# 'unprivileged' get a default mapping of 65536 ids starting at 100000.
#
# Returns the list ($id_map, $rootuid, $rootgid): $id_map is an array
# reference of [type, ct, host, length] entries, $rootuid/$rootgid are the
# host ids container id 0 is mapped to (0 when no such mapping exists).
sub parse_id_maps {
    my ($conf) = @_;

    my @id_map;
    my ($rootuid, $rootgid) = (0, 0);

    # iterate over a local copy of the reference so that a missing 'lxc'
    # entry is not created inside the caller's $conf
    my $lxc_entries = $conf->{lxc};
    for my $entry (@$lxc_entries) {
        my ($key, $value) = @$entry;
        next unless $key eq 'lxc.id_map';

        my ($type, $ct, $host, $length) = $value =~ /^([ug])\s+(\d+)\s+(\d+)\s+(\d+)\s*$/
            or die "failed to parse id_map: $value\n";

        push @id_map, [$type, $ct, $host, $length];

        # only the mapping of container id 0 determines the root ids
        next if $ct != 0;
        if ($type eq 'u') {
            $rootuid = $host;
        } else {
            $rootgid = $host;
        }
    }

    if (!@id_map && $conf->{unprivileged}) {
        # Should we read them from /etc/subuid?
        @id_map = (['u', '0', '100000', '65536'],
                   ['g', '0', '100000', '65536']);
        $rootuid = $rootgid = 100000;
    }

    return (\@id_map, $rootuid, $rootgid);
}
2619
# Build the 'lxc-usernsexec' command prefix for the given id map.
#
# $id_map is an array reference of array references; each entry is joined
# with ':' and passed as a '-m' argument. The prefix ends with '--'.
# Returns an empty array reference when $id_map is empty.
sub userns_command {
    my ($id_map) = @_;

    return [] if !@$id_map;

    my @cmd = ('lxc-usernsexec');
    for my $mapping (@$id_map) {
        push @cmd, '-m', join(':', @$mapping);
    }
    push @cmd, '--';

    return \@cmd;
}
2627
2628 1;