]> git.proxmox.com Git - pve-container.git/blob - src/PVE/LXC.pm
fix mount_all, improve bind mount handling
[pve-container.git] / src / PVE / LXC.pm
1 package PVE::LXC;
2
3 use strict;
4 use warnings;
5 use POSIX qw(EINTR);
6
7 use File::Path;
8 use File::Spec;
9 use Cwd qw();
10 use Fcntl ':flock';
11
12 use PVE::Cluster qw(cfs_register_file cfs_read_file);
13 use PVE::Storage;
14 use PVE::SafeSyslog;
15 use PVE::INotify;
16 use PVE::JSONSchema qw(get_standard_option);
17 use PVE::Tools qw($IPV6RE $IPV4RE dir_glob_foreach);
18 use PVE::Network;
19 use PVE::AccessControl;
20 use PVE::ProcFSTools;
21 use Time::HiRes qw (gettimeofday);
22
23 use Data::Dumper;
24
# Name of the local node, looked up once when the module is loaded.
my $nodename = PVE::INotify::nodename();

# CPU description of the host; vmstatus() reads its 'cpus' and 'user_hz' fields.
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();

# tar command line flags, exposed as a package variable.
our $COMMON_TAR_FLAGS = [
    '--sparse',
    '--numeric-owner',
    '--acls',
    '--xattrs',
    '--xattrs-include=user.*',
    '--xattrs-include=security.capability',
    '--warning=no-xattr-write',
];

# Register the parser/writer pair of this module for files below '/lxc/'.
cfs_register_file('/lxc/', \&parse_pct_config, \&write_pct_config);
36
# Property-string schema of the 'rootfs' option. It is also spliced into the
# mount point schema ($mp_desc) further down in this file.
my $rootfs_desc = {
    # 'volume' is the default key of the property string
    volume => {
        description => 'Volume, device or directory to mount into the container.',
        format_description => 'volume',
        default_key => 1,
        type => 'string',
    },
    backup => {
        description => 'Whether to include the mountpoint in backups.',
        format_description => '[1|0]',
        type => 'boolean',
        optional => 1,
    },
    size => {
        description => 'Volume size (read only value).',
        format_description => 'DiskSize',
        format => 'disk-size',
        type => 'string',
        optional => 1,
    },
};

# Make the root file system schema available as a standard option.
PVE::JSONSchema::register_standard_option(
    'pve-ct-rootfs',
    {
        type => 'string',
        format => $rootfs_desc,
        description => "Use volume as container root.",
        optional => 1,
    },
);
64
# Standard option describing a snapshot name: a 'pve-configid' string of at
# most 40 characters.
PVE::JSONSchema::register_standard_option(
    'pve-lxc-snapshot-name',
    {
        type => 'string',
        format => 'pve-configid',
        maxLength => 40,
        description => "The name of the snapshot.",
    },
);
70
# Schema of the options accepted in a container configuration. check_type()
# validates values against it and the json_config_properties*() helpers
# export it. The net<N>, mp<N> and unused<N> entries are added by loops
# further down in this file.
my $confdesc = {
    lock => {
        type => 'string',
        enum => ['migrate', 'backup', 'snapshot', 'rollback'],
        description => "Lock/unlock the VM.",
        optional => 1,
    },
    onboot => {
        type => 'boolean',
        default => 0,
        description => "Specifies whether a VM will be started during system bootup.",
        optional => 1,
    },
    startup => get_standard_option('pve-startup-order'),
    template => {
        type => 'boolean',
        default => 0,
        description => "Enable/disable Template.",
        optional => 1,
    },
    arch => {
        type => 'string',
        enum => [qw(amd64 i386)],
        default => 'amd64',
        description => "OS architecture type.",
        optional => 1,
    },
    ostype => {
        type => 'string',
        enum => [qw(debian ubuntu centos fedora opensuse archlinux)],
        description => "OS type. Corresponds to lxc setup scripts in /usr/share/lxc/config/<ostype>.common.conf.",
        optional => 1,
    },
    console => {
        type => 'boolean',
        default => 1,
        description => "Attach a console device (/dev/console) to the container.",
        optional => 1,
    },
    tty => {
        type => 'integer',
        minimum => 0,
        maximum => 6,
        default => 2,
        description => "Specify the number of tty available to the container",
        optional => 1,
    },
    cpulimit => {
        type => 'number',
        minimum => 0,
        maximum => 128,
        default => 0,
        description => "Limit of CPU usage. Note if the computer has 2 CPUs, it has total of '2' CPU time. Value '0' indicates no CPU limit.",
        optional => 1,
    },
    cpuunits => {
        type => 'integer',
        minimum => 0,
        maximum => 500000,
        default => 1024,
        description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to weights of all the other running VMs.\n\nNOTE: You can disable fair-scheduler configuration by setting this to 0.",
        optional => 1,
    },
    memory => {
        type => 'integer',
        minimum => 16,
        default => 512,
        description => "Amount of RAM for the VM in MB.",
        optional => 1,
    },
    swap => {
        type => 'integer',
        minimum => 0,
        default => 512,
        description => "Amount of SWAP for the VM in MB.",
        optional => 1,
    },
    hostname => {
        type => 'string',
        format => 'dns-name',
        maxLength => 255,
        description => "Set a host name for the container.",
        optional => 1,
    },
    description => {
        type => 'string',
        description => "Container description. Only used on the configuration web interface.",
        optional => 1,
    },
    searchdomain => {
        type => 'string',
        format => 'dns-name-list',
        description => "Sets DNS search domains for a container. Create will automatically use the setting from the host if you neither set searchdomain or nameserver.",
        optional => 1,
    },
    nameserver => {
        type => 'string',
        format => 'address-list',
        description => "Sets DNS server IP address for a container. Create will automatically use the setting from the host if you neither set searchdomain or nameserver.",
        optional => 1,
    },
    rootfs => get_standard_option('pve-ct-rootfs'),
    parent => {
        type => 'string',
        format => 'pve-configid',
        maxLength => 40,
        description => "Parent snapshot name. This is used internally, and should not be modified.",
        optional => 1,
    },
    snaptime => {
        type => 'integer',
        minimum => 0,
        description => "Timestamp for snapshots.",
        optional => 1,
    },
    cmode => {
        type => 'string',
        enum => [qw(shell console tty)],
        default => 'tty',
        description => "Console mode. By default, the console command tries to open a connection to one of the available tty devices. By setting cmode to 'console' it tries to attach to /dev/console instead. If you set cmode to 'shell', it simply invokes a shell inside the container (no login).",
        optional => 1,
    },
    protection => {
        type => 'boolean',
        default => 0,
        description => "Sets the protection flag of the container. This will prevent the remove operation. This will prevent the CT or CT's disk remove/update operation.",
        optional => 1,
    },
    unprivileged => {
        type => 'boolean',
        default => 0,
        description => "Makes the container run as unprivileged user. (Should not be modified manually.)",
        optional => 1,
    },
};
202
# Raw 'lxc.*' keys that parse_pct_config() accepts verbatim. Keys below
# 'lxc.cgroup.' are not listed here; the parser accepts them through a
# separate pattern check.
#
# The bare 'lxc.' key used to be written four times in the hash literal;
# duplicate keys collapse silently, so it is listed once and the resulting
# hash is unchanged.
my $valid_lxc_conf_keys = {
    map { $_ => 1 } (
        'lxc.include',
        'lxc.arch',
        'lxc.utsname',
        'lxc.haltsignal',
        'lxc.rebootsignal',
        'lxc.stopsignal',
        'lxc.init_cmd',
        'lxc.network.type',
        'lxc.network.flags',
        'lxc.network.link',
        'lxc.network.mtu',
        'lxc.network.name',
        'lxc.network.hwaddr',
        'lxc.network.ipv4',
        'lxc.network.ipv4.gateway',
        'lxc.network.ipv6',
        'lxc.network.ipv6.gateway',
        'lxc.network.script.up',
        'lxc.network.script.down',
        'lxc.pts',
        'lxc.console.logfile',
        'lxc.console',
        'lxc.tty',
        'lxc.devttydir',
        'lxc.hook.autodev',
        'lxc.autodev',
        'lxc.kmsg',
        'lxc.mount',
        'lxc.mount.entry',
        'lxc.mount.auto',
        'lxc.rootfs',
        'lxc.rootfs.mount',
        'lxc.rootfs.options',
        # lxc.cgroup.*
        'lxc.cap.drop',
        'lxc.cap.keep',
        'lxc.aa_profile',
        'lxc.aa_allow_incomplete',
        'lxc.se_context',
        'lxc.seccomp',
        'lxc.id_map',
        'lxc.hook.pre-start',
        'lxc.hook.pre-mount',
        'lxc.hook.mount',
        'lxc.hook.start',
        'lxc.hook.stop',
        'lxc.hook.post-stop',
        'lxc.hook.clone',
        'lxc.hook.destroy',
        'lxc.loglevel',
        'lxc.logfile',
        'lxc.start.auto',
        'lxc.start.delay',
        'lxc.start.order',
        'lxc.group',
        'lxc.environment',
        'lxc.',
    )
};
265
# Property-string schema of a 'net<N>' container network interface.
#
# Changes compared to the previous revision:
#  - 'hwaddr' carried a copy of the bridge description; it now describes the
#    MAC address it actually holds
#  - typo "foro" in the 'tag' description
#  - the VLAN tag bounds are plain numbers instead of numeric strings
my $netconf_desc = {
    type => {
        type => 'string',
        optional => 1,
        description => "Network interface type.",
        enum => [qw(veth)],
    },
    name => {
        type => 'string',
        format_description => 'String',
        description => 'Name of the network device as seen from inside the container. (lxc.network.name)',
        pattern => '[-_.\w\d]+',
    },
    bridge => {
        type => 'string',
        format_description => 'vmbr<Number>',
        description => 'Bridge to attach the network device to.',
        pattern => '[-_.\w\d]+',
        optional => 1,
    },
    hwaddr => {
        type => 'string',
        format_description => 'MAC',
        description => 'MAC address of the network device. (lxc.network.hwaddr)',
        pattern => qr/(?:[a-f0-9]{2}:){5}[a-f0-9]{2}/i,
        optional => 1,
    },
    mtu => {
        type => 'integer',
        format_description => 'Number',
        description => 'Maximum transfer unit of the interface. (lxc.network.mtu)',
        minimum => 64, # minimum ethernet frame is 64 bytes
        optional => 1,
    },
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        description => 'IPv4 address in CIDR format.',
        optional => 1,
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        description => 'Default gateway for IPv4 traffic.',
        optional => 1,
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        description => 'IPv6 address in CIDR format.',
        optional => 1,
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        description => 'Default gateway for IPv6 traffic.',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        format_description => '[1|0]',
        description => "Controls whether this interface's firewall rules should be used.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        format_description => 'VlanNo',
        minimum => 2,
        maximum => 4094,
        description => "VLAN tag for this interface.",
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-lxc-network', $netconf_desc);
344
# Add the options net0 .. net9 to the configuration schema; each one is a
# property string following $netconf_desc.
my $MAX_LXC_NETWORKS = 10;
foreach my $net_index (0 .. $MAX_LXC_NETWORKS - 1) {
    $confdesc->{"net$net_index"} = {
        type => 'string',
        format => $netconf_desc,
        description => "Specifies network interfaces for the container.",
        optional => 1,
    };
}
353
# Schema of an additional mount point: every property of the root file
# system schema plus the target path 'mp'.
my $mp_desc = {
    %$rootfs_desc,
    mp => {
        description => 'Path to the mountpoint as seen from inside the container.',
        format_description => 'Path',
        type => 'string',
    },
};
PVE::JSONSchema::register_format('pve-ct-mountpoint', $mp_desc);
363
# Schema shared by all 'unused<N>' options: an optional volume ID.
my $unuseddesc = {
    type => 'string',
    format => 'pve-volume-id',
    description => "Reference to unused volumes.",
    optional => 1,
};
369
# Add the options mp0 .. mp9 to the configuration schema; each one is a
# property string following $mp_desc.
# (The hash literal used to list 'optional' twice; the duplicate is gone.)
my $MAX_MOUNT_POINTS = 10;
for (my $i = 0; $i < $MAX_MOUNT_POINTS; $i++) {
    $confdesc->{"mp$i"} = {
        optional => 1,
        type => 'string', format => $mp_desc,
        description => "Use volume as container mount point (experimental feature).",
    };
}
379
# Add the options unused0 .. unused<N-1> to the configuration schema. All of
# them share the very same schema hash ($unuseddesc).
# The loop is bounded by $MAX_UNUSED_DISKS (it used to read
# $MAX_MOUNT_POINTS), so the registered keys always match the range that
# add_unused_volume() scans.
my $MAX_UNUSED_DISKS = $MAX_MOUNT_POINTS;
for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) {
    $confdesc->{"unused$i"} = $unuseddesc;
}
384
# Serialize a container configuration into the text of its config file.
#
# Parameters:
#   $filename - accepted for the cfs writer interface, not used here
#   $conf     - configuration hash; 'snapstate' is removed from it
#
# Returns the file content: the main section followed by one "[name]"
# section per entry of $conf->{snapshots}, sorted by snapshot name.
# Dies if a property value contains a newline.
sub write_pct_config {
    my ($filename, $conf) = @_;

    # just to be sure: the main section never carries a snapstate
    delete $conf->{snapstate};

    # keys that are never written as plain "key: value" lines
    my %not_a_property = map { $_ => 1 }
        qw(digest description pending snapshots snapname lxc);

    my $render_section = sub {
        my ($section) = @_;

        my $text = '';

        # the description goes to the top of the section as comment lines
        my $description = $section->{description} || '';
        for my $descr_line (split(/\n/, $description)) {
            $text .= '#' . PVE::Tools::encode_text($descr_line) . "\n";
        }

        for my $name (sort keys %$section) {
            next if $not_a_property{$name};
            my $setting = $section->{$name};
            die "detected invalid newline inside property '$name'\n" if $setting =~ m/\n/;
            $text .= "$name: $setting\n";
        }

        # raw lxc entries keep their original order and come last
        my $lxc_entries = $section->{lxc};
        if ($lxc_entries) {
            for my $pair (@$lxc_entries) {
                my ($lxc_key, $lxc_value) = @$pair;
                $text .= "$lxc_key: $lxc_value\n";
            }
        }

        return $text;
    };

    my $output = $render_section->($conf);

    for my $snapname (sort keys %{$conf->{snapshots}}) {
        $output .= "\n[$snapname]\n";
        $output .= $render_section->($conf->{snapshots}->{$snapname});
    }

    return $output;
}
428
# Validate $value against the schema entry of option $key and return the
# normalized value:
#   boolean - 1 or 0 (on/yes/true and off/no/false are accepted too)
#   integer - the value converted with int()
#   number  - the value unchanged
#   string  - the value unchanged, after an optional format check
# Dies for unknown options, undefined values, values containing a line
# break and values that do not match the declared type.
sub check_type {
    my ($key, $value) = @_;

    my $schema = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$schema;

    my $type = $schema->{type};

    die "got undefined value\n" if !defined($value);

    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        my $fmt = $schema->{format};
        PVE::JSONSchema::check_format($fmt, $value) if $fmt;
        return $value;
    }

    die "internal error"
}
464
# Parse the text of a container configuration file.
#
# Parameters:
#   $filename - path of the file; it has to end in '/lxc/<vmid>.conf'
#   $raw      - file content, or undef
#
# Returns undef when $raw is undef. Otherwise returns a hash reference with
# the parsed options of the main section, a 'digest' (SHA1 of $raw) and
# 'snapshots', which maps each "[name]" section to its own option hash.
# Lines starting with '#' are collected into the 'description' of the
# section they belong to. Lines that cannot be parsed or validated only
# trigger a warning. Dies if $filename does not have the expected form.
sub parse_pct_config {
    my ($filename, $raw) = @_;

    return undef if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
    };

    # Fix: the dot before 'conf' is escaped now, so only a literal
    # '<vmid>.conf' file name is accepted (it used to match any character).
    $filename =~ m|/lxc/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    # $conf always points to the section currently being filled
    my $conf = $res;
    my $descr = '';
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            # a snapshot section starts: close the description of the
            # previous section and switch to a fresh hash
            $section = $1;
            $conf->{description} = $descr if $descr;
            $descr = '';
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)\s*$/) {
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(lxc\.[a-z0-9_\-\.]+)(:|\s*=)\s*(.*?)\s*$/) {
            my $key = $1;
            my $value = $3;
            # raw lxc keys are kept in file order as [key, value] pairs
            if ($valid_lxc_conf_keys->{$key} || $key =~ m/^lxc\.cgroup\./) {
                push @{$conf->{lxc}}, [$key, $value];
            } else {
                warn "vm $vmid - unable to parse config: $line\n";
            }
        } elsif ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/snapstate:\s*(prepare|delete)\s*$/) {
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(\S.*)\s*$/) {
            my $key = $1;
            my $value = $2;
            # a failed type check only warns; the unchecked value is kept
            eval { $value = check_type($key, $value); };
            warn "vm $vmid - unable to parse value of '$key' - $@" if $@;
            $conf->{$key} = $value;
        } else {
            warn "vm $vmid - unable to parse config: $line\n";
        }
    }

    $conf->{description} = $descr if $descr;

    delete $res->{snapstate}; # just to be sure

    return $res;
}
530
# Return a hash reference { <vmid> => { type => 'lxc' } } with all lxc
# guests that the cluster VM list assigns to the local node.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();

    my $local_cts = {};
    return $local_cts unless $vmlist && $vmlist->{ids};

    my $ids = $vmlist->{ids};
    for my $vmid (keys %$ids) {
        next unless $vmid; # skip CT0

        my $entry = $ids->{$vmid};
        my $on_this_node = $entry->{node} && $entry->{node} eq $nodename;
        next unless $on_this_node;
        my $is_container = $entry->{type} && $entry->{type} eq 'lxc';
        next unless $is_container;

        $local_cts->{$vmid}->{type} = 'lxc';
    }

    return $local_cts;
}
546
# Build the cluster file system path of a container configuration, relative
# to the cfs root. $node defaults to the local node.
sub cfs_config_path {
    my ($vmid, $node) = @_;

    my $owner = $node || $nodename;

    return "nodes/$owner/lxc/$vmid.conf";
}
553
# Absolute path below /etc/pve of the configuration file of container $vmid
# on $node.
sub config_file {
    my ($vmid, $node) = @_;

    return '/etc/pve/' . cfs_config_path($vmid, $node);
}
560
# Read and return the parsed configuration of container $vmid from $node
# (default: the local node). Dies if no configuration is found.
sub load_config {
    my ($vmid, $node) = @_;

    my $cfspath = cfs_config_path($vmid, $node || $nodename);

    my $conf = PVE::Cluster::cfs_read_file($cfspath);
    return $conf if defined($conf);

    die "container $vmid does not exists\n";
}
572
# Store the configuration of a new container on the local node.
sub create_config {
    my ($vmid, $conf) = @_;

    # make sure the lxc directory of this node exists; the result of mkdir
    # is deliberately not checked
    mkdir "/etc/pve/nodes/$nodename/lxc";

    write_config($vmid, $conf);
}
581
# Remove the configuration file of container $vmid on the local node.
sub destroy_config {
    my ($vmid) = @_;

    my $path = config_file($vmid, $nodename);
    unlink $path;
}
587
# Write $conf as the configuration of container $vmid on the local node.
sub write_config {
    my ($vmid, $conf) = @_;

    PVE::Cluster::cfs_write_file(cfs_config_path($vmid), $conf);
}
595
# flock bookkeeping: every process keeps a single file handle per lock file
# (together with a reference count), so the same process may take a lock
# several times.

# $lock_handles->{<pid>}->{<lock file>} = { fh => ..., refcount => ... }
my $lock_handles = {};
my $lockdir = "/run/lock/lxc";

# Path of the lock file guarding the configuration of container $vmid.
sub lock_filename {
    my ($vmid) = @_;

    return join('/', $lockdir, "pve-config-${vmid}.lock");
}
607
# Take the exclusive configuration lock of container $vmid.
#
# Parameters:
#   $vmid    - container ID
#   $timeout - seconds to wait for the lock (default 10)
#
# The lock file handle is cached per process and reference counted, so
# nested calls from the same process succeed; every call has to be paired
# with a lock_release(). Dies if the lock cannot be taken in time.
sub lock_aquire {
    my ($vmid, $timeout) = @_;

    $timeout = 10 if !$timeout;
    my $mode = LOCK_EX;

    my $filename = lock_filename($vmid);

    mkdir $lockdir if !-d $lockdir;

    my $lock_func = sub {
        if (!$lock_handles->{$$}->{$filename}) {
            # Method call with a separate append mode, replacing the
            # indirect object syntax and the ">>$filename" string.
            my $fh = IO::File->new($filename, '>>')
                || die "can't open file - $!\n";
            $lock_handles->{$$}->{$filename} = { fh => $fh, refcount => 0};
        }

        # first try without blocking, so that the message below is only
        # printed when we really have to wait
        if (!flock($lock_handles->{$$}->{$filename}->{fh}, $mode |LOCK_NB)) {
            print STDERR "trying to aquire lock...";
            my $success;
            while(1) {
                $success = flock($lock_handles->{$$}->{$filename}->{fh}, $mode);
                # try again on EINTR (see bug #273)
                if ($success || ($! != EINTR)) {
                    last;
                }
            }
            if (!$success) {
                print STDERR " failed\n";
                die "can't aquire lock - $!\n";
            }

            print STDERR " OK\n";
        }

        $lock_handles->{$$}->{$filename}->{refcount}++;
    };

    eval { PVE::Tools::run_with_timeout($timeout, $lock_func); };
    my $err = $@;
    if ($err) {
        die "can't lock file '$filename' - $err";
    }
}
652
# Drop one reference to the configuration lock of container $vmid; the lock
# file is closed (which releases the flock) once the last reference is gone.
#
# Calling this without holding the lock is a no-op. The lookup no longer
# autovivifies an empty bookkeeping entry: such an entry made the next
# lock_aquire() skip opening the lock file and then flock() an undefined
# handle.
sub lock_release {
    my ($vmid) = @_;

    my $filename = lock_filename($vmid);

    my $handles = $lock_handles->{$$};
    my $entry = $handles ? $handles->{$filename} : undef;
    return if !$entry || !$entry->{fh};

    my $fh = $entry->{fh};
    my $refcount = --$entry->{refcount};
    if ($refcount <= 0) {
        # forget the handle before closing it, so a later lock_aquire()
        # opens the file again
        delete $handles->{$filename};
        close ($fh);
    }
}
666
# Run $code->(@param) while holding the configuration lock of container
# $vmid and return its (scalar) result. The lock is released again even if
# $code dies; the error is then re-thrown.
sub lock_container {
    my ($vmid, $timeout, $code, @param) = @_;

    lock_aquire($vmid, $timeout);

    my $res = eval { $code->(@param) };
    my $err = $@;

    lock_release($vmid);

    die $err if $err;

    return $res;
}
681
# True if $name is an option known to the configuration schema.
sub option_exists {
    my ($name) = @_;

    my $schema = $confdesc->{$name};

    return defined($schema);
}
687
# Add the JSON properties for the create and set functions: every schema
# option except 'parent' and 'snaptime' is copied into $prop, unless $prop
# already holds a (true) entry of that name. Returns $prop.
sub json_config_properties {
    my $prop = shift;

    my %internal = (parent => 1, snaptime => 1);

    for my $opt (grep { !$internal{$_} } keys %$confdesc) {
        $prop->{$opt} = $confdesc->{$opt} unless $prop->{$opt};
    }

    return $prop;
}
700
# Same as json_config_properties(), but 'rootfs' is left out as well.
# Returns $prop.
sub json_config_properties_no_rootfs {
    my $prop = shift;

    my %excluded = (parent => 1, snaptime => 1, rootfs => 1);

    for my $opt (keys %$confdesc) {
        next if $prop->{$opt} || $excluded{$opt};
        $prop->{$opt} = $confdesc->{$opt};
    }

    return $prop;
}
712
713 # container status helpers
714
# Return a hash reference { <vmid> => 1 } for every container that has an
# abstract unix socket '@/var/lib/lxc/<vmid>/command' listed in
# /proc/net/unix. An empty hash is returned if that file cannot be opened.
sub list_active_containers {

    my $active = {};

    # according to the original note, LXC's own list_active_containers
    # uses a similar test
    my $fh = IO::File->new("/proc/net/unix", "r");
    return $active if !$fh;

    while (defined(my $line = <$fh>)) {
        # the socket path is the last column
        next if $line !~ m/^[a-f0-9]+:\s\S+\s\S+\s\S+\s\S+\s\S+\s\d+\s(\S+)$/;
        my $socket = $1;

        $active->{$1} = 1 if $socket =~ m!^@/var/lib/lxc/(\d+)/command$!;
    }

    close($fh);

    return $active;
}
738
# Returns 1 if container $vmid is active, undef otherwise.
# warning: this is slow, the whole list of active containers is built for
# every call
sub check_running {
    my ($vmid) = @_;

    my $active = list_active_containers();

    return defined($active->{$vmid}) ? 1 : undef;
}
749
# Disk usage of the root file system of a running container, taken through
# the root directory of process $pid. $vmid is not used.
sub get_container_disk_usage {
    my ($vmid, $pid) = @_;

    my $container_root = "/proc/$pid/root/";

    return PVE::Tools::df($container_root, 1);
}
755
# Per-container CPU sample of the previous vmstatus() call:
# { <vmid> => { time => ..., used => ..., cpu => ... } }
my $last_proc_vmid_stat;

# Read 'cpuacct.stat' of container $vmid and return { utime, stime }.
# An empty hash is returned if the content has an unexpected form.
my $parse_cpuacct_stat = sub {
    my ($vmid) = @_;

    my $raw = read_cgroup_value('cpuacct', $vmid, 'cpuacct.stat', 1);

    return {} if $raw !~ m/^user (\d+)\nsystem (\d+)\n/;

    return { utime => $1, stime => $2 };
};
774
# Collect status information about containers.
#
# With $opt_vmid only that container is reported, otherwise all containers
# returned by config_list(). Returns a hash reference keyed by VMID. Each
# entry holds: type, pid (running containers only), status, name, cpus,
# disk, maxdisk, mem, maxmem, swap, maxswap, uptime, cpu, netin, netout,
# diskread, diskwrite and template.
#
# The CPU usage is computed from the difference to the sample stored by the
# previous call, so the first call for a container reports 0.
#
# Changes compared to the previous revision:
#  - if /proc/<pid> vanished between the PID lookup and the stat() call,
#    uptime stays 0 (it used to become "time - undef", i.e. the epoch time)
#  - missing utime/stime values count as 0 instead of undef
#  - the captured $value is used for the blkio counters (same value as $2)
#  - the unused $uptime local is gone
sub vmstatus {
    my ($opt_vmid) = @_;

    my $list = $opt_vmid ? { $opt_vmid => { type => 'lxc' }} : config_list();

    my $active_hash = list_active_containers();

    my $cpucount = $cpuinfo->{cpus} || 1;

    my $cdtime = gettimeofday;

    # first pass: static values from the configuration, defaults for the rest
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};

        eval { $d->{pid} = find_lxc_pid($vmid) if defined($active_hash->{$vmid}); };
        warn $@ if $@; # ignore errors (consider them stopped)

        $d->{status} = $d->{pid} ? 'running' : 'stopped';

        my $cfspath = cfs_config_path($vmid);
        my $conf = PVE::Cluster::cfs_read_file($cfspath) || {};

        $d->{name} = $conf->{'hostname'} || "CT$vmid";
        $d->{name} =~ s/[\s]//g;

        $d->{cpus} = $conf->{cpulimit} || $cpucount;

        if ($d->{pid}) {
            my $res = get_container_disk_usage($vmid, $d->{pid});
            $d->{disk} = $res->{used};
            $d->{maxdisk} = $res->{total};
        } else {
            $d->{disk} = 0;
            # use 4GB by default ??
            if (my $rootfs = $conf->{rootfs}) {
                my $rootinfo = parse_ct_rootfs($rootfs);
                $d->{maxdisk} = int(($rootinfo->{size} || 4)*1024*1024)*1024;
            } else {
                $d->{maxdisk} = 4*1024*1024*1024;
            }
        }

        $d->{mem} = 0;
        $d->{swap} = 0;
        $d->{maxmem} = ($conf->{memory}||512)*1024*1024;
        $d->{maxswap} = ($conf->{swap}//0)*1024*1024;

        $d->{uptime} = 0;
        $d->{cpu} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $d->{template} = is_template($conf);
    }

    # second pass: live values of the running containers
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};
        my $pid = $d->{pid};

        next if !$pid; # skip stopped CTs

        my $ctime = (stat("/proc/$pid"))[10]; # 10 = ctime
        # stat() returns an empty list if the process is already gone
        $d->{uptime} = time - $ctime if defined($ctime); # the method lxcfs uses

        $d->{mem} = read_cgroup_value('memory', $vmid, 'memory.usage_in_bytes');
        $d->{swap} = read_cgroup_value('memory', $vmid, 'memory.memsw.usage_in_bytes') - $d->{mem};

        my $blkio_bytes = read_cgroup_value('blkio', $vmid, 'blkio.throttle.io_service_bytes', 1);
        my @bytes = split(/\n/, $blkio_bytes);
        # NOTE(review): a later Read/Write line overwrites an earlier one,
        # the values are not summed up - confirm this is intended when the
        # file lists more than one device.
        foreach my $byte (@bytes) {
            if (my ($key, $value) = $byte =~ /(Read|Write)\s+(\d+)/) {
                $d->{diskread} = $value if $key eq 'Read';
                $d->{diskwrite} = $value if $key eq 'Write';
            }
        }

        my $pstat = &$parse_cpuacct_stat($vmid);

        my $used = ($pstat->{utime} // 0) + ($pstat->{stime} // 0);

        my $old = $last_proc_vmid_stat->{$vmid};
        if (!$old) {
            # first sample for this container, nothing to compare with yet
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($cdtime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        # only take a new sample after a long enough interval, otherwise
        # report the previously computed value
        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            $d->{cpu} = $old->{cpu};
        }
    }

    my $netdev = PVE::ProcFSTools::read_proc_net_dev();

    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^veth([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $list->{$vmid};

        next if !$d;

        # the counters belong to the host side of the veth pair, so what the
        # host receives is what the container sent (and vice versa)
        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};

    }

    return $list;
}
903
# Classify the 'volume' value of a mount point:
#   'device' - absolute path below /dev/
#   'bind'   - any other absolute path
#   'volume' - everything else
sub classify_mountpoint {
    my ($vol) = @_;

    return 'volume' if $vol !~ m!^/!;

    return $vol =~ m!^/dev/! ? 'device' : 'bind';
}
912
# Parse the property string $data according to schema $desc.
#
# Returns the parsed hash with 'size' replaced by the result of
# PVE::JSONSchema::parse_size() and 'type' set by classify_mountpoint().
# On errors it dies, or returns undef if $noerr is set.
#
# Fix: the "invalid size" message now shows the offending input; it used to
# interpolate the variable that had just been overwritten with undef.
my $parse_ct_mountpoint_full = sub {
    my ($desc, $data, $noerr) = @_;

    $data //= '';

    my $res;
    eval { $res = PVE::JSONSchema::parse_property_string($desc, $data) };
    if ($@) {
        return undef if $noerr;
        die $@;
    }

    if (defined(my $size_text = $res->{size})) {
        my $size = PVE::JSONSchema::parse_size($size_text);
        if (!defined($size)) {
            return undef if $noerr;
            die "invalid size: $size_text\n";
        }
        $res->{size} = $size;
    }

    $res->{type} = classify_mountpoint($res->{volume});

    return $res;
};
938
# Parse a 'rootfs' property string. The result always gets the mount path
# '/'. With $noerr set, undef is returned on errors instead of dying.
sub parse_ct_rootfs {
    my ($data, $noerr) = @_;

    my $rootfs = $parse_ct_mountpoint_full->($rootfs_desc, $data, $noerr);

    if (defined($rootfs)) {
        $rootfs->{mp} = '/';
    }

    return $rootfs;
}
948
# Parse an 'mp<N>' property string. With $noerr set, undef is returned on
# errors instead of dying.
sub parse_ct_mountpoint {
    my ($data, $noerr) = @_;

    return $parse_ct_mountpoint_full->($mp_desc, $data, $noerr);
}
954
# Format mount point hash $info as a property string. The computed 'type'
# is never printed; 'mp' is skipped as well if $nomp is set.
sub print_ct_mountpoint {
    my ($info, $nomp) = @_;

    my @skipped = $nomp ? ('type', 'mp') : ('type');

    return PVE::JSONSchema::print_property_string($info, $mp_desc, \@skipped);
}
961
# Format network hash $net as a 'net<N>' property string.
sub print_lxc_network {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $netconf_desc);
}
966
# Parse a 'net<N>' property string. An empty hash is returned for empty
# input. Otherwise 'type' is forced to 'veth' and a random MAC address is
# filled in if none was configured.
sub parse_lxc_network {
    my ($data) = @_;

    return {} if !$data;

    my $net = PVE::JSONSchema::parse_property_string($netconf_desc, $data);

    $net->{type} = 'veth';
    $net->{hwaddr} ||= PVE::Tools::random_ether_addr();

    return $net;
}
981
# Read file $name from the cgroup directory of container $vmid below
# controller $group. Returns the whole content if $full is set, otherwise
# only the first line.
sub read_cgroup_value {
    my ($group, $vmid, $name, $full) = @_;

    my $path = "/sys/fs/cgroup/$group/lxc/$vmid/$name";

    return $full
        ? PVE::Tools::file_get_contents($path)
        : PVE::Tools::file_read_firstline($path);
}
991
# Write $value to file $name in the cgroup directory of container $vmid
# below controller $group. Nothing happens if the file does not exist.
sub write_cgroup_value {
    my ($group, $vmid, $name, $value) = @_;

    my $path = "/sys/fs/cgroup/$group/lxc/$vmid/$name";

    if (-e $path) {
        PVE::ProcFSTools::write_proc_entry($path, $value);
    }
}
999
# Scan /proc for running 'lxc-console -n <vmid>' processes.
# Returns a hash reference { <vmid> => [ <pid>, ... ] }.
sub find_lxc_console_pids {

    my $pids_by_vmid = {};

    PVE::Tools::dir_glob_foreach('/proc', '\d+', sub {
        my ($pid) = @_;

        my $cmdline = PVE::Tools::file_read_firstline("/proc/$pid/cmdline");
        return if !$cmdline;

        my @argv = split(/\0/, $cmdline);

        # only exactly "lxc-console -n <vmid>" counts, with or without the
        # /usr/bin/ prefix
        return unless scalar(@argv) == 3
            && $argv[1] eq '-n'
            && $argv[2] =~ m/^\d+$/
            && $argv[0] =~ m|^(/usr/bin/)?lxc-console$|;

        push @{$pids_by_vmid->{$argv[2]}}, $pid;
    });

    return $pids_by_vmid;
}
1025
# Return the PID that 'lxc-info -n <vmid> -p' prints for container $vmid.
# Dies if no PID is reported.
sub find_lxc_pid {
    my ($vmid) = @_;

    my $pid;

    PVE::Tools::run_command(
        ['lxc-info', '-n', $vmid, '-p'],
        outfunc => sub {
            my ($line) = @_;
            $pid = $1 if $line =~ m/^PID:\s+(\d+)$/;
        },
    );

    die "unable to get PID for CT $vmid (not running?)\n" if !$pid;

    return $pid;
}
1040
# Split an IPv4 address in CIDR notation into { address, netmask }.
# Prefix lengths from 8 to 32 are accepted. On invalid input the function
# dies, or returns undef if $noerr is set.
# Note: Net::IP is not used here, because it only allows strict CIDR
# networks.
sub parse_ipv4_cidr {
    my ($cidr, $noerr) = @_;

    if ($cidr =~ m!^($IPV4RE)(?:/(\d+))$!) {
        my ($address, $bits) = ($1, $2);
        if (($bits > 7) && ($bits <= 32)) {
            my $netmask = $PVE::Network::ipv4_reverse_mask->[$bits];
            return { address => $address, netmask => $netmask };
        }
    }

    return undef if $noerr;

    die "unable to parse ipv4 address/mask\n";
}
1054
# Die if the configuration $conf carries a lock.
sub check_lock {
    my ($conf) = @_;

    my $lock = $conf->{'lock'};

    die "VM is locked ($lock)\n" if $lock;
}
1060
# Die with "$err_msg - protection mode enabled" if the configuration
# $vm_conf has the protection flag set.
sub check_protection {
    my ($vm_conf, $err_msg) = @_;

    my $protected = $vm_conf->{protection};

    die "$err_msg - protection mode enabled\n" if $protected;
}
1068
# Generate the native LXC configuration file /var/lib/lxc/<vmid>/config
# from the container configuration $conf.
#
# Parameters:
#   $storage_cfg - not used by this function
#   $vmid        - container ID
#   $conf        - parsed container configuration
#
# For templates the file is removed instead. Dies if 'arch' or 'ostype' is
# missing, or if the ostype is not supported.
#
# Changes compared to the previous revision:
#  - the net<N> sections are written in sorted key order, so the generated
#    file no longer depends on the random hash order
#  - the result of parse_ct_rootfs() is no longer stored in an unused local
sub update_lxc_config {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $dir = "/var/lib/lxc/$vmid";

    if ($conf->{template}) {

        unlink "$dir/config";

        return;
    }

    my $raw = '';

    die "missing 'arch' - internal error" if !$conf->{arch};
    $raw .= "lxc.arch = $conf->{arch}\n";

    my $unprivileged = $conf->{unprivileged};
    # number of user supplied lxc.id_map entries
    my $custom_idmap = grep { $_->[0] eq 'lxc.id_map' } @{$conf->{lxc}};

    my $ostype = $conf->{ostype} || die "missing 'ostype' - internal error";
    if ($ostype =~ /^(?:debian | ubuntu | centos | fedora | opensuse | archlinux)$/x) {
        $raw .= "lxc.include = /usr/share/lxc/config/$ostype.common.conf\n";
        if ($unprivileged || $custom_idmap) {
            $raw .= "lxc.include = /usr/share/lxc/config/$ostype.userns.conf\n"
        }
    } else {
        die "implement me (ostype $ostype)";
    }

    $raw .= "lxc.monitor.unshare = 1\n";

    # Should we read them from /etc/subuid?
    if ($unprivileged && !$custom_idmap) {
        $raw .= "lxc.id_map = u 0 100000 65536\n";
        $raw .= "lxc.id_map = g 0 100000 65536\n";
    }

    if (!has_dev_console($conf)) {
        $raw .= "lxc.console = none\n";
        $raw .= "lxc.cgroup.devices.deny = c 5:1 rwm\n";
    }

    my $ttycount = get_tty_count($conf);
    $raw .= "lxc.tty = $ttycount\n";

    # some init scripts expects a linux terminal (turnkey).
    $raw .= "lxc.environment = TERM=linux\n";

    my $utsname = $conf->{hostname} || "CT$vmid";
    $raw .= "lxc.utsname = $utsname\n";

    my $memory = $conf->{memory} || 512;
    my $swap = $conf->{swap} // 0;

    my $lxcmem = int($memory*1024*1024);
    $raw .= "lxc.cgroup.memory.limit_in_bytes = $lxcmem\n";

    # the memsw limit covers memory plus swap
    my $lxcswap = int(($memory + $swap)*1024*1024);
    $raw .= "lxc.cgroup.memory.memsw.limit_in_bytes = $lxcswap\n";

    if (my $cpulimit = $conf->{cpulimit}) {
        $raw .= "lxc.cgroup.cpu.cfs_period_us = 100000\n";
        my $value = int(100000*$cpulimit);
        $raw .= "lxc.cgroup.cpu.cfs_quota_us = $value\n";
    }

    my $shares = $conf->{cpuunits} || 1024;
    $raw .= "lxc.cgroup.cpu.shares = $shares\n";

    # called for its validation only: this dies on an invalid rootfs entry
    parse_ct_rootfs($conf->{rootfs});

    $raw .= "lxc.rootfs = $dir/rootfs\n";

    my $netcount = 0;
    foreach my $k (sort keys %$conf) {
        next if $k !~ m/^net(\d+)$/;
        my $ind = $1;
        my $d = parse_lxc_network($conf->{$k});
        $netcount++;
        $raw .= "lxc.network.type = veth\n";
        $raw .= "lxc.network.veth.pair = veth${vmid}i${ind}\n";
        $raw .= "lxc.network.hwaddr = $d->{hwaddr}\n" if defined($d->{hwaddr});
        $raw .= "lxc.network.name = $d->{name}\n" if defined($d->{name});
        $raw .= "lxc.network.mtu = $d->{mtu}\n" if defined($d->{mtu});
    }

    # raw lxc entries from the configuration are appended unchanged
    if (my $lxcconf = $conf->{lxc}) {
        foreach my $entry (@$lxcconf) {
            my ($k, $v) = @$entry;
            $netcount++ if $k eq 'lxc.network.type';
            $raw .= "$k = $v\n";
        }
    }

    $raw .= "lxc.network.type = empty\n" if !$netcount;

    File::Path::mkpath("$dir/rootfs");

    PVE::Tools::file_set_contents("$dir/config", $raw);
}
1170
# Verify and clean up a nameserver list: every entry is checked with
# pve_verify_ip() and the entries are returned joined by single spaces.
sub verify_nameserver_list {
    my ($nameserver_list) = @_;

    my @servers = PVE::Tools::split_list($nameserver_list);

    foreach my $server (@servers) {
        PVE::JSONSchema::pve_verify_ip($server);
    }

    return join(' ', @servers);
}
1183
# Clean up a search domain list: the entries are returned joined by single
# spaces.
sub verify_searchdomain_list {
    my ($searchdomain_list) = @_;

    # todo: should we add checks for valid dns domains?
    my @domains = PVE::Tools::split_list($searchdomain_list);

    return join(' ', @domains);
}
1195
# Store $volid in the lowest free 'unused<N>' slot of $config and return the
# name of that slot. Nothing is returned (and nothing changed) if the volume
# is already listed. Dies if all slots are taken.
sub add_unused_volume {
    my ($config, $volid) = @_;

    my $free_slot;

    # walk from the highest index down, so the last free slot remembered is
    # the one with the lowest index
    for my $ind (reverse(0 .. $MAX_UNUSED_DISKS - 1)) {
        my $slot = "unused$ind";
        my $existing = $config->{$slot};

        if (!$existing) {
            $free_slot = $slot;
            next;
        }

        return if $existing eq $volid; # do not add duplicates
    }

    die "To many unused volume - please delete them first.\n" if !$free_slot;

    $config->{$free_slot} = $volid;

    return $free_slot;
}
1215
# Apply a set of option changes to the container configuration $conf.
#
# Parameters:
#   $vmid    - container ID
#   $conf    - configuration hash, modified in place
#   $running - true if the container is currently running; changes are then
#              written to disk after every option and applied live where
#              this is implemented
#   $param   - hash of options to set ('memory' and 'swap' are extracted,
#              i.e. removed, from it)
#   $delete  - optional array ref of option names to delete
#
# Options which cannot be changed while the container is running are
# collected and reported with a single die() at the very end, after all
# other changes have been processed.
sub update_pct_config {
    my ($vmid, $conf, $running, $param, $delete) = @_;

    my @nohotplug;

    my $new_disks = 0;
    my @deleted_volumes;

    my $rootdir;
    if ($running) {
        my $pid = find_lxc_pid($vmid);
        $rootdir = "/proc/$pid/root";
    }

    # Records the given option(s) as "not hot-pluggable" when the container
    # is running. Returns true if the caller has to skip the change.
    my $hotplug_error = sub {
        if ($running) {
            push @nohotplug, @_;
            return 1;
        } else {
            return 0;
        }
    };

    if (defined($delete)) {
        foreach my $opt (@$delete) {
            if (!exists($conf->{$opt})) {
                warn "no such option: $opt\n";
                next;
            }

            if ($opt eq 'hostname' || $opt eq 'memory' || $opt eq 'rootfs') {
                die "unable to delete required option '$opt'\n";
            } elsif ($opt eq 'swap') {
                delete $conf->{$opt};
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", -1);
            } elsif ($opt eq 'description' || $opt eq 'onboot' || $opt eq 'startup') {
                delete $conf->{$opt};
            } elsif ($opt eq 'nameserver' || $opt eq 'searchdomain' ||
                     $opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
                next if $hotplug_error->($opt);
                delete $conf->{$opt};
            } elsif ($opt =~ m/^net(\d+)$/) {
                # \d+ (not \d): interfaces with a multi-digit index must be
                # deletable too, matching the pattern used when setting them.
                my $netid = $1;
                delete $conf->{$opt};
                next if !$running;
                PVE::Network::veth_delete("veth${vmid}i$netid");
            } elsif ($opt eq 'protection') {
                delete $conf->{$opt};
            } elsif ($opt =~ m/^unused(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                push @deleted_volumes, $conf->{$opt};
                delete $conf->{$opt};
            } elsif ($opt =~ m/^mp(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                my $mountpoint = parse_ct_mountpoint($conf->{$opt});
                # storage backed volumes are kept as 'unusedN' entries
                if ($mountpoint->{type} eq 'volume') {
                    add_unused_volume($conf, $mountpoint->{volume});
                }
                delete $conf->{$opt};
            } elsif ($opt eq 'unprivileged') {
                die "unable to delete read-only option: '$opt'\n";
            } else {
                die "implement me (delete: $opt)"
            }
            write_config($vmid, $conf) if $running;
        }
    }

    # There's no separate swap size to configure, there's memory and "total"
    # memory (iow. memory+swap). This means we have to change them together.
    my $wanted_memory = PVE::Tools::extract_param($param, 'memory');
    my $wanted_swap = PVE::Tools::extract_param($param, 'swap');
    if (defined($wanted_memory) || defined($wanted_swap)) {

        $wanted_memory //= ($conf->{memory} || 512);
        $wanted_swap //= ($conf->{swap} || 0);

        my $total = $wanted_memory + $wanted_swap;
        if ($running) {
            write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
            write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
        }
        $conf->{memory} = $wanted_memory;
        $conf->{swap} = $wanted_swap;

        write_config($vmid, $conf) if $running;
    }

    foreach my $opt (keys %$param) {
        my $value = $param->{$opt};
        if ($opt eq 'hostname') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'onboot') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt eq 'startup') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
            next if $hotplug_error->($opt);
            $conf->{$opt} = $value;
        } elsif ($opt eq 'nameserver') {
            next if $hotplug_error->($opt);
            my $list = verify_nameserver_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'searchdomain') {
            next if $hotplug_error->($opt);
            my $list = verify_searchdomain_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'cpulimit') {
            next if $hotplug_error->($opt); # FIXME: hotplug
            $conf->{$opt} = $value;
        } elsif ($opt eq 'cpuunits') {
            $conf->{$opt} = $value;
            # NOTE(review): written regardless of $running - confirm that
            # write_cgroup_value copes with a stopped container.
            write_cgroup_value("cpu", $vmid, "cpu.shares", $value);
        } elsif ($opt eq 'description') {
            $conf->{$opt} = PVE::Tools::encode_text($value);
        } elsif ($opt =~ m/^net(\d+)$/) {
            my $netid = $1;
            my $net = parse_lxc_network($value);
            if (!$running) {
                $conf->{$opt} = print_lxc_network($net);
            } else {
                update_net($vmid, $conf, $opt, $net, $netid, $rootdir);
            }
        } elsif ($opt eq 'protection') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt =~ m/^mp(\d+)$/) {
            next if $hotplug_error->($opt);
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            $conf->{$opt} = $value;
            $new_disks = 1;
        } elsif ($opt eq 'rootfs') {
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            die "implement me: $opt";
        } elsif ($opt eq 'unprivileged') {
            die "unable to modify read-only option: '$opt'\n";
        } else {
            die "implement me: $opt";
        }
        write_config($vmid, $conf) if $running;
    }

    if (@deleted_volumes) {
        my $storage_cfg = PVE::Storage::config();
        foreach my $volume (@deleted_volumes) {
            delete_mountpoint_volume($storage_cfg, $vmid, $volume);
        }
    }

    if ($new_disks) {
        my $storage_cfg = PVE::Storage::config();
        create_disks($storage_cfg, $vmid, $conf, $conf);
    }

    # This should be the last thing we do here
    if ($running && scalar(@nohotplug)) {
        die "unable to modify " . join(',', @nohotplug) . " while container is running\n";
    }
}
1376
# True unless the 'console' option is explicitly set to a false value.
sub has_dev_console {
    my ($conf) = @_;

    my $console = $conf->{console};

    # unset means "enabled"; otherwise the truthiness of the value decides
    return !defined($console) || !!$console;
}
1382
# Number of ttys for the container: the configured 'tty' value, or the
# schema default from $confdesc when the option is not set.
sub get_tty_count {
    my ($conf) = @_;

    my $configured = $conf->{tty};

    return defined($configured) ? $configured : $confdesc->{tty}->{default};
}
1388
# Console mode of the container: the configured 'cmode' value, or the
# schema default from $confdesc when the option is not set.
sub get_cmode {
    my ($conf) = @_;

    my $configured = $conf->{cmode};

    return defined($configured) ? $configured : $confdesc->{cmode}->{default};
}
1394
# Build the command (as array ref) used to open a console for container
# $vmid, depending on the console mode returned by get_cmode().
# Dies with "internal error" for an unknown mode.
sub get_console_command {
    my ($vmid, $conf) = @_;

    my $cmode = get_cmode($conf);

    my %command_for = (
        console => ['lxc-console', '-n', $vmid, '-t', 0],
        tty     => ['lxc-console', '-n', $vmid],
        shell   => ['lxc-attach', '--clear-env', '-n', $vmid],
    );

    my $cmd = $command_for{$cmode};
    die "internal error" if !$cmd;

    return $cmd;
}
1410
# Return the (IPv4, IPv6) addresses configured on net0, without their
# prefix length. Keyword values (dhcp/manual, plus auto for IPv6) are
# reported as undef. Returns undef when there is no net0 at all.
sub get_primary_ips {
    my ($conf) = @_;

    # return data from net0

    return undef if !defined($conf->{net0});
    my $net = parse_lxc_network($conf->{net0});

    my ($ipv4, $ipv6) = @$net{qw(ip ip6)};

    my @todo = (
        [ \$ipv4, qr/^(dhcp|manual)$/ ],
        [ \$ipv6, qr/^(auto|dhcp|manual)$/ ],
    );
    foreach my $entry (@todo) {
        my ($addr_ref, $keyword_re) = @$entry;

        next if !$$addr_ref;

        if ($$addr_ref =~ $keyword_re) {
            $$addr_ref = undef;
        } else {
            $$addr_ref =~ s!/\d+$!!; # strip the prefix length
        }
    }

    return ($ipv4, $ipv6);
}
1438
# Free the storage volume behind a mount point, but only if it is a
# storage managed volume (as decided by classify_mountpoint) which is
# owned by container $vmid.
sub delete_mountpoint_volume {
    my ($storage_cfg, $vmid, $volume) = @_;

    my $kind = classify_mountpoint($volume);
    return if $kind ne 'volume';

    my (undef, undef, $owner) = PVE::Storage::parse_volname($storage_cfg, $volume);

    if ($vmid == $owner) {
        PVE::Storage::vdisk_free($storage_cfg, $volume);
    }
}
1447
# Remove a container: free the volumes of all its mount points, remove
# the files below /var/lib/lxc/$vmid and finally destroy its config.
sub destroy_lxc_container {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $free_volume = sub {
        my ($ms, $mountpoint) = @_;
        delete_mountpoint_volume($storage_cfg, $vmid, $mountpoint->{volume});
    };
    foreach_mountpoint($conf, $free_volume);

    my $lxc_dir = "/var/lib/lxc/$vmid";
    rmdir "$lxc_dir/rootfs";
    unlink "$lxc_dir/config";
    rmdir $lxc_dir;
    destroy_config($vmid);

    #my $cmd = ['lxc-destroy', '-n', $vmid ];
    #PVE::Tools::run_command($cmd);
}
1464
# Cleanup after a container was stopped: deactivate its volumes unless
# $keepActive is set. Failures are only reported as warnings.
sub vm_stop_cleanup {
    my ($storage_cfg, $vmid, $conf, $keepActive) = @_;

    eval {
        return if $keepActive; # leaves the eval block only

        my $vollist = get_vm_volumes($conf);
        PVE::Storage::deactivate_volumes($storage_cfg, $vollist);
    };
    if (my $err = $@) {
        warn $err; # avoid errors - just warn
    }
}
1477
# Numeric "not equal" which tolerates undefined operands: two undefined
# values are equal, a defined and an undefined value are different.
my $safe_num_ne = sub {
    my ($left, $right) = @_;

    my $undefined = grep { !defined($_) } ($left, $right);

    return 0 if $undefined == 2;
    return 1 if $undefined == 1;

    return $left != $right;
};
1487
# String "not equal" which tolerates undefined operands: two undefined
# values are equal, a defined and an undefined value are different.
my $safe_string_ne = sub {
    my ($left, $right) = @_;

    my $undefined = grep { !defined($_) } ($left, $right);

    return 0 if $undefined == 2;
    return 1 if $undefined == 1;

    return $left ne $right;
};
1497
# Apply a changed network interface configuration ($newnet) for option
# $opt to a container.
#
# - no previous entry: the interface is hot-plugged
# - hwaddr or name changed: the veth device is deleted and plugged again
# - bridge, tag or firewall changed: the device is re-plugged on the bridge
# Intermediate states are written to the config file. The IP configuration
# is updated afterwards in any case. Only 'veth' interfaces are supported.
sub update_net {
    my ($vmid, $conf, $opt, $newnet, $netid, $rootdir) = @_;

    # for when there are physical interfaces
    die "cannot update interface of type $newnet->{type}"
        if $newnet->{type} ne 'veth';

    my $veth = "veth${vmid}i${netid}";
    my $eth = $newnet->{name};
    my @bridge_props = qw(bridge tag firewall);

    my $oldnetcfg = $conf->{$opt};

    if (!$oldnetcfg) {
        hotplug_net($vmid, $conf, $opt, $newnet, $netid);
    } else {
        my $oldnet = parse_lxc_network($oldnetcfg);

        # compares one property of the old and new config with the given
        # comparison function
        my $differs = sub {
            my ($compare, $property) = @_;
            return $compare->($oldnet->{$property}, $newnet->{$property});
        };

        if ($differs->($safe_string_ne, 'hwaddr') ||
            $differs->($safe_string_ne, 'name')) {

            PVE::Network::veth_delete($veth);
            delete $conf->{$opt};
            write_config($vmid, $conf);

            hotplug_net($vmid, $conf, $opt, $newnet, $netid);

        } elsif ($differs->($safe_string_ne, 'bridge') ||
                 $differs->($safe_num_ne, 'tag') ||
                 $differs->($safe_num_ne, 'firewall')) {

            if ($oldnet->{bridge}) {
                PVE::Network::tap_unplug($veth);
                delete @{$oldnet}{@bridge_props};
                $conf->{$opt} = print_lxc_network($oldnet);
                write_config($vmid, $conf);
            }

            PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall});
            foreach my $property (@bridge_props) {
                next if !$newnet->{$property};
                $oldnet->{$property} = $newnet->{$property};
            }
            $conf->{$opt} = print_lxc_network($oldnet);
            write_config($vmid, $conf);
        }
    }

    update_ipconfig($vmid, $conf, $opt, $eth, $newnet, $rootdir);
}
1547
# Create a veth pair for a running container, plug the host side into the
# bridge, move the peer into the container and bring it up. The resulting
# interface settings (without IP configuration) are stored under $opt and
# written to the config file.
sub hotplug_net {
    my ($vmid, $conf, $opt, $newnet, $netid) = @_;

    my $veth = "veth${vmid}i${netid}";
    my $vethpeer = "${veth}p";
    my $eth = $newnet->{name};

    PVE::Network::veth_create($veth, $vethpeer, $newnet->{bridge}, $newnet->{hwaddr});
    PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall});

    # attach peer in container
    PVE::Tools::run_command(['lxc-device', '-n', $vmid, 'add', $vethpeer, "$eth"]);

    # link up peer in container
    PVE::Tools::run_command([
        'lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--',
        '/sbin/ip', 'link', 'set', $eth, 'up',
    ]);

    # only record what has actually been applied so far
    my $done = { type => 'veth' };
    foreach my $property (qw(bridge tag firewall hwaddr name)) {
        my $value = $newnet->{$property};
        $done->{$property} = $value if $value;
    }
    $conf->{$opt} = print_lxc_network($done);

    write_config($vmid, $conf);
}
1574
# Apply changed IP address / gateway settings of interface $eth inside a
# running container and store them in the config.
#
# Parameters:
#   $vmid    - container ID
#   $conf    - configuration hash; $conf->{$opt} holds the currently
#              applied network settings and is updated on success
#   $opt     - name of the network option (key into $conf)
#   $eth     - interface name inside the container
#   $newnet  - parsed new network settings
#   $rootdir - root directory handed to PVE::LXC::Setup
#
# IPv4 and IPv6 are handled one after the other. For each family the new
# settings are only saved if adding the address and replacing the gateway
# succeeded; failures in those steps are reported with warn().
sub update_ipconfig {
    my ($vmid, $conf, $opt, $eth, $newnet, $rootdir) = @_;

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);

    my $optdata = parse_lxc_network($conf->{$opt});

    # run a command inside the network namespace of the container
    my $nscmd = sub {
        my $cmdargs = shift;
        PVE::Tools::run_command(['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', @_], %$cmdargs);
    };
    my $ipcmd = sub { $nscmd->({}, '/sbin/ip', @_) };

    # values of ip/ip6 which are keywords and not addresses
    my $ip_keyword_re = qr/^(?:auto|dhcp|manual)$/;

    my $change_ip_config = sub {
        my ($ipversion) = @_;

        my $family_opt = "-$ipversion";
        my $suffix = $ipversion == 4 ? '' : $ipversion;
        my $gw= "gw$suffix";
        my $ip= "ip$suffix";

        my $newip = $newnet->{$ip};
        my $newgw = $newnet->{$gw};
        my $oldip = $optdata->{$ip};

        my $change_ip = $safe_string_ne->($oldip, $newip);
        my $change_gw = $safe_string_ne->($optdata->{$gw}, $newgw);

        return if !$change_ip && !$change_gw;

        # step 1: add new IP, if this fails we cancel
        my $is_real_ip = ($newip && $newip !~ $ip_keyword_re);
        my $ip_added = $change_ip && $is_real_ip;
        if ($ip_added) {
            eval { $ipcmd->($family_opt, 'addr', 'add', $newip, 'dev', $eth); };
            if (my $err = $@) {
                warn $err;
                return;
            }
        }

        # step 2: replace gateway
        # If this fails we delete the added IP and cancel.
        # If it succeeds we save the config and delete the old IP, ignoring
        # errors. The config is then saved.
        # Note: 'ip route replace' can add
        if ($change_gw) {
            if ($newgw) {
                eval {
                    # a gateway outside of our subnet needs a direct route
                    if ($is_real_ip && !PVE::Network::is_ip_in_cidr($newgw, $newip, $ipversion)) {
                        $ipcmd->($family_opt, 'route', 'add', $newgw, 'dev', $eth);
                    }
                    $ipcmd->($family_opt, 'route', 'replace', 'default', 'via', $newgw);
                };
                if (my $err = $@) {
                    warn $err;
                    # the route was not replaced, the old IP is still available
                    # rollback (delete new IP) and cancel
                    # Only an address which step 1 really added can be
                    # removed again (not a keyword or an unset value).
                    if ($ip_added) {
                        eval { $ipcmd->($family_opt, 'addr', 'del', $newip, 'dev', $eth); };
                        warn $@ if $@; # no need to die here
                    }
                    return;
                }
            } else {
                eval { $ipcmd->($family_opt, 'route', 'del', 'default'); };
                # if the route was not deleted, the guest might have deleted it manually
                # warn and continue
                warn $@ if $@;
            }
        }

        # from this point on we save the configuration
        # step 3: delete old IP ignoring errors
        # Keywords (including 'manual') are not addresses, there is nothing
        # to delete for them.
        if ($change_ip && $oldip && $oldip !~ $ip_keyword_re) {
            # We need to enable promote_secondaries, otherwise our newly added
            # address will be removed along with the old one.
            my $promote = 0;
            eval {
                if ($ipversion == 4) {
                    $nscmd->({ outfunc => sub { $promote = int(shift) } },
                             'cat', "/proc/sys/net/ipv4/conf/$eth/promote_secondaries");
                    $nscmd->({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=1");
                }
                $ipcmd->($family_opt, 'addr', 'del', $oldip, 'dev', $eth);
            };
            warn $@ if $@; # no need to die here

            # restore the previous promote_secondaries setting
            if ($ipversion == 4) {
                $nscmd->({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=$promote");
            }
        }

        foreach my $property ($ip, $gw) {
            if ($newnet->{$property}) {
                $optdata->{$property} = $newnet->{$property};
            } else {
                delete $optdata->{$property};
            }
        }
        $conf->{$opt} = print_lxc_network($optdata);
        write_config($vmid, $conf);
        $lxc_setup->setup_network($conf);
    };

    $change_ip_config->(4);
    $change_ip_config->(6);

}
1684
1685 # Internal snapshots
1686
# NOTE: Snapshot create/delete involves several non-atomic
# actions and can take a long time.
# So we try to avoid locking the file and use the 'lock' variable
# inside the config file instead.
1691
# Copy all settings from $source to $dest, except for the keys which
# describe snapshot state or are specific to the current config.
my $snapshot_copy_config = sub {
    my ($source, $dest) = @_;

    my %skip = map { $_ => 1 }
        qw(snapshots snapstate snaptime vmstate lock digest description);

    foreach my $key (grep { !$skip{$_} } keys %$source) {
        $dest->{$key} = $source->{$key};
    }
};
1707
# First phase of snapshot creation. While holding the container lock this
# sets the 'snapshot' lock in the config and adds a new snapshot entry
# (a copy of the current settings) in state 'prepare'.
# Returns the new snapshot entry.
my $snapshot_prepare = sub {
    my ($vmid, $snapname, $comment) = @_;

    my $snap;

    my $prepare_config = sub {
        my $conf = load_config($vmid);

        if (is_template($conf)) {
            die "you can't take a snapshot if it's a template\n";
        }

        check_lock($conf);

        $conf->{lock} = 'snapshot';

        if (defined($conf->{snapshots}->{$snapname})) {
            die "snapshot name '$snapname' already used\n";
        }

        my $storecfg = PVE::Storage::config();
        if (!has_feature('snapshot', $conf, $storecfg)) {
            die "snapshot feature is not available\n";
        }

        $snap = {};
        $conf->{snapshots}->{$snapname} = $snap;

        $snapshot_copy_config->($conf, $snap);

        $snap->{snapstate} = "prepare";
        $snap->{snaptime} = time();
        $snap->{description} = $comment if $comment;

        write_config($vmid, $conf);
    };

    lock_container($vmid, 10, $prepare_config);

    return $snap;
};
1746
# Final phase of snapshot creation. While holding the container lock this
# verifies that the snapshot is still in state 'prepare', then removes the
# state marker and the 'snapshot' lock and makes the snapshot the parent
# of the current config.
my $snapshot_commit = sub {
    my ($vmid, $snapname) = @_;

    my $commit_config = sub {
        my $conf = load_config($vmid);

        my $lock = $conf->{lock};
        if (!$lock || $lock ne 'snapshot') {
            die "missing snapshot lock\n";
        }

        my $snap = $conf->{snapshots}->{$snapname};
        if (!defined($snap)) {
            die "snapshot '$snapname' does not exist\n";
        }

        my $state = $snap->{snapstate};
        if (!$state || $state ne "prepare") {
            die "wrong snapshot state\n";
        }

        delete $snap->{snapstate};
        delete $conf->{lock};
        $conf->{parent} = $snapname;

        write_config($vmid, $conf);
    };

    lock_container($vmid, 10, $commit_config);
};
1773
# Check whether the storage of the container supports $feature.
# Returns 1 if it does, 0 otherwise. Dies for containers with mount
# points other than 'rootfs', which are not supported here yet.
sub has_feature {
    my ($feature, $conf, $storecfg, $snapname) = @_;

    my $unsupported = 0;

    my $check_volume = sub {
        my ($ms, $mountpoint) = @_;

        return if $unsupported; # skip further test

        my $volid = $mountpoint->{volume};
        if (!PVE::Storage::volume_has_feature($storecfg, $feature, $volid, $snapname)) {
            $unsupported = 1;
        }

        # TODO: implement support for mountpoints
        if ($ms ne 'rootfs') {
            die "unable to handle mountpoint '$ms' - feature not implemented\n";
        }
    };
    foreach_mountpoint($conf, $check_volume);

    return $unsupported ? 0 : 1;
}
1793
# Create snapshot $snapname of the root volume of container $vmid.
# A running container is frozen (and synced) while the storage snapshot
# is taken and unfrozen afterwards. On failure the half created snapshot
# is removed again (forced) and the error is re-thrown.
sub snapshot_create {
    my ($vmid, $snapname, $comment) = @_;

    my $snap = $snapshot_prepare->($vmid, $snapname, $comment);

    my $conf = load_config($vmid);
    my $running = check_running($vmid);

    my $frozen = 0;

    eval {
        if ($running) {
            PVE::Tools::run_command(['/usr/bin/lxc-freeze', '-n', $vmid]);
            $frozen = 1;
            PVE::Tools::run_command(['/bin/sync']);
        }

        my $storecfg = PVE::Storage::config();
        my $volid = parse_ct_rootfs($conf->{rootfs})->{volume};

        PVE::Storage::volume_snapshot($storecfg, $volid, $snapname);
        $snapshot_commit->($vmid, $snapname);
    };
    my $err = $@;

    # always unfreeze, no matter whether the snapshot succeeded
    if ($frozen) {
        eval { PVE::Tools::run_command(['/usr/bin/lxc-unfreeze', '-n', $vmid]); };
        warn $@ if $@;
    }

    return if !$err;

    snapshot_delete($vmid, $snapname, 1);
    die "$err\n";
}
1831
# Delete snapshot $snapname of container $vmid.
#
# The snapshot is first marked with snapstate 'delete' in the config,
# then the storage snapshot of the root volume is removed and finally the
# snapshot entry is dropped from the config.
#
# If removing the storage snapshot fails:
#   - with $force the config entry is removed anyway, and the function
#     dies afterwards with "Can't delete snapshot: ..."
#   - without $force the config entry is kept and the storage error is
#     re-thrown
sub snapshot_delete {
    my ($vmid, $snapname, $force) = @_;

    my $snap;

    my $conf;

    my $updatefn = sub {

        $conf = load_config($vmid);

        die "you can't delete a snapshot if vm is a template\n"
            if is_template($conf);

        $snap = $conf->{snapshots}->{$snapname};

        check_lock($conf);

        die "snapshot '$snapname' does not exist\n" if !defined($snap);

        $snap->{snapstate} = 'delete';

        write_config($vmid, $conf);
    };

    lock_container($vmid, 10, $updatefn);

    my $storecfg = PVE::Storage::config();

    my $del_snap = sub {

        check_lock($conf);

        # If the current config is based on the deleted snapshot, re-link
        # it to the parent of that snapshot (if there is one).
        if (defined($conf->{parent}) && $conf->{parent} eq $snapname) {
            if ($conf->{snapshots}->{$snapname}->{parent}) {
                $conf->{parent} = $conf->{snapshots}->{$snapname}->{parent};
            } else {
                delete $conf->{parent};
            }
        }

        delete $conf->{snapshots}->{$snapname};

        write_config($vmid, $conf);
    };

    my $rootfs = $conf->{snapshots}->{$snapname}->{rootfs};
    my $rootinfo = parse_ct_rootfs($rootfs);
    my $volid = $rootinfo->{volume};

    eval {
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname);
    };
    my $err = $@;

    # Do not silently ignore a storage failure: without $force the config
    # entry has to stay and the caller needs to know about the error.
    die $err if $err && !$force;

    lock_container($vmid, 10, $del_snap);

    if ($err) {
        die "Can't delete snapshot: $vmid $snapname $err\n";
    }
}
1894
# Roll container $vmid back to snapshot $snapname.
#
# The container is stopped (killed) if it is running. While holding the
# container lock the settings stored in the snapshot are copied into the
# current config, which is marked with the 'rollback' lock. Then the root
# volume is rolled back on the storage and the lock is removed again.
#
# Dies if the container is a template, the snapshot does not exist or is
# incomplete, the config is locked, or the container cannot be stopped.
sub snapshot_rollback {
    my ($vmid, $snapname) = @_;

    my $storecfg = PVE::Storage::config();

    my $conf = load_config($vmid);

    die "you can't rollback if vm is a template\n" if is_template($conf);

    my $snap = $conf->{snapshots}->{$snapname};

    die "snapshot '$snapname' does not exist\n" if !defined($snap);

    my $rootfs = $snap->{rootfs};
    my $rootinfo = parse_ct_rootfs($rootfs);
    my $volid = $rootinfo->{volume};

    PVE::Storage::volume_rollback_is_possible($storecfg, $volid, $snapname);

    my $updatefn = sub {

        die "unable to rollback to incomplete snapshot (snapstate = $snap->{snapstate})\n"
            if $snap->{snapstate};

        check_lock($conf);

        # list form: no shell is involved in running the command
        system('lxc-stop', '-n', $vmid, '--kill') if check_running($vmid);

        die "unable to rollback vm $vmid: vm is running\n"
            if check_running($vmid);

        $conf->{lock} = 'rollback';

        # copy snapshot config to current config
        # (the snapshot list itself is not touched by the copy)
        # NOTE(review): settings which exist in the current config but not
        # in the snapshot are kept - confirm that this is intended.
        $snapshot_copy_config->($snap, $conf);
        delete $conf->{snaptime};
        delete $conf->{snapname};
        $conf->{parent} = $snapname;

        write_config($vmid, $conf);
    };

    my $unlockfn = sub {
        delete $conf->{lock};
        write_config($vmid, $conf);
    };

    lock_container($vmid, 10, $updatefn);

    PVE::Storage::volume_snapshot_rollback($storecfg, $volid, $snapname);

    lock_container($vmid, 5, $unlockfn);
}
1953
# Convert the root volume of container $vmid into a base (template)
# volume and store the new volume ID in the config.
# Dies if the storage does not support the 'template' feature.
sub template_create {
    my ($vmid, $conf) = @_;

    my $storecfg = PVE::Storage::config();

    my $rootinfo = parse_ct_rootfs($conf->{rootfs});
    my $volid = $rootinfo->{volume};

    if (!PVE::Storage::volume_has_feature($storecfg, 'template', $volid)) {
        die "Template feature is not available for '$volid'\n";
    }

    PVE::Storage::activate_volumes($storecfg, [$volid]);

    $rootinfo->{volume} = PVE::Storage::vdisk_create_base($storecfg, $volid);
    $conf->{rootfs} = print_ct_mountpoint($rootinfo, 1);

    write_config($vmid, $conf);
}
1973
# Returns 1 if the config has 'template' set to 1, otherwise 0.
sub is_template {
    my ($conf) = @_;

    my $template = $conf->{template};

    # explicit result for both cases instead of relying on the value the
    # sub happens to fall through with
    return (defined($template) && $template == 1) ? 1 : 0;
}
1979
# List the config keys which can hold mount points: 'rootfs' followed by
# mp0 .. mp($MAX_MOUNT_POINTS - 1), in reverse order if $reverse is set.
sub mountpoint_names {
    my ($reverse) = @_;

    my @names = ('rootfs', map { "mp$_" } 0 .. $MAX_MOUNT_POINTS - 1);

    return $reverse ? reverse @names : @names;
}
1991
# The container might have *different* symlinks than the host. realpath/abs_path
# use the actual filesystem to resolve links.
#
# So this normalizes a mount point path purely textually:
#   - the result always starts with a slash
#   - sequences of slashes are collapsed
#   - '.' components are dropped
#   - '..' components remove the preceding component, without regard for
#     symlinks, and never climb above the root
#   - a trailing slash of the input is preserved (the root is always '/')
sub sanitize_mountpoint {
    my ($mp) = @_;

    $mp //= '';

    my $trailing_slash = ($mp =~ m@/$@) ? 1 : 0;

    my @resolved;
    foreach my $component (split(m@/+@, $mp)) {
        next if $component eq '' || $component eq '.';

        if ($component eq '..') {
            pop @resolved; # no-op when we are at the root already
            next;
        }

        push @resolved, $component;
    }

    return '/' if !@resolved;

    my $clean = '/' . join('/', @resolved);
    $clean .= '/' if $trailing_slash;

    return $clean;
}
2004
# Call $func->($key, $mountpoint) for every mount point (including
# 'rootfs') defined in $conf, in the order given by mountpoint_names().
# Entries which cannot be parsed are skipped. The mount point path, and the
# volume if it is an absolute path, are sanitized before the callback runs.
sub foreach_mountpoint_full {
    my ($conf, $reverse, $func) = @_;

    foreach my $key (mountpoint_names($reverse)) {
        my $value = $conf->{$key};
        next if !defined($value);

        my $parse = $key eq 'rootfs' ? \&parse_ct_rootfs : \&parse_ct_mountpoint;
        my $mountpoint = $parse->($value, 1);
        next if !defined($mountpoint);

        $mountpoint->{mp} = sanitize_mountpoint($mountpoint->{mp});

        my $volume = $mountpoint->{volume};
        if ($volume =~ m|^/|) {
            $mountpoint->{volume} = sanitize_mountpoint($volume);
        }

        $func->($key, $mountpoint);
    }
}
2022
# Iterate over all mount points of $conf in forward order
# (see foreach_mountpoint_full).
sub foreach_mountpoint {
    my ($conf, $func) = @_;

    my $reverse = 0;
    foreach_mountpoint_full($conf, $reverse, $func);
}
2028
# Iterate over all mount points of $conf in reverse order
# (see foreach_mountpoint_full).
sub foreach_mountpoint_reverse {
    my ($conf, $func) = @_;

    my $reverse = 1;
    foreach_mountpoint_full($conf, $reverse, $func);
}
2034
# Check that $authuser is allowed to modify all options in $key_list.
#
# For every option the matching VM.Config.* privilege is verified with
# $rpcenv->check_vm_perm(), which is expected to die if the privilege is
# missing. 'root@pam' is allowed to change everything, so no checks are
# done for that user. Returns 1.
sub check_ct_modify_config_perm {
    my ($rpcenv, $authuser, $vmid, $pool, $key_list) = @_;

    # Only the super user may skip the checks - everybody else has to pass
    # the privilege check of every option.
    return 1 if $authuser eq 'root@pam';

    foreach my $opt (@$key_list) {
        my $privilege;

        if ($opt eq 'cpus' || $opt eq 'cpuunits' || $opt eq 'cpulimit') {
            $privilege = 'VM.Config.CPU';
        } elsif ($opt eq 'rootfs' || $opt =~ /^mp\d+$/) {
            $privilege = 'VM.Config.Disk';
        } elsif ($opt eq 'memory' || $opt eq 'swap') {
            $privilege = 'VM.Config.Memory';
        } elsif ($opt =~ m/^net\d+$/ || $opt eq 'nameserver' ||
                 $opt eq 'searchdomain' || $opt eq 'hostname') {
            $privilege = 'VM.Config.Network';
        } else {
            $privilege = 'VM.Config.Options';
        }

        $rpcenv->check_vm_perm($authuser, $vmid, $pool, [$privilege]);
    }

    return 1;
}
2058
# Unmount all mount points of container $vmid below its rootfs directory,
# in reverse order. With $noerr set, umount failures are only reported as
# warnings, otherwise the first failure is fatal.
sub umount_all {
    my ($vmid, $storage_cfg, $conf, $noerr) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    my $volid_list = get_vm_volumes($conf); # result is currently unused

    my $unmount = sub {
        my ($ms, $mountpoint) = @_;

        my ($volid, $mount) = @$mountpoint{qw(volume mp)};
        return if !$volid || !$mount;

        my $mount_path = "$rootdir/$mount";
        $mount_path =~ s!/+!/!g;

        return if !PVE::ProcFSTools::is_mounted($mount_path);

        eval {
            PVE::Tools::run_command(['umount', '-d', $mount_path]);
        };
        my $err = $@;
        return if !$err;

        die $err if !$noerr;
        warn $err;
    };

    foreach_mountpoint_reverse($conf, $unmount);
}
2090
# Activate the volumes of container $vmid and mount all of its mount
# points below /var/lib/lxc/$vmid/rootfs. If any mount fails, everything
# is unmounted again (best effort) and the error is re-thrown.
# Returns the rootfs directory.
sub mount_all {
    my ($vmid, $storage_cfg, $conf) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    File::Path::make_path($rootdir);

    PVE::Storage::activate_volumes($storage_cfg, get_vm_volumes($conf));

    my $mount_one = sub {
        my ($ms, $mountpoint) = @_;

        mountpoint_mount($mountpoint, $rootdir, $storage_cfg);
    };

    eval { foreach_mountpoint($conf, $mount_one); };
    my $err = $@;

    if ($err) {
        warn "mounting container failed\n";
        umount_all($vmid, $storage_cfg, $conf, 1);
        die $err;
    }

    return $rootdir;
}
2115
2116
# Resolve the path of a mount point volume without mounting anything
# (calls mountpoint_mount with an undefined root directory).
sub mountpoint_mount_path {
    my ($mountpoint, $storage_cfg, $snapname) = @_;

    my $rootdir = undef; # lookup only, do not mount

    return mountpoint_mount($mountpoint, $rootdir, $storage_cfg, $snapname);
}
2122
# Make sure that $path does not pass through a symlink: the canonical
# form of the path has to be identical to what Cwd::realpath resolves it
# to. Dies otherwise, also when the path cannot be resolved at all.
my $check_mount_path = sub {
    my ($path) = @_;

    $path = File::Spec->canonpath($path);
    my $real = Cwd::realpath($path);

    # realpath gives no result if the path cannot be resolved
    die "unable to resolve mount path: $path" if !defined($real);

    if ($real ne $path) {
        die "mount path modified by symlink: $path != $real";
    }
};
2131
# Find the loop device associated with the file $path by parsing the
# output of 'losetup --associated'. If several lines match, the last one
# wins. Returns undef if there is no match.
sub query_loopdev {
    my ($path) = @_;

    my $loopdev;

    PVE::Tools::run_command(
        ['losetup', '--associated', $path],
        outfunc => sub {
            my ($line) = @_;
            $loopdev = $1 if $line =~ m@^(/dev/loop\d+):@;
        },
    );

    return $loopdev;
}
2145
# use $rootdir = undef to just return the corresponding mount path
#
# Mount a single mount point below $rootdir. Storage volumes (subvol, raw
# and iso format), devices and bind mounts are handled; $snapname selects a
# snapshot of a storage volume, which is mounted read-only.
#
# Returns nothing if the mount point has no volume or no mount path.
# Otherwise returns the source path, in list context followed by a flag
# telling whether a loop device is used for it.
sub mountpoint_mount {
    my ($mountpoint, $rootdir, $storage_cfg, $snapname) = @_;

    my ($volid, $mount, $type) = @$mountpoint{qw(volume mp type)};

    return if !$volid || !$mount;

    my $mount_path;

    if (defined($rootdir)) {
        $rootdir =~ s!/+$!!;
        $mount_path = "$rootdir/$mount";
        $mount_path =~ s!/+!/!g;
        # the target is checked for symlinks before it gets created
        $check_mount_path->($mount_path);
        File::Path::mkpath($mount_path);
    }

    my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1);

    die "unknown snapshot path for '$volid'" if !$storage && defined($snapname);

    # Not a storage volume: either a device or a bind mount.
    if (!$storage) {
        if ($type eq 'device') {
            if ($mount_path) {
                PVE::Tools::run_command(['mount', $volid, $mount_path]);
            }
            return wantarray ? ($volid, 0) : $volid;
        }

        if ($type eq 'bind') {
            die "directory '$volid' does not exist\n" if ! -d $volid;
            $check_mount_path->($volid);
            if ($mount_path) {
                PVE::Tools::run_command(['mount', '-o', 'bind', $volid, $mount_path]);
            }
            return wantarray ? ($volid, 0) : $volid;
        }

        die "unsupported storage";
    }

    my $scfg = PVE::Storage::storage_config($storage_cfg, $storage);
    my $path = PVE::Storage::path($storage_cfg, $volid, $snapname);

    my ($vtype, undef, undef, undef, undef, $isBase, $format) =
        PVE::Storage::parse_volname($storage_cfg, $volid);

    $format = 'iso' if $vtype eq 'iso'; # allow to handle iso files

    if ($format eq 'subvol') {
        if ($mount_path) {
            if (!$snapname) {
                PVE::Tools::run_command(['mount', '-o', 'bind', $path, $mount_path]);
            } elsif ($scfg->{type} eq 'zfspool') {
                my $path_arg = $path;
                $path_arg =~ s!^/+!!;
                PVE::Tools::run_command(['mount', '-o', 'ro', '-t', 'zfs', $path_arg, $mount_path]);
            } else {
                die "cannot mount subvol snapshots for storage type '$scfg->{type}'\n";
            }
        }
        return wantarray ? ($path, 0) : $path;
    }

    if ($format ne 'raw' && $format ne 'iso') {
        die "unsupported image format '$format'\n";
    }

    my $use_loopdev = 0;
    my @extra_opts;
    if ($scfg->{path}) {
        # image files need a loop device
        push @extra_opts, '-o', 'loop';
        $use_loopdev = 1;
    } elsif (grep { $scfg->{type} eq $_ } qw(drbd lvm rbd lvmthin)) {
        # do nothing
    } else {
        die "unsupported storage type '$scfg->{type}'\n";
    }

    if ($mount_path) {
        my @mode_opts;
        if ($format eq 'iso') {
            @mode_opts = ('-o', 'ro');
        } elsif ($isBase || defined($snapname)) {
            @mode_opts = ('-o', 'ro,noload');
        }
        PVE::Tools::run_command(['mount', @mode_opts, @extra_opts, $path, $mount_path]);
    }

    return wantarray ? ($path, $use_loopdev) : $path;
}
2232
# Collect the volume IDs of all mount points of type 'volume' which
# belong to a storage. The mount point named by $excludes (if any) is
# left out. Returns an array reference.
sub get_vm_volumes {
    my ($conf, $excludes) = @_;

    my @volumes;

    my $collect = sub {
        my ($ms, $mountpoint) = @_;

        return if $excludes && $ms eq $excludes;

        my $volid = $mountpoint->{volume};
        return if !$volid;
        return if $mountpoint->{type} ne 'volume';

        my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        push @volumes, $volid if $sid;
    };
    foreach_mountpoint($conf, $collect);

    return \@volumes;
}
2255
# Create an ext4 file system (with the 'mmp' feature) on $dev. The root
# directory of the new file system is owned by $rootuid:$rootgid; both
# default to 0 when not given.
sub mkfs {
    my ($dev, $rootuid, $rootgid) = @_;

    # avoid passing an incomplete 'root_owner=:' to mkfs.ext4
    $rootuid //= 0;
    $rootgid //= 0;

    PVE::Tools::run_command(['mkfs.ext4', '-O', 'mmp',
                             '-E', "root_owner=$rootuid:$rootgid",
                             $dev]);
}
2263
# Create a file system on a volume. $volid is either a device path below
# /dev or the ID of a storage volume in 'raw' format, which gets activated
# first. The root directory of the file system is owned by
# $rootuid:$rootgid.
# Dies for volumes without storage and for formats other than 'raw'.
sub format_disk {
    my ($storage_cfg, $volid, $rootuid, $rootgid) = @_;

    if ($volid =~ m!^/dev/.+!) {
        # devices need the requested root owner as well
        mkfs($volid, $rootuid, $rootgid);
        return;
    }

    my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1);

    die "cannot format volume '$volid' with no storage\n" if !$storage;

    PVE::Storage::activate_volumes($storage_cfg, [$volid]);

    my $path = PVE::Storage::path($storage_cfg, $volid);

    my ($vtype, undef, undef, undef, undef, $isBase, $format) =
        PVE::Storage::parse_volname($storage_cfg, $volid);

    die "cannot format volume '$volid' (format == $format)\n"
        if $format ne 'raw';

    mkfs($path, $rootuid, $rootgid);
}
2288
# Free all volumes in $vollist. A failure to free one volume is only
# reported as a warning and does not stop the others from being freed.
sub destroy_disks {
    my ($storecfg, $vollist) = @_;

    foreach my $volid (@$vollist) {
        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        if (my $err = $@) {
            warn $err;
        }
    }
}
2297
# Allocate the volumes requested in $settings and record them in $conf.
#
# For every mountpoint visited by foreach_mountpoint($settings, ...):
#   * a volume given as 'STORAGE:SIZE' (size in GB, may be fractional) is
#     allocated on that storage; 'raw' volumes are formatted, 'subvol'
#     volumes are chown'ed to the container's root uid/gid afterwards;
#   * anything else (existing volume id, directory, device) is used as is.
# In both cases the resulting mountpoint string is stored in $conf->{$ms}.
#
# Parameters:
#   $storecfg - storage configuration
#   $vmid     - container id the volumes are allocated for
#   $settings - configuration holding the requested mountpoints
#   $conf     - container configuration; updated in place
#
# Returns an array ref with the ids of all newly allocated volumes.
# On any error the volumes allocated so far are freed and the error is
# re-thrown.
sub create_disks {
    my ($storecfg, $vmid, $settings, $conf) = @_;

    my $vollist = [];

    eval {
        my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
        my $chown_vollist = [];

        foreach_mountpoint($settings, sub {
            my ($ms, $mountpoint) = @_;

            my $volid = $mountpoint->{volume};

            my ($storage) = PVE::Storage::parse_volume_id($volid, 1);

            if ($storage && ($volid =~ m/^([^:\s]+):(\d+(\.\d+)?)$/)) {
                my $size_gb = $2;

                my $size_kb = int(${size_gb}*1024) * 1024;

                my $scfg = PVE::Storage::storage_config($storecfg, $storage);
                my $type = $scfg->{type};
                # fixme: use better naming ct-$vmid-disk-X.raw?

                # Pick the volume format for this storage type. 'raw' volumes
                # get a filesystem, 'subvol' volumes only need a chown.
                my $format;
                if ($type eq 'dir' || $type eq 'nfs') {
                    # size 0 requests a plain subvolume (directory)
                    $format = $size_kb > 0 ? 'raw' : 'subvol';
                } elsif ($type eq 'zfspool') {
                    $format = 'subvol';
                } elsif ($type eq 'drbd' || $type eq 'lvm' || $type eq 'lvmthin') {
                    $format = 'raw';
                } elsif ($type eq 'rbd') {
                    die "krbd option must be enabled on storage type '$scfg->{type}'\n" if !$scfg->{krbd};
                    $format = 'raw';
                } else {
                    die "unable to create containers on storage type '$scfg->{type}'\n";
                }

                $volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, $format,
                                                   undef, $size_kb);

                # Register the volume before touching it any further, so the
                # error handler below also frees it when formatting fails.
                push @$vollist, $volid;

                if ($format eq 'raw') {
                    format_disk($storecfg, $volid, $rootuid, $rootgid);
                } else {
                    push @$chown_vollist, $volid;
                }

                $mountpoint->{volume} = $volid;
                $mountpoint->{size} = $size_kb * 1024;
                $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            } else {
                # use specified/existing volid/dir/device
                $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            }
        });

        # Subvolumes are not formatted, so hand them to the container's root
        # user explicitly.
        PVE::Storage::activate_volumes($storecfg, $chown_vollist, undef);
        foreach my $volid (@$chown_vollist) {
            my $path = PVE::Storage::path($storecfg, $volid, undef);
            chown($rootuid, $rootgid, $path)
                or warn "unable to change owner of '$path' to $rootuid:$rootgid - $!\n";
        }
        PVE::Storage::deactivate_volumes($storecfg, $chown_vollist, undef);
    };
    # free allocated images on error
    if (my $err = $@) {
        destroy_disks($storecfg, $vollist);
        die $err;
    }
    return $vollist;
}
2375
2376 # bash completion helper
2377
# Completion helper: list the volume ids offered for an OS template argument.
#
# When $cvalue already starts with 'STORAGE:', only that storage is queried.
# For the 'restore' command backups are listed, otherwise container
# templates ('vztmpl'). Returns an array ref of volume ids.
sub complete_os_templates {
    my ($cmdname, $pname, $cvalue) = @_;

    my $storage_cfg = PVE::Storage::config();

    # stays undef unless the typed value has a storage prefix
    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $content_type = 'vztmpl';
    $content_type = 'backup' if $cmdname eq 'restore';

    my $listing = PVE::Storage::template_list($storage_cfg, $storeid, $content_type);

    my @volids =
        grep { defined($_) }
        map { $_->{volid} }
        map { @{ $listing->{$_} } }
        keys %$listing;

    return \@volids;
}
2401
# Shared implementation of the container id completion helpers.
#
# $running undefined: return every id reported by vmstatus().
# $running true:      only non-template containers that are currently active.
# $running false:     only non-template containers that are not active.
# Returns an array ref of ids.
my $complete_ctid_full = sub {
    my ($running) = @_;

    my $status_of = vmstatus();
    my $is_active = list_active_containers();

    # no filter requested
    return [ keys %$status_of ] if !defined($running);

    my @matching = grep {
        my $status = $status_of->{$_};
        !$status->{template}
            && ($running ? $is_active->{$_} : !$is_active->{$_});
    } keys %$status_of;

    return \@matching;
};

# Completion helper: all container ids.
sub complete_ctid {
    return $complete_ctid_full->();
}

# Completion helper: ids of non-template containers that are not active.
sub complete_ctid_stopped {
    return $complete_ctid_full->(0);
}

# Completion helper: ids of non-template containers that are active.
sub complete_ctid_running {
    return $complete_ctid_full->(1);
}
2435
# Extract the user namespace id mappings from a container configuration.
#
# Every 'lxc.id_map' entry in $conf->{lxc} must look like
# '<u|g> <container id> <host id> <length>'; anything else is fatal.
#
# Returns the list ($id_map, $rootuid, $rootgid):
#   $id_map  - array ref of [type, container id, host id, length] entries
#   $rootuid - host uid mapped to container uid 0 (0 if there is none)
#   $rootgid - host gid mapped to container gid 0 (0 if there is none)
# An unprivileged container without explicit mappings gets a default
# mapping of 65536 ids starting at host id 100000.
sub parse_id_maps {
    my ($conf) = @_;

    my @id_map;
    my %root_id = (u => 0, g => 0);

    my $raw_entries = $conf->{lxc};
    for my $entry (@$raw_entries) {
        my ($key, $value) = @$entry;
        next unless $key eq 'lxc.id_map';

        my @fields = $value =~ /^([ug])\s+(\d+)\s+(\d+)\s+(\d+)\s*$/
            or die "failed to parse id_map: $value\n";
        push @id_map, [@fields];

        # a range starting at container id 0 defines the root mapping
        my ($type, $ct_start, $host_start) = @fields;
        $root_id{$type} = $host_start if $ct_start == 0;
    }

    if (!@id_map && $conf->{unprivileged}) {
        # Should we read them from /etc/subuid?
        @id_map = (
            ['u', '0', '100000', '65536'],
            ['g', '0', '100000', '65536'],
        );
        @root_id{qw(u g)} = (100000, 100000);
    }

    return (\@id_map, $root_id{u}, $root_id{g});
}
2468
# Build the command prefix that runs a command inside the user namespace
# described by $id_map (array ref of [type, container id, host id, length]).
#
# Returns ['lxc-usernsexec', '-m', 'type:ct:host:length', ..., '--'] with one
# '-m' option per mapping, or an empty array ref when there are no mappings.
sub userns_command {
    my ($id_map) = @_;

    return [] if !@$id_map;

    my @prefix = ('lxc-usernsexec');
    for my $mapping (@$id_map) {
        push @prefix, '-m', join(':', @$mapping);
    }
    push @prefix, '--';

    return \@prefix;
}
2476
2477 1;