]> git.proxmox.com Git - pve-container.git/blob - src/PVE/LXC.pm
fix growing of a running container's memory limit
[pve-container.git] / src / PVE / LXC.pm
1 package PVE::LXC;
2
3 use strict;
4 use warnings;
5 use POSIX qw(EINTR);
6
7 use File::Path;
8 use File::Spec;
9 use Cwd qw();
10 use Fcntl ':flock';
11
12 use PVE::Cluster qw(cfs_register_file cfs_read_file);
13 use PVE::Storage;
14 use PVE::SafeSyslog;
15 use PVE::INotify;
16 use PVE::JSONSchema qw(get_standard_option);
17 use PVE::Tools qw($IPV6RE $IPV4RE dir_glob_foreach);
18 use PVE::Network;
19 use PVE::AccessControl;
20 use PVE::ProcFSTools;
21 use Time::HiRes qw (gettimeofday);
22
23 use Data::Dumper;
24
# Name of the local node, resolved once at module load time.
my $nodename = PVE::INotify::nodename();

# CPU information of the host, read once at module load time.
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();

# Package-visible list of tar command line flags.
our $COMMON_TAR_FLAGS = [qw(
    --sparse
    --numeric-owner
    --acls
    --xattrs
    --xattrs-include=user.*
    --xattrs-include=security.capability
    --warning=no-xattr-write
)];

# Register parser and writer for the container config files below '/lxc/'.
cfs_register_file('/lxc/', \&parse_pct_config, \&write_pct_config);
36
# Property-string schema of the container root filesystem. The same
# properties are reused as the base of the mountpoint schema ($mp_desc).
my $rootfs_desc = {
    volume => {
        type               => 'string',
        default_key        => 1,
        format_description => 'volume',
        description        => 'Volume, device or directory to mount into the container.',
    },
    backup => {
        type               => 'boolean',
        format_description => '[1|0]',
        description        => 'Whether to include the mountpoint in backups.',
        optional           => 1,
    },
    size => {
        type               => 'string',
        format             => 'disk-size',
        format_description => 'DiskSize',
        description        => 'Volume size (read only value).',
        optional           => 1,
    },
    acl => {
        type               => 'boolean',
        format_description => 'acl',
        description        => 'Explicitly enable or disable ACL support.',
        optional           => 1,
    },
    ro => {
        type               => 'boolean',
        format_description => 'ro',
        description        => 'Read-only mountpoint (not supported with bind mounts)',
        optional           => 1,
    },
};
70
# Standard option: container root filesystem (property string, see $rootfs_desc).
PVE::JSONSchema::register_standard_option(
    'pve-ct-rootfs',
    {
        type        => 'string',
        format      => $rootfs_desc,
        description => "Use volume as container root.",
        optional    => 1,
    },
);

# Standard option: snapshot name, a config id of at most 40 characters.
PVE::JSONSchema::register_standard_option(
    'pve-lxc-snapshot-name',
    {
        description => "The name of the snapshot.",
        type        => 'string',
        format      => 'pve-configid',
        maxLength   => 40,
    },
);
82
# Schema of all top-level container configuration options. The netN, mpN
# and unusedN entries are added to this hash further down in the file.
my $confdesc = {
    # --- general state ---
    lock => {
        optional    => 1,
        type        => 'string',
        description => "Lock/unlock the VM.",
        enum        => ['migrate', 'backup', 'snapshot', 'rollback'],
    },
    onboot => {
        optional    => 1,
        type        => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default     => 0,
    },
    startup  => get_standard_option('pve-startup-order'),
    template => {
        optional    => 1,
        type        => 'boolean',
        description => "Enable/disable Template.",
        default     => 0,
    },

    # --- guest operating system ---
    arch => {
        optional    => 1,
        type        => 'string',
        enum        => [qw(amd64 i386)],
        description => "OS architecture type.",
        default     => 'amd64',
    },
    ostype => {
        optional    => 1,
        type        => 'string',
        enum        => [qw(debian ubuntu centos fedora opensuse archlinux)],
        description => "OS type. Corresponds to lxc setup scripts in /usr/share/lxc/config/<ostype>.common.conf.",
    },

    # --- console / terminals ---
    console => {
        optional    => 1,
        type        => 'boolean',
        description => "Attach a console device (/dev/console) to the container.",
        default     => 1,
    },
    tty => {
        optional    => 1,
        type        => 'integer',
        description => "Specify the number of tty available to the container",
        minimum     => 0,
        maximum     => 6,
        default     => 2,
    },

    # --- resource limits ---
    cpulimit => {
        optional    => 1,
        type        => 'number',
        description => "Limit of CPU usage. Note if the computer has 2 CPUs, it has a total of '2' CPU time. Value '0' indicates no CPU limit.",
        minimum     => 0,
        maximum     => 128,
        default     => 0,
    },
    cpuunits => {
        optional    => 1,
        type        => 'integer',
        description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to the weights of all the other running VMs.\n\nNOTE: You can disable fair-scheduler configuration by setting this to 0.",
        minimum     => 0,
        maximum     => 500000,
        default     => 1024,
    },
    memory => {
        optional    => 1,
        type        => 'integer',
        description => "Amount of RAM for the VM in MB.",
        minimum     => 16,
        default     => 512,
    },
    swap => {
        optional    => 1,
        type        => 'integer',
        description => "Amount of SWAP for the VM in MB.",
        minimum     => 0,
        default     => 512,
    },

    # --- identity and DNS ---
    hostname => {
        optional    => 1,
        description => "Set a host name for the container.",
        type        => 'string',
        format      => 'dns-name',
        maxLength   => 255,
    },
    description => {
        optional    => 1,
        type        => 'string',
        description => "Container description. Only used on the configuration web interface.",
    },
    searchdomain => {
        optional    => 1,
        type        => 'string',
        format      => 'dns-name-list',
        description => "Sets DNS search domains for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
    },
    nameserver => {
        optional    => 1,
        type        => 'string',
        format      => 'address-list',
        description => "Sets DNS server IP address for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
    },

    # --- storage and snapshots ---
    rootfs => get_standard_option('pve-ct-rootfs'),
    parent => {
        optional    => 1,
        type        => 'string',
        format      => 'pve-configid',
        maxLength   => 40,
        description => "Parent snapshot name. This is used internally, and should not be modified.",
    },
    snaptime => {
        optional    => 1,
        description => "Timestamp for snapshots.",
        type        => 'integer',
        minimum     => 0,
    },

    # --- miscellaneous ---
    cmode => {
        optional    => 1,
        description => "Console mode. By default, the console command tries to open a connection to one of the available tty devices. By setting cmode to 'console' it tries to attach to /dev/console instead. If you set cmode to 'shell', it simply invokes a shell inside the container (no login).",
        type        => 'string',
        enum        => [qw(shell console tty)],
        default     => 'tty',
    },
    protection => {
        optional    => 1,
        type        => 'boolean',
        description => "Sets the protection flag of the container. This will prevent the CT or CT's disk remove/update operation.",
        default     => 0,
    },
    unprivileged => {
        optional    => 1,
        type        => 'boolean',
        description => "Makes the container run as unprivileged user. (Should not be modified manually.)",
        default     => 0,
    },
};
214
# Raw 'lxc.*' keys accepted in a container config file.
#
# A value of 1 means the key is passed through as is. Any other true value
# is the warning text printed when the key is encountered.
# Keys below 'lxc.cgroup.' are not listed here; the parser accepts them
# by prefix.
my $valid_lxc_conf_keys = {
    (
        map { $_ => 1 } qw(
            lxc.include
            lxc.arch
            lxc.utsname
            lxc.haltsignal
            lxc.rebootsignal
            lxc.stopsignal
            lxc.init_cmd
            lxc.network.type
            lxc.network.flags
            lxc.network.link
            lxc.network.mtu
            lxc.network.name
            lxc.network.hwaddr
            lxc.network.ipv4
            lxc.network.ipv4.gateway
            lxc.network.ipv6
            lxc.network.ipv6.gateway
            lxc.network.script.up
            lxc.network.script.down
            lxc.pts
            lxc.console.logfile
            lxc.console
            lxc.tty
            lxc.devttydir
            lxc.hook.autodev
            lxc.autodev
            lxc.kmsg
            lxc.mount
            lxc.mount.entry
            lxc.mount.auto
            lxc.rootfs.mount
            lxc.cap.drop
            lxc.cap.keep
            lxc.aa_profile
            lxc.aa_allow_incomplete
            lxc.se_context
            lxc.seccomp
            lxc.id_map
            lxc.hook.pre-start
            lxc.hook.pre-mount
            lxc.hook.mount
            lxc.hook.start
            lxc.hook.stop
            lxc.hook.post-stop
            lxc.hook.clone
            lxc.hook.destroy
            lxc.loglevel
            lxc.logfile
            lxc.start.auto
            lxc.start.delay
            lxc.start.order
            lxc.group
            lxc.environment
        )
    ),
    # rejected keys, mapped to the message explaining why
    'lxc.rootfs'         => 'lxc.rootfs is auto generated from rootfs',
    'lxc.rootfs.options' => 'lxc.rootfs.options is not supported, please use mountpoint options in the "rootfs" key',
};
274
# Property-string schema of a container network interface (netN options).
# It is also registered as the 'pve-lxc-network' format.
my $netconf_desc = {
    type => {
        type => 'string',
        optional => 1,
        description => "Network interface type.",
        enum => [qw(veth)],
    },
    name => {
        type => 'string',
        format_description => 'String',
        description => 'Name of the network device as seen from inside the container. (lxc.network.name)',
        pattern => '[-_.\w\d]+',
    },
    bridge => {
        type => 'string',
        format_description => 'vmbr<Number>',
        description => 'Bridge to attach the network device to.',
        pattern => '[-_.\w\d]+',
        optional => 1,
    },
    hwaddr => {
        type => 'string',
        format_description => 'MAC',
        # The original text was a copy of the 'bridge' description.
        description => 'MAC address of the network device. (lxc.network.hwaddr)',
        pattern => qr/(?:[a-f0-9]{2}:){5}[a-f0-9]{2}/i,
        optional => 1,
    },
    mtu => {
        type => 'integer',
        format_description => 'Number',
        description => 'Maximum transfer unit of the interface. (lxc.network.mtu)',
        minimum => 64, # minimum ethernet frame is 64 bytes
        optional => 1,
    },
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        description => 'IPv4 address in CIDR format.',
        optional => 1,
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        description => 'Default gateway for IPv4 traffic.',
        optional => 1,
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        description => 'IPv6 address in CIDR format.',
        optional => 1,
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        description => 'Default gateway for IPv6 traffic.',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        format_description => '[1|0]',
        description => "Controls whether this interface's firewall rules should be used.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        format_description => 'VlanNo',
        minimum => '2',
        maximum => '4094',
        description => "VLAN tag for this interface.",
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:;\d+)*/,
        format_description => 'vlanid[;vlanid...]',
        description => "VLAN ids to pass through the interface",
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-lxc-network', $netconf_desc);
360
# Register the options net0 .. net<N-1>. Every option gets its own
# descriptor hash; all of them share the $netconf_desc format.
my $MAX_LXC_NETWORKS = 10;
for my $idx (0 .. $MAX_LXC_NETWORKS - 1) {
    $confdesc->{"net$idx"} = {
        optional    => 1,
        type        => 'string',
        format      => $netconf_desc,
        description => "Specifies network interfaces for the container.",
    };
}
369
# Mountpoint schema: all rootfs properties plus the mandatory target
# path 'mp'. It is also registered as the 'pve-ct-mountpoint' format.
my $mp_desc = { %$rootfs_desc };
$mp_desc->{mp} = {
    type               => 'string',
    format_description => 'Path',
    description        => 'Path to the mountpoint as seen from inside the container.',
};
PVE::JSONSchema::register_format('pve-ct-mountpoint', $mp_desc);
379
# Descriptor shared by all unusedN options (a single volume id each).
my $unuseddesc = {
    optional    => 1,
    type        => 'string',
    format      => 'pve-volume-id',
    description => "Reference to unused volumes.",
};
385
# Register the options mp0 .. mp<N-1>. Every option gets its own
# descriptor hash; all of them share the $mp_desc format.
# (The original hash literal listed 'optional' twice; it is given once here.)
my $MAX_MOUNT_POINTS = 10;
for my $i (0 .. $MAX_MOUNT_POINTS - 1) {
    $confdesc->{"mp$i"} = {
        optional => 1,
        type => 'string', format => $mp_desc,
        description => "Use volume as container mount point (experimental feature).",
    };
}
395
# Register the options unused0 .. unused<N-1>; all of them share the same
# descriptor. The loop is bounded by $MAX_UNUSED_DISKS (not
# $MAX_MOUNT_POINTS) so the registered options always match the slots that
# add_unused_volume() iterates over, even if the two limits diverge.
my $MAX_UNUSED_DISKS = $MAX_MOUNT_POINTS;
for my $i (0 .. $MAX_UNUSED_DISKS - 1) {
    $confdesc->{"unused$i"} = $unuseddesc;
}
400
# Serialize a container configuration hash into config file text.
#
# Parameters:
#   $filename - not used for generating the text
#   $conf     - configuration hash; may contain 'snapshots' (name => hash)
#               and 'lxc' (list of [key, value] pairs)
#
# Returns the file content: the main section followed by one "[name]"
# section per snapshot, ordered by snapshot name.
# Dies if a plain property value contains a newline.
sub write_pct_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    # keys which are never written as plain "key: value" lines
    my %special = map { $_ => 1 }
        qw(digest description pending snapshots snapname lxc);

    my $render_section = sub {
        my ($section) = @_;

        my $text = '';

        # the description goes to the top of the section, as comment lines
        for my $descr_line (split(/\n/, $section->{description} || '')) {
            $text .= '#' . PVE::Tools::encode_text($descr_line) . "\n";
        }

        for my $name (sort keys %$section) {
            next if $special{$name};
            my $val = $section->{$name};
            die "detected invalid newline inside property '$name'\n" if $val =~ m/\n/;
            $text .= "$name: $val\n";
        }

        # raw lxc entries keep their stored order and are written last
        if (my $raw_entries = $section->{lxc}) {
            $text .= join('', map { "$_->[0]: $_->[1]\n" } @$raw_entries);
        }

        return $text;
    };

    my $raw = $render_section->($conf);

    for my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n" . $render_section->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
444
# Validate a single option value against the type declared in $confdesc.
#
# Parameters:
#   $key   - option name; must be a key of $confdesc
#   $value - value to check
#
# Returns the normalized value: 1/0 for booleans, int() of the digits for
# integers, the unchanged value for numbers and strings.
# Dies if the option is unknown, the value is undefined, it contains a
# line break, or it does not match the declared type/format.
sub check_type {
    my ($key, $value) = @_;

    my $schema = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$schema;

    my $type = $schema->{type};

    die "got undefined value\n" if !defined($value);

    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if $value eq '1' || $value =~ m/^(on|yes|true)$/i;
        return 0 if $value eq '0' || $value =~ m/^(off|no|false)$/i;
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        # strings are only checked when the option declares a format
        my $fmt = $schema->{format};
        PVE::JSONSchema::check_format($fmt, $value) if $fmt;
        return $value;
    }

    die "internal error"
}
480
# Parse the text of a container config file into a configuration hash.
#
# Parameters:
#   $filename - path of the config file; must end in '/lxc/<vmid>.conf'
#   $raw      - file content, or undef
#
# Returns undef if $raw is undef. Otherwise returns a hash with the parsed
# options, a 'digest' (SHA1 hex of $raw) and 'snapshots' (one hash per
# "[name]" section). Comment lines and 'description:' lines are collected
# into the 'description' of the section they belong to; raw 'lxc.*' entries
# are collected as [key, value] pairs under 'lxc'.
# Unparsable lines and invalid values only produce warnings.
# Dies if $filename does not look like a container config path.
sub parse_pct_config {
    my ($filename, $raw) = @_;

    return undef if !defined($raw);

    # this file never loads Digest::SHA itself, so make sure it is available
    require Digest::SHA;

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
    };

    # the dot is escaped so that only a literal '.conf' suffix is accepted
    $filename =~ m|/lxc/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    my $conf = $res; # section currently being filled
    my $descr = '';
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            # new snapshot section: flush the description collected so far
            $section = $1;
            $conf->{description} = $descr if $descr;
            $descr = '';
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)\s*$/) {
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(lxc\.[a-z0-9_\-\.]+)(:|\s*=)\s*(.*?)\s*$/) {
            my $key = $1;
            my $value = $3;
            my $validity = $valid_lxc_conf_keys->{$key} || 0;
            if ($validity eq 1 || $key =~ m/^lxc\.cgroup\./) {
                push @{$conf->{lxc}}, [$key, $value];
            } elsif (my $errmsg = $validity) {
                warn "vm $vmid - $key: $errmsg\n";
            } else {
                warn "vm $vmid - unable to parse config: $line\n";
            }
        } elsif ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored, so that only a real 'snapstate' key matches
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(\S.*?)\s*$/) {
            # non-greedy capture: trailing whitespace is not part of the value
            my $key = $1;
            my $value = $2;
            eval { $value = check_type($key, $value); };
            warn "vm $vmid - unable to parse value of '$key' - $@" if $@;
            # the value is stored even if the check failed (only warned about)
            $conf->{$key} = $value;
        } else {
            warn "vm $vmid - unable to parse config: $line\n";
        }
    }

    $conf->{description} = $descr if $descr;

    delete $res->{snapstate}; # just to be sure

    return $res;
}
549
# List the containers located on the local node.
#
# Returns a hash reference { <vmid> => { type => 'lxc' } } with one entry
# for every entry of the cluster VM list whose node is the local node and
# whose type is 'lxc'. The hash is empty if no VM list is available.
sub config_list {
    my $result = {};

    my $vmlist = PVE::Cluster::get_vmlist();
    return $result unless $vmlist && $vmlist->{ids};

    my $ids = $vmlist->{ids};

    my @local_cts = grep {
        my $d = $ids->{$_};
        $_                                       # skip CT0
            && $d->{node} && $d->{node} eq $nodename
            && $d->{type} && $d->{type} eq 'lxc';
    } keys %$ids;

    $result->{$_}->{type} = 'lxc' for @local_cts;

    return $result;
}
565
# Return the config file path of container $vmid relative to the cluster
# file system root. $node defaults to the local node.
sub cfs_config_path {
    my ($vmid, $node) = @_;

    $node ||= $nodename;

    return "nodes/$node/lxc/$vmid.conf";
}
572
# Return the absolute path (below /etc/pve) of the config file of
# container $vmid. $node is passed on to cfs_config_path().
sub config_file {
    my ($vmid, $node) = @_;

    return '/etc/pve/' . cfs_config_path($vmid, $node);
}
579
# Read the configuration of container $vmid through the cluster file
# system. $node defaults to the local node.
# Dies if no configuration could be read.
sub load_config {
    my ($vmid, $node) = @_;

    my $cfspath = cfs_config_path($vmid, $node || $nodename);

    my $conf = PVE::Cluster::cfs_read_file($cfspath);
    die "container $vmid does not exist\n" unless defined($conf);

    return $conf;
}
591
# Write the configuration of a new container $vmid on the local node.
# The lxc config directory of the node is created first; the result of
# that mkdir is not checked.
sub create_config {
    my ($vmid, $conf) = @_;

    mkdir "/etc/pve/nodes/$nodename/lxc";

    return write_config($vmid, $conf);
}
600
# Remove the config file of container $vmid on the local node.
# Returns the result of unlink.
sub destroy_config {
    my ($vmid) = @_;

    my $path = config_file($vmid, $nodename);

    return unlink($path);
}
606
# Store $conf as the configuration of container $vmid on the local node,
# using the cluster file system writer.
sub write_config {
    my ($vmid, $conf) = @_;

    return PVE::Cluster::cfs_write_file(cfs_config_path($vmid), $conf);
}
614
# flock: we keep one file handle per process and lock file, so a lock can
# be acquired multiple times by the same process and will succeed.

# Open lock handles: { <pid> => { <filename> => { fh => ..., refcount => ... } } }
my $lock_handles = {};

# Directory holding the per-container lock files.
my $lockdir = "/run/lock/lxc";

# Return the path of the config lock file of container $vmid.
sub lock_filename {
    my ($vmid) = @_;

    return $lockdir . "/pve-config-${vmid}.lock";
}
626
# Acquire the exclusive config lock of container $vmid.
#
# Parameters:
#   $vmid    - container id
#   $timeout - seconds to wait for the lock; a false value means 10
#
# The lock file handle is cached per process and reference counted, so
# nested calls within one process succeed; every call has to be paired with
# a call to lock_release().
# Dies if the lock file cannot be opened or the lock cannot be obtained
# within the timeout.
sub lock_aquire {
    my ($vmid, $timeout) = @_;

    $timeout = 10 if !$timeout;
    my $mode = LOCK_EX;

    my $filename = lock_filename($vmid);

    mkdir $lockdir if !-d $lockdir;

    my $lock_func = sub {
        if (!$lock_handles->{$$}->{$filename}) {
            # Mode and file name are passed separately, so the name is
            # never interpreted as part of the open mode.
            my $fh = IO::File->new($filename, '>>')
                or die "can't open file - $!\n";
            $lock_handles->{$$}->{$filename} = { fh => $fh, refcount => 0};
        }

        my $fh = $lock_handles->{$$}->{$filename}->{fh};

        # try without blocking first, so the message is only printed
        # when we really have to wait
        if (!flock($fh, $mode | LOCK_NB)) {
            print STDERR "trying to aquire lock...";
            my $success;
            while (1) {
                $success = flock($fh, $mode);
                # try again on EINTR (see bug #273)
                last if $success || ($! != EINTR);
            }
            if (!$success) {
                print STDERR " failed\n";
                die "can't aquire lock - $!\n";
            }

            print STDERR " OK\n";
        }

        $lock_handles->{$$}->{$filename}->{refcount}++;
    };

    eval { PVE::Tools::run_with_timeout($timeout, $lock_func); };
    my $err = $@;
    if ($err) {
        die "can't lock file '$filename' - $err";
    }
}
671
# Release one reference to the config lock of container $vmid.
#
# The lock file handle is closed (which drops the flock) once the
# reference count reaches zero. Calling this without holding the lock is
# a no-op.
sub lock_release {
    my ($vmid) = @_;

    my $filename = lock_filename($vmid);

    # Look the entry up step by step: a chained '->{fh}' access would
    # autovivify an empty entry, which lock_aquire() would then mistake
    # for an already opened lock file.
    my $handles = $lock_handles->{$$} or return;
    my $entry = $handles->{$filename} or return;
    my $fh = $entry->{fh} or return;

    if (--$entry->{refcount} <= 0) {
        $handles->{$filename} = undef;
        close($fh);
    }

    return;
}
685
# Run $code while holding the config lock of container $vmid.
#
# Parameters:
#   $vmid    - container id
#   $timeout - passed to lock_aquire()
#   $code    - code reference, called in scalar context
#   @param   - arguments for $code
#
# Returns the result of $code. The lock is released before an error
# raised by $code is re-thrown.
sub lock_container {
    my ($vmid, $timeout, $code, @param) = @_;

    lock_aquire($vmid, $timeout);

    my $res = eval { $code->(@param) };
    my $err = $@;

    lock_release($vmid);

    die $err if $err;

    return $res;
}
700
# Return true if $name is a known container configuration option,
# i.e. has a defined entry in $confdesc.
sub option_exists {
    my $name = shift;

    my $schema = $confdesc->{$name};

    return defined($schema);
}
706
# Add the JSON schema properties used by the create and set API calls.
#
# Copies every option descriptor from $confdesc into $prop, except for
# 'parent' and 'snaptime' and for options which already have a true entry
# in $prop. Returns $prop.
sub json_config_properties {
    my $prop = shift;

    my %excluded = map { $_ => 1 } qw(parent snaptime);

    my @missing = grep { !$excluded{$_} && !$prop->{$_} } keys %$confdesc;
    $prop->{$_} = $confdesc->{$_} for @missing;

    return $prop;
}
719
# Same as json_config_properties(), but additionally leaves out the
# 'rootfs' option. Returns $prop.
sub json_config_properties_no_rootfs {
    my $prop = shift;

    my %excluded = map { $_ => 1 } qw(parent snaptime rootfs);

    my @missing = grep { !$prop->{$_} && !$excluded{$_} } keys %$confdesc;
    $prop->{$_} = $confdesc->{$_} for @missing;

    return $prop;
}
731
732 # container status helpers
733
# Find the active containers by looking for their command sockets.
#
# Scans /proc/net/unix for sockets named '@/var/lib/lxc/<vmid>/command'
# and returns a hash reference { <vmid> => 1 }. The hash is empty if the
# file cannot be opened.
sub list_active_containers {

    # a similar test is used by lxccontainer.c: list_active_containers
    my $active = {};

    open(my $fh, '<', '/proc/net/unix') or return $active;

    while (defined(my $line = <$fh>)) {
        next if $line !~ m/^[a-f0-9]+:\s\S+\s\S+\s\S+\s\S+\s\S+\s\d+\s(\S+)$/;
        my $path = $1;
        $active->{$1} = 1 if $path =~ m!^@/var/lib/lxc/(\d+)/command$!;
    }

    close($fh);

    return $active;
}
757
# Return 1 if container $vmid is active, undef otherwise.
# warning: this is slow - every call rebuilds the list of active containers
sub check_running {
    my ($vmid) = @_;

    my $active = list_active_containers();

    return defined($active->{$vmid}) ? 1 : undef;
}
768
# Return the result of PVE::Tools::df() for the root directory of the
# process $pid (/proc/<pid>/root/), called with a second argument of 1.
# $vmid is not used.
sub get_container_disk_usage {
    my ($vmid, $pid) = @_;

    my $rootdir = "/proc/$pid/root/";

    return PVE::Tools::df($rootdir, 1);
}
774
# CPU accounting sample of the previous vmstatus() call, per container:
# { <vmid> => { time => ..., used => ..., cpu => ... } }
my $last_proc_vmid_stat;

# Read 'cpuacct.stat' of container $vmid and return a hash reference with
# the 'user' and 'system' counters as 'utime' and 'stime'. The hash is
# empty if the content does not have the expected format.
my $parse_cpuacct_stat = sub {
    my ($vmid) = @_;

    my $raw = read_cgroup_value('cpuacct', $vmid, 'cpuacct.stat', 1);

    my %stat;
    @stat{qw(utime stime)} = ($1, $2)
        if $raw =~ m/^user (\d+)\nsystem (\d+)\n/;

    return \%stat;
};
793
# Collect status information about containers.
#
# Parameters:
#   $opt_vmid - optional container id; if given only this container is
#               inspected, otherwise all containers from config_list()
#
# Returns a hash reference { <vmid> => { ... } } where every entry contains
# type, pid (only if one was found), status ('running' or 'stopped'), name,
# cpus, disk, maxdisk, mem, maxmem, swap, maxswap, uptime, cpu, netin,
# netout, diskread, diskwrite and template.
#
# The cpu value is computed relative to the sample taken by the previous
# call (kept in $last_proc_vmid_stat), so it is 0 on the first call for a
# container.
sub vmstatus {
    my ($opt_vmid) = @_;

    my $list = $opt_vmid ? { $opt_vmid => { type => 'lxc' }} : config_list();

    my $active_hash = list_active_containers();

    my $cpucount = $cpuinfo->{cpus} || 1;

    my $cdtime = gettimeofday;

    # first pass: static values from the configuration and defaults for
    # all counters
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};

        eval { $d->{pid} = find_lxc_pid($vmid) if defined($active_hash->{$vmid}); };
        warn $@ if $@; # ignore errors (consider them stopped)

        $d->{status} = $d->{pid} ? 'running' : 'stopped';

        my $cfspath = cfs_config_path($vmid);
        my $conf = PVE::Cluster::cfs_read_file($cfspath) || {};

        $d->{name} = $conf->{'hostname'} || "CT$vmid";
        $d->{name} =~ s/[\s]//g;

        $d->{cpus} = $conf->{cpulimit} || $cpucount;

        if ($d->{pid}) {
            my $res = get_container_disk_usage($vmid, $d->{pid});
            $d->{disk} = $res->{used};
            $d->{maxdisk} = $res->{total};
        } else {
            $d->{disk} = 0;
            # use 4GB by default ??
            if (my $rootfs = $conf->{rootfs}) {
                my $rootinfo = parse_ct_rootfs($rootfs);
                # NOTE(review): the parsed size is multiplied as if it were
                # given in GB - confirm the unit returned by parse_size().
                $d->{maxdisk} = int(($rootinfo->{size} || 4)*1024*1024)*1024;
            } else {
                $d->{maxdisk} = 4*1024*1024*1024;
            }
        }

        $d->{mem} = 0;
        $d->{swap} = 0;
        $d->{maxmem} = ($conf->{memory}||512)*1024*1024;
        $d->{maxswap} = ($conf->{swap}//0)*1024*1024;

        $d->{uptime} = 0;
        $d->{cpu} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $d->{template} = is_template($conf);
    }

    # second pass: live values of the running containers
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};
        my $pid = $d->{pid};

        next if !$pid; # skip stopped CTs

        # the method lxcfs uses; keep the default of 0 if the process
        # vanished in the meantime and stat() returns nothing
        my $ctime = (stat("/proc/$pid"))[10]; # 10 = ctime
        $d->{uptime} = time - $ctime if defined($ctime);

        $d->{mem} = read_cgroup_value('memory', $vmid, 'memory.usage_in_bytes');
        $d->{swap} = read_cgroup_value('memory', $vmid, 'memory.memsw.usage_in_bytes') - $d->{mem};

        my $blkio_bytes = read_cgroup_value('blkio', $vmid, 'blkio.throttle.io_service_bytes', 1);
        my @bytes = split(/\n/, $blkio_bytes);
        foreach my $byte (@bytes) {
            if (my ($key, $value) = $byte =~ /(Read|Write)\s+(\d+)/) {
                $d->{diskread} = $value if $key eq 'Read';
                $d->{diskwrite} = $value if $key eq 'Write';
            }
        }

        my $pstat = &$parse_cpuacct_stat($vmid);

        # the counters are missing if cpuacct.stat could not be parsed
        my $used = ($pstat->{utime} // 0) + ($pstat->{stime} // 0);

        my $old = $last_proc_vmid_stat->{$vmid};
        if (!$old) {
            # first sample for this container, nothing to compare with
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($cdtime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            # too little time passed since the last sample, reuse its value
            $d->{cpu} = $old->{cpu};
        }
    }

    my $netdev = PVE::ProcFSTools::read_proc_net_dev();

    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^veth([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $list->{$vmid};

        next if !$d;

        # the counters of the veth device are mapped crosswise:
        # its 'receive' counter is accounted as netout, 'transmit' as netin
        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};

    }

    return $list;
}
922
# Classify a mountpoint volume specification.
#
# Returns 'device' for paths below /dev/, 'bind' for any other absolute
# path and 'volume' for everything else.
sub classify_mountpoint {
    my ($vol) = @_;

    return 'volume' if $vol !~ m!^/!;

    return $vol =~ m!^/dev/! ? 'device' : 'bind';
}
931
# Parse a rootfs/mountpoint property string.
#
# Parameters:
#   $desc  - property string schema to parse against
#   $data  - property string; undef is treated as an empty string
#   $noerr - if true, return undef instead of dying on errors
#
# Returns the parsed hash. A 'size' property is replaced by the result of
# PVE::JSONSchema::parse_size(), and 'type' is set to the classification
# of 'volume' (see classify_mountpoint).
my $parse_ct_mountpoint_full = sub {
    my ($desc, $data, $noerr) = @_;

    $data //= '';

    my $res;
    eval { $res = PVE::JSONSchema::parse_property_string($desc, $data) };
    if ($@) {
        return undef if $noerr;
        die $@;
    }

    if (defined(my $size = $res->{size})) {
        # keep the original text, it is needed for the error message
        my $parsed = PVE::JSONSchema::parse_size($size);
        if (!defined($parsed)) {
            return undef if $noerr;
            die "invalid size: $size\n";
        }
        $res->{size} = $parsed;
    }

    $res->{type} = classify_mountpoint($res->{volume});

    return $res;
};
957
# Parse a rootfs property string (schema $rootfs_desc).
#
# Returns the parsed hash with 'mp' set to '/', or undef if parsing failed
# and $noerr is set.
sub parse_ct_rootfs {
    my ($data, $noerr) = @_;

    my $rootfs = $parse_ct_mountpoint_full->($rootfs_desc, $data, $noerr);
    return $rootfs if !defined($rootfs);

    $rootfs->{mp} = '/';

    return $rootfs;
}
967
# Parse a mountpoint property string (schema $mp_desc).
# $noerr is passed on to the parser; see parse_ct_rootfs() for the
# rootfs variant.
sub parse_ct_mountpoint {
    my ($data, $noerr) = @_;

    my $mountpoint = $parse_ct_mountpoint_full->($mp_desc, $data, $noerr);

    return $mountpoint;
}
973
# Format a mountpoint hash as property string (schema $mp_desc).
# The 'type' key is always skipped; 'mp' is skipped as well if $nomp is set.
sub print_ct_mountpoint {
    my ($info, $nomp) = @_;

    my @skip = $nomp ? ('type', 'mp') : ('type');

    return PVE::JSONSchema::print_property_string($info, $mp_desc, \@skip);
}
980
# Format a network hash as property string (schema $netconf_desc).
sub print_lxc_network {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $netconf_desc);
}
985
# Parse a network property string (schema $netconf_desc).
#
# Returns an empty hash for empty input. Otherwise returns the parsed
# hash, with 'type' forced to 'veth' and a random 'hwaddr' filled in if
# none was given.
sub parse_lxc_network {
    my ($data) = @_;

    return {} if !$data;

    my $net = PVE::JSONSchema::parse_property_string($netconf_desc, $data);

    $net->{type} = 'veth';
    $net->{hwaddr} ||= PVE::Tools::random_ether_addr();

    return $net;
}
1000
# Read the cgroup file /sys/fs/cgroup/<group>/lxc/<vmid>/<name>.
# Returns the whole content if $full is set, the first line otherwise.
sub read_cgroup_value {
    my ($group, $vmid, $name, $full) = @_;

    my $path = join('/', '/sys/fs/cgroup', $group, 'lxc', $vmid, $name);

    return $full
        ? PVE::Tools::file_get_contents($path)
        : PVE::Tools::file_read_firstline($path);
}
1010
# Write $value to the cgroup file /sys/fs/cgroup/<group>/lxc/<vmid>/<name>.
# Nothing is written if the file does not exist.
sub write_cgroup_value {
    my ($group, $vmid, $name, $value) = @_;

    my $path = join('/', '/sys/fs/cgroup', $group, 'lxc', $vmid, $name);

    PVE::ProcFSTools::write_proc_entry($path, $value) if -e $path;
}
1018
# Find the running 'lxc-console -n <vmid>' processes.
#
# Walks the numeric entries of /proc and inspects their command lines.
# Returns a hash reference { <vmid> => [ <pid>, ... ] }.
sub find_lxc_console_pids {

    my $pids_of = {};

    my $inspect = sub {
        my ($pid) = @_;

        my $cmdline = PVE::Tools::file_read_firstline("/proc/$pid/cmdline")
            or return;

        # search for lxc-console -n <vmid>: exactly three arguments
        my ($prog, $flag, $vmid, @extra) = split(/\0/, $cmdline);
        return if @extra || !defined($vmid);

        return if $flag ne '-n';
        return if $vmid !~ m/^\d+$/;
        return if $prog !~ m|^(/usr/bin/)?lxc-console$|;

        push @{$pids_of->{$vmid}}, $pid;
    };

    PVE::Tools::dir_glob_foreach('/proc', '\d+', $inspect);

    return $pids_of;
}
1044
# Return the PID reported by 'lxc-info -n <vmid> -p'.
# Dies if the output contains no (non-zero) PID.
sub find_lxc_pid {
    my ($vmid) = @_;

    my $pid;

    PVE::Tools::run_command(
        ['lxc-info', '-n', $vmid, '-p'],
        outfunc => sub {
            my ($line) = @_;
            $pid = $1 if $line =~ m/^PID:\s+(\d+)$/;
        },
    );

    die "unable to get PID for CT $vmid (not running?)\n" if !$pid;

    return $pid;
}
1059
# Parse an IPv4 address with prefix length ('a.b.c.d/n', 8 <= n <= 32).
#
# Returns { address => ..., netmask => ... }. On invalid input it dies,
# or returns undef if $noerr is set.
#
# Note: we cannot use Net::IP, because that only allows strict
# CIDR networks
sub parse_ipv4_cidr {
    my ($cidr, $noerr) = @_;

    if ($cidr =~ m!^($IPV4RE)(?:/(\d+))$! && ($2 > 7) && ($2 <= 32)) {
        my ($address, $prefix_len) = ($1, $2);
        return {
            address => $address,
            netmask => $PVE::Network::ipv4_reverse_mask->[$prefix_len],
        };
    }

    die "unable to parse ipv4 address/mask\n" if !$noerr;

    return undef;
}
1073
# Die if the configuration $conf carries a 'lock' entry.
sub check_lock {
    my ($conf) = @_;

    my $lock = $conf->{'lock'};

    die "VM is locked ($lock)\n" if $lock;
}
1079
# Die with "$err_msg - protection mode enabled" if the configuration
# $vm_conf has the 'protection' flag set.
sub check_protection {
    my ($vm_conf, $err_msg) = @_;

    die "$err_msg - protection mode enabled\n" if $vm_conf->{protection};
}
1087
# Generate the LXC configuration file /var/lib/lxc/<vmid>/config from the
# container configuration.
#
# Parameters:
#   $storage_cfg - not used here
#   $vmid        - container id
#   $conf        - container configuration hash
#
# For templates the LXC config file is removed and nothing is written.
# Dies if 'arch' or 'ostype' is missing, or if the ostype is not supported.
sub update_lxc_config {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $dir = "/var/lib/lxc/$vmid";

    if ($conf->{template}) {

        unlink "$dir/config";

        return;
    }

    my $raw = '';

    die "missing 'arch' - internal error" if !$conf->{arch};
    $raw .= "lxc.arch = $conf->{arch}\n";

    my $unprivileged = $conf->{unprivileged};
    my $custom_idmap = grep { $_->[0] eq 'lxc.id_map' } @{$conf->{lxc}};

    my $ostype = $conf->{ostype} || die "missing 'ostype' - internal error";
    if ($ostype =~ /^(?:debian | ubuntu | centos | fedora | opensuse | archlinux)$/x) {
        $raw .= "lxc.include = /usr/share/lxc/config/$ostype.common.conf\n";
        if ($unprivileged || $custom_idmap) {
            $raw .= "lxc.include = /usr/share/lxc/config/$ostype.userns.conf\n"
        }
    } else {
        die "implement me (ostype $ostype)";
    }

    $raw .= "lxc.monitor.unshare = 1\n";

    # Should we read them from /etc/subuid?
    if ($unprivileged && !$custom_idmap) {
        $raw .= "lxc.id_map = u 0 100000 65536\n";
        $raw .= "lxc.id_map = g 0 100000 65536\n";
    }

    if (!has_dev_console($conf)) {
        $raw .= "lxc.console = none\n";
        $raw .= "lxc.cgroup.devices.deny = c 5:1 rwm\n";
    }

    my $ttycount = get_tty_count($conf);
    $raw .= "lxc.tty = $ttycount\n";

    # some init scripts expect a linux terminal (turnkey).
    $raw .= "lxc.environment = TERM=linux\n";

    my $utsname = $conf->{hostname} || "CT$vmid";
    $raw .= "lxc.utsname = $utsname\n";

    my $memory = $conf->{memory} || 512;
    my $swap = $conf->{swap} // 0;

    my $lxcmem = int($memory*1024*1024);
    $raw .= "lxc.cgroup.memory.limit_in_bytes = $lxcmem\n";

    # the memsw limit covers memory plus swap
    my $lxcswap = int(($memory + $swap)*1024*1024);
    $raw .= "lxc.cgroup.memory.memsw.limit_in_bytes = $lxcswap\n";

    if (my $cpulimit = $conf->{cpulimit}) {
        $raw .= "lxc.cgroup.cpu.cfs_period_us = 100000\n";
        my $value = int(100000*$cpulimit);
        $raw .= "lxc.cgroup.cpu.cfs_quota_us = $value\n";
    }

    my $shares = $conf->{cpuunits} || 1024;
    $raw .= "lxc.cgroup.cpu.shares = $shares\n";

    # The result is not needed, but parsing dies on an invalid rootfs
    # property string.
    parse_ct_rootfs($conf->{rootfs});

    $raw .= "lxc.rootfs = $dir/rootfs\n";

    # Emit the network sections ordered by interface index, so the
    # generated file does not depend on the random hash key order.
    my @net_indices = sort { $a <=> $b }
        map { m/^net(\d+)$/ ? $1 : () } keys %$conf;

    my $netcount = 0;
    foreach my $ind (@net_indices) {
        my $d = parse_lxc_network($conf->{"net$ind"});
        $netcount++;
        $raw .= "lxc.network.type = veth\n";
        $raw .= "lxc.network.veth.pair = veth${vmid}i${ind}\n";
        $raw .= "lxc.network.hwaddr = $d->{hwaddr}\n" if defined($d->{hwaddr});
        $raw .= "lxc.network.name = $d->{name}\n" if defined($d->{name});
        $raw .= "lxc.network.mtu = $d->{mtu}\n" if defined($d->{mtu});
    }

    # raw lxc entries from the container config are appended unchanged
    if (my $lxcconf = $conf->{lxc}) {
        foreach my $entry (@$lxcconf) {
            my ($k, $v) = @$entry;
            $netcount++ if $k eq 'lxc.network.type';
            $raw .= "$k = $v\n";
        }
    }

    $raw .= "lxc.network.type = empty\n" if !$netcount;

    File::Path::mkpath("$dir/rootfs");

    PVE::Tools::file_set_contents("$dir/config", $raw);
}
1189
# Verify and clean up a nameserver list.
#
# Splits the list with PVE::Tools::split_list(), passes every entry to
# PVE::JSONSchema::pve_verify_ip() and returns the entries joined by a
# single space.
sub verify_nameserver_list {
    my ($nameserver_list) = @_;

    my @servers = PVE::Tools::split_list($nameserver_list);

    for my $server (@servers) {
        PVE::JSONSchema::pve_verify_ip($server);
    }

    return join(' ', @servers);
}
1202
# Normalise a search domain list: split it with PVE::Tools::split_list() and
# return the entries joined by single spaces. No per-domain validation is done.
sub verify_searchdomain_list {
    my ($searchdomain_list) = @_;

    # todo: should we add checks for valid dns domains?
    my @domains = PVE::Tools::split_list($searchdomain_list);

    return join(' ', @domains);
}
1214
# Record $volid in the lowest-numbered free "unusedN" slot of $config.
#
# Returns the key that was used, or nothing when $volid is already stored in
# one of the slots. Dies when all $MAX_UNUSED_DISKS slots are occupied.
sub add_unused_volume {
    my ($config, $volid) = @_;

    my $free_slot;

    # Walk from the highest index down so that the last free slot we remember
    # is the lowest-numbered one.
    foreach my $idx (reverse(0 .. $MAX_UNUSED_DISKS - 1)) {
        my $slot = "unused$idx";
        my $existing = $config->{$slot};

        if (!$existing) {
            $free_slot = $slot;
            next;
        }

        return if $existing eq $volid; # do not add duplicates
    }

    if (!$free_slot) {
        die "Too many unused volumes - please delete them first.\n";
    }

    $config->{$free_slot} = $volid;

    return $free_slot;
}
1234
# Apply option deletions and updates to a container configuration.
#
# Parameters:
#   $vmid    - container ID
#   $conf    - configuration hash, modified in place
#   $running - true when the container is running; in that case the config is
#              written after every processed option and some changes are
#              applied live (cgroup values, network devices)
#   $param   - hash ref of option => new value; 'memory' and 'swap' are taken
#              out via PVE::Tools::extract_param() and handled together
#   $delete  - optional array ref of option names to remove
#
# Options that cannot be changed on a running container are collected and
# reported with a single die() at the very end, after all other changes were
# processed. Dies immediately for required, read-only or unknown options.
sub update_pct_config {
    my ($vmid, $conf, $running, $param, $delete) = @_;

    my @nohotplug;

    my $new_disks = 0;
    my @deleted_volumes;

    my $rootdir;
    if ($running) {
        my $pid = find_lxc_pid($vmid);
        $rootdir = "/proc/$pid/root";
    }

    # Returns true (and remembers the option names) when the container is
    # running, i.e. when the caller has to skip the change.
    my $hotplug_error = sub {
        if ($running) {
            push @nohotplug, @_;
            return 1;
        } else {
            return 0;
        }
    };

    if (defined($delete)) {
        foreach my $opt (@$delete) {
            if (!exists($conf->{$opt})) {
                warn "no such option: $opt\n";
                next;
            }

            if ($opt eq 'hostname' || $opt eq 'memory' || $opt eq 'rootfs') {
                die "unable to delete required option '$opt'\n";
            } elsif ($opt eq 'swap') {
                delete $conf->{$opt};
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", -1);
            } elsif ($opt eq 'description' || $opt eq 'onboot' || $opt eq 'startup') {
                delete $conf->{$opt};
            } elsif ($opt eq 'nameserver' || $opt eq 'searchdomain' ||
                     $opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
                next if $hotplug_error->($opt);
                delete $conf->{$opt};
            } elsif ($opt =~ m/^net(\d+)$/) {
                # \d+ (not \d) so that the same option names are accepted here
                # as in the update branch below.
                my $netid = $1;
                delete $conf->{$opt};
                next if !$running;
                PVE::Network::veth_delete("veth${vmid}i$netid");
            } elsif ($opt eq 'protection') {
                delete $conf->{$opt};
            } elsif ($opt =~ m/^unused(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                push @deleted_volumes, $conf->{$opt};
                delete $conf->{$opt};
            } elsif ($opt =~ m/^mp(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                my $mountpoint = parse_ct_mountpoint($conf->{$opt});
                if ($mountpoint->{type} eq 'volume') {
                    add_unused_volume($conf, $mountpoint->{volume})
                }
                delete $conf->{$opt};
            } elsif ($opt eq 'unprivileged') {
                die "unable to delete read-only option: '$opt'\n";
            } else {
                die "implement me (delete: $opt)"
            }
            write_config($vmid, $conf) if $running;
        }
    }

    # There's no separate swap size to configure, there's memory and "total"
    # memory (iow. memory+swap). This means we have to change them together.
    my $wanted_memory = PVE::Tools::extract_param($param, 'memory');
    my $wanted_swap = PVE::Tools::extract_param($param, 'swap');
    if (defined($wanted_memory) || defined($wanted_swap)) {

        my $old_memory = ($conf->{memory} || 512);
        my $old_swap = ($conf->{swap} || 0);

        $wanted_memory //= $old_memory;
        $wanted_swap //= $old_swap;

        my $total = $wanted_memory + $wanted_swap;
        if ($running) {
            my $old_total = $old_memory + $old_swap;
            # The write order depends on the direction of the change: when
            # growing, the total (memsw) limit is raised first; when
            # shrinking, the memory limit is lowered first.
            if ($total > $old_total) {
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
                write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
            } else {
                write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
            }
        }
        $conf->{memory} = $wanted_memory;
        $conf->{swap} = $wanted_swap;

        write_config($vmid, $conf) if $running;
    }

    foreach my $opt (keys %$param) {
        my $value = $param->{$opt};
        if ($opt eq 'hostname') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'onboot') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt eq 'startup') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
            next if $hotplug_error->($opt);
            $conf->{$opt} = $value;
        } elsif ($opt eq 'nameserver') {
            next if $hotplug_error->($opt);
            my $list = verify_nameserver_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'searchdomain') {
            next if $hotplug_error->($opt);
            my $list = verify_searchdomain_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'cpulimit') {
            next if $hotplug_error->($opt); # FIXME: hotplug
            $conf->{$opt} = $value;
        } elsif ($opt eq 'cpuunits') {
            $conf->{$opt} = $value;
            write_cgroup_value("cpu", $vmid, "cpu.shares", $value);
        } elsif ($opt eq 'description') {
            $conf->{$opt} = PVE::Tools::encode_text($value);
        } elsif ($opt =~ m/^net(\d+)$/) {
            my $netid = $1;
            my $net = parse_lxc_network($value);
            if (!$running) {
                $conf->{$opt} = print_lxc_network($net);
            } else {
                update_net($vmid, $conf, $opt, $net, $netid, $rootdir);
            }
        } elsif ($opt eq 'protection') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt =~ m/^mp(\d+)$/) {
            next if $hotplug_error->($opt);
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            $conf->{$opt} = $value;
            $new_disks = 1;
        } elsif ($opt eq 'rootfs') {
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            die "implement me: $opt";
        } elsif ($opt eq 'unprivileged') {
            die "unable to modify read-only option: '$opt'\n";
        } else {
            die "implement me: $opt";
        }
        write_config($vmid, $conf) if $running;
    }

    if (@deleted_volumes) {
        my $storage_cfg = PVE::Storage::config();
        foreach my $volume (@deleted_volumes) {
            delete_mountpoint_volume($storage_cfg, $vmid, $volume);
        }
    }

    if ($new_disks) {
        my $storage_cfg = PVE::Storage::config();
        create_disks($storage_cfg, $vmid, $conf, $conf);
    }

    # This should be the last thing we do here
    if ($running && scalar(@nohotplug)) {
        die "unable to modify " . join(',', @nohotplug) . " while container is running\n";
    }
}
1404
# Tell whether the container should get a /dev/console.
# True unless the 'console' option is explicitly set to a false value.
sub has_dev_console {
    my ($conf) = @_;

    my $console = $conf->{console};

    return !defined($console) || !!$console;
}
1410
# Return the configured 'tty' value, falling back to the default declared in
# $confdesc when the option is not defined.
sub get_tty_count {
    my ($conf) = @_;

    my $configured = $conf->{tty};
    return $configured if defined($configured);

    return $confdesc->{tty}->{default};
}
1416
# Return the configured 'cmode' value, falling back to the default declared in
# $confdesc when the option is not defined.
sub get_cmode {
    my ($conf) = @_;

    my $configured = $conf->{cmode};
    return $configured if defined($configured);

    return $confdesc->{cmode}->{default};
}
1422
# Build the command (as an array ref) used to open a console for container
# $vmid, depending on the console mode returned by get_cmode().
# Dies with "internal error" for an unknown mode.
sub get_console_command {
    my ($vmid, $conf) = @_;

    my $cmode = get_cmode($conf);

    return ['lxc-console', '-n', $vmid, '-t', 0] if $cmode eq 'console';
    return ['lxc-console', '-n', $vmid] if $cmode eq 'tty';
    return ['lxc-attach', '--clear-env', '-n', $vmid] if $cmode eq 'shell';

    die "internal error";
}
1438
# Return the (IPv4, IPv6) addresses configured on net0, without the /prefix
# suffix. An address given as a keyword (dhcp/manual, plus auto for IPv6) is
# reported as undef. Returns undef when there is no net0 entry at all.
sub get_primary_ips {
    my ($conf) = @_;

    # return data from net0
    my $net0 = $conf->{net0};
    return undef if !defined($net0);

    my $net = parse_lxc_network($net0);

    my ($ipv4, $ipv6) = ($net->{ip}, $net->{ip6});

    if ($ipv4) {
        if ($ipv4 =~ /^(dhcp|manual)$/) {
            $ipv4 = undef;
        } else {
            $ipv4 =~ s!/\d+$!!;
        }
    }

    if ($ipv6) {
        if ($ipv6 =~ /^(auto|dhcp|manual)$/) {
            $ipv6 = undef;
        } else {
            $ipv6 =~ s!/\d+$!!;
        }
    }

    return ($ipv4, $ipv6);
}
1466
# Free the storage volume behind a mountpoint, but only when
# classify_mountpoint() reports it as a 'volume' and the owner returned by
# PVE::Storage::parse_volname() equals $vmid.
sub delete_mountpoint_volume {
    my ($storage_cfg, $vmid, $volume) = @_;

    my $kind = classify_mountpoint($volume);
    return if $kind ne 'volume';

    my (undef, undef, $owner) = PVE::Storage::parse_volname($storage_cfg, $volume);

    if ($vmid == $owner) {
        PVE::Storage::vdisk_free($storage_cfg, $volume);
    }
}
1475
# Remove a container: delete the volumes of all its mountpoints, remove the
# LXC directory below /var/lib/lxc/$vmid and finally destroy its config.
# The results of rmdir/unlink are not checked.
sub destroy_lxc_container {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $free_volume = sub {
        my ($ms, $mountpoint) = @_;
        delete_mountpoint_volume($storage_cfg, $vmid, $mountpoint->{volume});
    };
    foreach_mountpoint($conf, $free_volume);

    my $lxc_dir = "/var/lib/lxc/$vmid";
    rmdir "$lxc_dir/rootfs";
    unlink "$lxc_dir/config";
    rmdir $lxc_dir;

    destroy_config($vmid);

    #my $cmd = ['lxc-destroy', '-n', $vmid ];
    #PVE::Tools::run_command($cmd);
}
1492
# Clean up after a container was stopped: deactivate all its storage volumes
# unless $keepActive is set. Failures are only warned about, never fatal.
sub vm_stop_cleanup {
    my ($storage_cfg, $vmid, $conf, $keepActive) = @_;

    eval {
        unless ($keepActive) {
            my $volumes = get_vm_volumes($conf);
            PVE::Storage::deactivate_volumes($storage_cfg, $volumes);
        }
    };
    if (my $err = $@) {
        warn $err; # avoid errors - just warn
    }
}
1505
# Numeric "not equal" that tolerates undef: two undefs are equal, an undef and
# a defined value differ, two defined values are compared with '!='.
my $safe_num_ne = sub {
    my ($lhs, $rhs) = @_;

    my $missing = grep { !defined($_) } ($lhs, $rhs);

    return 0 if $missing == 2;
    return 1 if $missing == 1;

    return $lhs != $rhs;
};
1515
# String "not equal" that tolerates undef: two undefs are equal, an undef and
# a defined value differ, two defined values are compared with 'ne'.
my $safe_string_ne = sub {
    my ($lhs, $rhs) = @_;

    my $missing = grep { !defined($_) } ($lhs, $rhs);

    return 0 if $missing == 2;
    return 1 if $missing == 1;

    return $lhs ne $rhs;
};
1525
# Apply a changed network device configuration to a running container.
#
# Three cases are distinguished:
#   - no previous config for $opt: the device is hot-plugged;
#   - hwaddr or name changed: the old veth is deleted and a new one plugged;
#   - bridge, tag or firewall changed: the veth is re-plugged on the host.
# The config is written after every completed step, and the IP configuration
# is updated at the end in all cases. Only 'veth' devices are supported.
sub update_net {
    my ($vmid, $conf, $opt, $newnet, $netid, $rootdir) = @_;

    # for when there are physical interfaces
    die "cannot update interface of type $newnet->{type}"
        if $newnet->{type} ne 'veth';

    my $veth = "veth${vmid}i${netid}";
    my $eth = $newnet->{name};

    my @host_side_props = qw(bridge tag firewall);

    my $oldnetcfg = $conf->{$opt};
    if (!$oldnetcfg) {
        hotplug_net($vmid, $conf, $opt, $newnet, $netid);
    } else {
        my $oldnet = parse_lxc_network($oldnetcfg);

        if ($safe_string_ne->($oldnet->{hwaddr}, $newnet->{hwaddr}) ||
            $safe_string_ne->($oldnet->{name}, $newnet->{name})) {

            PVE::Network::veth_delete($veth);
            delete $conf->{$opt};
            write_config($vmid, $conf);

            hotplug_net($vmid, $conf, $opt, $newnet, $netid);

        } elsif ($safe_string_ne->($oldnet->{bridge}, $newnet->{bridge}) ||
                 $safe_num_ne->($oldnet->{tag}, $newnet->{tag}) ||
                 $safe_num_ne->($oldnet->{firewall}, $newnet->{firewall})) {

            if ($oldnet->{bridge}) {
                PVE::Network::tap_unplug($veth);
                delete $oldnet->{$_} for @host_side_props;
                $conf->{$opt} = print_lxc_network($oldnet);
                write_config($vmid, $conf);
            }

            PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});
            for my $prop (@host_side_props) {
                $oldnet->{$prop} = $newnet->{$prop} if $newnet->{$prop};
            }
            $conf->{$opt} = print_lxc_network($oldnet);
            write_config($vmid, $conf);
        }
    }

    update_ipconfig($vmid, $conf, $opt, $eth, $newnet, $rootdir);
}
1575
# Hot-plug a veth network device into a running container: create the veth
# pair on the host, plug it into the bridge, move the peer into the container
# with lxc-device, set the link up, then store the resulting device config
# (type, bridge, tag, firewall, hwaddr, name) under $opt and write the config.
sub hotplug_net {
    my ($vmid, $conf, $opt, $newnet, $netid) = @_;

    my $veth = "veth${vmid}i${netid}";
    my $vethpeer = $veth . "p";
    my $eth = $newnet->{name};

    PVE::Network::veth_create($veth, $vethpeer, $newnet->{bridge}, $newnet->{hwaddr});
    PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});

    # attach peer in container
    PVE::Tools::run_command(['lxc-device', '-n', $vmid, 'add', $vethpeer, "$eth" ]);

    # link up peer in container
    PVE::Tools::run_command(['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', '/sbin/ip', 'link', 'set', $eth ,'up' ]);

    # Only the properties that are actually in effect now are recorded.
    my $applied = { type => 'veth' };
    for my $prop (qw(bridge tag firewall hwaddr name)) {
        next if !$newnet->{$prop};
        $applied->{$prop} = $newnet->{$prop};
    }
    $conf->{$opt} = print_lxc_network($applied);

    write_config($vmid, $conf);
}
1602
# Apply changed IP address / gateway settings of network device $opt inside a
# running container and persist them.
#
# Parameters:
#   $vmid    - container ID
#   $conf    - configuration hash; $conf->{$opt} is updated and written
#   $opt     - name of the network option (netN)
#   $eth     - interface name inside the container
#   $newnet  - parsed new network configuration
#   $rootdir - root directory of the container, passed to PVE::LXC::Setup
#
# IPv4 and IPv6 are handled one after the other. For each family the new
# address is added first, then the gateway is replaced, then the old address
# is removed; a failure in the first two steps cancels the change for that
# family (with a warning) and leaves the stored configuration untouched.
sub update_ipconfig {
    my ($vmid, $conf, $opt, $eth, $newnet, $rootdir) = @_;

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);

    my $optdata = parse_lxc_network($conf->{$opt});
    my $deleted = [];
    my $added = [];

    # Run a command inside the container's network namespace; the first
    # argument is a hash ref of extra run_command() options.
    my $nscmd = sub {
        my $cmdargs = shift;
        PVE::Tools::run_command(['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', @_], %$cmdargs);
    };
    my $ipcmd = sub { &$nscmd({}, '/sbin/ip', @_) };

    my $change_ip_config = sub {
        my ($ipversion) = @_;

        my $family_opt = "-$ipversion";
        my $suffix = $ipversion == 4 ? '' : $ipversion;
        my $gw= "gw$suffix";
        my $ip= "ip$suffix";

        my $newip = $newnet->{$ip};
        my $newgw = $newnet->{$gw};
        my $oldip = $optdata->{$ip};

        my $change_ip = &$safe_string_ne($oldip, $newip);
        my $change_gw = &$safe_string_ne($optdata->{$gw}, $newgw);

        return if !$change_ip && !$change_gw;

        # step 1: add new IP, if this fails we cancel
        my $is_real_ip = ($newip && $newip !~ /^(?:auto|dhcp|manual)$/);
        if ($change_ip && $is_real_ip) {
            eval { &$ipcmd($family_opt, 'addr', 'add', $newip, 'dev', $eth); };
            if (my $err = $@) {
                warn $err;
                return;
            }
        }

        # step 2: replace gateway
        # If this fails we delete the added IP and cancel.
        # If it succeeds we save the config and delete the old IP, ignoring
        # errors. The config is then saved.
        # Note: 'ip route replace' also adds the route when none exists yet.
        if ($change_gw) {
            if ($newgw) {
                eval {
                    if ($is_real_ip && !PVE::Network::is_ip_in_cidr($newgw, $newip, $ipversion)) {
                        &$ipcmd($family_opt, 'route', 'add', $newgw, 'dev', $eth);
                    }
                    &$ipcmd($family_opt, 'route', 'replace', 'default', 'via', $newgw);
                };
                if (my $err = $@) {
                    warn $err;
                    # the route was not replaced, the old IP is still available
                    # rollback (delete new IP) and cancel
                    # Only roll back an address that step 1 really added;
                    # keywords such as 'dhcp' or an unset IP were never added.
                    if ($change_ip && $is_real_ip) {
                        eval { &$ipcmd($family_opt, 'addr', 'del', $newip, 'dev', $eth); };
                        warn $@ if $@; # no need to die here
                    }
                    return;
                }
            } else {
                eval { &$ipcmd($family_opt, 'route', 'del', 'default'); };
                # if the route was not deleted, the guest might have deleted it manually
                # warn and continue
                warn $@ if $@;
            }
        }

        # from this point on we save the configuration
        # step 3: delete old IP ignoring errors
        # 'manual' is a keyword just like 'auto' and 'dhcp' (see step 1), so
        # there is no address to remove for it either.
        if ($change_ip && $oldip && $oldip !~ /^(?:auto|dhcp|manual)$/) {
            # We need to enable promote_secondaries, otherwise our newly added
            # address will be removed along with the old one.
            my $promote = 0;
            eval {
                if ($ipversion == 4) {
                    &$nscmd({ outfunc => sub { $promote = int(shift) } },
                            'cat', "/proc/sys/net/ipv4/conf/$eth/promote_secondaries");
                    &$nscmd({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=1");
                }
                &$ipcmd($family_opt, 'addr', 'del', $oldip, 'dev', $eth);
            };
            warn $@ if $@; # no need to die here

            # restore the previous promote_secondaries setting
            if ($ipversion == 4) {
                &$nscmd({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=$promote");
            }
        }

        foreach my $property ($ip, $gw) {
            if ($newnet->{$property}) {
                $optdata->{$property} = $newnet->{$property};
            } else {
                delete $optdata->{$property};
            }
        }
        $conf->{$opt} = print_lxc_network($optdata);
        write_config($vmid, $conf);
        $lxc_setup->setup_network($conf);
    };

    &$change_ip_config(4);
    &$change_ip_config(6);

}
1712
1713 # Internal snapshots
1714
1715 # NOTE: Snapshot create/delete involves several non-atomic
1716 # actions, and can take a long time.
1717 # So we try to avoid locking the file and use the 'lock' variable
1718 # inside the config file instead.
1719
# Copy all configuration entries from $source to $dest, except the keys that
# describe snapshot bookkeeping or per-config metadata (snapshots, snapstate,
# snaptime, vmstate, lock, digest, description). Existing entries in $dest
# that are not overwritten are left alone.
my $snapshot_copy_config = sub {
    my ($source, $dest) = @_;

    my %skip = map { $_ => 1 }
        qw(snapshots snapstate snaptime vmstate lock digest description);

    foreach my $key (grep { !$skip{$_} } keys %$source) {
        $dest->{$key} = $source->{$key};
    }
};
1735
# First phase of snapshot creation, executed under the container lock:
# set the 'snapshot' lock in the config and add a snapshot entry in state
# "prepare" that holds a copy of the current configuration.
#
# Dies for templates, locked containers, already used snapshot names and
# when has_feature() reports the snapshot feature as unavailable.
# Returns the new snapshot entry.
my $snapshot_prepare = sub {
    my ($vmid, $snapname, $comment) = @_;

    my $snap;

    my $prepare = sub {
        my $conf = load_config($vmid);

        if (is_template($conf)) {
            die "you can't take a snapshot if it's a template\n";
        }

        check_lock($conf);

        $conf->{lock} = 'snapshot';

        if (defined($conf->{snapshots}->{$snapname})) {
            die "snapshot name '$snapname' already used\n";
        }

        my $storecfg = PVE::Storage::config();
        my $feature = $snapname eq 'vzdump' ? 'vzdump' : 'snapshot';
        if (!has_feature($feature, $conf, $storecfg)) {
            die "snapshot feature is not available\n";
        }

        $snap = {};
        $conf->{snapshots}->{$snapname} = $snap;

        $snapshot_copy_config->($conf, $snap);

        $snap->{snapstate} = "prepare";
        $snap->{snaptime} = time();
        $snap->{description} = $comment if $comment;

        write_config($vmid, $conf);
    };

    lock_container($vmid, 10, $prepare);

    return $snap;
};
1775
# Final phase of snapshot creation, executed under the container lock:
# verify that the config holds the 'snapshot' lock and a snapshot entry in
# state "prepare", then clear both markers, make the snapshot the current
# parent and write the config.
my $snapshot_commit = sub {
    my ($vmid, $snapname) = @_;

    my $commit = sub {
        my $conf = load_config($vmid);

        my $lock = $conf->{lock};
        die "missing snapshot lock\n"
            if !($lock && $lock eq 'snapshot');

        my $snap = $conf->{snapshots}->{$snapname};
        die "snapshot '$snapname' does not exist\n"
            if !defined($snap);

        my $state = $snap->{snapstate};
        die "wrong snapshot state\n"
            if !($state && $state eq "prepare");

        delete $snap->{snapstate};
        delete $conf->{lock};
        $conf->{parent} = $snapname;

        write_config($vmid, $conf);
    };

    lock_container($vmid, 10, $commit);
};
1802
# Check whether $feature is available on the volumes of a container.
#
# 'vzdump' is checked as 'snapshot', but mountpoints other than rootfs that
# are not flagged for backup are ignored. Returns 1 when no checked volume
# lacks the feature, 0 otherwise. Dies for any checked mountpoint other than
# rootfs (not implemented).
sub has_feature {
    my ($feature, $conf, $storecfg, $snapname) = @_;

    my $vzdump = $feature eq 'vzdump';
    $feature = 'snapshot' if $vzdump;

    my $unsupported;

    my $check_one = sub {
        my ($ms, $mountpoint) = @_;

        return if $unsupported; # skip further test
        return if $vzdump && $ms ne 'rootfs' && !$mountpoint->{backup};

        if (!PVE::Storage::volume_has_feature($storecfg, $feature, $mountpoint->{volume}, $snapname)) {
            $unsupported = 1;
        }

        # TODO: implement support for mountpoints
        if ($ms ne 'rootfs') {
            die "unable to handle mountpoint '$ms' - feature not implemented\n";
        }
    };

    foreach_mountpoint($conf, $check_one);

    return $unsupported ? 0 : 1;
}
1825
# Create snapshot $snapname of container $vmid.
#
# The snapshot is prepared in the config, a running container is frozen and
# synced, the rootfs volume is snapshotted and the snapshot is committed.
# The container is always unfrozen again. On failure the half-created
# snapshot is removed (best effort) and the original error is re-thrown.
sub snapshot_create {
    my ($vmid, $snapname, $comment) = @_;

    my $snap = &$snapshot_prepare($vmid, $snapname, $comment);

    my $conf = load_config($vmid);

    my $running = check_running($vmid);

    my $unfreeze = 0;

    eval {
        if ($running) {
            PVE::Tools::run_command(['/usr/bin/lxc-freeze', '-n', $vmid]);
            $unfreeze = 1;
            PVE::Tools::run_command(['/bin/sync']);
        }

        my $storecfg = PVE::Storage::config();
        my $rootinfo = parse_ct_rootfs($conf->{rootfs});
        my $volid = $rootinfo->{volume};

        PVE::Storage::volume_snapshot($storecfg, $volid, $snapname);
        &$snapshot_commit($vmid, $snapname);
    };
    my $err = $@;

    if ($unfreeze) {
        eval { PVE::Tools::run_command(['/usr/bin/lxc-unfreeze', '-n', $vmid]); };
        warn $@ if $@;
    }

    if ($err) {
        # The cleanup may fail as well (e.g. when the storage snapshot was
        # never created). Only warn about that, so the caller gets the error
        # that actually caused the failure.
        eval { snapshot_delete($vmid, $snapname, 1); };
        warn $@ if $@;
        die "$err\n";
    }
}
1863
# Delete snapshot $snapname of container $vmid.
#
# The snapshot is first marked with snapstate 'delete' in the config, then the
# storage snapshot of its rootfs volume is removed and finally the snapshot
# entry is dropped from the config (re-linking 'parent' references).
#
# If removing the storage snapshot fails:
#   - without $force the error is thrown and the config entry is kept
#     (still marked as 'delete');
#   - with $force the config entry is removed anyway and the error is thrown
#     afterwards.
sub snapshot_delete {
    my ($vmid, $snapname, $force) = @_;

    my $snap;

    my $conf;

    my $updatefn = sub {

        $conf = load_config($vmid);

        die "you can't delete a snapshot if vm is a template\n"
            if is_template($conf);

        $snap = $conf->{snapshots}->{$snapname};

        check_lock($conf);

        die "snapshot '$snapname' does not exist\n" if !defined($snap);

        $snap->{snapstate} = 'delete';

        write_config($vmid, $conf);
    };

    lock_container($vmid, 10, $updatefn);

    my $storecfg = PVE::Storage::config();

    # Replace references to the deleted snapshot by its own parent.
    my $unlink_parent = sub {

        my ($confref, $new_parent) = @_;

        if ($confref->{parent} && $confref->{parent} eq $snapname) {
            if ($new_parent) {
                $confref->{parent} = $new_parent;
            } else {
                delete $confref->{parent};
            }
        }
    };

    my $del_snap = sub {

        check_lock($conf);

        my $parent = $conf->{snapshots}->{$snapname}->{parent};
        foreach my $snapkey (keys %{$conf->{snapshots}}) {
            &$unlink_parent($conf->{snapshots}->{$snapkey}, $parent);
        }

        &$unlink_parent($conf, $parent);

        delete $conf->{snapshots}->{$snapname};

        write_config($vmid, $conf);
    };

    my $rootfs = $conf->{snapshots}->{$snapname}->{rootfs};
    my $rootinfo = parse_ct_rootfs($rootfs);
    my $volid = $rootinfo->{volume};

    eval {
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname);
    };
    my $err = $@;

    # Never report success when the storage snapshot could not be removed.
    if ($err && !$force) {
        die "Can't delete snapshot: $vmid $snapname $err\n";
    }

    lock_container($vmid, 10, $del_snap);

    if ($err) {
        die "Can't delete snapshot: $vmid $snapname $err\n";
    }
}
1938
# Roll container $vmid back to snapshot $snapname.
#
# Under the container lock the container is stopped (killed) if running, the
# config is locked with 'rollback' and the snapshot's configuration entries
# are copied over the current ones. Then the rootfs volume is rolled back on
# the storage and the 'rollback' lock is removed again.
#
# Dies for templates, unknown or incomplete snapshots, when the storage
# refuses the rollback, or when the container cannot be stopped.
sub snapshot_rollback {
    my ($vmid, $snapname) = @_;

    my $storecfg = PVE::Storage::config();

    my $conf = load_config($vmid);

    die "you can't rollback if vm is a template\n" if is_template($conf);

    my $snap = $conf->{snapshots}->{$snapname};

    die "snapshot '$snapname' does not exist\n" if !defined($snap);

    my $rootfs = $snap->{rootfs};
    my $rootinfo = parse_ct_rootfs($rootfs);
    my $volid = $rootinfo->{volume};

    PVE::Storage::volume_rollback_is_possible($storecfg, $volid, $snapname);

    my $updatefn = sub {

        die "unable to rollback to incomplete snapshot (snapstate = $snap->{snapstate})\n"
            if $snap->{snapstate};

        check_lock($conf);

        # List form: the arguments are passed to lxc-stop directly, without
        # going through a shell.
        system('lxc-stop', '-n', $vmid, '--kill') if check_running($vmid);

        die "unable to rollback vm $vmid: vm is running\n"
            if check_running($vmid);

        $conf->{lock} = 'rollback';

        # copy snapshot config to current config
        # NOTE(review): options that exist only in the current config are not
        # removed by this copy - confirm whether that is intended.
        $snapshot_copy_config->($snap, $conf);
        delete $conf->{snaptime};
        delete $conf->{snapname};
        $conf->{parent} = $snapname;

        write_config($vmid, $conf);
    };

    my $unlockfn = sub {
        delete $conf->{lock};
        write_config($vmid, $conf);
    };

    lock_container($vmid, 10, $updatefn);

    PVE::Storage::volume_snapshot_rollback($storecfg, $volid, $snapname);

    lock_container($vmid, 5, $unlockfn);
}
1997
# Convert the rootfs volume of container $vmid into a base (template) volume
# and store the new volume ID in the config. Dies when the storage does not
# offer the 'template' feature for the volume.
sub template_create {
    my ($vmid, $conf) = @_;

    my $storecfg = PVE::Storage::config();

    my $rootinfo = parse_ct_rootfs($conf->{rootfs});
    my $volid = $rootinfo->{volume};

    if (!PVE::Storage::volume_has_feature($storecfg, 'template', $volid)) {
        die "Template feature is not available for '$volid'\n";
    }

    PVE::Storage::activate_volumes($storecfg, [$volid]);

    $rootinfo->{volume} = PVE::Storage::vdisk_create_base($storecfg, $volid);
    $conf->{rootfs} = print_ct_mountpoint($rootinfo, 1);

    write_config($vmid, $conf);
}
2017
# Return 1 when the configuration marks the container as a template
# (template option defined and numerically equal to 1), 0 otherwise.
sub is_template {
    my ($conf) = @_;

    return 1 if defined $conf->{template} && $conf->{template} == 1;

    # explicit false value instead of relying on the implicit fall-through
    return 0;
}
2023
# Return the list of mountpoint option names: 'rootfs' followed by
# mp0 .. mp($MAX_MOUNT_POINTS - 1), or the same list reversed when $reverse
# is true.
sub mountpoint_names {
    my ($reverse) = @_;

    my @names = ('rootfs', map { "mp$_" } 0 .. $MAX_MOUNT_POINTS - 1);

    return $reverse ? reverse @names : @names;
}
2035
# The container might have *different* symlinks than the host. realpath/abs_path
# use the actual filesystem to resolve links.
#
# Normalise a mount path purely textually: force a leading slash, collapse
# repeated slashes and "." components, and collapse ".." components without
# looking at the filesystem. A trailing slash is preserved.
sub sanitize_mountpoint {
    my ($mp) = @_;
    $mp = '/' . $mp; # we always start with a slash
    $mp =~ s@/{2,}@/@g; # collapse sequences of slashes
    # collapse /./ into a single slash; repeat because consecutive "/./"
    # sequences share a slash and are not all caught by a single /g pass
    1 while $mp =~ s@/\./@/@;
    # The capture groups below use (/?) instead of (/)? so that $1 is always
    # defined (possibly empty) and no uninitialized-value warning is raised.
    $mp =~ s@/\.(/?)$@$1@; # collapse a trailing /. or /./
    $mp =~ s@(.*)/[^/]+/\.\./@$1/@g; # collapse /../ without regard for symlinks
    $mp =~ s@/\.\.(/?)$@$1@; # collapse trailing /.. or /../ disregarding symlinks
    return $mp;
}
2048
# Call $func->($key, $mountpoint) for every mountpoint option present in
# $conf, in the order given by mountpoint_names($reverse).
#
# Entries whose value is undefined or which the parser returns undef for are
# skipped. The 'mp' path, and the 'volume' when it is an absolute path, are
# passed through sanitize_mountpoint() before the callback is invoked.
sub foreach_mountpoint_full {
    my ($conf, $reverse, $func) = @_;

    for my $name (mountpoint_names($reverse)) {
        my $raw = $conf->{$name};
        next if !defined($raw);

        my $mountpoint;
        if ($name eq 'rootfs') {
            $mountpoint = parse_ct_rootfs($raw, 1);
        } else {
            $mountpoint = parse_ct_mountpoint($raw, 1);
        }
        next if !defined($mountpoint);

        $mountpoint->{mp} = sanitize_mountpoint($mountpoint->{mp});

        my $volume = $mountpoint->{volume};
        if ($volume =~ m|^/|) {
            $mountpoint->{volume} = sanitize_mountpoint($volume);
        }

        $func->($name, $mountpoint);
    }
}
2066
# Iterate over the mountpoints of $conf in forward order (rootfs first),
# calling $func for each; see foreach_mountpoint_full().
sub foreach_mountpoint {
    my ($conf, $func) = @_;

    my $reverse = 0;
    return foreach_mountpoint_full($conf, $reverse, $func);
}
2072
# Iterate over the mountpoints of $conf in reverse order (rootfs last),
# calling $func for each; see foreach_mountpoint_full().
sub foreach_mountpoint_reverse {
    my ($conf, $func) = @_;

    my $reverse = 1;
    return foreach_mountpoint_full($conf, $reverse, $func);
}
2078
# Check that $authuser may modify every option listed in $key_list.
#
# root@pam is always allowed. For every other user the privilege matching
# the option is checked through $rpcenv->check_vm_perm():
#   cpus/cpuunits/cpulimit             -> VM.Config.CPU
#   rootfs/mpN                         -> VM.Config.Disk
#   memory/swap                        -> VM.Config.Memory
#   netN/nameserver/searchdomain/hostname -> VM.Config.Network
#   anything else                      -> VM.Config.Options
#
# Returns 1 when all calls to check_vm_perm() return normally.
sub check_ct_modify_config_perm {
    my ($rpcenv, $authuser, $vmid, $pool, $key_list) = @_;

    # Only the superuser may skip the checks. The comparison used to be
    # inverted ('ne'), which skipped them for everybody *except* root.
    return 1 if $authuser eq 'root@pam';

    foreach my $opt (@$key_list) {

        if ($opt eq 'cpus' || $opt eq 'cpuunits' || $opt eq 'cpulimit') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.CPU']);
        } elsif ($opt eq 'rootfs' || $opt =~ /^mp\d+$/) {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk']);
        } elsif ($opt eq 'memory' || $opt eq 'swap') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Memory']);
        } elsif ($opt =~ m/^net\d+$/ || $opt eq 'nameserver' ||
                 $opt eq 'searchdomain' || $opt eq 'hostname') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Network']);
        } else {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Options']);
        }
    }

    return 1;
}
2102
# Unmount all mountpoints of container $vmid below /var/lib/lxc/$vmid/rootfs,
# in reverse mountpoint order. Paths that are not mounted are skipped.
# An umount failure is fatal unless $noerr is set, in which case it is only
# warned about.
sub umount_all {
    my ($vmid, $storage_cfg, $conf, $noerr) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    my $volid_list = get_vm_volumes($conf); # result is not used below

    my $unmount_one = sub {
        my ($ms, $mountpoint) = @_;

        my ($volid, $mount) = @{$mountpoint}{qw(volume mp)};
        return if !($volid && $mount);

        (my $mount_path = "$rootdir/$mount") =~ s!/+!/!g;

        return if !PVE::ProcFSTools::is_mounted($mount_path);

        eval { PVE::Tools::run_command(['umount', '-d', $mount_path]); };
        my $err = $@;
        return if !$err;

        die $err if !$noerr;
        warn $err;
    };

    foreach_mountpoint_reverse($conf, $unmount_one);
}
2134
# Activate the volumes of container $vmid and mount all its mountpoints
# below /var/lib/lxc/$vmid/rootfs (created if necessary).
# If any mount fails, everything is unmounted again (errors of that cleanup
# are only warned about) and the original error is re-thrown.
# Returns the root directory.
sub mount_all {
    my ($vmid, $storage_cfg, $conf) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    File::Path::make_path($rootdir);

    PVE::Storage::activate_volumes($storage_cfg, get_vm_volumes($conf));

    my $mount_one = sub {
        my ($ms, $mountpoint) = @_;

        mountpoint_mount($mountpoint, $rootdir, $storage_cfg);
    };

    eval { foreach_mountpoint($conf, $mount_one); };
    my $err = $@;
    if ($err) {
        warn "mounting container failed\n";
        umount_all($vmid, $storage_cfg, $conf, 1);
        die $err;
    }

    return $rootdir;
}
2159
2160
# Resolve the source path of a mountpoint without mounting anything: calls
# mountpoint_mount() with an undefined root directory.
sub mountpoint_mount_path {
    my ($mountpoint, $storage_cfg, $snapname) = @_;

    my $rootdir = undef; # undef => only return the path
    return mountpoint_mount($mountpoint, $rootdir, $storage_cfg, $snapname);
}
2166
# Die when the canonical form of $path differs from what Cwd::realpath()
# resolves it to, i.e. when resolving the path on the filesystem changes it.
my $check_mount_path = sub {
    my ($path) = @_;

    my $canonical = File::Spec->canonpath($path);
    my $resolved = Cwd::realpath($canonical);

    die "mount path modified by symlink: $canonical != $resolved"
        if $resolved ne $canonical;
};
2175
# Ask 'losetup --associated' for the loop device backing $path.
# Returns the device path (/dev/loopN) from the last matching output line,
# or undef when no line matched.
sub query_loopdev {
    my ($path) = @_;

    my $loopdev;

    PVE::Tools::run_command(
        ['losetup', '--associated', $path],
        outfunc => sub {
            my ($line) = @_;
            $loopdev = $1 if $line =~ m@^(/dev/loop\d+):@;
        },
    );

    return $loopdev;
}
2189
# use $rootdir = undef to just return the corresponding mount path
#
# Mount a single mountpoint below $rootdir.
#
# Parameters:
#   $mountpoint  - parsed mountpoint (uses the keys volume, mp, type, acl, ro)
#   $rootdir     - directory the 'mp' path is relative to; when undef nothing
#                  is mounted and only the source path is determined
#   $storage_cfg - storage configuration
#   $snapname    - optional snapshot to mount instead of the current state
#
# Returns nothing when the mountpoint has no volume or no mount path.
# Otherwise returns the source path; in list context a second element tells
# whether a loop device is used for it.
# Dies for unsupported storage types/formats, for read-only bind mounts and
# when the mount path is changed by a symlink.
sub mountpoint_mount {
    my ($mountpoint, $rootdir, $storage_cfg, $snapname) = @_;

    my $volid = $mountpoint->{volume};
    my $mount = $mountpoint->{mp};
    my $type = $mountpoint->{type};

    return if !$volid || !$mount;

    my $mount_path;

    if (defined($rootdir)) {
        $rootdir =~ s!/+$!!;
        $mount_path = "$rootdir/$mount";
        $mount_path =~ s!/+!/!g;
        # the path is verified before it is created
        &$check_mount_path($mount_path);
        File::Path::mkpath($mount_path);
    }

    # second argument 1: presumably "no error", so $storage is undef for
    # values that are not storage volume IDs - TODO confirm
    my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1);

    die "unknown snapshot path for '$volid'" if !$storage && defined($snapname);

    # mount options derived from the mountpoint flags
    my $optstring = '';
    if (defined($mountpoint->{acl})) {
        $optstring .= ($mountpoint->{acl} ? 'acl' : 'noacl');
    }
    if ($mountpoint->{ro}) {
        $optstring .= ',' if $optstring;
        $optstring .= 'ro';
    }

    # note: '-o' is passed even when $optstring is empty
    my @extra_opts = ('-o', $optstring);

    if ($storage) {

        my $scfg = PVE::Storage::storage_config($storage_cfg, $storage);
        my $path = PVE::Storage::path($storage_cfg, $volid, $snapname);

        my ($vtype, undef, undef, undef, undef, $isBase, $format) =
            PVE::Storage::parse_volname($storage_cfg, $volid);

        $format = 'iso' if $vtype eq 'iso'; # allow to handle iso files

        if ($format eq 'subvol') {
            if ($mount_path) {
                if ($snapname) {
                    # subvol snapshots can only be mounted on zfspool storages
                    if ($scfg->{type} eq 'zfspool') {
                        my $path_arg = $path;
                        $path_arg =~ s!^/+!!;
                        PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, '-t', 'zfs', $path_arg, $mount_path]);
                    } else {
                        die "cannot mount subvol snapshots for storage type '$scfg->{type}'\n";
                    }
                } else {
                    if ($mountpoint->{ro}) {
                        die "read-only bind mounts not supported\n";
                    }
                    PVE::Tools::run_command(['mount', '-o', 'bind', @extra_opts, $path, $mount_path]);
                }
            }
            return wantarray ? ($path, 0) : $path;
        } elsif ($format eq 'raw' || $format eq 'iso') {
            my $use_loopdev = 0;
            if ($scfg->{path}) {
                # storages with a 'path' setting are mounted via a loop device
                push @extra_opts, '-o', 'loop';
                $use_loopdev = 1;
            } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' ||
                     $scfg->{type} eq 'rbd' || $scfg->{type} eq 'lvmthin') {
                # do nothing
            } else {
                die "unsupported storage type '$scfg->{type}'\n";
            }
            if ($mount_path) {
                if ($format eq 'iso') {
                    PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, $path, $mount_path]);
                } elsif ($isBase || defined($snapname)) {
                    # base volumes and snapshots are mounted read-only
                    PVE::Tools::run_command(['mount', '-o', 'ro,noload', @extra_opts, $path, $mount_path]);
                } else {
                    PVE::Tools::run_command(['mount', @extra_opts, $path, $mount_path]);
                }
            }
            return wantarray ? ($path, $use_loopdev) : $path;
        } else {
            die "unsupported image format '$format'\n";
        }
    } elsif ($type eq 'device') {
        PVE::Tools::run_command(['mount', @extra_opts, $volid, $mount_path]) if $mount_path;
        return wantarray ? ($volid, 0) : $volid;
    } elsif ($type eq 'bind') {
        if ($mountpoint->{ro}) {
            die "read-only bind mounts not supported\n";
            # Theoretically we'd have to execute both:
            # mount -o bind $a $b
            # mount -o bind,remount,ro $a $b
        }
        die "directory '$volid' does not exist\n" if ! -d $volid;
        &$check_mount_path($volid);
        PVE::Tools::run_command(['mount', '-o', 'bind', @extra_opts, $volid, $mount_path]) if $mount_path;
        return wantarray ? ($volid, 0) : $volid;
    }

    die "unsupported storage";
}
2295
# Collect the volume IDs of all storage-backed mount points of a container.
#
# Parameters:
#   $conf     - container configuration, walked with foreach_mountpoint()
#   $excludes - optional mount point key to leave out of the result
#
# Only mount points of type 'volume' whose volume ID parses into a storage ID
# are included.
#
# Returns an array reference of volume IDs.
sub get_vm_volumes {
    my ($conf, $excludes) = @_;

    my @volumes;

    my $collect = sub {
        my ($ms, $mountpoint) = @_;

        # the caller asked for this mount point key to be skipped
        return if $excludes && $ms eq $excludes;

        my $volid = $mountpoint->{volume};
        return unless $volid && $mountpoint->{type} eq 'volume';

        # the second argument asks parse_volume_id() not to die on failure,
        # so a false storage ID means "not a storage volume"
        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        push @volumes, $volid;
    };

    foreach_mountpoint($conf, $collect);

    return \@volumes;
}
2318
# Create an ext4 filesystem on $dev.
#
# Parameters:
#   $dev      - device or image path handed to mkfs.ext4
#   $rootuid  - numeric user ID that should own the filesystem root
#   $rootgid  - numeric group ID that should own the filesystem root
#
# The filesystem is created with the 'mmp' feature enabled and the root
# directory owner set through the 'root_owner' extended option.
sub mkfs {
    my ($dev, $rootuid, $rootgid) = @_;

    my @cmd = ('mkfs.ext4');
    push @cmd, '-O', 'mmp';
    push @cmd, '-E', "root_owner=$rootuid:$rootgid";
    push @cmd, $dev;

    PVE::Tools::run_command(\@cmd);
}
2326
# Format a container volume with ext4 via mkfs().
#
# Parameters:
#   $storage_cfg - storage configuration passed through to PVE::Storage
#   $volid       - either an absolute /dev/... path or a storage volume ID
#   $rootuid     - numeric user ID that should own the filesystem root
#   $rootgid     - numeric group ID that should own the filesystem root
#
# Dies if $volid is neither a device path nor a storage volume, or if the
# storage volume is not in 'raw' format.
sub format_disk {
    my ($storage_cfg, $volid, $rootuid, $rootgid) = @_;

    if ($volid =~ m!^/dev/.+!) {
        # Pass the owner through for plain devices too; without it mkfs()
        # would build "root_owner=:" from undefined values and the
        # filesystem root would not get the requested owner.
        mkfs($volid, $rootuid, $rootgid);
        return;
    }

    my ($storage) = PVE::Storage::parse_volume_id($volid, 1);

    die "cannot format volume '$volid' with no storage\n" if !$storage;

    # the volume has to be active before its path can be formatted
    PVE::Storage::activate_volumes($storage_cfg, [$volid]);

    my $path = PVE::Storage::path($storage_cfg, $volid);

    # only the image format (7th return value) is of interest here
    my $format = (PVE::Storage::parse_volname($storage_cfg, $volid))[6];

    die "cannot format volume '$volid' (format == $format)\n"
        if $format ne 'raw';

    mkfs($path, $rootuid, $rootgid);
}
2351
# Free every volume in $vollist.
#
# Parameters:
#   $storecfg - storage configuration passed through to PVE::Storage
#   $vollist  - array reference of volume IDs to free
#
# Failures are reported as warnings so that the remaining volumes are still
# processed.
sub destroy_disks {
    my ($storecfg, $vollist) = @_;

    for my $volume (@$vollist) {
        eval {
            PVE::Storage::vdisk_free($storecfg, $volume);
        };
        if (my $err = $@) {
            warn $err;
        }
    }
}
2360
# Allocate (and, for raw images, format) the volumes requested in $settings
# and record the resulting mount point strings in $conf.
#
# Parameters:
#   $storecfg - storage configuration passed through to PVE::Storage
#   $vmid     - container ID the new volumes are allocated for
#   $settings - mount point entries, walked with foreach_mountpoint()
#   $conf     - container configuration; $conf->{$ms} is set for every entry
#
# A volume given as "<storage>:<size in GB>" is newly allocated; anything else
# is treated as an existing volume/directory/device and only written to $conf.
#
# Returns an array reference with the IDs of the newly allocated volumes.
# On any error all volumes allocated so far are freed and the error is
# re-thrown.
sub create_disks {
    my ($storecfg, $vmid, $settings, $conf) = @_;

    my $vollist = [];

    eval {
        my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
        my $chown_vollist = [];

        foreach_mountpoint($settings, sub {
            my ($ms, $mountpoint) = @_;

            my $volid = $mountpoint->{volume};

            my ($storage) = PVE::Storage::parse_volume_id($volid, 1);

            if ($storage && ($volid =~ m/^([^:\s]+):(\d+(\.\d+)?)$/)) {
                my $size_gb = $2;

                my $size_kb = int($size_gb * 1024) * 1024;

                my $scfg = PVE::Storage::storage_config($storecfg, $storage);
                my $stype = $scfg->{type};
                # fixme: use better naming ct-$vmid-disk-X.raw?

                # Register each new volume in $vollist immediately after it
                # has been allocated, so that the cleanup below also frees a
                # volume whose formatting fails afterwards.
                my $alloc = sub {
                    my ($fmt, $size) = @_;
                    my $new_volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, $fmt,
                                                              undef, $size);
                    push @$vollist, $new_volid;
                    return $new_volid;
                };

                if ($stype eq 'dir' || $stype eq 'nfs') {
                    if ($size_kb > 0) {
                        $volid = $alloc->('raw', $size_kb);
                        format_disk($storecfg, $volid, $rootuid, $rootgid);
                    } else {
                        # size 0 requests a plain subvolume instead of an image
                        $volid = $alloc->('subvol', 0);
                        push @$chown_vollist, $volid;
                    }
                } elsif ($stype eq 'zfspool') {
                    $volid = $alloc->('subvol', $size_kb);
                    push @$chown_vollist, $volid;
                } elsif ($stype eq 'drbd' || $stype eq 'lvm' || $stype eq 'lvmthin') {
                    $volid = $alloc->('raw', $size_kb);
                    format_disk($storecfg, $volid, $rootuid, $rootgid);
                } elsif ($stype eq 'rbd') {
                    die "krbd option must be enabled on storage type '$stype'\n" if !$scfg->{krbd};
                    $volid = $alloc->('raw', $size_kb);
                    format_disk($storecfg, $volid, $rootuid, $rootgid);
                } else {
                    die "unable to create containers on storage type '$stype'\n";
                }

                $mountpoint->{volume} = $volid;
                $mountpoint->{size} = $size_kb * 1024;
                $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            } else {
                # use specified/existing volid/dir/device
                $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            }
        });

        # Subvolumes are not formatted, so their top-level directory is handed
        # to the container's root user explicitly.
        PVE::Storage::activate_volumes($storecfg, $chown_vollist, undef);
        foreach my $volid (@$chown_vollist) {
            my $path = PVE::Storage::path($storecfg, $volid, undef);
            chown($rootuid, $rootgid, $path)
                or warn "unable to change owner of '$path' to $rootuid:$rootgid - $!\n";
        }
        PVE::Storage::deactivate_volumes($storecfg, $chown_vollist, undef);
    };
    # free allocated images on error
    if (my $err = $@) {
        destroy_disks($storecfg, $vollist);
        die $err;
    }
    return $vollist;
}
2438
2439 # bash completion helper
2440
# Bash completion helper: list the volume IDs offered for a template or
# backup argument.
#
# Parameters:
#   $cmdname - name of the command being completed; 'restore' lists backups,
#              every other command lists container templates
#   $pname   - parameter name (unused)
#   $cvalue  - text typed so far; a leading "<storage>:" restricts the
#              listing to that storage
#
# Returns an array reference of volume IDs.
sub complete_os_templates {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    # stays undefined when no storage prefix has been typed yet
    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $vtype = 'vztmpl';
    $vtype = 'backup' if $cmdname eq 'restore';

    my $data = PVE::Storage::template_list($cfg, $storeid, $vtype);

    my @volids;
    for my $items (values %$data) {
        for my $item (@$items) {
            next if !defined($item->{volid});
            push @volids, $item->{volid};
        }
    }

    return \@volids;
}
2464
# Shared implementation of the container ID completion helpers.
#
# Parameters:
#   $running - undef: return every known container ID;
#              true:  return only non-template containers that are active;
#              false (but defined): return only non-template containers that
#              are not active
#
# Returns an array reference of container IDs.
my $complete_ctid_full = sub {
    my ($running) = @_;

    my $status = vmstatus();
    my $active = list_active_containers();

    my @ids = keys %$status;

    if (defined($running)) {
        @ids = grep {
            my $d = $status->{$_};
            # keep the ID only if its active state matches the requested one
            !$d->{template} && !($running xor $active->{$_});
        } @ids;
    }

    return \@ids;
};
2486
# Completion helper: IDs of all containers, without any state filter.
sub complete_ctid {
    my $ids = $complete_ctid_full->();
    return $ids;
}
2490
# Completion helper: IDs of non-template containers that are not active.
sub complete_ctid_stopped {
    my $ids = $complete_ctid_full->(0);
    return $ids;
}
2494
# Completion helper: IDs of non-template containers that are active.
sub complete_ctid_running {
    my $ids = $complete_ctid_full->(1);
    return $ids;
}
2498
# Extract the user namespace ID mappings from a container configuration.
#
# Parameters:
#   $conf - container configuration; $conf->{lxc} is read as a list of
#           [key, value] pairs and $conf->{unprivileged} as a flag
#
# Every 'lxc.id_map' entry must look like "<u|g> <ct id> <host id> <length>".
# The host ID that container ID 0 maps to becomes the root uid (for 'u'
# entries) or the root gid (for 'g' entries).
#
# Returns the list ($id_map, $rootuid, $rootgid), where $id_map is an array
# reference of [type, ct, host, length] entries.
# Dies if an 'lxc.id_map' value cannot be parsed.
sub parse_id_maps {
    my ($conf) = @_;

    my ($rootuid, $rootgid) = (0, 0);
    my @maps;

    my $entries = $conf->{lxc};
    for my $entry (@$entries) {
        my ($key, $value) = @$entry;
        next unless $key eq 'lxc.id_map';

        my ($type, $ct, $host, $length) =
            $value =~ /^([ug])\s+(\d+)\s+(\d+)\s+(\d+)\s*$/
            or die "failed to parse id_map: $value\n";

        push @maps, [$type, $ct, $host, $length];

        # only the mapping of container ID 0 determines the root owner
        next if $ct != 0;
        if ($type eq 'u') {
            $rootuid = $host;
        } else {
            $rootgid = $host;
        }
    }

    my $id_map = \@maps;

    # unprivileged containers without explicit mappings get a default range
    if ($conf->{unprivileged} && !@$id_map) {
        # Should we read them from /etc/subuid?
        $id_map = [
            ['u', '0', '100000', '65536'],
            ['g', '0', '100000', '65536'],
        ];
        $rootuid = 100000;
        $rootgid = 100000;
    }

    return ($id_map, $rootuid, $rootgid);
}
2531
# Build the command prefix used to run a command inside a user namespace.
#
# Parameters:
#   $id_map - array reference of [type, ct, host, length] mapping entries
#
# Returns an array reference: empty when there are no mappings, otherwise
# 'lxc-usernsexec' followed by one '-m <type>:<ct>:<host>:<length>' pair per
# mapping and a terminating '--'.
sub userns_command {
    my ($id_map) = @_;

    return [] if !@$id_map;

    my @cmd = ('lxc-usernsexec');
    for my $mapping (@$id_map) {
        push @cmd, '-m', join(':', @$mapping);
    }
    push @cmd, '--';

    return \@cmd;
}
2539
2540 1;