]> git.proxmox.com Git - pve-container.git/blob - src/PVE/LXC.pm
config: deal with re-added previously unused volumes
[pve-container.git] / src / PVE / LXC.pm
1 package PVE::LXC;
2
3 use strict;
4 use warnings;
5 use POSIX qw(EINTR);
6
7 use Socket;
8
9 use File::Path;
10 use File::Spec;
11 use Cwd qw();
12 use Fcntl qw(O_RDONLY);
13
14 use PVE::Cluster qw(cfs_register_file cfs_read_file);
15 use PVE::Storage;
16 use PVE::SafeSyslog;
17 use PVE::INotify;
18 use PVE::JSONSchema qw(get_standard_option);
19 use PVE::Tools qw($IPV6RE $IPV4RE dir_glob_foreach lock_file lock_file_full);
20 use PVE::Network;
21 use PVE::AccessControl;
22 use PVE::ProcFSTools;
23 use Time::HiRes qw (gettimeofday);
24
25 use Data::Dumper;
26
# Name of the local node; used as the default node when building config paths.
my $nodename = PVE::INotify::nodename();

# Host CPU information (vmstatus reads {cpus} and {user_hz} from it).
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();

# Common tar flags, exported as a package variable.
our $COMMON_TAR_FLAGS = [
    qw(
        --sparse
        --numeric-owner
        --acls
        --xattrs
        --xattrs-include=user.*
        --xattrs-include=security.capability
        --warning=no-xattr-write
    ),
];

# Register the parser/writer pair for container config files.
cfs_register_file('/lxc/', \&parse_pct_config, \&write_pct_config);
38
# Property-string schema shared by the container root filesystem and
# (extended with 'mp') by additional mount points.
my $rootfs_desc = {
    volume => {
        type               => 'string',
        default_key        => 1,
        format             => 'pve-lxc-mp-string',
        format_description => 'volume',
        description        => 'Volume, device or directory to mount into the container.',
    },
    backup => {
        type               => 'boolean',
        format_description => '[1|0]',
        description        => 'Whether to include the mountpoint in backups.',
        optional           => 1,
    },
    size => {
        type               => 'string',
        format             => 'disk-size',
        format_description => 'DiskSize',
        description        => 'Volume size (read only value).',
        optional           => 1,
    },
    acl => {
        type               => 'boolean',
        format_description => 'acl',
        description        => 'Explicitly enable or disable ACL support.',
        optional           => 1,
    },
    ro => {
        type               => 'boolean',
        format_description => 'ro',
        description        => 'Read-only mountpoint (not supported with bind mounts)',
        optional           => 1,
    },
    quota => {
        type               => 'boolean',
        format_description => '[0|1]',
        description        => 'Enable user quotas inside the container (not supported with zfs subvolumes)',
        optional           => 1,
    },
};

# Standard option describing the container root filesystem.
PVE::JSONSchema::register_standard_option(
    'pve-ct-rootfs',
    {
        type        => 'string',
        format      => $rootfs_desc,
        description => "Use volume as container root.",
        optional    => 1,
    },
);

# Standard option describing a snapshot name.
PVE::JSONSchema::register_standard_option(
    'pve-lxc-snapshot-name',
    {
        description => "The name of the snapshot.",
        type        => 'string',
        format      => 'pve-configid',
        maxLength   => 40,
    },
);
91
# Schema of all plain "key: value" options of a container config.
# check_type() validates values against it; the netN, mpN and unusedN
# entries are added to this hash further down in the file.
my $confdesc = {
    lock => {
        optional    => 1,
        type        => 'string',
        description => "Lock/unlock the VM.",
        enum        => ['migrate', 'backup', 'snapshot', 'rollback'],
    },
    onboot => {
        optional    => 1,
        type        => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default     => 0,
    },
    startup  => get_standard_option('pve-startup-order'),
    template => {
        optional    => 1,
        type        => 'boolean',
        description => "Enable/disable Template.",
        default     => 0,
    },
    arch => {
        optional    => 1,
        type        => 'string',
        enum        => [qw(amd64 i386)],
        description => "OS architecture type.",
        default     => 'amd64',
    },
    ostype => {
        optional    => 1,
        type        => 'string',
        enum        => [qw(debian ubuntu centos fedora opensuse archlinux alpine)],
        description => "OS type. Corresponds to lxc setup scripts in /usr/share/lxc/config/<ostype>.common.conf.",
    },
    console => {
        optional    => 1,
        type        => 'boolean',
        description => "Attach a console device (/dev/console) to the container.",
        default     => 1,
    },
    tty => {
        optional    => 1,
        type        => 'integer',
        description => "Specify the number of tty available to the container",
        minimum     => 0,
        maximum     => 6,
        default     => 2,
    },
    cpulimit => {
        optional    => 1,
        type        => 'number',
        description => "Limit of CPU usage. Note if the computer has 2 CPUs, it has a total of '2' CPU time. Value '0' indicates no CPU limit.",
        minimum     => 0,
        maximum     => 128,
        default     => 0,
    },
    cpuunits => {
        optional    => 1,
        type        => 'integer',
        description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to the weights of all the other running VMs.\n\nNOTE: You can disable fair-scheduler configuration by setting this to 0.",
        minimum     => 0,
        maximum     => 500000,
        default     => 1024,
    },
    memory => {
        optional    => 1,
        type        => 'integer',
        description => "Amount of RAM for the VM in MB.",
        minimum     => 16,
        default     => 512,
    },
    swap => {
        optional    => 1,
        type        => 'integer',
        description => "Amount of SWAP for the VM in MB.",
        minimum     => 0,
        default     => 512,
    },
    hostname => {
        optional    => 1,
        description => "Set a host name for the container.",
        type        => 'string',
        format      => 'dns-name',
        maxLength   => 255,
    },
    description => {
        optional    => 1,
        type        => 'string',
        description => "Container description. Only used on the configuration web interface.",
    },
    searchdomain => {
        optional    => 1,
        type        => 'string',
        format      => 'dns-name-list',
        description => "Sets DNS search domains for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
    },
    nameserver => {
        optional    => 1,
        type        => 'string',
        format      => 'address-list',
        description => "Sets DNS server IP address for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
    },
    rootfs => get_standard_option('pve-ct-rootfs'),
    parent => {
        optional    => 1,
        type        => 'string',
        format      => 'pve-configid',
        maxLength   => 40,
        description => "Parent snapshot name. This is used internally, and should not be modified.",
    },
    snaptime => {
        optional    => 1,
        description => "Timestamp for snapshots.",
        type        => 'integer',
        minimum     => 0,
    },
    cmode => {
        optional    => 1,
        description => "Console mode. By default, the console command tries to open a connection to one of the available tty devices. By setting cmode to 'console' it tries to attach to /dev/console instead. If you set cmode to 'shell', it simply invokes a shell inside the container (no login).",
        type        => 'string',
        enum        => [qw(shell console tty)],
        default     => 'tty',
    },
    protection => {
        optional    => 1,
        type        => 'boolean',
        description => "Sets the protection flag of the container. This will prevent the CT or CT's disk remove/update operation.",
        default     => 0,
    },
    unprivileged => {
        optional    => 1,
        type        => 'boolean',
        description => "Makes the container run as unprivileged user. (Should not be modified manually.)",
        default     => 0,
    },
};
223
# Raw "lxc.*" keys accepted in a container config.
#   value 1      -> key is passed through unchanged
#   value string -> key is rejected; the string is the warning shown to the user
# Keys below "lxc.cgroup." are not listed here; parse_pct_config accepts
# them by prefix.
my $valid_lxc_conf_keys = {
    (
        map { $_ => 1 } qw(
            lxc.include
            lxc.arch
            lxc.utsname
            lxc.haltsignal
            lxc.rebootsignal
            lxc.stopsignal
            lxc.init_cmd
            lxc.network.type
            lxc.network.flags
            lxc.network.link
            lxc.network.mtu
            lxc.network.name
            lxc.network.hwaddr
            lxc.network.ipv4
            lxc.network.ipv4.gateway
            lxc.network.ipv6
            lxc.network.ipv6.gateway
            lxc.network.script.up
            lxc.network.script.down
            lxc.pts
            lxc.console.logfile
            lxc.console
            lxc.tty
            lxc.devttydir
            lxc.hook.autodev
            lxc.autodev
            lxc.kmsg
            lxc.mount
            lxc.mount.entry
            lxc.mount.auto
            lxc.rootfs.mount
            lxc.cap.drop
            lxc.cap.keep
            lxc.aa_profile
            lxc.aa_allow_incomplete
            lxc.se_context
            lxc.seccomp
            lxc.id_map
            lxc.hook.pre-start
            lxc.hook.pre-mount
            lxc.hook.mount
            lxc.hook.start
            lxc.hook.stop
            lxc.hook.post-stop
            lxc.hook.clone
            lxc.hook.destroy
            lxc.loglevel
            lxc.logfile
            lxc.start.auto
            lxc.start.delay
            lxc.start.order
            lxc.group
            lxc.environment
        )
    ),
    'lxc.rootfs'         => 'lxc.rootfs is auto generated from rootfs',
    'lxc.rootfs.options' => 'lxc.rootfs.options is not supported, please use mountpoint options in the "rootfs" key',
};
283
# Property-string schema of a container network interface ("netN" options).
my $netconf_desc = {
    type => {
        type => 'string',
        optional => 1,
        description => "Network interface type.",
        enum => [qw(veth)],
    },
    name => {
        type => 'string',
        format_description => 'String',
        description => 'Name of the network device as seen from inside the container. (lxc.network.name)',
        pattern => '[-_.\w\d]+',
    },
    bridge => {
        type => 'string',
        format_description => 'vmbr<Number>',
        description => 'Bridge to attach the network device to.',
        pattern => '[-_.\w\d]+',
        optional => 1,
    },
    hwaddr => {
        type => 'string',
        format_description => 'MAC',
        # The original text was a copy of the 'bridge' description.
        description => 'MAC address of the network device. (lxc.network.hwaddr)',
        pattern => qr/(?:[a-f0-9]{2}:){5}[a-f0-9]{2}/i,
        optional => 1,
    },
    mtu => {
        type => 'integer',
        format_description => 'Number',
        description => 'Maximum transfer unit of the interface. (lxc.network.mtu)',
        minimum => 64, # minimum ethernet frame is 64 bytes
        optional => 1,
    },
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        description => 'IPv4 address in CIDR format.',
        optional => 1,
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        description => 'Default gateway for IPv4 traffic.',
        optional => 1,
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        description => 'IPv6 address in CIDR format.',
        optional => 1,
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        description => 'Default gateway for IPv6 traffic.',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        format_description => '[1|0]',
        description => "Controls whether this interface's firewall rules should be used.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        format_description => 'VlanNo',
        minimum => '2',
        maximum => '4094',
        description => "VLAN tag for this interface.",
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:;\d+)*/,
        format_description => 'vlanid[;vlanid...]',
        description => "VLAN ids to pass through the interface",
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-lxc-network', $netconf_desc);
369
# Add the net0 .. net<N-1> options to the config schema; every entry gets
# its own hash but shares the same network property-string format.
my $MAX_LXC_NETWORKS = 10;
for my $i (0 .. $MAX_LXC_NETWORKS - 1) {
    $confdesc->{"net$i"} = {
        optional    => 1,
        type        => 'string',
        format      => $netconf_desc,
        description => "Specifies network interfaces for the container.",
    };
}

# Format used by the 'volume' and 'mp' properties of mount point strings.
PVE::JSONSchema::register_format('pve-lxc-mp-string', \&verify_lxc_mp_string);
# Verifier for the 'pve-lxc-mp-string' format.
#
# Rejects paths that contain relative components which could be used to
# escape the intended directory. Returns the unmodified string on success.
# On failure it returns undef when $noerr is true and dies otherwise.
sub verify_lxc_mp_string{
    my ($mp, $noerr) = @_;

    my @forbidden = (
        qr@/\.\.?/@,    # "/./" or "/../" anywhere in the string
        qr@/\.\.?$@,    # "/." or "/.." at the end
        qr@^\.\./@,     # "../" at the beginning
    );

    foreach my $re (@forbidden) {
        next if $mp !~ $re;
        return undef if $noerr;
        die "$mp contains illegal character sequences\n";
    }

    return $mp;
}
396
# Mount point schema: everything the rootfs schema has plus the mandatory
# 'mp' property (target path inside the container).
my $mp_desc = {
    %$rootfs_desc,
    mp => {
        type => 'string',
        format => 'pve-lxc-mp-string',
        format_description => 'Path',
        description => 'Path to the mountpoint as seen from inside the container.',
    },
};
PVE::JSONSchema::register_format('pve-ct-mountpoint', $mp_desc);

# Schema shared by all "unusedN" options.
my $unuseddesc = {
    optional => 1,
    type => 'string', format => 'pve-volume-id',
    description => "Reference to unused volumes.",
};

# Add the mp0 .. mp<N-1> options to the config schema.
my $MAX_MOUNT_POINTS = 10;
for my $i (0 .. $MAX_MOUNT_POINTS - 1) {
    $confdesc->{"mp$i"} = {
        optional => 1,
        type => 'string', format => $mp_desc,
        description => "Use volume as container mount point (experimental feature).",
    };
}

# Add the unused0 .. unused<N-1> options. The loop is bounded by
# $MAX_UNUSED_DISKS (not $MAX_MOUNT_POINTS) so that the schema always matches
# the range add_unused_volume() searches, even if the two limits diverge.
my $MAX_UNUSED_DISKS = $MAX_MOUNT_POINTS;
for my $i (0 .. $MAX_UNUSED_DISKS - 1) {
    $confdesc->{"unused$i"} = $unuseddesc;
}
428
# Serialize a parsed container config into its on-disk text form.
#
# Parameters:
#   $filename - config file name (not used by the writer itself)
#   $conf     - config hash; top-level 'snapstate' is removed from it
# Returns the raw config text: the main section followed by one
# "[snapname]" section per entry of $conf->{snapshots}, sorted by name.
# Dies if a property value contains a newline.
sub write_pct_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    # keys that are never written as plain "key: value" lines
    my %internal = map { $_ => 1 }
        qw(digest description pending snapshots snapname lxc);

    my $render_section = sub {
        my ($section) = @_;

        my @out;

        # the description goes to the top of the section as comment lines
        my $descr = $section->{description} || '';
        foreach my $text (split(/\n/, $descr)) {
            push @out, '#' . PVE::Tools::encode_text($text) . "\n";
        }

        foreach my $key (sort keys %$section) {
            next if $internal{$key};
            my $value = $section->{$key};
            die "detected invalid newline inside property '$key'\n" if $value =~ m/\n/;
            push @out, "$key: $value\n";
        }

        # raw lxc.* entries keep their original order
        if (my $lxcconf = $section->{lxc}) {
            foreach my $entry (@$lxcconf) {
                my ($lxc_key, $lxc_value) = @$entry;
                push @out, "$lxc_key: $lxc_value\n";
            }
        }

        return join('', @out);
    };

    my $raw = $render_section->($conf);

    foreach my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= $render_section->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
472
# Validate (and normalize) a single config value against $confdesc.
#
# Returns the normalized value: 1/0 for booleans, an integer for integers,
# the unchanged value for numbers and strings. Dies on unknown keys,
# undefined values, values containing line breaks, or type mismatches.
sub check_type {
    my ($key, $value) = @_;

    my $schema = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$schema;

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    my $type = $schema->{type};

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        # strings are only checked when the schema declares a format
        my $fmt = $schema->{format};
        PVE::JSONSchema::check_format($fmt, $value) if $fmt;
        return $value;
    }

    die "internal error"
}
508
# Parse the raw text of a container config file.
#
# Parameters:
#   $filename - config path; must end in "/lxc/<vmid>.conf"
#   $raw      - file content, or undef if the file does not exist
# Returns undef when $raw is undef, otherwise a hash with the top-level
# options, a 'digest' (SHA1 of $raw) and a 'snapshots' hash holding one
# sub-config per "[name]" section. Dies on an unexpected file name;
# unparsable lines and invalid values only produce warnings.
sub parse_pct_config {
    my ($filename, $raw) = @_;

    return undef if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
    };

    # the dot is escaped so that only a real ".conf" suffix is accepted
    $filename =~ m|/lxc/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    my $conf = $res;    # section currently being filled
    my $descr = '';     # description collected for the current section
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        # "[snapname]" starts a new snapshot section
        if ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            $section = $1;
            $conf->{description} = $descr if $descr;
            $descr = '';
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        # comment lines carry the description
        if ($line =~ m/^\#(.*)\s*$/) {
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(lxc\.[a-z0-9_\-\.]+)(:|\s*=)\s*(.*?)\s*$/) {
            my $key = $1;
            my $value = $3;
            # 1 = pass through, string = rejection message, 0 = unknown key
            my $validity = $valid_lxc_conf_keys->{$key} || 0;
            if ($validity eq 1 || $key =~ m/^lxc\.cgroup\./) {
                push @{$conf->{lxc}}, [$key, $value];
            } elsif (my $errmsg = $validity) {
                warn "vm $vmid - $key: $errmsg\n";
            } else {
                warn "vm $vmid - unable to parse config: $line\n";
            }
        } elsif ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored at line start like every other key, so the pattern
            # cannot match in the middle of an unrelated line
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(\S.*)\s*$/) {
            my $key = $1;
            my $value = $2;
            eval { $value = check_type($key, $value); };
            warn "vm $vmid - unable to parse value of '$key' - $@" if $@;
            # the value is stored even when the check failed (only warned)
            $conf->{$key} = $value;
        } else {
            warn "vm $vmid - unable to parse config: $line\n";
        }
    }

    $conf->{description} = $descr if $descr;

    delete $res->{snapstate}; # just to be sure

    return $res;
}
577
# List the containers of the local node.
#
# Returns a hash { $vmid => { type => 'lxc' } } built from the cluster VM
# list, restricted to entries of type 'lxc' located on this node.
sub config_list {
    my $res = {};

    my $vmlist = PVE::Cluster::get_vmlist();
    return $res if !$vmlist || !$vmlist->{ids};

    my $ids = $vmlist->{ids};
    for my $vmid (keys %$ids) {
        next if !$vmid; # skip CT0

        my $entry = $ids->{$vmid};
        my $is_local = $entry->{node} && $entry->{node} eq $nodename;
        next if !$is_local;
        my $is_lxc = $entry->{type} && $entry->{type} eq 'lxc';
        next if !$is_lxc;

        $res->{$vmid}->{type} = 'lxc';
    }

    return $res;
}
593
# Return the cluster-filesystem relative path of a container config.
# $node defaults to the local node name.
sub cfs_config_path {
    my ($vmid, $node) = @_;

    $node ||= $nodename;

    return "nodes/$node/lxc/$vmid.conf";
}
600
# Return the absolute path (below /etc/pve) of a container config.
# $node defaults to the local node name.
sub config_file {
    my ($vmid, $node) = @_;

    return "/etc/pve/" . cfs_config_path($vmid, $node);
}
607
# Load the parsed config of a container.
# $node defaults to the local node name. Dies if no config can be read.
sub load_config {
    my ($vmid, $node) = @_;

    $node ||= $nodename;

    my $conf = PVE::Cluster::cfs_read_file(cfs_config_path($vmid, $node));
    if (!defined($conf)) {
        die "container $vmid does not exist\n";
    }

    return $conf;
}
619
# Create the config of a new container on the local node.
# Attempts to create the node's lxc config directory first (the mkdir
# result is not checked), then writes the config.
sub create_config {
    my ($vmid, $conf) = @_;

    mkdir "/etc/pve/nodes/$nodename/lxc";

    write_config($vmid, $conf);
}
628
# Remove the config file of a container on the local node.
sub destroy_config {
    my ($vmid) = @_;

    my $path = config_file($vmid, $nodename);
    unlink $path;
}
634
# Write a container config for the local node through the cluster
# filesystem layer.
sub write_config {
    my ($vmid, $conf) = @_;

    PVE::Cluster::cfs_write_file(cfs_config_path($vmid), $conf);
}
642
# flock: we use one file handle per process, so lock file
# can be called multiple times and will succeed for the same process.

my $lock_handles = {};
my $lockdir = "/run/lock/lxc";

# Return the path of the lock file guarding the config of container $vmid.
sub config_file_lock {
    my ($vmid) = @_;

    return join('/', $lockdir, "pve-config-${vmid}.lock");
}
654
# Run $code->(@param) while holding the config lock of container $vmid.
#
# $timeout is handed to lock_file(). Returns whatever lock_file() returns
# and re-throws any error it left in $@.
sub lock_config_full {
    my ($vmid, $timeout, $code, @param) = @_;

    my $lockfile = config_file_lock($vmid);

    # make sure the lock directory exists
    mkdir $lockdir unless -d $lockdir;

    my $result = lock_file($lockfile, $timeout, $code, @param);
    die $@ if $@;

    return $result;
}
668
# Like lock_config_full(), but passes $shared on to lock_file_full().
# NOTE(review): $shared presumably selects a shared instead of an exclusive
# lock - confirm against PVE::Tools::lock_file_full.
sub lock_config_mode {
    my ($vmid, $timeout, $shared, $code, @param) = @_;

    my $lockfile = config_file_lock($vmid);

    # make sure the lock directory exists
    mkdir $lockdir unless -d $lockdir;

    my $result = lock_file_full($lockfile, $timeout, $shared, $code, @param);
    die $@ if $@;

    return $result;
}
682
# Run $code->(@param) under the container config lock, using a timeout
# value of 10.
sub lock_config {
    my ($vmid, $code, @param) = @_;

    my $timeout = 10;

    return lock_config_full($vmid, $timeout, $code, @param);
}
688
# Return true if $name is a known container config option.
sub option_exists {
    my ($name) = @_;

    my $schema = $confdesc->{$name};

    return defined($schema);
}
694
# Add JSON properties for the create and set functions.
#
# Copies every config option schema into $prop, except the internal
# 'parent' and 'snaptime' options and any key $prop already defines.
# Returns $prop.
sub json_config_properties {
    my $prop = shift;

    for my $opt (keys %$confdesc) {
        next if $opt eq 'parent' || $opt eq 'snaptime';
        $prop->{$opt} = $confdesc->{$opt} if !$prop->{$opt};
    }

    return $prop;
}
707
708 # container status helpers
709
# Detect active containers by scanning /proc/net/unix for the abstract
# command socket "@/var/lib/lxc/<vmid>/command".
#
# Returns a hash { $vmid => 1 }; the hash is empty when /proc/net/unix
# cannot be opened.
sub list_active_containers {

    my $res = {};

    # a similar test is used by lxccontainers.c: list_active_containers
    my $fh = IO::File->new("/proc/net/unix", "r");
    return $res if !$fh;

    while (defined(my $line = <$fh>)) {
        # the socket path is the last column
        next if $line !~ m/^[a-f0-9]+:\s\S+\s\S+\s\S+\s\S+\s\S+\s\d+\s(\S+)$/;
        my $path = $1;

        $res->{$1} = 1 if $path =~ m!^@/var/lib/lxc/(\d+)/command$!;
    }

    close($fh);

    return $res;
}
733
# Return 1 if container $vmid is active, undef otherwise.
# Warning: this is slow, it scans all active containers on every call.
sub check_running {
    my ($vmid) = @_;

    my $active = list_active_containers();

    return defined($active->{$vmid}) ? 1 : undef;
}
744
# Return the PVE::Tools::df() result for the root directory of the process
# $pid, i.e. "/proc/$pid/root/". $vmid is not used.
sub get_container_disk_usage {
    my ($vmid, $pid) = @_;

    my $rootdir = "/proc/$pid/root/";

    return PVE::Tools::df($rootdir, 1);
}
750
# Per-container CPU sample of the previous vmstatus() call
# ({ time, used, cpu } keyed by vmid).
my $last_proc_vmid_stat;

# Read the cpuacct.stat cgroup file of a container.
# Returns { utime => ..., stime => ... } or an empty hash when the content
# does not have the expected "user N\nsystem N\n" layout.
my $parse_cpuacct_stat = sub {
    my ($vmid) = @_;

    my $raw = read_cgroup_value('cpuacct', $vmid, 'cpuacct.stat', 1);

    my ($user, $system) = $raw =~ m/^user (\d+)\nsystem (\d+)\n/;
    return {} if !defined($user);

    return { utime => $user, stime => $system };
};
769
# Collect status information about containers.
#
# Parameters:
#   $opt_vmid - optional; restrict the result to this container
# Returns a hash keyed by vmid. Every entry holds status, name, cpus, disk,
# maxdisk, mem, maxmem, swap, maxswap, uptime, cpu, netin, netout, diskread,
# diskwrite and template; running containers additionally get their pid.
#
# CPU usage is computed relative to the sample taken by the previous call
# (kept in $last_proc_vmid_stat), so the first call for a container
# reports 0.
sub vmstatus {
    my ($opt_vmid) = @_;

    my $list = $opt_vmid ? { $opt_vmid => { type => 'lxc' }} : config_list();

    my $active_hash = list_active_containers();

    my $cpucount = $cpuinfo->{cpus} || 1;

    my $cdtime = gettimeofday;

    # first pass: static values from the config, defaults for everything else
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};

        eval { $d->{pid} = find_lxc_pid($vmid) if defined($active_hash->{$vmid}); };
        warn $@ if $@; # ignore errors (consider them stopped)

        $d->{status} = $d->{pid} ? 'running' : 'stopped';

        my $cfspath = cfs_config_path($vmid);
        my $conf = PVE::Cluster::cfs_read_file($cfspath) || {};

        $d->{name} = $conf->{'hostname'} || "CT$vmid";
        $d->{name} =~ s/[\s]//g;

        $d->{cpus} = $conf->{cpulimit} || $cpucount;

        if ($d->{pid}) {
            my $res = get_container_disk_usage($vmid, $d->{pid});
            $d->{disk} = $res->{used};
            $d->{maxdisk} = $res->{total};
        } else {
            $d->{disk} = 0;
            # use 4GB by default ??
            if (my $rootfs = $conf->{rootfs}) {
                my $rootinfo = parse_ct_rootfs($rootfs);
                # NOTE(review): this treats {size} as a GiB count, but the
                # parser passes it through PVE::JSONSchema::parse_size -
                # confirm which unit that returns.
                $d->{maxdisk} = int(($rootinfo->{size} || 4)*1024*1024)*1024;
            } else {
                $d->{maxdisk} = 4*1024*1024*1024;
            }
        }

        $d->{mem} = 0;
        $d->{swap} = 0;
        $d->{maxmem} = ($conf->{memory}||512)*1024*1024;
        $d->{maxswap} = ($conf->{swap}//0)*1024*1024;

        $d->{uptime} = 0;
        $d->{cpu} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $d->{template} = is_template($conf);
    }

    # second pass: live values from /proc and the cgroups of running CTs
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};
        my $pid = $d->{pid};

        next if !$pid; # skip stopped CTs

        # the method lxcfs uses; uptime stays 0 if the process is already gone
        my $ctime = (stat("/proc/$pid"))[10]; # 10 = ctime
        $d->{uptime} = time - $ctime if defined($ctime);

        $d->{mem} = read_cgroup_value('memory', $vmid, 'memory.usage_in_bytes');
        $d->{swap} = read_cgroup_value('memory', $vmid, 'memory.memsw.usage_in_bytes') - $d->{mem};

        my $blkio_bytes = read_cgroup_value('blkio', $vmid, 'blkio.throttle.io_service_bytes', 1);
        my @bytes = split(/\n/, $blkio_bytes);
        foreach my $byte (@bytes) {
            if (my ($key, $value) = $byte =~ /(Read|Write)\s+(\d+)/) {
                $d->{diskread} = $value if $key eq 'Read';
                $d->{diskwrite} = $value if $key eq 'Write';
            }
        }

        my $pstat = &$parse_cpuacct_stat($vmid);

        # the stat hash is empty when cpuacct.stat could not be parsed
        my $used = ($pstat->{utime} // 0) + ($pstat->{stime} // 0);

        my $old = $last_proc_vmid_stat->{$vmid};
        if (!$old) {
            # first sample: nothing to compare against yet
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($cdtime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            # interval too short for a meaningful value, reuse the last one
            $d->{cpu} = $old->{cpu};
        }
    }

    my $netdev = PVE::ProcFSTools::read_proc_net_dev();

    # devices named veth<vmid>i<N> are attributed to container <vmid>; the
    # device's receive counter is added to netout and transmit to netin
    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^veth([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $list->{$vmid};

        next if !$d;

        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};

    }

    return $list;
}
898
# Classify a mount point source string:
#   'device' - absolute path below /dev/
#   'bind'   - any other absolute path
#   'volume' - everything else
sub classify_mountpoint {
    my ($vol) = @_;

    return 'volume' if $vol !~ m!^/!;

    return $vol =~ m!^/dev/! ? 'device' : 'bind';
}
907
# Parse a mount point property string against the schema $desc.
#
# Parameters:
#   $desc  - property-string schema ($rootfs_desc or $mp_desc)
#   $data  - the property string; undef is treated as ''
#   $noerr - when true, return undef instead of dying on errors
# Returns the parsed hash with 'size' replaced by the result of
# PVE::JSONSchema::parse_size() and 'type' set by classify_mountpoint().
my $parse_ct_mountpoint_full = sub {
    my ($desc, $data, $noerr) = @_;

    $data //= '';

    my $res;
    eval { $res = PVE::JSONSchema::parse_property_string($desc, $data) };
    if ($@) {
        return undef if $noerr;
        die $@;
    }

    if (defined(my $size = $res->{size})) {
        # keep the original text in $size so the error message can show it
        my $parsed = PVE::JSONSchema::parse_size($size);
        if (!defined($parsed)) {
            return undef if $noerr;
            die "invalid size: $size\n";
        }
        $res->{size} = $parsed;
    }

    $res->{type} = classify_mountpoint($res->{volume});

    return $res;
};
933
# Parse a rootfs property string. The result always has mp => '/'.
# With $noerr set, parse errors yield undef instead of an exception.
sub parse_ct_rootfs {
    my ($data, $noerr) = @_;

    my $res = $parse_ct_mountpoint_full->($rootfs_desc, $data, $noerr);
    return $res if !defined($res);

    $res->{mp} = '/';

    return $res;
}
943
# Parse a mount point ("mpN") property string.
# With $noerr set, parse errors yield undef instead of an exception.
sub parse_ct_mountpoint {
    my ($data, $noerr) = @_;

    return $parse_ct_mountpoint_full->($mp_desc, $data, $noerr);
}
949
# Format a parsed mount point hash as a property string.
# The derived 'type' key is always skipped; 'mp' is skipped as well when
# $nomp is set.
sub print_ct_mountpoint {
    my ($info, $nomp) = @_;

    my @skip = ('type');
    push @skip, 'mp' if $nomp;

    return PVE::JSONSchema::print_property_string($info, $mp_desc, \@skip);
}
956
# Format a parsed network hash as a "netN" property string.
sub print_lxc_network {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $netconf_desc);
}
961
# Parse a "netN" property string.
# Returns an empty hash for empty input. Otherwise the type is forced to
# 'veth' and a random MAC address is assigned when none is configured.
sub parse_lxc_network {
    my ($data) = @_;

    return {} if !$data;

    my $net = PVE::JSONSchema::parse_property_string($netconf_desc, $data);

    $net->{type} = 'veth';
    if (!$net->{hwaddr}) {
        $net->{hwaddr} = PVE::Tools::random_ether_addr();
    }

    return $net;
}
976
# Read a cgroup attribute of a container from
# /sys/fs/cgroup/<group>/lxc/<vmid>/<name>.
# Returns the whole file when $full is set, otherwise only its first line.
sub read_cgroup_value {
    my ($group, $vmid, $name, $full) = @_;

    my $path = "/sys/fs/cgroup/$group/lxc/$vmid/$name";

    return $full
        ? PVE::Tools::file_get_contents($path)
        : PVE::Tools::file_read_firstline($path);
}
986
# Write $value to the cgroup attribute
# /sys/fs/cgroup/<group>/lxc/<vmid>/<name>; does nothing when that file
# does not exist.
sub write_cgroup_value {
    my ($group, $vmid, $name, $value) = @_;

    my $path = "/sys/fs/cgroup/$group/lxc/$vmid/$name";

    PVE::ProcFSTools::write_proc_entry($path, $value) if -e $path;
}
994
# Find running "lxc-console -n <vmid>" processes by scanning /proc.
# Returns a hash { $vmid => [ $pid, ... ] }.
sub find_lxc_console_pids {

    my $res = {};

    PVE::Tools::dir_glob_foreach('/proc', '\d+', sub {
        my ($pid) = @_;

        my $cmdline = PVE::Tools::file_read_firstline("/proc/$pid/cmdline");
        return if !$cmdline;

        # arguments are NUL separated; we want exactly: lxc-console -n <vmid>
        my @args = split(/\0/, $cmdline);
        return if scalar(@args) != 3;

        my ($cmd, $flag, $vmid) = @args;
        return if $flag ne '-n';
        return if $vmid !~ m/^\d+$/;
        return if $cmd !~ m|^(/usr/bin/)?lxc-console$|;

        push @{$res->{$vmid}}, $pid;
    });

    return $res;
}
1020
# Determine a container's PID from the "PID:" line printed by
# "lxc-info -n <vmid> -p". Dies if no PID is reported.
sub find_lxc_pid {
    my ($vmid) = @_;

    my $pid;

    PVE::Tools::run_command(
        ['lxc-info', '-n', $vmid, '-p'],
        outfunc => sub {
            my ($line) = @_;
            $pid = $1 if $line =~ m/^PID:\s+(\d+)$/;
        },
    );

    die "unable to get PID for CT $vmid (not running?)\n" if !$pid;

    return $pid;
}
1035
# Parse an "address/prefix" IPv4 string; the prefix length is mandatory and
# must be in the range 8..32.
# Returns { address => ..., netmask => ... }. On failure it returns undef
# when $noerr is set and dies otherwise.
#
# Note: we cannot use Net:IP, because that only allows strict
# CIDR networks
sub parse_ipv4_cidr {
    my ($cidr, $noerr) = @_;

    my ($address, $bits) = $cidr =~ m!^($IPV4RE)(?:/(\d+))$!;

    if (defined($bits) && ($bits > 7) && ($bits <= 32)) {
        return {
            address => $address,
            netmask => $PVE::Network::ipv4_reverse_mask->[$bits],
        };
    }

    return undef if $noerr;

    die "unable to parse ipv4 address/mask\n";
}
1049
# Die if the config carries a 'lock' entry; the message names the lock.
sub check_lock {
    my ($conf) = @_;

    my $lock = $conf->{'lock'};

    die "VM is locked ($lock)\n" if $lock;
}
1055
# Die with "$err_msg - protection mode enabled" if the config has the
# protection flag set.
sub check_protection {
    my ($vm_conf, $err_msg) = @_;

    die "$err_msg - protection mode enabled\n" if $vm_conf->{protection};
}
1063
# (Re)generate the native LXC config file /var/lib/lxc/<vmid>/config from
# the container config.
#
# Parameters:
#   $storage_cfg - storage configuration (not used here)
#   $vmid        - container id
#   $conf        - parsed container config
# For templates the generated config is removed instead. Dies when 'arch'
# or 'ostype' is missing, when the ostype is not supported, or when the
# rootfs entry cannot be parsed.
sub update_lxc_config {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $dir = "/var/lib/lxc/$vmid";

    if ($conf->{template}) {

        unlink "$dir/config";

        return;
    }

    my $raw = '';

    die "missing 'arch' - internal error" if !$conf->{arch};
    $raw .= "lxc.arch = $conf->{arch}\n";

    my $unprivileged = $conf->{unprivileged};
    my $custom_idmap = grep { $_->[0] eq 'lxc.id_map' } @{$conf->{lxc}};

    my $ostype = $conf->{ostype} || die "missing 'ostype' - internal error";
    if ($ostype =~ /^(?:debian | ubuntu | centos | fedora | opensuse | archlinux | alpine)$/x) {
        # prefer the distribution specific include, fall back to the generic one
        my $inc ="/usr/share/lxc/config/$ostype.common.conf";
        $inc ="/usr/share/lxc/config/common.conf" if !-f $inc;
        $raw .= "lxc.include = $inc\n";
        if ($unprivileged || $custom_idmap) {
            $inc = "/usr/share/lxc/config/$ostype.userns.conf";
            $inc = "/usr/share/lxc/config/userns.conf" if !-f $inc;
            $raw .= "lxc.include = $inc\n";
        }
    } else {
        die "implement me (ostype $ostype)";
    }

    # WARNING: DO NOT REMOVE this without making sure that loop device nodes
    # cannot be exposed to the container with r/w access (cgroup perms).
    # When this is enabled mounts will still remain in the monitor's namespace
    # after the container unmounted them and thus will not detach from their
    # files while the container is running!
    $raw .= "lxc.monitor.unshare = 1\n";

    # Should we read them from /etc/subuid?
    if ($unprivileged && !$custom_idmap) {
        $raw .= "lxc.id_map = u 0 100000 65536\n";
        $raw .= "lxc.id_map = g 0 100000 65536\n";
    }

    if (!has_dev_console($conf)) {
        $raw .= "lxc.console = none\n";
        $raw .= "lxc.cgroup.devices.deny = c 5:1 rwm\n";
    }

    my $ttycount = get_tty_count($conf);
    $raw .= "lxc.tty = $ttycount\n";

    # some init scripts expect a linux terminal (turnkey).
    $raw .= "lxc.environment = TERM=linux\n";

    my $utsname = $conf->{hostname} || "CT$vmid";
    $raw .= "lxc.utsname = $utsname\n";

    my $memory = $conf->{memory} || 512;
    my $swap = $conf->{swap} // 0;

    my $lxcmem = int($memory*1024*1024);
    $raw .= "lxc.cgroup.memory.limit_in_bytes = $lxcmem\n";

    # the memsw limit covers memory plus swap
    my $lxcswap = int(($memory + $swap)*1024*1024);
    $raw .= "lxc.cgroup.memory.memsw.limit_in_bytes = $lxcswap\n";

    if (my $cpulimit = $conf->{cpulimit}) {
        $raw .= "lxc.cgroup.cpu.cfs_period_us = 100000\n";
        my $value = int(100000*$cpulimit);
        $raw .= "lxc.cgroup.cpu.cfs_quota_us = $value\n";
    }

    my $shares = $conf->{cpuunits} || 1024;
    $raw .= "lxc.cgroup.cpu.shares = $shares\n";

    # only called for validation: dies if the rootfs entry cannot be parsed
    parse_ct_rootfs($conf->{rootfs});

    $raw .= "lxc.rootfs = $dir/rootfs\n";

    # Emit the interfaces ordered by index. Iterating over the hash keys
    # directly would produce a different section order on every run.
    my @net_ids = sort { $a <=> $b }
        map { m/^net(\d+)$/ ? $1 : () } keys %$conf;

    my $netcount = 0;
    foreach my $ind (@net_ids) {
        my $d = parse_lxc_network($conf->{"net$ind"});
        $netcount++;
        $raw .= "lxc.network.type = veth\n";
        $raw .= "lxc.network.veth.pair = veth${vmid}i${ind}\n";
        $raw .= "lxc.network.hwaddr = $d->{hwaddr}\n" if defined($d->{hwaddr});
        $raw .= "lxc.network.name = $d->{name}\n" if defined($d->{name});
        $raw .= "lxc.network.mtu = $d->{mtu}\n" if defined($d->{mtu});
    }

    # raw lxc.* entries are appended last, in their configured order
    if (my $lxcconf = $conf->{lxc}) {
        foreach my $entry (@$lxcconf) {
            my ($k, $v) = @$entry;
            $netcount++ if $k eq 'lxc.network.type';
            $raw .= "$k = $v\n";
        }
    }

    $raw .= "lxc.network.type = empty\n" if !$netcount;

    File::Path::mkpath("$dir/rootfs");

    PVE::Tools::file_set_contents("$dir/config", $raw);
}
1174
# Verify and clean up a nameserver list (replace \0 with ' ').
# Every entry is checked with pve_verify_ip(); returns the entries joined
# by single spaces.
sub verify_nameserver_list {
    my ($nameserver_list) = @_;

    my @servers = PVE::Tools::split_list($nameserver_list);

    for my $server (@servers) {
        PVE::JSONSchema::pve_verify_ip($server);
    }

    return join(' ', @servers);
}
1187
# Normalize a search domain list: split it with PVE::Tools::split_list and
# return the entries joined by single spaces. No validation is performed.
sub verify_searchdomain_list {
    my ($searchdomain_list) = @_;

    # todo: should we add checks for valid dns domains?
    my @domains = PVE::Tools::split_list($searchdomain_list);

    return join(' ', @domains);
}
1199
# Returns 1 when any mountpoint visited by foreach_mountpoint() on $config
# has type 'volume' and refers to $volid, otherwise 0.
sub is_volume_in_use {
    my ($config, $volid) = @_;

    my $found = 0;
    my $check = sub {
        my ($ms, $mountpoint) = @_;

        return if $found;    # already decided, skip the remaining entries
        return if $mountpoint->{type} ne 'volume';
        $found = 1 if $mountpoint->{volume} eq $volid;
    };
    foreach_mountpoint($config, $check);

    return $found;
}
1214
# Record $volid under the lowest free "unusedN" key of $config.
# Returns the key that was used, or nothing when the volume is already
# listed as unused. Dies when every slot below $MAX_UNUSED_DISKS is taken.
sub add_unused_volume {
    my ($config, $volid) = @_;

    my $free_slot;
    foreach my $idx (0 .. $MAX_UNUSED_DISKS - 1) {
        my $slot = "unused$idx";
        my $current = $config->{$slot};
        if ($current) {
            return if $current eq $volid; # do not add duplicates
        } elsif (!defined($free_slot)) {
            # remember only the first (lowest) empty slot
            $free_slot = $slot;
        }
    }

    die "Too many unused volumes - please delete them first.\n" if !$free_slot;

    $config->{$free_slot} = $volid;

    return $free_slot;
}
1234
# Apply option deletions and changes to a container configuration.
#
# Parameters:
#   $vmid    - container ID
#   $conf    - configuration hash; modified in place
#   $running - true when the container is running; the config is then
#              written after every processed option and some values are
#              applied live (cgroup values, network devices)
#   $param   - hash of option => new value ('memory' and 'swap' are removed
#              from it via PVE::Tools::extract_param)
#   $delete  - optional array reference of option names to remove
#
# Options that cannot be changed while the container is running are
# collected and reported with one die() at the very end, after all other
# changes were applied. Unknown or read-only options die immediately.
sub update_pct_config {
    my ($vmid, $conf, $running, $param, $delete) = @_;

    my @nohotplug;

    my $new_disks = 0;
    my @deleted_volumes;

    my $rootdir;
    if ($running) {
        my $pid = find_lxc_pid($vmid);
        $rootdir = "/proc/$pid/root";
    }

    # Records the given option(s) as "not hot-pluggable" when the container
    # is running and tells the caller (via a true return) to skip them.
    my $hotplug_error = sub {
        if ($running) {
            push @nohotplug, @_;
            return 1;
        } else {
            return 0;
        }
    };

    if (defined($delete)) {
        foreach my $opt (@$delete) {
            if (!exists($conf->{$opt})) {
                warn "no such option: $opt\n";
                next;
            }

            if ($opt eq 'hostname' || $opt eq 'memory' || $opt eq 'rootfs') {
                die "unable to delete required option '$opt'\n";
            } elsif ($opt eq 'swap') {
                delete $conf->{$opt};
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", -1);
            } elsif ($opt eq 'description' || $opt eq 'onboot' || $opt eq 'startup') {
                delete $conf->{$opt};
            } elsif ($opt eq 'nameserver' || $opt eq 'searchdomain' ||
                     $opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
                next if $hotplug_error->($opt);
                delete $conf->{$opt};
            } elsif ($opt =~ m/^net(\d+)$/) {
                # \d+ (not a single digit) so every netN key accepted by the
                # update branch below can also be deleted
                my $netid = $1;
                delete $conf->{$opt};
                next if !$running;
                PVE::Network::veth_delete("veth${vmid}i$netid");
            } elsif ($opt eq 'protection') {
                delete $conf->{$opt};
            } elsif ($opt =~ m/^unused(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                # the volume itself is freed later, once we know it was not
                # re-added in the same call
                push @deleted_volumes, $conf->{$opt};
                delete $conf->{$opt};
            } elsif ($opt =~ m/^mp(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                my $mp = parse_ct_mountpoint($conf->{$opt});
                delete $conf->{$opt};
                if ($mp->{type} eq 'volume' && !is_volume_in_use($conf, $mp->{volume})) {
                    add_unused_volume($conf, $mp->{volume});
                }
            } elsif ($opt eq 'unprivileged') {
                die "unable to delete read-only option: '$opt'\n";
            } else {
                die "implement me (delete: $opt)";
            }
            write_config($vmid, $conf) if $running;
        }
    }

    # There's no separate swap size to configure, there's memory and "total"
    # memory (iow. memory+swap). This means we have to change them together.
    my $wanted_memory = PVE::Tools::extract_param($param, 'memory');
    my $wanted_swap = PVE::Tools::extract_param($param, 'swap');
    if (defined($wanted_memory) || defined($wanted_swap)) {

        my $old_memory = ($conf->{memory} || 512);
        my $old_swap = ($conf->{swap} || 0);

        $wanted_memory //= $old_memory;
        $wanted_swap //= $old_swap;

        my $total = $wanted_memory + $wanted_swap;
        if ($running) {
            my $old_total = $old_memory + $old_swap;
            # The order of the two writes depends on whether the total
            # grows or shrinks: raise the combined limit first when growing,
            # lower the memory limit first when shrinking.
            if ($total > $old_total) {
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
                write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
            } else {
                write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
            }
        }
        $conf->{memory} = $wanted_memory;
        $conf->{swap} = $wanted_swap;

        write_config($vmid, $conf) if $running;
    }

    # volume IDs referenced by mountpoints set in this call
    my $used_volids = {};

    foreach my $opt (keys %$param) {
        my $value = $param->{$opt};
        if ($opt eq 'hostname') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'onboot') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt eq 'startup') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
            next if $hotplug_error->($opt);
            $conf->{$opt} = $value;
        } elsif ($opt eq 'nameserver') {
            next if $hotplug_error->($opt);
            my $list = verify_nameserver_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'searchdomain') {
            next if $hotplug_error->($opt);
            my $list = verify_searchdomain_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'cpulimit') {
            next if $hotplug_error->($opt); # FIXME: hotplug
            $conf->{$opt} = $value;
        } elsif ($opt eq 'cpuunits') {
            $conf->{$opt} = $value;
            write_cgroup_value("cpu", $vmid, "cpu.shares", $value);
        } elsif ($opt eq 'description') {
            $conf->{$opt} = PVE::Tools::encode_text($value);
        } elsif ($opt =~ m/^net(\d+)$/) {
            my $netid = $1;
            my $net = parse_lxc_network($value);
            if (!$running) {
                $conf->{$opt} = print_lxc_network($net);
            } else {
                update_net($vmid, $conf, $opt, $net, $netid, $rootdir);
            }
        } elsif ($opt eq 'protection') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt =~ m/^mp(\d+)$/) {
            next if $hotplug_error->($opt);
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            my $old = $conf->{$opt};
            $conf->{$opt} = $value;
            if (defined($old)) {
                # keep track of a replaced volume as "unused"
                my $mp = parse_ct_mountpoint($old);
                if ($mp->{type} eq 'volume' && !is_volume_in_use($conf, $mp->{volume})) {
                    add_unused_volume($conf, $mp->{volume});
                }
            }
            $new_disks = 1;
            my $mp = parse_ct_mountpoint($value);
            $used_volids->{$mp->{volume}} = 1;
        } elsif ($opt eq 'rootfs') {
            next if $hotplug_error->($opt);
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            my $old = $conf->{$opt};
            $conf->{$opt} = $value;
            if (defined($old)) {
                my $mp = parse_ct_rootfs($old);
                if ($mp->{type} eq 'volume' && !is_volume_in_use($conf, $mp->{volume})) {
                    add_unused_volume($conf, $mp->{volume});
                }
            }
            my $mp = parse_ct_rootfs($value);
            $used_volids->{$mp->{volume}} = 1;
        } elsif ($opt eq 'unprivileged') {
            die "unable to modify read-only option: '$opt'\n";
        } else {
            die "implement me: $opt";
        }
        write_config($vmid, $conf) if $running;
    }

    # Cleanup config:

    # Remove unused disks after re-adding
    foreach my $key (keys %$conf) {
        next if $key !~ m/^unused\d+$/;
        my $volid = $conf->{$key};
        if ($used_volids->{$volid}) {
            delete $conf->{$key};
        }
    }

    # Apply deletions and creations of new volumes
    if (@deleted_volumes) {
        my $storage_cfg = PVE::Storage::config();
        foreach my $volume (@deleted_volumes) {
            next if $used_volids->{$volume}; # could have been re-added, too
            delete_mountpoint_volume($storage_cfg, $vmid, $volume);
        }
    }

    if ($new_disks) {
        my $storage_cfg = PVE::Storage::config();
        create_disks($storage_cfg, $vmid, $conf, $conf);
    }

    # This should be the last thing we do here
    if ($running && scalar(@nohotplug)) {
        die "unable to modify " . join(',', @nohotplug) . " while container is running\n";
    }
}
1438
# True unless the 'console' option is explicitly set to a false value.
# An absent/undefined 'console' option counts as enabled.
sub has_dev_console {
    my ($conf) = @_;

    my $console = $conf->{console};

    return !defined($console) || !!$console;
}
1444
# Returns the configured 'tty' value, or the default declared for 'tty' in
# $confdesc when the option is not defined in $conf.
sub get_tty_count {
    my ($conf) = @_;

    my $tty = $conf->{tty};
    return $tty if defined($tty);

    return $confdesc->{tty}->{default};
}
1450
# Returns the configured 'cmode' value, or the default declared for 'cmode'
# in $confdesc when the option is not defined in $conf.
sub get_cmode {
    my ($conf) = @_;

    my $cmode = $conf->{cmode};
    return $cmode if defined($cmode);

    return $confdesc->{cmode}->{default};
}
1456
# Builds the command (array reference) used to open a console for the
# container, depending on its console mode as reported by get_cmode().
# Dies with "internal error" for any other mode.
sub get_console_command {
    my ($vmid, $conf) = @_;

    my $cmode = get_cmode($conf);

    my %command_for = (
        console => ['lxc-console', '-n', $vmid, '-t', 0],
        tty     => ['lxc-console', '-n', $vmid],
        shell   => ['lxc-attach', '--clear-env', '-n', $vmid],
    );

    my $cmd = $command_for{$cmode};
    die "internal error" if !$cmd;

    return $cmd;
}
1472
# Returns the (IPv4, IPv6) addresses configured on net0, without their
# "/prefix" suffix. A keyword instead of an address (dhcp/manual for IPv4,
# auto/dhcp/manual for IPv6) yields undef for that family.
# Returns undef when the container has no net0 entry.
sub get_primary_ips {
    my ($conf) = @_;

    # return data from net0
    my $net0 = $conf->{net0};
    return undef if !defined($net0);

    my $net = parse_lxc_network($net0);

    # Maps a keyword to undef and strips the prefix length from a real
    # address; false values are passed through untouched.
    my $plain_address = sub {
        my ($addr, $keywords) = @_;

        return $addr if !$addr;
        return undef if $addr =~ $keywords;
        $addr =~ s!/\d+$!!;
        return $addr;
    };

    my $ipv4 = $plain_address->($net->{ip}, qr/^(dhcp|manual)$/);
    my $ipv6 = $plain_address->($net->{ip6}, qr/^(auto|dhcp|manual)$/);

    return ($ipv4, $ipv6);
}
1500
# Frees the storage volume behind a mountpoint, but only when
# classify_mountpoint() reports it as a 'volume' and the owner parsed from
# the volume name equals $vmid.
sub delete_mountpoint_volume {
    my ($storage_cfg, $vmid, $volume) = @_;

    my $kind = classify_mountpoint($volume);
    return if $kind ne 'volume';

    my (undef, undef, $owner) = PVE::Storage::parse_volname($storage_cfg, $volume);
    if ($vmid == $owner) {
        PVE::Storage::vdisk_free($storage_cfg, $volume);
    }
}
1509
# Removes a container: deletes the volume of every mountpoint (through
# delete_mountpoint_volume), removes its directory below /var/lib/lxc and
# finally destroys its configuration.
sub destroy_lxc_container {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $free_volume = sub {
        my ($ms, $mountpoint) = @_;
        delete_mountpoint_volume($storage_cfg, $vmid, $mountpoint->{volume});
    };
    foreach_mountpoint($conf, $free_volume);

    # failures of rmdir/unlink are deliberately not checked
    my $ct_dir = "/var/lib/lxc/$vmid";
    rmdir "$ct_dir/rootfs";
    unlink "$ct_dir/config";
    rmdir $ct_dir;
    destroy_config($vmid);

    # lxc-destroy is intentionally not used:
    #my $cmd = ['lxc-destroy', '-n', $vmid ];
    #PVE::Tools::run_command($cmd);
}
1526
# Deactivates the container's volumes after it stopped, unless $keepActive
# is set. Errors are downgraded to warnings.
sub vm_stop_cleanup {
    my ($storage_cfg, $vmid, $conf, $keepActive) = @_;

    eval {
        return if $keepActive;    # only leaves the eval block

        my $vollist = get_vm_volumes($conf);
        PVE::Storage::deactivate_volumes($storage_cfg, $vollist);
    };
    if (my $err = $@) {
        warn $err; # avoid errors - just warn
    }
}
1539
# Numeric "not equal" that tolerates undefined operands: two undefined
# values are equal, a defined and an undefined value differ.
my $safe_num_ne = sub {
    my ($lhs, $rhs) = @_;

    my $lhs_set = defined($lhs);
    my $rhs_set = defined($rhs);

    return 0 if !$lhs_set && !$rhs_set;
    return 1 if !$lhs_set || !$rhs_set;

    return $lhs != $rhs;
};
1549
# String "not equal" that tolerates undefined operands: two undefined
# values are equal, a defined and an undefined value differ.
my $safe_string_ne = sub {
    my ($lhs, $rhs) = @_;

    my $lhs_set = defined($lhs);
    my $rhs_set = defined($rhs);

    return 0 if !$lhs_set && !$rhs_set;
    return 1 if !$lhs_set || !$rhs_set;

    return $lhs ne $rhs;
};
1559
# Applies a changed network device configuration ($newnet, already parsed)
# to a container, for option $opt / interface index $netid.
#
# - no previous entry: the device is hot-plugged
# - hwaddr or name changed: the veth device is removed and hot-plugged anew
# - bridge, tag or firewall changed: the host side is re-plugged
# Afterwards the IP configuration is updated through update_ipconfig().
# Dies for device types other than 'veth'.
sub update_net {
    my ($vmid, $conf, $opt, $newnet, $netid, $rootdir) = @_;

    # for when there are physical interfaces
    die "cannot update interface of type $newnet->{type}"
        if $newnet->{type} ne 'veth';

    my $veth = "veth${vmid}i${netid}";
    my $eth = $newnet->{name};
    my @bridge_props = qw(bridge tag firewall);

    my $oldnetcfg = $conf->{$opt};
    if (!$oldnetcfg) {
        hotplug_net($vmid, $conf, $opt, $newnet, $netid);
    } else {
        my $oldnet = parse_lxc_network($oldnetcfg);

        # compares one property of the old and new settings
        my $differs = sub {
            my ($compare, $prop) = @_;
            return $compare->($oldnet->{$prop}, $newnet->{$prop});
        };

        if ($differs->($safe_string_ne, 'hwaddr') ||
            $differs->($safe_string_ne, 'name')) {

            # the device itself changed: drop it and create a new one
            PVE::Network::veth_delete($veth);
            delete $conf->{$opt};
            write_config($vmid, $conf);

            hotplug_net($vmid, $conf, $opt, $newnet, $netid);

        } elsif ($differs->($safe_string_ne, 'bridge') ||
                 $differs->($safe_num_ne, 'tag') ||
                 $differs->($safe_num_ne, 'firewall')) {

            if ($oldnet->{bridge}) {
                # persist the unplugged state before plugging again
                PVE::Network::tap_unplug($veth);
                delete @$oldnet{@bridge_props};
                $conf->{$opt} = print_lxc_network($oldnet);
                write_config($vmid, $conf);
            }

            PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});
            for my $prop (@bridge_props) {
                my $new_value = $newnet->{$prop};
                $oldnet->{$prop} = $new_value if $new_value;
            }
            $conf->{$opt} = print_lxc_network($oldnet);
            write_config($vmid, $conf);
        }
    }

    update_ipconfig($vmid, $conf, $opt, $eth, $newnet, $rootdir);
}
1609
# Creates a veth pair for a running container, plugs the host side, moves
# the peer into the container, brings it up and stores the resulting
# (IP-less) device description in $conf->{$opt} before writing the config.
sub hotplug_net {
    my ($vmid, $conf, $opt, $newnet, $netid) = @_;

    my $veth = "veth${vmid}i${netid}";
    my $vethpeer = "${veth}p";
    my $eth = $newnet->{name};

    PVE::Network::veth_create($veth, $vethpeer, $newnet->{bridge}, $newnet->{hwaddr});
    PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});

    # attach peer in container
    PVE::Tools::run_command(['lxc-device', '-n', $vmid, 'add', $vethpeer, "$eth" ]);

    # link up peer in container
    PVE::Tools::run_command(
        ['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', '/sbin/ip', 'link', 'set', $eth, 'up']);

    # only the properties that are in effect now get saved
    my %done = (type => 'veth');
    for my $prop (qw(bridge tag firewall hwaddr name)) {
        my $value = $newnet->{$prop};
        $done{$prop} = $value if $value;
    }
    $conf->{$opt} = print_lxc_network(\%done);

    write_config($vmid, $conf);
}
1636
# Applies changed IP address / gateway settings of one network device to a
# running container, for IPv4 and then IPv6.
#
# Parameters:
#   $vmid    - container ID
#   $conf    - configuration hash; $conf->{$opt} is rewritten and saved
#              for each address family that was changed
#   $opt     - the netN option being updated
#   $eth     - interface name inside the container
#   $newnet  - parsed new network settings (ip, gw, ip6, gw6, ...)
#   $rootdir - root directory handed to PVE::LXC::Setup->new
#
# Failures while adding the new address or replacing the gateway only warn
# and leave that address family unchanged; nothing is saved for it.
sub update_ipconfig {
    my ($vmid, $conf, $opt, $eth, $newnet, $rootdir) = @_;

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);

    my $optdata = parse_lxc_network($conf->{$opt});

    # runs a command inside the container's network namespace; the first
    # argument is a hash of extra run_command options
    my $nscmd = sub {
        my $cmdargs = shift;
        PVE::Tools::run_command(['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', @_], %$cmdargs);
    };
    my $ipcmd = sub { &$nscmd({}, '/sbin/ip', @_) };

    my $change_ip_config = sub {
        my ($ipversion) = @_;

        my $family_opt = "-$ipversion";
        # option names are ip/gw for IPv4 and ip6/gw6 for IPv6
        my $suffix = $ipversion == 4 ? '' : $ipversion;
        my $gw = "gw$suffix";
        my $ip = "ip$suffix";

        my $newip = $newnet->{$ip};
        my $newgw = $newnet->{$gw};
        my $oldip = $optdata->{$ip};

        my $change_ip = &$safe_string_ne($oldip, $newip);
        my $change_gw = &$safe_string_ne($optdata->{$gw}, $newgw);

        return if !$change_ip && !$change_gw;

        # step 1: add new IP, if this fails we cancel
        my $is_real_ip = ($newip && $newip !~ /^(?:auto|dhcp|manual)$/);
        if ($change_ip && $is_real_ip) {
            eval { &$ipcmd($family_opt, 'addr', 'add', $newip, 'dev', $eth); };
            if (my $err = $@) {
                warn $err;
                return;
            }
        }

        # step 2: replace gateway
        # If this fails we delete the added IP and cancel.
        # If it succeeds we save the config and delete the old IP, ignoring
        # errors. The config is then saved.
        # Note: 'ip route replace' can add a route as well as change one.
        if ($change_gw) {
            if ($newgw) {
                eval {
                    if ($is_real_ip && !PVE::Network::is_ip_in_cidr($newgw, $newip, $ipversion)) {
                        &$ipcmd($family_opt, 'route', 'add', $newgw, 'dev', $eth);
                    }
                    &$ipcmd($family_opt, 'route', 'replace', 'default', 'via', $newgw);
                };
                if (my $err = $@) {
                    warn $err;
                    # the route was not replaced, the old IP is still available
                    # rollback (delete new IP) and cancel
                    # Only roll back an address that step 1 really added;
                    # keywords like 'dhcp' were never passed to 'ip addr add'.
                    if ($change_ip && $is_real_ip) {
                        eval { &$ipcmd($family_opt, 'addr', 'del', $newip, 'dev', $eth); };
                        warn $@ if $@; # no need to die here
                    }
                    return;
                }
            } else {
                eval { &$ipcmd($family_opt, 'route', 'del', 'default'); };
                # if the route was not deleted, the guest might have deleted it manually
                # warn and continue
                warn $@ if $@;
            }
        }

        # from this point on we save the configuration
        # step 3: delete old IP ignoring errors
        # ('manual' is a keyword like 'auto'/'dhcp', not an address that
        # could be removed)
        if ($change_ip && $oldip && $oldip !~ /^(?:auto|dhcp|manual)$/) {
            # We need to enable promote_secondaries, otherwise our newly added
            # address will be removed along with the old one.
            my $promote = 0;
            eval {
                if ($ipversion == 4) {
                    &$nscmd({ outfunc => sub { $promote = int(shift) } },
                            'cat', "/proc/sys/net/ipv4/conf/$eth/promote_secondaries");
                    &$nscmd({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=1");
                }
                &$ipcmd($family_opt, 'addr', 'del', $oldip, 'dev', $eth);
            };
            warn $@ if $@; # no need to die here

            # restore the previous promote_secondaries setting
            if ($ipversion == 4) {
                &$nscmd({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=$promote");
            }
        }

        foreach my $property ($ip, $gw) {
            if ($newnet->{$property}) {
                $optdata->{$property} = $newnet->{$property};
            } else {
                delete $optdata->{$property};
            }
        }
        $conf->{$opt} = print_lxc_network($optdata);
        write_config($vmid, $conf);
        $lxc_setup->setup_network($conf);
    };

    &$change_ip_config(4);
    &$change_ip_config(6);

}
1746
1747 # Internal snapshots
1748
1749 # NOTE: Snapshot create/delete involves several non-atomic
1750 # actions, and can take a long time.
1751 # So we try to avoid locking the file and use the 'lock' variable
1752 # inside the config file instead.
1753
# Copies all entries of $source into $dest, except snapshot bookkeeping
# keys, 'lock', 'digest', 'description' and any "unusedN" entry.
my $snapshot_copy_config = sub {
    my ($source, $dest) = @_;

    my %skipped = map { $_ => 1 }
        qw(snapshots snapstate snaptime vmstate lock digest description);

    foreach my $key (keys %$source) {
        next if $skipped{$key};
        next if $key =~ m/^unused\d+$/;

        $dest->{$key} = $source->{$key};
    }
};
1770
# Builds a new configuration from a snapshot: the snapshot list, the
# description and the "unusedN" entries come from the current $conf, all
# other settings are copied from $snap via $snapshot_copy_config.
my $snapshot_apply_config = sub {
    my ($conf, $snap) = @_;

    # copy snapshot list
    my $newconf = { snapshots => $conf->{snapshots} };

    # keep description and list of unused disks
    my @kept = grep { $_ eq 'description' || m/^unused\d+$/ } keys %$conf;
    $newconf->{$_} = $conf->{$_} for @kept;

    $snapshot_copy_config->($snap, $newconf);

    return $newconf;
};
1789
# Placeholder: saving the runtime state is not implemented, so this always
# dies.
my $snapshot_save_vmstate = sub {
    my $feature = 'snapshot_save_vmstate';
    die "implement me - $feature\n";
};
1793
# First phase of taking a snapshot, executed under the config lock:
# sets the 'snapshot' lock in the config, creates the snapshot entry
# (state "prepare") as a copy of the current settings and writes the
# config. Returns the new snapshot entry.
# Dies for templates, for an already used snapshot name or when the
# required storage feature is missing.
sub snapshot_prepare {
    my ($vmid, $snapname, $save_vmstate, $comment) = @_;

    my $snap;

    lock_config($vmid, sub {
        my $conf = load_config($vmid);

        if (is_template($conf)) {
            die "you can't take a snapshot if it's a template\n";
        }

        check_lock($conf);

        $conf->{lock} = 'snapshot';

        if (defined($conf->{snapshots}->{$snapname})) {
            die "snapshot name '$snapname' already used\n";
        }

        my $storecfg = PVE::Storage::config();

        # workaround until mp snapshots are implemented
        my $feature = $snapname eq 'vzdump' ? 'vzdump' : 'snapshot';
        if (!has_feature($feature, $conf, $storecfg)) {
            die "snapshot feature is not available\n";
        }

        $snap = $conf->{snapshots}->{$snapname} = {};

        if ($save_vmstate && check_running($vmid)) {
            $snapshot_save_vmstate->($vmid, $conf, $snapname, $storecfg);
        }

        $snapshot_copy_config->($conf, $snap);

        $snap->{snapstate} = "prepare";
        $snap->{snaptime} = time();
        $snap->{description} = $comment if $comment;

        write_config($vmid, $conf);
    });

    return $snap;
}
1838
# Final phase of taking a snapshot, executed under the config lock:
# verifies the 'snapshot' lock and the "prepare" state, clears both and
# writes a config built from the snapshot with 'parent' set to $snapname.
sub snapshot_commit {
    my ($vmid, $snapname) = @_;

    lock_config($vmid, sub {
        my $conf = load_config($vmid);

        my $lock = $conf->{lock};
        if (!($lock && $lock eq 'snapshot')) {
            die "missing snapshot lock\n";
        }

        my $snap = $conf->{snapshots}->{$snapname};
        die "snapshot '$snapname' does not exist\n" if !defined($snap);

        my $state = $snap->{snapstate};
        if (!($state && $state eq "prepare")) {
            die "wrong snapshot state\n";
        }

        delete $snap->{snapstate};
        delete $conf->{lock};

        my $newconf = $snapshot_apply_config->($conf, $snap);
        $newconf->{parent} = $snapname;

        write_config($vmid, $newconf);
    });
}
1867
# Returns 1 when every relevant mountpoint volume supports $feature
# (optionally for snapshot $snapname), otherwise 0.
# The pseudo feature 'vzdump' is checked as 'snapshot', but skips
# non-rootfs mountpoints that are not flagged for backup.
# Dies for any checked mountpoint other than rootfs (not implemented).
sub has_feature {
    my ($feature, $conf, $storecfg, $snapname) = @_;

    my $vzdump = $feature eq 'vzdump';
    $feature = 'snapshot' if $vzdump;

    my $missing;
    foreach_mountpoint($conf, sub {
        my ($ms, $mountpoint) = @_;

        return if $missing; # skip further test

        my $is_rootfs = $ms eq 'rootfs';
        return if $vzdump && !$is_rootfs && !$mountpoint->{backup};

        if (!PVE::Storage::volume_has_feature($storecfg, $feature, $mountpoint->{volume}, $snapname)) {
            $missing = 1;
        }

        # TODO: implement support for mountpoints
        die "unable to handle mountpoint '$ms' - feature not implemented\n"
            if !$is_rootfs;
    });

    return $missing ? 0 : 1;
}
1890
# Switches the current process into namespace $which of process $pid
# (/proc/$pid/ns/$which) using PVE::Tools::setns with the given $type.
# $vmid is only used for the error messages.
my $enter_namespace = sub {
    my ($vmid, $pid, $which, $type) = @_;

    my $ns_path = "/proc/$pid/ns/$which";
    sysopen(my $fd, $ns_path, O_RDONLY)
        or die "failed to open $which namespace of container $vmid: $!\n";

    if (!PVE::Tools::setns(fileno($fd), $type)) {
        die "failed to enter $which namespace of container $vmid: $!\n";
    }

    close $fd;
};
1899
# Child-side worker of sync_container_namespace(): enters the container's
# mount namespace, signals "go" over $socket, reads the mount table sent
# back by the parent and syncs every listed mountpoint except lxcfs ones.
# Errors of individual syncs are only warned about.
my $do_syncfs = sub {
    my ($vmid, $pid, $socket) = @_;

    $enter_namespace->($vmid, $pid, 'mnt', PVE::Tools::CLONE_NEWNS);

    # Tell the parent process to start reading our /proc/mounts
    print {$socket} "go\n";
    $socket->flush();

    # Receive /proc/self/mounts
    my $mountdata;
    {
        local $/ = undef;
        $mountdata = <$socket>;
    }
    close $socket;

    # Now sync all mountpoints...
    my $mounts = PVE::ProcFSTools::parse_mounts($mountdata);
    for my $entry (@$mounts) {
        my (undef, $dir, $fs) = @$entry;
        next if $fs eq 'fuse.lxcfs';

        eval { PVE::Tools::sync_mountpoint($dir); };
        if (my $err = $@) {
            warn $err;
        }
    }
};
1922
# Syncs all filesystems mounted inside a running container.
#
# A child process is forked which enters the container's mount namespace
# (see $do_syncfs). Once it signals "go", the parent reads the child's
# /proc/<pid>/mounts and sends it over a socket pair; the child then syncs
# each mountpoint.
#
# Dies when the socket pair or fork fails, when the child does not report
# that it entered the namespace, when its mount table cannot be read, or
# when it exits with a non-zero status. The child is reaped on every path
# taken after the fork.
sub sync_container_namespace {
    my ($vmid) = @_;
    my $pid = find_lxc_pid($vmid);

    # SOCK_DGRAM is nicer for barriers but cannot be slurped
    socketpair my $pfd, my $cfd, AF_UNIX, SOCK_STREAM, PF_UNSPEC
        or die "failed to create socketpair: $!\n";

    my $child = fork();
    die "fork failed: $!\n" if !defined($child);

    if (!$child) {
        eval {
            close $pfd;
            &$do_syncfs($vmid, $pid, $cfd);
        };
        if (my $err = $@) {
            warn $err;
            POSIX::_exit(1);
        }
        POSIX::_exit(0);
    }
    close $cfd;

    # Waits for the child; retries only when interrupted by a signal so a
    # failing waitpid() (e.g. no such child) cannot loop forever.
    my $reap_child = sub {
        while (waitpid($child, 0) != $child) {
            last if $! != EINTR;
        }
    };

    # The child sends nothing when it failed to enter the namespace, in
    # which case the read returns undef at end-of-file.
    my $go = <$pfd>;
    if (!defined($go) || $go ne "go\n") {
        close $pfd;
        $reap_child->();
        die "failed to enter container namespace\n";
    }

    my $mounts;
    if (!open($mounts, '<', "/proc/$child/mounts")) {
        my $open_err = "$!";
        # closing our end gives the waiting child end-of-file so it can exit
        close $pfd;
        $reap_child->();
        die "failed to open container's /proc/mounts: $open_err\n";
    }
    my $mountdata = do { local $/ = undef; <$mounts> };
    close $mounts;
    print {$pfd} $mountdata;
    close $pfd;

    $reap_child->();
    die "failed to sync container namespace\n" if $? != 0;
}
1959
# Takes a snapshot of the container's root filesystem volume.
# The config is prepared first (snapshot_prepare), a running container is
# frozen and synced while the storage snapshot is taken, and the snapshot
# is committed afterwards. On failure the half-created snapshot is removed
# again and the error is re-thrown.
sub snapshot_create {
    my ($vmid, $snapname, $save_vmstate, $comment) = @_;

    snapshot_prepare($vmid, $snapname, $save_vmstate, $comment);

    my $conf = load_config($vmid);
    my $running = check_running($vmid);

    my $frozen = 0;
    my $drivehash = {};

    eval {
        if ($running) {
            # set before freezing so we also unfreeze after a partial failure
            $frozen = 1;
            PVE::Tools::run_command(['/usr/bin/lxc-freeze', '-n', $vmid]);
            sync_container_namespace($vmid);
        }

        my $storecfg = PVE::Storage::config();
        my $rootinfo = parse_ct_rootfs($conf->{rootfs});

        PVE::Storage::volume_snapshot($storecfg, $rootinfo->{volume}, $snapname);
        $drivehash->{rootfs} = 1;
    };
    my $err = $@;

    if ($frozen) {
        eval { PVE::Tools::run_command(['/usr/bin/lxc-unfreeze', '-n', $vmid]); };
        if (my $unfreeze_err = $@) {
            warn $unfreeze_err;
        }
    }

    if ($err) {
        eval { snapshot_delete($vmid, $snapname, 1, $drivehash); };
        if (my $cleanup_err = $@) {
            warn "$cleanup_err\n";
        }
        die "$err\n";
    }

    snapshot_commit($vmid, $snapname);
}
2002
# Deletes snapshot $snapname in three steps: mark it as being deleted in
# the config, remove the storage snapshot of the root filesystem volume,
# then drop the entry (and parent references to it) from the config.
# With $force, a failing storage removal only warns.
# Note: $drivehash is only set when called from snapshot_create.
sub snapshot_delete {
    my ($vmid, $snapname, $force, $drivehash) = @_;

    # switched to 0 for the second, cleaning-up run of $updatefn
    my $prepare = 1;

    my $snap;

    # Re-points (or removes) a 'parent' entry that refers to the snapshot
    # being deleted.
    my $unlink_parent = sub {
        my ($confref, $new_parent) = @_;

        my $parent = $confref->{parent};
        return if !$parent || $parent ne $snapname;

        if ($new_parent) {
            $confref->{parent} = $new_parent;
        } else {
            delete $confref->{parent};
        }
    };

    my $updatefn = sub {
        my ($remove_drive) = @_;

        my $conf = load_config($vmid);

        if (!$drivehash) {
            check_lock($conf);
            if (is_template($conf)) {
                die "you can't delete a snapshot if vm is a template\n";
            }
        }

        $snap = $conf->{snapshots}->{$snapname};

        die "snapshot '$snapname' does not exist\n" if !defined($snap);

        # remove parent refs
        if (!$prepare) {
            my $grandparent = $snap->{parent};
            $unlink_parent->($conf, $grandparent);
            for my $sn (grep { $_ ne $snapname } keys %{$conf->{snapshots}}) {
                $unlink_parent->($conf->{snapshots}->{$sn}, $grandparent);
            }
        }

        if ($remove_drive) {
            die "implement me - saving vmstate\n" if $remove_drive eq 'vmstate';
            die "implement me - remove drive\n";
        }

        if ($prepare) {
            $snap->{snapstate} = 'delete';
        } else {
            delete $conf->{snapshots}->{$snapname};
            delete $conf->{lock} if $drivehash;
        }

        write_config($vmid, $conf);
    };

    lock_config($vmid, $updatefn);

    # now remove vmstate file
    # never set for LXC!
    my $storecfg = PVE::Storage::config();

    die "implement me - saving vmstate\n" if $snap->{vmstate};

    # now remove all volume snapshots
    # only rootfs for now!
    eval {
        my $rootinfo = parse_ct_rootfs($snap->{rootfs});
        PVE::Storage::volume_snapshot_delete($storecfg, $rootinfo->{volume}, $snapname);
    };
    if (my $err = $@) {
        die $err if !$force;
        warn $err;
    }

    # now cleanup config
    $prepare = 0;
    lock_config($vmid, $updatefn);
}
2092
# Rolls the container back to snapshot $snapname (root filesystem only).
#
# Runs in three steps: under the config lock the container is stopped if
# needed and the 'rollback' lock is set; then the storage volume is rolled
# back; finally, again under the config lock, the snapshot's settings
# become the current config (with 'parent' set to $snapname) and the lock
# is removed.
#
# Dies for templates, unknown or incomplete snapshots, when the container
# is still running after the stop attempt, or when the storage layer
# reports that the rollback is not possible.
sub snapshot_rollback {
    my ($vmid, $snapname) = @_;

    # switched to 0 for the second run of $updatefn
    my $prepare = 1;

    my $storecfg = PVE::Storage::config();

    my $conf = load_config($vmid);

    # looks up the snapshot in the currently loaded $conf
    my $get_snapshot_config = sub {

        die "you can't rollback if vm is a template\n" if is_template($conf);

        my $res = $conf->{snapshots}->{$snapname};

        die "snapshot '$snapname' does not exist\n" if !defined($res);

        return $res;
    };

    my $snap = &$get_snapshot_config();

    # only for rootfs for now!
    my $rootfs = $snap->{rootfs};
    my $rootinfo = parse_ct_rootfs($rootfs);
    my $volid = $rootinfo->{volume};

    PVE::Storage::volume_rollback_is_possible($storecfg, $volid, $snapname);

    my $updatefn = sub {

        # reload inside the lock, the config may have changed meanwhile
        $conf = load_config($vmid);

        $snap = &$get_snapshot_config();

        die "unable to rollback to incomplete snapshot (snapstate = $snap->{snapstate})\n"
            if $snap->{snapstate};

        if ($prepare) {
            check_lock($conf);
            # list form: the arguments are passed to lxc-stop directly,
            # without going through a shell
            system('lxc-stop', '-n', $vmid, '--kill') if check_running($vmid);
        }

        die "unable to rollback vm $vmid: vm is running\n"
            if check_running($vmid);

        if ($prepare) {
            $conf->{lock} = 'rollback';
        } else {
            die "got wrong lock\n" if !($conf->{lock} && $conf->{lock} eq 'rollback');
            delete $conf->{lock};
        }

        if (!$prepare) {
            # copy snapshot config to current config
            $conf = &$snapshot_apply_config($conf, $snap);
            $conf->{parent} = $snapname;
        }

        write_config($vmid, $conf);

        if (!$prepare && $snap->{vmstate}) {
            die "implement me - save vmstate";
        }
    };

    lock_config($vmid, $updatefn);

    # only rootfs for now!
    PVE::Storage::volume_snapshot_rollback($storecfg, $volid, $snapname);

    $prepare = 0;
    lock_config($vmid, $updatefn);
}
2169
# Converts the container's root filesystem volume into a base (template)
# volume and stores the resulting volume ID in the config.
# Dies when the storage does not offer the 'template' feature for it.
sub template_create {
    my ($vmid, $conf) = @_;

    my $storecfg = PVE::Storage::config();

    my $rootinfo = parse_ct_rootfs($conf->{rootfs});
    my $volid = $rootinfo->{volume};

    if (!PVE::Storage::volume_has_feature($storecfg, 'template', $volid)) {
        die "Template feature is not available for '$volid'\n";
    }

    PVE::Storage::activate_volumes($storecfg, [$volid]);

    # the base volume replaces the original one in the rootfs entry
    $rootinfo->{volume} = PVE::Storage::vdisk_create_base($storecfg, $volid);
    $conf->{rootfs} = print_ct_mountpoint($rootinfo, 1);

    write_config($vmid, $conf);
}
2189
# True when the 'template' option is defined and numerically equal to 1,
# false otherwise.
sub is_template {
    my ($conf) = @_;

    my $flag = $conf->{template};

    return (defined($flag) && $flag == 1);
}
2195
# Returns the list of mountpoint option names: 'rootfs' followed by
# mp0 .. mp<$MAX_MOUNT_POINTS - 1>. With a true $reverse the list is
# returned in reverse order.
sub mountpoint_names {
    my ($reverse) = @_;

    my @names = ('rootfs', map { "mp$_" } 0 .. $MAX_MOUNT_POINTS - 1);

    return $reverse ? reverse @names : @names;
}
2207
2208
# Calls $func->($key, $mountpoint) for every mountpoint option that is
# defined in $conf, in the order given by mountpoint_names($reverse).
# Entries whose parser returns undef are skipped.
sub foreach_mountpoint_full {
    my ($conf, $reverse, $func) = @_;

    for my $name (mountpoint_names($reverse)) {
        my $raw = $conf->{$name};
        next if !defined($raw);

        my $mountpoint;
        if ($name eq 'rootfs') {
            $mountpoint = parse_ct_rootfs($raw, 1);
        } else {
            $mountpoint = parse_ct_mountpoint($raw, 1);
        }
        next if !defined($mountpoint);

        $func->($name, $mountpoint);
    }
}
2221
# Visits the mountpoints of $conf in forward order (rootfs first);
# see foreach_mountpoint_full().
sub foreach_mountpoint {
    my ($conf, $func) = @_;

    my $reverse = 0;
    return foreach_mountpoint_full($conf, $reverse, $func);
}
2227
# Visits the mountpoints of $conf in reverse order (rootfs last);
# see foreach_mountpoint_full().
sub foreach_mountpoint_reverse {
    my ($conf, $func) = @_;

    my $reverse = 1;
    return foreach_mountpoint_full($conf, $reverse, $func);
}
2233
# Verifies that $authuser may modify every option named in $key_list.
#
# Parameters:
#   $rpcenv   - object providing check_vm_perm()
#   $authuser - the authenticated user
#   $vmid     - container ID
#   $pool     - pool passed through to check_vm_perm()
#   $key_list - array reference of option names about to be changed
#
# 'root@pam' is allowed to change everything without further checks. For
# all other users the privilege matching each option is checked through
# $rpcenv->check_vm_perm (which is expected to raise on a missing
# privilege - its return value is not inspected here).
# Returns 1.
sub check_ct_modify_config_perm {
    my ($rpcenv, $authuser, $vmid, $pool, $key_list) = @_;

    # only the superuser may skip the per-option checks
    return 1 if $authuser eq 'root@pam';

    foreach my $opt (@$key_list) {

        if ($opt eq 'cpus' || $opt eq 'cpuunits' || $opt eq 'cpulimit') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.CPU']);
        } elsif ($opt eq 'rootfs' || $opt =~ /^mp\d+$/) {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk']);
        } elsif ($opt eq 'memory' || $opt eq 'swap') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Memory']);
        } elsif ($opt =~ m/^net\d+$/ || $opt eq 'nameserver' ||
                 $opt eq 'searchdomain' || $opt eq 'hostname') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Network']);
        } else {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Options']);
        }
    }

    return 1;
}
2257
# Unmounts all mountpoints of a container below /var/lib/lxc/$vmid/rootfs,
# in reverse order. With a true $noerr, failing unmounts only warn;
# otherwise the first failure is re-thrown.
sub umount_all {
    my ($vmid, $storage_cfg, $conf, $noerr) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    my $volid_list = get_vm_volumes($conf);

    my $unmount = sub {
        my ($ms, $mountpoint) = @_;

        my ($volid, $mount) = ($mountpoint->{volume}, $mountpoint->{mp});
        return if !$volid || !$mount;

        # collapse duplicate slashes so the path matches the mount table
        my $mount_path = "$rootdir/$mount";
        $mount_path =~ s!/+!/!g;

        return if !PVE::ProcFSTools::is_mounted($mount_path);

        eval {
            PVE::Tools::run_command(['umount', '-d', $mount_path]);
        };
        my $err = $@;
        return if !$err;

        die $err if !$noerr;
        warn $err;
    };

    foreach_mountpoint_reverse($conf, $unmount);
}
2289
# Activates the container's volumes and mounts all of its mountpoints
# below /var/lib/lxc/$vmid/rootfs. If any mount fails, everything mounted
# so far is unmounted again (best effort) and the error is re-thrown.
# Returns the root directory.
sub mount_all {
    my ($vmid, $storage_cfg, $conf) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    File::Path::make_path($rootdir);

    my $volid_list = get_vm_volumes($conf);
    PVE::Storage::activate_volumes($storage_cfg, $volid_list);

    my $mount_one = sub {
        my ($ms, $mountpoint) = @_;

        mountpoint_mount($mountpoint, $rootdir, $storage_cfg);
    };

    eval { foreach_mountpoint($conf, $mount_one); };
    my $err = $@;
    if ($err) {
        warn "mounting container failed\n";
        umount_all($vmid, $storage_cfg, $conf, 1);
        die $err;
    }

    return $rootdir;
}
2314
2315
# Resolve the path of a mountpoint without mounting anything: this is
# mountpoint_mount() called with an undefined $rootdir.
sub mountpoint_mount_path {
    my ($mountpoint, $storage_cfg, $snapname) = @_;

    my $no_rootdir = undef;

    return mountpoint_mount($mountpoint, $no_rootdir, $storage_cfg, $snapname);
}
2321
# Die if the canonical form of $path differs from what Cwd::realpath()
# resolves it to, i.e. if resolving the path would end up somewhere else.
my $check_mount_path = sub {
    my ($path) = @_;

    my $canonical = File::Spec->canonpath($path);
    my $resolved = Cwd::realpath($canonical);

    die "mount path modified by symlink: $canonical != $resolved"
        if $resolved ne $canonical;
};
2330
# Ask losetup which loop device is associated with $path.
#
# Returns the /dev/loopN device parsed from the losetup output (the last
# one if several lines match), or undef if no line matched.
sub query_loopdev {
    my ($path) = @_;

    my $loopdev;

    PVE::Tools::run_command(
        ['losetup', '--associated', $path],
        outfunc => sub {
            my ($line) = @_;
            $loopdev = $1 if $line =~ m@^(/dev/loop\d+):@;
        },
    );

    return $loopdev;
}
2344
# Run a function with a file attached to a loop device.
# The loop device is always detached afterwards (or set to autoclear).
# Returns the loop device.
#
# $func - code reference, called with the loop device path as only argument
# $file - file to attach via 'losetup --show -f'
#
# Dies if no loop device could be set up, if $func dies, or if detaching
# fails. When both $func and the detach fail, the error from $func is the
# one that gets thrown; the detach error is only warned about so that it
# cannot hide the original problem.
sub run_with_loopdev {
    my ($func, $file) = @_;

    my $device;
    my $parser = sub {
        my $line = shift;
        if ($line =~ m@^(/dev/loop\d+)$@) {
            $device = $1;
        }
    };
    PVE::Tools::run_command(['losetup', '--show', '-f', $file], outfunc => $parser);
    die "failed to setup loop device for $file\n" if !$device;

    eval { &$func($device); };
    my $err = $@;

    # always try to detach, even when the callback failed
    eval { PVE::Tools::run_command(['losetup', '-d', $device]); };
    if (my $detach_err = $@) {
        die $detach_err if !$err;
        warn $detach_err;
    }

    die $err if $err;

    return $device;
}
2365
# Bind mount $dir onto $dest; any @extra_opts are passed through to mount.
#
# With $ro set, the bind mount is remounted read-only in a second step. If
# that remount fails, the bind mount is removed again before the error is
# re-thrown.
sub bindmount {
    my ($dir, $dest, $ro, @extra_opts) = @_;

    my @bind_cmd = ('mount', '-o', 'bind', @extra_opts, $dir, $dest);
    PVE::Tools::run_command(\@bind_cmd);

    return if !$ro;

    eval { PVE::Tools::run_command(['mount', '-o', 'bind,remount,ro', $dest]); };
    my $err = $@;
    return if !$err;

    warn "bindmount error\n";
    # don't leave writable bind-mounts behind...
    PVE::Tools::run_command(['umount', $dest]);
    die $err;
}
2379
# Mount a single mountpoint below $rootdir.
# use $rootdir = undef to just return the corresponding mount path
#
# $mountpoint  - parsed mountpoint hash; the keys volume, mp, type, ro,
#                quota and acl are read here
# $rootdir     - directory the mountpoint's 'mp' is relative to, or undef
# $storage_cfg - storage configuration handed to PVE::Storage
# $snapname    - optional snapshot name (only valid for storage volumes)
#
# Returns nothing if the mountpoint has no volume or no mount path.
# Otherwise returns ($path, $use_loopdev, $mounted_dev) in list context and
# just $path in scalar context.
#
# Dies on unsupported storage types or image formats, and when a snapshot
# is requested for something that is not a storage volume.
sub mountpoint_mount {
    my ($mountpoint, $rootdir, $storage_cfg, $snapname) = @_;

    my $volid = $mountpoint->{volume};
    my $mount = $mountpoint->{mp};
    my $type = $mountpoint->{type};
    # quota is only considered for writable, non-snapshot mounts
    my $quota = !$snapname && !$mountpoint->{ro} && $mountpoint->{quota};
    my $mounted_dev;

    return if !$volid || !$mount;

    my $mount_path;

    if (defined($rootdir)) {
        $rootdir =~ s!/+$!!;
        $mount_path = "$rootdir/$mount";
        $mount_path =~ s!/+!/!g;
        &$check_mount_path($mount_path);
        File::Path::mkpath($mount_path);
    }

    my ($storage) = PVE::Storage::parse_volume_id($volid, 1);

    die "unknown snapshot path for '$volid'" if !$storage && defined($snapname);

    my $optstring = '';
    if (defined($mountpoint->{acl})) {
        $optstring .= ($mountpoint->{acl} ? 'acl' : 'noacl');
    }
    my $readonly = $mountpoint->{ro};

    # only hand '-o' to mount when there actually is an option to pass
    my @extra_opts;
    @extra_opts = ('-o', $optstring) if $optstring;

    if ($storage) {

        my $scfg = PVE::Storage::storage_config($storage_cfg, $storage);
        my $path = PVE::Storage::path($storage_cfg, $volid, $snapname);

        my ($vtype, undef, undef, undef, undef, $isBase, $format) =
            PVE::Storage::parse_volname($storage_cfg, $volid);

        $format = 'iso' if $vtype eq 'iso'; # allow to handle iso files

        if ($format eq 'subvol') {
            if ($mount_path) {
                if ($snapname) {
                    if ($scfg->{type} eq 'zfspool') {
                        # the leading slashes are stripped before the path
                        # is passed to 'mount -t zfs'
                        my $path_arg = $path;
                        $path_arg =~ s!^/+!!;
                        PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, '-t', 'zfs', $path_arg, $mount_path]);
                    } else {
                        die "cannot mount subvol snapshots for storage type '$scfg->{type}'\n";
                    }
                } else {
                    bindmount($path, $mount_path, $readonly, @extra_opts);
                    warn "cannot enable quota control for bind mounted subvolumes\n" if $quota;
                }
            }
            return wantarray ? ($path, 0, $mounted_dev) : $path;
        } elsif ($format eq 'raw' || $format eq 'iso') {
            # mounts the given device/file on $mount_path (if there is one)
            my $domount = sub {
                my ($dev) = @_;

                return if !$mount_path;

                if ($format eq 'iso') {
                    PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, $dev, $mount_path]);
                } elsif ($isBase || defined($snapname)) {
                    PVE::Tools::run_command(['mount', '-o', 'ro,noload', @extra_opts, $dev, $mount_path]);
                } else {
                    # work on a copy, the shared option list stays untouched
                    my @opts = @extra_opts;
                    if ($quota) {
                        push @opts, '-o', 'usrjquota=aquota.user,grpjquota=aquota.group,jqfmt=vfsv0';
                    }
                    push @opts, '-o', 'ro' if $readonly;
                    PVE::Tools::run_command(['mount', @opts, $dev, $mount_path]);
                }
            };
            my $use_loopdev = 0;
            if ($scfg->{path}) {
                # storages with a 'path' are mounted through a loop device
                $mounted_dev = run_with_loopdev($domount, $path);
                $use_loopdev = 1;
            } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' ||
                     $scfg->{type} eq 'rbd' || $scfg->{type} eq 'lvmthin') {
                $mounted_dev = $path;
                &$domount($path);
            } else {
                die "unsupported storage type '$scfg->{type}'\n";
            }
            return wantarray ? ($path, $use_loopdev, $mounted_dev) : $path;
        } else {
            die "unsupported image format '$format'\n";
        }
    } elsif ($type eq 'device') {
        push @extra_opts, '-o', 'ro' if $readonly;
        PVE::Tools::run_command(['mount', @extra_opts, $volid, $mount_path]) if $mount_path;
        return wantarray ? ($volid, 0, $volid) : $volid;
    } elsif ($type eq 'bind') {
        die "directory '$volid' does not exist\n" if ! -d $volid;
        &$check_mount_path($volid);
        bindmount($volid, $mount_path, $readonly, @extra_opts) if $mount_path;
        warn "cannot enable quota control for bind mounts\n" if $quota;
        return wantarray ? ($volid, 0, undef) : $volid;
    }

    die "unsupported storage";
}
2486
# Collect the volume IDs of all mountpoints of type 'volume' whose volume
# ID parses as a storage volume.
#
# $excludes - optional mountpoint key (as passed to the foreach_mountpoint
#             callback) to leave out
#
# Returns an array reference of volume IDs.
sub get_vm_volumes {
    my ($conf, $excludes) = @_;

    my @volids;

    foreach_mountpoint($conf, sub {
        my ($ms, $mountpoint) = @_;

        return if $excludes && $ms eq $excludes;

        my $volid = $mountpoint->{volume};
        return if !$volid;
        return if $mountpoint->{type} ne 'volume';

        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        push @volids, $volid;
    });

    return \@volids;
}
2509
# Run mkfs.ext4 on $dev with the 'mmp' feature enabled and the root
# directory owner set to $rootuid:$rootgid.
sub mkfs {
    my ($dev, $rootuid, $rootgid) = @_;

    my $owner_opt = "root_owner=$rootuid:$rootgid";
    my $cmd = ['mkfs.ext4', '-O', 'mmp', '-E', $owner_opt, $dev];

    PVE::Tools::run_command($cmd);
}
2517
# Create a filesystem (see mkfs()) on a volume.
#
# $volid is either a /dev/... path, which is formatted directly, or a
# storage volume ID, which is activated and resolved to a path first. Only
# storage volumes in 'raw' format can be formatted.
#
# $rootuid/$rootgid are passed on to mkfs() in both cases.
sub format_disk {
    my ($storage_cfg, $volid, $rootuid, $rootgid) = @_;

    if ($volid =~ m!^/dev/.+!) {
        # plain device: no storage lookup needed, but the owner ids are
        mkfs($volid, $rootuid, $rootgid);
        return;
    }

    my ($storage) = PVE::Storage::parse_volume_id($volid, 1);

    die "cannot format volume '$volid' with no storage\n" if !$storage;

    PVE::Storage::activate_volumes($storage_cfg, [$volid]);

    my $path = PVE::Storage::path($storage_cfg, $volid);

    my (undef, undef, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storage_cfg, $volid);

    die "cannot format volume '$volid' (format == $format)\n"
        if $format ne 'raw';

    mkfs($path, $rootuid, $rootgid);
}
2542
# Free every volume in $vollist. A failure to free one volume is only
# warned about, so the remaining volumes are still processed.
sub destroy_disks {
    my ($storecfg, $vollist) = @_;

    for my $volid (@$vollist) {
        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        my $err = $@;
        warn $err if $err;
    }
}
2551
# Allocate the volumes requested in $settings and record the resulting
# mountpoint entries in $conf.
#
# A mountpoint whose volume has the form '<storage>:<size>' (size as a
# decimal number) causes a new volume to be allocated on that storage; the
# kind of volume depends on the storage type. Every other volume value is
# written to $conf unchanged.
#
# Newly allocated raw volumes are formatted; newly allocated subvolumes are
# chown'ed to the container's root uid/gid as returned by parse_id_maps().
#
# Returns an array reference with the volume IDs allocated here. On error
# all volumes allocated so far are freed again and the error is re-thrown.
sub create_disks {
    my ($storecfg, $vmid, $settings, $conf) = @_;

    my $vollist = [];

    eval {
        my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
        my $chown_vollist = [];

        foreach_mountpoint($settings, sub {
            my ($ms, $mountpoint) = @_;

            my $volid = $mountpoint->{volume};

            my ($storage) = PVE::Storage::parse_volume_id($volid, 1);

            if ($storage && ($volid =~ m/^([^:\s]+):(\d+(\.\d+)?)$/)) {
                my $size_gb = $2;

                my $size_kb = int(${size_gb}*1024) * 1024;

                my $scfg = PVE::Storage::storage_config($storecfg, $storage);
                # fixme: use better naming ct-$vmid-disk-X.raw?

                if ($scfg->{type} eq 'dir' || $scfg->{type} eq 'nfs') {
                    if ($size_kb > 0) {
                        $volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, 'raw',
                                                           undef, $size_kb);
                        format_disk($storecfg, $volid, $rootuid, $rootgid);
                    } else {
                        # size 0: allocate a subvol instead of a raw image
                        $volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, 'subvol',
                                                           undef, 0);
                        push @$chown_vollist, $volid;
                    }
                } elsif ($scfg->{type} eq 'zfspool') {

                    $volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, 'subvol',
                                                       undef, $size_kb);
                    push @$chown_vollist, $volid;
                } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' || $scfg->{type} eq 'lvmthin') {

                    $volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, 'raw', undef, $size_kb);
                    format_disk($storecfg, $volid, $rootuid, $rootgid);

                } elsif ($scfg->{type} eq 'rbd') {

                    die "krbd option must be enabled on storage type '$scfg->{type}'\n" if !$scfg->{krbd};
                    $volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, 'raw', undef, $size_kb);
                    format_disk($storecfg, $volid, $rootuid, $rootgid);
                } else {
                    die "unable to create containers on storage type '$scfg->{type}'\n";
                }
                push @$vollist, $volid;
                $mountpoint->{volume} = $volid;
                $mountpoint->{size} = $size_kb * 1024;
                $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            } else {
                # use specified/existing volid/dir/device
                $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            }
        });

        PVE::Storage::activate_volumes($storecfg, $chown_vollist, undef);
        foreach my $volid (@$chown_vollist) {
            my $path = PVE::Storage::path($storecfg, $volid, undef);
            # do not abort here, but do not hide the failure either
            chown($rootuid, $rootgid, $path)
                or warn "unable to change owner of '$path' to $rootuid:$rootgid - $!\n";
        }
        PVE::Storage::deactivate_volumes($storecfg, $chown_vollist, undef);
    };
    # free allocated images on error
    if (my $err = $@) {
        destroy_disks($storecfg, $vollist);
        die $err;
    }
    return $vollist;
}
2629
2630 # bash completion helper
2631
# Completion helper: list the volume IDs offered by
# PVE::Storage::template_list().
#
# Content type 'backup' is listed when $cmdname is 'restore', 'vztmpl'
# otherwise. If $cvalue already starts with '<storage>:', that storage ID
# is passed on to template_list().
#
# Returns an array reference of volume IDs.
sub complete_os_templates {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    my $storeid;
    $storeid = $1 if $cvalue =~ m/^([^:]+):/;

    my $vtype = 'vztmpl';
    $vtype = 'backup' if $cmdname eq 'restore';

    my $data = PVE::Storage::template_list($cfg, $storeid, $vtype);

    my @volids;
    for my $sid (keys %$data) {
        for my $item (@{$data->{$sid}}) {
            my $volid = $item->{volid};
            push @volids, $volid if defined($volid);
        }
    }

    return \@volids;
}
2655
# Completion helper: list container IDs known to vmstatus().
#
# $running - undef: return all IDs
#            true:  only non-template containers listed as active
#            false: only non-template containers not listed as active
#
# Returns an array reference of IDs.
my $complete_ctid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();
    my $active_hash = list_active_containers();

    my $filter = defined($running);

    my @ctids;
    for my $id (keys %$idlist) {
        if ($filter) {
            my $info = $idlist->{$id};
            next if $info->{template};

            my $is_active = $active_hash->{$id};
            next if $running ? !$is_active : $is_active;
        }
        push @ctids, $id;
    }

    return \@ctids;
};
2677
# Completion helper: all container IDs, without filtering by state.
sub complete_ctid {
    return $complete_ctid_full->();
}
2681
# Completion helper: IDs of non-template containers that are not active.
sub complete_ctid_stopped {
    my $running = 0;
    return $complete_ctid_full->($running);
}
2685
# Completion helper: IDs of non-template containers that are active.
sub complete_ctid_running {
    my $running = 1;
    return $complete_ctid_full->($running);
}
2689
# Extract the 'lxc.id_map' entries from a container configuration.
#
# Each value must look like '<u|g> <ct-id> <host-id> <length>'; anything
# else is a fatal error. The host id of an entry that maps container id 0
# becomes the container's root uid (type 'u') or root gid (type 'g').
#
# If no id map is configured but the container is flagged 'unprivileged',
# a default map of 65536 ids starting at host id 100000 is used.
#
# Returns ($id_map, $rootuid, $rootgid), where $id_map is an array
# reference of [type, ct-id, host-id, length] entries.
sub parse_id_maps {
    my ($conf) = @_;

    my @maps;
    my %root_id = (u => 0, g => 0);

    my $lxc = $conf->{lxc};
    for my $entry (@$lxc) {
        my ($key, $value) = @$entry;
        next if $key ne 'lxc.id_map';

        my ($type, $ct, $host, $length) =
            $value =~ /^([ug])\s+(\d+)\s+(\d+)\s+(\d+)\s*$/
            or die "failed to parse id_map: $value\n";

        push @maps, [$type, $ct, $host, $length];
        $root_id{$type} = $host if $ct == 0;
    }

    if (!@maps && $conf->{unprivileged}) {
        # Should we read them from /etc/subuid?
        @maps = (['u', '0', '100000', '65536'],
                 ['g', '0', '100000', '65536']);
        @root_id{qw(u g)} = (100000, 100000);
    }

    return (\@maps, $root_id{u}, $root_id{g});
}
2722
# Build the 'lxc-usernsexec' command prefix for the given id map.
#
# Every map entry becomes one '-m <entry fields joined by ":">' argument
# pair and the list is terminated with '--'. An empty id map yields an
# empty command list.
#
# Returns an array reference.
sub userns_command {
    my ($id_map) = @_;

    return [] if !@$id_map;

    my @cmd = ('lxc-usernsexec');
    for my $entry (@$id_map) {
        push @cmd, '-m', join(':', @$entry);
    }
    push @cmd, '--';

    return \@cmd;
}
2730
2731 1;