]> git.proxmox.com Git - pve-container.git/blob - src/PVE/LXC.pm
38c9bd86951f8cd421e61b997d7bf230a6e511cf
[pve-container.git] / src / PVE / LXC.pm
1 package PVE::LXC;
2
3 use strict;
4 use warnings;
5 use POSIX qw(EINTR);
6
7 use Socket;
8
9 use File::Path;
10 use File::Spec;
11 use Cwd qw();
12 use Fcntl qw(O_RDONLY);
13
14 use PVE::Cluster qw(cfs_register_file cfs_read_file);
15 use PVE::Storage;
16 use PVE::SafeSyslog;
17 use PVE::INotify;
18 use PVE::JSONSchema qw(get_standard_option);
19 use PVE::Tools qw($IPV6RE $IPV4RE dir_glob_foreach lock_file lock_file_full);
20 use PVE::Network;
21 use PVE::AccessControl;
22 use PVE::ProcFSTools;
23 use Time::HiRes qw (gettimeofday);
24
25 use Data::Dumper;
26
# Name of the local node, looked up once when the module is loaded.
my $nodename = PVE::INotify::nodename();

# CPU information of the host, read once when the module is loaded.
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();

# Common tar options, available to other packages as
# $PVE::LXC::COMMON_TAR_FLAGS.
our $COMMON_TAR_FLAGS = [qw(
    --sparse
    --numeric-owner
    --acls
    --xattrs
    --xattrs-include=user.*
    --xattrs-include=security.capability
    --warning=no-xattr-write
)];

# Register the parser/writer pair used for files below '/lxc/'.
cfs_register_file('/lxc/', \&parse_pct_config, \&write_pct_config);
38
# Property-string schema of the 'rootfs' option. It is also the base of the
# mount point schema, which copies all of these properties.
my $rootfs_desc = {
    volume => {
        description        => 'Volume, device or directory to mount into the container.',
        format_description => 'volume',
        format             => 'pve-lxc-mp-string',
        default_key        => 1,
        type               => 'string',
    },
    backup => {
        description        => 'Whether to include the mountpoint in backups.',
        format_description => '[1|0]',
        optional           => 1,
        type               => 'boolean',
    },
    size => {
        description        => 'Volume size (read only value).',
        format_description => 'DiskSize',
        format             => 'disk-size',
        optional           => 1,
        type               => 'string',
    },
    acl => {
        description        => 'Explicitly enable or disable ACL support.',
        format_description => 'acl',
        optional           => 1,
        type               => 'boolean',
    },
    ro => {
        description        => 'Read-only mountpoint (not supported with bind mounts)',
        format_description => 'ro',
        optional           => 1,
        type               => 'boolean',
    },
    quota => {
        description        => 'Enable user quotas inside the container (not supported with zfs subvolumes)',
        format_description => '[0|1]',
        optional           => 1,
        type               => 'boolean',
    },
};

# Standard option describing the container root file system.
PVE::JSONSchema::register_standard_option('pve-ct-rootfs', {
    description => "Use volume as container root.",
    format      => $rootfs_desc,
    optional    => 1,
    type        => 'string',
});

# Standard option describing a snapshot name.
PVE::JSONSchema::register_standard_option('pve-lxc-snapshot-name', {
    description => "The name of the snapshot.",
    format      => 'pve-configid',
    maxLength   => 40,
    type        => 'string',
});
91
# Schema of all "key: value" options that may appear in a container
# configuration. check_type() validates parsed values against it, and
# json_config_properties() exposes it to the API. The netN, mpN and unusedN
# entries are added to this hash further down in the file.
my $confdesc = {
    lock => {
        optional => 1,
        type => 'string',
        description => "Lock/unlock the VM.",
        enum => [qw(migrate backup snapshot rollback)],
    },
    onboot => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default => 0,
    },
    startup => get_standard_option('pve-startup-order'),
    template => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable Template.",
        default => 0,
    },
    arch => {
        optional => 1,
        type => 'string',
        enum => ['amd64', 'i386'],
        description => "OS architecture type.",
        default => 'amd64',
    },
    ostype => {
        optional => 1,
        type => 'string',
        enum => ['debian', 'ubuntu', 'centos', 'fedora', 'opensuse', 'archlinux', 'alpine', 'unmanaged'],
        description => "OS type. This is used to setup configuration inside the container, and corresponds to lxc setup scripts in /usr/share/lxc/config/<ostype>.common.conf. Value 'unmanaged' can be used to skip any OS specific setup.",
    },
    console => {
        optional => 1,
        type => 'boolean',
        description => "Attach a console device (/dev/console) to the container.",
        default => 1,
    },
    tty => {
        optional => 1,
        type => 'integer',
        description => "Specify the number of tty available to the container",
        minimum => 0,
        maximum => 6,
        default => 2,
    },
    cpulimit => {
        optional => 1,
        type => 'number',
        description => "Limit of CPU usage. Note if the computer has 2 CPUs, it has a total of '2' CPU time. Value '0' indicates no CPU limit.",
        minimum => 0,
        maximum => 128,
        default => 0,
    },
    cpuunits => {
        optional => 1,
        type => 'integer',
        description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to the weights of all the other running VMs.\n\nNOTE: You can disable fair-scheduler configuration by setting this to 0.",
        minimum => 0,
        maximum => 500000,
        default => 1024,
    },
    memory => {
        optional => 1,
        type => 'integer',
        description => "Amount of RAM for the VM in MB.",
        minimum => 16,
        default => 512,
    },
    swap => {
        optional => 1,
        type => 'integer',
        description => "Amount of SWAP for the VM in MB.",
        minimum => 0,
        default => 512,
    },
    hostname => {
        optional => 1,
        description => "Set a host name for the container.",
        type => 'string', format => 'dns-name',
        maxLength => 255,
    },
    description => {
        optional => 1,
        type => 'string',
        description => "Container description. Only used on the configuration web interface.",
    },
    searchdomain => {
        optional => 1,
        type => 'string', format => 'dns-name-list',
        description => "Sets DNS search domains for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        description => "Sets DNS server IP address for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
    },
    rootfs => get_standard_option('pve-ct-rootfs'),
    # 'parent' and 'snaptime' are snapshot bookkeeping; they are hidden from
    # the API properties by json_config_properties()
    parent => {
        optional => 1,
        type => 'string', format => 'pve-configid',
        maxLength => 40,
        description => "Parent snapshot name. This is used internally, and should not be modified.",
    },
    snaptime => {
        optional => 1,
        description => "Timestamp for snapshots.",
        type => 'integer',
        minimum => 0,
    },
    cmode => {
        optional => 1,
        description => "Console mode. By default, the console command tries to open a connection to one of the available tty devices. By setting cmode to 'console' it tries to attach to /dev/console instead. If you set cmode to 'shell', it simply invokes a shell inside the container (no login).",
        type => 'string',
        enum => ['shell', 'console', 'tty'],
        default => 'tty',
    },
    protection => {
        optional => 1,
        type => 'boolean',
        description => "Sets the protection flag of the container. This will prevent the CT or CT's disk remove/update operation.",
        default => 0,
    },
    unprivileged => {
        optional => 1,
        type => 'boolean',
        description => "Makes the container run as unprivileged user. (Should not be modified manually.)",
        default => 0,
    },
};
223
# Raw 'lxc.*' keys accepted by parse_pct_config(). A value of 1 marks a key as
# allowed; any other (string) value is the warning printed when that key is
# found. 'lxc.cgroup.*' keys are not listed here, the parser accepts them by
# prefix.
my $valid_lxc_conf_keys = {
    (map { $_ => 1 } qw(
        lxc.include
        lxc.arch
        lxc.utsname
        lxc.haltsignal
        lxc.rebootsignal
        lxc.stopsignal
        lxc.init_cmd
        lxc.network.type
        lxc.network.flags
        lxc.network.link
        lxc.network.mtu
        lxc.network.name
        lxc.network.hwaddr
        lxc.network.ipv4
        lxc.network.ipv4.gateway
        lxc.network.ipv6
        lxc.network.ipv6.gateway
        lxc.network.script.up
        lxc.network.script.down
        lxc.pts
        lxc.console.logfile
        lxc.console
        lxc.tty
        lxc.devttydir
        lxc.hook.autodev
        lxc.autodev
        lxc.kmsg
        lxc.mount
        lxc.mount.entry
        lxc.mount.auto
        lxc.rootfs.mount
        lxc.cap.drop
        lxc.cap.keep
        lxc.aa_profile
        lxc.aa_allow_incomplete
        lxc.se_context
        lxc.seccomp
        lxc.id_map
        lxc.hook.pre-start
        lxc.hook.pre-mount
        lxc.hook.mount
        lxc.hook.start
        lxc.hook.stop
        lxc.hook.post-stop
        lxc.hook.clone
        lxc.hook.destroy
        lxc.loglevel
        lxc.logfile
        lxc.start.auto
        lxc.start.delay
        lxc.start.order
        lxc.group
        lxc.environment
    )),
    # keys that are rejected with an explanatory message
    'lxc.rootfs' => 'lxc.rootfs is auto generated from rootfs',
    'lxc.rootfs.options' => 'lxc.rootfs.options is not supported' .
        ', please use mountpoint options in the "rootfs" key',
};
283
# Property-string schema of the 'netN' options. It is used by
# parse_lxc_network() and print_lxc_network() and registered as the
# 'pve-lxc-network' format.
my $netconf_desc = {
    type => {
        type => 'string',
        optional => 1,
        description => "Network interface type.",
        enum => [qw(veth)],
    },
    name => {
        type => 'string',
        format_description => 'String',
        description => 'Name of the network device as seen from inside the container. (lxc.network.name)',
        pattern => '[-_.\w\d]+',
    },
    bridge => {
        type => 'string',
        format_description => 'vmbr<Number>',
        description => 'Bridge to attach the network device to.',
        pattern => '[-_.\w\d]+',
        optional => 1,
    },
    hwaddr => {
        type => 'string',
        format_description => 'MAC',
        # this description used to be a verbatim copy of the 'bridge' one
        description => 'MAC address of the network device. (lxc.network.hwaddr)',
        pattern => qr/(?:[a-f0-9]{2}:){5}[a-f0-9]{2}/i,
        optional => 1,
    },
    mtu => {
        type => 'integer',
        format_description => 'Number',
        description => 'Maximum transfer unit of the interface. (lxc.network.mtu)',
        minimum => 64, # minimum ethernet frame is 64 bytes
        optional => 1,
    },
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        description => 'IPv4 address in CIDR format.',
        optional => 1,
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        description => 'Default gateway for IPv4 traffic.',
        optional => 1,
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        description => 'IPv6 address in CIDR format.',
        optional => 1,
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        description => 'Default gateway for IPv6 traffic.',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        format_description => '[1|0]',
        description => "Controls whether this interface's firewall rules should be used.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        format_description => 'VlanNo',
        minimum => '2',
        maximum => '4094',
        description => "VLAN tag for this interface.",
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:;\d+)*/,
        format_description => 'vlanid[;vlanid...]',
        description => "VLAN ids to pass through the interface",
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-lxc-network', $netconf_desc);
369
# Add the options net0 .. net<N-1> to the configuration schema; they all
# share the network property-string format.
my $MAX_LXC_NETWORKS = 10;
foreach my $index (0 .. $MAX_LXC_NETWORKS - 1) {
    my $option = "net$index";
    $confdesc->{$option} = {
        description => "Specifies network interfaces for the container.",
        format      => $netconf_desc,
        type        => 'string',
        optional    => 1,
    };
}

# Format check for volume and mount point path strings.
PVE::JSONSchema::register_format('pve-lxc-mp-string', \&verify_lxc_mp_string);
# Format verifier for volume / mount point path strings.
#
# Rejects strings that contain relative path components. On rejection it
# returns undef when $noerr is set and dies otherwise. Accepted strings are
# returned unchanged.
sub verify_lxc_mp_string {
    my ($mp, $noerr) = @_;

    my @forbidden = (
        qr@/\.\.?/@,    # '/./' or '/../' anywhere
        qr@/\.\.?$@,    # '/.' or '/..' at the end
        qr@^\.\./@,     # '../' at the beginning
    );

    foreach my $re (@forbidden) {
        next if $mp !~ $re;
        return undef if $noerr;
        die "$mp contains illegal character sequences\n";
    }

    return $mp;
}
396
# Mount point schema: every rootfs property plus the mandatory 'mp' path.
my $mp_desc = { %$rootfs_desc };
$mp_desc->{mp} = {
    description        => 'Path to the mountpoint as seen from inside the container.',
    format_description => 'Path',
    format             => 'pve-lxc-mp-string',
    type               => 'string',
};
PVE::JSONSchema::register_format('pve-ct-mountpoint', $mp_desc);
407
# Schema shared by all 'unusedN' options (a single hash reference is reused).
my $unuseddesc = {
    description => "Reference to unused volumes.",
    format      => 'pve-volume-id',
    type        => 'string',
    optional    => 1,
};
413
# Add the options mp0 .. mp<N-1> to the configuration schema; they all share
# the mount point property-string format.
my $MAX_MOUNT_POINTS = 10;
for (my $i = 0; $i < $MAX_MOUNT_POINTS; $i++) {
    # the 'optional' key used to be listed twice in this hash literal
    $confdesc->{"mp$i"} = {
        optional => 1,
        type => 'string', format => $mp_desc,
        description => "Use volume as container mount point (experimental feature).",
    };
}
423
# Add the options unused0 .. unused<N-1> to the configuration schema.
# The loop is bounded by $MAX_UNUSED_DISKS, the same limit add_unused_volume()
# uses, instead of by the (currently equal) mount point limit.
my $MAX_UNUSED_DISKS = $MAX_MOUNT_POINTS;
for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) {
    $confdesc->{"unused$i"} = $unuseddesc;
}
428
# Serialize a configuration hash into the text form of the config file.
#
# $filename is not used. The result starts with the current configuration and
# is followed by one "[name]" section per snapshot, sorted by name. Dies when
# a property value contains a newline.
sub write_pct_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    # keys that are never written as a plain "key: value" line
    my %skip = map { $_ => 1 } qw(digest description pending snapshots snapname lxc);

    my $render_section = sub {
        my ($section) = @_;

        my $out = '';

        # the description is stored as '#' comment lines at the section top
        my $text = $section->{description} || '';
        for my $text_line (split(/\n/, $text)) {
            $out .= '#' . PVE::Tools::encode_text($text_line) . "\n";
        }

        for my $name (sort keys %$section) {
            next if $skip{$name};
            my $val = $section->{$name};
            die "detected invalid newline inside property '$name'\n" if $val =~ m/\n/;
            $out .= "$name: $val\n";
        }

        # raw lxc entries are appended in their stored order
        my $lxc_entries = $section->{lxc};
        if ($lxc_entries) {
            for my $pair (@$lxc_entries) {
                my ($k, $v) = @$pair;
                $out .= "$k: $v\n";
            }
        }

        return $out;
    };

    my $raw = $render_section->($conf);

    for my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n" . $render_section->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
472
# Validate $value against the schema entry of option $key and return the
# normalized value: booleans become 1/0, integers go through int(), numbers
# and strings are returned as given. Strings with a 'format' are additionally
# passed to PVE::JSONSchema::check_format().
#
# Dies for unknown options, undefined values, values containing CR/LF and
# values that do not match the declared type.
sub check_type {
    my ($key, $value) = @_;

    my $schema = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$schema;

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    my $type = $schema->{type};

    if ($type eq 'string') {
        my $fmt = $schema->{format};
        PVE::JSONSchema::check_format($fmt, $value) if $fmt;
        return $value;
    }

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    die "internal error"
}
508
# Parse the raw text of a container config file into a configuration hash.
#
# Parameters:
#   $filename - path of the file; must end in "/lxc/<vmid>.conf"
#   $raw      - file content, or undef
#
# Returns undef when $raw is undef. Otherwise returns a hash with the options
# of the current configuration, a 'digest' (SHA1 hex of $raw) and a
# 'snapshots' hash holding one sub-hash per "[name]" section. Raw lxc entries
# are collected as [key, value] pairs under the 'lxc' key of their section.
#
# Lines that cannot be parsed only produce a warning. Dies when the file name
# does not have the expected form.
sub parse_pct_config {
    my ($filename, $raw) = @_;

    return undef if !defined($raw);

    # loaded here because the file never loads Digest::SHA itself
    require Digest::SHA;

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
    };

    # the dot is escaped so that only a real ".conf" suffix is accepted
    $filename =~ m|/lxc/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    # $conf points at the section currently being filled: first the top-level
    # config, then one snapshot hash after another
    my $conf = $res;
    my $descr = '';
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            $section = $1;
            # a new section starts: flush the description collected so far
            $conf->{description} = $descr if $descr;
            $descr = '';
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)\s*$/) {
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(lxc\.[a-z0-9_\-\.]+)(:|\s*=)\s*(.*?)\s*$/) {
            my $key = $1;
            my $value = $3;
            my $validity = $valid_lxc_conf_keys->{$key} || 0;
            if ($validity eq 1 || $key =~ m/^lxc\.cgroup\./) {
                push @{$conf->{lxc}}, [$key, $value];
            } elsif (my $errmsg = $validity) {
                warn "vm $vmid - $key: $errmsg\n";
            } else {
                warn "vm $vmid - unable to parse config: $line\n";
            }
        } elsif ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored at the line start so that keys merely ending in
            # "snapstate" are not mistaken for it
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(\S.*)\s*$/) {
            my $key = $1;
            my $value = $2;
            eval { $value = check_type($key, $value); };
            warn "vm $vmid - unable to parse value of '$key' - $@" if $@;
            # the value is stored even when the check failed
            $conf->{$key} = $value;
        } else {
            warn "vm $vmid - unable to parse config: $line\n";
        }
    }

    $conf->{description} = $descr if $descr;

    delete $res->{snapstate}; # just to be sure

    return $res;
}
577
# Return a hash { vmid => { type => 'lxc' } } with all containers which the
# cluster VM list assigns to the local node.
sub config_list {
    my %local_cts;

    my $vmlist = PVE::Cluster::get_vmlist();
    return \%local_cts if !$vmlist || !$vmlist->{ids};

    my $ids = $vmlist->{ids};
    for my $vmid (keys %$ids) {
        next if !$vmid; # skip CT0

        my $info = $ids->{$vmid};
        next unless $info->{node} && $info->{node} eq $nodename;
        next unless $info->{type} && $info->{type} eq 'lxc';

        $local_cts{$vmid}->{type} = 'lxc';
    }

    return \%local_cts;
}
593
# Build the path of a container config file relative to the cluster file
# system root. $node defaults to the local node.
sub cfs_config_path {
    my ($vmid, $node) = @_;

    my $owner = $node || $nodename;

    return "nodes/$owner/lxc/$vmid.conf";
}
600
# Return the absolute path (below /etc/pve) of a container config file.
sub config_file {
    my ($vmid, $node) = @_;

    return "/etc/pve/" . cfs_config_path($vmid, $node);
}
607
# Read the configuration of container $vmid from $node (default: the local
# node). Dies when no configuration could be read.
sub load_config {
    my ($vmid, $node) = @_;

    my $path = cfs_config_path($vmid, $node || $nodename);

    my $conf = PVE::Cluster::cfs_read_file($path);
    defined($conf)
        or die "container $vmid does not exist\n";

    return $conf;
}
619
# Write the configuration of a new container on the local node. The lxc
# config directory is created first; the result of mkdir is not checked.
sub create_config {
    my ($vmid, $conf) = @_;

    mkdir "/etc/pve/nodes/$nodename/lxc";

    return write_config($vmid, $conf);
}
628
# Delete the config file of container $vmid on the local node.
sub destroy_config {
    my ($vmid) = @_;

    my $path = config_file($vmid, $nodename);

    return unlink $path;
}
634
# Store $conf as the configuration of container $vmid on the local node.
sub write_config {
    my ($vmid, $conf) = @_;

    return PVE::Cluster::cfs_write_file(cfs_config_path($vmid), $conf);
}
642
# flock: one file handle is used per process, so locking the same file
# repeatedly from within one process succeeds.

my $lock_handles = {};
my $lockdir = "/run/lock/lxc";

# Return the path of the lock file that guards the config of container $vmid.
sub config_file_lock {
    my ($vmid) = @_;

    my $basename = "pve-config-${vmid}.lock";

    return "$lockdir/$basename";
}
654
# Run $code->(@param) while holding the config lock of container $vmid.
# $timeout is passed on to lock_file(). Errors left in $@ by lock_file()
# are re-thrown; otherwise the result of lock_file() is returned.
sub lock_config_full {
    my ($vmid, $timeout, $code, @param) = @_;

    my $lockfile = config_file_lock($vmid);

    -d $lockdir or mkdir $lockdir;

    my $result = lock_file($lockfile, $timeout, $code, @param);
    die $@ if $@;

    return $result;
}
668
# Like lock_config_full(), but additionally passes the $shared flag on to
# lock_file_full().
sub lock_config_mode {
    my ($vmid, $timeout, $shared, $code, @param) = @_;

    my $lockfile = config_file_lock($vmid);

    -d $lockdir or mkdir $lockdir;

    my $result = lock_file_full($lockfile, $timeout, $shared, $code, @param);
    die $@ if $@;

    return $result;
}
682
# Run $code->(@param) under the config lock of $vmid with a timeout value
# of 10.
sub lock_config {
    my ($vmid, $code, @param) = @_;

    my $timeout = 10;

    return lock_config_full($vmid, $timeout, $code, @param);
}
688
# Tell whether $name is an option known to the configuration schema.
sub option_exists {
    my ($name) = @_;

    my $schema = $confdesc->{$name};

    return defined($schema);
}
694
# Add the JSON properties for the create and set functions.
#
# Copies every schema entry except 'parent' and 'snaptime' into $prop,
# leaving entries alone that already hold a true value there. Returns $prop.
sub json_config_properties {
    my ($prop) = @_;

    my @options = grep { $_ ne 'parent' && $_ ne 'snaptime' } keys %$confdesc;

    for my $opt (@options) {
        $prop->{$opt} ||= $confdesc->{$opt};
    }

    return $prop;
}
707
# container status helpers

# Return a hash { vmid => 1 } of all containers that currently own a
# "@/var/lib/lxc/<vmid>/command" unix socket according to /proc/net/unix.
# An empty hash is returned when that file cannot be opened.
sub list_active_containers {

    my $filename = "/proc/net/unix";

    # a similar test is used by list_active_containers in lxccontainer.c
    my $res = {};

    # core open() is used so that this does not depend on some other module
    # having loaded IO::File
    open(my $fh, '<', $filename)
        or return $res;

    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^[a-f0-9]+:\s\S+\s\S+\s\S+\s\S+\s\S+\s\d+\s(\S+)$/) {
            my $path = $1;
            if ($path =~ m!^@/var/lib/lxc/(\d+)/command$!) {
                $res->{$1} = 1;
            }
        }
    }

    close($fh);

    return $res;
}
733
# Return 1 when container $vmid is listed as active, undef otherwise.
# warning: this is slow (it scans all active containers on every call)
sub check_running {
    my ($vmid) = @_;

    my $active = list_active_containers();

    return defined($active->{$vmid}) ? 1 : undef;
}
744
# Return the result of PVE::Tools::df() for the root directory of process
# $pid as seen through /proc. $vmid is not used.
sub get_container_disk_usage {
    my ($vmid, $pid) = @_;

    my $rootdir = "/proc/$pid/root/";

    return PVE::Tools::df($rootdir, 1);
}
750
# Per-container CPU samples remembered between vmstatus() calls.
my $last_proc_vmid_stat;

# Read 'cpuacct.stat' of container $vmid and return a hash with the keys
# 'utime' and 'stime'. The hash is empty when the content does not have the
# expected "user N\nsystem N\n" form.
my $parse_cpuacct_stat = sub {
    my ($vmid) = @_;

    my $raw = read_cgroup_value('cpuacct', $vmid, 'cpuacct.stat', 1);

    my %stat;
    if (my ($user, $system) = $raw =~ m/^user (\d+)\nsystem (\d+)\n/) {
        @stat{qw(utime stime)} = ($user, $system);
    }

    return \%stat;
};
769
# Collect status information about containers.
#
# Parameters:
#   $opt_vmid - optional; when set only this container is examined,
#               otherwise all containers returned by config_list()
#
# Returns a hash reference keyed by VMID. Each entry holds: type, pid (only
# when running), status ('running'/'stopped'), name, cpus, disk, maxdisk,
# mem, maxmem, swap, maxswap, uptime, cpu, netin, netout, diskread,
# diskwrite and template.
#
# The 'cpu' value is computed from the difference to the sample remembered
# from an earlier call, so it is 0 the first time a container is seen.
sub vmstatus {
    my ($opt_vmid) = @_;

    my $list = $opt_vmid ? { $opt_vmid => { type => 'lxc' }} : config_list();

    my $active_hash = list_active_containers();

    my $cpucount = $cpuinfo->{cpus} || 1;

    my $cdtime = gettimeofday;

    # first pass: values taken from the configuration plus zeroed counters
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};

        eval { $d->{pid} = find_lxc_pid($vmid) if defined($active_hash->{$vmid}); };
        warn $@ if $@; # ignore errors (consider them stopped)

        $d->{status} = $d->{pid} ? 'running' : 'stopped';

        my $cfspath = cfs_config_path($vmid);
        my $conf = PVE::Cluster::cfs_read_file($cfspath) || {};

        $d->{name} = $conf->{'hostname'} || "CT$vmid";
        $d->{name} =~ s/[\s]//g;

        $d->{cpus} = $conf->{cpulimit} || $cpucount;

        if ($d->{pid}) {
            my $res = get_container_disk_usage($vmid, $d->{pid});
            $d->{disk} = $res->{used};
            $d->{maxdisk} = $res->{total};
        } else {
            $d->{disk} = 0;
            # use 4GB by default ??
            if (my $rootfs = $conf->{rootfs}) {
                my $rootinfo = parse_ct_rootfs($rootfs);
                # parse_ct_rootfs() already ran the size through
                # PVE::JSONSchema::parse_size(), so it must not be scaled by
                # 1024^3 a second time.
                # NOTE(review): assumes parse_size() returns bytes - confirm
                $d->{maxdisk} = $rootinfo->{size} || (4*1024*1024*1024);
            } else {
                $d->{maxdisk} = 4*1024*1024*1024;
            }
        }

        $d->{mem} = 0;
        $d->{swap} = 0;
        $d->{maxmem} = ($conf->{memory}||512)*1024*1024;
        $d->{maxswap} = ($conf->{swap}//0)*1024*1024;

        $d->{uptime} = 0;
        $d->{cpu} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $d->{template} = is_template($conf);
    }

    # second pass: live values of running containers, read from cgroups
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};
        my $pid = $d->{pid};

        next if !$pid; # skip stopped CTs

        my $ctime = (stat("/proc/$pid"))[10]; # 10 = ctime
        # stat() yields nothing when the process is already gone; keep the
        # uptime at 0 in that case instead of calculating with undef
        $d->{uptime} = time - $ctime if defined($ctime); # the method lxcfs uses

        $d->{mem} = read_cgroup_value('memory', $vmid, 'memory.usage_in_bytes');
        $d->{swap} = read_cgroup_value('memory', $vmid, 'memory.memsw.usage_in_bytes') - $d->{mem};

        my $blkio_bytes = read_cgroup_value('blkio', $vmid, 'blkio.throttle.io_service_bytes', 1);
        my @bytes = split(/\n/, $blkio_bytes);
        foreach my $byte (@bytes) {
            if (my ($key, $value) = $byte =~ /(Read|Write)\s+(\d+)/) {
                $d->{diskread} = $value if $key eq 'Read';
                $d->{diskwrite} = $value if $key eq 'Write';
            }
        }

        my $pstat = &$parse_cpuacct_stat($vmid);

        # the stat hash is empty when cpuacct.stat could not be parsed
        my $used = ($pstat->{utime} // 0) + ($pstat->{stime} // 0);

        my $old = $last_proc_vmid_stat->{$vmid};
        if (!$old) {
            # first sample for this container: nothing to compare against yet
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($cdtime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        # only take a new sample when enough time has passed; otherwise the
        # previously calculated value is reported again
        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            $d->{cpu} = $old->{cpu};
        }
    }

    my $netdev = PVE::ProcFSTools::read_proc_net_dev();

    # sum up the traffic of all veth<vmid>i<N> devices per container; the
    # device's receive counter is booked as the container's outgoing traffic
    # and its transmit counter as incoming traffic
    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^veth([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $list->{$vmid};

        next if !$d;

        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};

    }

    return $list;
}
898
# Classify a mount point volume string:
#   'device' - absolute path below /dev/
#   'bind'   - any other absolute path
#   'volume' - everything else
sub classify_mountpoint {
    my ($vol) = @_;

    return 'volume' if $vol !~ m!^/!;

    return ($vol =~ m!^/dev/!) ? 'device' : 'bind';
}
907
# Parse a mount point property string according to the schema $desc.
#
# Parameters:
#   $desc  - property-string schema
#   $data  - the property string; undef is treated as ''
#   $noerr - when true, return undef instead of dying on errors
#
# Returns the parsed hash. A 'size' property is replaced by the result of
# PVE::JSONSchema::parse_size(), and 'type' is set to the classification of
# the volume (see classify_mountpoint).
my $parse_ct_mountpoint_full = sub {
    my ($desc, $data, $noerr) = @_;

    $data //= '';

    my $res;
    eval { $res = PVE::JSONSchema::parse_property_string($desc, $data) };
    if ($@) {
        return undef if $noerr;
        die $@;
    }

    if (defined(my $size = $res->{size})) {
        # keep the original string in $size so that the error message can
        # show what was rejected
        my $parsed = PVE::JSONSchema::parse_size($size);
        if (!defined($parsed)) {
            return undef if $noerr;
            die "invalid size: $size\n";
        }
        $res->{size} = $parsed;
    }

    $res->{type} = classify_mountpoint($res->{volume});

    return $res;
};
933
# Parse a 'rootfs' property string. A successful result additionally gets
# 'mp' set to '/'. With $noerr set, undef is returned on parse errors.
sub parse_ct_rootfs {
    my ($data, $noerr) = @_;

    my $rootfs = $parse_ct_mountpoint_full->($rootfs_desc, $data, $noerr);

    if (defined($rootfs)) {
        $rootfs->{mp} = '/';
    }

    return $rootfs;
}
943
# Parse an 'mpN' property string using the mount point schema.
sub parse_ct_mountpoint {
    my ($data, $noerr) = @_;

    return $parse_ct_mountpoint_full->($mp_desc, $data, $noerr);
}
949
# Turn a parsed mount point hash back into a property string. The 'type' key
# is always left out; 'mp' is left out as well when $nomp is set.
sub print_ct_mountpoint {
    my ($info, $nomp) = @_;

    my @skip = $nomp ? ('type', 'mp') : ('type');

    return PVE::JSONSchema::print_property_string($info, $mp_desc, \@skip);
}
956
# Turn a parsed network hash back into a 'netN' property string.
sub print_lxc_network {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $netconf_desc);
}
961
# Parse a 'netN' property string.
#
# Returns an empty hash for an empty or undefined string. Otherwise the
# parsed hash is returned with 'type' forced to 'veth' and a random MAC
# address filled in when 'hwaddr' is not set.
sub parse_lxc_network {
    my ($data) = @_;

    return {} if !$data;

    my $net = PVE::JSONSchema::parse_property_string($netconf_desc, $data);

    $net->{type} = 'veth';
    $net->{hwaddr} ||= PVE::Tools::random_ether_addr();

    return $net;
}
976
# Read the cgroup file $name of container $vmid in controller $group.
# Returns the whole file content when $full is set, else only its first line.
sub read_cgroup_value {
    my ($group, $vmid, $name, $full) = @_;

    my $path = join('/', '/sys/fs/cgroup', $group, 'lxc', $vmid, $name);

    return $full
        ? PVE::Tools::file_get_contents($path)
        : PVE::Tools::file_read_firstline($path);
}
986
# Write $value to the cgroup file $name of container $vmid in controller
# $group. Nothing is written when that file does not exist.
sub write_cgroup_value {
    my ($group, $vmid, $name, $value) = @_;

    my $path = join('/', '/sys/fs/cgroup', $group, 'lxc', $vmid, $name);

    -e $path and PVE::ProcFSTools::write_proc_entry($path, $value);
}
994
# Scan /proc for processes whose command line is exactly
# "lxc-console -n <vmid>" (optionally with the /usr/bin/ prefix).
# Returns a hash { vmid => [ pid, ... ] }.
sub find_lxc_console_pids {

    my %pids_of;

    my $inspect = sub {
        my ($pid) = @_;

        my $cmdline = PVE::Tools::file_read_firstline("/proc/$pid/cmdline");
        return if !$cmdline;

        # arguments in /proc/<pid>/cmdline are separated by NUL bytes
        my @argv = split(/\0/, $cmdline);
        return if scalar(@argv) != 3;

        my ($prog, $flag, $vmid) = @argv;
        return if $flag ne '-n';
        return if $vmid !~ m/^\d+$/;
        return if $prog !~ m|^(/usr/bin/)?lxc-console$|;

        push @{$pids_of{$vmid}}, $pid;
    };

    PVE::Tools::dir_glob_foreach('/proc', '\d+', $inspect);

    return \%pids_of;
}
1020
# Return the PID reported by "lxc-info -n <vmid> -p". Dies when the output
# contains no "PID: <number>" line.
sub find_lxc_pid {
    my ($vmid) = @_;

    my $pid;

    PVE::Tools::run_command(
        ['lxc-info', '-n', $vmid, '-p'],
        outfunc => sub {
            my ($line) = @_;
            if ($line =~ m/^PID:\s+(\d+)$/) {
                $pid = $1;
            }
        },
    );

    $pid or die "unable to get PID for CT $vmid (not running?)\n";

    return $pid;
}
1035
# Parse an "address/prefix" IPv4 string with a prefix length from 8 to 32.
# Returns { address => ..., netmask => ... }. On failure it returns undef
# when $noerr is set and dies otherwise.
#
# Note: Net::IP cannot be used here, because it only allows strict CIDR
# networks.
sub parse_ipv4_cidr {
    my ($cidr, $noerr) = @_;

    if ($cidr =~ m!^($IPV4RE)(?:/(\d+))$!) {
        my ($address, $prefix) = ($1, $2);
        if (($prefix > 7) && ($prefix <= 32)) {
            my $netmask = $PVE::Network::ipv4_reverse_mask->[$prefix];
            return { address => $address, netmask => $netmask };
        }
    }

    return undef if $noerr;

    die "unable to parse ipv4 address/mask\n";
}
1049
# Die when the configuration carries a lock, naming the lock in the message.
sub check_lock {
    my ($conf) = @_;

    my $lock = $conf->{'lock'};

    $lock and die "VM is locked ($lock)\n";
}
1055
# Die with "$err_msg - protection mode enabled" when the protection flag of
# the configuration is set.
sub check_protection {
    my ($vm_conf, $err_msg) = @_;

    die "$err_msg - protection mode enabled\n" if $vm_conf->{protection};
}
1063
# Generate /var/lib/lxc/<vmid>/config from the container configuration.
#
# For templates an existing config file is removed and nothing is written.
# Otherwise the settings are collected line by line, the rootfs directory is
# created and the file is written. $storage_cfg is not used.
#
# Dies when 'arch' or 'ostype' is missing or the ostype is not supported.
sub update_lxc_config {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $dir = "/var/lib/lxc/$vmid";

    if ($conf->{template}) {
        unlink "$dir/config";
        return;
    }

    # config lines without their trailing newline, in output order
    my @cfg;

    die "missing 'arch' - internal error" if !$conf->{arch};
    push @cfg, "lxc.arch = $conf->{arch}";

    my $unprivileged = $conf->{unprivileged};
    my $custom_idmap = grep { $_->[0] eq 'lxc.id_map' } @{$conf->{lxc}};

    my $ostype = $conf->{ostype} || die "missing 'ostype' - internal error";
    die "implement me (ostype $ostype)"
        if $ostype !~ /^(?:debian | ubuntu | centos | fedora | opensuse | archlinux | alpine | unmanaged)$/x;

    # prefer the ostype specific include file, fall back to the generic one
    my $pick_include = sub {
        my ($specific, $generic) = @_;
        return (-f $specific) ? $specific : $generic;
    };

    my $common_inc = $pick_include->(
        "/usr/share/lxc/config/$ostype.common.conf",
        "/usr/share/lxc/config/common.conf");
    push @cfg, "lxc.include = $common_inc";

    if ($unprivileged || $custom_idmap) {
        my $userns_inc = $pick_include->(
            "/usr/share/lxc/config/$ostype.userns.conf",
            "/usr/share/lxc/config/userns.conf");
        push @cfg, "lxc.include = $userns_inc";
    }

    # WARNING: DO NOT REMOVE this without making sure that loop device nodes
    # cannot be exposed to the container with r/w access (cgroup perms).
    # When this is enabled mounts will still remain in the monitor's namespace
    # after the container unmounted them and thus will not detach from their
    # files while the container is running!
    push @cfg, "lxc.monitor.unshare = 1";

    # Should we read them from /etc/subuid?
    if ($unprivileged && !$custom_idmap) {
        push @cfg,
            "lxc.id_map = u 0 100000 65536",
            "lxc.id_map = g 0 100000 65536";
    }

    if (!has_dev_console($conf)) {
        push @cfg,
            "lxc.console = none",
            "lxc.cgroup.devices.deny = c 5:1 rwm";
    }

    my $ttycount = get_tty_count($conf);
    push @cfg, "lxc.tty = $ttycount";

    # some init scripts expect a linux terminal (turnkey).
    push @cfg, "lxc.environment = TERM=linux";

    my $utsname = $conf->{hostname} || "CT$vmid";
    push @cfg, "lxc.utsname = $utsname";

    # memory and swap are configured in MB, the cgroup limits are bytes;
    # the memsw limit covers memory plus swap
    my $memory = $conf->{memory} || 512;
    my $swap = $conf->{swap} // 0;

    my $lxcmem = int($memory*1024*1024);
    my $lxcswap = int(($memory + $swap)*1024*1024);
    push @cfg,
        "lxc.cgroup.memory.limit_in_bytes = $lxcmem",
        "lxc.cgroup.memory.memsw.limit_in_bytes = $lxcswap";

    my $cpulimit = $conf->{cpulimit};
    if ($cpulimit) {
        my $quota = int(100000*$cpulimit);
        push @cfg,
            "lxc.cgroup.cpu.cfs_period_us = 100000",
            "lxc.cgroup.cpu.cfs_quota_us = $quota";
    }

    my $shares = $conf->{cpuunits} || 1024;
    push @cfg, "lxc.cgroup.cpu.shares = $shares";

    # the parsed result is not used below; the call is kept as it is part
    # of the existing behavior
    my $mountpoint = parse_ct_rootfs($conf->{rootfs});

    push @cfg, "lxc.rootfs = $dir/rootfs";

    my $netcount = 0;
    for my $key (keys %$conf) {
        next if $key !~ m/^net(\d+)$/;
        my $ind = $1;

        my $net = parse_lxc_network($conf->{$key});
        $netcount++;

        push @cfg, "lxc.network.type = veth";
        push @cfg, "lxc.network.veth.pair = veth${vmid}i${ind}";
        for my $prop (qw(hwaddr name mtu)) {
            next if !defined($net->{$prop});
            push @cfg, "lxc.network.$prop = $net->{$prop}";
        }
    }

    # raw lxc entries are appended last, in their stored order
    my $lxcconf = $conf->{lxc};
    if ($lxcconf) {
        for my $entry (@$lxcconf) {
            my ($k, $v) = @$entry;
            $netcount++ if $k eq 'lxc.network.type';
            push @cfg, "$k = $v";
        }
    }

    push @cfg, "lxc.network.type = empty" if !$netcount;

    File::Path::mkpath("$dir/rootfs");

    PVE::Tools::file_set_contents("$dir/config", join('', map { "$_\n" } @cfg));
}
1174
# Verify and clean up a nameserver list: every entry is checked with
# PVE::JSONSchema::pve_verify_ip() and the entries are returned joined by
# single spaces.
sub verify_nameserver_list {
    my ($nameserver_list) = @_;

    my @servers = PVE::Tools::split_list($nameserver_list);

    for my $server (@servers) {
        PVE::JSONSchema::pve_verify_ip($server);
    }

    return join(' ', @servers);
}
1187
# Normalise a search domain list into a single space-separated string.
# The entries themselves are currently not validated.
sub verify_searchdomain_list {
    my ($searchdomain_list) = @_;

    # todo: should we add checks for valid dns domains?
    my @domains = PVE::Tools::split_list($searchdomain_list);

    return join(' ', @domains);
}
1199
# Return 1 if any mountpoint in $config is of type 'volume' and refers to
# $volid, 0 otherwise.
sub is_volume_in_use {
    my ($config, $volid) = @_;

    my $found = 0;

    my $matcher = sub {
        my ($ms, $mountpoint) = @_;

        return if $found; # already decided, nothing more to check
        return if $mountpoint->{type} ne 'volume';
        return if $mountpoint->{volume} ne $volid;

        $found = 1;
    };
    foreach_mountpoint($config, $matcher);

    return $found;
}
1214
# Record $volid in the lowest free 'unusedN' slot of $config.
#
# Returns the key that was used, or nothing if the volume is already listed
# in one of the unused slots. Dies when every slot is occupied.
sub add_unused_volume {
    my ($config, $volid) = @_;

    my $free_slot;
    foreach my $index (0 .. $MAX_UNUSED_DISKS - 1) {
        my $slot = "unused$index";
        my $current = $config->{$slot};

        if ($current) {
            return if $current eq $volid; # do not add duplicates
        } elsif (!defined($free_slot)) {
            # remember only the first (lowest) free slot
            $free_slot = $slot;
        }
    }

    die "Too many unused volumes - please delete them first.\n" if !$free_slot;

    $config->{$free_slot} = $volid;

    return $free_slot;
}
1234
# Apply option deletions and updates to a container configuration.
#
# Parameters:
#   $vmid    - container ID
#   $conf    - current configuration hash, modified in place
#   $running - true if the container is currently running
#   $param   - hash of options to set ('memory' and 'swap' are taken out of
#              it via PVE::Tools::extract_param)
#   $delete  - optional array ref with the names of options to delete
#
# While the container is running the configuration is written back after
# each processed option. Options that cannot be changed at runtime are
# skipped and collected; the function dies at the very end naming them.
sub update_pct_config {
    my ($vmid, $conf, $running, $param, $delete) = @_;

    my @nohotplug;

    my $new_disks = 0;
    my @deleted_volumes;

    my $rootdir;
    if ($running) {
        my $pid = find_lxc_pid($vmid);
        $rootdir = "/proc/$pid/root";
    }

    # Returns 1 (and remembers the option) if the container is running and
    # the option therefore cannot be applied; returns 0 otherwise.
    my $hotplug_error = sub {
        if ($running) {
            push @nohotplug, @_;
            return 1;
        } else {
            return 0;
        }
    };

    if (defined($delete)) {
        foreach my $opt (@$delete) {
            if (!exists($conf->{$opt})) {
                warn "no such option: $opt\n";
                next;
            }

            if ($opt eq 'hostname' || $opt eq 'memory' || $opt eq 'rootfs') {
                die "unable to delete required option '$opt'\n";
            } elsif ($opt eq 'swap') {
                delete $conf->{$opt};
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", -1);
            } elsif ($opt eq 'description' || $opt eq 'onboot' || $opt eq 'startup') {
                delete $conf->{$opt};
            } elsif ($opt eq 'nameserver' || $opt eq 'searchdomain' ||
                     $opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
                next if $hotplug_error->($opt);
                delete $conf->{$opt};
            } elsif ($opt =~ m/^net(\d+)$/) {
                # same pattern as used when setting netN below
                my $netid = $1;
                delete $conf->{$opt};
                next if !$running;
                PVE::Network::veth_delete("veth${vmid}i$netid");
            } elsif ($opt eq 'protection') {
                delete $conf->{$opt};
            } elsif ($opt =~ m/^unused(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                push @deleted_volumes, $conf->{$opt};
                delete $conf->{$opt};
            } elsif ($opt =~ m/^mp(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                my $mp = parse_ct_mountpoint($conf->{$opt});
                delete $conf->{$opt};
                # keep track of the volume unless another mountpoint still uses it
                if ($mp->{type} eq 'volume' && !is_volume_in_use($conf, $mp->{volume})) {
                    add_unused_volume($conf, $mp->{volume});
                }
            } elsif ($opt eq 'unprivileged') {
                die "unable to delete read-only option: '$opt'\n";
            } else {
                die "implement me (delete: $opt)"
            }
            write_config($vmid, $conf) if $running;
        }
    }

    # There's no separate swap size to configure, there's memory and "total"
    # memory (iow. memory+swap). This means we have to change them together.
    my $wanted_memory = PVE::Tools::extract_param($param, 'memory');
    my $wanted_swap = PVE::Tools::extract_param($param, 'swap');
    if (defined($wanted_memory) || defined($wanted_swap)) {

        my $old_memory = ($conf->{memory} || 512);
        my $old_swap = ($conf->{swap} || 0);

        $wanted_memory //= $old_memory;
        $wanted_swap //= $old_swap;

        my $total = $wanted_memory + $wanted_swap;
        if ($running) {
            my $old_total = $old_memory + $old_swap;
            # When growing, the total limit is written before the memory
            # limit; when shrinking, the memory limit is written first.
            if ($total > $old_total) {
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
                write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
            } else {
                write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
            }
        }
        $conf->{memory} = $wanted_memory;
        $conf->{swap} = $wanted_swap;

        write_config($vmid, $conf) if $running;
    }

    # volume IDs referenced by mountpoints set in this call
    my $used_volids = {};

    foreach my $opt (keys %$param) {
        my $value = $param->{$opt};
        if ($opt eq 'hostname') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'onboot') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt eq 'startup') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
            next if $hotplug_error->($opt);
            $conf->{$opt} = $value;
        } elsif ($opt eq 'nameserver') {
            next if $hotplug_error->($opt);
            my $list = verify_nameserver_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'searchdomain') {
            next if $hotplug_error->($opt);
            my $list = verify_searchdomain_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'cpulimit') {
            next if $hotplug_error->($opt); # FIXME: hotplug
            $conf->{$opt} = $value;
        } elsif ($opt eq 'cpuunits') {
            $conf->{$opt} = $value;
            write_cgroup_value("cpu", $vmid, "cpu.shares", $value);
        } elsif ($opt eq 'description') {
            $conf->{$opt} = PVE::Tools::encode_text($value);
        } elsif ($opt =~ m/^net(\d+)$/) {
            my $netid = $1;
            my $net = parse_lxc_network($value);
            if (!$running) {
                $conf->{$opt} = print_lxc_network($net);
            } else {
                update_net($vmid, $conf, $opt, $net, $netid, $rootdir);
            }
        } elsif ($opt eq 'protection') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt =~ m/^mp(\d+)$/) {
            next if $hotplug_error->($opt);
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            my $old = $conf->{$opt};
            $conf->{$opt} = $value;
            if (defined($old)) {
                my $mp = parse_ct_mountpoint($old);
                if ($mp->{type} eq 'volume' && !is_volume_in_use($conf, $mp->{volume})) {
                    add_unused_volume($conf, $mp->{volume});
                }
            }
            $new_disks = 1;
            my $mp = parse_ct_mountpoint($value);
            $used_volids->{$mp->{volume}} = 1;
        } elsif ($opt eq 'rootfs') {
            next if $hotplug_error->($opt);
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            my $old = $conf->{$opt};
            $conf->{$opt} = $value;
            if (defined($old)) {
                my $mp = parse_ct_rootfs($old);
                if ($mp->{type} eq 'volume' && !is_volume_in_use($conf, $mp->{volume})) {
                    add_unused_volume($conf, $mp->{volume});
                }
            }
            my $mp = parse_ct_rootfs($value);
            $used_volids->{$mp->{volume}} = 1;
        } elsif ($opt eq 'unprivileged') {
            die "unable to modify read-only option: '$opt'\n";
        } elsif ($opt eq 'ostype') {
            next if $hotplug_error->($opt);
            $conf->{$opt} = $value;
        } else {
            die "implement me: $opt";
        }
        write_config($vmid, $conf) if $running;
    }

    # Cleanup config:

    # Remove unused disks after re-adding
    foreach my $key (keys %$conf) {
        next if $key !~ /^unused\d+$/;
        my $volid = $conf->{$key};
        if ($used_volids->{$volid}) {
            delete $conf->{$key};
        }
    }

    # Apply deletions and creations of new volumes
    if (@deleted_volumes) {
        my $storage_cfg = PVE::Storage::config();
        foreach my $volume (@deleted_volumes) {
            next if $used_volids->{$volume}; # could have been re-added, too
            delete_mountpoint_volume($storage_cfg, $vmid, $volume);
        }
    }

    if ($new_disks) {
        my $storage_cfg = PVE::Storage::config();
        create_disks($storage_cfg, $vmid, $conf, $conf);
    }

    # This should be the last thing we do here
    if ($running && scalar(@nohotplug)) {
        die "unable to modify " . join(',', @nohotplug) . " while container is running\n";
    }
}
1441
# True unless the 'console' option is explicitly set to a false value.
sub has_dev_console {
    my ($conf) = @_;

    my $console = $conf->{console};

    # an unset option means the console device is enabled
    return 1 if !defined($console);

    return !!$console;
}
1447
# Return the configured 'tty' value, or the default from $confdesc when the
# option is not defined in $conf.
sub get_tty_count {
    my ($conf) = @_;

    my $configured = $conf->{tty};
    return $configured if defined($configured);

    return $confdesc->{tty}->{default};
}
1453
# Return the configured 'cmode' value, or the default from $confdesc when
# the option is not defined in $conf.
sub get_cmode {
    my ($conf) = @_;

    my $configured = $conf->{cmode};
    return $configured if defined($configured);

    return $confdesc->{cmode}->{default};
}
1459
# Build the command (as an array ref) used to open a console for container
# $vmid, depending on the console mode returned by get_cmode().
# Dies with "internal error" for an unknown mode.
sub get_console_command {
    my ($vmid, $conf) = @_;

    my $cmode = get_cmode($conf);

    my $command_for = {
        console => ['lxc-console', '-n', $vmid, '-t', 0],
        tty => ['lxc-console', '-n', $vmid],
        shell => ['lxc-attach', '--clear-env', '-n', $vmid],
    };

    my $command = $command_for->{$cmode};
    die "internal error" if !$command;

    return $command;
}
1475
# Return ($ipv4, $ipv6) taken from the net0 entry of $conf.
#
# Keyword values (dhcp/manual for IPv4, auto/dhcp/manual for IPv6) are
# reported as undef; otherwise a trailing "/<digits>" suffix is stripped.
# Returns undef when net0 is not configured.
sub get_primary_ips {
    my ($conf) = @_;

    # return data from net0
    my $net0 = $conf->{net0};
    return undef if !defined($net0);

    my $net = parse_lxc_network($net0);

    my $plain_address = sub {
        my ($address, $keywords) = @_;

        return $address if !$address;
        return undef if $address =~ $keywords;

        $address =~ s!/\d+$!!;
        return $address;
    };

    my $ipv4 = $plain_address->($net->{ip}, qr/^(dhcp|manual)$/);
    my $ipv6 = $plain_address->($net->{ip6}, qr/^(auto|dhcp|manual)$/);

    return ($ipv4, $ipv6);
}
1503
# Free the storage volume behind a mountpoint, but only if it classifies as
# a 'volume' and the owner reported by the storage layer equals $vmid.
sub delete_mountpoint_volume {
    my ($storage_cfg, $vmid, $volume) = @_;

    my $kind = classify_mountpoint($volume);
    return if $kind ne 'volume';

    # only the third element (the owner) of the parsed volume name is needed
    my (undef, undef, $owner) = PVE::Storage::parse_volname($storage_cfg, $volume);

    PVE::Storage::vdisk_free($storage_cfg, $volume) if $vmid == $owner;
}
1512
# Remove a container: free the volumes of all its mountpoints, remove its
# directory below /var/lib/lxc and finally destroy its configuration.
sub destroy_lxc_container {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $free_volume = sub {
        my ($ms, $mountpoint) = @_;
        delete_mountpoint_volume($storage_cfg, $vmid, $mountpoint->{volume});
    };
    foreach_mountpoint($conf, $free_volume);

    my $ct_dir = "/var/lib/lxc/$vmid";
    rmdir "$ct_dir/rootfs";
    unlink "$ct_dir/config";
    rmdir $ct_dir;

    destroy_config($vmid);

    # lxc-destroy is intentionally not used here:
    #my $cmd = ['lxc-destroy', '-n', $vmid ];
    #PVE::Tools::run_command($cmd);
}
1529
# Deactivate the container's volumes after it was stopped, unless
# $keepActive is set. Failures are only reported as warnings.
sub vm_stop_cleanup {
    my ($storage_cfg, $vmid, $conf, $keepActive) = @_;

    eval {
        return if $keepActive; # leaves the eval block only

        my $vollist = get_vm_volumes($conf);
        PVE::Storage::deactivate_volumes($storage_cfg, $vollist);
    };
    warn $@ if $@; # avoid errors - just warn
}
1542
# Numeric "not equal" that tolerates undef: two undefs are equal (0), an
# undef never equals a defined value (1), otherwise the result of '!='.
my $safe_num_ne = sub {
    my ($left, $right) = @_;

    if (defined($left) && defined($right)) {
        return $left != $right;
    }

    # at least one side is undef: they differ unless both are undef
    return (defined($left) || defined($right)) ? 1 : 0;
};
1552
# String "not equal" that tolerates undef: two undefs are equal (0), an
# undef never equals a defined value (1), otherwise the result of 'ne'.
my $safe_string_ne = sub {
    my ($left, $right) = @_;

    if (defined($left) && defined($right)) {
        return $left ne $right;
    }

    # at least one side is undef: they differ unless both are undef
    return (defined($left) || defined($right)) ? 1 : 0;
};
1562
# Apply a changed network configuration ($newnet) for option $opt of a
# container.
#
# - no previous config: the interface is hot-plugged
# - hwaddr or name changed: the veth device is deleted and plugged again
# - bridge, tag or firewall changed: the device is re-plugged to the bridge
# Afterwards the IP configuration is updated via update_ipconfig().
# Dies for interface types other than 'veth'.
sub update_net {
    my ($vmid, $conf, $opt, $newnet, $netid, $rootdir) = @_;

    # for when there are physical interfaces
    die "cannot update interface of type $newnet->{type}"
        if $newnet->{type} ne 'veth';

    my $veth = "veth${vmid}i${netid}";
    my $eth = $newnet->{name};
    my @plug_keys = qw(bridge tag firewall);

    my $oldnetcfg = $conf->{$opt};
    if (!$oldnetcfg) {
        hotplug_net($vmid, $conf, $opt, $newnet, $netid);
    } else {
        my $oldnet = parse_lxc_network($oldnetcfg);

        # compare one property of the old and new config with the given comparator
        my $differs = sub {
            my ($compare, $key) = @_;
            return $compare->($oldnet->{$key}, $newnet->{$key});
        };

        if ($differs->($safe_string_ne, 'hwaddr') ||
            $differs->($safe_string_ne, 'name')) {

            PVE::Network::veth_delete($veth);
            delete $conf->{$opt};
            write_config($vmid, $conf);

            hotplug_net($vmid, $conf, $opt, $newnet, $netid);

        } elsif ($differs->($safe_string_ne, 'bridge') ||
                 $differs->($safe_num_ne, 'tag') ||
                 $differs->($safe_num_ne, 'firewall')) {

            if ($oldnet->{bridge}) {
                PVE::Network::tap_unplug($veth);
                delete @{$oldnet}{@plug_keys};
                # persist the intermediate (unplugged) state
                $conf->{$opt} = print_lxc_network($oldnet);
                write_config($vmid, $conf);
            }

            PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});
            foreach my $key (@plug_keys) {
                $oldnet->{$key} = $newnet->{$key} if $newnet->{$key};
            }
            $conf->{$opt} = print_lxc_network($oldnet);
            write_config($vmid, $conf);
        }
    }

    update_ipconfig($vmid, $conf, $opt, $eth, $newnet, $rootdir);
}
1612
# Create a veth pair for a running container, plug the host side into the
# bridge, hand the peer to the container and bring it up there. The
# bridge/tag/firewall/hwaddr/name settings that were applied are then stored
# in $conf->{$opt} and the config is written.
sub hotplug_net {
    my ($vmid, $conf, $opt, $newnet, $netid) = @_;

    my $veth = "veth${vmid}i${netid}";
    my $vethpeer = $veth . "p";
    my $eth = $newnet->{name};

    my ($bridge, $tag, $firewall, $trunks) = @{$newnet}{qw(bridge tag firewall trunks)};

    PVE::Network::veth_create($veth, $vethpeer, $bridge, $newnet->{hwaddr});
    PVE::Network::tap_plug($veth, $bridge, $tag, $firewall, $trunks);

    # attach peer in container
    PVE::Tools::run_command(['lxc-device', '-n', $vmid, 'add', $vethpeer, "$eth"]);

    # link up peer in container
    PVE::Tools::run_command(
        ['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', '/sbin/ip', 'link', 'set', $eth, 'up']);

    # only properties with a true value end up in the stored config
    my %done = (type => 'veth');
    foreach my $key (qw(bridge tag firewall hwaddr name)) {
        my $value = $newnet->{$key};
        $done{$key} = $value if $value;
    }
    $conf->{$opt} = print_lxc_network(\%done);

    write_config($vmid, $conf);
}
1639
# Apply changed IPv4/IPv6 address and gateway settings of interface $eth
# inside a running container and store the result in $conf->{$opt}.
#
# For each IP version the new address is added first, then the default
# route is replaced, and only then the old address is removed. If adding
# the address or replacing the route fails, the change for that IP version
# is cancelled (with a warning) and the configuration is left untouched.
sub update_ipconfig {
    my ($vmid, $conf, $opt, $eth, $newnet, $rootdir) = @_;

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);

    my $optdata = parse_lxc_network($conf->{$opt});
    my $deleted = [];
    my $added = [];

    # run a command inside the container's network namespace
    my $nscmd = sub {
        my $cmdargs = shift;
        PVE::Tools::run_command(['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', @_], %$cmdargs);
    };
    my $ipcmd = sub { $nscmd->({}, '/sbin/ip', @_) };

    # values of the ip/ip6 property that are keywords rather than addresses
    my $keyword_re = qr/^(?:auto|dhcp|manual)$/;

    my $change_ip_config = sub {
        my ($ipversion) = @_;

        my $family_opt = "-$ipversion";
        my $suffix = $ipversion == 4 ? '' : $ipversion;
        my $gw = "gw$suffix";
        my $ip = "ip$suffix";

        my $newip = $newnet->{$ip};
        my $newgw = $newnet->{$gw};
        my $oldip = $optdata->{$ip};

        my $change_ip = $safe_string_ne->($oldip, $newip);
        my $change_gw = $safe_string_ne->($optdata->{$gw}, $newgw);

        return if !$change_ip && !$change_gw;

        # step 1: add new IP, if this fails we cancel
        my $is_real_ip = ($newip && $newip !~ $keyword_re);
        if ($change_ip && $is_real_ip) {
            eval { $ipcmd->($family_opt, 'addr', 'add', $newip, 'dev', $eth); };
            if (my $err = $@) {
                warn $err;
                return;
            }
        }

        # step 2: replace gateway
        # If this fails we delete the added IP and cancel.
        # If it succeeds we save the config and delete the old IP, ignoring
        # errors. The config is then saved.
        # Note: 'ip route replace' can also add the route if there is none yet.
        if ($change_gw) {
            if ($newgw) {
                eval {
                    if ($is_real_ip && !PVE::Network::is_ip_in_cidr($newgw, $newip, $ipversion)) {
                        $ipcmd->($family_opt, 'route', 'add', $newgw, 'dev', $eth);
                    }
                    $ipcmd->($family_opt, 'route', 'replace', 'default', 'via', $newgw);
                };
                if (my $err = $@) {
                    warn $err;
                    # the route was not replaced, the old IP is still available
                    # rollback (delete new IP) and cancel; there is only
                    # something to roll back if step 1 actually added an address
                    if ($change_ip && $is_real_ip) {
                        eval { $ipcmd->($family_opt, 'addr', 'del', $newip, 'dev', $eth); };
                        warn $@ if $@; # no need to die here
                    }
                    return;
                }
            } else {
                eval { $ipcmd->($family_opt, 'route', 'del', 'default'); };
                # if the route was not deleted, the guest might have deleted it manually
                # warn and continue
                warn $@ if $@;
            }
        }

        # from this point on we save the configuration
        # step 3: delete old IP ignoring errors
        # (keyword values such as 'manual' are not addresses and cannot be deleted)
        if ($change_ip && $oldip && $oldip !~ $keyword_re) {
            # We need to enable promote_secondaries, otherwise our newly added
            # address will be removed along with the old one.
            my $promote = 0;
            eval {
                if ($ipversion == 4) {
                    $nscmd->({ outfunc => sub { $promote = int(shift) } },
                             'cat', "/proc/sys/net/ipv4/conf/$eth/promote_secondaries");
                    $nscmd->({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=1");
                }
                $ipcmd->($family_opt, 'addr', 'del', $oldip, 'dev', $eth);
            };
            warn $@ if $@; # no need to die here

            # restore the previous promote_secondaries setting
            if ($ipversion == 4) {
                $nscmd->({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=$promote");
            }
        }

        foreach my $property ($ip, $gw) {
            if ($newnet->{$property}) {
                $optdata->{$property} = $newnet->{$property};
            } else {
                delete $optdata->{$property};
            }
        }
        $conf->{$opt} = print_lxc_network($optdata);
        write_config($vmid, $conf);
        $lxc_setup->setup_network($conf);
    };

    $change_ip_config->(4);
    $change_ip_config->(6);

}
1749
1750 # Internal snapshots
1751
1752 # NOTE: Snapshot create/delete involves several non-atomic
1753 # actions, and can take a long time.
1754 # So we try to avoid locking the file and use the 'lock' variable
1755 # inside the config file instead.
1756
# Copy all configuration keys from $source into $dest, leaving out the
# snapshot bookkeeping keys, 'lock', 'digest', 'description' and the
# 'unusedN' entries.
my $snapshot_copy_config = sub {
    my ($source, $dest) = @_;

    my %skip = map { $_ => 1 }
        qw(snapshots snapstate snaptime vmstate lock digest description);

    foreach my $key (keys %$source) {
        next if $skip{$key};
        next if $key =~ m/^unused\d+$/;

        $dest->{$key} = $source->{$key};
    }
};
1773
# Build a new configuration from snapshot $snap: the snapshot list, the
# description and the unused-disk entries are taken from the current $conf,
# everything else is copied from the snapshot.
my $snapshot_apply_config = sub {
    my ($conf, $snap) = @_;

    my $newconf = {};

    # copy snapshot list
    $newconf->{snapshots} = $conf->{snapshots};

    # keep description and list of unused disks
    my @kept = grep { m/^unused\d+$/ || $_ eq 'description' } keys %$conf;
    foreach my $key (@kept) {
        $newconf->{$key} = $conf->{$key};
    }

    $snapshot_copy_config->($snap, $newconf);

    return $newconf;
};
1792
# Placeholder: saving the runtime state is not implemented, so this always
# dies.
my $snapshot_save_vmstate = sub {
    my $message = "implement me - snapshot_save_vmstate\n";
    die $message;
};
1796
# First phase of snapshot creation: under the config lock, set the
# 'snapshot' lock in the config and add a new snapshot entry in state
# "prepare" holding a copy of the current configuration.
#
# Dies if the container is a template, is locked, the snapshot name is
# already taken, or the snapshot feature is not available.
# Returns the new snapshot entry.
sub snapshot_prepare {
    my ($vmid, $snapname, $save_vmstate, $comment) = @_;

    my $snap;

    my $prepare_entry = sub {

        my $conf = load_config($vmid);

        if (is_template($conf)) {
            die "you can't take a snapshot if it's a template\n";
        }

        check_lock($conf);

        $conf->{lock} = 'snapshot';

        if (defined($conf->{snapshots}->{$snapname})) {
            die "snapshot name '$snapname' already used\n";
        }

        my $storecfg = PVE::Storage::config();

        # workaround until mp snapshots are implemented
        my $feature = $snapname eq 'vzdump' ? 'vzdump' : 'snapshot';
        if (!has_feature($feature, $conf, $storecfg)) {
            die "snapshot feature is not available\n";
        }

        $snap = {};
        $conf->{snapshots}->{$snapname} = $snap;

        if ($save_vmstate && check_running($vmid)) {
            $snapshot_save_vmstate->($vmid, $conf, $snapname, $storecfg);
        }

        $snapshot_copy_config->($conf, $snap);

        $snap->{snapstate} = "prepare";
        $snap->{snaptime} = time();
        $snap->{description} = $comment if $comment;

        write_config($vmid, $conf);
    };

    lock_config($vmid, $prepare_entry);

    return $snap;
}
1841
# Final phase of snapshot creation: under the config lock, verify that the
# 'snapshot' lock and a snapshot entry in state "prepare" exist, clear both
# markers and write the config with 'parent' set to the new snapshot.
sub snapshot_commit {
    my ($vmid, $snapname) = @_;

    my $commit = sub {

        my $conf = load_config($vmid);

        my $lock = $conf->{lock};
        die "missing snapshot lock\n"
            if !($lock && $lock eq 'snapshot');

        my $snap = $conf->{snapshots}->{$snapname};
        die "snapshot '$snapname' does not exist\n" if !defined($snap);

        my $state = $snap->{snapstate};
        die "wrong snapshot state\n"
            if !($state && $state eq "prepare");

        delete $snap->{snapstate};
        delete $conf->{lock};

        my $newconf = $snapshot_apply_config->($conf, $snap);

        $newconf->{parent} = $snapname;

        write_config($vmid, $newconf);
    };

    lock_config($vmid, $commit);
}
1870
# Check whether the storage volumes of the container support $feature.
#
# The pseudo feature 'vzdump' is checked as 'snapshot', but skips all
# mountpoints other than rootfs that do not have 'backup' set.
# Returns 1 if every checked volume has the feature, 0 otherwise. Dies for
# any checked mountpoint other than rootfs (not implemented yet).
sub has_feature {
    my ($feature, $conf, $storecfg, $snapname) = @_;

    my $vzdump = $feature eq 'vzdump';
    $feature = 'snapshot' if $vzdump;

    my $unsupported;

    my $check = sub {
        my ($ms, $mountpoint) = @_;

        return if $unsupported; # skip further test
        return if $vzdump && $ms ne 'rootfs' && !$mountpoint->{backup};

        if (!PVE::Storage::volume_has_feature($storecfg, $feature, $mountpoint->{volume}, $snapname)) {
            $unsupported = 1;
        }

        # TODO: implement support for mountpoints
        die "unable to handle mountpoint '$ms' - feature not implemented\n"
            if $ms ne 'rootfs';
    };
    foreach_mountpoint($conf, $check);

    return $unsupported ? 0 : 1;
}
1893
# Enter namespace $which (of kind $type) of process $pid by opening its
# /proc/<pid>/ns entry and passing the descriptor to PVE::Tools::setns.
# Dies if the namespace cannot be opened or entered.
my $enter_namespace = sub {
    my ($vmid, $pid, $which, $type) = @_;

    my $ns_path = "/proc/$pid/ns/$which";

    sysopen(my $ns_fh, $ns_path, O_RDONLY)
        or die "failed to open $which namespace of container $vmid: $!\n";

    PVE::Tools::setns(fileno($ns_fh), $type)
        or die "failed to enter $which namespace of container $vmid: $!\n";

    close $ns_fh;
};
1902
# Child side of sync_container_namespace(): enter the container's mount
# namespace, signal "go" over $socket, read the mount table sent back by
# the parent and sync every listed mountpoint except fuse.lxcfs ones.
# Sync failures are only reported as warnings.
my $do_syncfs = sub {
    my ($vmid, $pid, $socket) = @_;

    $enter_namespace->($vmid, $pid, 'mnt', PVE::Tools::CLONE_NEWNS);

    # Tell the parent process to start reading our /proc/mounts
    print {$socket} "go\n";
    $socket->flush();

    # Receive /proc/self/mounts
    my $mountdata = do {
        local $/ = undef; # slurp everything until EOF
        <$socket>;
    };
    close $socket;

    # Now sync all mountpoints...
    my $mounts = PVE::ProcFSTools::parse_mounts($mountdata);
    foreach my $entry (@$mounts) {
        my ($what, $dir, $fs) = @$entry;
        next if $fs eq 'fuse.lxcfs';

        eval { PVE::Tools::sync_mountpoint($dir); };
        warn $@ if $@;
    }
};
1925
# Sync all filesystems mounted inside a running container.
#
# A child process is forked which enters the container's mount namespace.
# Once it reports "go", the parent reads the child's /proc/<pid>/mounts and
# sends it over a socket pair; the child then syncs the listed mountpoints.
# Dies if the namespace cannot be entered, the mount table cannot be read,
# or the child exits with a non-zero status.
sub sync_container_namespace {
    my ($vmid) = @_;
    my $pid = find_lxc_pid($vmid);

    # SOCK_DGRAM is nicer for barriers but cannot be slurped
    socketpair my $pfd, my $cfd, AF_UNIX, SOCK_STREAM, PF_UNSPEC
        or die "failed to create socketpair: $!\n";

    my $child = fork();
    die "fork failed: $!\n" if !defined($child);

    if (!$child) {
        eval {
            close $pfd;
            $do_syncfs->($vmid, $pid, $cfd);
        };
        if (my $err = $@) {
            warn $err;
            POSIX::_exit(1);
        }
        POSIX::_exit(0);
    }
    close $cfd;

    # On failure: close our socket end (so a child still reading sees EOF)
    # and reap the child before dying, so no zombie is left behind.
    my $abort = sub {
        my ($message) = @_;
        close $pfd;
        waitpid($child, 0);
        die $message;
    };

    # $go is undef when the child exited without writing anything
    my $go = <$pfd>;
    $abort->("failed to enter container namespace\n")
        if !defined($go) || $go ne "go\n";

    open my $mounts, '<', "/proc/$child/mounts"
        or $abort->("failed to open container's /proc/mounts: $!\n");
    my $mountdata = do { local $/ = undef; <$mounts> };
    close $mounts;
    print {$pfd} $mountdata;
    close $pfd;

    while (waitpid($child, 0) != $child) {}
    die "failed to sync container namespace\n" if $? != 0;
}
1962
# Create snapshot $snapname of a container.
#
# The snapshot is prepared in the config, a running container is frozen and
# its filesystems synced, and the rootfs volume is snapshotted. The
# container is unfrozen again in any case. On failure the prepared snapshot
# is removed and the error re-thrown; on success the snapshot is committed.
sub snapshot_create {
    my ($vmid, $snapname, $save_vmstate, $comment) = @_;

    my $snap = snapshot_prepare($vmid, $snapname, $save_vmstate, $comment);

    my $conf = load_config($vmid);

    my $running = check_running($vmid);

    my $frozen = 0;

    # records which volumes were snapshotted, for cleanup on failure
    my $drivehash = {};

    eval {
        if ($running) {
            # set before freezing so that we also unfreeze if freezing fails
            $frozen = 1;
            PVE::Tools::run_command(['/usr/bin/lxc-freeze', '-n', $vmid]);
            sync_container_namespace($vmid);
        }

        my $storecfg = PVE::Storage::config();
        my $rootinfo = parse_ct_rootfs($conf->{rootfs});
        my $volid = $rootinfo->{volume};

        PVE::Storage::volume_snapshot($storecfg, $volid, $snapname);
        $drivehash->{rootfs} = 1;
    };
    my $err = $@;

    if ($frozen) {
        eval { PVE::Tools::run_command(['/usr/bin/lxc-unfreeze', '-n', $vmid]); };
        warn $@ if $@;
    }

    if ($err) {
        eval { snapshot_delete($vmid, $snapname, 1, $drivehash); };
        warn "$@\n" if $@;
        die "$err\n";
    }

    snapshot_commit($vmid, $snapname);
}
2005
# Delete snapshot $snapname of a container.
#
# Works in two locked phases: first the snapshot is marked with snapstate
# 'delete', then (after the volume snapshot was removed) the entry and all
# parent references to it are dropped from the config. With $force, errors
# while removing the volume snapshot are only warned about.
#
# Note: $drivehash is only set when called from snapshot_create.
sub snapshot_delete {
    my ($vmid, $snapname, $force, $drivehash) = @_;

    # phase flag read by the closures below; switched to 0 for the cleanup run
    my $prepare = 1;

    my $snap;

    # re-point (or drop) a 'parent' reference to the snapshot being deleted
    my $unlink_parent = sub {
        my ($confref, $new_parent) = @_;

        my $parent = $confref->{parent};
        return if !($parent && $parent eq $snapname);

        if ($new_parent) {
            $confref->{parent} = $new_parent;
        } else {
            delete $confref->{parent};
        }
    };

    my $updatefn = sub {
        my ($remove_drive) = @_;

        my $conf = load_config($vmid);

        if (!$drivehash) {
            check_lock($conf);
            die "you can't delete a snapshot if vm is a template\n"
                if is_template($conf);
        }

        $snap = $conf->{snapshots}->{$snapname};

        die "snapshot '$snapname' does not exist\n" if !defined($snap);

        # remove parent refs
        if (!$prepare) {
            my $grandparent = $snap->{parent};
            $unlink_parent->($conf, $grandparent);
            foreach my $sn (keys %{$conf->{snapshots}}) {
                next if $sn eq $snapname;
                $unlink_parent->($conf->{snapshots}->{$sn}, $snap->{parent});
            }
        }

        if ($remove_drive) {
            die "implement me - saving vmstate\n" if $remove_drive eq 'vmstate';
            die "implement me - remove drive\n";
        }

        if ($prepare) {
            $snap->{snapstate} = 'delete';
        } else {
            delete $conf->{snapshots}->{$snapname};
            delete $conf->{lock} if $drivehash;
        }

        write_config($vmid, $conf);
    };

    lock_config($vmid, $updatefn);

    # now remove vmstate file
    # never set for LXC!
    my $storecfg = PVE::Storage::config();

    die "implement me - saving vmstate\n" if $snap->{vmstate};

    # now remove all volume snapshots
    # only rootfs for now!
    eval {
        my $rootfs = $snap->{rootfs};
        my $rootinfo = parse_ct_rootfs($rootfs);
        my $volid = $rootinfo->{volume};
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname);
    };
    my $err = $@;
    if ($err) {
        die $err if !$force;
        warn $err;
    }

    # now cleanup config
    $prepare = 0;
    lock_config($vmid, $updatefn);
}
2095
# Roll a container back to snapshot $snapname.
#
# Works in two locked phases around the storage rollback: the first phase
# stops the container if it is running and sets the 'rollback' lock, the
# second phase removes the lock and replaces the current configuration with
# the one stored in the snapshot. Only the rootfs volume is rolled back.
sub snapshot_rollback {
    my ($vmid, $snapname) = @_;

    # phase flag read by $updatefn; switched to 0 for the second run
    my $prepare = 1;

    my $storecfg = PVE::Storage::config();

    my $conf = load_config($vmid);

    my $get_snapshot_config = sub {

        die "you can't rollback if vm is a template\n" if is_template($conf);

        my $res = $conf->{snapshots}->{$snapname};

        die "snapshot '$snapname' does not exist\n" if !defined($res);

        return $res;
    };

    my $snap = $get_snapshot_config->();

    # only for rootfs for now!
    my $rootfs = $snap->{rootfs};
    my $rootinfo = parse_ct_rootfs($rootfs);
    my $volid = $rootinfo->{volume};

    PVE::Storage::volume_rollback_is_possible($storecfg, $volid, $snapname);

    my $updatefn = sub {

        # re-read the config under the lock
        $conf = load_config($vmid);

        $snap = $get_snapshot_config->();

        die "unable to rollback to incomplete snapshot (snapstate = $snap->{snapstate})\n"
            if $snap->{snapstate};

        if ($prepare) {
            check_lock($conf);
            # list form: the command is executed directly, without a shell
            system('lxc-stop', '-n', $vmid, '--kill') if check_running($vmid);
        }

        die "unable to rollback vm $vmid: vm is running\n"
            if check_running($vmid);

        if ($prepare) {
            $conf->{lock} = 'rollback';
        } else {
            die "got wrong lock\n" if !($conf->{lock} && $conf->{lock} eq 'rollback');
            delete $conf->{lock};
        }

        if (!$prepare) {
            # copy snapshot config to current config
            $conf = $snapshot_apply_config->($conf, $snap);
            $conf->{parent} = $snapname;
        }

        write_config($vmid, $conf);

        if (!$prepare && $snap->{vmstate}) {
            die "implement me - save vmstate";
        }
    };

    lock_config($vmid, $updatefn);

    # only rootfs for now!
    PVE::Storage::volume_snapshot_rollback($storecfg, $volid, $snapname);

    $prepare = 0;
    lock_config($vmid, $updatefn);
}
2172
# Convert the rootfs volume of a container into a base (template) volume
# and store the new volume ID in the configuration.
# Dies if the storage does not support the 'template' feature for it.
sub template_create {
    my ($vmid, $conf) = @_;

    my $storecfg = PVE::Storage::config();

    my $rootinfo = parse_ct_rootfs($conf->{rootfs});
    my $volid = $rootinfo->{volume};

    if (!PVE::Storage::volume_has_feature($storecfg, 'template', $volid)) {
        die "Template feature is not available for '$volid'\n";
    }

    PVE::Storage::activate_volumes($storecfg, [$volid]);

    # the rootfs entry now has to point at the base volume
    $rootinfo->{volume} = PVE::Storage::vdisk_create_base($storecfg, $volid);
    $conf->{rootfs} = print_ct_mountpoint($rootinfo, 1);

    write_config($vmid, $conf);
}
2192
# Return 1 if the 'template' option of $conf is defined and equals 1;
# otherwise a false value is returned.
sub is_template {
    my ($conf) = @_;

    my $flag = $conf->{template};

    return 1 if defined($flag) && $flag == 1;
}
2198
# Return the list of mountpoint option names: 'rootfs' followed by mp0 up
# to mp($MAX_MOUNT_POINTS - 1), in reverse order if $reverse is set.
sub mountpoint_names {
    my ($reverse) = @_;

    my @names = ('rootfs', map { "mp$_" } 0 .. $MAX_MOUNT_POINTS - 1);

    return $reverse ? reverse @names : @names;
}
2210
2211
# Call $func->($key, $mountpoint) for every configured mountpoint of $conf,
# in the order given by mountpoint_names($reverse). Entries that are not
# set, or for which the parser returns undef, are skipped.
sub foreach_mountpoint_full {
    my ($conf, $reverse, $func) = @_;

    foreach my $key (mountpoint_names($reverse)) {
        my $value = $conf->{$key};
        next if !defined($value);

        # rootfs has its own parser; the second argument 1 is passed as in
        # all other callers of this iteration helper
        my $parser = $key eq 'rootfs' ? \&parse_ct_rootfs : \&parse_ct_mountpoint;
        my $mountpoint = $parser->($value, 1);
        next if !defined($mountpoint);

        $func->($key, $mountpoint);
    }
}
2224
# Iterate over the mountpoints of $conf in forward order (rootfs first).
sub foreach_mountpoint {
    my ($conf, $func) = @_;

    my $reverse = 0;
    return foreach_mountpoint_full($conf, $reverse, $func);
}
2230
# Iterate over the mountpoints of $conf in reverse order (rootfs last).
sub foreach_mountpoint_reverse {
    my ($conf, $func) = @_;

    my $reverse = 1;
    return foreach_mountpoint_full($conf, $reverse, $func);
}
2236
# Check that $authuser may modify each option named in $key_list.
#
# root@pam is always allowed. For every other user the privilege matching
# the option (VM.Config.CPU, .Disk, .Memory, .Network or .Options) is
# checked through $rpcenv->check_vm_perm for each option.
# Returns 1 if no check raised an error.
sub check_ct_modify_config_perm {
    my ($rpcenv, $authuser, $vmid, $pool, $key_list) = @_;

    # only the superuser may skip the per-option checks
    return 1 if $authuser eq 'root@pam';

    foreach my $opt (@$key_list) {

        if ($opt eq 'cpus' || $opt eq 'cpuunits' || $opt eq 'cpulimit') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.CPU']);
        } elsif ($opt eq 'rootfs' || $opt =~ /^mp\d+$/) {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk']);
        } elsif ($opt eq 'memory' || $opt eq 'swap') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Memory']);
        } elsif ($opt =~ m/^net\d+$/ || $opt eq 'nameserver' ||
                 $opt eq 'searchdomain' || $opt eq 'hostname') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Network']);
        } else {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Options']);
        }
    }

    return 1;
}
2260
# Unmount all mountpoints of a container below /var/lib/lxc/<vmid>/rootfs,
# in reverse mountpoint order. Mountpoints without volume or mount path,
# and paths that are not mounted, are skipped. With $noerr, umount failures
# are only warned about; otherwise they are fatal.
sub umount_all {
    my ($vmid, $storage_cfg, $conf, $noerr) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    my $volid_list = get_vm_volumes($conf);

    my $unmount = sub {
        my ($ms, $mountpoint) = @_;

        my ($volid, $mount) = @{$mountpoint}{qw(volume mp)};
        return if !$volid || !$mount;

        # collapse repeated slashes in the combined path
        (my $mount_path = "$rootdir/$mount") =~ s!/+!/!g;

        return if !PVE::ProcFSTools::is_mounted($mount_path);

        eval { PVE::Tools::run_command(['umount', '-d', $mount_path]); };
        my $err = $@;
        return if !$err;

        die $err if !$noerr;
        warn $err;
    };

    foreach_mountpoint_reverse($conf, $unmount);
}
2292
# Activate the container's volumes and mount all its mountpoints below
# /var/lib/lxc/<vmid>/rootfs. If any mount fails, everything is unmounted
# again (ignoring errors) and the original error is re-thrown.
# Returns the root directory path.
sub mount_all {
    my ($vmid, $storage_cfg, $conf) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    File::Path::make_path($rootdir);

    PVE::Storage::activate_volumes($storage_cfg, get_vm_volumes($conf));

    my $mount_one = sub {
        my ($ms, $mountpoint) = @_;

        mountpoint_mount($mountpoint, $rootdir, $storage_cfg);
    };

    eval { foreach_mountpoint($conf, $mount_one); };
    my $err = $@;
    if ($err) {
        warn "mounting container failed\n";
        umount_all($vmid, $storage_cfg, $conf, 1);
        die $err;
    }

    return $rootdir;
}
2317
2318
# Resolve the path belonging to a mount point without mounting anything:
# mountpoint_mount() is called with an undefined root directory.
sub mountpoint_mount_path {
    my ($mountpoint, $storage_cfg, $snapname) = @_;

    my $rootdir = undef;
    return mountpoint_mount($mountpoint, $rootdir, $storage_cfg, $snapname);
}
2324
# Die if $path, once canonicalized, differs from its fully resolved real
# path, i.e. if resolving it changes the location it points to.
my $check_mount_path = sub {
    my ($path) = @_;

    my $canonical = File::Spec->canonpath($path);
    my $resolved = Cwd::realpath($canonical);

    die "mount path modified by symlink: $canonical != $resolved"
        if $resolved ne $canonical;
};
2333
# Ask losetup which loop device is associated with $path.
#
# Returns the device name taken from the last output line that starts with
# '/dev/loopN:', or undef when no such line was printed.
sub query_loopdev {
    my ($path) = @_;

    my $loopdev;
    PVE::Tools::run_command(
        ['losetup', '--associated', $path],
        outfunc => sub {
            my ($line) = @_;
            $loopdev = $1 if $line =~ m@^(/dev/loop\d+):@;
        },
    );

    return $loopdev;
}
2347
# Run a function with a file attached to a loop device.
# The loop device is always detached afterwards (or set to autoclear).
#
# Parameters:
#   $func - code reference, called with the loop device path
#   $file - file to attach via 'losetup --show -f'
#
# Dies if no loop device could be set up. If $func dies, its error is
# re-thrown after the detach attempt; a detach failure in that case is only
# reported with warn so that it cannot hide the original error. If only the
# detach fails, that error is thrown.
#
# Returns the loop device.
sub run_with_loopdev {
    my ($func, $file) = @_;
    my $device;
    my $parser = sub {
        my $line = shift;
        if ($line =~ m@^(/dev/loop\d+)$@) {
            $device = $1;
        }
    };
    PVE::Tools::run_command(['losetup', '--show', '-f', $file], outfunc => $parser);
    die "failed to setup loop device for $file\n" if !$device;

    eval { $func->($device); };
    my $err = $@;

    # Detach in its own eval: $@ of the callback was saved above and must
    # stay the error that gets reported.
    eval { PVE::Tools::run_command(['losetup', '-d', $device]); };
    my $detach_err = $@;

    if ($err) {
        warn $detach_err if $detach_err;
        die $err;
    }
    die $detach_err if $detach_err;

    return $device;
}
2368
# Bind-mount $dir onto $dest, passing @extra_opts through to mount.
#
# When $ro is true the bind mount is remounted read-only afterwards. If that
# remount fails, a warning is printed, $dest is unmounted again and the
# remount error is re-thrown.
sub bindmount {
    my ($dir, $dest, $ro, @extra_opts) = @_;

    PVE::Tools::run_command(['mount', '-o', 'bind', @extra_opts, $dir, $dest]);
    return if !$ro;

    eval { PVE::Tools::run_command(['mount', '-o', 'bind,remount,ro', $dest]); };
    my $err = $@;
    return if !$err;

    warn "bindmount error\n";
    # don't leave writable bind-mounts behind...
    PVE::Tools::run_command(['umount', $dest]);
    die $err;
}
2382
# Mount a single mount point below $rootdir, or just resolve its path.
#
# use $rootdir = undef to just return the corresponding mount path
#
# Parameters:
#   $mountpoint  - hash reference; the keys read here are volume, mp, type,
#                  ro, quota and acl
#   $rootdir     - directory the mount path is relative to, or undef
#   $storage_cfg - storage configuration handed to PVE::Storage
#   $snapname    - optional snapshot name (only valid for storage volumes)
#
# Returns nothing when the mount point has no volume or no mount path.
# Otherwise returns ($path, $use_loopdev, $mounted_dev) in list context and
# $path in scalar context. Dies on unsupported storage types / formats, on a
# missing bind-mount source directory and on mount failures.
sub mountpoint_mount {
    my ($mountpoint, $rootdir, $storage_cfg, $snapname) = @_;

    my $volid = $mountpoint->{volume};
    my $mount = $mountpoint->{mp};
    my $type = $mountpoint->{type};
    # quota is only requested for writable, non-snapshot mounts
    my $quota = !$snapname && !$mountpoint->{ro} && $mountpoint->{quota};
    my $mounted_dev;

    return if !$volid || !$mount;

    my $mount_path;

    if (defined($rootdir)) {
        $rootdir =~ s!/+$!!;
        $mount_path = "$rootdir/$mount";
        $mount_path =~ s!/+!/!g;
        $check_mount_path->($mount_path);
        File::Path::mkpath($mount_path);
    }

    my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1);

    die "unknown snapshot path for '$volid'" if !$storage && defined($snapname);

    my $optstring = '';
    if (defined($mountpoint->{acl})) {
        $optstring .= ($mountpoint->{acl} ? 'acl' : 'noacl');
    }
    my $readonly = $mountpoint->{ro};

    # Only hand over '-o' when there is something to pass; previously an
    # empty option string ("-o ''") ended up on every mount command line.
    my @extra_opts;
    @extra_opts = ('-o', $optstring) if length($optstring);

    if ($storage) {

        my $scfg = PVE::Storage::storage_config($storage_cfg, $storage);
        my $path = PVE::Storage::path($storage_cfg, $volid, $snapname);

        my ($vtype, undef, undef, undef, undef, $isBase, $format) =
            PVE::Storage::parse_volname($storage_cfg, $volid);

        $format = 'iso' if $vtype eq 'iso'; # allow to handle iso files

        if ($format eq 'subvol') {
            if ($mount_path) {
                if ($snapname) {
                    if ($scfg->{type} eq 'zfspool') {
                        # the leading slashes are stripped from the path
                        # before it is passed to 'mount -t zfs'
                        my $path_arg = $path;
                        $path_arg =~ s!^/+!!;
                        PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, '-t', 'zfs', $path_arg, $mount_path]);
                    } else {
                        die "cannot mount subvol snapshots for storage type '$scfg->{type}'\n";
                    }
                } else {
                    bindmount($path, $mount_path, $readonly, @extra_opts);
                    warn "cannot enable quota control for bind mounted subvolumes\n" if $quota;
                }
            }
            return wantarray ? ($path, 0, $mounted_dev) : $path;
        } elsif ($format eq 'raw' || $format eq 'iso') {
            # Performs the actual mount; receives either the volume path or
            # the loop device it was attached to.
            my $domount = sub {
                my ($path) = @_;
                if ($mount_path) {
                    if ($format eq 'iso') {
                        PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, $path, $mount_path]);
                    } elsif ($isBase || defined($snapname)) {
                        PVE::Tools::run_command(['mount', '-o', 'ro,noload', @extra_opts, $path, $mount_path]);
                    } else {
                        if ($quota) {
                            push @extra_opts, '-o', 'usrjquota=aquota.user,grpjquota=aquota.group,jqfmt=vfsv0';
                        }
                        push @extra_opts, '-o', 'ro' if $readonly;
                        PVE::Tools::run_command(['mount', @extra_opts, $path, $mount_path]);
                    }
                }
            };
            my $use_loopdev = 0;
            if ($scfg->{path}) {
                # storages with a 'path' setting are mounted through a loop
                # device
                $mounted_dev = run_with_loopdev($domount, $path);
                $use_loopdev = 1;
            } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' ||
                     $scfg->{type} eq 'rbd' || $scfg->{type} eq 'lvmthin') {
                $mounted_dev = $path;
                $domount->($path);
            } else {
                die "unsupported storage type '$scfg->{type}'\n";
            }
            return wantarray ? ($path, $use_loopdev, $mounted_dev) : $path;
        } else {
            die "unsupported image format '$format'\n";
        }
    } elsif ($type eq 'device') {
        push @extra_opts, '-o', 'ro' if $readonly;
        PVE::Tools::run_command(['mount', @extra_opts, $volid, $mount_path]) if $mount_path;
        return wantarray ? ($volid, 0, $volid) : $volid;
    } elsif ($type eq 'bind') {
        die "directory '$volid' does not exist\n" if ! -d $volid;
        $check_mount_path->($volid);
        bindmount($volid, $mount_path, $readonly, @extra_opts) if $mount_path;
        warn "cannot enable quota control for bind mounts\n" if $quota;
        return wantarray ? ($volid, 0, undef) : $volid;
    }

    die "unsupported storage";
}
2489
# Collect the volume ids of all mount points of type 'volume' whose volume
# id contains a storage id.
#
# $excludes may name a single mount point key that is left out.
# Returns an array reference of volume ids.
sub get_vm_volumes {
    my ($conf, $excludes) = @_;

    my @volumes;

    foreach_mountpoint($conf, sub {
        my ($ms, $mountpoint) = @_;

        return if $excludes && $ms eq $excludes;

        my $volid = $mountpoint->{volume};
        return if !$volid;
        return if $mountpoint->{type} ne 'volume';

        # second argument 1: lookup is done in no-error mode
        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        push @volumes, $volid if $sid;
    });

    return \@volumes;
}
2512
# Create an ext4 file system (with the 'mmp' feature) on $dev whose root
# directory is owned by $rootuid:$rootgid.
#
# $rootuid and $rootgid default to 0 when not given, so that callers passing
# only the device do not produce an invalid 'root_owner=:' option (and
# uninitialized-value warnings).
sub mkfs {
    my ($dev, $rootuid, $rootgid) = @_;

    $rootuid = 0 if !defined($rootuid);
    $rootgid = 0 if !defined($rootgid);

    PVE::Tools::run_command(['mkfs.ext4', '-O', 'mmp',
                             '-E', "root_owner=$rootuid:$rootgid",
                             $dev]);
}
2520
# Create a file system on the volume or device $volid, with the file system
# root owned by $rootuid:$rootgid.
#
# A $volid starting with '/dev/' is formatted directly. Anything else must
# be a storage volume in 'raw' format; it is activated and its path is
# formatted. Dies for volume ids without a storage and for non-raw formats.
sub format_disk {
    my ($storage_cfg, $volid, $rootuid, $rootgid) = @_;

    if ($volid =~ m!^/dev/.+!) {
        # pass the owner along here as well; it used to be dropped for
        # plain devices
        mkfs($volid, $rootuid, $rootgid);
        return;
    }

    my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1);

    die "cannot format volume '$volid' with no storage\n" if !$storage;

    PVE::Storage::activate_volumes($storage_cfg, [$volid]);

    my $path = PVE::Storage::path($storage_cfg, $volid);

    my ($vtype, undef, undef, undef, undef, $isBase, $format) =
        PVE::Storage::parse_volname($storage_cfg, $volid);

    die "cannot format volume '$volid' (format == $format)\n"
        if $format ne 'raw';

    mkfs($path, $rootuid, $rootgid);
}
2545
# Free every volume in $vollist. A failure to free one volume is reported
# with warn and does not stop the remaining volumes from being processed.
sub destroy_disks {
    my ($storecfg, $vollist) = @_;

    for my $volid (@$vollist) {
        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        my $err = $@;
        warn $err if $err;
    }
}
2554
# Allocate (and, for raw images, format) the volumes requested in $settings
# and record the resulting mount point strings in $conf.
#
# Parameters:
#   $storecfg - storage configuration
#   $vmid     - container id the volumes are allocated for
#   $settings - configuration whose mount points are walked
#   $conf     - configuration that receives the printed mount point entries
#               and is used to look up the id mapping
#
# A volume given as '<storage>:<size in GB>' is newly allocated; any other
# value is recorded unchanged. Subvolume-type allocations are chowned to the
# mapped root uid/gid at the end.
#
# Returns an array reference with the ids of all newly allocated volumes.
# On error every volume allocated so far is freed and the error re-thrown.
sub create_disks {
    my ($storecfg, $vmid, $settings, $conf) = @_;

    my $vollist = [];

    eval {
        my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
        my $chown_vollist = [];

        foreach_mountpoint($settings, sub {
            my ($ms, $mountpoint) = @_;

            my $volid = $mountpoint->{volume};

            my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1);

            if ($storage && ($volid =~ m/^([^:\s]+):(\d+(\.\d+)?)$/)) {
                my $size_gb = $2;

                my $size_kb = int(${size_gb}*1024) * 1024;

                my $scfg = PVE::Storage::storage_config($storecfg, $storage);
                my $stype = $scfg->{type};
                # fixme: use better naming ct-$vmid-disk-X.raw?

                # Decide what to allocate before touching the storage.
                my ($format, $alloc_size, $needs_chown);
                if ($stype eq 'dir' || $stype eq 'nfs') {
                    if ($size_kb > 0) {
                        ($format, $alloc_size) = ('raw', $size_kb);
                    } else {
                        # size 0 requests a subvolume instead of an image
                        ($format, $alloc_size, $needs_chown) = ('subvol', 0, 1);
                    }
                } elsif ($stype eq 'zfspool') {
                    ($format, $alloc_size, $needs_chown) = ('subvol', $size_kb, 1);
                } elsif ($stype eq 'drbd' || $stype eq 'lvm' || $stype eq 'lvmthin') {
                    ($format, $alloc_size) = ('raw', $size_kb);
                } elsif ($stype eq 'rbd') {
                    die "krbd option must be enabled on storage type '$scfg->{type}'\n" if !$scfg->{krbd};
                    ($format, $alloc_size) = ('raw', $size_kb);
                } else {
                    die "unable to create containers on storage type '$scfg->{type}'\n";
                }

                $volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, $format,
                                                   undef, $alloc_size);
                # Register the volume for cleanup right away, so that it is
                # also freed when formatting it fails below.
                push @$vollist, $volid;

                if ($needs_chown) {
                    push @$chown_vollist, $volid;
                } else {
                    format_disk($storecfg, $volid, $rootuid, $rootgid);
                }

                $mountpoint->{volume} = $volid;
                $mountpoint->{size} = $size_kb * 1024;
            }
            # otherwise: use specified/existing volid/dir/device

            $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
        });

        PVE::Storage::activate_volumes($storecfg, $chown_vollist, undef);
        foreach my $volid (@$chown_vollist) {
            my $path = PVE::Storage::path($storecfg, $volid, undef);
            # NOTE(review): the result of chown is not checked
            chown($rootuid, $rootgid, $path);
        }
        PVE::Storage::deactivate_volumes($storecfg, $chown_vollist, undef);
    };
    # free allocated images on error
    if (my $err = $@) {
        destroy_disks($storecfg, $vollist);
        die $err;
    }
    return $vollist;
}
2632
2633 # bash completion helper
2634
# Completion helper: list the volume ids of the available templates, or of
# the available backups when $cmdname is 'restore'.
#
# If $cvalue starts with '<storage>:', only that storage is queried.
# Returns an array reference of volume ids.
sub complete_os_templates {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    # undef unless the typed value already contains a storage id
    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $vtype = 'vztmpl';
    $vtype = 'backup' if $cmdname eq 'restore';

    my $data = PVE::Storage::template_list($cfg, $storeid, $vtype);

    my @volids;
    for my $items (values %$data) {
        for my $item (@$items) {
            next if !defined($item->{volid});
            push @volids, $item->{volid};
        }
    }

    return \@volids;
}
2658
# Completion helper returning an array reference of container ids.
#
# With $running undefined every id reported by vmstatus() is returned.
# Otherwise templates are left out and only ids whose presence in the
# active-container list matches the truth value of $running are kept.
my $complete_ctid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();
    my $active_hash = list_active_containers();

    my $wanted = sub {
        my ($id) = @_;

        return 1 if !defined($running);
        return 0 if $idlist->{$id}->{template};

        my $is_active = $active_hash->{$id} ? 1 : 0;
        my $want_active = $running ? 1 : 0;
        return $is_active == $want_active;
    };

    return [ grep { $wanted->($_) } keys %$idlist ];
};
2680
# Completion helper: all container ids (no state filter is passed).
sub complete_ctid {
    return $complete_ctid_full->();
}
2684
# Completion helper: container ids selected with the 'running' filter set
# to false.
sub complete_ctid_stopped {
    my $running = 0;
    return $complete_ctid_full->($running);
}
2688
# Completion helper: container ids selected with the 'running' filter set
# to true.
sub complete_ctid_running {
    my $running = 1;
    return $complete_ctid_full->($running);
}
2692
# Extract the 'lxc.id_map' entries from the raw lxc settings in $conf.
#
# Returns a list ($id_map, $rootuid, $rootgid): $id_map is an array
# reference of [type, container id, host id, length] entries, and
# $rootuid / $rootgid are the host ids of the mappings that start at
# container id 0 (0 if there is none). When no mapping is configured and
# $conf->{unprivileged} is set, a default mapping starting at host id
# 100000 with length 65536 is returned. Dies on an unparsable entry.
sub parse_id_maps {
    my ($conf) = @_;

    my @id_map;
    my %root_id = (u => 0, g => 0);

    # '|| []' keeps a missing 'lxc' key from being created in $conf
    for my $entry (@{ $conf->{lxc} || [] }) {
        my ($key, $value) = @$entry;
        next if $key ne 'lxc.id_map';

        my ($type, $ct, $host, $length) =
            $value =~ /^([ug])\s+(\d+)\s+(\d+)\s+(\d+)\s*$/
            or die "failed to parse id_map: $value\n";

        push @id_map, [$type, $ct, $host, $length];
        $root_id{$type} = $host if $ct == 0;
    }

    if (!@id_map && $conf->{unprivileged}) {
        # Should we read them from /etc/subuid?
        @id_map = (
            ['u', '0', '100000', '65536'],
            ['g', '0', '100000', '65536'],
        );
        $root_id{u} = $root_id{g} = 100000;
    }

    return (\@id_map, $root_id{u}, $root_id{g});
}
2725
# Build the 'lxc-usernsexec' command prefix for the given id map.
#
# Every id map entry becomes a '-m' option whose value is the entry joined
# with ':'; the prefix ends with '--'. Returns an empty array reference
# when the id map is empty.
sub userns_command {
    my ($id_map) = @_;

    return [] if !@$id_map;

    my @cmd = ('lxc-usernsexec');
    push @cmd, '-m', join(':', @$_) for @$id_map;
    push @cmd, '--';

    return \@cmd;
}
2733
2734 1;