]> git.proxmox.com Git - pve-container.git/blob - src/PVE/LXC.pm
Add '\n' to die statements
[pve-container.git] / src / PVE / LXC.pm
1 package PVE::LXC;
2
3 use strict;
4 use warnings;
5 use POSIX qw(EINTR);
6
7 use Socket;
8
9 use File::Path;
10 use File::Spec;
11 use Cwd qw();
12 use Fcntl qw(O_RDONLY);
13
14 use PVE::Cluster qw(cfs_register_file cfs_read_file);
15 use PVE::Exception qw(raise_perm_exc);
16 use PVE::Storage;
17 use PVE::SafeSyslog;
18 use PVE::INotify;
19 use PVE::JSONSchema qw(get_standard_option);
20 use PVE::Tools qw($IPV6RE $IPV4RE dir_glob_foreach lock_file lock_file_full);
21 use PVE::Network;
22 use PVE::AccessControl;
23 use PVE::ProcFSTools;
24 use Time::HiRes qw (gettimeofday);
25
26 use Data::Dumper;
27
# Name of the local node; used below as the default node when building
# configuration paths and when filtering the cluster-wide guest list.
my $nodename = PVE::INotify::nodename();

# Host CPU information; vmstatus() reads the 'cpus' and 'user_hz' entries.
my $cpuinfo= PVE::ProcFSTools::read_cpuinfo();
31
# Common tar command line flags (sparse files, numeric owners, ACLs and a
# restricted set of extended attributes).
our $COMMON_TAR_FLAGS = [
    '--sparse',
    '--numeric-owner',
    '--acls',
    '--xattrs',
    '--xattrs-include=user.*',
    '--xattrs-include=security.capability',
    '--warning=no-xattr-write',
];
37
# Register parse_pct_config/write_pct_config as the reader/writer pair for
# files matching '/lxc/' in the cluster file system layer.
cfs_register_file('/lxc/', \&parse_pct_config, \&write_pct_config);
39
# Property-string schema describing the root file system of a container.
# It is also spread into $mp_desc further down, so every key defined here
# is valid for mount points as well.
my $rootfs_desc = {
    # the default key: a bare value in the property string is the volume
    volume => {
        type => 'string',
        default_key => 1,
        format => 'pve-lxc-mp-string',
        format_description => 'volume',
        description => 'Volume, device or directory to mount into the container.',
    },
    backup => {
        type => 'boolean',
        format_description => '[1|0]',
        description => 'Whether to include the mountpoint in backups.',
        optional => 1,
    },
    # parsed into a number by the mount point parser (see parse_size there)
    size => {
        type => 'string',
        format => 'disk-size',
        format_description => 'DiskSize',
        description => 'Volume size (read only value).',
        optional => 1,
    },
    acl => {
        type => 'boolean',
        format_description => 'acl',
        description => 'Explicitly enable or disable ACL support.',
        optional => 1,
    },
    ro => {
        type => 'boolean',
        format_description => 'ro',
        description => 'Read-only mountpoint (not supported with bind mounts)',
        optional => 1,
    },
    quota => {
        type => 'boolean',
        format_description => '[0|1]',
        description => 'Enable user quotas inside the container (not supported with zfs subvolumes)',
        optional => 1,
    },
};
80
# Standard option for the container root file system; its value is a
# property string validated against $rootfs_desc.
PVE::JSONSchema::register_standard_option('pve-ct-rootfs', {
    type => 'string', format => $rootfs_desc,
    description => "Use volume as container root.",
    optional => 1,
});

# Standard option for snapshot names (a config id of at most 40 characters).
PVE::JSONSchema::register_standard_option('pve-lxc-snapshot-name', {
    description => "The name of the snapshot.",
    type => 'string', format => 'pve-configid',
    maxLength => 40,
});
92
# Schema of all top-level container configuration keys. check_type() uses
# the 'type' and 'format' entries to validate values, option_exists() and
# json_config_properties() expose the key set. The netN, mpN and unusedN
# entries are added programmatically further down.
my $confdesc = {
    lock => {
        optional => 1,
        type => 'string',
        description => "Lock/unlock the VM.",
        enum => [qw(migrate backup snapshot rollback)],
    },
    onboot => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default => 0,
    },
    startup => get_standard_option('pve-startup-order'),
    template => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable Template.",
        default => 0,
    },
    arch => {
        optional => 1,
        type => 'string',
        enum => ['amd64', 'i386'],
        description => "OS architecture type.",
        default => 'amd64',
    },
    # NOTE(review): "skip and OS specific setup" in the description below is
    # presumably a typo for "skip any OS specific setup".
    ostype => {
        optional => 1,
        type => 'string',
        enum => ['debian', 'ubuntu', 'centos', 'fedora', 'opensuse', 'archlinux', 'alpine', 'unmanaged'],
        description => "OS type. This is used to setup configuration inside the container, and corresponds to lxc setup scripts in /usr/share/lxc/config/<ostype>.common.conf. Value 'unmanaged' can be used to skip and OS specific setup.",
    },
    console => {
        optional => 1,
        type => 'boolean',
        description => "Attach a console device (/dev/console) to the container.",
        default => 1,
    },
    tty => {
        optional => 1,
        type => 'integer',
        description => "Specify the number of tty available to the container",
        minimum => 0,
        maximum => 6,
        default => 2,
    },
    # translated into cpu.cfs_quota_us by update_lxc_config()
    cpulimit => {
        optional => 1,
        type => 'number',
        description => "Limit of CPU usage. Note if the computer has 2 CPUs, it has a total of '2' CPU time. Value '0' indicates no CPU limit.",
        minimum => 0,
        maximum => 128,
        default => 0,
    },
    # translated into cpu.shares by update_lxc_config()
    cpuunits => {
        optional => 1,
        type => 'integer',
        description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to the weights of all the other running VMs.\n\nNOTE: You can disable fair-scheduler configuration by setting this to 0.",
        minimum => 0,
        maximum => 500000,
        default => 1024,
    },
    memory => {
        optional => 1,
        type => 'integer',
        description => "Amount of RAM for the VM in MB.",
        minimum => 16,
        default => 512,
    },
    swap => {
        optional => 1,
        type => 'integer',
        description => "Amount of SWAP for the VM in MB.",
        minimum => 0,
        default => 512,
    },
    hostname => {
        optional => 1,
        description => "Set a host name for the container.",
        type => 'string', format => 'dns-name',
        maxLength => 255,
    },
    # stored as '#' comment lines at the top of the config file
    description => {
        optional => 1,
        type => 'string',
        description => "Container description. Only used on the configuration web interface.",
    },
    searchdomain => {
        optional => 1,
        type => 'string', format => 'dns-name-list',
        description => "Sets DNS search domains for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        description => "Sets DNS server IP address for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
    },
    rootfs => get_standard_option('pve-ct-rootfs'),
    # 'parent' and 'snaptime' are skipped by json_config_properties()
    parent => {
        optional => 1,
        type => 'string', format => 'pve-configid',
        maxLength => 40,
        description => "Parent snapshot name. This is used internally, and should not be modified.",
    },
    snaptime => {
        optional => 1,
        description => "Timestamp for snapshots.",
        type => 'integer',
        minimum => 0,
    },
    cmode => {
        optional => 1,
        description => "Console mode. By default, the console command tries to open a connection to one of the available tty devices. By setting cmode to 'console' it tries to attach to /dev/console instead. If you set cmode to 'shell', it simply invokes a shell inside the container (no login).",
        type => 'string',
        enum => ['shell', 'console', 'tty'],
        default => 'tty',
    },
    # evaluated by check_protection()
    protection => {
        optional => 1,
        type => 'boolean',
        description => "Sets the protection flag of the container. This will prevent the CT or CT's disk remove/update operation.",
        default => 0,
    },
    # makes update_lxc_config() emit a default id map
    unprivileged => {
        optional => 1,
        type => 'boolean',
        description => "Makes the container run as unprivileged user. (Should not be modified manually.)",
        default => 0,
    },
};
224
# Raw 'lxc.*' keys that parse_pct_config() accepts.
#
# A value of 1 means the key is passed through unchanged. Any other true
# value is a message that is printed as a warning, and the key is dropped.
# Keys starting with 'lxc.cgroup.' are not listed here; the parser accepts
# them through a prefix match.
my $valid_lxc_conf_keys = {
    (map { $_ => 1 } qw(
        lxc.include
        lxc.arch
        lxc.utsname
        lxc.haltsignal
        lxc.rebootsignal
        lxc.stopsignal
        lxc.init_cmd
        lxc.network.type
        lxc.network.flags
        lxc.network.link
        lxc.network.mtu
        lxc.network.name
        lxc.network.hwaddr
        lxc.network.ipv4
        lxc.network.ipv4.gateway
        lxc.network.ipv6
        lxc.network.ipv6.gateway
        lxc.network.script.up
        lxc.network.script.down
        lxc.pts
        lxc.console.logfile
        lxc.console
        lxc.tty
        lxc.devttydir
        lxc.hook.autodev
        lxc.autodev
        lxc.kmsg
        lxc.mount
        lxc.mount.entry
        lxc.mount.auto
        lxc.rootfs.mount
        lxc.cap.drop
        lxc.cap.keep
        lxc.aa_profile
        lxc.aa_allow_incomplete
        lxc.se_context
        lxc.seccomp
        lxc.id_map
        lxc.hook.pre-start
        lxc.hook.pre-mount
        lxc.hook.mount
        lxc.hook.start
        lxc.hook.stop
        lxc.hook.post-stop
        lxc.hook.clone
        lxc.hook.destroy
        lxc.loglevel
        lxc.logfile
        lxc.start.auto
        lxc.start.delay
        lxc.start.order
        lxc.group
        lxc.environment
    )),
    # rejected keys, each with the warning shown to the user
    'lxc.rootfs' => 'lxc.rootfs is auto generated from rootfs',
    'lxc.rootfs.options' =>
        'lxc.rootfs.options is not supported, please use mountpoint options in the "rootfs" key',
};
284
# Property-string schema for the netN configuration keys. It is registered
# as the 'pve-lxc-network' format and used by parse_lxc_network() and
# print_lxc_network().
my $netconf_desc = {
    # parse_lxc_network() always overrides this with 'veth'
    type => {
        type => 'string',
        optional => 1,
        description => "Network interface type.",
        enum => [qw(veth)],
    },
    name => {
        type => 'string',
        format_description => 'String',
        description => 'Name of the network device as seen from inside the container. (lxc.network.name)',
        pattern => '[-_.\w\d]+',
    },
    bridge => {
        type => 'string',
        format_description => 'vmbr<Number>',
        description => 'Bridge to attach the network device to.',
        pattern => '[-_.\w\d]+',
        optional => 1,
    },
    # parse_lxc_network() generates a random address when this is missing.
    # The description used to be a copy of the 'bridge' description.
    hwaddr => {
        type => 'string',
        format_description => 'MAC',
        description => 'MAC address of the network device. (lxc.network.hwaddr)',
        pattern => qr/(?:[a-f0-9]{2}:){5}[a-f0-9]{2}/i,
        optional => 1,
    },
    mtu => {
        type => 'integer',
        format_description => 'Number',
        description => 'Maximum transfer unit of the interface. (lxc.network.mtu)',
        minimum => 64, # minimum ethernet frame is 64 bytes
        optional => 1,
    },
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        description => 'IPv4 address in CIDR format.',
        optional => 1,
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        description => 'Default gateway for IPv4 traffic.',
        optional => 1,
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        description => 'IPv6 address in CIDR format.',
        optional => 1,
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        description => 'Default gateway for IPv6 traffic.',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        format_description => '[1|0]',
        description => "Controls whether this interface's firewall rules should be used.",
        optional => 1,
    },
    # bounds are plain numbers, like every other minimum/maximum in this file
    tag => {
        type => 'integer',
        format_description => 'VlanNo',
        minimum => 2,
        maximum => 4094,
        description => "VLAN tag for this interface.",
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:;\d+)*/,
        format_description => 'vlanid[;vlanid...]',
        description => "VLAN ids to pass through the interface",
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-lxc-network', $netconf_desc);
370
# Number of netN keys (net0 .. net9) that are added to the config schema.
my $MAX_LXC_NETWORKS = 10;
foreach my $idx (0 .. $MAX_LXC_NETWORKS - 1) {
    $confdesc->{"net$idx"} = {
        optional => 1,
        type => 'string', format => $netconf_desc,
        description => "Specifies network interfaces for the container.",
    };
}

# Format used for the 'volume' and 'mp' properties of mount points.
PVE::JSONSchema::register_format('pve-lxc-mp-string', \&verify_lxc_mp_string);
# Format verifier for 'pve-lxc-mp-string'.
#
# Rejects strings that contain a '/./' or '/../' component, that end in
# '/.' or '/..', or that start with '../'. Returns the unmodified string
# when it is acceptable; otherwise returns undef if $noerr is set and dies
# if it is not.
sub verify_lxc_mp_string {
    my ($mp, $noerr) = @_;

    my @forbidden = (
        qr{/\.\.?/},    # '/./' or '/../' anywhere
        qr{/\.\.?$},    # '/.' or '/..' at the end
        qr{^\.\./},     # '../' at the beginning
    );

    my $hits = grep { $mp =~ $_ } @forbidden;
    return $mp if !$hits;

    return undef if $noerr;
    die "$mp contains illegal character sequences\n";
}
397
# Property-string schema for mount points: every key of $rootfs_desc plus
# the mandatory 'mp' key holding the path inside the container.
my $mp_desc = {
    %$rootfs_desc,
    mp => {
        type => 'string',
        format => 'pve-lxc-mp-string',
        format_description => 'Path',
        description => 'Path to the mountpoint as seen from inside the container.',
    },
};
PVE::JSONSchema::register_format('pve-ct-mountpoint', $mp_desc);
408
# Schema shared by all unusedN keys (the same hash reference is used for
# every slot).
my $unuseddesc = {
    optional => 1,
    type => 'string', format => 'pve-volume-id',
    description => "Reference to unused volumes.",
};

# Number of mpN keys (mp0 .. mp9) that are added to the config schema.
my $MAX_MOUNT_POINTS = 10;
foreach my $i (0 .. $MAX_MOUNT_POINTS - 1) {
    $confdesc->{"mp$i"} = {
        optional => 1,
        type => 'string', format => $mp_desc,
        description => "Use volume as container mount point (experimental feature).",
    };
}

# Number of unusedN keys. The loop is bounded by this constant, which is
# also the limit add_unused_volume() searches.
my $MAX_UNUSED_DISKS = $MAX_MOUNT_POINTS;
foreach my $i (0 .. $MAX_UNUSED_DISKS - 1) {
    $confdesc->{"unused$i"} = $unuseddesc;
}
429
# Serialize a container configuration hash into the on-disk text format.
#
# $filename is accepted for the cfs writer interface but not used.
# The top-level section is written first, followed by one '[name]' section
# per snapshot in sorted order. Inside a section the description comes first
# as '#' comment lines, then the sorted 'key: value' properties, then the
# raw lxc entries in their stored order.
#
# Removes 'snapstate' from the top level of $conf. Dies if a property value
# contains a newline. Returns the generated text.
sub write_pct_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    # keys that are never written as plain 'key: value' lines
    my %skip = map { $_ => 1 } qw(digest description pending snapshots snapname lxc);

    my $render_section = sub {
        my ($section) = @_;

        my $text = '';

        # add description as comment to top of file
        my $description = $section->{description} || '';
        for my $descr_line (split(/\n/, $description)) {
            $text .= '#' . PVE::Tools::encode_text($descr_line) . "\n";
        }

        for my $name (sort keys %$section) {
            next if $skip{$name};
            my $val = $section->{$name};
            die "detected invalid newline inside property '$name'\n" if $val =~ m/\n/;
            $text .= "$name: $val\n";
        }

        if (my $raw_entries = $section->{lxc}) {
            for my $pair (@$raw_entries) {
                my ($k, $v) = @$pair;
                $text .= "$k: $v\n";
            }
        }

        return $text;
    };

    my $out = $render_section->($conf);

    for my $snapname (sort keys %{$conf->{snapshots}}) {
        $out .= "\n[$snapname]\n";
        $out .= $render_section->($conf->{snapshots}->{$snapname});
    }

    return $out;
}
473
# Validate $value against the schema entry of $key in $confdesc and return
# the normalized value.
#
# Booleans are mapped to 1/0 (accepting 1/on/yes/true and 0/off/no/false),
# integers are converted with int(), numbers and strings are returned as
# given (strings after an optional format check).
#
# Dies on unknown keys, undefined values, values containing a line feed or
# carriage return, and on values that do not match the declared type.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        my $fmt = $desc->{format};
        PVE::JSONSchema::check_format($fmt, $value) if $fmt;
        return $value;
    }

    # no trailing newline: keep the file/line information for this case
    die "internal error"
}
509
# Parse the raw text of a container configuration file.
#
# $filename must end in '/lxc/<vmid>.conf'; the vmid is only used in
# warning messages. Returns undef when $raw is undefined, otherwise a hash
# reference holding the top-level properties plus
#   digest    - SHA1 hex digest of $raw
#   snapshots - hash of snapshot name => section properties
#   lxc       - list of [key, value] pairs for accepted raw lxc.* keys
#
# Lines that cannot be parsed only produce a warning. A value that fails
# check_type() also produces a warning but is still stored unchecked.
sub parse_pct_config {
    my ($filename, $raw) = @_;

    return undef if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
    };

    # the dot is escaped so that only a literal '<vmid>.conf' is accepted
    $filename =~ m|/lxc/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    # $conf points at the section currently being filled
    my $conf = $res;
    my $descr = '';
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        # '[name]' starts a snapshot section; flush the pending description
        # into the section that just ended
        if ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            $section = $1;
            $conf->{description} = $descr if $descr;
            $descr = '';
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        # comment lines make up the description of the current section
        if ($line =~ m/^\#(.*)\s*$/) {
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(lxc\.[a-z0-9_\-\.]+)(:|\s*=)\s*(.*?)\s*$/) {
            my $key = $1;
            my $value = $3;
            my $validity = $valid_lxc_conf_keys->{$key} || 0;
            if ($validity eq 1 || $key =~ m/^lxc\.cgroup\./) {
                push @{$conf->{lxc}}, [$key, $value];
            } elsif (my $errmsg = $validity) {
                # known but rejected key: show the explanation from the table
                warn "vm $vmid - $key: $errmsg\n";
            } else {
                warn "vm $vmid - unable to parse config: $line\n";
            }
        } elsif ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored at the line start so that keys merely ending in
            # 'snapstate' are not mistaken for it
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(\S.*)\s*$/) {
            my $key = $1;
            my $value = $2;
            eval { $value = check_type($key, $value); };
            warn "vm $vmid - unable to parse value of '$key' - $@" if $@;
            $conf->{$key} = $value;
        } else {
            warn "vm $vmid - unable to parse config: $line\n";
        }
    }

    $conf->{description} = $descr if $descr;

    delete $res->{snapstate}; # just to be sure

    return $res;
}
578
# Return a hash reference { vmid => { type => 'lxc' } } with all guests from
# the cluster guest list that are of type 'lxc' and located on this node.
# The result is an empty hash when the guest list is unavailable.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();

    my $local = {};
    return $local if !$vmlist || !$vmlist->{ids};

    my $guests = $vmlist->{ids};
    for my $vmid (keys %$guests) {
        next if !$vmid; # skip CT0

        my $entry = $guests->{$vmid};
        my ($node, $type) = ($entry->{node}, $entry->{type});

        next unless $node && $node eq $nodename;
        next unless $type && $type eq 'lxc';

        $local->{$vmid}->{type} = 'lxc';
    }

    return $local;
}
594
# Return the config file path of container $vmid relative to the cluster
# file system root. $node defaults to the local node name.
sub cfs_config_path {
    my ($vmid, $node) = @_;

    $node ||= $nodename;

    return "nodes/$node/lxc/$vmid.conf";
}
601
# Return the absolute path (below /etc/pve) of the config file of container
# $vmid on $node; see cfs_config_path() for the node default.
sub config_file {
    my ($vmid, $node) = @_;

    my $relative = cfs_config_path($vmid, $node);

    return '/etc/pve/' . $relative;
}
608
# Read and return the parsed configuration of container $vmid. $node
# defaults to the local node. Dies if no configuration could be read.
sub load_config {
    my ($vmid, $node) = @_;

    my $path = cfs_config_path($vmid, $node || $nodename);

    my $conf = PVE::Cluster::cfs_read_file($path);
    if (!defined($conf)) {
        die "container $vmid does not exist\n";
    }

    return $conf;
}
620
# Write the configuration $conf for container $vmid on the local node,
# creating the node's lxc directory first.
sub create_config {
    my ($vmid, $conf) = @_;

    # the result of mkdir is not checked (the directory may already exist)
    mkdir "/etc/pve/nodes/$nodename/lxc";

    write_config($vmid, $conf);
}
629
# Remove the config file of container $vmid from the local node. The
# return value is whatever unlink returns.
sub destroy_config {
    my ($vmid) = @_;

    my $path = config_file($vmid, $nodename);

    unlink $path;
}
635
# Store $conf as the configuration of container $vmid on the local node
# through the cluster file system layer.
sub write_config {
    my ($vmid, $conf) = @_;

    PVE::Cluster::cfs_write_file(cfs_config_path($vmid), $conf);
}
643
# flock: we use one file handle per process, so lock file
# can be called multiple times and will succeed for the same process.

my $lock_handles = {};
my $lockdir = "/run/lock/lxc";

# Return the path of the lock file guarding the config of container $vmid.
sub config_file_lock {
    my ($vmid) = @_;

    my $basename = "pve-config-${vmid}.lock";

    return "$lockdir/$basename";
}
655
# Run $code->(@param) while holding the config lock of container $vmid,
# passing $timeout on to lock_file(). Creates the lock directory if it is
# missing. Re-throws any error left in $@ by lock_file(), otherwise returns
# its result.
sub lock_config_full {
    my ($vmid, $timeout, $code, @param) = @_;

    my $lockfile = config_file_lock($vmid);

    mkdir $lockdir unless -d $lockdir;

    my $result = lock_file($lockfile, $timeout, $code, @param);
    die $@ if $@;

    return $result;
}
669
# Same as lock_config_full(), but goes through lock_file_full() and passes
# the additional $shared flag on to it.
sub lock_config_mode {
    my ($vmid, $timeout, $shared, $code, @param) = @_;

    my $lockfile = config_file_lock($vmid);

    mkdir $lockdir unless -d $lockdir;

    my $result = lock_file_full($lockfile, $timeout, $shared, $code, @param);
    die $@ if $@;

    return $result;
}
683
# Run $code->(@param) under the config lock of container $vmid, using a
# timeout value of 10.
sub lock_config {
    my ($vmid, $code, @param) = @_;

    my $timeout = 10;

    return lock_config_full($vmid, $timeout, $code, @param);
}
689
# Return true if $name is a key known to the container config schema.
sub option_exists {
    my ($name) = @_;

    my $desc = $confdesc->{$name};

    return defined($desc);
}
695
# add JSON properties for create and set function
#
# Copies every schema entry of $confdesc into $prop, except for 'parent'
# and 'snaptime' and for names that already have a true entry in $prop.
# Returns $prop.
sub json_config_properties {
    my $prop = shift;

    my %internal = (parent => 1, snaptime => 1);

    for my $name (keys %$confdesc) {
        next if $internal{$name};
        next if $prop->{$name};
        $prop->{$name} = $confdesc->{$name};
    }

    return $prop;
}
708
709 # container status helpers
710
# Return a hash reference { vmid => 1 } for every container that has a
# command socket '@/var/lib/lxc/<vmid>/command' listed in /proc/net/unix.
# The result is empty when that file cannot be opened.
sub list_active_containers {

    # a similar test is used by list_active_containers in lxccontainer.c
    my $active = {};

    my $fh = IO::File->new("/proc/net/unix", "r");
    return $active if !$fh;

    while (defined(my $line = <$fh>)) {
        # the last column holds the socket path
        next unless $line =~ m/^[a-f0-9]+:\s\S+\s\S+\s\S+\s\S+\s\S+\s\d+\s(\S+)$/;
        my $socket = $1;

        if ($socket =~ m!^@/var/lib/lxc/(\d+)/command$!) {
            $active->{$1} = 1;
        }
    }

    close($fh);

    return $active;
}
734
# warning: this is slow
#
# Return 1 if container $vmid shows up in list_active_containers(),
# undef otherwise.
sub check_running {
    my ($vmid) = @_;

    my $active = list_active_containers();

    return defined($active->{$vmid}) ? 1 : undef;
}
745
# Return the PVE::Tools::df() result for the root directory of the process
# $pid, queried through /proc. $vmid is not used.
sub get_container_disk_usage {
    my ($vmid, $pid) = @_;

    my $root = "/proc/$pid/root/";

    return PVE::Tools::df($root, 1);
}
751
# Per-vmid CPU accounting sample remembered between vmstatus() calls.
my $last_proc_vmid_stat;

# Read cpuacct.stat of container $vmid and return a hash reference with
# 'utime' and 'stime' taken from its 'user' and 'system' lines. The hash is
# empty when the content does not have the expected form.
my $parse_cpuacct_stat = sub {
    my ($vmid) = @_;

    my $content = read_cgroup_value('cpuacct', $vmid, 'cpuacct.stat', 1);

    my ($user, $system) = $content =~ m/^user (\d+)\nsystem (\d+)\n/;
    return {} if !defined($user);

    return { utime => $user, stime => $system };
};
770
# Collect status information for container $opt_vmid or, when no id is
# given, for all containers returned by config_list().
#
# Returns a hash reference keyed by vmid. Each entry holds: type, pid (only
# for running containers), status ('running'/'stopped'), name, cpus, disk,
# maxdisk, mem, maxmem, swap, maxswap, uptime, cpu, netin, netout, diskread,
# diskwrite and template.
#
# The cpu value is derived from the difference to the sample stored in
# $last_proc_vmid_stat by an earlier call; the first call for a container
# reports 0.
sub vmstatus {
    my ($opt_vmid) = @_;

    my $list = $opt_vmid ? { $opt_vmid => { type => 'lxc' }} : config_list();

    my $active_hash = list_active_containers();

    my $cpucount = $cpuinfo->{cpus} || 1;

    my $cdtime = gettimeofday;

    # NOTE(review): $uptime is not used anywhere below.
    my $uptime = (PVE::ProcFSTools::read_proc_uptime(1))[0];

    # first pass: static values from the configuration plus disk usage
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};

        eval { $d->{pid} = find_lxc_pid($vmid) if defined($active_hash->{$vmid}); };
        warn $@ if $@; # ignore errors (consider them stopped)

        $d->{status} = $d->{pid} ? 'running' : 'stopped';

        my $cfspath = cfs_config_path($vmid);
        my $conf = PVE::Cluster::cfs_read_file($cfspath) || {};

        $d->{name} = $conf->{'hostname'} || "CT$vmid";
        $d->{name} =~ s/[\s]//g;

        $d->{cpus} = $conf->{cpulimit} || $cpucount;

        if ($d->{pid}) {
            my $res = get_container_disk_usage($vmid, $d->{pid});
            $d->{disk} = $res->{used};
            $d->{maxdisk} = $res->{total};
        } else {
            $d->{disk} = 0;
            # use 4GB by default ??
            if (my $rootfs = $conf->{rootfs}) {
                my $rootinfo = parse_ct_rootfs($rootfs);
                # NOTE(review): this scales 'size' by 1024^3; the mount point
                # parser stores the result of PVE::JSONSchema::parse_size()
                # there - confirm the unit it returns.
                $d->{maxdisk} = int(($rootinfo->{size} || 4)*1024*1024)*1024;
            } else {
                $d->{maxdisk} = 4*1024*1024*1024;
            }
        }

        $d->{mem} = 0;
        $d->{swap} = 0;
        $d->{maxmem} = ($conf->{memory}||512)*1024*1024;
        $d->{maxswap} = ($conf->{swap}//0)*1024*1024;

        $d->{uptime} = 0;
        $d->{cpu} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $d->{template} = is_template($conf);
    }

    # second pass: runtime values of running containers
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};
        my $pid = $d->{pid};

        next if !$pid; # skip stopped CTs

        my $ctime = (stat("/proc/$pid"))[10]; # 10 = ctime
        # the method lxcfs uses; stat() returns nothing when the process is
        # already gone, keep the uptime at 0 in that case
        $d->{uptime} = defined($ctime) ? time() - $ctime : 0;

        $d->{mem} = read_cgroup_value('memory', $vmid, 'memory.usage_in_bytes');
        $d->{swap} = read_cgroup_value('memory', $vmid, 'memory.memsw.usage_in_bytes') - $d->{mem};

        my $blkio_bytes = read_cgroup_value('blkio', $vmid, 'blkio.throttle.io_service_bytes', 1);
        my @bytes = split(/\n/, $blkio_bytes);
        foreach my $byte (@bytes) {
            if (my ($key, $value) = $byte =~ /(Read|Write)\s+(\d+)/) {
                $d->{diskread} = $2 if $key eq 'Read';
                $d->{diskwrite} = $2 if $key eq 'Write';
            }
        }

        my $pstat = &$parse_cpuacct_stat($vmid);

        # the parser returns an empty hash when cpuacct.stat did not match
        my $used = ($pstat->{utime} // 0) + ($pstat->{stime} // 0);

        my $old = $last_proc_vmid_stat->{$vmid};
        if (!$old) {
            # first sample for this container: nothing to compare against
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($cdtime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        # only take a new sample when enough time has passed, otherwise
        # report the previously computed value
        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            $d->{cpu} = $old->{cpu};
        }
    }

    my $netdev = PVE::ProcFSTools::read_proc_net_dev();

    # devices named veth<vmid>i<n> belong to container <vmid>
    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^veth([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $list->{$vmid};

        next if !$d;

        # the 'receive' counter of the device is accounted as container
        # output and 'transmit' as container input
        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};

    }

    return $list;
}
899
# Classify a mount point source: 'device' for paths below /dev/, 'bind' for
# any other absolute path and 'volume' for everything else.
sub classify_mountpoint {
    my ($vol) = @_;

    return 'volume' if $vol !~ m!^/!;

    return $vol =~ m!^/dev/! ? 'device' : 'bind';
}
908
# Parse the property string $data against the schema $desc.
#
# Returns a hash reference with the parsed properties, where 'size' is
# replaced by the result of PVE::JSONSchema::parse_size() and 'type' is set
# via classify_mountpoint(). On a parse error or an invalid size it returns
# undef when $noerr is set and dies otherwise.
my $parse_ct_mountpoint_full = sub {
    my ($desc, $data, $noerr) = @_;

    $data //= '';

    my $res;
    eval { $res = PVE::JSONSchema::parse_property_string($desc, $data) };
    if ($@) {
        return undef if $noerr;
        die $@;
    }

    if (defined(my $raw_size = $res->{size})) {
        # keep the original text so that the error message can show it
        my $size = PVE::JSONSchema::parse_size($raw_size);
        if (!defined($size)) {
            return undef if $noerr;
            die "invalid size: $raw_size\n";
        }
        $res->{size} = $size;
    }

    $res->{type} = classify_mountpoint($res->{volume});

    return $res;
};
934
# Parse a 'rootfs' property string. The result always has 'mp' set to '/'.
# Error handling follows $noerr as in the shared mount point parser.
sub parse_ct_rootfs {
    my ($data, $noerr) = @_;

    my $rootfs = $parse_ct_mountpoint_full->($rootfs_desc, $data, $noerr);

    if (defined($rootfs)) {
        $rootfs->{mp} = '/';
    }

    return $rootfs;
}
944
# Parse an mpN property string against the mount point schema. Error
# handling follows $noerr as in the shared mount point parser.
sub parse_ct_mountpoint {
    my ($data, $noerr) = @_;

    return $parse_ct_mountpoint_full->($mp_desc, $data, $noerr);
}
950
# Turn a parsed mount point hash back into a property string. The 'type'
# key is always left out; 'mp' is left out as well when $nomp is set.
sub print_ct_mountpoint {
    my ($info, $nomp) = @_;

    my @skip = $nomp ? ('type', 'mp') : ('type');

    return PVE::JSONSchema::print_property_string($info, $mp_desc, \@skip);
}
957
# Turn a parsed network hash back into a netN property string.
sub print_lxc_network {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $netconf_desc);
}
962
# Parse a netN property string. Returns an empty hash for empty input.
# Otherwise 'type' is forced to 'veth' and a random MAC address is filled
# in when the string does not provide one.
sub parse_lxc_network {
    my ($data) = @_;

    return {} if !$data;

    my $net = PVE::JSONSchema::parse_property_string($netconf_desc, $data);

    $net->{type} = 'veth';
    $net->{hwaddr} ||= PVE::Tools::random_ether_addr();

    return $net;
}
977
# Read the cgroup file $name of container $vmid in controller $group.
# Returns the whole file content when $full is set, else only the first
# line.
sub read_cgroup_value {
    my ($group, $vmid, $name, $full) = @_;

    my $file = "/sys/fs/cgroup/$group/lxc/$vmid/$name";

    return $full
        ? PVE::Tools::file_get_contents($file)
        : PVE::Tools::file_read_firstline($file);
}
987
# Write $value to the cgroup file $name of container $vmid in controller
# $group. Nothing is written when the file does not exist.
sub write_cgroup_value {
    my ($group, $vmid, $name, $value) = @_;

    my $file = "/sys/fs/cgroup/$group/lxc/$vmid/$name";

    PVE::ProcFSTools::write_proc_entry($file, $value) if -e $file;
}
995
# Scan /proc for processes whose command line is exactly
# 'lxc-console -n <vmid>' (with or without the /usr/bin/ prefix).
# Returns a hash reference mapping each vmid to the list of matching pids.
sub find_lxc_console_pids {

    my %pids_by_vmid;

    PVE::Tools::dir_glob_foreach('/proc', '\d+', sub {
        my ($pid) = @_;

        my $cmdline = PVE::Tools::file_read_firstline("/proc/$pid/cmdline");
        return if !$cmdline;

        # arguments are separated by NUL bytes
        my @argv = split(/\0/, $cmdline);

        # search for lxc-console -n <vmid>
        return unless scalar(@argv) == 3;
        my ($prog, $flag, $vmid) = @argv;

        return unless $flag eq '-n';
        return unless $vmid =~ m/^\d+$/;
        return unless $prog =~ m|^(/usr/bin/)?lxc-console$|;

        push @{$pids_by_vmid{$vmid}}, $pid;
    });

    return \%pids_by_vmid;
}
1021
# Return the pid that 'lxc-info -n <vmid> -p' reports on its 'PID:' line.
# Dies when no pid was reported.
sub find_lxc_pid {
    my ($vmid) = @_;

    my $pid;

    my $cmd = ['lxc-info', '-n', $vmid, '-p'];
    PVE::Tools::run_command($cmd, outfunc => sub {
        my ($line) = @_;
        $pid = $1 if $line =~ m/^PID:\s+(\d+)$/;
    });

    die "unable to get PID for CT $vmid (not running?)\n" if !$pid;

    return $pid;
}
1036
# Note: we cannot use Net::IP, because that only allows strict
# CIDR networks
#
# Parse 'address/prefix' with a prefix length from 8 to 32. Returns a hash
# reference with 'address' and the 'netmask' looked up in
# $PVE::Network::ipv4_reverse_mask. On invalid input it returns undef when
# $noerr is set and dies otherwise.
sub parse_ipv4_cidr {
    my ($cidr, $noerr) = @_;

    if ($cidr =~ m!^($IPV4RE)(?:/(\d+))$!) {
        my ($address, $prefix) = ($1, $2);

        if (($prefix > 7) && ($prefix <= 32)) {
            return {
                address => $address,
                netmask => $PVE::Network::ipv4_reverse_mask->[$prefix],
            };
        }
    }

    return undef if $noerr;

    die "unable to parse ipv4 address/mask\n";
}
1050
# Die with the lock name if the configuration $conf carries a 'lock' entry.
sub check_lock {
    my ($conf) = @_;

    my $lock = $conf->{'lock'};

    die "VM is locked ($lock)\n" if $lock;
}
1056
# Die with "$err_msg - protection mode enabled" if the configuration
# $vm_conf has the 'protection' flag set.
sub check_protection {
    my ($vm_conf, $err_msg) = @_;

    die "$err_msg - protection mode enabled\n" if $vm_conf->{protection};
}
1064
# Generate /var/lib/lxc/<vmid>/config from the container configuration
# $conf. $storage_cfg is not used here.
#
# For templates the config file is removed instead and nothing is written.
# Otherwise the directory /var/lib/lxc/<vmid>/rootfs is created and the
# file is (re)written. Dies when 'arch' or 'ostype' is missing, when the
# ostype is not supported or when 'rootfs' cannot be parsed.
sub update_lxc_config {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $dir = "/var/lib/lxc/$vmid";

    if ($conf->{template}) {

        unlink "$dir/config";

        return;
    }

    my $raw = '';

    die "missing 'arch' - internal error" if !$conf->{arch};
    $raw .= "lxc.arch = $conf->{arch}\n";

    my $unprivileged = $conf->{unprivileged};
    # number of user supplied lxc.id_map entries
    my $custom_idmap = grep { $_->[0] eq 'lxc.id_map' } @{$conf->{lxc}};

    my $ostype = $conf->{ostype} || die "missing 'ostype' - internal error";
    if ($ostype =~ /^(?:debian | ubuntu | centos | fedora | opensuse | archlinux | alpine | unmanaged)$/x) {
        # prefer the ostype specific include, fall back to the generic one
        my $inc ="/usr/share/lxc/config/$ostype.common.conf";
        $inc ="/usr/share/lxc/config/common.conf" if !-f $inc;
        $raw .= "lxc.include = $inc\n";
        if ($unprivileged || $custom_idmap) {
            $inc = "/usr/share/lxc/config/$ostype.userns.conf";
            $inc = "/usr/share/lxc/config/userns.conf" if !-f $inc;
            $raw .= "lxc.include = $inc\n"
        }
    } else {
        die "implement me (ostype $ostype)";
    }

    # WARNING: DO NOT REMOVE this without making sure that loop device nodes
    # cannot be exposed to the container with r/w access (cgroup perms).
    # When this is enabled mounts will still remain in the monitor's namespace
    # after the container unmounted them and thus will not detach from their
    # files while the container is running!
    $raw .= "lxc.monitor.unshare = 1\n";

    # Should we read them from /etc/subuid?
    if ($unprivileged && !$custom_idmap) {
        $raw .= "lxc.id_map = u 0 100000 65536\n";
        $raw .= "lxc.id_map = g 0 100000 65536\n";
    }

    if (!has_dev_console($conf)) {
        $raw .= "lxc.console = none\n";
        $raw .= "lxc.cgroup.devices.deny = c 5:1 rwm\n";
    }

    my $ttycount = get_tty_count($conf);
    $raw .= "lxc.tty = $ttycount\n";

    # some init scripts expect a linux terminal (turnkey).
    $raw .= "lxc.environment = TERM=linux\n";

    my $utsname = $conf->{hostname} || "CT$vmid";
    $raw .= "lxc.utsname = $utsname\n";

    my $memory = $conf->{memory} || 512;
    my $swap = $conf->{swap} // 0;

    my $lxcmem = int($memory*1024*1024);
    $raw .= "lxc.cgroup.memory.limit_in_bytes = $lxcmem\n";

    # the memsw limit covers memory plus swap
    my $lxcswap = int(($memory + $swap)*1024*1024);
    $raw .= "lxc.cgroup.memory.memsw.limit_in_bytes = $lxcswap\n";

    if (my $cpulimit = $conf->{cpulimit}) {
        $raw .= "lxc.cgroup.cpu.cfs_period_us = 100000\n";
        my $value = int(100000*$cpulimit);
        $raw .= "lxc.cgroup.cpu.cfs_quota_us = $value\n";
    }

    my $shares = $conf->{cpuunits} || 1024;
    $raw .= "lxc.cgroup.cpu.shares = $shares\n";

    # the result is not used, but parsing dies on an invalid 'rootfs' value
    my $mountpoint = parse_ct_rootfs($conf->{rootfs});

    $raw .= "lxc.rootfs = $dir/rootfs\n";

    my $netcount = 0;
    # sorted, so that the network sections are emitted in a stable order
    # instead of the arbitrary hash order
    foreach my $k (sort keys %$conf) {
        next if $k !~ m/^net(\d+)$/;
        my $ind = $1;
        my $d = parse_lxc_network($conf->{$k});
        $netcount++;
        $raw .= "lxc.network.type = veth\n";
        $raw .= "lxc.network.veth.pair = veth${vmid}i${ind}\n";
        $raw .= "lxc.network.hwaddr = $d->{hwaddr}\n" if defined($d->{hwaddr});
        $raw .= "lxc.network.name = $d->{name}\n" if defined($d->{name});
        $raw .= "lxc.network.mtu = $d->{mtu}\n" if defined($d->{mtu});
    }

    # raw lxc.* entries are appended verbatim, in their stored order
    if (my $lxcconf = $conf->{lxc}) {
        foreach my $entry (@$lxcconf) {
            my ($k, $v) = @$entry;
            $netcount++ if $k eq 'lxc.network.type';
            $raw .= "$k = $v\n";
        }
    }

    $raw .= "lxc.network.type = empty\n" if !$netcount;

    File::Path::mkpath("$dir/rootfs");

    PVE::Tools::file_set_contents("$dir/config", $raw);
}
1175
# verify and cleanup nameserver list (replace \0 with ' ')
#
# Splits $nameserver_list with PVE::Tools::split_list(), checks every entry
# with PVE::JSONSchema::pve_verify_ip() and returns the entries joined by
# single spaces.
sub verify_nameserver_list {
    my ($nameserver_list) = @_;

    my @servers;
    for my $address (PVE::Tools::split_list($nameserver_list)) {
        PVE::JSONSchema::pve_verify_ip($address);
        push @servers, $address;
    }

    return join(' ', @servers);
}
1188
# Normalize a search domain list: split it with PVE::Tools::split_list() and
# return the entries joined by a single space. No validation is done yet.
sub verify_searchdomain_list {
    my ($searchdomain_list) = @_;

    # todo: should we add checks for valid dns domains?
    my @domains = PVE::Tools::split_list($searchdomain_list);

    return join(' ', @domains);
}
1200
# Return 1 if any mountpoint of $config is of type 'volume' and refers to
# $volid, 0 otherwise.
sub is_volume_in_use {
    my ($config, $volid) = @_;

    my $in_use = 0;
    my $check = sub {
        my ($ms, $mountpoint) = @_;

        return if $in_use; # already found, nothing left to test
        return if $mountpoint->{type} ne 'volume';

        $in_use = 1 if $mountpoint->{volume} eq $volid;
    };
    foreach_mountpoint($config, $check);

    return $in_use;
}
1215
# Record $volid under a free 'unusedN' key of $config.
# Returns the key that was used, or nothing if the volume is already listed
# as unused. Dies when no free 'unusedN' slot is left.
sub add_unused_volume {
    my ($config, $volid) = @_;

    # Scan from the highest index downwards, so the last free slot we
    # remember is the one with the lowest index.
    my $free_key;
    foreach my $idx (reverse(0 .. $MAX_UNUSED_DISKS - 1)) {
        my $candidate = "unused$idx";
        my $existing = $config->{$candidate};
        if ($existing) {
            return if $existing eq $volid; # do not add duplicates
            next;
        }
        $free_key = $candidate;
    }

    die "Too many unused volumes - please delete them first.\n" if !$free_key;

    $config->{$free_key} = $volid;

    return $free_key;
}
1235
# Apply configuration changes to a container.
#
# Parameters:
#   $vmid    - container id
#   $conf    - current configuration hash (modified in place)
#   $running - true if the container is currently running
#   $param   - hash of options to set ('memory' and 'swap' are extracted
#              from it)
#   $delete  - optional array ref of option names to remove
#
# While the container is running, options which cannot be hot-plugged are
# collected and reported with a single die at the very end; the configuration
# is written after every processed option in that case.
sub update_pct_config {
    my ($vmid, $conf, $running, $param, $delete) = @_;

    my @nohotplug;

    my $new_disks = 0;
    my @deleted_volumes;

    my $rootdir;
    if ($running) {
        my $pid = find_lxc_pid($vmid);
        $rootdir = "/proc/$pid/root";
    }

    # Returns 1 (and remembers the option names) if the container is running,
    # i.e. the caller has to skip the option; returns 0 otherwise.
    my $hotplug_error = sub {
        if ($running) {
            push @nohotplug, @_;
            return 1;
        } else {
            return 0;
        }
    };

    if (defined($delete)) {
        foreach my $opt (@$delete) {
            if (!exists($conf->{$opt})) {
                warn "no such option: $opt\n";
                next;
            }

            if ($opt eq 'hostname' || $opt eq 'memory' || $opt eq 'rootfs') {
                die "unable to delete required option '$opt'\n";
            } elsif ($opt eq 'swap') {
                delete $conf->{$opt};
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", -1);
            } elsif ($opt eq 'description' || $opt eq 'onboot' || $opt eq 'startup') {
                delete $conf->{$opt};
            } elsif ($opt eq 'nameserver' || $opt eq 'searchdomain' ||
                     $opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
                next if $hotplug_error->($opt);
                delete $conf->{$opt};
            } elsif ($opt =~ m/^net(\d+)$/) {
                # \d+ keeps this in line with the 'set' branch below
                my $netid = $1;
                delete $conf->{$opt};
                next if !$running;
                PVE::Network::veth_delete("veth${vmid}i$netid");
            } elsif ($opt eq 'protection') {
                delete $conf->{$opt};
            } elsif ($opt =~ m/^unused(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                push @deleted_volumes, $conf->{$opt};
                delete $conf->{$opt};
            } elsif ($opt =~ m/^mp(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                my $mp = parse_ct_mountpoint($conf->{$opt});
                delete $conf->{$opt};
                # keep a reference to the volume unless something else still uses it
                if ($mp->{type} eq 'volume' && !is_volume_in_use($conf, $mp->{volume})) {
                    add_unused_volume($conf, $mp->{volume});
                }
            } elsif ($opt eq 'unprivileged') {
                die "unable to delete read-only option: '$opt'\n";
            } else {
                die "implement me (delete: $opt)"
            }
            write_config($vmid, $conf) if $running;
        }
    }

    # There's no separate swap size to configure, there's memory and "total"
    # memory (iow. memory+swap). This means we have to change them together.
    my $wanted_memory = PVE::Tools::extract_param($param, 'memory');
    my $wanted_swap = PVE::Tools::extract_param($param, 'swap');
    if (defined($wanted_memory) || defined($wanted_swap)) {

        my $old_memory = ($conf->{memory} || 512);
        my $old_swap = ($conf->{swap} || 0);

        $wanted_memory //= $old_memory;
        $wanted_swap //= $old_swap;

        my $total = $wanted_memory + $wanted_swap;
        if ($running) {
            my $old_total = $old_memory + $old_swap;
            # When growing, the total limit is raised first; otherwise the
            # memory limit is lowered first.
            if ($total > $old_total) {
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
                write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
            } else {
                write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
            }
        }
        $conf->{memory} = $wanted_memory;
        $conf->{swap} = $wanted_swap;

        write_config($vmid, $conf) if $running;
    }

    my $used_volids = {};

    foreach my $opt (keys %$param) {
        my $value = $param->{$opt};
        if ($opt eq 'hostname') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'onboot') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt eq 'startup') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
            next if $hotplug_error->($opt);
            $conf->{$opt} = $value;
        } elsif ($opt eq 'nameserver') {
            next if $hotplug_error->($opt);
            my $list = verify_nameserver_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'searchdomain') {
            next if $hotplug_error->($opt);
            my $list = verify_searchdomain_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'cpulimit') {
            next if $hotplug_error->($opt); # FIXME: hotplug
            $conf->{$opt} = $value;
        } elsif ($opt eq 'cpuunits') {
            $conf->{$opt} = $value;
            write_cgroup_value("cpu", $vmid, "cpu.shares", $value);
        } elsif ($opt eq 'description') {
            $conf->{$opt} = PVE::Tools::encode_text($value);
        } elsif ($opt =~ m/^net(\d+)$/) {
            my $netid = $1;
            my $net = parse_lxc_network($value);
            if (!$running) {
                $conf->{$opt} = print_lxc_network($net);
            } else {
                update_net($vmid, $conf, $opt, $net, $netid, $rootdir);
            }
        } elsif ($opt eq 'protection') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt =~ m/^mp(\d+)$/) {
            next if $hotplug_error->($opt);
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            my $old = $conf->{$opt};
            $conf->{$opt} = $value;
            if (defined($old)) {
                my $mp = parse_ct_mountpoint($old);
                if ($mp->{type} eq 'volume' && !is_volume_in_use($conf, $mp->{volume})) {
                    add_unused_volume($conf, $mp->{volume});
                }
            }
            $new_disks = 1;
            my $mp = parse_ct_mountpoint($value);
            $used_volids->{$mp->{volume}} = 1;
        } elsif ($opt eq 'rootfs') {
            next if $hotplug_error->($opt);
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            my $old = $conf->{$opt};
            $conf->{$opt} = $value;
            if (defined($old)) {
                my $mp = parse_ct_rootfs($old);
                if ($mp->{type} eq 'volume' && !is_volume_in_use($conf, $mp->{volume})) {
                    add_unused_volume($conf, $mp->{volume});
                }
            }
            my $mp = parse_ct_rootfs($value);
            $used_volids->{$mp->{volume}} = 1;
        } elsif ($opt eq 'unprivileged') {
            die "unable to modify read-only option: '$opt'\n";
        } elsif ($opt eq 'ostype') {
            next if $hotplug_error->($opt);
            $conf->{$opt} = $value;
        } else {
            die "implement me: $opt";
        }
        write_config($vmid, $conf) if $running;
    }

    # Cleanup config:

    # Remove unused disks after re-adding
    foreach my $key (keys %$conf) {
        next if $key !~ /^unused\d+$/;
        my $volid = $conf->{$key};
        if ($used_volids->{$volid}) {
            delete $conf->{$key};
        }
    }

    # Apply deletions and creations of new volumes
    if (@deleted_volumes) {
        my $storage_cfg = PVE::Storage::config();
        foreach my $volume (@deleted_volumes) {
            next if $used_volids->{$volume}; # could have been re-added, too
            delete_mountpoint_volume($storage_cfg, $vmid, $volume);
        }
    }

    if ($new_disks) {
        my $storage_cfg = PVE::Storage::config();
        create_disks($storage_cfg, $vmid, $conf, $conf);
    }

    # This should be the last thing we do here
    if ($running && scalar(@nohotplug)) {
        die "unable to modify " . join(',', @nohotplug) . " while container is running\n";
    }
}
1442
# True unless the 'console' option is explicitly set to a false value.
sub has_dev_console {
    my ($conf) = @_;

    my $console = $conf->{console};

    return !defined($console) || !!$console;
}
1448
# Return the configured 'tty' value, or the default from $confdesc when the
# option is not defined.
sub get_tty_count {
    my ($conf) = @_;

    my $tty = $conf->{tty};

    return defined($tty) ? $tty : $confdesc->{tty}->{default};
}
1454
# Return the configured 'cmode' value, or the default from $confdesc when the
# option is not defined.
sub get_cmode {
    my ($conf) = @_;

    my $cmode = $conf->{cmode};

    return defined($cmode) ? $cmode : $confdesc->{cmode}->{default};
}
1460
# Build the command (as array ref) used to open a console for container
# $vmid, depending on the console mode returned by get_cmode().
# Dies with "internal error" for an unknown mode.
sub get_console_command {
    my ($vmid, $conf) = @_;

    my $cmode = get_cmode($conf);

    return ['lxc-console', '-n', $vmid, '-t', 0] if $cmode eq 'console';
    return ['lxc-console', '-n', $vmid] if $cmode eq 'tty';
    return ['lxc-attach', '--clear-env', '-n', $vmid] if $cmode eq 'shell';

    die "internal error";
}
1476
# Return ($ipv4, $ipv6) taken from the 'net0' entry of $conf.
# Keyword values (dhcp/manual, and auto for IPv6) are mapped to undef and a
# trailing '/<digits>' prefix length is stripped. Returns undef when 'net0'
# is not defined.
sub get_primary_ips {
    my ($conf) = @_;

    # return data from net0
    my $net0 = $conf->{net0};
    return undef if !defined($net0);

    my $net = parse_lxc_network($net0);
    my ($ipv4, $ipv6) = ($net->{ip}, $net->{ip6});

    if ($ipv4 && $ipv4 =~ /^(dhcp|manual)$/) {
        $ipv4 = undef;
    } elsif ($ipv4) {
        $ipv4 =~ s!/\d+$!!;
    }

    if ($ipv6 && $ipv6 =~ /^(auto|dhcp|manual)$/) {
        $ipv6 = undef;
    } elsif ($ipv6) {
        $ipv6 =~ s!/\d+$!!;
    }

    return ($ipv4, $ipv6);
}
1504
# Free $volume through PVE::Storage::vdisk_free(), but only if
# classify_mountpoint() reports it as a 'volume' and the owner parsed from
# the volume name equals $vmid.
sub delete_mountpoint_volume {
    my ($storage_cfg, $vmid, $volume) = @_;

    my $class = classify_mountpoint($volume);
    return if $class ne 'volume';

    my (undef, undef, $owner) = PVE::Storage::parse_volname($storage_cfg, $volume);
    PVE::Storage::vdisk_free($storage_cfg, $volume) if $owner == $vmid;
}
1513
# Remove a container: free the volumes of all its mountpoints, remove the
# files below /var/lib/lxc/$vmid and finally destroy its configuration.
sub destroy_lxc_container {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $free_volume = sub {
        my ($ms, $mountpoint) = @_;
        delete_mountpoint_volume($storage_cfg, $vmid, $mountpoint->{volume});
    };
    foreach_mountpoint($conf, $free_volume);

    my $lxc_dir = "/var/lib/lxc/$vmid";
    rmdir "$lxc_dir/rootfs";
    unlink "$lxc_dir/config";
    rmdir $lxc_dir;
    destroy_config($vmid);

    #my $cmd = ['lxc-destroy', '-n', $vmid ];
    #PVE::Tools::run_command($cmd);
}
1530
# Deactivate the volumes of a stopped container unless $keepActive is set.
# Errors are only reported as warnings.
sub vm_stop_cleanup {
    my ($storage_cfg, $vmid, $conf, $keepActive) = @_;

    eval {
        return if $keepActive; # nothing to do - this only leaves the eval

        my $volumes = get_vm_volumes($conf);
        PVE::Storage::deactivate_volumes($storage_cfg, $volumes);
    };
    warn $@ if $@; # avoid errors - just warn
}
1543
# Numeric "not equal" which tolerates undefined operands: two undefined
# values are equal, an undefined and a defined value differ.
my $safe_num_ne = sub {
    my ($lhs, $rhs) = @_;

    if (defined($lhs) && defined($rhs)) {
        return $lhs != $rhs;
    }

    # at least one side is undefined: different unless both are
    return (defined($lhs) || defined($rhs)) ? 1 : 0;
};
1553
# String "not equal" which tolerates undefined operands: two undefined
# values are equal, an undefined and a defined value differ.
my $safe_string_ne = sub {
    my ($lhs, $rhs) = @_;

    if (defined($lhs) && defined($rhs)) {
        return $lhs ne $rhs;
    }

    # at least one side is undefined: different unless both are
    return (defined($lhs) || defined($rhs)) ? 1 : 0;
};
1563
# Apply a changed network configuration ($newnet) for interface $opt to a
# running container.
# - no previous configuration: the interface is hot-plugged
# - hwaddr or name changed: the veth device is deleted and re-created
# - bridge, tag or firewall changed: the veth device is re-plugged
# Afterwards the IP configuration is updated via update_ipconfig().
sub update_net {
    my ($vmid, $conf, $opt, $newnet, $netid, $rootdir) = @_;

    if ($newnet->{type} ne 'veth') {
        # for when there are physical interfaces
        die "cannot update interface of type $newnet->{type}";
    }

    my $veth = "veth${vmid}i${netid}";
    my $eth = $newnet->{name};

    my $oldnetcfg = $conf->{$opt};
    if (!$oldnetcfg) {
        hotplug_net($vmid, $conf, $opt, $newnet, $netid);
    } else {
        my $oldnet = parse_lxc_network($oldnetcfg);
        my @attach_keys = qw(bridge tag firewall);

        if ($safe_string_ne->($oldnet->{hwaddr}, $newnet->{hwaddr}) ||
            $safe_string_ne->($oldnet->{name}, $newnet->{name})) {

            PVE::Network::veth_delete($veth);
            delete $conf->{$opt};
            write_config($vmid, $conf);

            hotplug_net($vmid, $conf, $opt, $newnet, $netid);

        } elsif ($safe_string_ne->($oldnet->{bridge}, $newnet->{bridge}) ||
                 $safe_num_ne->($oldnet->{tag}, $newnet->{tag}) ||
                 $safe_num_ne->($oldnet->{firewall}, $newnet->{firewall})) {

            if ($oldnet->{bridge}) {
                PVE::Network::tap_unplug($veth);
                # record the unplugged state before plugging in again
                delete @{$oldnet}{@attach_keys};
                $conf->{$opt} = print_lxc_network($oldnet);
                write_config($vmid, $conf);
            }

            PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});
            foreach my $key (@attach_keys) {
                $oldnet->{$key} = $newnet->{$key} if $newnet->{$key};
            }
            $conf->{$opt} = print_lxc_network($oldnet);
            write_config($vmid, $conf);
        }
    }

    update_ipconfig($vmid, $conf, $opt, $eth, $newnet, $rootdir);
}
1613
# Create a veth pair for interface $opt, plug the host side into the bridge,
# hand the peer to the running container, bring it up there and store the
# applied settings (type, bridge, tag, firewall, hwaddr, name) in the config.
sub hotplug_net {
    my ($vmid, $conf, $opt, $newnet, $netid) = @_;

    my $veth = "veth${vmid}i${netid}";
    my $vethpeer = $veth . "p";
    my $eth = $newnet->{name};

    PVE::Network::veth_create($veth, $vethpeer, $newnet->{bridge}, $newnet->{hwaddr});
    PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});

    # attach peer in container
    PVE::Tools::run_command(['lxc-device', '-n', $vmid, 'add', $vethpeer, "$eth" ]);

    # link up peer in container
    PVE::Tools::run_command(
        ['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', '/sbin/ip', 'link', 'set', $eth ,'up' ]);

    my %applied = (type => 'veth');
    foreach my $key (qw(bridge tag firewall hwaddr name)) {
        my $val = $newnet->{$key};
        $applied{$key} = $val if $val;
    }
    $conf->{$opt} = print_lxc_network(\%applied);

    write_config($vmid, $conf);
}
1640
# Apply changed IP addresses and gateways of interface $opt (device $eth
# inside the container) to a running container, for IPv4 and then IPv6.
#
# Commands are executed in the container's network namespace through
# 'lxc-attach -s NETWORK'. For every address family where something changed
# and could be applied, the resulting settings are written to the config and
# PVE::LXC::Setup->setup_network() is called.
sub update_ipconfig {
    my ($vmid, $conf, $opt, $eth, $newnet, $rootdir) = @_;

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);

    my $optdata = parse_lxc_network($conf->{$opt});

    # Run a command in the container's network namespace; the first argument
    # is a hash ref with extra run_command() options.
    my $nscmd = sub {
        my $cmdargs = shift;
        PVE::Tools::run_command(['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', @_], %$cmdargs);
    };
    my $ipcmd = sub { &$nscmd({}, '/sbin/ip', @_) };

    my $change_ip_config = sub {
        my ($ipversion) = @_;

        my $family_opt = "-$ipversion";
        # property names: ip/gw for IPv4, ip6/gw6 for IPv6
        my $suffix = $ipversion == 4 ? '' : $ipversion;
        my $gw= "gw$suffix";
        my $ip= "ip$suffix";

        my $newip = $newnet->{$ip};
        my $newgw = $newnet->{$gw};
        my $oldip = $optdata->{$ip};

        my $change_ip = &$safe_string_ne($oldip, $newip);
        my $change_gw = &$safe_string_ne($optdata->{$gw}, $newgw);

        return if !$change_ip && !$change_gw;

        # step 1: add new IP, if this fails we cancel
        my $is_real_ip = ($newip && $newip !~ /^(?:auto|dhcp|manual)$/);
        if ($change_ip && $is_real_ip) {
            eval { &$ipcmd($family_opt, 'addr', 'add', $newip, 'dev', $eth); };
            if (my $err = $@) {
                warn $err;
                return;
            }
        }

        # step 2: replace gateway
        # If this fails we delete the added IP and cancel.
        # If it succeeds we save the config and delete the old IP, ignoring
        # errors. The config is then saved.
        # Note: 'ip route replace' also adds the route if there is none yet.
        if ($change_gw) {
            if ($newgw) {
                eval {
                    if ($is_real_ip && !PVE::Network::is_ip_in_cidr($newgw, $newip, $ipversion)) {
                        &$ipcmd($family_opt, 'route', 'add', $newgw, 'dev', $eth);
                    }
                    &$ipcmd($family_opt, 'route', 'replace', 'default', 'via', $newgw);
                };
                if (my $err = $@) {
                    warn $err;
                    # the route was not replaced, the old IP is still available
                    # rollback (delete new IP) and cancel
                    # Only an address which step 1 actually added can be removed.
                    if ($change_ip && $is_real_ip) {
                        eval { &$ipcmd($family_opt, 'addr', 'del', $newip, 'dev', $eth); };
                        warn $@ if $@; # no need to die here
                    }
                    return;
                }
            } else {
                eval { &$ipcmd($family_opt, 'route', 'del', 'default'); };
                # if the route was not deleted, the guest might have deleted it manually
                # warn and continue
                warn $@ if $@;
            }
        }

        # from this point on we save the configuration
        # step 3: delete old IP ignoring errors
        # Keyword values (auto/dhcp/manual) are not addresses, so there is
        # nothing to delete for them.
        if ($change_ip && $oldip && $oldip !~ /^(?:auto|dhcp|manual)$/) {
            # We need to enable promote_secondaries, otherwise our newly added
            # address will be removed along with the old one.
            my $promote = 0;
            eval {
                if ($ipversion == 4) {
                    &$nscmd({ outfunc => sub { $promote = int(shift) } },
                            'cat', "/proc/sys/net/ipv4/conf/$eth/promote_secondaries");
                    &$nscmd({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=1");
                }
                &$ipcmd($family_opt, 'addr', 'del', $oldip, 'dev', $eth);
            };
            warn $@ if $@; # no need to die here

            # restore the previous promote_secondaries setting
            if ($ipversion == 4) {
                &$nscmd({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=$promote");
            }
        }

        foreach my $property ($ip, $gw) {
            if ($newnet->{$property}) {
                $optdata->{$property} = $newnet->{$property};
            } else {
                delete $optdata->{$property};
            }
        }
        $conf->{$opt} = print_lxc_network($optdata);
        write_config($vmid, $conf);
        $lxc_setup->setup_network($conf);
    };

    &$change_ip_config(4);
    &$change_ip_config(6);

}
1750
1751 # Internal snapshots
1752
1753 # NOTE: Snapshot create/delete involves several non-atomic
1754 # actions, and can take a long time.
1755 # So we try to avoid locking the file and use the 'lock' variable
1756 # inside the config file instead.
1757
# Copy all entries from $source to $dest, except snapshot bookkeeping keys
# (snapshots, snapstate, snaptime, vmstate, lock, digest), the description
# and the 'unusedN' entries.
my $snapshot_copy_config = sub {
    my ($source, $dest) = @_;

    my %skip = map { $_ => 1 }
        qw(snapshots snapstate snaptime vmstate lock digest description);

    foreach my $key (keys %$source) {
        next if $skip{$key} || $key =~ m/^unused\d+$/;

        $dest->{$key} = $source->{$key};
    }
};
1774
# Build a new configuration from snapshot $snap: the snapshot list, the
# description and the 'unusedN' entries are taken from the current $conf,
# everything else is copied from the snapshot.
my $snapshot_apply_config = sub {
    my ($conf, $snap) = @_;

    # copy snapshot list
    my $merged = { snapshots => $conf->{snapshots} };

    # keep description and list of unused disks
    my @kept = grep { m/^unused\d+$/ || $_ eq 'description' } keys %$conf;
    $merged->{$_} = $conf->{$_} for @kept;

    $snapshot_copy_config->($snap, $merged);

    return $merged;
};
1793
# Placeholder: saving the VM state is not implemented, so this always dies.
my $snapshot_save_vmstate = sub {
    die "implement me - snapshot_save_vmstate\n";
};
1797
# First phase of snapshot creation: under the config lock, set the
# 'snapshot' lock in the configuration and add a new snapshot entry in state
# "prepare" holding a copy of the current configuration.
# Returns the new snapshot entry.
sub snapshot_prepare {
    my ($vmid, $snapname, $save_vmstate, $comment) = @_;

    my $snap;

    my $prepare_locked = sub {
        my $conf = load_config($vmid);

        die "you can't take a snapshot if it's a template\n"
            if is_template($conf);

        check_lock($conf);

        $conf->{lock} = 'snapshot';

        die "snapshot name '$snapname' already used\n"
            if defined($conf->{snapshots}->{$snapname});

        my $storecfg = PVE::Storage::config();

        # workaround until mp snapshots are implemented
        my $feature = $snapname eq 'vzdump' ? 'vzdump' : 'snapshot';
        if (!has_feature($feature, $conf, $storecfg)) {
            die "snapshot feature is not available\n";
        }

        $snap = $conf->{snapshots}->{$snapname} = {};

        if ($save_vmstate && check_running($vmid)) {
            $snapshot_save_vmstate->($vmid, $conf, $snapname, $storecfg);
        }

        $snapshot_copy_config->($conf, $snap);

        $snap->{snapstate} = "prepare";
        $snap->{snaptime} = time();
        $snap->{description} = $comment if $comment;

        write_config($vmid, $conf);
    };

    lock_config($vmid, $prepare_locked);

    return $snap;
}
1842
# Final phase of snapshot creation: under the config lock, verify that the
# 'snapshot' lock and the "prepare" state are present, clear both and write
# the configuration with the snapshot recorded as parent.
sub snapshot_commit {
    my ($vmid, $snapname) = @_;

    my $commit_locked = sub {
        my $conf = load_config($vmid);

        my $lock = $conf->{lock};
        die "missing snapshot lock\n"
            if !($lock && $lock eq 'snapshot');

        my $snap = $conf->{snapshots}->{$snapname};
        die "snapshot '$snapname' does not exist\n" if !defined($snap);

        my $state = $snap->{snapstate};
        die "wrong snapshot state\n"
            if !($state && $state eq "prepare");

        delete $snap->{snapstate};
        delete $conf->{lock};

        my $newconf = $snapshot_apply_config->($conf, $snap);

        $newconf->{parent} = $snapname;

        write_config($vmid, $newconf);
    };

    lock_config($vmid, $commit_locked);
}
1871
# Check whether the storage of the container's mountpoints supports
# $feature. The pseudo feature 'vzdump' is checked as 'snapshot' but skips
# non-rootfs mountpoints which are not flagged for backup.
# Returns 1 if supported, 0 otherwise; dies for any considered mountpoint
# other than 'rootfs'.
sub has_feature {
    my ($feature, $conf, $storecfg, $snapname) = @_;

    my $for_vzdump = $feature eq 'vzdump';
    $feature = 'snapshot' if $for_vzdump;

    my $unsupported;
    my $test_mountpoint = sub {
        my ($ms, $mountpoint) = @_;

        return if $unsupported; # skip further test
        return if $for_vzdump && $ms ne 'rootfs' && !$mountpoint->{backup};

        if (!PVE::Storage::volume_has_feature($storecfg, $feature, $mountpoint->{volume}, $snapname)) {
            $unsupported = 1;
        }

        # TODO: implement support for mountpoints
        die "unable to handle mountpoint '$ms' - feature not implemented\n"
            if $ms ne 'rootfs';
    };
    foreach_mountpoint($conf, $test_mountpoint);

    return $unsupported ? 0 : 1;
}
1894
# Enter the namespace $which (of kind $type) of process $pid by opening
# /proc/$pid/ns/$which and passing it to PVE::Tools::setns().
# $vmid is only used for error messages.
my $enter_namespace = sub {
    my ($vmid, $pid, $which, $type) = @_;

    my $ns_path = "/proc/$pid/ns/$which";
    sysopen(my $ns_fh, $ns_path, O_RDONLY)
        or die "failed to open $which namespace of container $vmid: $!\n";
    PVE::Tools::setns(fileno($ns_fh), $type)
        or die "failed to enter $which namespace of container $vmid: $!\n";
    close $ns_fh;
};
1903
# Child side of sync_container_namespace(): enter the container's mount
# namespace, signal the parent with "go\n", read the mount table the parent
# sends back over $socket and sync every listed mountpoint except
# fuse.lxcfs ones. Sync errors are only warned about.
my $do_syncfs = sub {
    my ($vmid, $pid, $socket) = @_;

    $enter_namespace->($vmid, $pid, 'mnt', PVE::Tools::CLONE_NEWNS);

    # Tell the parent process to start reading our /proc/mounts
    print {$socket} "go\n";
    $socket->flush();

    # Receive /proc/self/mounts
    my $mountdata = do { local $/ = undef; <$socket> };
    close $socket;

    # Now sync all mountpoints...
    my $mounts = PVE::ProcFSTools::parse_mounts($mountdata);
    foreach my $entry (@$mounts) {
        my (undef, $dir, $fs) = @$entry;
        next if $fs eq 'fuse.lxcfs';

        eval { PVE::Tools::sync_mountpoint($dir); };
        warn $@ if $@;
    }
};
1926
# Sync all filesystems mounted inside a running container.
#
# A child process is forked which enters the container's mount namespace
# (see $do_syncfs). Once it reports "go\n", the parent reads the child's
# /proc/<pid>/mounts, sends the content over a socket pair and waits for the
# child to finish syncing.
# Dies if the child cannot enter the namespace, the mount table cannot be
# read, or the child exits with a non-zero status.
sub sync_container_namespace {
    my ($vmid) = @_;
    my $pid = find_lxc_pid($vmid);

    # SOCK_DGRAM is nicer for barriers but cannot be slurped
    socketpair my $pfd, my $cfd, AF_UNIX, SOCK_STREAM, PF_UNSPEC
        or die "failed to create socketpair: $!\n";

    my $child = fork();
    die "fork failed: $!\n" if !defined($child);

    if (!$child) {
        eval {
            close $pfd;
            &$do_syncfs($vmid, $pid, $cfd);
        };
        if (my $err = $@) {
            warn $err;
            POSIX::_exit(1);
        }
        POSIX::_exit(0);
    }
    close $cfd;

    # Error exit for the parent: close our end of the socket so the child
    # cannot block on it, collect the child so no zombie is left behind, and
    # then raise the error.
    my $abort = sub {
        my ($msg) = @_;
        close $pfd;
        waitpid($child, 0);
        die $msg;
    };

    # $go is undef if the child died before it could send the handshake.
    my $go = <$pfd>;
    $abort->("failed to enter container namespace\n")
        if !defined($go) || $go ne "go\n";

    open my $mounts, '<', "/proc/$child/mounts"
        or $abort->("failed to open container's /proc/mounts: $!\n");
    my $mountdata = do { local $/ = undef; <$mounts> };
    close $mounts;
    print {$pfd} $mountdata;
    close $pfd;

    while (waitpid($child, 0) != $child) {}
    die "failed to sync container namespace\n" if $? != 0;
}
1963
# Create snapshot $snapname of container $vmid.
# The snapshot is prepared in the config, a running container is frozen and
# its filesystems synced, then the rootfs volume is snapshotted. The
# container is always unfrozen again. On error the half-created snapshot is
# removed and the error re-raised, otherwise the snapshot is committed.
sub snapshot_create {
    my ($vmid, $snapname, $save_vmstate, $comment) = @_;

    my $snap = snapshot_prepare($vmid, $snapname, $save_vmstate, $comment);

    my $conf = load_config($vmid);

    my $running = check_running($vmid);

    my $frozen = 0;

    # volumes which already got a snapshot, passed on to snapshot_delete()
    my $drivehash = {};

    eval {
        if ($running) {
            $frozen = 1;
            PVE::Tools::run_command(['/usr/bin/lxc-freeze', '-n', $vmid]);
            sync_container_namespace($vmid);
        }

        my $storecfg = PVE::Storage::config();
        my $rootinfo = parse_ct_rootfs($conf->{rootfs});

        PVE::Storage::volume_snapshot($storecfg, $rootinfo->{volume}, $snapname);
        $drivehash->{rootfs} = 1;
    };
    my $err = $@;

    if ($frozen) {
        eval { PVE::Tools::run_command(['/usr/bin/lxc-unfreeze', '-n', $vmid]); };
        warn $@ if $@;
    }

    if ($err) {
        eval { snapshot_delete($vmid, $snapname, 1, $drivehash); };
        warn "$@\n" if $@;
        die "$err\n";
    }

    snapshot_commit($vmid, $snapname);
}
2006
# Delete snapshot $snapname of container $vmid.
# Works in two locked config updates: the first marks the snapshot with
# state 'delete', then the volume snapshot of the rootfs is removed, and the
# second update drops the snapshot entry and fixes up parent references.
# With $force, a failing volume snapshot removal is only warned about.
# Note: $drivehash is only set when called from snapshot_create.
sub snapshot_delete {
    my ($vmid, $snapname, $force, $drivehash) = @_;

    # phase flag read by $update_config: 1 = mark, 0 = cleanup
    my $prepare = 1;

    my $snap;

    # Re-point (or remove) the parent reference of $confref if it refers to
    # the snapshot being deleted.
    my $unlink_parent = sub {
        my ($confref, $new_parent) = @_;

        my $parent = $confref->{parent};
        return if !$parent || $parent ne $snapname;

        if ($new_parent) {
            $confref->{parent} = $new_parent;
        } else {
            delete $confref->{parent};
        }
    };

    my $update_config = sub {
        my ($remove_drive) = @_;

        my $conf = load_config($vmid);

        if (!$drivehash) {
            check_lock($conf);
            die "you can't delete a snapshot if vm is a template\n"
                if is_template($conf);
        }

        $snap = $conf->{snapshots}->{$snapname};

        die "snapshot '$snapname' does not exist\n" if !defined($snap);

        # remove parent refs
        if (!$prepare) {
            $unlink_parent->($conf, $snap->{parent});
            foreach my $sn (keys %{$conf->{snapshots}}) {
                next if $sn eq $snapname;
                $unlink_parent->($conf->{snapshots}->{$sn}, $snap->{parent});
            }
        }

        if ($remove_drive) {
            die "implement me - saving vmstate\n" if $remove_drive eq 'vmstate';
            die "implement me - remove drive\n";
        }

        if ($prepare) {
            $snap->{snapstate} = 'delete';
        } else {
            delete $conf->{snapshots}->{$snapname};
            delete $conf->{lock} if $drivehash;
        }

        write_config($vmid, $conf);
    };

    lock_config($vmid, $update_config);

    # now remove vmstate file
    # never set for LXC!
    my $storecfg = PVE::Storage::config();

    die "implement me - saving vmstate\n" if $snap->{vmstate};

    # now remove all volume snapshots
    # only rootfs for now!
    eval {
        my $rootinfo = parse_ct_rootfs($snap->{rootfs});
        PVE::Storage::volume_snapshot_delete($storecfg, $rootinfo->{volume}, $snapname);
    };
    if (my $err = $@) {
        die $err if !$force;
        warn $err;
    }

    # now cleanup config
    $prepare = 0;
    lock_config($vmid, $update_config);
}
2096
# Roll container $vmid back to snapshot $snapname.
#
# Works in two locked config updates around the storage rollback of the
# rootfs volume: the first one stops a running container and sets the
# 'rollback' lock, the second one verifies and removes that lock and replaces
# the current configuration with the snapshot's configuration.
# Dies if the container is a template, the snapshot does not exist or is
# incomplete, or the container is still running.
sub snapshot_rollback {
    my ($vmid, $snapname) = @_;

    # phase flag read by $updatefn: 1 = before, 0 = after the volume rollback
    my $prepare = 1;

    my $storecfg = PVE::Storage::config();

    my $conf = load_config($vmid);

    my $get_snapshot_config = sub {

        die "you can't rollback if vm is a template\n" if is_template($conf);

        my $res = $conf->{snapshots}->{$snapname};

        die "snapshot '$snapname' does not exist\n" if !defined($res);

        return $res;
    };

    my $snap = &$get_snapshot_config();

    # only for rootfs for now!
    my $rootfs = $snap->{rootfs};
    my $rootinfo = parse_ct_rootfs($rootfs);
    my $volid = $rootinfo->{volume};

    PVE::Storage::volume_rollback_is_possible($storecfg, $volid, $snapname);

    my $updatefn = sub {

        # re-read config and snapshot while holding the lock
        $conf = load_config($vmid);

        $snap = &$get_snapshot_config();

        die "unable to rollback to incomplete snapshot (snapstate = $snap->{snapstate})\n"
            if $snap->{snapstate};

        if ($prepare) {
            check_lock($conf);
            # list form: the arguments are never passed through a shell
            system('lxc-stop', '-n', $vmid, '--kill') if check_running($vmid);
        }

        die "unable to rollback vm $vmid: vm is running\n"
            if check_running($vmid);

        if ($prepare) {
            $conf->{lock} = 'rollback';
        } else {
            die "got wrong lock\n" if !($conf->{lock} && $conf->{lock} eq 'rollback');
            delete $conf->{lock};
        }

        if (!$prepare) {
            # copy snapshot config to current config
            $conf = &$snapshot_apply_config($conf, $snap);
            $conf->{parent} = $snapname;
        }

        write_config($vmid, $conf);

        if (!$prepare && $snap->{vmstate}) {
            die "implement me - save vmstate\n";
        }
    };

    lock_config($vmid, $updatefn);

    # only rootfs for now!
    PVE::Storage::volume_snapshot_rollback($storecfg, $volid, $snapname);

    $prepare = 0;
    lock_config($vmid, $updatefn);
}
2173
# Convert the rootfs volume of container $vmid into a base (template) volume
# and store the new volume id in the configuration.
# Dies if the storage does not provide the 'template' feature for the volume.
sub template_create {
    my ($vmid, $conf) = @_;

    my $storecfg = PVE::Storage::config();

    my $rootinfo = parse_ct_rootfs($conf->{rootfs});
    my $volid = $rootinfo->{volume};

    if (!PVE::Storage::volume_has_feature($storecfg, 'template', $volid)) {
        die "Template feature is not available for '$volid'\n";
    }

    PVE::Storage::activate_volumes($storecfg, [$volid]);

    $rootinfo->{volume} = PVE::Storage::vdisk_create_base($storecfg, $volid);
    $conf->{rootfs} = print_ct_mountpoint($rootinfo, 1);

    write_config($vmid, $conf);
}
2193
# Return 1 if the 'template' option of $conf is defined and equals 1,
# 0 otherwise. The false case is returned explicitly instead of falling
# through to the value of the last evaluated expression.
sub is_template {
    my ($conf) = @_;

    return 1 if defined $conf->{template} && $conf->{template} == 1;

    return 0;
}
2199
# Return the list of mountpoint config keys: 'rootfs' followed by 'mp0' up
# to 'mp<$MAX_MOUNT_POINTS - 1>', in reverse order if $reverse is true.
sub mountpoint_names {
    my ($reverse) = @_;

    my @names = ('rootfs', map { "mp$_" } 0 .. $MAX_MOUNT_POINTS - 1);

    return $reverse ? reverse @names : @names;
}
2211
2212
# Call $func->($key, $mountpoint) for every mountpoint key defined in $conf,
# in the order given by mountpoint_names($reverse). Entries whose parser
# (called with a true second argument) returns undef are skipped.
sub foreach_mountpoint_full {
    my ($conf, $reverse, $func) = @_;

    foreach my $key (mountpoint_names($reverse)) {
        my $value = $conf->{$key};
        next if !defined($value);

        my $mountpoint;
        if ($key eq 'rootfs') {
            $mountpoint = parse_ct_rootfs($value, 1);
        } else {
            $mountpoint = parse_ct_mountpoint($value, 1);
        }
        next if !defined($mountpoint);

        $func->($key, $mountpoint);
    }
}
2225
# Iterate over the mountpoints of $conf in forward order
# (see foreach_mountpoint_full).
sub foreach_mountpoint {
    my ($conf, $func) = @_;

    return foreach_mountpoint_full($conf, 0, $func);
}
2231
# Iterate over the mountpoints of $conf in reverse order
# (see foreach_mountpoint_full).
sub foreach_mountpoint_reverse {
    my ($conf, $func) = @_;

    return foreach_mountpoint_full($conf, 1, $func);
}
2237
# Check that $authuser may change the options in $newconf and delete the
# options listed in $delete. 'root@pam' is always allowed.
# Every option is mapped to one VM.Config.* privilege which is checked via
# $rpcenv->check_vm_perm(); setting a disk option additionally raises a
# permission exception unless the mountpoint is of type 'volume'.
# Returns 1 when all checks passed.
sub check_ct_modify_config_perm {
    my ($rpcenv, $authuser, $vmid, $pool, $newconf, $delete) = @_;

    return 1 if $authuser eq 'root@pam';

    # privileges for options which are matched by their exact name
    my %priv_for = (
        cpus => 'VM.Config.CPU',
        cpuunits => 'VM.Config.CPU',
        cpulimit => 'VM.Config.CPU',
        memory => 'VM.Config.Memory',
        swap => 'VM.Config.Memory',
        nameserver => 'VM.Config.Network',
        searchdomain => 'VM.Config.Network',
        hostname => 'VM.Config.Network',
    );

    my $check = sub {
        my ($opt, $is_delete) = @_;

        my $is_disk = $opt eq 'rootfs' || $opt =~ /^mp\d+$/;

        my $priv;
        if ($is_disk) {
            $priv = 'VM.Config.Disk';
        } elsif ($opt =~ m/^net\d+$/) {
            $priv = 'VM.Config.Network';
        } else {
            $priv = $priv_for{$opt} // 'VM.Config.Options';
        }
        $rpcenv->check_vm_perm($authuser, $vmid, $pool, [$priv]);

        return if !$is_disk || $is_delete;

        my $data = $opt eq 'rootfs' ? parse_ct_rootfs($newconf->{$opt})
                                    : parse_ct_mountpoint($newconf->{$opt});
        raise_perm_exc("mountpoint type $data->{type}") if $data->{type} ne 'volume';
    };

    $check->($_, 0) foreach keys %$newconf;
    $check->($_, 1) foreach @$delete;

    return 1;
}
2272
# Unmount all mountpoints of container $vmid below its rootfs directory, in
# reverse mountpoint order. With $noerr set, umount errors are only warned
# about instead of being raised.
sub umount_all {
    my ($vmid, $storage_cfg, $conf, $noerr) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    my $volid_list = get_vm_volumes($conf);

    my $umount_one = sub {
        my ($ms, $mountpoint) = @_;

        my ($volid, $mount) = ($mountpoint->{volume}, $mountpoint->{mp});
        return if !$volid || !$mount;

        # collapse repeated slashes
        my $mount_path = "$rootdir/$mount";
        $mount_path =~ s!/+!/!g;

        return if !PVE::ProcFSTools::is_mounted($mount_path);

        eval {
            PVE::Tools::run_command(['umount', '-d', $mount_path]);
        };
        if (my $err = $@) {
            die $err if !$noerr;
            warn $err;
        }
    };

    foreach_mountpoint_reverse($conf, $umount_one);
}
2304
# Mount every mountpoint of container $vmid below /var/lib/lxc/$vmid/rootfs.
#
# The root directory is created, the container's volumes are activated and
# each mountpoint is passed to mountpoint_mount(). If mounting fails,
# umount_all() is called in $noerr mode before the original error is
# re-thrown. Returns the root directory path.
sub mount_all {
    my ($vmid, $storage_cfg, $conf) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    File::Path::make_path($rootdir);

    PVE::Storage::activate_volumes($storage_cfg, get_vm_volumes($conf));

    my $mount_one = sub {
        my ($ms, $mountpoint) = @_;

        mountpoint_mount($mountpoint, $rootdir, $storage_cfg);
    };

    eval { foreach_mountpoint($conf, $mount_one); };
    my $err = $@;
    if ($err) {
        warn "mounting container failed\n";
        umount_all($vmid, $storage_cfg, $conf, 1);
        die $err;
    }

    return $rootdir;
}
2329
2330
# Resolve the source path of a mountpoint without mounting anything.
# Thin wrapper around mountpoint_mount() that passes an undefined root
# directory.
sub mountpoint_mount_path {
    my ($mountpoint, $storage_cfg, $snapname) = @_;

    # an undefined root directory selects the "path only" mode
    my $rootdir = undef;

    return mountpoint_mount($mountpoint, $rootdir, $storage_cfg, $snapname);
}
2336
# Die unless $path (after File::Spec canonicalization) is identical to its
# Cwd::realpath() resolution, i.e. no component of it is a symlink.
my $check_mount_path = sub {
    my ($path) = @_;

    $path = File::Spec->canonpath($path);
    my $real = Cwd::realpath($path);

    # realpath() yields undef when the path cannot be resolved; report that
    # instead of comparing against undef and blaming a symlink.
    die "unable to resolve mount path '$path'\n" if !defined($real);

    if ($real ne $path) {
        die "mount path modified by symlink: $path != $real\n";
    }
};
2345
# Look up the loop device associated with $path using
# 'losetup --associated'. Returns the /dev/loopN name from the last matching
# output line, or undef if no line matched.
sub query_loopdev {
    my ($path) = @_;

    my $loopdev;

    PVE::Tools::run_command(
        ['losetup', '--associated', $path],
        outfunc => sub {
            my ($line) = @_;
            $loopdev = $1 if $line =~ m@^(/dev/loop\d+):@;
        },
    );

    return $loopdev;
}
2359
# Attach $file to a free loop device, call $func with the device path and
# detach the device again with 'losetup -d', whether or not $func died.
# An error raised by $func is re-thrown after detaching.
# Returns the loop device path.
sub run_with_loopdev {
    my ($func, $file) = @_;

    my $loopdev;
    my $capture = sub {
        my ($line) = @_;
        $loopdev = $1 if $line =~ m@^(/dev/loop\d+)$@;
    };

    PVE::Tools::run_command(['losetup', '--show', '-f', $file], outfunc => $capture);
    die "failed to setup loop device for $file\n" if !$loopdev;

    eval { $func->($loopdev); };
    my $err = $@;

    PVE::Tools::run_command(['losetup', '-d', $loopdev]);
    die $err if $err;

    return $loopdev;
}
2380
# Bind mount $dir onto $dest, passing @extra_opts through to mount.
# With $ro set the bind mount is remounted read-only afterwards; if that
# remount fails the mount is removed again before the error is re-thrown.
sub bindmount {
    my ($dir, $dest, $ro, @extra_opts) = @_;

    PVE::Tools::run_command(['mount', '-o', 'bind', @extra_opts, $dir, $dest]);

    if ($ro) {
        my $remount = ['mount', '-o', 'bind,remount,ro', $dest];
        eval { PVE::Tools::run_command($remount); };
        my $err = $@;
        if ($err) {
            warn "bindmount error\n";
            # don't leave writable bind-mounts behind...
            PVE::Tools::run_command(['umount', $dest]);
            die $err;
        }
    }
}
2394
# Mount a single container mountpoint, or only resolve its source path.
# use $rootdir = undef to just return the corresponding mount path
#
# Parameters:
#   $mountpoint  - parsed mountpoint hash; the keys volume, mp, type, ro,
#                  quota and acl are read here
#   $rootdir     - directory below which 'mp' gets mounted; when undef
#                  nothing is mounted
#   $storage_cfg - storage configuration passed on to PVE::Storage
#   $snapname    - optional snapshot name; snapshots are mounted read-only
#
# Returns nothing if the mountpoint has no volume or no mount path.
# Otherwise returns ($path, $use_loopdev, $mounted_dev) in list context and
# only $path in scalar context. Dies for unsupported storage types, image
# formats or mountpoint types.
sub mountpoint_mount {
    my ($mountpoint, $rootdir, $storage_cfg, $snapname) = @_;

    my $volid = $mountpoint->{volume};
    my $mount = $mountpoint->{mp};
    my $type = $mountpoint->{type};
    # quota is only considered for writable, non-snapshot mounts
    my $quota = !$snapname && !$mountpoint->{ro} && $mountpoint->{quota};
    my $mounted_dev;

    return if !$volid || !$mount;

    my $mount_path;

    if (defined($rootdir)) {
        $rootdir =~ s!/+$!!;
        $mount_path = "$rootdir/$mount";
        $mount_path =~ s!/+!/!g;
        $check_mount_path->($mount_path);
        File::Path::mkpath($mount_path);
    }

    my ($storage) = PVE::Storage::parse_volume_id($volid, 1);

    die "unknown snapshot path for '$volid'\n" if !$storage && defined($snapname);

    my $optstring = '';
    if (defined($mountpoint->{acl})) {
        $optstring .= ($mountpoint->{acl} ? 'acl' : 'noacl');
    }
    my $readonly = $mountpoint->{ro};

    my @extra_opts = ('-o', $optstring);

    if ($storage) {

        my $scfg = PVE::Storage::storage_config($storage_cfg, $storage);
        my $path = PVE::Storage::path($storage_cfg, $volid, $snapname);

        my ($vtype, undef, undef, undef, undef, $isBase, $format) =
            PVE::Storage::parse_volname($storage_cfg, $volid);

        $format = 'iso' if $vtype eq 'iso'; # allow to handle iso files

        if ($format eq 'subvol') {
            if ($mount_path) {
                if ($snapname) {
                    if ($scfg->{type} eq 'zfspool') {
                        # the zfs mount source is given without leading slashes
                        my $path_arg = $path;
                        $path_arg =~ s!^/+!!;
                        PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, '-t', 'zfs', $path_arg, $mount_path]);
                    } else {
                        die "cannot mount subvol snapshots for storage type '$scfg->{type}'\n";
                    }
                } else {
                    bindmount($path, $mount_path, $readonly, @extra_opts);
                    warn "cannot enable quota control for bind mounted subvolumes\n" if $quota;
                }
            }
            return wantarray ? ($path, 0, $mounted_dev) : $path;
        } elsif ($format eq 'raw' || $format eq 'iso') {
            # mounts the given device or image path; does nothing in "path only" mode
            my $domount = sub {
                my ($path) = @_;
                if ($mount_path) {
                    if ($format eq 'iso') {
                        PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, $path, $mount_path]);
                    } elsif ($isBase || defined($snapname)) {
                        PVE::Tools::run_command(['mount', '-o', 'ro,noload', @extra_opts, $path, $mount_path]);
                    } else {
                        if ($quota) {
                            push @extra_opts, '-o', 'usrjquota=aquota.user,grpjquota=aquota.group,jqfmt=vfsv0';
                        }
                        push @extra_opts, '-o', 'ro' if $readonly;
                        PVE::Tools::run_command(['mount', @extra_opts, $path, $mount_path]);
                    }
                }
            };
            my $use_loopdev = 0;
            if ($scfg->{path}) {
                # storages with a 'path' setting are mounted through a loop device
                $mounted_dev = run_with_loopdev($domount, $path);
                $use_loopdev = 1;
            } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' ||
                     $scfg->{type} eq 'rbd' || $scfg->{type} eq 'lvmthin') {
                $mounted_dev = $path;
                $domount->($path);
            } else {
                die "unsupported storage type '$scfg->{type}'\n";
            }
            return wantarray ? ($path, $use_loopdev, $mounted_dev) : $path;
        } else {
            die "unsupported image format '$format'\n";
        }
    } elsif ($type eq 'device') {
        push @extra_opts, '-o', 'ro' if $readonly;
        PVE::Tools::run_command(['mount', @extra_opts, $volid, $mount_path]) if $mount_path;
        return wantarray ? ($volid, 0, $volid) : $volid;
    } elsif ($type eq 'bind') {
        die "directory '$volid' does not exist\n" if ! -d $volid;
        $check_mount_path->($volid);
        bindmount($volid, $mount_path, $readonly, @extra_opts) if $mount_path;
        warn "cannot enable quota control for bind mounts\n" if $quota;
        return wantarray ? ($volid, 0, undef) : $volid;
    }

    die "unsupported storage\n";
}
2501
# Collect the volume IDs of all mountpoints in $conf that are of type
# 'volume' and whose volume ID parses into a storage ID.
# A mountpoint whose key equals $excludes (if given) is left out.
# Returns an array ref of volume IDs.
sub get_vm_volumes {
    my ($conf, $excludes) = @_;

    my @volids;

    foreach_mountpoint($conf, sub {
        my ($ms, $mountpoint) = @_;

        return if $excludes && $ms eq $excludes;

        my $volid = $mountpoint->{volume};
        return if !$volid;
        return if $mountpoint->{type} ne 'volume';

        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        push @volids, $volid;
    });

    return \@volids;
}
2524
# Create an ext4 filesystem on $dev with the 'mmp' feature enabled and the
# filesystem root directory owned by $rootuid:$rootgid.
sub mkfs {
    my ($dev, $rootuid, $rootgid) = @_;

    my $owner_opt = "root_owner=$rootuid:$rootgid";
    my @cmd = ('mkfs.ext4', '-O', 'mmp', '-E', $owner_opt, $dev);

    PVE::Tools::run_command(\@cmd);
}
2532
# Create a filesystem on the volume $volid, with the filesystem root owned
# by $rootuid:$rootgid.
#
# A $volid that is a /dev/... path is formatted directly. Any other $volid
# must parse into a storage ID; the volume is activated, resolved to a path
# and formatted. Only volumes in 'raw' format are accepted.
sub format_disk {
    my ($storage_cfg, $volid, $rootuid, $rootgid) = @_;

    if ($volid =~ m!^/dev/.+!) {
        # pass the owner along, mkfs() builds its root_owner option from it
        mkfs($volid, $rootuid, $rootgid);
        return;
    }

    my ($storage) = PVE::Storage::parse_volume_id($volid, 1);

    die "cannot format volume '$volid' with no storage\n" if !$storage;

    PVE::Storage::activate_volumes($storage_cfg, [$volid]);

    my $path = PVE::Storage::path($storage_cfg, $volid);

    # the image format is the 7th value returned by parse_volname()
    my $format = (PVE::Storage::parse_volname($storage_cfg, $volid))[6];

    die "cannot format volume '$volid' (format == $format)\n"
        if $format ne 'raw';

    mkfs($path, $rootuid, $rootgid);
}
2557
# Free every volume in the array ref $vollist.
# A failure to free one volume is only reported with warn, so the remaining
# volumes are still processed.
sub destroy_disks {
    my ($storecfg, $vollist) = @_;

    for my $volid (@$vollist) {
        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        my $err = $@;
        warn $err if $err;
    }
}
2566
# Allocate the volumes requested in $settings for container $vmid and store
# the resulting mountpoint strings in $conf.
#
# A mountpoint volume of the form "<storage>:<size in GB>" requests a new
# volume on that storage. Depending on the storage type either a raw image
# is allocated and formatted, or a subvol is allocated whose path is chown'ed
# to the container's root uid/gid afterwards. Any other volume value is taken
# over into $conf as is.
#
# Returns an array ref with the volume IDs allocated here. On error all
# volumes allocated so far are destroyed again and the error is re-thrown.
sub create_disks {
    my ($storecfg, $vmid, $settings, $conf) = @_;

    my $vollist = [];

    eval {
        my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
        my $chown_vollist = [];

        foreach_mountpoint($settings, sub {
            my ($ms, $mountpoint) = @_;

            my $volid = $mountpoint->{volume};

            my ($storage) = PVE::Storage::parse_volume_id($volid, 1);

            if ($storage && ($volid =~ m/^([^:\s]+):(\d+(\.\d+)?)$/)) {
                my $size_gb = $2;

                my $size_kb = int(${size_gb}*1024) * 1024;

                my $scfg = PVE::Storage::storage_config($storecfg, $storage);
                my $stype = $scfg->{type};
                # fixme: use better naming ct-$vmid-disk-X.raw?

                # default: raw image of the requested size which needs a filesystem
                my ($format, $alloc_kb, $needs_mkfs) = ('raw', $size_kb, 1);

                if ($stype eq 'dir' || $stype eq 'nfs') {
                    # size 0 requests a subvol instead of an image
                    ($format, $alloc_kb, $needs_mkfs) = ('subvol', 0, 0) if !($size_kb > 0);
                } elsif ($stype eq 'zfspool') {
                    ($format, $needs_mkfs) = ('subvol', 0);
                } elsif ($stype eq 'drbd' || $stype eq 'lvm' || $stype eq 'lvmthin') {
                    # raw image plus mkfs, the defaults apply
                } elsif ($stype eq 'rbd') {
                    die "krbd option must be enabled on storage type '$scfg->{type}'\n" if !$scfg->{krbd};
                } else {
                    die "unable to create containers on storage type '$scfg->{type}'\n";
                }

                $volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, $format,
                                                   undef, $alloc_kb);
                # Register the volume before formatting it, so that a failing
                # mkfs does not leave the fresh allocation behind.
                push @$vollist, $volid;

                if ($needs_mkfs) {
                    format_disk($storecfg, $volid, $rootuid, $rootgid);
                } else {
                    push @$chown_vollist, $volid;
                }

                $mountpoint->{volume} = $volid;
                $mountpoint->{size} = $size_kb * 1024;
                $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            } else {
                # use specified/existing volid/dir/device
                $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            }
        });

        # subvols need to be owned by the container's root user
        PVE::Storage::activate_volumes($storecfg, $chown_vollist, undef);
        foreach my $volid (@$chown_vollist) {
            my $path = PVE::Storage::path($storecfg, $volid, undef);
            chown($rootuid, $rootgid, $path);
        }
        PVE::Storage::deactivate_volumes($storecfg, $chown_vollist, undef);
    };
    # free allocated images on error
    if (my $err = $@) {
        destroy_disks($storecfg, $vollist);
        die $err;
    }
    return $vollist;
}
2644
2645 # bash completion helper
2646
# Completion helper returning an array ref of volume IDs.
# Lists 'backup' volumes when $cmdname is 'restore' and 'vztmpl' volumes
# otherwise. If $cvalue starts with "<storeid>:", the lookup is restricted to
# that storage.
sub complete_os_templates {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    # stays undef unless the typed value already names a storage
    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $vtype = 'vztmpl';
    $vtype = 'backup' if $cmdname eq 'restore';

    my $data = PVE::Storage::template_list($cfg, $storeid, $vtype);

    my @volids;
    for my $id (keys %$data) {
        for my $item (@{$data->{$id}}) {
            next if !defined($item->{volid});
            push @volids, $item->{volid};
        }
    }

    return \@volids;
}
2670
# Completion helper returning an array ref of container IDs from vmstatus().
# With $running undefined all IDs are returned. Otherwise templates are
# dropped and only containers whose active state (according to
# list_active_containers()) matches $running are kept.
my $complete_ctid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();

    my $active_hash = list_active_containers();

    my $filter_by_state = defined($running);
    my @ids;

    for my $id (keys %$idlist) {
        if ($filter_by_state) {
            next if $idlist->{$id}->{template};

            # keep the entry only if requested and actual state agree
            my $is_active = $active_hash->{$id} ? 1 : 0;
            my $want_active = $running ? 1 : 0;
            next if $is_active != $want_active;
        }
        push @ids, $id;
    }

    return \@ids;
};
2692
# Completion helper: IDs of all containers, without any state filter.
sub complete_ctid {
    return $complete_ctid_full->();
}
2696
# Completion helper: IDs of containers that are neither templates nor active.
sub complete_ctid_stopped {
    my $running = 0;
    return $complete_ctid_full->($running);
}
2700
# Completion helper: IDs of non-template containers that are currently active.
sub complete_ctid_running {
    my $running = 1;
    return $complete_ctid_full->($running);
}
2704
# Extract the uid/gid mappings from the 'lxc.id_map' entries in $conf->{lxc}.
#
# Returns ($id_map, $rootuid, $rootgid): $id_map is an array ref of
# [type, container id, host id, length] entries, and $rootuid/$rootgid are
# the host ids that container id 0 is mapped to (0 if there is no such
# mapping). If no mapping is configured but the container is flagged
# 'unprivileged', a default mapping of 65536 ids starting at host id 100000
# is returned. Dies on an id_map value that cannot be parsed.
sub parse_id_maps {
    my ($conf) = @_;

    my @id_map;
    my %root_id = (u => 0, g => 0);

    for my $entry (@{ $conf->{lxc} || [] }) {
        my ($key, $value) = @$entry;
        next if $key ne 'lxc.id_map';

        my ($type, $ct, $host, $length) = $value =~ /^([ug])\s+(\d+)\s+(\d+)\s+(\d+)\s*$/
            or die "failed to parse id_map: $value\n";

        push @id_map, [$type, $ct, $host, $length];

        # remember where the container's root user/group ends up on the host
        $root_id{$type} = $host if $ct == 0;
    }

    if (!@id_map && $conf->{unprivileged}) {
        # Should we read them from /etc/subuid?
        @id_map = (
            ['u', '0', '100000', '65536'],
            ['g', '0', '100000', '65536'],
        );
        $root_id{u} = $root_id{g} = 100000;
    }

    return (\@id_map, $root_id{u}, $root_id{g});
}
2737
# Build the 'lxc-usernsexec' command prefix for the given id map.
# Each map entry becomes one '-m' option with its fields joined by ':'; the
# prefix ends with '--'. Returns an empty array ref for an empty id map.
sub userns_command {
    my ($id_map) = @_;

    return [] if !@$id_map;

    my @cmd = ('lxc-usernsexec');
    for my $mapping (@$id_map) {
        push @cmd, '-m', join(':', @$mapping);
    }
    push @cmd, '--';

    return \@cmd;
}
2745
2746 1;