]> git.proxmox.com Git - pve-container.git/blob - src/PVE/LXC.pm
snapshot: replace global sync with a namespace sync
[pve-container.git] / src / PVE / LXC.pm
1 package PVE::LXC;
2
3 use strict;
4 use warnings;
5 use POSIX qw(EINTR);
6
7 use Socket;
8
9 use File::Path;
10 use File::Spec;
11 use Cwd qw();
12 use Fcntl qw(O_RDONLY :flock);
13
14 use PVE::Cluster qw(cfs_register_file cfs_read_file);
15 use PVE::Storage;
16 use PVE::SafeSyslog;
17 use PVE::INotify;
18 use PVE::JSONSchema qw(get_standard_option);
19 use PVE::Tools qw($IPV6RE $IPV4RE dir_glob_foreach);
20 use PVE::Network;
21 use PVE::AccessControl;
22 use PVE::ProcFSTools;
23 use Time::HiRes qw (gettimeofday);
24
25 use Data::Dumper;
26
# Name of the local node; used below as the default node for config paths.
my $nodename = PVE::INotify::nodename();

# Host CPU information, read once when the module is loaded.
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();

# Flags handed to tar. Declared with 'our' so that code outside this file
# can reference the same list.
our $COMMON_TAR_FLAGS = [qw(
    --sparse
    --numeric-owner
    --acls
    --xattrs
    --xattrs-include=user.*
    --xattrs-include=security.capability
    --warning=no-xattr-write
)];

# Hook the container config parser/writer into the cluster file system layer.
cfs_register_file('/lxc/', \&parse_pct_config, \&write_pct_config);
38
# Property-string schema for the 'rootfs' option. The same properties are
# reused for additional mount points further down in this file.
my $rootfs_desc = {
    volume => {
        description => 'Volume, device or directory to mount into the container.',
        type => 'string',
        format => 'pve-lxc-mp-string',
        format_description => 'volume',
        default_key => 1,
    },
    backup => {
        description => 'Whether to include the mountpoint in backups.',
        type => 'boolean',
        format_description => '[1|0]',
        optional => 1,
    },
    size => {
        description => 'Volume size (read only value).',
        type => 'string',
        format => 'disk-size',
        format_description => 'DiskSize',
        optional => 1,
    },
    acl => {
        description => 'Explicitly enable or disable ACL support.',
        type => 'boolean',
        format_description => 'acl',
        optional => 1,
    },
    ro => {
        description => 'Read-only mountpoint (not supported with bind mounts)',
        type => 'boolean',
        format_description => 'ro',
        optional => 1,
    },
    quota => {
        description => 'Enable user quotas inside the container (not supported with zfs subvolumes)',
        type => 'boolean',
        format_description => '[0|1]',
        optional => 1,
    },
};

# Standard option describing the container root file system.
PVE::JSONSchema::register_standard_option('pve-ct-rootfs', {
    description => "Use volume as container root.",
    type => 'string',
    format => $rootfs_desc,
    optional => 1,
});

# Standard option describing a snapshot name.
PVE::JSONSchema::register_standard_option('pve-lxc-snapshot-name', {
    description => "The name of the snapshot.",
    type => 'string',
    format => 'pve-configid',
    maxLength => 40,
});
91
# Schema of the options accepted in a container configuration file.
# check_type() and option_exists() look options up here by name; the
# per-index entries (net<N>, mp<N>, unused<N>) are added further down.
my $confdesc = {
    # --- administrative flags ---
    lock => {
        type => 'string',
        optional => 1,
        enum => [qw(migrate backup snapshot rollback)],
        description => "Lock/unlock the VM.",
    },
    onboot => {
        type => 'boolean',
        optional => 1,
        default => 0,
        description => "Specifies whether a VM will be started during system bootup.",
    },
    startup => get_standard_option('pve-startup-order'),
    template => {
        type => 'boolean',
        optional => 1,
        default => 0,
        description => "Enable/disable Template.",
    },

    # --- guest system ---
    arch => {
        type => 'string',
        optional => 1,
        enum => ['amd64', 'i386'],
        default => 'amd64',
        description => "OS architecture type.",
    },
    ostype => {
        type => 'string',
        optional => 1,
        enum => ['debian', 'ubuntu', 'centos', 'fedora', 'opensuse', 'archlinux'],
        description => "OS type. Corresponds to lxc setup scripts in /usr/share/lxc/config/<ostype>.common.conf.",
    },
    console => {
        type => 'boolean',
        optional => 1,
        default => 1,
        description => "Attach a console device (/dev/console) to the container.",
    },
    tty => {
        type => 'integer',
        optional => 1,
        minimum => 0,
        maximum => 6,
        default => 2,
        description => "Specify the number of tty available to the container",
    },

    # --- resource limits ---
    cpulimit => {
        type => 'number',
        optional => 1,
        minimum => 0,
        maximum => 128,
        default => 0,
        description => "Limit of CPU usage. Note if the computer has 2 CPUs, it has a total of '2' CPU time. Value '0' indicates no CPU limit.",
    },
    cpuunits => {
        type => 'integer',
        optional => 1,
        minimum => 0,
        maximum => 500000,
        default => 1024,
        description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to the weights of all the other running VMs.\n\nNOTE: You can disable fair-scheduler configuration by setting this to 0.",
    },
    memory => {
        type => 'integer',
        optional => 1,
        minimum => 16,
        default => 512,
        description => "Amount of RAM for the VM in MB.",
    },
    swap => {
        type => 'integer',
        optional => 1,
        minimum => 0,
        default => 512,
        description => "Amount of SWAP for the VM in MB.",
    },

    # --- identity and DNS ---
    hostname => {
        type => 'string',
        format => 'dns-name',
        optional => 1,
        maxLength => 255,
        description => "Set a host name for the container.",
    },
    description => {
        type => 'string',
        optional => 1,
        description => "Container description. Only used on the configuration web interface.",
    },
    searchdomain => {
        type => 'string',
        format => 'dns-name-list',
        optional => 1,
        description => "Sets DNS search domains for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
    },
    nameserver => {
        type => 'string',
        format => 'address-list',
        optional => 1,
        description => "Sets DNS server IP address for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
    },

    # --- storage and snapshots ---
    rootfs => get_standard_option('pve-ct-rootfs'),
    parent => {
        type => 'string',
        format => 'pve-configid',
        optional => 1,
        maxLength => 40,
        description => "Parent snapshot name. This is used internally, and should not be modified.",
    },
    snaptime => {
        type => 'integer',
        optional => 1,
        minimum => 0,
        description => "Timestamp for snapshots.",
    },

    # --- console access and protection ---
    cmode => {
        type => 'string',
        optional => 1,
        enum => ['shell', 'console', 'tty'],
        default => 'tty',
        description => "Console mode. By default, the console command tries to open a connection to one of the available tty devices. By setting cmode to 'console' it tries to attach to /dev/console instead. If you set cmode to 'shell', it simply invokes a shell inside the container (no login).",
    },
    protection => {
        type => 'boolean',
        optional => 1,
        default => 0,
        description => "Sets the protection flag of the container. This will prevent the CT or CT's disk remove/update operation.",
    },
    unprivileged => {
        type => 'boolean',
        optional => 1,
        default => 0,
        description => "Makes the container run as unprivileged user. (Should not be modified manually.)",
    },
};
223
# Raw 'lxc.*' keys that parse_pct_config() accepts in a config file.
# A value of 1 means the key is passed through unchanged; any other value
# is the warning text printed when the key is encountered.
# Keys below 'lxc.cgroup.' are not listed; the parser accepts them by prefix.
my $valid_lxc_conf_keys = {
    (map { $_ => 1 } qw(
        lxc.include
        lxc.arch
        lxc.utsname
        lxc.haltsignal
        lxc.rebootsignal
        lxc.stopsignal
        lxc.init_cmd
        lxc.network.type
        lxc.network.flags
        lxc.network.link
        lxc.network.mtu
        lxc.network.name
        lxc.network.hwaddr
        lxc.network.ipv4
        lxc.network.ipv4.gateway
        lxc.network.ipv6
        lxc.network.ipv6.gateway
        lxc.network.script.up
        lxc.network.script.down
        lxc.pts
        lxc.console.logfile
        lxc.console
        lxc.tty
        lxc.devttydir
        lxc.hook.autodev
        lxc.autodev
        lxc.kmsg
        lxc.mount
        lxc.mount.entry
        lxc.mount.auto
        lxc.rootfs.mount
        lxc.cap.drop
        lxc.cap.keep
        lxc.aa_profile
        lxc.aa_allow_incomplete
        lxc.se_context
        lxc.seccomp
        lxc.id_map
        lxc.hook.pre-start
        lxc.hook.pre-mount
        lxc.hook.mount
        lxc.hook.start
        lxc.hook.stop
        lxc.hook.post-stop
        lxc.hook.clone
        lxc.hook.destroy
        lxc.loglevel
        lxc.logfile
        lxc.start.auto
        lxc.start.delay
        lxc.start.order
        lxc.group
        lxc.environment
    )),

    # Keys that are recognized but rejected with an explanatory message.
    'lxc.rootfs' => 'lxc.rootfs is auto generated from rootfs',
    'lxc.rootfs.options' => 'lxc.rootfs.options is not supported' .
        ', please use mountpoint options in the "rootfs" key',
};
283
# Property-string schema for the 'net<N>' options. Also registered as the
# 'pve-lxc-network' format. update_lxc_config() translates the parsed
# hwaddr, name and mtu properties into 'lxc.network.*' lines.
my $netconf_desc = {
    type => {
        type => 'string',
        optional => 1,
        description => "Network interface type.",
        enum => [qw(veth)],
    },
    name => {
        type => 'string',
        format_description => 'String',
        description => 'Name of the network device as seen from inside the container. (lxc.network.name)',
        pattern => '[-_.\w\d]+',
    },
    bridge => {
        type => 'string',
        format_description => 'vmbr<Number>',
        description => 'Bridge to attach the network device to.',
        pattern => '[-_.\w\d]+',
        optional => 1,
    },
    hwaddr => {
        type => 'string',
        format_description => 'MAC',
        # Fixed: this description used to be a verbatim copy of the 'bridge' one.
        description => 'MAC address of the network device. (lxc.network.hwaddr)',
        pattern => qr/(?:[a-f0-9]{2}:){5}[a-f0-9]{2}/i,
        optional => 1,
    },
    mtu => {
        type => 'integer',
        format_description => 'Number',
        description => 'Maximum transfer unit of the interface. (lxc.network.mtu)',
        minimum => 64, # minimum ethernet frame is 64 bytes
        optional => 1,
    },
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        description => 'IPv4 address in CIDR format.',
        optional => 1,
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        description => 'Default gateway for IPv4 traffic.',
        optional => 1,
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        description => 'IPv6 address in CIDR format.',
        optional => 1,
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        description => 'Default gateway for IPv6 traffic.',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        format_description => '[1|0]',
        description => "Controls whether this interface's firewall rules should be used.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        format_description => 'VlanNo',
        minimum => '2',
        maximum => '4094',
        description => "VLAN tag for this interface.",
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:;\d+)*/,
        format_description => 'vlanid[;vlanid...]',
        description => "VLAN ids to pass through the interface",
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-lxc-network', $netconf_desc);
369
# Number of network slots (net0 .. net9) a container config may define.
my $MAX_LXC_NETWORKS = 10;

# Add one 'net<N>' option per slot to the config schema.
foreach my $slot (0 .. $MAX_LXC_NETWORKS - 1) {
    $confdesc->{"net$slot"} = {
        description => "Specifies network interfaces for the container.",
        type => 'string',
        format => $netconf_desc,
        optional => 1,
    };
}

# Register the path validator used by the 'volume' and 'mp' properties.
PVE::JSONSchema::register_format('pve-lxc-mp-string', \&verify_lxc_mp_string);
# Validate a mount point / volume string.
#
# Rejects strings that contain relative path components:
#   - "/./" or "/../" anywhere
#   - "/." or "/.." at the end
#   - "../" at the beginning
#
# Returns the string unchanged when it is acceptable. Otherwise returns
# undef when $noerr is true, or dies with an error message.
sub verify_lxc_mp_string {
    my ($mp, $noerr) = @_;

    my $has_relative_component =
        $mp =~ m@/\.\.?/@
        || $mp =~ m@/\.\.?$@
        || $mp =~ m@^\.\./@;

    return $mp if !$has_relative_component;

    return undef if $noerr;
    die "$mp contains illegal character sequences\n";
}
396
# Schema for additional mount points: every rootfs property plus the
# mandatory 'mp' path inside the container.
my $mp_desc = { %$rootfs_desc };
$mp_desc->{mp} = {
    description => 'Path to the mountpoint as seen from inside the container.',
    type => 'string',
    format => 'pve-lxc-mp-string',
    format_description => 'Path',
};
PVE::JSONSchema::register_format('pve-ct-mountpoint', $mp_desc);
407
# Schema entry shared by all 'unused<N>' options.
my $unuseddesc = {
    optional => 1,
    type => 'string', format => 'pve-volume-id',
    description => "Reference to unused volumes.",
};

# Number of additional mount point slots (mp0 .. mp9).
my $MAX_MOUNT_POINTS = 10;
for my $i (0 .. $MAX_MOUNT_POINTS - 1) {
    # The 'optional' key used to be listed twice here; once is enough.
    $confdesc->{"mp$i"} = {
        optional => 1,
        type => 'string', format => $mp_desc,
        description => "Use volume as container mount point (experimental feature).",
    };
}

# Number of 'unused<N>' slots; add_unused_volume() scans the same range.
my $MAX_UNUSED_DISKS = $MAX_MOUNT_POINTS;
# Bound this loop by its own constant so both stay in sync if the limits
# ever diverge (the loop previously used $MAX_MOUNT_POINTS).
for my $i (0 .. $MAX_UNUSED_DISKS - 1) {
    $confdesc->{"unused$i"} = $unuseddesc;
}
428
# Serialize a parsed container configuration back into its file format.
#
# $filename is accepted for the cfs writer interface but not used here.
# $conf is the config hash; its 'snapshots' entry maps snapshot names to
# config hashes of the same shape, each written as its own "[name]" section.
#
# Returns the raw file content. Dies if a property value contains a newline.
sub write_pct_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    # keys that are not written as plain "key: value" lines
    my %is_special = map { $_ => 1 }
        qw(digest description pending snapshots snapname lxc);

    my $render_section = sub {
        my ($section) = @_;

        my $text = '';

        # the description goes to the top of the section as comment lines
        my $descr = $section->{description} || '';
        foreach my $descr_line (split(/\n/, $descr)) {
            $text .= '#' . PVE::Tools::encode_text($descr_line) . "\n";
        }

        foreach my $key (sort keys %$section) {
            next if $is_special{$key};
            my $value = $section->{$key};
            die "detected invalid newline inside property '$key'\n" if $value =~ m/\n/;
            $text .= "$key: $value\n";
        }

        # raw lxc.* entries follow, in their stored order
        if (my $lxc_entries = $section->{lxc}) {
            foreach my $pair (@$lxc_entries) {
                my ($lxc_key, $lxc_value) = @$pair;
                $text .= "$lxc_key: $lxc_value\n";
            }
        }

        return $text;
    };

    my $output = $render_section->($conf);

    foreach my $snapname (sort keys %{$conf->{snapshots}}) {
        $output .= "\n[$snapname]\n";
        $output .= $render_section->($conf->{snapshots}->{$snapname});
    }

    return $output;
}
472
# Validate $value against the schema entry of option $key in $confdesc.
#
# Returns the normalized value: 1/0 for booleans, an integer for integers,
# and the unchanged value for numbers and strings. Dies when the option is
# unknown, the value is undefined or contains a line break, or the value
# does not match the declared type or string format.
sub check_type {
    my ($key, $value) = @_;

    my $schema = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$schema;

    my $type = $schema->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        # strings are only checked when the schema names a format
        my $fmt = $schema->{format};
        PVE::JSONSchema::check_format($fmt, $value) if $fmt;
        return $value;
    }

    die "internal error"
}
508
# Parse the raw content of a container configuration file.
#
# $filename must end in "/lxc/<vmid>.conf"; the vmid is only used in warnings.
# $raw is the file content, or undef when the file does not exist.
#
# Returns undef for undefined input. Otherwise returns a hash with the parsed
# top-level options, a 'digest' (SHA1 hex of $raw) and 'snapshots', which maps
# each "[name]" section to a hash of the same shape. Lines that cannot be
# parsed only produce a warning. Dies if $filename has an unexpected form.
sub parse_pct_config {
    my ($filename, $raw) = @_;

    return undef if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
    };

    # the dot is escaped so that only a literal ".conf" suffix is accepted
    $filename =~ m|/lxc/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    my $conf = $res;    # section currently being filled
    my $descr = '';
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            # a new snapshot section starts: flush the collected description
            # into the previous section first
            $section = $1;
            $conf->{description} = $descr if $descr;
            $descr = '';
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)\s*$/) {
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(lxc\.[a-z0-9_\-\.]+)(:|\s*=)\s*(.*?)\s*$/) {
            my $key = $1;
            my $value = $3;
            my $validity = $valid_lxc_conf_keys->{$key} || 0;
            if ($validity eq 1 || $key =~ m/^lxc\.cgroup\./) {
                push @{$conf->{lxc}}, [$key, $value];
            } elsif (my $errmsg = $validity) {
                warn "vm $vmid - $key: $errmsg\n";
            } else {
                warn "vm $vmid - unable to parse config: $line\n";
            }
        } elsif ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored at the line start so that keys merely ending in
            # "snapstate" are not mistaken for it
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.*\S)\s*$/) {
            # '(.*\S)' strips trailing whitespace from the value; the former
            # greedy '(\S.*)' kept it, so "memory: 512 " failed the type check
            my $key = $1;
            my $value = $2;
            eval { $value = check_type($key, $value); };
            warn "vm $vmid - unable to parse value of '$key' - $@" if $@;
            # the value is stored even when the type check failed
            $conf->{$key} = $value;
        } else {
            warn "vm $vmid - unable to parse config: $line\n";
        }
    }

    $conf->{description} = $descr if $descr;

    delete $res->{snapstate}; # just to be sure

    return $res;
}
577
# List the LXC containers located on the local node.
#
# Returns a hash reference mapping each vmid to { type => 'lxc' }, built
# from the cluster-wide VM list. The result is empty when no list exists.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();

    my $res = {};
    return $res if !$vmlist || !$vmlist->{ids};

    my $ids = $vmlist->{ids};

    my @local_cts = grep {
        my $entry = $ids->{$_};
        $_    # skip CT0
            && $entry->{node} && $entry->{node} eq $nodename
            && $entry->{type} && $entry->{type} eq 'lxc';
    } keys %$ids;

    $res->{$_}->{type} = 'lxc' for @local_cts;

    return $res;
}
593
# Build the cluster-file-system relative path of a container config file.
# $node defaults to the local node name.
sub cfs_config_path {
    my ($vmid, $node) = @_;

    $node ||= $nodename;

    return join('/', 'nodes', $node, 'lxc', "$vmid.conf");
}
600
# Return the absolute path below /etc/pve of a container config file.
# $node is passed through to cfs_config_path().
sub config_file {
    my ($vmid, $node) = @_;

    return "/etc/pve/" . cfs_config_path($vmid, $node);
}
607
# Read and return the parsed configuration of container $vmid.
# $node defaults to the local node. Dies if no configuration is found.
sub load_config {
    my ($vmid, $node) = @_;

    my $cfspath = cfs_config_path($vmid, $node || $nodename);

    my $conf = PVE::Cluster::cfs_read_file($cfspath);
    defined($conf) or die "container $vmid does not exist\n";

    return $conf;
}
619
# Write the configuration of a new container on the local node.
# The node's lxc directory is created first; the result of mkdir is not
# checked, so an already existing directory is not an error.
sub create_config {
    my ($vmid, $conf) = @_;

    mkdir "/etc/pve/nodes/$nodename/lxc";

    write_config($vmid, $conf);
}
628
# Remove the configuration file of container $vmid from the local node.
sub destroy_config {
    my ($vmid) = @_;

    my $path = config_file($vmid, $nodename);
    unlink $path;
}
634
# Store $conf as the configuration of container $vmid on the local node.
sub write_config {
    my ($vmid, $conf) = @_;

    PVE::Cluster::cfs_write_file(cfs_config_path($vmid), $conf);
}
642
# flock: we use one file handle per process, so lock file
# can be called multiple times and will succeed for the same process.

my $lock_handles = {};
my $lockdir = "/run/lock/lxc";

# Return the path of the lock file guarding the config of container $vmid.
sub lock_filename {
    my ($vmid) = @_;

    return $lockdir . "/pve-config-" . $vmid . ".lock";
}
654
# Run $code (called with @param) while holding the lock file of container
# $vmid. $timeout defaults to 10 when it is not given or is 0.
#
# Returns whatever PVE::Tools::lock_file_full() returns; an error left in
# $@ by that call is re-thrown.
sub lock_container {
    my ($vmid, $timeout, $code, @param) = @_;

    $timeout ||= 10;

    my $lockfile = lock_filename($vmid);

    -d $lockdir or mkdir $lockdir;

    my $result = PVE::Tools::lock_file_full($lockfile, $timeout, 0, $code, @param);

    die $@ if $@;

    return $result;
}
670
# Tell whether $name is an option known to the config schema.
sub option_exists {
    my ($name) = @_;

    my $schema = $confdesc->{$name};
    return defined($schema);
}
676
677 # add JSON properties for create and set function
# Merge the config schema into the property hash $prop and return it.
# The internal 'parent' and 'snaptime' options are left out, and properties
# that already have a true value in $prop are kept as they are.
sub json_config_properties {
    my $prop = shift;

    my @public_opts = grep { $_ ne 'parent' && $_ ne 'snaptime' } keys %$confdesc;

    foreach my $opt (@public_opts) {
        $prop->{$opt} ||= $confdesc->{$opt};
    }

    return $prop;
}
689
# Same as json_config_properties(), but additionally leaves out 'rootfs'.
sub json_config_properties_no_rootfs {
    my $prop = shift;

    my %excluded = map { $_ => 1 } qw(parent snaptime rootfs);

    foreach my $opt (keys %$confdesc) {
        next if $prop->{$opt} || $excluded{$opt};
        $prop->{$opt} = $confdesc->{$opt};
    }

    return $prop;
}
701
702 # container status helpers
703
# Find running containers by scanning /proc/net/unix for the abstract
# command sockets "@/var/lib/lxc/<vmid>/command".
#
# Returns a hash reference with one true entry per vmid found. The result
# is empty when /proc/net/unix cannot be opened.
sub list_active_containers {

    # a similar test is used by list_active_containers in lxccontainer.c
    my $active = {};

    my $fh = IO::File->new("/proc/net/unix", "r");
    return $active if !$fh;

    while (defined(my $line = <$fh>)) {
        # the socket path is the last column of the line
        next if $line !~ m/^[a-f0-9]+:\s\S+\s\S+\s\S+\s\S+\s\S+\s\d+\s(\S+)$/;
        my $path = $1;

        $active->{$1} = 1 if $path =~ m!^@/var/lib/lxc/(\d+)/command$!;
    }

    close($fh);

    return $active;
}
727
728 # warning: this is slow
# Return 1 if container $vmid is listed by list_active_containers(),
# undef otherwise.
sub check_running {
    my ($vmid) = @_;

    my $active = list_active_containers();

    return defined($active->{$vmid}) ? 1 : undef;
}
738
# Return the PVE::Tools::df() result for the root directory of the process
# $pid, as seen through /proc. $vmid is accepted but not used.
sub get_container_disk_usage {
    my ($vmid, $pid) = @_;

    my $rootdir = "/proc/$pid/root/";

    return PVE::Tools::df($rootdir, 1);
}
744
# Per-vmid CPU sample of the previous vmstatus() call (time, used, cpu).
my $last_proc_vmid_stat;

# Read the 'cpuacct.stat' cgroup file of container $vmid.
# Returns a hash with 'utime' and 'stime' taken from the "user" and
# "system" lines, or an empty hash if the content has another layout.
my $parse_cpuacct_stat = sub {
    my ($vmid) = @_;

    my $raw = read_cgroup_value('cpuacct', $vmid, 'cpuacct.stat', 1);

    my $stat = {};

    if (my ($user, $system) = $raw =~ m/^user (\d+)\nsystem (\d+)\n/) {
        @$stat{qw(utime stime)} = ($user, $system);
    }

    return $stat;
};
763
# Collect status information for the containers on this node.
#
# With $opt_vmid only that container is examined, otherwise every container
# returned by config_list().
#
# Returns a hash reference keyed by vmid. Each entry holds: type, pid (only
# when running), status ('running' or 'stopped'), name, cpus, disk, maxdisk,
# mem, maxmem, swap, maxswap, uptime, cpu, netin, netout, diskread,
# diskwrite and template. The cpu value is computed against the sample
# kept in $last_proc_vmid_stat from an earlier call.
sub vmstatus {
    my ($opt_vmid) = @_;

    my $list = $opt_vmid ? { $opt_vmid => { type => 'lxc' }} : config_list();

    my $active_hash = list_active_containers();

    my $cpucount = $cpuinfo->{cpus} || 1;

    my $cdtime = gettimeofday;

    # First pass: static values from the configuration, zeroed counters.
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};

        eval { $d->{pid} = find_lxc_pid($vmid) if defined($active_hash->{$vmid}); };
        warn $@ if $@; # ignore errors (consider them stopped)

        $d->{status} = $d->{pid} ? 'running' : 'stopped';

        my $cfspath = cfs_config_path($vmid);
        my $conf = PVE::Cluster::cfs_read_file($cfspath) || {};

        $d->{name} = $conf->{'hostname'} || "CT$vmid";
        $d->{name} =~ s/[\s]//g;

        $d->{cpus} = $conf->{cpulimit} || $cpucount;

        if ($d->{pid}) {
            my $res = get_container_disk_usage($vmid, $d->{pid});
            $d->{disk} = $res->{used};
            $d->{maxdisk} = $res->{total};
        } else {
            $d->{disk} = 0;
            # use 4GB by default ??
            if (my $rootfs = $conf->{rootfs}) {
                my $rootinfo = parse_ct_rootfs($rootfs);
                $d->{maxdisk} = int(($rootinfo->{size} || 4)*1024*1024)*1024;
            } else {
                $d->{maxdisk} = 4*1024*1024*1024;
            }
        }

        $d->{mem} = 0;
        $d->{swap} = 0;
        $d->{maxmem} = ($conf->{memory}||512)*1024*1024;
        $d->{maxswap} = ($conf->{swap}//0)*1024*1024;

        $d->{uptime} = 0;
        $d->{cpu} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $d->{template} = is_template($conf);
    }

    # Second pass: live values from /proc and the cgroup files.
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};
        my $pid = $d->{pid};

        next if !$pid; # skip stopped CTs

        # the method lxcfs uses: uptime is the age of /proc/<pid>.
        # stat() returns an empty list if the process is already gone;
        # uptime then keeps the 0 assigned above instead of becoming
        # "time - undef".
        my $ctime = (stat("/proc/$pid"))[10]; # 10 = ctime
        $d->{uptime} = time - $ctime if defined($ctime);

        $d->{mem} = read_cgroup_value('memory', $vmid, 'memory.usage_in_bytes');
        $d->{swap} = read_cgroup_value('memory', $vmid, 'memory.memsw.usage_in_bytes') - $d->{mem};

        my $blkio_bytes = read_cgroup_value('blkio', $vmid, 'blkio.throttle.io_service_bytes', 1);
        foreach my $byte (split(/\n/, $blkio_bytes)) {
            if (my ($key, $value) = $byte =~ /(Read|Write)\s+(\d+)/) {
                $d->{diskread} = $value if $key eq 'Read';
                $d->{diskwrite} = $value if $key eq 'Write';
            }
        }

        my $pstat = $parse_cpuacct_stat->($vmid);

        my $used = $pstat->{utime} + $pstat->{stime};

        my $old = $last_proc_vmid_stat->{$vmid};
        if (!$old) {
            # first sample for this container: nothing to compare against
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($cdtime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            # too little time has passed; report the previous value again
            $d->{cpu} = $old->{cpu};
        }
    }

    # Third pass: network counters of the veth<vmid>i<N> devices.
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();

    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^veth([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $list->{$vmid};

        next if !$d;

        # The directions are swapped on purpose. Presumably these are the
        # host-side ends of the veth pairs, so what the host receives is
        # what the container sent - TODO confirm.
        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};

    }

    return $list;
}
892
# Classify a mount point source string:
#   'device' - absolute path below /dev/
#   'bind'   - any other absolute path
#   'volume' - everything that does not start with a slash
sub classify_mountpoint {
    my ($vol) = @_;

    return 'volume' if $vol !~ m!^/!;

    return $vol =~ m!^/dev/! ? 'device' : 'bind';
}
901
# Parse a mount point property string $data against the schema $desc.
#
# Returns the parsed hash with 'size' converted by
# PVE::JSONSchema::parse_size() and 'type' set by classify_mountpoint().
# On a parse error or an invalid size it returns undef when $noerr is true,
# and dies otherwise.
my $parse_ct_mountpoint_full = sub {
    my ($desc, $data, $noerr) = @_;

    $data //= '';

    my $res;
    eval { $res = PVE::JSONSchema::parse_property_string($desc, $data) };
    if ($@) {
        return undef if $noerr;
        die $@;
    }

    if (defined(my $size = $res->{size})) {
        # keep the original text in $size so the error message can show it;
        # the result used to be assigned to $size itself, which left the
        # message empty
        my $parsed_size = PVE::JSONSchema::parse_size($size);
        if (!defined($parsed_size)) {
            return undef if $noerr;
            die "invalid size: $size\n";
        }
        $res->{size} = $parsed_size;
    }

    $res->{type} = classify_mountpoint($res->{volume});

    return $res;
};
927
# Parse a 'rootfs' property string. The result gets the fixed mount
# path '/'. Returns undef if parsing failed and $noerr is set.
sub parse_ct_rootfs {
    my ($data, $noerr) = @_;

    my $rootfs = $parse_ct_mountpoint_full->($rootfs_desc, $data, $noerr);
    return $rootfs if !defined($rootfs);

    $rootfs->{mp} = '/';

    return $rootfs;
}
937
# Parse an 'mp<N>' property string against the mount point schema.
# Returns undef if parsing failed and $noerr is set.
sub parse_ct_mountpoint {
    my ($data, $noerr) = @_;

    return $parse_ct_mountpoint_full->($mp_desc, $data, $noerr);
}
943
# Turn a parsed mount point hash back into a property string.
# The derived 'type' key is never printed; 'mp' is left out too when
# $nomp is true.
sub print_ct_mountpoint {
    my ($info, $nomp) = @_;

    my @skip = ('type', $nomp ? ('mp') : ());

    return PVE::JSONSchema::print_property_string($info, $mp_desc, \@skip);
}
950
# Turn a parsed network hash back into a 'net<N>' property string.
sub print_lxc_network {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $netconf_desc);
}
955
# Parse a 'net<N>' property string.
#
# Returns an empty hash for empty input. Otherwise returns the parsed
# properties with 'type' forced to 'veth' and a random MAC address filled
# in when none was given.
sub parse_lxc_network {
    my ($data) = @_;

    my $net = {};
    return $net if !$data;

    $net = PVE::JSONSchema::parse_property_string($netconf_desc, $data);

    $net->{type} = 'veth';
    $net->{hwaddr} ||= PVE::Tools::random_ether_addr();

    return $net;
}
970
# Read the cgroup file $name of container $vmid in controller $group.
# Returns the whole content when $full is true, else only the first line.
sub read_cgroup_value {
    my ($group, $vmid, $name, $full) = @_;

    my $path = "/sys/fs/cgroup/$group/lxc/$vmid/$name";

    return $full
        ? PVE::Tools::file_get_contents($path)
        : PVE::Tools::file_read_firstline($path);
}
980
# Write $value to the cgroup file $name of container $vmid in controller
# $group. Nothing is written when the file does not exist.
sub write_cgroup_value {
    my ($group, $vmid, $name, $value) = @_;

    my $cgroup_file = "/sys/fs/cgroup/$group/lxc/$vmid/$name";

    PVE::ProcFSTools::write_proc_entry($cgroup_file, $value) if -e $cgroup_file;
}
988
# Scan /proc for processes whose command line is exactly
# "lxc-console -n <vmid>" (optionally with the /usr/bin/ prefix).
#
# Returns a hash reference mapping each vmid to a list of matching pids.
sub find_lxc_console_pids {

    my $pids_by_vmid = {};

    PVE::Tools::dir_glob_foreach('/proc', '\d+', sub {
        my ($pid) = @_;

        my $cmdline = PVE::Tools::file_read_firstline("/proc/$pid/cmdline");
        return if !$cmdline;

        # arguments in /proc/<pid>/cmdline are separated by NUL bytes
        my @args = split(/\0/, $cmdline);

        # search for lxc-console -n <vmid>
        return unless scalar(@args) == 3;

        my ($prog, $flag, $vmid) = @args;
        return unless $flag eq '-n';
        return unless $vmid =~ m/^\d+$/;
        return unless $prog =~ m|^(/usr/bin/)?lxc-console$|;

        push @{$pids_by_vmid->{$vmid}}, $pid;
    });

    return $pids_by_vmid;
}
1014
# Ask 'lxc-info -n <vmid> -p' for the PID of container $vmid.
# Returns the PID, or dies if the output contains no "PID:" line.
sub find_lxc_pid {
    my ($vmid) = @_;

    my $pid;

    my $cmd = ['lxc-info', '-n', $vmid, '-p'];
    PVE::Tools::run_command($cmd, outfunc => sub {
        my ($line) = @_;
        $pid = $1 if $line =~ m/^PID:\s+(\d+)$/;
    });

    die "unable to get PID for CT $vmid (not running?)\n" if !$pid;

    return $pid;
}
1029
1030 # Note: we cannot use Net:IP, because that only allows strict
1031 # CIDR networks
# Split an "address/prefix" IPv4 string into its parts.
#
# Accepts prefix lengths from 8 to 32. Returns a hash with 'address' and
# 'netmask' (looked up in $PVE::Network::ipv4_reverse_mask). On invalid
# input it returns undef when $noerr is true, and dies otherwise.
sub parse_ipv4_cidr {
    my ($cidr, $noerr) = @_;

    if ($cidr =~ m!^($IPV4RE)(?:/(\d+))$!) {
        my ($address, $prefix_len) = ($1, $2);

        if (($prefix_len > 7) && ($prefix_len <= 32)) {
            return {
                address => $address,
                netmask => $PVE::Network::ipv4_reverse_mask->[$prefix_len],
            };
        }
    }

    return undef if $noerr;

    die "unable to parse ipv4 address/mask\n";
}
1043
# Die if the configuration $conf carries a 'lock' entry.
sub check_lock {
    my ($conf) = @_;

    my $lock = $conf->{'lock'};

    die "VM is locked ($lock)\n" if $lock;
}
1049
# Die with "$err_msg - protection mode enabled" if the configuration
# $vm_conf has the protection flag set.
sub check_protection {
    my ($vm_conf, $err_msg) = @_;

    return if !$vm_conf->{protection};

    die "$err_msg - protection mode enabled\n";
}
1057
# Generate /var/lib/lxc/<vmid>/config from the container configuration $conf.
#
# For templates the existing LXC config file is removed and nothing is
# written. Otherwise the function dies when 'arch' or 'ostype' is missing,
# when the ostype is not supported, or when 'rootfs' cannot be parsed.
# $storage_cfg is accepted but not used here.
sub update_lxc_config {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $dir = "/var/lib/lxc/$vmid";

    if ($conf->{template}) {

        unlink "$dir/config";

        return;
    }

    my $raw = '';

    die "missing 'arch' - internal error" if !$conf->{arch};
    $raw .= "lxc.arch = $conf->{arch}\n";

    my $unprivileged = $conf->{unprivileged};
    # number of user supplied lxc.id_map entries (used as a boolean below)
    my $custom_idmap = grep { $_->[0] eq 'lxc.id_map' } @{$conf->{lxc}};

    my $ostype = $conf->{ostype} || die "missing 'ostype' - internal error";
    if ($ostype =~ /^(?:debian | ubuntu | centos | fedora | opensuse | archlinux)$/x) {
        $raw .= "lxc.include = /usr/share/lxc/config/$ostype.common.conf\n";
        if ($unprivileged || $custom_idmap) {
            $raw .= "lxc.include = /usr/share/lxc/config/$ostype.userns.conf\n";
        }
    } else {
        die "implement me (ostype $ostype)";
    }

    # WARNING: DO NOT REMOVE this without making sure that loop device nodes
    # cannot be exposed to the container with r/w access (cgroup perms).
    # When this is enabled mounts will still remain in the monitor's namespace
    # after the container unmounted them and thus will not detach from their
    # files while the container is running!
    $raw .= "lxc.monitor.unshare = 1\n";

    # Should we read them from /etc/subuid?
    if ($unprivileged && !$custom_idmap) {
        $raw .= "lxc.id_map = u 0 100000 65536\n";
        $raw .= "lxc.id_map = g 0 100000 65536\n";
    }

    if (!has_dev_console($conf)) {
        $raw .= "lxc.console = none\n";
        $raw .= "lxc.cgroup.devices.deny = c 5:1 rwm\n";
    }

    my $ttycount = get_tty_count($conf);
    $raw .= "lxc.tty = $ttycount\n";

    # some init scripts expect a linux terminal (turnkey).
    $raw .= "lxc.environment = TERM=linux\n";

    my $utsname = $conf->{hostname} || "CT$vmid";
    $raw .= "lxc.utsname = $utsname\n";

    my $memory = $conf->{memory} || 512;
    my $swap = $conf->{swap} // 0;

    my $lxcmem = int($memory*1024*1024);
    $raw .= "lxc.cgroup.memory.limit_in_bytes = $lxcmem\n";

    # the memsw limit covers memory and swap together
    my $lxcswap = int(($memory + $swap)*1024*1024);
    $raw .= "lxc.cgroup.memory.memsw.limit_in_bytes = $lxcswap\n";

    if (my $cpulimit = $conf->{cpulimit}) {
        $raw .= "lxc.cgroup.cpu.cfs_period_us = 100000\n";
        my $value = int(100000*$cpulimit);
        $raw .= "lxc.cgroup.cpu.cfs_quota_us = $value\n";
    }

    my $shares = $conf->{cpuunits} || 1024;
    $raw .= "lxc.cgroup.cpu.shares = $shares\n";

    # The result is not needed, but the call dies on an invalid rootfs
    # definition before anything is written.
    parse_ct_rootfs($conf->{rootfs});

    $raw .= "lxc.rootfs = $dir/rootfs\n";

    my $netcount = 0;
    # Sorted so that the network sections are emitted in a stable order
    # (net0, net1, ...) instead of Perl's random hash order.
    foreach my $k (sort keys %$conf) {
        next if $k !~ m/^net(\d+)$/;
        my $ind = $1;
        my $d = parse_lxc_network($conf->{$k});
        $netcount++;
        $raw .= "lxc.network.type = veth\n";
        $raw .= "lxc.network.veth.pair = veth${vmid}i${ind}\n";
        $raw .= "lxc.network.hwaddr = $d->{hwaddr}\n" if defined($d->{hwaddr});
        $raw .= "lxc.network.name = $d->{name}\n" if defined($d->{name});
        $raw .= "lxc.network.mtu = $d->{mtu}\n" if defined($d->{mtu});
    }

    # user supplied raw lxc.* entries are appended last, in stored order
    if (my $lxcconf = $conf->{lxc}) {
        foreach my $entry (@$lxcconf) {
            my ($k, $v) = @$entry;
            $netcount++ if $k eq 'lxc.network.type';
            $raw .= "$k = $v\n";
        }
    }

    $raw .= "lxc.network.type = empty\n" if !$netcount;

    File::Path::mkpath("$dir/rootfs");

    PVE::Tools::file_set_contents("$dir/config", $raw);
}
1164
1165 # verify and cleanup nameserver list (replace \0 with ' ')
# Check every entry of a nameserver list with
# PVE::JSONSchema::pve_verify_ip() and return the entries joined by
# single spaces.
sub verify_nameserver_list {
    my ($nameserver_list) = @_;

    my @servers;
    foreach my $address (PVE::Tools::split_list($nameserver_list)) {
        PVE::JSONSchema::pve_verify_ip($address);
        push @servers, $address;
    }

    return join(' ', @servers);
}
1177
# Normalize a search domain list: split it with PVE::Tools::split_list()
# and join the entries by single spaces. The entries are not validated.
sub verify_searchdomain_list {
    my ($searchdomain_list) = @_;

    # todo: should we add checks for valid dns domains?
    my @domains = PVE::Tools::split_list($searchdomain_list);

    return join(' ', @domains);
}
1189
# Record $volid under a free 'unusedN' key of $config.
#
# Returns the key that was used, or nothing when the volume is already
# listed as unused. Dies when every unused slot is occupied.
sub add_unused_volume {
    my ($config, $volid) = @_;

    # Walk the slots from the highest index down so that the slot remembered
    # last is the lowest free one.
    my $free_key;
    foreach my $ind (reverse(0 .. $MAX_UNUSED_DISKS - 1)) {
        my $candidate = "unused$ind";
        my $current = $config->{$candidate};

        if (!$current) {
            $free_key = $candidate;
            next;
        }

        return if $current eq $volid; # do not add duplicates
    }

    die "Too many unused volumes - please delete them first.\n" if !$free_key;

    $config->{$free_key} = $volid;

    return $free_key;
}
1209
# Apply option deletions ($delete, array ref of option names) and option
# changes ($param, hash ref) to the container configuration $conf.
#
# When $running is true the config is written after every processed option
# and, where implemented, the change is applied to the live container
# (cgroup values, network interfaces). Options that cannot be changed on a
# running container are collected and reported with a single die at the
# very end, after all other changes were applied.
#
# Dies on attempts to delete required options, on read-only options and on
# unknown options.
sub update_pct_config {
    my ($vmid, $conf, $running, $param, $delete) = @_;

    my @nohotplug;

    my $new_disks = 0;
    my @deleted_volumes;

    my $rootdir;
    if ($running) {
        my $pid = find_lxc_pid($vmid);
        $rootdir = "/proc/$pid/root";
    }

    # Returns true (and remembers the option names) when the container is
    # running, i.e. when the caller has to skip a non-hotpluggable option.
    my $hotplug_error = sub {
        if ($running) {
            push @nohotplug, @_;
            return 1;
        } else {
            return 0;
        }
    };

    if (defined($delete)) {
        foreach my $opt (@$delete) {
            if (!exists($conf->{$opt})) {
                warn "no such option: $opt\n";
                next;
            }

            if ($opt eq 'hostname' || $opt eq 'memory' || $opt eq 'rootfs') {
                die "unable to delete required option '$opt'\n";
            } elsif ($opt eq 'swap') {
                delete $conf->{$opt};
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", -1);
            } elsif ($opt eq 'description' || $opt eq 'onboot' || $opt eq 'startup') {
                delete $conf->{$opt};
            } elsif ($opt eq 'nameserver' || $opt eq 'searchdomain' ||
                     $opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
                next if $hotplug_error->($opt);
                delete $conf->{$opt};
            } elsif ($opt =~ m/^net(\d+)$/) {
                # Accept multi-digit indices, like the set path below does.
                my $netid = $1;
                delete $conf->{$opt};
                next if !$running;
                PVE::Network::veth_delete("veth${vmid}i$netid");
            } elsif ($opt eq 'protection') {
                delete $conf->{$opt};
            } elsif ($opt =~ m/^unused(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                # The volume itself is removed after all options were handled.
                push @deleted_volumes, $conf->{$opt};
                delete $conf->{$opt};
            } elsif ($opt =~ m/^mp(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                my $mountpoint = parse_ct_mountpoint($conf->{$opt});
                if ($mountpoint->{type} eq 'volume') {
                    add_unused_volume($conf, $mountpoint->{volume});
                }
                delete $conf->{$opt};
            } elsif ($opt eq 'unprivileged') {
                die "unable to delete read-only option: '$opt'\n";
            } else {
                die "implement me (delete: $opt)"
            }
            write_config($vmid, $conf) if $running;
        }
    }

    # There's no separate swap size to configure, there's memory and "total"
    # memory (iow. memory+swap). This means we have to change them together.
    my $wanted_memory = PVE::Tools::extract_param($param, 'memory');
    my $wanted_swap = PVE::Tools::extract_param($param, 'swap');
    if (defined($wanted_memory) || defined($wanted_swap)) {

        my $old_memory = ($conf->{memory} || 512);
        my $old_swap = ($conf->{swap} || 0);

        $wanted_memory //= $old_memory;
        $wanted_swap //= $old_swap;

        my $total = $wanted_memory + $wanted_swap;
        if ($running) {
            my $old_total = $old_memory + $old_swap;
            # The two limits are written in a different order depending on
            # whether the total grows or not.
            if ($total > $old_total) {
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
                write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
            } else {
                write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
            }
        }
        $conf->{memory} = $wanted_memory;
        $conf->{swap} = $wanted_swap;

        write_config($vmid, $conf) if $running;
    }

    foreach my $opt (keys %$param) {
        my $value = $param->{$opt};
        if ($opt eq 'hostname') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'onboot') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt eq 'startup') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
            next if $hotplug_error->($opt);
            $conf->{$opt} = $value;
        } elsif ($opt eq 'nameserver') {
            next if $hotplug_error->($opt);
            my $list = verify_nameserver_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'searchdomain') {
            next if $hotplug_error->($opt);
            my $list = verify_searchdomain_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'cpulimit') {
            next if $hotplug_error->($opt); # FIXME: hotplug
            $conf->{$opt} = $value;
        } elsif ($opt eq 'cpuunits') {
            $conf->{$opt} = $value;
            write_cgroup_value("cpu", $vmid, "cpu.shares", $value);
        } elsif ($opt eq 'description') {
            $conf->{$opt} = PVE::Tools::encode_text($value);
        } elsif ($opt =~ m/^net(\d+)$/) {
            my $netid = $1;
            my $net = parse_lxc_network($value);
            if (!$running) {
                $conf->{$opt} = print_lxc_network($net);
            } else {
                update_net($vmid, $conf, $opt, $net, $netid, $rootdir);
            }
        } elsif ($opt eq 'protection') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt =~ m/^mp(\d+)$/) {
            next if $hotplug_error->($opt);
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            $conf->{$opt} = $value;
            $new_disks = 1;
        } elsif ($opt eq 'rootfs') {
            next if $hotplug_error->($opt);
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            $conf->{$opt} = $value;
        } elsif ($opt eq 'unprivileged') {
            die "unable to modify read-only option: '$opt'\n";
        } else {
            die "implement me: $opt";
        }
        write_config($vmid, $conf) if $running;
    }

    if (@deleted_volumes) {
        my $storage_cfg = PVE::Storage::config();
        foreach my $volume (@deleted_volumes) {
            delete_mountpoint_volume($storage_cfg, $vmid, $volume);
        }
    }

    if ($new_disks) {
        my $storage_cfg = PVE::Storage::config();
        create_disks($storage_cfg, $vmid, $conf, $conf);
    }

    # This should be the last thing we do here
    if ($running && scalar(@nohotplug)) {
        die "unable to modify " . join(',', @nohotplug) . " while container is running\n";
    }
}
1380
# True unless the 'console' option is explicitly set to a false value;
# an unset option counts as enabled.
sub has_dev_console {
    my ($conf) = @_;

    my $console = $conf->{console};

    return !defined($console) || !!$console;
}
1386
# Return the configured 'tty' value, falling back to the schema default
# when the option is not defined.
sub get_tty_count {
    my ($conf) = @_;

    my $tty = $conf->{tty};

    return defined($tty) ? $tty : $confdesc->{tty}->{default};
}
1392
# Return the configured 'cmode' value, falling back to the schema default
# when the option is not defined.
sub get_cmode {
    my ($conf) = @_;

    my $cmode = $conf->{cmode};

    return defined($cmode) ? $cmode : $confdesc->{cmode}->{default};
}
1398
# Build the command (as an array ref) used to open a console for container
# $vmid, depending on the effective console mode of $conf.
# Dies on a console mode other than 'console', 'tty' or 'shell'.
sub get_console_command {
    my ($vmid, $conf) = @_;

    my $cmode = get_cmode($conf);

    return ['lxc-console', '-n', $vmid, '-t', 0] if $cmode eq 'console';
    return ['lxc-console', '-n', $vmid] if $cmode eq 'tty';
    return ['lxc-attach', '--clear-env', '-n', $vmid] if $cmode eq 'shell';

    die "internal error";
}
1414
# Return ($ipv4, $ipv6) taken from the 'net0' entry of $conf, with any
# trailing "/<digits>" prefix length removed. Keyword values (dhcp, manual
# and, for IPv6, auto) are reported as undef. Returns undef when there is
# no net0 entry.
sub get_primary_ips {
    my ($conf) = @_;

    # return data from net0
    my $net0 = $conf->{net0};
    return undef if !defined($net0);

    my $net = parse_lxc_network($net0);

    # Empty/unset values are passed through untouched, keywords become undef,
    # everything else loses its prefix length.
    my $bare_address = sub {
        my ($addr, $keywords) = @_;

        return $addr if !$addr;
        return undef if $addr =~ $keywords;

        $addr =~ s!/\d+$!!;
        return $addr;
    };

    my $ipv4 = $bare_address->($net->{ip}, qr/^(?:dhcp|manual)$/);
    my $ipv6 = $bare_address->($net->{ip6}, qr/^(?:auto|dhcp|manual)$/);

    return ($ipv4, $ipv6);
}
1442
# Free the storage volume behind a mountpoint, but only when it is
# classified as a 'volume' and the owner reported by the storage layer is
# this container.
sub delete_mountpoint_volume {
    my ($storage_cfg, $vmid, $volume) = @_;

    my $kind = classify_mountpoint($volume);
    return if $kind ne 'volume';

    my (undef, undef, $owner) = PVE::Storage::parse_volname($storage_cfg, $volume);
    if ($vmid == $owner) {
        PVE::Storage::vdisk_free($storage_cfg, $volume);
    }
}
1451
# Remove a container: delete the volumes of all its mountpoints, remove its
# directory below /var/lib/lxc and finally destroy its configuration.
sub destroy_lxc_container {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $free_volume = sub {
        my ($ms, $mountpoint) = @_;
        delete_mountpoint_volume($storage_cfg, $vmid, $mountpoint->{volume});
    };
    foreach_mountpoint($conf, $free_volume);

    # Failures of the filesystem cleanup are not checked.
    my $lxc_dir = "/var/lib/lxc/$vmid";
    rmdir "$lxc_dir/rootfs";
    unlink "$lxc_dir/config";
    rmdir $lxc_dir;

    #my $cmd = ['lxc-destroy', '-n', $vmid ];
    #PVE::Tools::run_command($cmd);

    destroy_config($vmid);
}
1468
# Deactivate the container's volumes after it was stopped, unless
# $keepActive is set. Errors are only reported as warnings.
sub vm_stop_cleanup {
    my ($storage_cfg, $vmid, $conf, $keepActive) = @_;

    eval {
        # 'return' only leaves the eval block here
        return if $keepActive;

        my $volumes = get_vm_volumes($conf);
        PVE::Storage::deactivate_volumes($storage_cfg, $volumes);
    };
    if (my $err = $@) {
        warn $err; # avoid errors - just warn
    }
}
1481
# Numeric "not equal" that tolerates undefined operands: two undefined
# values are equal, a defined and an undefined value differ.
my $safe_num_ne = sub {
    my ($lhs, $rhs) = @_;

    if (defined($lhs) && defined($rhs)) {
        return $lhs != $rhs;
    }

    # at least one side is undefined
    return (defined($lhs) || defined($rhs)) ? 1 : 0;
};
1491
# String "not equal" that tolerates undefined operands: two undefined
# values are equal, a defined and an undefined value differ.
my $safe_string_ne = sub {
    my ($lhs, $rhs) = @_;

    if (defined($lhs) && defined($rhs)) {
        return $lhs ne $rhs;
    }

    # at least one side is undefined
    return (defined($lhs) || defined($rhs)) ? 1 : 0;
};
1501
# Apply new settings ($newnet) to network interface $opt of a container.
#
# - no existing entry: the interface is hot-plugged
# - hwaddr or name changed: the veth device is deleted and re-plugged
# - bridge, tag or firewall changed: the host side is re-plugged
# Afterwards the IP configuration is updated. Only 'veth' interfaces are
# supported; anything else dies.
sub update_net {
    my ($vmid, $conf, $opt, $newnet, $netid, $rootdir) = @_;

    if ($newnet->{type} ne 'veth') {
        # for when there are physical interfaces
        die "cannot update interface of type $newnet->{type}";
    }

    my $veth = "veth${vmid}i${netid}";
    my $eth = $newnet->{name};
    my @plug_keys = qw(bridge tag firewall);

    my $oldnetcfg = $conf->{$opt};
    if (!$oldnetcfg) {
        hotplug_net($vmid, $conf, $opt, $newnet, $netid);
    } else {
        my $oldnet = parse_lxc_network($oldnetcfg);

        my $device_changed =
            $safe_string_ne->($oldnet->{hwaddr}, $newnet->{hwaddr}) ||
            $safe_string_ne->($oldnet->{name}, $newnet->{name});

        if ($device_changed) {
            # Drop the old device together with its config entry before
            # plugging the replacement.
            PVE::Network::veth_delete($veth);
            delete $conf->{$opt};
            write_config($vmid, $conf);

            hotplug_net($vmid, $conf, $opt, $newnet, $netid);
        } elsif ($safe_string_ne->($oldnet->{bridge}, $newnet->{bridge}) ||
                 $safe_num_ne->($oldnet->{tag}, $newnet->{tag}) ||
                 $safe_num_ne->($oldnet->{firewall}, $newnet->{firewall})) {

            if ($oldnet->{bridge}) {
                # Record the unplugged state before plugging again.
                PVE::Network::tap_unplug($veth);
                delete @{$oldnet}{@plug_keys};
                $conf->{$opt} = print_lxc_network($oldnet);
                write_config($vmid, $conf);
            }

            PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});
            foreach my $key (@plug_keys) {
                my $value = $newnet->{$key};
                $oldnet->{$key} = $value if $value;
            }
            $conf->{$opt} = print_lxc_network($oldnet);
            write_config($vmid, $conf);
        }
    }

    update_ipconfig($vmid, $conf, $opt, $eth, $newnet, $rootdir);
}
1551
# Create the veth pair for interface $netid of container $vmid, plug the
# host side, hand the peer to the container and bring it up there. The
# applied settings (without IP configuration) are stored in $conf->{$opt}
# and the config is written.
sub hotplug_net {
    my ($vmid, $conf, $opt, $newnet, $netid) = @_;

    my $host_if = "veth${vmid}i${netid}";
    my $peer_if = $host_if . "p";
    my $guest_if = $newnet->{name};

    PVE::Network::veth_create($host_if, $peer_if, $newnet->{bridge}, $newnet->{hwaddr});
    PVE::Network::tap_plug($host_if, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});

    # attach peer in container
    PVE::Tools::run_command(['lxc-device', '-n', $vmid, 'add', $peer_if, "$guest_if"]);

    # link up peer in container
    PVE::Tools::run_command([
        'lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--',
        '/sbin/ip', 'link', 'set', $guest_if, 'up',
    ]);

    my %done = (type => 'veth');
    foreach my $key (qw(bridge tag firewall hwaddr name)) {
        my $value = $newnet->{$key};
        $done{$key} = $value if $value;
    }
    $conf->{$opt} = print_lxc_network(\%done);

    write_config($vmid, $conf);
}
1578
# Apply changed IP address / gateway settings of interface $opt to the
# running container $vmid and store them in the configuration.
#
# $eth is the interface name inside the container, $newnet the wanted
# settings. IPv4 and IPv6 are handled one after the other; each family is
# skipped when neither its address nor its gateway changed. Commands are
# executed in the container's network namespace via lxc-attach.
sub update_ipconfig {
    my ($vmid, $conf, $opt, $eth, $newnet, $rootdir) = @_;

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);

    my $optdata = parse_lxc_network($conf->{$opt});
    my $deleted = [];
    my $added = [];
    # First argument: hash ref with extra run_command options,
    # remaining arguments: the command to run.
    my $nscmd = sub {
        my $cmdargs = shift;
        PVE::Tools::run_command(['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', @_], %$cmdargs);
    };
    my $ipcmd = sub { &$nscmd({}, '/sbin/ip', @_) };

    # Values of the ip/ip6 properties that are no real addresses.
    my $ip_keyword_re = qr/^(?:auto|dhcp|manual)$/;

    my $change_ip_config = sub {
        my ($ipversion) = @_;

        my $family_opt = "-$ipversion";
        my $suffix = $ipversion == 4 ? '' : $ipversion;
        my $gw= "gw$suffix";
        my $ip= "ip$suffix";

        my $newip = $newnet->{$ip};
        my $newgw = $newnet->{$gw};
        my $oldip = $optdata->{$ip};

        my $change_ip = &$safe_string_ne($oldip, $newip);
        my $change_gw = &$safe_string_ne($optdata->{$gw}, $newgw);

        return if !$change_ip && !$change_gw;

        # step 1: add new IP, if this fails we cancel
        my $is_real_ip = ($newip && $newip !~ $ip_keyword_re);
        if ($change_ip && $is_real_ip) {
            eval { &$ipcmd($family_opt, 'addr', 'add', $newip, 'dev', $eth); };
            if (my $err = $@) {
                warn $err;
                return;
            }
        }

        # step 2: replace gateway
        # If this fails we delete the added IP and cancel.
        # If it succeeds we save the config and delete the old IP, ignoring
        # errors. The config is then saved.
        # Note: 'ip route replace' can add
        if ($change_gw) {
            if ($newgw) {
                eval {
                    if ($is_real_ip && !PVE::Network::is_ip_in_cidr($newgw, $newip, $ipversion)) {
                        &$ipcmd($family_opt, 'route', 'add', $newgw, 'dev', $eth);
                    }
                    &$ipcmd($family_opt, 'route', 'replace', 'default', 'via', $newgw);
                };
                if (my $err = $@) {
                    warn $err;
                    # the route was not replaced, the old IP is still available
                    # rollback (delete new IP) and cancel
                    # Only undo what step 1 actually added: keyword values
                    # were never configured as an address.
                    if ($change_ip && $is_real_ip) {
                        eval { &$ipcmd($family_opt, 'addr', 'del', $newip, 'dev', $eth); };
                        warn $@ if $@; # no need to die here
                    }
                    return;
                }
            } else {
                eval { &$ipcmd($family_opt, 'route', 'del', 'default'); };
                # if the route was not deleted, the guest might have deleted it manually
                # warn and continue
                warn $@ if $@;
            }
        }

        # from this point on we save the configuration
        # step 3: delete old IP ignoring errors
        # Keyword values (including 'manual') are no addresses and must not
        # be passed to 'ip addr del'.
        if ($change_ip && $oldip && $oldip !~ $ip_keyword_re) {
            # We need to enable promote_secondaries, otherwise our newly added
            # address will be removed along with the old one.
            my $promote = 0;
            eval {
                if ($ipversion == 4) {
                    &$nscmd({ outfunc => sub { $promote = int(shift) } },
                            'cat', "/proc/sys/net/ipv4/conf/$eth/promote_secondaries");
                    &$nscmd({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=1");
                }
                &$ipcmd($family_opt, 'addr', 'del', $oldip, 'dev', $eth);
            };
            warn $@ if $@; # no need to die here

            # restore the previous promote_secondaries setting
            if ($ipversion == 4) {
                &$nscmd({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=$promote");
            }
        }

        foreach my $property ($ip, $gw) {
            if ($newnet->{$property}) {
                $optdata->{$property} = $newnet->{$property};
            } else {
                delete $optdata->{$property};
            }
        }
        $conf->{$opt} = print_lxc_network($optdata);
        write_config($vmid, $conf);
        $lxc_setup->setup_network($conf);
    };

    &$change_ip_config(4);
    &$change_ip_config(6);

}
1688
1689 # Internal snapshots
1690
1691 # NOTE: Snapshot create/delete involves several non-atomic
1692 # actions, and can take a long time.
1693 # So we try to avoid locking the file and use the 'lock' variable
1694 # inside the config file instead.
1695
# Copy all entries of the $source config hash into $dest, except for the
# snapshot bookkeeping and per-config keys listed below.
my $snapshot_copy_config = sub {
    my ($source, $dest) = @_;

    my %skip = map { $_ => 1 }
        qw(snapshots snapstate snaptime vmstate lock digest description);

    foreach my $key (keys %$source) {
        next if $skip{$key};
        $dest->{$key} = $source->{$key};
    }
};
1711
# First phase of snapshot creation, executed under the container lock:
# set the 'snapshot' lock in the config and add a snapshot entry in state
# "prepare" that holds a copy of the current settings.
# Returns the new snapshot entry (hash ref).
my $snapshot_prepare = sub {
    my ($vmid, $snapname, $comment) = @_;

    my $snap;

    my $prepare = sub {
        my $conf = load_config($vmid);

        die "you can't take a snapshot if it's a template\n"
            if is_template($conf);

        check_lock($conf);

        $conf->{lock} = 'snapshot';

        die "snapshot name '$snapname' already used\n"
            if defined($conf->{snapshots}->{$snapname});

        my $storecfg = PVE::Storage::config();
        my $feature = ($snapname eq 'vzdump') ? 'vzdump' : 'snapshot';
        has_feature($feature, $conf, $storecfg)
            or die "snapshot feature is not available\n";

        $snap = {};
        $conf->{snapshots}->{$snapname} = $snap;

        $snapshot_copy_config->($conf, $snap);

        $snap->{snapstate} = "prepare";
        $snap->{snaptime} = time();
        $snap->{description} = $comment if $comment;

        write_config($vmid, $conf);
    };

    lock_container($vmid, 10, $prepare);

    return $snap;
};
1751
# Final phase of snapshot creation, executed under the container lock:
# verify that the config holds the 'snapshot' lock and that the snapshot is
# in state "prepare", then clear both and make the snapshot the parent.
my $snapshot_commit = sub {
    my ($vmid, $snapname) = @_;

    my $commit = sub {
        my $conf = load_config($vmid);

        my $lock = $conf->{lock};
        die "missing snapshot lock\n"
            if !$lock || $lock ne 'snapshot';

        my $snap = $conf->{snapshots}->{$snapname};
        die "snapshot '$snapname' does not exist\n"
            if !defined($snap);

        my $state = $snap->{snapstate};
        die "wrong snapshot state\n"
            if !$state || $state ne "prepare";

        delete $snap->{snapstate};
        delete $conf->{lock};
        $conf->{parent} = $snapname;

        write_config($vmid, $conf);
    };

    lock_container($vmid, 10, $commit);
};
1778
# Check whether the storage of every relevant mountpoint supports
# $feature. The pseudo feature 'vzdump' is checked as 'snapshot' and skips
# non-rootfs mountpoints without the 'backup' flag.
# Returns 1 when the feature is available, 0 otherwise. Dies for any
# checked mountpoint other than rootfs (not implemented).
sub has_feature {
    my ($feature, $conf, $storecfg, $snapname) = @_;

    my $vzdump = $feature eq 'vzdump';
    $feature = 'snapshot' if $vzdump;

    my $missing = 0;

    my $check = sub {
        my ($ms, $mountpoint) = @_;

        return if $missing; # skip further test
        return if $vzdump && $ms ne 'rootfs' && !$mountpoint->{backup};

        if (!PVE::Storage::volume_has_feature($storecfg, $feature, $mountpoint->{volume}, $snapname)) {
            $missing = 1;
        }

        # TODO: implement support for mountpoints
        die "unable to handle mountpoint '$ms' - feature not implemented\n"
            if $ms ne 'rootfs';
    };
    foreach_mountpoint($conf, $check);

    return $missing ? 0 : 1;
}
1801
# Move the current process into namespace $which (name of the entry below
# /proc/$pid/ns) of process $pid. $type is passed on to setns.
# $vmid is only used for error messages. Dies on failure.
my $enter_namespace = sub {
    my ($vmid, $pid, $which, $type) = @_;

    my $ns_path = "/proc/$pid/ns/$which";

    sysopen(my $ns_fh, $ns_path, O_RDONLY)
        or die "failed to open $which namespace of container $vmid: $!\n";

    PVE::Tools::setns(fileno($ns_fh), $type)
        or die "failed to enter $which namespace of container $vmid: $!\n";

    close $ns_fh;
};
1810
# Child side of sync_container_namespace: enter the mount namespace of
# process $pid, tell the peer on $socket that we are ready, read the mount
# table it sends back and sync every listed mountpoint except lxcfs.
# Sync errors are only reported as warnings.
my $do_syncfs = sub {
    my ($vmid, $pid, $socket) = @_;

    $enter_namespace->($vmid, $pid, 'mnt', PVE::Tools::CLONE_NEWNS);

    # Tell the parent process to start reading our /proc/mounts
    print {$socket} "go\n";
    $socket->flush();

    # Receive /proc/self/mounts
    my $mountdata = do { local $/; <$socket> };
    close $socket;

    # Now sync all mountpoints...
    my $mounts = PVE::ProcFSTools::parse_mounts($mountdata);
    foreach my $entry (@$mounts) {
        my (undef, $dir, $fstype) = @$entry;
        next if $fstype eq 'fuse.lxcfs';

        eval { PVE::Tools::sync_mountpoint($dir); };
        if (my $err = $@) {
            warn $err;
        }
    }
};
1833
# Sync all filesystems mounted inside the mount namespace of container
# $vmid.
#
# A child process is forked which enters the container's mount namespace.
# After its "go" message the parent reads /proc/<child>/mounts and sends
# the content to the child, which then syncs the mountpoints.
# Dies when the child cannot enter the namespace, when the mount table
# cannot be read or when the child exits with a non-zero status.
sub sync_container_namespace {
    my ($vmid) = @_;
    my $pid = find_lxc_pid($vmid);

    # SOCK_DGRAM is nicer for barriers but cannot be slurped
    socketpair my $pfd, my $cfd, AF_UNIX, SOCK_STREAM, PF_UNSPEC
        or die "failed to create socketpair: $!\n";

    my $child = fork();
    die "fork failed: $!\n" if !defined($child);

    if (!$child) {
        eval {
            close $pfd;
            &$do_syncfs($vmid, $pid, $cfd);
        };
        if (my $err = $@) {
            warn $err;
            POSIX::_exit(1);
        }
        POSIX::_exit(0);
    }
    close $cfd;

    # Error exit for the parent: close our end of the socket so a child
    # still waiting for data sees EOF, reap it, then die.
    my $abort = sub {
        my ($msg) = @_;
        close $pfd;
        waitpid($child, 0);
        die $msg;
    };

    # $go is undefined when the child exited before sending anything
    my $go = <$pfd>;
    $abort->("failed to enter container namespace\n")
        if !defined($go) || $go ne "go\n";

    open(my $mounts, '<', "/proc/$child/mounts")
        or $abort->("failed to open container's /proc/mounts: $!\n");
    my $mountdata = do { local $/ = undef; <$mounts> };
    close $mounts;
    print {$pfd} $mountdata;
    close $pfd;

    while (waitpid($child, 0) != $child) {}
    die "failed to sync container namespace\n" if $? != 0;
}
1870
# Create snapshot $snapname of container $vmid.
#
# The snapshot is prepared in the config, a running container is frozen
# and synced, the rootfs volume is snapshotted, the container is unfrozen
# and the snapshot is committed. On failure the prepared snapshot is
# removed again (forced delete) and the error is re-thrown.
sub snapshot_create {
    my ($vmid, $snapname, $comment) = @_;

    my $snap = $snapshot_prepare->($vmid, $snapname, $comment);

    my $conf = load_config($vmid);

    my $running = check_running($vmid);

    # volumes that were snapshotted, passed on to snapshot_delete on failure
    my $drivehash = {};

    eval {
        if ($running) {
            PVE::Tools::run_command(['/usr/bin/lxc-freeze', '-n', $vmid]);
            sync_container_namespace($vmid);
        }

        my $storecfg = PVE::Storage::config();
        my $volid = parse_ct_rootfs($conf->{rootfs})->{volume};

        PVE::Storage::volume_snapshot($storecfg, $volid, $snapname);
        $drivehash->{rootfs} = 1;
    };
    my $err = $@;

    # A running container is always unfrozen, even when freezing failed.
    if ($running) {
        eval { PVE::Tools::run_command(['/usr/bin/lxc-unfreeze', '-n', $vmid]); };
        warn $@ if $@;
    }

    if ($err) {
        eval { snapshot_delete($vmid, $snapname, 1, $drivehash); };
        warn "$@\n" if $@;
        die "$err\n";
    }

    $snapshot_commit->($vmid, $snapname);
}
1913
# Delete snapshot $snapname of container $vmid.
#
# The snapshot is first marked with snapstate 'delete', then the storage
# snapshot of its rootfs volume is removed and finally the snapshot entry
# is dropped from the config (parent links are re-pointed to the deleted
# snapshot's parent).
#
# A storage error is always reported by dying. With $force the snapshot
# entry is removed from the config before dying; without $force the entry
# is kept.
#
# Note: $drivehash is only set when called from snapshot_create.
sub snapshot_delete {
    my ($vmid, $snapname, $force, $drivehash) = @_;

    my $snap;

    my $conf;

    my $updatefn = sub {

        $conf = load_config($vmid);

        die "you can't delete a snapshot if vm is a template\n"
            if is_template($conf);

        $snap = $conf->{snapshots}->{$snapname};

        # when called from snapshot_create the config carries its lock
        if (!$drivehash) {
            check_lock($conf);
        }

        die "snapshot '$snapname' does not exist\n" if !defined($snap);

        $snap->{snapstate} = 'delete';

        write_config($vmid, $conf);
    };

    lock_container($vmid, 10, $updatefn);

    my $storecfg = PVE::Storage::config();

    # Re-point (or drop) the parent reference of a config section that
    # refers to the snapshot being deleted.
    my $unlink_parent = sub {

        my ($confref, $new_parent) = @_;

        if ($confref->{parent} && $confref->{parent} eq $snapname) {
            if ($new_parent) {
                $confref->{parent} = $new_parent;
            } else {
                delete $confref->{parent};
            }
        }
    };

    my $del_snap = sub {

        $conf = load_config($vmid);

        if ($drivehash) {
            delete $conf->{lock};
        } else {
            check_lock($conf);
        }

        my $parent = $conf->{snapshots}->{$snapname}->{parent};
        foreach my $snapkey (keys %{$conf->{snapshots}}) {
            &$unlink_parent($conf->{snapshots}->{$snapkey}, $parent);
        }

        &$unlink_parent($conf, $parent);

        delete $conf->{snapshots}->{$snapname};

        write_config($vmid, $conf);
    };

    my $rootfs = $conf->{snapshots}->{$snapname}->{rootfs};
    my $rootinfo = parse_ct_rootfs($rootfs);
    my $volid = $rootinfo->{volume};

    eval {
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname);
    };
    my $err = $@;

    # Drop the config entry on success, or in any case when forced.
    lock_container($vmid, 10, $del_snap) if !$err || $force;

    # Never swallow a storage error, forced or not.
    die "Can't delete snapshot: $vmid $snapname $err\n" if $err;
}
1997
# Roll container $vmid back to snapshot $snapname.
#
# Under the container lock the container is stopped (if running), the
# config gets the 'rollback' lock and the snapshot's settings are copied
# over the current ones. Then the rootfs volume is rolled back and the
# lock is removed again.
# Dies for templates, unknown or incomplete snapshots, and when the
# container is still running after the stop attempt.
sub snapshot_rollback {
    my ($vmid, $snapname) = @_;

    my $storecfg = PVE::Storage::config();

    my $conf = load_config($vmid);

    die "you can't rollback if vm is a template\n" if is_template($conf);

    my $snap = $conf->{snapshots}->{$snapname};

    die "snapshot '$snapname' does not exist\n" if !defined($snap);

    my $rootfs = $snap->{rootfs};
    my $rootinfo = parse_ct_rootfs($rootfs);
    my $volid = $rootinfo->{volume};

    PVE::Storage::volume_rollback_is_possible($storecfg, $volid, $snapname);

    my $updatefn = sub {

        die "unable to rollback to incomplete snapshot (snapstate = $snap->{snapstate})\n"
            if $snap->{snapstate};

        check_lock($conf);

        # list form: no shell is involved in running the command
        system('lxc-stop', '-n', $vmid, '--kill') if check_running($vmid);

        die "unable to rollback vm $vmid: vm is running\n"
            if check_running($vmid);

        $conf->{lock} = 'rollback';

        # copy snapshot config to current config
        # (keys that exist only in the current config are kept)
        &$snapshot_copy_config($snap, $conf);
        delete $conf->{snaptime};
        delete $conf->{snapname};
        $conf->{parent} = $snapname;

        write_config($vmid, $conf);
    };

    my $unlockfn = sub {
        delete $conf->{lock};
        write_config($vmid, $conf);
    };

    lock_container($vmid, 10, $updatefn);

    PVE::Storage::volume_snapshot_rollback($storecfg, $volid, $snapname);

    lock_container($vmid, 5, $unlockfn);
}
2056
# Convert the rootfs volume of container $vmid into a base volume and
# store the resulting volume id in the config.
# Dies when the storage does not offer the 'template' feature for it.
sub template_create {
    my ($vmid, $conf) = @_;

    my $storecfg = PVE::Storage::config();

    my $rootinfo = parse_ct_rootfs($conf->{rootfs});
    my $volid = $rootinfo->{volume};

    PVE::Storage::volume_has_feature($storecfg, 'template', $volid)
        or die "Template feature is not available for '$volid'\n";

    PVE::Storage::activate_volumes($storecfg, [$volid]);

    $rootinfo->{volume} = PVE::Storage::vdisk_create_base($storecfg, $volid);
    $conf->{rootfs} = print_ct_mountpoint($rootinfo, 1);

    write_config($vmid, $conf);
}
2076
# Returns 1 when the 'template' option of $conf is defined and equal to 1,
# a false value otherwise.
sub is_template {
    my ($conf) = @_;

    my $flag = $conf->{template};

    return 1 if defined($flag) && $flag == 1;
}
2082
# Return the list of possible mountpoint keys: 'rootfs' followed by
# 'mp0' .. 'mp<N-1>' for N = $MAX_MOUNT_POINTS, in reverse order when
# $reverse is true.
sub mountpoint_names {
    my ($reverse) = @_;

    my @names = ('rootfs', map { "mp$_" } 0 .. $MAX_MOUNT_POINTS - 1);

    return $reverse ? reverse @names : @names;
}
2094
2095
# Call $func->($key, $mountpoint) for every mountpoint key defined in
# $conf, in the order given by mountpoint_names($reverse). Entries whose
# parser returns undef are skipped.
sub foreach_mountpoint_full {
    my ($conf, $reverse, $func) = @_;

    foreach my $key (mountpoint_names($reverse)) {
        my $value = $conf->{$key};
        next if !defined($value);

        my $parse = ($key eq 'rootfs') ? \&parse_ct_rootfs : \&parse_ct_mountpoint;
        my $mountpoint = $parse->($value, 1);
        next if !defined($mountpoint);

        $func->($key, $mountpoint);
    }
}
2108
# Call $func for every mountpoint of $conf in forward order
# (see foreach_mountpoint_full).
sub foreach_mountpoint {
    my ($conf, $func) = @_;

    return foreach_mountpoint_full($conf, 0, $func);
}
2114
# Call $func for every mountpoint of $conf in reverse order
# (see foreach_mountpoint_full).
sub foreach_mountpoint_reverse {
    my ($conf, $func) = @_;

    return foreach_mountpoint_full($conf, 1, $func);
}
2120
# Check that $authuser may modify every option in $key_list (array ref)
# of container $vmid.
#
# root@pam is always allowed. For any other user the privilege matching
# each option is checked through $rpcenv->check_vm_perm. Returns 1 when
# all checks were passed.
sub check_ct_modify_config_perm {
    my ($rpcenv, $authuser, $vmid, $pool, $key_list) = @_;

    # Only the superuser may skip the checks; every other user has to
    # pass them.
    return 1 if $authuser eq 'root@pam';

    foreach my $opt (@$key_list) {

        if ($opt eq 'cpus' || $opt eq 'cpuunits' || $opt eq 'cpulimit') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.CPU']);
        } elsif ($opt eq 'rootfs' || $opt =~ /^mp\d+$/) {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk']);
        } elsif ($opt eq 'memory' || $opt eq 'swap') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Memory']);
        } elsif ($opt =~ m/^net\d+$/ || $opt eq 'nameserver' ||
                 $opt eq 'searchdomain' || $opt eq 'hostname') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Network']);
        } else {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Options']);
        }
    }

    return 1;
}
2144
# Unmount all mountpoints of container $vmid below its rootfs directory,
# in reverse mountpoint order. Paths that are not mounted are skipped.
# With $noerr set, umount failures are only warned about; otherwise the
# first failure dies.
sub umount_all {
    my ($vmid, $storage_cfg, $conf, $noerr) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    my $volid_list = get_vm_volumes($conf);

    my $unmount = sub {
        my ($ms, $mountpoint) = @_;

        my ($volid, $mount) = ($mountpoint->{volume}, $mountpoint->{mp});
        return if !$volid || !$mount;

        # collapse repeated slashes
        (my $mount_path = "$rootdir/$mount") =~ s!/+!/!g;

        return if !PVE::ProcFSTools::is_mounted($mount_path);

        eval {
            PVE::Tools::run_command(['umount', '-d', $mount_path]);
        };
        my $err = $@;
        return if !$err;

        die $err if !$noerr;
        warn $err;
    };

    foreach_mountpoint_reverse($conf, $unmount);
}
2176
# Activate the volumes of container $vmid and mount all its mountpoints
# below /var/lib/lxc/$vmid/rootfs. When a mount fails, everything is
# unmounted again (errors ignored) and the original error is re-thrown.
# Returns the rootfs directory.
sub mount_all {
    my ($vmid, $storage_cfg, $conf) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    File::Path::make_path($rootdir);

    PVE::Storage::activate_volumes($storage_cfg, get_vm_volumes($conf));

    my $mount_one = sub {
        my ($ms, $mountpoint) = @_;

        mountpoint_mount($mountpoint, $rootdir, $storage_cfg);
    };

    eval { foreach_mountpoint($conf, $mount_one); };
    my $err = $@;
    if ($err) {
        warn "mounting container failed\n";
        umount_all($vmid, $storage_cfg, $conf, 1);
        die $err;
    }

    return $rootdir;
}
2201
2202
# Call mountpoint_mount without a root directory, which only returns the
# path that corresponds to the mountpoint instead of mounting it.
sub mountpoint_mount_path {
    my ($mountpoint, $storage_cfg, $snapname) = @_;

    my $rootdir = undef; # no root directory: just query the path

    return mountpoint_mount($mountpoint, $rootdir, $storage_cfg, $snapname);
}
2208
# Die unless the canonical form of $path is identical to its resolved real
# path, i.e. unless no symlink is involved in reaching it.
# Also dies when the path cannot be resolved at all.
my $check_mount_path = sub {
    my ($path) = @_;
    $path = File::Spec->canonpath($path);
    my $real = Cwd::realpath($path);
    # realpath() yields undef when the path cannot be resolved; report that
    # instead of comparing against an undefined value.
    die "unable to resolve mount path '$path'\n" if !defined($real);
    if ($real ne $path) {
        die "mount path modified by symlink: $path != $real";
    }
};
2217
# Return the loop device (/dev/loopN) that 'losetup --associated' reports
# for $path, or undef when its output names none. When several lines match,
# the last one wins.
sub query_loopdev {
    my ($path) = @_;

    my $loopdev;
    my $collect = sub {
        my ($line) = @_;
        $loopdev = $1 if $line =~ m@^(/dev/loop\d+):@;
    };

    PVE::Tools::run_command(['losetup', '--associated', $path], outfunc => $collect);

    return $loopdev;
}
2231
# Run a function with a file attached to a loop device.
# $func is called with the loop device path as its only argument.
# The loop device is always detached afterwards (or set to autoclear).
# An error raised by $func is re-thrown after detaching.
# Returns the loop device.
sub run_with_loopdev {
    my ($func, $file) = @_;

    my $loopdev;
    my $collect = sub {
        my ($line) = @_;
        $loopdev = $1 if $line =~ m@^(/dev/loop\d+)$@;
    };

    PVE::Tools::run_command(['losetup', '--show', '-f', $file], outfunc => $collect);
    die "failed to setup loop device for $file\n" if !$loopdev;

    eval { $func->($loopdev); };
    my $err = $@;

    PVE::Tools::run_command(['losetup', '-d', $loopdev]);

    die $err if $err;

    return $loopdev;
}
2252
# Mount a single container mountpoint below $rootdir.
#
# Parameters:
#   $mountpoint  - parsed mountpoint hash; the keys read here are 'volume',
#                  'mp', 'type', 'ro', 'acl' and 'quota'
#   $rootdir     - directory the container root is assembled in; use
#                  $rootdir = undef to just return the corresponding mount
#                  path without mounting anything
#   $storage_cfg - storage configuration handed to the PVE::Storage helpers
#   $snapname    - optional snapshot name; snapshots are mounted read-only
#
# Returns nothing when the mountpoint has no volume or no mount target.
# Otherwise returns the source path in scalar context, or the list
# ($path, $use_loopdev, $mounted_dev) in list context.
#
# Dies on unsupported storage types or image formats, on read-only bind
# mounts, and when a bind mount source directory does not exist.
sub mountpoint_mount {
    my ($mountpoint, $rootdir, $storage_cfg, $snapname) = @_;

    my $volid = $mountpoint->{volume};
    my $mount = $mountpoint->{mp};
    my $type = $mountpoint->{type};
    # quota is only considered for writable, non-snapshot mounts
    my $quota = !$snapname && !$mountpoint->{ro} && $mountpoint->{quota};
    my $mounted_dev;

    return if !$volid || !$mount;

    my $mount_path;

    if (defined($rootdir)) {
        $rootdir =~ s!/+$!!;
        $mount_path = "$rootdir/$mount";
        $mount_path =~ s!/+!/!g; # collapse duplicate slashes
        $check_mount_path->($mount_path);
        File::Path::mkpath($mount_path);
    }

    my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1);

    die "unknown snapshot path for '$volid'" if !$storage && defined($snapname);

    my $optstring = '';
    if (defined($mountpoint->{acl})) {
        $optstring .= ($mountpoint->{acl} ? 'acl' : 'noacl');
    }
    if ($mountpoint->{ro}) {
        $optstring .= ',' if $optstring;
        $optstring .= 'ro';
    }

    # Only hand '-o' to mount when there really are options; previously an
    # empty option string was passed as "-o ''" on every call.
    my @extra_opts = $optstring ? ('-o', $optstring) : ();

    if ($storage) {

        my $scfg = PVE::Storage::storage_config($storage_cfg, $storage);
        my $path = PVE::Storage::path($storage_cfg, $volid, $snapname);

        my ($vtype, undef, undef, undef, undef, $isBase, $format) =
            PVE::Storage::parse_volname($storage_cfg, $volid);

        $format = 'iso' if $vtype eq 'iso'; # allow to handle iso files

        if ($format eq 'subvol') {
            if ($mount_path) {
                if ($snapname) {
                    if ($scfg->{type} eq 'zfspool') {
                        # the path is passed to 'mount -t zfs' without its leading slash
                        my $path_arg = $path;
                        $path_arg =~ s!^/+!!;
                        PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, '-t', 'zfs', $path_arg, $mount_path]);
                    } else {
                        die "cannot mount subvol snapshots for storage type '$scfg->{type}'\n";
                    }
                } else {
                    if ($mountpoint->{ro}) {
                        die "read-only bind mounts not supported\n";
                    }
                    PVE::Tools::run_command(['mount', '-o', 'bind', @extra_opts, $path, $mount_path]);
                    warn "cannot enable quota control for bind mounted subvolumes\n" if $quota;
                }
            }
            return wantarray ? ($path, 0, $mounted_dev) : $path;
        } elsif ($format eq 'raw' || $format eq 'iso') {
            # performs the actual mount; receives either the volume path or
            # the loop device set up for it
            my $domount = sub {
                my ($path) = @_;
                if ($mount_path) {
                    if ($format eq 'iso') {
                        PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, $path, $mount_path]);
                    } elsif ($isBase || defined($snapname)) {
                        PVE::Tools::run_command(['mount', '-o', 'ro,noload', @extra_opts, $path, $mount_path]);
                    } else {
                        if ($quota) {
                            push @extra_opts, '-o', 'usrjquota=aquota.user,grpjquota=aquota.group,jqfmt=vfsv0';
                        }
                        PVE::Tools::run_command(['mount', @extra_opts, $path, $mount_path]);
                    }
                }
            };
            my $use_loopdev = 0;
            if ($scfg->{path}) {
                # storages with a 'path' setting are mounted through a loop device
                $mounted_dev = run_with_loopdev($domount, $path);
                $use_loopdev = 1;
            } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' ||
                     $scfg->{type} eq 'rbd' || $scfg->{type} eq 'lvmthin') {
                $mounted_dev = $path;
                $domount->($path);
            } else {
                die "unsupported storage type '$scfg->{type}'\n";
            }
            return wantarray ? ($path, $use_loopdev, $mounted_dev) : $path;
        } else {
            die "unsupported image format '$format'\n";
        }
    } elsif ($type eq 'device') {
        PVE::Tools::run_command(['mount', @extra_opts, $volid, $mount_path]) if $mount_path;
        return wantarray ? ($volid, 0, $volid) : $volid;
    } elsif ($type eq 'bind') {
        if ($mountpoint->{ro}) {
            die "read-only bind mounts not supported\n";
            # Theoretically we'd have to execute both:
            # mount -o bind $a $b
            # mount -o bind,remount,ro $a $b
        }
        die "directory '$volid' does not exist\n" if ! -d $volid;
        $check_mount_path->($volid);
        PVE::Tools::run_command(['mount', '-o', 'bind', @extra_opts, $volid, $mount_path]) if $mount_path;
        warn "cannot enable quota control for bind mounts\n" if $quota;
        return wantarray ? ($volid, 0, undef) : $volid;
    }

    die "unsupported storage";
}
2369
# Collect the volume IDs of all storage-backed mountpoints in $conf.
#
# $excludes, when set, names a single mountpoint key to leave out. Only
# mountpoints of type 'volume' whose volume ID parses to a storage ID are
# returned. Returns an array reference of volume IDs.
sub get_vm_volumes {
    my ($conf, $excludes) = @_;

    my @volids;

    my $collect = sub {
        my ($ms, $mountpoint) = @_;

        # leave out the one mountpoint the caller asked to skip
        return if $excludes && $ms eq $excludes;

        my $volid = $mountpoint->{volume};
        return if !$volid;
        return if $mountpoint->{type} ne 'volume';

        # keep only volume IDs that resolve to a storage
        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        push @volids, $volid;
    };

    foreach_mountpoint($conf, $collect);

    return \@volids;
}
2392
# Create an ext4 filesystem (with the 'mmp' feature) on $dev.
#
# $rootuid / $rootgid set the owner of the new filesystem's root directory.
# Both are optional and default to 0, so callers without an id mapping do
# not interpolate undefined values into the mkfs.ext4 command line.
sub mkfs {
    my ($dev, $rootuid, $rootgid) = @_;

    $rootuid = 0 if !defined($rootuid);
    $rootgid = 0 if !defined($rootgid);

    PVE::Tools::run_command(['mkfs.ext4', '-O', 'mmp',
                             '-E', "root_owner=$rootuid:$rootgid",
                             $dev]);
}
2400
# Create a filesystem on $volid, owned by $rootuid:$rootgid.
#
# $volid is either a block device path below /dev/, which is formatted
# directly, or a storage volume ID. A storage volume is activated, resolved
# to its path and formatted; only 'raw' volumes are accepted.
#
# Dies if the volume ID has no storage part or the volume is not 'raw'.
sub format_disk {
    my ($storage_cfg, $volid, $rootuid, $rootgid) = @_;

    if ($volid =~ m!^/dev/.+!) {
        # Pass the ownership on for plain devices as well; it used to be
        # dropped here, so the requested root uid/gid was never applied.
        mkfs($volid, $rootuid, $rootgid);
        return;
    }

    my ($storage) = PVE::Storage::parse_volume_id($volid, 1);

    die "cannot format volume '$volid' with no storage\n" if !$storage;

    PVE::Storage::activate_volumes($storage_cfg, [$volid]);

    my $path = PVE::Storage::path($storage_cfg, $volid);

    # the image format is the seventh value returned by parse_volname
    my $format = (PVE::Storage::parse_volname($storage_cfg, $volid))[6];

    die "cannot format volume '$volid' (format == $format)\n"
        if $format ne 'raw';

    mkfs($path, $rootuid, $rootgid);
}
2425
# Free every volume listed in $vollist (array reference of volume IDs).
#
# Best effort: a failure to free one volume is reported with warn and the
# remaining volumes are still processed.
sub destroy_disks {
    my ($storecfg, $vollist) = @_;

    for my $volid (@$vollist) {
        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        if (my $err = $@) {
            warn $err;
        }
    }
}
2434
# Allocate the volumes requested in $settings and record them in $conf.
#
# A mountpoint whose volume has the form "<storage>:<size in GB>" gets a
# new volume allocated on that storage: a 'raw' image that is formatted,
# or a 'subvol' that is chowned to the container's root uid/gid. Any other
# volume value (existing volume, directory, device) is written to $conf
# unchanged.
#
# Returns an array reference with the newly allocated volume IDs. On any
# error all volumes allocated so far are freed again and the error is
# re-thrown.
sub create_disks {
    my ($storecfg, $vmid, $settings, $conf) = @_;

    my $vollist = [];

    eval {
        my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
        my $chown_vollist = [];

        foreach_mountpoint($settings, sub {
            my ($ms, $mountpoint) = @_;

            my $volid = $mountpoint->{volume};

            my ($storage) = PVE::Storage::parse_volume_id($volid, 1);

            if ($storage && ($volid =~ m/^([^:\s]+):(\d+(\.\d+)?)$/)) {
                my $size_gb = $2;

                my $size_kb = int(${size_gb}*1024) * 1024;

                my $scfg = PVE::Storage::storage_config($storecfg, $storage);
                my $stype = $scfg->{type};
                # fixme: use better naming ct-$vmid-disk-X.raw?

                # decide the image format and allocation size first, so the
                # allocation itself happens in exactly one place below
                my ($format, $alloc_kb) = ('raw', $size_kb);
                if ($stype eq 'dir' || $stype eq 'nfs') {
                    # size 0 requests a plain directory (subvol) instead of an image
                    ($format, $alloc_kb) = ('subvol', 0) if $size_kb <= 0;
                } elsif ($stype eq 'zfspool') {
                    $format = 'subvol';
                } elsif ($stype eq 'drbd' || $stype eq 'lvm' || $stype eq 'lvmthin') {
                    # raw image of the requested size
                } elsif ($stype eq 'rbd') {
                    die "krbd option must be enabled on storage type '$scfg->{type}'\n" if !$scfg->{krbd};
                } else {
                    die "unable to create containers on storage type '$scfg->{type}'\n";
                }

                $volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, $format,
                                                   undef, $alloc_kb);
                # Register the volume before formatting it: if format_disk
                # dies, the error handler below must still free it.
                push @$vollist, $volid;

                if ($format eq 'raw') {
                    format_disk($storecfg, $volid, $rootuid, $rootgid);
                } else {
                    push @$chown_vollist, $volid;
                }

                $mountpoint->{volume} = $volid;
                $mountpoint->{size} = $size_kb * 1024;
                $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            } else {
                # use specified/existing volid/dir/device
                $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            }
        });

        PVE::Storage::activate_volumes($storecfg, $chown_vollist, undef);
        foreach my $volid (@$chown_vollist) {
            my $path = PVE::Storage::path($storecfg, $volid, undef);
            chown($rootuid, $rootgid, $path)
                or warn "unable to change owner of '$path' to $rootuid:$rootgid - $!\n";
        }
        PVE::Storage::deactivate_volumes($storecfg, $chown_vollist, undef);
    };
    # free allocated images on error
    if (my $err = $@) {
        destroy_disks($storecfg, $vollist);
        die $err;
    }
    return $vollist;
}
2512
2513 # bash completion helper
2514
# Completion callback listing template (or backup) volume IDs.
#
# $cmdname selects what is listed: 'restore' lists backups, anything else
# lists container templates. If the value typed so far ($cvalue) begins
# with "<storage>:", the listing is limited to that storage. Returns an
# array reference of volume IDs.
sub complete_os_templates {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    # storage prefix of the partially typed value, undef if there is none
    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $vtype = 'vztmpl';
    $vtype = 'backup' if $cmdname eq 'restore';

    my $data = PVE::Storage::template_list($cfg, $storeid, $vtype);

    # flatten the per-storage item lists and keep the defined volume IDs
    my @volids = grep { defined($_) }
                 map { $_->{volid} }
                 map { @$_ } values %$data;

    return \@volids;
}
2538
# Shared helper for the container ID completion callbacks.
#
# $running is a tri-state filter:
#   undef - return every known container ID (templates included)
#   true  - return only non-template containers that are currently active
#   false - return only non-template containers that are not active
# Returns an array reference of container IDs.
my $complete_ctid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();
    my $active_hash = list_active_containers();

    # no filter requested: every known ID qualifies
    return [ keys %$idlist ] if !defined($running);

    my @ids = grep {
        my $status = $idlist->{$_};
        $status->{template}
            ? 0
            : ($running ? $active_hash->{$_} : !$active_hash->{$_});
    } keys %$idlist;

    return \@ids;
};
2560
# Completion callback: all container IDs, without any state filter.
sub complete_ctid {
    return $complete_ctid_full->();
}
2564
# Completion callback: IDs of non-template containers that are not active.
sub complete_ctid_stopped {
    return $complete_ctid_full->(0);
}
2568
# Completion callback: IDs of non-template containers that are active.
sub complete_ctid_running {
    return $complete_ctid_full->(1);
}
2572
# Extract the uid/gid mappings from the 'lxc.id_map' entries of $conf->{lxc}.
#
# Each value must look like "<u|g> <container id> <host id> <length>";
# anything else is a fatal error. If no mapping is configured and the
# container is marked unprivileged, a default mapping of 65536 ids starting
# at host id 100000 is used.
#
# Returns ($id_map, $rootuid, $rootgid): an array reference of
# [type, container id, host id, length] entries, plus the host uid and gid
# that container id 0 maps to (both 0 when unmapped).
sub parse_id_maps {
    my ($conf) = @_;

    my @id_map;
    # host id that container id 0 maps to, per mapping type
    my %root_of = (u => 0, g => 0);

    foreach my $entry (@{ $conf->{lxc} || [] }) {
        my ($key, $value) = @$entry;
        next if $key ne 'lxc.id_map';

        my @fields = $value =~ /^([ug])\s+(\d+)\s+(\d+)\s+(\d+)\s*$/;
        die "failed to parse id_map: $value\n" if !@fields;

        my ($type, $ct, $host) = @fields;
        push @id_map, \@fields;
        $root_of{$type} = $host if $ct == 0;
    }

    if (!@id_map && $conf->{unprivileged}) {
        # Should we read them from /etc/subuid?
        @id_map = (
            ['u', '0', '100000', '65536'],
            ['g', '0', '100000', '65536'],
        );
        @root_of{qw(u g)} = (100000, 100000);
    }

    return (\@id_map, $root_of{u}, $root_of{g});
}
2605
# Build the command prefix that runs a command inside the user namespace
# described by $id_map (array reference of id map entries).
#
# Returns an array reference: 'lxc-usernsexec' with one '-m' option per
# entry (fields joined by ':') and a closing '--', or an empty array
# reference when there are no mappings.
sub userns_command {
    my ($id_map) = @_;

    return [] if !@$id_map;

    my @cmd = ('lxc-usernsexec');
    foreach my $mapping (@$id_map) {
        push @cmd, '-m', join(':', @$mapping);
    }
    push @cmd, '--';

    return \@cmd;
}
2613
2614 1;