]> git.proxmox.com Git - pve-container.git/blob - src/PVE/LXC.pm
Add missing use statement
[pve-container.git] / src / PVE / LXC.pm
1 package PVE::LXC;
2
3 use strict;
4 use warnings;
5 use POSIX qw(EINTR);
6
7 use Socket;
8
9 use File::Path;
10 use File::Spec;
11 use Cwd qw();
12 use Fcntl qw(O_RDONLY :flock);
13
14 use PVE::Cluster qw(cfs_register_file cfs_read_file);
15 use PVE::Storage;
16 use PVE::SafeSyslog;
17 use PVE::INotify;
18 use PVE::JSONSchema qw(get_standard_option);
19 use PVE::Tools qw($IPV6RE $IPV4RE dir_glob_foreach lock_file lock_file_full);
20 use PVE::Network;
21 use PVE::AccessControl;
22 use PVE::ProcFSTools;
23 use Time::HiRes qw (gettimeofday);
24
25 use Data::Dumper;
26
# Name of the local node, resolved once when the module is loaded.
my $nodename = PVE::INotify::nodename();

# Host CPU information (the 'cpus' and 'user_hz' fields are read by vmstatus).
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();

# Common tar command line flags, exposed as a package variable.
our $COMMON_TAR_FLAGS = [
    qw(
        --sparse
        --numeric-owner
        --acls
        --xattrs
        --xattrs-include=user.*
        --xattrs-include=security.capability
        --warning=no-xattr-write
    ),
];

# Register the parser/writer pair for '/lxc/' files with the cluster
# file system layer.
cfs_register_file('/lxc/', \&parse_pct_config, \&write_pct_config);
38
# Property-string schema for the 'rootfs' option. 'volume' is the default
# key, all other properties are optional. This hash is also merged into
# the mount point schema ($mp_desc).
my $rootfs_desc = {
    volume => {
        description => 'Volume, device or directory to mount into the container.',
        type => 'string',
        format => 'pve-lxc-mp-string',
        format_description => 'volume',
        default_key => 1,
    },
    backup => {
        description => 'Whether to include the mountpoint in backups.',
        type => 'boolean',
        format_description => '[1|0]',
        optional => 1,
    },
    size => {
        description => 'Volume size (read only value).',
        type => 'string',
        format => 'disk-size',
        format_description => 'DiskSize',
        optional => 1,
    },
    acl => {
        description => 'Explicitly enable or disable ACL support.',
        type => 'boolean',
        format_description => 'acl',
        optional => 1,
    },
    ro => {
        description => 'Read-only mountpoint (not supported with bind mounts)',
        type => 'boolean',
        format_description => 'ro',
        optional => 1,
    },
    quota => {
        description => 'Enable user quotas inside the container (not supported with zfs subvolumes)',
        type => 'boolean',
        format_description => '[0|1]',
        optional => 1,
    },
};
79
# Standard option describing the container root file system; its value is a
# property string validated against $rootfs_desc.
PVE::JSONSchema::register_standard_option(
    'pve-ct-rootfs',
    {
        description => "Use volume as container root.",
        type => 'string',
        format => $rootfs_desc,
        optional => 1,
    },
);

# Standard option for snapshot names (a config id of at most 40 characters).
PVE::JSONSchema::register_standard_option(
    'pve-lxc-snapshot-name',
    {
        description => "The name of the snapshot.",
        type => 'string',
        format => 'pve-configid',
        maxLength => 40,
    },
);
91
# Schema of all plain (non 'lxc.*') container configuration keys. The
# indexed keys netN, mpN and unusedN are added to this hash further below.
# check_type() validates values against the 'type' and 'format' given here.
my $confdesc = {
    lock => {
        type => 'string',
        enum => [qw(migrate backup snapshot rollback)],
        description => "Lock/unlock the VM.",
        optional => 1,
    },
    onboot => {
        type => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default => 0,
        optional => 1,
    },
    startup => get_standard_option('pve-startup-order'),
    template => {
        type => 'boolean',
        description => "Enable/disable Template.",
        default => 0,
        optional => 1,
    },
    arch => {
        type => 'string',
        enum => ['amd64', 'i386'],
        description => "OS architecture type.",
        default => 'amd64',
        optional => 1,
    },
    ostype => {
        type => 'string',
        enum => ['debian', 'ubuntu', 'centos', 'fedora', 'opensuse', 'archlinux'],
        description => "OS type. Corresponds to lxc setup scripts in /usr/share/lxc/config/<ostype>.common.conf.",
        optional => 1,
    },
    console => {
        type => 'boolean',
        description => "Attach a console device (/dev/console) to the container.",
        default => 1,
        optional => 1,
    },
    tty => {
        type => 'integer',
        description => "Specify the number of tty available to the container",
        minimum => 0,
        maximum => 6,
        default => 2,
        optional => 1,
    },
    cpulimit => {
        type => 'number',
        description => "Limit of CPU usage. Note if the computer has 2 CPUs, it has a total of '2' CPU time. Value '0' indicates no CPU limit.",
        minimum => 0,
        maximum => 128,
        default => 0,
        optional => 1,
    },
    cpuunits => {
        type => 'integer',
        description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to the weights of all the other running VMs.\n\nNOTE: You can disable fair-scheduler configuration by setting this to 0.",
        minimum => 0,
        maximum => 500000,
        default => 1024,
        optional => 1,
    },
    memory => {
        type => 'integer',
        description => "Amount of RAM for the VM in MB.",
        minimum => 16,
        default => 512,
        optional => 1,
    },
    swap => {
        type => 'integer',
        description => "Amount of SWAP for the VM in MB.",
        minimum => 0,
        default => 512,
        optional => 1,
    },
    hostname => {
        type => 'string',
        format => 'dns-name',
        description => "Set a host name for the container.",
        maxLength => 255,
        optional => 1,
    },
    description => {
        type => 'string',
        description => "Container description. Only used on the configuration web interface.",
        optional => 1,
    },
    searchdomain => {
        type => 'string',
        format => 'dns-name-list',
        description => "Sets DNS search domains for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
        optional => 1,
    },
    nameserver => {
        type => 'string',
        format => 'address-list',
        description => "Sets DNS server IP address for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
        optional => 1,
    },
    rootfs => get_standard_option('pve-ct-rootfs'),
    parent => {
        type => 'string',
        format => 'pve-configid',
        description => "Parent snapshot name. This is used internally, and should not be modified.",
        maxLength => 40,
        optional => 1,
    },
    snaptime => {
        type => 'integer',
        description => "Timestamp for snapshots.",
        minimum => 0,
        optional => 1,
    },
    cmode => {
        type => 'string',
        enum => ['shell', 'console', 'tty'],
        description => "Console mode. By default, the console command tries to open a connection to one of the available tty devices. By setting cmode to 'console' it tries to attach to /dev/console instead. If you set cmode to 'shell', it simply invokes a shell inside the container (no login).",
        default => 'tty',
        optional => 1,
    },
    protection => {
        type => 'boolean',
        description => "Sets the protection flag of the container. This will prevent the CT or CT's disk remove/update operation.",
        default => 0,
        optional => 1,
    },
    unprivileged => {
        type => 'boolean',
        description => "Makes the container run as unprivileged user. (Should not be modified manually.)",
        default => 0,
        optional => 1,
    },
};
223
# Raw 'lxc.*' keys that parse_pct_config() accepts in a container config.
# A value of 1 marks the key as allowed; any other true value is the
# warning text printed when the key is encountered. Keys below
# 'lxc.cgroup.' are not listed here, the parser accepts them by prefix.
my $valid_lxc_conf_keys = {
    (map { $_ => 1 } qw(
        lxc.include
        lxc.arch
        lxc.utsname
        lxc.haltsignal
        lxc.rebootsignal
        lxc.stopsignal
        lxc.init_cmd
        lxc.network.type
        lxc.network.flags
        lxc.network.link
        lxc.network.mtu
        lxc.network.name
        lxc.network.hwaddr
        lxc.network.ipv4
        lxc.network.ipv4.gateway
        lxc.network.ipv6
        lxc.network.ipv6.gateway
        lxc.network.script.up
        lxc.network.script.down
        lxc.pts
        lxc.console.logfile
        lxc.console
        lxc.tty
        lxc.devttydir
        lxc.hook.autodev
        lxc.autodev
        lxc.kmsg
        lxc.mount
        lxc.mount.entry
        lxc.mount.auto
        lxc.rootfs.mount
        lxc.cap.drop
        lxc.cap.keep
        lxc.aa_profile
        lxc.aa_allow_incomplete
        lxc.se_context
        lxc.seccomp
        lxc.id_map
        lxc.hook.pre-start
        lxc.hook.pre-mount
        lxc.hook.mount
        lxc.hook.start
        lxc.hook.stop
        lxc.hook.post-stop
        lxc.hook.clone
        lxc.hook.destroy
        lxc.loglevel
        lxc.logfile
        lxc.start.auto
        lxc.start.delay
        lxc.start.order
        lxc.group
        lxc.environment
    )),

    # keys that are rejected with an explanatory message
    'lxc.rootfs' => 'lxc.rootfs is auto generated from rootfs',
    'lxc.rootfs.options' => 'lxc.rootfs.options is not supported'
        . ', please use mountpoint options in the "rootfs" key',
};
283
# Property-string schema for the 'netN' options. Only 'name' is mandatory.
# The same hash is registered as the named format 'pve-lxc-network'.
my $netconf_desc = {
    type => {
        type => 'string',
        optional => 1,
        description => "Network interface type.",
        enum => [qw(veth)],
    },
    name => {
        type => 'string',
        format_description => 'String',
        description => 'Name of the network device as seen from inside the container. (lxc.network.name)',
        pattern => '[-_.\w\d]+',
    },
    bridge => {
        type => 'string',
        format_description => 'vmbr<Number>',
        description => 'Bridge to attach the network device to.',
        pattern => '[-_.\w\d]+',
        optional => 1,
    },
    hwaddr => {
        type => 'string',
        format_description => 'MAC',
        # The description used to be a verbatim copy of the 'bridge' text.
        description => 'MAC address of the network device. (lxc.network.hwaddr)',
        pattern => qr/(?:[a-f0-9]{2}:){5}[a-f0-9]{2}/i,
        optional => 1,
    },
    mtu => {
        type => 'integer',
        format_description => 'Number',
        description => 'Maximum transfer unit of the interface. (lxc.network.mtu)',
        minimum => 64, # minimum ethernet frame is 64 bytes
        optional => 1,
    },
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        description => 'IPv4 address in CIDR format.',
        optional => 1,
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        description => 'Default gateway for IPv4 traffic.',
        optional => 1,
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        description => 'IPv6 address in CIDR format.',
        optional => 1,
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        description => 'Default gateway for IPv6 traffic.',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        format_description => '[1|0]',
        description => "Controls whether this interface's firewall rules should be used.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        format_description => 'VlanNo',
        minimum => '2',
        maximum => '4094',
        description => "VLAN tag for this interface.",
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:;\d+)*/,
        format_description => 'vlanid[;vlanid...]',
        description => "VLAN ids to pass through the interface",
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-lxc-network', $netconf_desc);
369
# Add the options net0 .. net<MAX-1> to the config schema. Every index gets
# its own description hash; all of them share the $netconf_desc format.
my $MAX_LXC_NETWORKS = 10;
foreach my $idx (0 .. $MAX_LXC_NETWORKS - 1) {
    $confdesc->{"net$idx"} = {
        description => "Specifies network interfaces for the container.",
        type => 'string',
        format => $netconf_desc,
        optional => 1,
    };
}
378
379 PVE::JSONSchema::register_format('pve-lxc-mp-string', \&verify_lxc_mp_string);
# Format checker for volume and mount point strings.
#
# Rejects strings that contain '.' or '..' as a path component in any of
# these positions:
#   - '/./' or '/../' anywhere
#   - '/.' or '/..' at the end
#   - '../' at the beginning
#
# Returns $mp unchanged when it is acceptable. Otherwise returns undef if
# $noerr is true and dies with a message if it is not.
sub verify_lxc_mp_string {
    my ($mp, $noerr) = @_;

    my $has_dot_component =
           $mp =~ m{/\.\.?/}
        || $mp =~ m{/\.\.?$}
        || $mp =~ m{^\.\./};

    return $mp if !$has_dot_component;

    return undef if $noerr;
    die "$mp contains illegal character sequences\n";
}
396
# Mount point schema: every rootfs property plus the mandatory 'mp' path.
# Also registered as the named format 'pve-ct-mountpoint'.
my $mp_desc = {
    %$rootfs_desc,
    mp => {
        description => 'Path to the mountpoint as seen from inside the container.',
        type => 'string',
        format => 'pve-lxc-mp-string',
        format_description => 'Path',
    },
};
PVE::JSONSchema::register_format('pve-ct-mountpoint', $mp_desc);
407
# Schema shared by all 'unusedN' options (the same hash reference is stored
# for every index).
my $unuseddesc = {
    optional => 1,
    type => 'string', format => 'pve-volume-id',
    description => "Reference to unused volumes.",
};

# Add the options mp0 .. mp<MAX-1> to the config schema.
my $MAX_MOUNT_POINTS = 10;
for (my $i = 0; $i < $MAX_MOUNT_POINTS; $i++) {
    $confdesc->{"mp$i"} = {
        optional => 1,
        type => 'string', format => $mp_desc,
        description => "Use volume as container mount point (experimental feature).",
    };
}

# Add the options unused0 .. unused<MAX-1>. The loop is bounded by
# $MAX_UNUSED_DISKS, the same limit add_unused_volume() uses when it looks
# for a free slot.
my $MAX_UNUSED_DISKS = $MAX_MOUNT_POINTS;
for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) {
    $confdesc->{"unused$i"} = $unuseddesc;
}
428
# Serialize a container configuration hash into the text of a config file.
#
# $filename is not used. $conf is the top-level configuration; its
# 'snapstate' entry is deleted (the hash is modified in place). The main
# section is written first, followed by one "[name]" section per entry of
# $conf->{snapshots}, sorted by name.
#
# Within a section: the description becomes '#' comment lines, then the
# plain keys follow in sorted order, then the raw entries of the 'lxc'
# array in their stored order. Dies if a plain value contains a newline.
sub write_pct_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    # keys that are never written as "key: value" lines
    my %skip = map { $_ => 1 } qw(digest description pending snapshots snapname lxc);

    my $render_section = sub {
        my ($section) = @_;

        # add description as comment to top of the section
        my $descr = $section->{description} || '';
        my $out = join('', map { '#' . PVE::Tools::encode_text($_) . "\n" } split(/\n/, $descr));

        for my $key (sort keys %$section) {
            next if $skip{$key};
            my $value = $section->{$key};
            die "detected invalid newline inside property '$key'\n" if $value =~ m/\n/;
            $out .= "$key: $value\n";
        }

        if (my $lxcconf = $section->{lxc}) {
            for my $entry (@$lxcconf) {
                my ($lxc_key, $lxc_value) = @$entry;
                $out .= "$lxc_key: $lxc_value\n";
            }
        }

        return $out;
    };

    my $raw = $render_section->($conf);

    for my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n" . $render_section->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
472
# Validate $value against the schema entry for $key in $confdesc and return
# the normalized value.
#
# Booleans are mapped to 1/0 (accepting on/yes/true and off/no/false in any
# case), integers are returned via int(), numbers and strings are returned
# as given. Strings with a 'format' are checked by
# PVE::JSONSchema::check_format first.
#
# Dies for unknown keys, undefined values, values containing a line break,
# and values that do not match the declared type.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if $value eq '1' || $value =~ m/^(on|yes|true)$/i;
        return 0 if $value eq '0' || $value =~ m/^(off|no|false)$/i;
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        my $fmt = $desc->{format};
        PVE::JSONSchema::check_format($fmt, $value) if $fmt;
        return $value;
    }

    die "internal error"
}
508
# Parse the raw text of a container config file.
#
# $filename must end in '/lxc/<vmid>.conf'; the vmid is only used in
# warning messages. Returns undef if $raw is undefined, otherwise a hash
# with:
#   digest    - SHA1 hex digest of $raw
#   snapshots - hash of snapshot sections, keyed by section name
#   lxc       - array of [key, value] pairs for accepted raw 'lxc.*' lines
#   <key>     - every other recognized "key: value" line
#
# Lines starting with '#' and 'description:' lines are collected into the
# 'description' of the section they appear in. Lines that cannot be parsed
# only produce a warning. A value that fails check_type() also produces a
# warning, but the unchecked value is still stored.
sub parse_pct_config {
    my ($filename, $raw) = @_;

    return undef if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
    };

    # the dot is escaped so that only a literal '.conf' suffix is accepted
    $filename =~ m|/lxc/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    my $conf = $res;
    my $descr = '';
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        # "[name]" starts a snapshot section; flush the description
        # collected so far into the section that just ended
        if ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            $section = $1;
            $conf->{description} = $descr if $descr;
            $descr = '';
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)\s*$/) {
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(lxc\.[a-z0-9_\-\.]+)(:|\s*=)\s*(.*?)\s*$/) {
            my $key = $1;
            my $value = $3;
            my $validity = $valid_lxc_conf_keys->{$key} || 0;
            if ($validity eq 1 || $key =~ m/^lxc\.cgroup\./) {
                push @{$conf->{lxc}}, [$key, $value];
            } elsif (my $errmsg = $validity) {
                warn "vm $vmid - $key: $errmsg\n";
            } else {
                warn "vm $vmid - unable to parse config: $line\n";
            }
        } elsif ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored at line start so that a different property whose
            # value happens to end in "snapstate: ..." is not mistaken
            # for the snapstate key
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(\S.*)\s*$/) {
            my $key = $1;
            my $value = $2;
            eval { $value = check_type($key, $value); };
            warn "vm $vmid - unable to parse value of '$key' - $@" if $@;
            $conf->{$key} = $value;
        } else {
            warn "vm $vmid - unable to parse config: $line\n";
        }
    }

    $conf->{description} = $descr if $descr;

    delete $res->{snapstate}; # just to be sure

    return $res;
}
577
# Return a hash { vmid => { type => 'lxc' } } with all containers that the
# cluster VM list assigns to the local node.
sub config_list {
    my $res = {};

    my $vmlist = PVE::Cluster::get_vmlist();
    return $res unless $vmlist && $vmlist->{ids};

    my $ids = $vmlist->{ids};
    for my $vmid (keys %$ids) {
        next unless $vmid; # skip CT0

        my $entry = $ids->{$vmid};
        next unless $entry->{node} && $entry->{node} eq $nodename;
        next unless $entry->{type} && $entry->{type} eq 'lxc';

        $res->{$vmid}->{type} = 'lxc';
    }

    return $res;
}
593
# Path of a container config file relative to the cluster file system root.
# $node defaults to the local node.
sub cfs_config_path {
    my ($vmid, $node) = @_;

    $node ||= $nodename;

    return "nodes/$node/lxc/$vmid.conf";
}
600
# Absolute path of a container config file below /etc/pve.
# $node defaults to the local node (see cfs_config_path).
sub config_file {
    my ($vmid, $node) = @_;

    return "/etc/pve/" . cfs_config_path($vmid, $node);
}
607
# Read and return the parsed configuration of container $vmid.
# $node defaults to the local node. Dies if no configuration is found.
sub load_config {
    my ($vmid, $node) = @_;

    $node ||= $nodename;

    my $conf = PVE::Cluster::cfs_read_file(cfs_config_path($vmid, $node));
    defined($conf) or die "container $vmid does not exist\n";

    return $conf;
}
619
# Write the configuration of a new container on the local node. The lxc
# config directory of the node is created first; the result of mkdir is
# not checked.
sub create_config {
    my ($vmid, $conf) = @_;

    mkdir "/etc/pve/nodes/$nodename/lxc";

    write_config($vmid, $conf);
}
628
# Remove the config file of container $vmid on the local node.
sub destroy_config {
    my ($vmid) = @_;

    my $path = config_file($vmid, $nodename);
    unlink $path;
}
634
# Store $conf as the configuration of container $vmid on the local node,
# going through the cluster file system layer.
sub write_config {
    my ($vmid, $conf) = @_;

    PVE::Cluster::cfs_write_file(cfs_config_path($vmid), $conf);
}
642
643 # flock: we use one file handle per process, so lock file
644 # can be called multiple times and will succeed for the same process.
645
646 my $lock_handles = {};
647 my $lockdir = "/run/lock/lxc";
648
# Path of the per-container lock file inside $lockdir.
sub lock_filename {
    my ($vmid) = @_;

    return $lockdir . "/pve-config-${vmid}.lock";
}
654
# Run $code->(@param) under the lock file of container $vmid.
#
# $timeout is passed to PVE::Tools::lock_file_full and defaults to 10 when
# not set. The lock directory is created if it does not exist yet. An
# error left in $@ after the call is re-thrown; otherwise the result of
# lock_file_full is returned.
sub lock_container {
    my ($vmid, $timeout, $code, @param) = @_;

    $timeout ||= 10;

    mkdir $lockdir unless -d $lockdir;

    my $res = PVE::Tools::lock_file_full(lock_filename($vmid), $timeout, 0, $code, @param);

    die $@ if $@;

    return $res;
}
670
# True if $name is a key of the container config schema.
sub option_exists {
    my ($name) = @_;

    my $desc = $confdesc->{$name};
    return defined($desc);
}
676
677 # add JSON properties for create and set function
# Merge the container config schema into the property hash $prop, as used
# by the create and set functions. 'parent' and 'snaptime' are left out,
# and entries of $prop that already hold a true value are kept.
# Returns $prop.
sub json_config_properties {
    my $prop = shift;

    for my $opt (grep { $_ ne 'parent' && $_ ne 'snaptime' } keys %$confdesc) {
        $prop->{$opt} ||= $confdesc->{$opt};
    }

    return $prop;
}
689
690 # container status helpers
691
# Return a hash { vmid => 1 } of containers that have a command socket
# '@/var/lib/lxc/<vmid>/command' listed in /proc/net/unix. The hash is
# empty if that file cannot be opened.
sub list_active_containers {
    my $res = {};

    my $fh = IO::File->new("/proc/net/unix", "r");
    return $res if !$fh;

    # a similar test is used by list_active_containers in lxccontainers.c
    while (defined(my $line = <$fh>)) {
        next unless $line =~ m/^[a-f0-9]+:\s\S+\s\S+\s\S+\s\S+\s\S+\s\d+\s(\S+)$/;
        my $path = $1;

        next unless $path =~ m!^@/var/lib/lxc/(\d+)/command$!;
        $res->{$1} = 1;
    }

    close($fh);

    return $res;
}
715
716 # warning: this is slow
# Return 1 if container $vmid is listed by list_active_containers(),
# undef otherwise. Every call re-reads the list, so this is slow.
sub check_running {
    my ($vmid) = @_;

    my $active = list_active_containers();

    return defined($active->{$vmid}) ? 1 : undef;
}
726
# Disk usage of the root file system of a running container, taken via
# PVE::Tools::df on the root directory of process $pid. $vmid is unused.
sub get_container_disk_usage {
    my ($vmid, $pid) = @_;

    my $rootdir = "/proc/$pid/root/";

    return PVE::Tools::df($rootdir, 1);
}
732
733 my $last_proc_vmid_stat;
734
# Read the cpuacct.stat cgroup file of container $vmid and return a hash
# with 'utime' and 'stime'. The hash is empty if the content does not
# start with a "user N" line followed by a "system N" line.
my $parse_cpuacct_stat = sub {
    my ($vmid) = @_;

    my $raw = read_cgroup_value('cpuacct', $vmid, 'cpuacct.stat', 1);

    my %stat;
    @stat{qw(utime stime)} = ($1, $2)
        if $raw =~ m/^user (\d+)\nsystem (\d+)\n/;

    return \%stat;
};
751
# Collect status information for containers.
#
# With $opt_vmid only that container is examined, otherwise all containers
# from config_list(). Returns a hash keyed by vmid. Each entry holds:
# status, pid (running containers only), name, cpus, disk/maxdisk,
# mem/maxmem, swap/maxswap, uptime, cpu, netin/netout, diskread/diskwrite
# and template.
#
# The cpu value is derived from the difference to the sample kept in
# $last_proc_vmid_stat, so the first call for a container reports 0.
sub vmstatus {
    my ($opt_vmid) = @_;

    my $list = $opt_vmid ? { $opt_vmid => { type => 'lxc' }} : config_list();

    my $active_hash = list_active_containers();

    my $cpucount = $cpuinfo->{cpus} || 1;

    my $cdtime = gettimeofday;

    # first pass: static values from the configuration, for all containers
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};

        eval { $d->{pid} = find_lxc_pid($vmid) if defined($active_hash->{$vmid}); };
        warn $@ if $@; # ignore errors (consider them stopped)

        $d->{status} = $d->{pid} ? 'running' : 'stopped';

        my $cfspath = cfs_config_path($vmid);
        my $conf = PVE::Cluster::cfs_read_file($cfspath) || {};

        $d->{name} = $conf->{'hostname'} || "CT$vmid";
        $d->{name} =~ s/[\s]//g;

        $d->{cpus} = $conf->{cpulimit} || $cpucount;

        if ($d->{pid}) {
            my $res = get_container_disk_usage($vmid, $d->{pid});
            $d->{disk} = $res->{used};
            $d->{maxdisk} = $res->{total};
        } else {
            $d->{disk} = 0;
            # use 4GB by default ??
            if (my $rootfs = $conf->{rootfs}) {
                my $rootinfo = parse_ct_rootfs($rootfs);
                $d->{maxdisk} = int(($rootinfo->{size} || 4)*1024*1024)*1024;
            } else {
                $d->{maxdisk} = 4*1024*1024*1024;
            }
        }

        $d->{mem} = 0;
        $d->{swap} = 0;
        $d->{maxmem} = ($conf->{memory}||512)*1024*1024;
        $d->{maxswap} = ($conf->{swap}//0)*1024*1024;

        $d->{uptime} = 0;
        $d->{cpu} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $d->{template} = is_template($conf);
    }

    # second pass: runtime values, for running containers only
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};
        my $pid = $d->{pid};

        next if !$pid; # skip stopped CTs

        my $ctime = (stat("/proc/$pid"))[10]; # 10 = ctime
        # the method lxcfs uses; if stat failed, keep the uptime of 0
        # instead of reporting the current epoch time
        $d->{uptime} = time - $ctime if defined($ctime);

        $d->{mem} = read_cgroup_value('memory', $vmid, 'memory.usage_in_bytes');
        $d->{swap} = read_cgroup_value('memory', $vmid, 'memory.memsw.usage_in_bytes') - $d->{mem};

        my $blkio_bytes = read_cgroup_value('blkio', $vmid, 'blkio.throttle.io_service_bytes', 1);
        my @bytes = split(/\n/, $blkio_bytes);
        foreach my $byte (@bytes) {
            if (my ($key, $value) = $byte =~ /(Read|Write)\s+(\d+)/) {
                $d->{diskread} = $value if $key eq 'Read';
                $d->{diskwrite} = $value if $key eq 'Write';
            }
        }

        my $pstat = &$parse_cpuacct_stat($vmid);

        # the stat hash is empty when cpuacct.stat could not be parsed
        my $used = ($pstat->{utime} // 0) + ($pstat->{stime} // 0);

        my $old = $last_proc_vmid_stat->{$vmid};
        if (!$old) {
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($cdtime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        # only take a new sample once enough time has passed; otherwise
        # report the previously computed value
        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            $d->{cpu} = $old->{cpu};
        }
    }

    my $netdev = PVE::ProcFSTools::read_proc_net_dev();

    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^veth([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $list->{$vmid};

        next if !$d;

        # the counters belong to the host side of the veth pair, so the
        # direction is swapped for the container
        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};

    }

    return $list;
}
880
# Classify a mount point volume string:
#   'device' - absolute path below /dev/
#   'bind'   - any other absolute path
#   'volume' - everything else
sub classify_mountpoint {
    my ($vol) = @_;

    return 'volume' if $vol !~ m!^/!;

    return $vol =~ m!^/dev/! ? 'device' : 'bind';
}
889
# Parse a rootfs/mount point property string against the schema $desc.
#
# An undefined $data is treated as the empty string. On success the parsed
# hash is returned with 'size' converted by PVE::JSONSchema::parse_size
# (when present) and 'type' set by classify_mountpoint(). On failure the
# function returns undef if $noerr is true and dies otherwise.
my $parse_ct_mountpoint_full = sub {
    my ($desc, $data, $noerr) = @_;

    $data //= '';

    my $res;
    eval { $res = PVE::JSONSchema::parse_property_string($desc, $data) };
    if ($@) {
        return undef if $noerr;
        die $@;
    }

    if (defined(my $size = $res->{size})) {
        # keep the original string in $size so that the error message
        # below can show what was rejected
        my $parsed = PVE::JSONSchema::parse_size($size);
        if (!defined($parsed)) {
            return undef if $noerr;
            die "invalid size: $size\n";
        }
        $res->{size} = $parsed;
    }

    $res->{type} = classify_mountpoint($res->{volume});

    return $res;
};
915
# Parse a 'rootfs' property string. The result gets the mount path '/'.
# With $noerr set, undef is returned for invalid input instead of dying.
sub parse_ct_rootfs {
    my ($data, $noerr) = @_;

    my $rootfs = $parse_ct_mountpoint_full->($rootfs_desc, $data, $noerr);

    if (defined($rootfs)) {
        $rootfs->{mp} = '/';
    }

    return $rootfs;
}
925
# Parse an 'mpN' property string using the mount point schema.
# With $noerr set, undef is returned for invalid input instead of dying.
sub parse_ct_mountpoint {
    my ($data, $noerr) = @_;

    return $parse_ct_mountpoint_full->($mp_desc, $data, $noerr);
}
931
# Turn a parsed mount point hash back into a property string. The derived
# 'type' entry is always skipped; 'mp' is skipped as well when $nomp is set.
sub print_ct_mountpoint {
    my ($info, $nomp) = @_;

    my @skip = ('type');
    push @skip, 'mp' if $nomp;

    return PVE::JSONSchema::print_property_string($info, $mp_desc, \@skip);
}
938
# Turn a parsed network hash back into a 'netN' property string.
sub print_lxc_network {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $netconf_desc);
}
943
# Parse a 'netN' property string. Returns an empty hash for empty input.
# Otherwise 'type' is forced to 'veth' and a random MAC address is
# generated when 'hwaddr' is not set.
sub parse_lxc_network {
    my ($data) = @_;

    return {} if !$data;

    my $net = PVE::JSONSchema::parse_property_string($netconf_desc, $data);

    $net->{type} = 'veth';
    $net->{hwaddr} ||= PVE::Tools::random_ether_addr();

    return $net;
}
958
# Read the cgroup file $name of controller $group for container $vmid.
# Returns the whole file content if $full is true, else only its first line.
sub read_cgroup_value {
    my ($group, $vmid, $name, $full) = @_;

    my $path = "/sys/fs/cgroup/$group/lxc/$vmid/$name";

    return $full
        ? PVE::Tools::file_get_contents($path)
        : PVE::Tools::file_read_firstline($path);
}
968
# Write $value to the cgroup file $name of controller $group for container
# $vmid. Nothing is written if the file does not exist.
sub write_cgroup_value {
    my ($group, $vmid, $name, $value) = @_;

    my $cgroup_file = "/sys/fs/cgroup/$group/lxc/$vmid/$name";

    PVE::ProcFSTools::write_proc_entry($cgroup_file, $value) if -e $cgroup_file;
}
976
# Scan /proc for processes whose command line is exactly
# "lxc-console -n <vmid>" (optionally with the /usr/bin/ prefix).
# Returns a hash { vmid => [ pid, ... ] }.
sub find_lxc_console_pids {
    my %pids_of;

    PVE::Tools::dir_glob_foreach('/proc', '\d+', sub {
        my ($pid) = @_;

        my $cmdline = PVE::Tools::file_read_firstline("/proc/$pid/cmdline");
        return if !$cmdline;

        # the arguments in /proc/<pid>/cmdline are separated by NUL bytes
        my @args = split(/\0/, $cmdline);
        return if scalar(@args) != 3;

        my ($prog, $flag, $vmid) = @args;
        return unless $flag eq '-n'
            && $vmid =~ m/^\d+$/
            && $prog =~ m|^(/usr/bin/)?lxc-console$|;

        push @{$pids_of{$vmid}}, $pid;
    });

    return \%pids_of;
}
1002
# Return the PID reported by "lxc-info -n <vmid> -p".
# Dies if the output contains no PID line.
sub find_lxc_pid {
    my ($vmid) = @_;

    my $pid;

    PVE::Tools::run_command(
        ['lxc-info', '-n', $vmid, '-p'],
        outfunc => sub {
            my ($line) = @_;
            $pid = $1 if $line =~ m/^PID:\s+(\d+)$/;
        },
    );

    $pid or die "unable to get PID for CT $vmid (not running?)\n";

    return $pid;
}
1017
1018 # Note: we cannot use Net:IP, because that only allows strict
1019 # CIDR networks
# Parse "address/prefix" with a prefix length from 8 to 32. Returns a hash
# with 'address' and the 'netmask' looked up in
# $PVE::Network::ipv4_reverse_mask. For anything else it returns undef if
# $noerr is true and dies otherwise.
sub parse_ipv4_cidr {
    my ($cidr, $noerr) = @_;

    if ($cidr =~ m!^($IPV4RE)(?:/(\d+))$!) {
        my ($address, $prefix) = ($1, $2);
        if ($prefix > 7 && $prefix <= 32) {
            return {
                address => $address,
                netmask => $PVE::Network::ipv4_reverse_mask->[$prefix],
            };
        }
    }

    return undef if $noerr;

    die "unable to parse ipv4 address/mask\n";
}
1031
# Die if the configuration $conf carries a lock.
sub check_lock {
    my ($conf) = @_;

    if (my $lock = $conf->{'lock'}) {
        die "VM is locked ($lock)\n";
    }
}
1037
# Die with "$err_msg - protection mode enabled" if the configuration has
# the protection flag set.
sub check_protection {
    my ($vm_conf, $err_msg) = @_;

    die "$err_msg - protection mode enabled\n" if $vm_conf->{protection};
}
1045
# Generate /var/lib/lxc/<vmid>/config from the container configuration.
#
# For templates the generated file is removed and nothing else is done.
# Otherwise the file is built from the config values, followed by the raw
# 'lxc.*' entries stored in $conf->{lxc}. The rootfs directory below the
# container directory is created as well. $storage_cfg is not used.
#
# Dies if 'arch' or 'ostype' is missing, or if 'ostype' is unknown.
sub update_lxc_config {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $dir = "/var/lib/lxc/$vmid";

    if ($conf->{template}) {

        unlink "$dir/config";

        return;
    }

    my $raw = '';

    die "missing 'arch' - internal error" if !$conf->{arch};
    $raw .= "lxc.arch = $conf->{arch}\n";

    my $unprivileged = $conf->{unprivileged};
    my $custom_idmap = grep { $_->[0] eq 'lxc.id_map' } @{$conf->{lxc}};

    my $ostype = $conf->{ostype} || die "missing 'ostype' - internal error";
    if ($ostype =~ /^(?:debian | ubuntu | centos | fedora | opensuse | archlinux)$/x) {
        $raw .= "lxc.include = /usr/share/lxc/config/$ostype.common.conf\n";
        if ($unprivileged || $custom_idmap) {
            $raw .= "lxc.include = /usr/share/lxc/config/$ostype.userns.conf\n"
        }
    } else {
        die "implement me (ostype $ostype)";
    }

    # WARNING: DO NOT REMOVE this without making sure that loop device nodes
    # cannot be exposed to the container with r/w access (cgroup perms).
    # When this is enabled mounts will still remain in the monitor's namespace
    # after the container unmounted them and thus will not detach from their
    # files while the container is running!
    $raw .= "lxc.monitor.unshare = 1\n";

    # Should we read them from /etc/subuid?
    if ($unprivileged && !$custom_idmap) {
        $raw .= "lxc.id_map = u 0 100000 65536\n";
        $raw .= "lxc.id_map = g 0 100000 65536\n";
    }

    if (!has_dev_console($conf)) {
        $raw .= "lxc.console = none\n";
        $raw .= "lxc.cgroup.devices.deny = c 5:1 rwm\n";
    }

    my $ttycount = get_tty_count($conf);
    $raw .= "lxc.tty = $ttycount\n";

    # some init scripts expect a linux terminal (turnkey).
    $raw .= "lxc.environment = TERM=linux\n";

    my $utsname = $conf->{hostname} || "CT$vmid";
    $raw .= "lxc.utsname = $utsname\n";

    my $memory = $conf->{memory} || 512;
    my $swap = $conf->{swap} // 0;

    my $lxcmem = int($memory*1024*1024);
    $raw .= "lxc.cgroup.memory.limit_in_bytes = $lxcmem\n";

    my $lxcswap = int(($memory + $swap)*1024*1024);
    $raw .= "lxc.cgroup.memory.memsw.limit_in_bytes = $lxcswap\n";

    if (my $cpulimit = $conf->{cpulimit}) {
        $raw .= "lxc.cgroup.cpu.cfs_period_us = 100000\n";
        my $value = int(100000*$cpulimit);
        $raw .= "lxc.cgroup.cpu.cfs_quota_us = $value\n";
    }

    my $shares = $conf->{cpuunits} || 1024;
    $raw .= "lxc.cgroup.cpu.shares = $shares\n";

    # the parsed result is not used in this function; the call is kept
    # because it dies on an invalid rootfs value
    parse_ct_rootfs($conf->{rootfs});

    $raw .= "lxc.rootfs = $dir/rootfs\n";

    # sort the keys so that the network sections are always written in the
    # same order (net0, net1, ...) no matter how the hash is ordered
    my $netcount = 0;
    foreach my $k (sort keys %$conf) {
        next if $k !~ m/^net(\d+)$/;
        my $ind = $1;
        my $d = parse_lxc_network($conf->{$k});
        $netcount++;
        $raw .= "lxc.network.type = veth\n";
        $raw .= "lxc.network.veth.pair = veth${vmid}i${ind}\n";
        $raw .= "lxc.network.hwaddr = $d->{hwaddr}\n" if defined($d->{hwaddr});
        $raw .= "lxc.network.name = $d->{name}\n" if defined($d->{name});
        $raw .= "lxc.network.mtu = $d->{mtu}\n" if defined($d->{mtu});
    }

    # append the raw lxc.* entries; they may define networks of their own
    if (my $lxcconf = $conf->{lxc}) {
        foreach my $entry (@$lxcconf) {
            my ($k, $v) = @$entry;
            $netcount++ if $k eq 'lxc.network.type';
            $raw .= "$k = $v\n";
        }
    }

    $raw .= "lxc.network.type = empty\n" if !$netcount;

    File::Path::mkpath("$dir/rootfs");

    PVE::Tools::file_set_contents("$dir/config", $raw);
}
1152
1153 # verify and cleanup nameserver list (replace \0 with ' ')
# Verify every entry of a nameserver list with
# PVE::JSONSchema::pve_verify_ip and return the entries joined by a
# single space.
sub verify_nameserver_list {
    my ($nameserver_list) = @_;

    my @servers = PVE::Tools::split_list($nameserver_list);

    for my $server (@servers) {
        PVE::JSONSchema::pve_verify_ip($server);
    }

    return join(' ', @servers);
}
1165
# Normalize a search domain list: split it with PVE::Tools::split_list and
# return the entries joined by a single space. The entries themselves are
# not validated.
sub verify_searchdomain_list {
    my ($searchdomain_list) = @_;

    # todo: should we add checks for valid dns domains?
    my @domains = PVE::Tools::split_list($searchdomain_list);

    return join(' ', @domains);
}
1177
# Record $volid in the lowest free 'unusedN' slot of $config and return
# the key that was used. Returns nothing if the volume is already listed
# in one of the slots. Dies if all slots are taken.
sub add_unused_volume {
    my ($config, $volid) = @_;

    my @slots = map { "unused$_" } (0 .. $MAX_UNUSED_DISKS - 1);

    # do not add duplicates
    foreach my $slot (@slots) {
        my $vid = $config->{$slot};
        return if $vid && $vid eq $volid;
    }

    my ($key) = grep { !$config->{$_} } @slots;

    die "Too many unused volumes - please delete them first.\n" if !$key;

    $config->{$key} = $volid;

    return $key;
}
1197
# Apply option deletions and updates to a container configuration.
#
# Parameters:
#   $vmid    - container ID
#   $conf    - config hash, modified in place
#   $running - true when the container is running; changes are then written
#              to the config file after each option and applied live where
#              this function knows how to
#   $param   - hash of options to set ('memory' and 'swap' are extracted
#              from it)
#   $delete  - optional array ref of option names to remove
#
# Options that cannot be changed on a running container are collected and
# reported with a single die at the very end, after all other changes have
# been processed.
sub update_pct_config {
    my ($vmid, $conf, $running, $param, $delete) = @_;

    my @nohotplug;

    my $new_disks = 0;
    my @deleted_volumes;

    my $rootdir;
    if ($running) {
        my $pid = find_lxc_pid($vmid);
        $rootdir = "/proc/$pid/root";
    }

    # Returns true (and remembers the option) when the container is running,
    # i.e. when the caller has to skip a change that is not hot-pluggable.
    my $hotplug_error = sub {
        if ($running) {
            push @nohotplug, @_;
            return 1;
        } else {
            return 0;
        }
    };

    if (defined($delete)) {
        foreach my $opt (@$delete) {
            if (!exists($conf->{$opt})) {
                warn "no such option: $opt\n";
                next;
            }

            if ($opt eq 'hostname' || $opt eq 'memory' || $opt eq 'rootfs') {
                die "unable to delete required option '$opt'\n";
            } elsif ($opt eq 'swap') {
                delete $conf->{$opt};
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", -1);
            } elsif ($opt eq 'description' || $opt eq 'onboot' || $opt eq 'startup') {
                delete $conf->{$opt};
            } elsif ($opt eq 'nameserver' || $opt eq 'searchdomain' ||
                     $opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
                next if $hotplug_error->($opt);
                delete $conf->{$opt};
            } elsif ($opt =~ m/^net(\d+)$/) {
                # \d+ (not \d) so that interfaces with a multi-digit index
                # can be deleted as well, matching the update branch below.
                my $netid = $1;
                delete $conf->{$opt};
                next if !$running;
                PVE::Network::veth_delete("veth${vmid}i$netid");
            } elsif ($opt eq 'protection') {
                delete $conf->{$opt};
            } elsif ($opt =~ m/^unused(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                push @deleted_volumes, $conf->{$opt};
                delete $conf->{$opt};
            } elsif ($opt =~ m/^mp(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                my $mountpoint = parse_ct_mountpoint($conf->{$opt});
                if ($mountpoint->{type} eq 'volume') {
                    add_unused_volume($conf, $mountpoint->{volume});
                }
                delete $conf->{$opt};
            } elsif ($opt eq 'unprivileged') {
                die "unable to delete read-only option: '$opt'\n";
            } else {
                die "implement me (delete: $opt)"
            }
            write_config($vmid, $conf) if $running;
        }
    }

    # There's no separate swap size to configure, there's memory and "total"
    # memory (iow. memory+swap). This means we have to change them together.
    my $wanted_memory = PVE::Tools::extract_param($param, 'memory');
    my $wanted_swap = PVE::Tools::extract_param($param, 'swap');
    if (defined($wanted_memory) || defined($wanted_swap)) {

        my $old_memory = ($conf->{memory} || 512);
        my $old_swap = ($conf->{swap} || 0);

        $wanted_memory //= $old_memory;
        $wanted_swap //= $old_swap;

        my $total = $wanted_memory + $wanted_swap;
        if ($running) {
            my $old_total = $old_memory + $old_swap;
            # The order of the two writes depends on whether the total grows
            # or shrinks: when growing, the memory+swap limit is raised first;
            # otherwise the memory limit is lowered first.
            if ($total > $old_total) {
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
                write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
            } else {
                write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
            }
        }
        $conf->{memory} = $wanted_memory;
        $conf->{swap} = $wanted_swap;

        write_config($vmid, $conf) if $running;
    }

    foreach my $opt (keys %$param) {
        my $value = $param->{$opt};
        if ($opt eq 'hostname') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'onboot') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt eq 'startup') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
            next if $hotplug_error->($opt);
            $conf->{$opt} = $value;
        } elsif ($opt eq 'nameserver') {
            next if $hotplug_error->($opt);
            my $list = verify_nameserver_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'searchdomain') {
            next if $hotplug_error->($opt);
            my $list = verify_searchdomain_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'cpulimit') {
            next if $hotplug_error->($opt); # FIXME: hotplug
            $conf->{$opt} = $value;
        } elsif ($opt eq 'cpuunits') {
            $conf->{$opt} = $value;
            write_cgroup_value("cpu", $vmid, "cpu.shares", $value);
        } elsif ($opt eq 'description') {
            $conf->{$opt} = PVE::Tools::encode_text($value);
        } elsif ($opt =~ m/^net(\d+)$/) {
            my $netid = $1;
            my $net = parse_lxc_network($value);
            if (!$running) {
                $conf->{$opt} = print_lxc_network($net);
            } else {
                update_net($vmid, $conf, $opt, $net, $netid, $rootdir);
            }
        } elsif ($opt eq 'protection') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt =~ m/^mp(\d+)$/) {
            next if $hotplug_error->($opt);
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            $conf->{$opt} = $value;
            $new_disks = 1;
        } elsif ($opt eq 'rootfs') {
            next if $hotplug_error->($opt);
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            $conf->{$opt} = $value;
        } elsif ($opt eq 'unprivileged') {
            die "unable to modify read-only option: '$opt'\n";
        } else {
            die "implement me: $opt";
        }
        write_config($vmid, $conf) if $running;
    }

    if (@deleted_volumes) {
        my $storage_cfg = PVE::Storage::config();
        foreach my $volume (@deleted_volumes) {
            delete_mountpoint_volume($storage_cfg, $vmid, $volume);
        }
    }

    if ($new_disks) {
        my $storage_cfg = PVE::Storage::config();
        create_disks($storage_cfg, $vmid, $conf, $conf);
    }

    # This should be the last thing we do here
    if ($running && scalar(@nohotplug)) {
        die "unable to modify " . join(',', @nohotplug) . " while container is running\n";
    }
}
1368
# True unless the 'console' option is explicitly set to a false value.
sub has_dev_console {
    my ($conf) = @_;

    my $console = $conf->{console};
    my $disabled = defined($console) && !$console;

    return !$disabled;
}
1374
# Configured 'tty' value, or the schema default when it is not set.
sub get_tty_count {
    my ($conf) = @_;

    my $tty = $conf->{tty};

    return defined($tty) ? $tty : $confdesc->{tty}->{default};
}
1380
# Configured 'cmode' value, or the schema default when it is not set.
sub get_cmode {
    my ($conf) = @_;

    my $cmode = $conf->{cmode};

    return defined($cmode) ? $cmode : $confdesc->{cmode}->{default};
}
1386
# Build the command (as an array ref) used to open a console for the
# container, depending on its console mode. Dies on an unknown mode.
sub get_console_command {
    my ($vmid, $conf) = @_;

    my $cmode = get_cmode($conf);

    return ['lxc-console', '-n', $vmid, '-t', 0] if $cmode eq 'console';
    return ['lxc-console', '-n', $vmid] if $cmode eq 'tty';
    return ['lxc-attach', '--clear-env', '-n', $vmid] if $cmode eq 'shell';

    die "internal error";
}
1402
# Return ($ipv4, $ipv6) taken from the net0 entry of the config, with any
# trailing "/prefix" removed. Symbolic settings (dhcp, manual, and auto for
# IPv6) are reported as undef. Returns undef when net0 is not configured.
sub get_primary_ips {
    my ($conf) = @_;

    # return data from net0
    my $net0 = $conf->{net0};
    return undef if !defined($net0);

    my $net = parse_lxc_network($net0);

    # Empty values are passed through untouched, keywords become undef and
    # real addresses lose their prefix length.
    my $plain_address = sub {
        my ($addr, $keywords) = @_;
        return $addr if !$addr;
        return undef if $addr =~ $keywords;
        $addr =~ s!/\d+$!!;
        return $addr;
    };

    my $ipv4 = $plain_address->($net->{ip}, qr/^(dhcp|manual)$/);
    my $ipv6 = $plain_address->($net->{ip6}, qr/^(auto|dhcp|manual)$/);

    return ($ipv4, $ipv6);
}
1430
# Free the storage volume behind a mountpoint, but only when the mountpoint
# is classified as a 'volume' and the volume is owned by container $vmid.
# Anything else is silently left alone.
sub delete_mountpoint_volume {
    my ($storage_cfg, $vmid, $volume) = @_;

    return if classify_mountpoint($volume) ne 'volume';

    my (undef, undef, $owner) = PVE::Storage::parse_volname($storage_cfg, $volume);

    # Guard against an undefined owner so the numeric comparison does not
    # emit an "uninitialized value" warning.
    return if !defined($owner) || $vmid != $owner;

    PVE::Storage::vdisk_free($storage_cfg, $volume);
}
1439
# Remove a container: free the volumes of all its mountpoints, remove the
# /var/lib/lxc/$vmid directory contents we created, and destroy its config.
sub destroy_lxc_container {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $free_volume = sub {
        my (undef, $mountpoint) = @_;
        delete_mountpoint_volume($storage_cfg, $vmid, $mountpoint->{volume});
    };
    foreach_mountpoint($conf, $free_volume);

    my $ctdir = "/var/lib/lxc/$vmid";
    rmdir "$ctdir/rootfs";
    unlink "$ctdir/config";
    rmdir $ctdir;

    # lxc-destroy is intentionally not used here:
    #my $cmd = ['lxc-destroy', '-n', $vmid ];
    #PVE::Tools::run_command($cmd);
    destroy_config($vmid);
}
1456
# Cleanup after a container stopped: deactivate its volumes unless
# $keepActive is set. Failures are only reported as warnings.
sub vm_stop_cleanup {
    my ($storage_cfg, $vmid, $conf, $keepActive) = @_;

    eval {
        return if $keepActive; # leaves the eval block only

        my $vollist = get_vm_volumes($conf);
        PVE::Storage::deactivate_volumes($storage_cfg, $vollist);
    };
    warn $@ if $@; # avoid errors - just warn
}
1469
# Numeric "not equal" that tolerates undef: two undefs are equal, an undef
# and a defined value differ, otherwise the values are compared with '!='.
my $safe_num_ne = sub {
    my ($left, $right) = @_;

    my $missing = grep { !defined($_) } $left, $right;
    return 0 if $missing == 2;
    return 1 if $missing;

    return $left != $right;
};
1479
# String "not equal" that tolerates undef: two undefs are equal, an undef
# and a defined value differ, otherwise the values are compared with 'ne'.
my $safe_string_ne = sub {
    my ($left, $right) = @_;

    my $missing = grep { !defined($_) } $left, $right;
    return 0 if $missing == 2;
    return 1 if $missing;

    return $left ne $right;
};
1489
# Apply a changed network option ($opt, e.g. net0) to a running container.
#
# - hwaddr or name changed: the veth device is deleted and hot-plugged again
# - bridge, tag or firewall changed: the host side is unplugged/re-plugged
# - no previous config for $opt: the interface is hot-plugged
# Afterwards the IP configuration is updated via update_ipconfig().
# The config is written after each intermediate step.
sub update_net {
    my ($vmid, $conf, $opt, $newnet, $netid, $rootdir) = @_;

    # for when there are physical interfaces
    die "cannot update interface of type $newnet->{type}"
        if $newnet->{type} ne 'veth';

    my $veth = "veth${vmid}i${netid}";
    my $eth = $newnet->{name};

    # properties describing how the host side is attached
    my @link_props = qw(bridge tag firewall);

    my $oldnetcfg = $conf->{$opt};
    if (!$oldnetcfg) {
        hotplug_net($vmid, $conf, $opt, $newnet, $netid);
    } else {
        my $oldnet = parse_lxc_network($oldnetcfg);

        if ($safe_string_ne->($oldnet->{hwaddr}, $newnet->{hwaddr}) ||
            $safe_string_ne->($oldnet->{name}, $newnet->{name})) {

            PVE::Network::veth_delete($veth);
            delete $conf->{$opt};
            write_config($vmid, $conf);

            hotplug_net($vmid, $conf, $opt, $newnet, $netid);

        } elsif ($safe_string_ne->($oldnet->{bridge}, $newnet->{bridge}) ||
                 $safe_num_ne->($oldnet->{tag}, $newnet->{tag}) ||
                 $safe_num_ne->($oldnet->{firewall}, $newnet->{firewall})) {

            if ($oldnet->{bridge}) {
                PVE::Network::tap_unplug($veth);
                delete @{$oldnet}{@link_props};
                # persist the unplugged state before plugging again
                $conf->{$opt} = print_lxc_network($oldnet);
                write_config($vmid, $conf);
            }

            PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});
            foreach my $prop (@link_props) {
                my $newval = $newnet->{$prop};
                $oldnet->{$prop} = $newval if $newval;
            }
            $conf->{$opt} = print_lxc_network($oldnet);
            write_config($vmid, $conf);
        }
    }

    update_ipconfig($vmid, $conf, $opt, $eth, $newnet, $rootdir);
}
1539
# Create a veth pair for a running container, plug the host side, move the
# peer into the container and bring it up. The link-level settings that
# were applied (not the IP settings) are then stored in $conf->{$opt} and
# the config is written.
sub hotplug_net {
    my ($vmid, $conf, $opt, $newnet, $netid) = @_;

    my $veth = "veth${vmid}i${netid}";
    my $vethpeer = $veth . "p";
    my $eth = $newnet->{name};

    PVE::Network::veth_create($veth, $vethpeer, $newnet->{bridge}, $newnet->{hwaddr});
    PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});

    # attach peer in container
    PVE::Tools::run_command(['lxc-device', '-n', $vmid, 'add', $vethpeer, "$eth" ]);

    # link up peer in container
    PVE::Tools::run_command(
        ['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', '/sbin/ip', 'link', 'set', $eth ,'up' ]);

    my %done = (type => 'veth');
    foreach my $prop (qw(bridge tag firewall hwaddr name)) {
        my $val = $newnet->{$prop};
        $done{$prop} = $val if $val;
    }
    $conf->{$opt} = print_lxc_network(\%done);

    write_config($vmid, $conf);
}
1566
# Apply changed IP address / gateway settings of interface $eth to a
# running container, for IPv4 and then IPv6.
#
# For each address family the new address is added first, then the default
# route is replaced, then the old address is removed. If adding the address
# or replacing the route fails, the change for that family is abandoned
# (with a warning) and the config is left untouched. Otherwise the new
# values are stored in $conf->{$opt}, the config is written and
# PVE::LXC::Setup::setup_network() is invoked.
sub update_ipconfig {
    my ($vmid, $conf, $opt, $eth, $newnet, $rootdir) = @_;

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);

    my $optdata = parse_lxc_network($conf->{$opt});

    # Values that are keywords rather than addresses the 'ip' tool accepts.
    my $symbolic_re = qr/^(?:auto|dhcp|manual)$/;

    # Run a command inside the container's network namespace.
    my $nscmd = sub {
        my $cmdargs = shift;
        PVE::Tools::run_command(['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', @_], %$cmdargs);
    };
    my $ipcmd = sub { &$nscmd({}, '/sbin/ip', @_) };

    my $change_ip_config = sub {
        my ($ipversion) = @_;

        my $family_opt = "-$ipversion";
        my $suffix = $ipversion == 4 ? '' : $ipversion;
        my $gw= "gw$suffix";
        my $ip= "ip$suffix";

        my $newip = $newnet->{$ip};
        my $newgw = $newnet->{$gw};
        my $oldip = $optdata->{$ip};

        my $change_ip = &$safe_string_ne($oldip, $newip);
        my $change_gw = &$safe_string_ne($optdata->{$gw}, $newgw);

        return if !$change_ip && !$change_gw;

        # step 1: add new IP, if this fails we cancel
        my $is_real_ip = ($newip && $newip !~ $symbolic_re);
        if ($change_ip && $is_real_ip) {
            eval { &$ipcmd($family_opt, 'addr', 'add', $newip, 'dev', $eth); };
            if (my $err = $@) {
                warn $err;
                return;
            }
        }

        # step 2: replace gateway
        # If this fails we delete the added IP and cancel.
        # If it succeeds we save the config and delete the old IP, ignoring
        # errors. The config is then saved.
        # Note: 'ip route replace' also adds the route when none exists yet.
        if ($change_gw) {
            if ($newgw) {
                eval {
                    if ($is_real_ip && !PVE::Network::is_ip_in_cidr($newgw, $newip, $ipversion)) {
                        &$ipcmd($family_opt, 'route', 'add', $newgw, 'dev', $eth);
                    }
                    &$ipcmd($family_opt, 'route', 'replace', 'default', 'via', $newgw);
                };
                if (my $err = $@) {
                    warn $err;
                    # the route was not replaced, the old IP is still available
                    # rollback (delete new IP) and cancel
                    # Only addresses that step 1 actually added can be removed.
                    if ($change_ip && $is_real_ip) {
                        eval { &$ipcmd($family_opt, 'addr', 'del', $newip, 'dev', $eth); };
                        warn $@ if $@; # no need to die here
                    }
                    return;
                }
            } else {
                eval { &$ipcmd($family_opt, 'route', 'del', 'default'); };
                # if the route was not deleted, the guest might have deleted it manually
                # warn and continue
                warn $@ if $@;
            }
        }

        # from this point on we save the configuration
        # step 3: delete old IP ignoring errors
        # Keyword values (including 'manual') were never configured by us as
        # an address, so there is nothing to delete for them.
        if ($change_ip && $oldip && $oldip !~ $symbolic_re) {
            # We need to enable promote_secondaries, otherwise our newly added
            # address will be removed along with the old one.
            my $promote = 0;
            eval {
                if ($ipversion == 4) {
                    &$nscmd({ outfunc => sub { $promote = int(shift) } },
                            'cat', "/proc/sys/net/ipv4/conf/$eth/promote_secondaries");
                    &$nscmd({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=1");
                }
                &$ipcmd($family_opt, 'addr', 'del', $oldip, 'dev', $eth);
            };
            warn $@ if $@; # no need to die here

            if ($ipversion == 4) {
                # Restoring the sysctl is best effort as well; a failure here
                # must not prevent the config from being saved below.
                eval { &$nscmd({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=$promote"); };
                warn $@ if $@;
            }
        }

        foreach my $property ($ip, $gw) {
            if ($newnet->{$property}) {
                $optdata->{$property} = $newnet->{$property};
            } else {
                delete $optdata->{$property};
            }
        }
        $conf->{$opt} = print_lxc_network($optdata);
        write_config($vmid, $conf);
        $lxc_setup->setup_network($conf);
    };

    &$change_ip_config(4);
    &$change_ip_config(6);

}
1676
1677 # Internal snapshots
1678
1679 # NOTE: Snapshot create/delete involves several non-atomic
1680 # actions, and can take a long time.
1681 # So we try to avoid locking the file and use the 'lock' variable
1682 # inside the config file instead.
1683
# Copy all config entries from $source to $dest, except the snapshot
# bookkeeping keys and other per-config keys listed below.
my $snapshot_copy_config = sub {
    my ($source, $dest) = @_;

    my %skip = map { $_ => 1 }
        qw(snapshots snapstate snaptime vmstate lock digest description);

    foreach my $key (keys %$source) {
        next if $skip{$key};

        $dest->{$key} = $source->{$key};
    }
};
1699
# First phase of snapshot creation, run under the container lock:
# checks that a snapshot may be taken, sets the 'snapshot' lock in the
# config, and stores a copy of the current config as snapshot $snapname
# with snapstate 'prepare'. Returns the new snapshot hash.
my $snapshot_prepare = sub {
    my ($vmid, $snapname, $comment) = @_;

    my $snap;

    my $updatefn = sub {
        my $conf = load_config($vmid);

        if (is_template($conf)) {
            die "you can't take a snapshot if it's a template\n";
        }

        check_lock($conf);

        $conf->{lock} = 'snapshot';

        if (defined($conf->{snapshots}->{$snapname})) {
            die "snapshot name '$snapname' already used\n";
        }

        my $storecfg = PVE::Storage::config();
        my $feature = 'snapshot';
        $feature = 'vzdump' if $snapname eq 'vzdump';
        has_feature($feature, $conf, $storecfg)
            or die "snapshot feature is not available\n";

        $snap = {};
        $conf->{snapshots}->{$snapname} = $snap;

        $snapshot_copy_config->($conf, $snap);

        $snap->{snapstate} = "prepare";
        $snap->{snaptime} = time();
        $snap->{description} = $comment if $comment;

        write_config($vmid, $conf);
    };

    lock_container($vmid, 10, $updatefn);

    return $snap;
};
1739
# Final phase of snapshot creation, run under the container lock:
# verifies the 'snapshot' lock and the 'prepare' state, then clears both
# and makes $snapname the parent of the current config.
my $snapshot_commit = sub {
    my ($vmid, $snapname) = @_;

    my $updatefn = sub {
        my $conf = load_config($vmid);

        my $lock = $conf->{lock};
        die "missing snapshot lock\n"
            if !($lock && $lock eq 'snapshot');

        my $snap = $conf->{snapshots}->{$snapname};
        die "snapshot '$snapname' does not exist\n"
            if !defined($snap);

        my $state = $snap->{snapstate};
        die "wrong snapshot state\n"
            if !($state && $state eq "prepare");

        delete $snap->{snapstate};
        delete $conf->{lock};
        $conf->{parent} = $snapname;

        write_config($vmid, $conf);
    };

    lock_container($vmid, 10 ,$updatefn);
};
1766
# Check whether the storage volumes of the container support $feature.
# 'vzdump' is checked as 'snapshot', but then only rootfs and mountpoints
# with the backup flag are considered. Returns 1 or 0; dies for any
# considered mountpoint other than rootfs (not implemented yet).
sub has_feature {
    my ($feature, $conf, $storecfg, $snapname) = @_;

    my $vzdump = $feature eq 'vzdump';
    $feature = 'snapshot' if $vzdump;

    my $unsupported;
    my $check = sub {
        my ($ms, $mountpoint) = @_;

        return if $unsupported; # skip further test
        return if $vzdump && $ms ne 'rootfs' && !$mountpoint->{backup};

        if (!PVE::Storage::volume_has_feature($storecfg, $feature, $mountpoint->{volume}, $snapname)) {
            $unsupported = 1;
        }

        # TODO: implement support for mountpoints
        if ($ms ne 'rootfs') {
            die "unable to handle mountpoint '$ms' - feature not implemented\n";
        }
    };
    foreach_mountpoint($conf, $check);

    return $unsupported ? 0 : 1;
}
1789
# Make the current process enter namespace $which (a name below
# /proc/$pid/ns) of process $pid; $type is passed on to setns.
# Dies when the namespace file cannot be opened or entered.
my $enter_namespace = sub {
    my ($vmid, $pid, $which, $type) = @_;

    my $nsfile = "/proc/$pid/ns/$which";

    sysopen(my $fd, $nsfile, O_RDONLY)
        or die "failed to open $which namespace of container $vmid: $!\n";
    PVE::Tools::setns(fileno($fd), $type)
        or die "failed to enter $which namespace of container $vmid: $!\n";
    close $fd;
};
1798
# Child-side half of sync_container_namespace(): enter the mount namespace
# of $pid, signal the parent over $socket, read the mount table the parent
# sends back and sync every listed mountpoint except lxcfs ones.
# Sync failures are only reported as warnings.
my $do_syncfs = sub {
    my ($vmid, $pid, $socket) = @_;

    $enter_namespace->($vmid, $pid, 'mnt', PVE::Tools::CLONE_NEWNS);

    # Tell the parent process to start reading our /proc/mounts
    print {$socket} "go\n";
    $socket->flush();

    # Receive /proc/self/mounts
    my $mountdata = do { local $/ = undef; <$socket> };
    close $socket;

    # Now sync all mountpoints...
    my $mounts = PVE::ProcFSTools::parse_mounts($mountdata);
    for my $entry (@$mounts) {
        my (undef, $dir, $fstype) = @$entry;
        next if $fstype eq 'fuse.lxcfs';
        eval { PVE::Tools::sync_mountpoint($dir); };
        warn $@ if $@;
    }
};
1821
# Sync all filesystems mounted inside the container's mount namespace.
#
# A child process is forked which enters the namespace and reports "go"
# over a socket pair. The parent then reads the child's mount table from
# /proc/$child/mounts and sends it back, so that the child can sync each
# mountpoint. Dies when the child cannot enter the namespace, when the
# mount table cannot be read, or when the child exits with a failure.
sub sync_container_namespace {
    my ($vmid) = @_;
    my $pid = find_lxc_pid($vmid);

    # SOCK_DGRAM is nicer for barriers but cannot be slurped
    socketpair my $pfd, my $cfd, AF_UNIX, SOCK_STREAM, PF_UNSPEC
        or die "failed to create socketpair: $!\n";

    my $child = fork();
    die "fork failed: $!\n" if !defined($child);

    if (!$child) {
        eval {
            close $pfd;
            &$do_syncfs($vmid, $pid, $cfd);
        };
        if (my $err = $@) {
            warn $err;
            POSIX::_exit(1);
        }
        POSIX::_exit(0);
    }
    close $cfd;

    # Wait for the child; only retry when waitpid was interrupted, so that
    # any other failure (e.g. no such child) cannot loop forever.
    my $reap_child = sub {
        while (waitpid($child, 0) != $child) {
            last if $! != EINTR;
        }
    };

    # On failure close our socket end first (the child may be blocked
    # reading from it), then reap the child so no zombie is left behind.
    my $abort = sub {
        my ($msg) = @_;
        close $pfd;
        $reap_child->();
        die $msg;
    };

    # $go is undef when the child died before it could write anything.
    my $go = <$pfd>;
    $abort->("failed to enter container namespace\n")
        if !defined($go) || $go ne "go\n";

    open my $mounts, '<', "/proc/$child/mounts"
        or $abort->("failed to open container's /proc/mounts: $!\n");
    my $mountdata = do { local $/ = undef; <$mounts> };
    close $mounts;
    print {$pfd} $mountdata;
    close $pfd;

    $reap_child->();
    die "failed to sync container namespace\n" if $? != 0;
}
1858
# Create snapshot $snapname of the container's root volume.
#
# The snapshot is prepared in the config first. A running container is
# frozen and its filesystems are synced before the storage snapshot is
# taken, and unfrozen afterwards in any case. On error the prepared
# snapshot is removed again and the error is re-thrown; on success the
# snapshot is committed in the config.
sub snapshot_create {
    my ($vmid, $snapname, $comment) = @_;

    my $snap = $snapshot_prepare->($vmid, $snapname, $comment);
    my $conf = load_config($vmid);
    my $running = check_running($vmid);

    my $frozen = 0;
    my $drivehash = {};

    eval {
        if ($running) {
            # set the flag first so we also unfreeze after a failed freeze
            $frozen = 1;
            PVE::Tools::run_command(['/usr/bin/lxc-freeze', '-n', $vmid]);
            sync_container_namespace($vmid);
        }

        my $storecfg = PVE::Storage::config();
        my $volid = parse_ct_rootfs($conf->{rootfs})->{volume};

        PVE::Storage::volume_snapshot($storecfg, $volid, $snapname);
        $drivehash->{rootfs} = 1;
    };
    my $err = $@;

    if ($frozen) {
        eval { PVE::Tools::run_command(['/usr/bin/lxc-unfreeze', '-n', $vmid]); };
        warn $@ if $@;
    }

    if ($err) {
        eval { snapshot_delete($vmid, $snapname, 1, $drivehash); };
        warn "$@\n" if $@;
        die "$err\n";
    }

    $snapshot_commit->($vmid, $snapname);
}
1901
# Delete snapshot $snapname of container $vmid.
#
# The snapshot is first marked with snapstate 'delete' in the config, then
# the storage snapshot of the root volume is removed, and finally the
# snapshot entry is dropped from the config (re-linking 'parent'
# references to the deleted snapshot's own parent).
#
# If removing the storage snapshot fails:
#   - without $force the config entry is kept and the error is thrown
#   - with $force the config entry is removed anyway, then the error is
#     thrown
#
# Note: $drivehash is only set when called from snapshot_create. In that
# case the config lock is not checked and is cleared together with the
# snapshot entry.
sub snapshot_delete {
    my ($vmid, $snapname, $force, $drivehash) = @_;

    my $conf;

    my $updatefn = sub {

        $conf = load_config($vmid);

        die "you can't delete a snapshot if vm is a template\n"
            if is_template($conf);

        my $snap = $conf->{snapshots}->{$snapname};

        if (!$drivehash) {
            check_lock($conf);
        }

        die "snapshot '$snapname' does not exist\n" if !defined($snap);

        $snap->{snapstate} = 'delete';

        write_config($vmid, $conf);
    };

    lock_container($vmid, 10, $updatefn);

    my $storecfg = PVE::Storage::config();

    # Replace a 'parent' reference to the deleted snapshot by $new_parent,
    # or drop it when there is none.
    my $unlink_parent = sub {

        my ($confref, $new_parent) = @_;

        if ($confref->{parent} && $confref->{parent} eq $snapname) {
            if ($new_parent) {
                $confref->{parent} = $new_parent;
            } else {
                delete $confref->{parent};
            }
        }
    };

    my $del_snap = sub {

        $conf = load_config($vmid);

        if ($drivehash) {
            delete $conf->{lock};
        } else {
            check_lock($conf);
        }

        my $parent = $conf->{snapshots}->{$snapname}->{parent};
        foreach my $snapkey (keys %{$conf->{snapshots}}) {
            &$unlink_parent($conf->{snapshots}->{$snapkey}, $parent);
        }

        &$unlink_parent($conf, $parent);

        delete $conf->{snapshots}->{$snapname};

        write_config($vmid, $conf);
    };

    my $rootfs = $conf->{snapshots}->{$snapname}->{rootfs};
    my $rootinfo = parse_ct_rootfs($rootfs);
    my $volid = $rootinfo->{volume};

    eval {
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname);
    };
    my $err = $@;

    # A storage failure must never be swallowed: without $force we stop
    # here and keep the config entry so the deletion can be retried.
    die "Can't delete snapshot: $vmid $snapname $err\n" if $err && !$force;

    lock_container($vmid, 10, $del_snap);

    # Forced removal: the config entry is gone, but still report the error.
    die "Can't delete snapshot: $vmid $snapname $err\n" if $err;
}
1985
# Roll container $vmid back to snapshot $snapname.
#
# Under the container lock the container is stopped (killed) if it is
# running, the config gets a 'rollback' lock and the snapshot's config
# entries are copied over the current config. The root volume is then
# rolled back on the storage and the lock is removed again.
#
# NOTE(review): the config is loaded before the container lock is taken
# and that copy is written back inside the locked sections — confirm that
# concurrent config changes cannot get lost here.
sub snapshot_rollback {
    my ($vmid, $snapname) = @_;

    my $storecfg = PVE::Storage::config();

    my $conf = load_config($vmid);

    die "you can't rollback if vm is a template\n" if is_template($conf);

    my $snap = $conf->{snapshots}->{$snapname};

    die "snapshot '$snapname' does not exist\n" if !defined($snap);

    my $rootfs = $snap->{rootfs};
    my $rootinfo = parse_ct_rootfs($rootfs);
    my $volid = $rootinfo->{volume};

    PVE::Storage::volume_rollback_is_possible($storecfg, $volid, $snapname);

    my $updatefn = sub {

        die "unable to rollback to incomplete snapshot (snapstate = $snap->{snapstate})\n"
            if $snap->{snapstate};

        check_lock($conf);

        # List form: the arguments are passed to lxc-stop directly and are
        # never interpreted by a shell.
        system('lxc-stop', '-n', $vmid, '--kill') if check_running($vmid);

        die "unable to rollback vm $vmid: vm is running\n"
            if check_running($vmid);

        $conf->{lock} = 'rollback';

        # copy snapshot config to current config
        # (the 'snapshots' entry itself is skipped by the copy helper and
        # therefore stays as it is)
        &$snapshot_copy_config($snap, $conf);
        delete $conf->{snaptime};
        delete $conf->{snapname};
        $conf->{parent} = $snapname;

        write_config($vmid, $conf);
    };

    my $unlockfn = sub {
        delete $conf->{lock};
        write_config($vmid, $conf);
    };

    lock_container($vmid, 10, $updatefn);

    PVE::Storage::volume_snapshot_rollback($storecfg, $volid, $snapname);

    lock_container($vmid, 5, $unlockfn);
}
2044
# Convert the container's root volume into a base (template) volume and
# store the new volume ID in the config. Dies when the storage does not
# offer the 'template' feature for that volume.
sub template_create {
    my ($vmid, $conf) = @_;

    my $storecfg = PVE::Storage::config();

    my $rootinfo = parse_ct_rootfs($conf->{rootfs});
    my $volid = $rootinfo->{volume};

    if (!PVE::Storage::volume_has_feature($storecfg, 'template', $volid)) {
        die "Template feature is not available for '$volid'\n";
    }

    PVE::Storage::activate_volumes($storecfg, [$volid]);

    $rootinfo->{volume} = PVE::Storage::vdisk_create_base($storecfg, $volid);
    $conf->{rootfs} = print_ct_mountpoint($rootinfo, 1);

    write_config($vmid, $conf);
}
2064
# Returns 1 when the config has 'template' set to 1, otherwise 0.
sub is_template {
    my ($conf) = @_;

    my $flag = $conf->{template};

    # Always return an explicit boolean instead of falling off the end of
    # the sub with whatever the condition happened to evaluate to.
    return (defined($flag) && $flag == 1) ? 1 : 0;
}
2070
# List of all possible mountpoint keys: 'rootfs' followed by mp0 ..
# mp($MAX_MOUNT_POINTS - 1), optionally in reverse order.
sub mountpoint_names {
    my ($reverse) = @_;

    my @names = ('rootfs', map { "mp$_" } 0 .. $MAX_MOUNT_POINTS - 1);

    return $reverse ? reverse @names : @names;
}
2082
2083
# Call $func->($key, $mountpoint) for every mountpoint key present in
# $conf (see mountpoint_names), in normal or reverse order. Entries whose
# value is undefined or for which the parser returns undef are skipped.
sub foreach_mountpoint_full {
    my ($conf, $reverse, $func) = @_;

    foreach my $key (mountpoint_names($reverse)) {
        my $value = $conf->{$key};
        next if !defined($value);

        my $parser = $key eq 'rootfs' ? \&parse_ct_rootfs : \&parse_ct_mountpoint;
        my $mountpoint = $parser->($value, 1);
        next if !defined($mountpoint);

        $func->($key, $mountpoint);
    }
}
2096
# Iterate over the configured mountpoints in forward order (rootfs first).
sub foreach_mountpoint {
    my ($conf, $func) = @_;

    my $reverse = 0;
    foreach_mountpoint_full($conf, $reverse, $func);
}
2102
# Iterate over the configured mountpoints in reverse order (rootfs last).
sub foreach_mountpoint_reverse {
    my ($conf, $func) = @_;

    my $reverse = 1;
    foreach_mountpoint_full($conf, $reverse, $func);
}
2108
# Check that $authuser may modify every option in $key_list.
#
# Each option is mapped to the privilege guarding it and checked through
# $rpcenv->check_vm_perm(); what happens on a missing privilege is up to
# that method. 'root@pam' is exempt from the checks. Returns 1.
sub check_ct_modify_config_perm {
    my ($rpcenv, $authuser, $vmid, $pool, $key_list) = @_;

    # Only the superuser may skip the checks. (The comparison used to be
    # inverted, which exempted every user except root.)
    return 1 if $authuser eq 'root@pam';

    foreach my $opt (@$key_list) {

        if ($opt eq 'cpus' || $opt eq 'cpuunits' || $opt eq 'cpulimit') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.CPU']);
        } elsif ($opt eq 'rootfs' || $opt =~ /^mp\d+$/) {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk']);
        } elsif ($opt eq 'memory' || $opt eq 'swap') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Memory']);
        } elsif ($opt =~ m/^net\d+$/ || $opt eq 'nameserver' ||
                 $opt eq 'searchdomain' || $opt eq 'hostname') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Network']);
        } else {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Options']);
        }
    }

    return 1;
}
2132
# Unmount all mountpoints of a container below /var/lib/lxc/$vmid/rootfs,
# in reverse mount order. Mountpoints that are not mounted are skipped.
# With $noerr set, unmount failures only produce warnings.
sub umount_all {
    my ($vmid, $storage_cfg, $conf, $noerr) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    my $volid_list = get_vm_volumes($conf);

    my $unmount = sub {
        my ($ms, $mountpoint) = @_;

        my ($volid, $mount) = @{$mountpoint}{qw(volume mp)};
        return if !$volid || !$mount;

        # collapse duplicate slashes from joining the two paths
        (my $mount_path = "$rootdir/$mount") =~ s!/+!/!g;

        return if !PVE::ProcFSTools::is_mounted($mount_path);

        eval {
            PVE::Tools::run_command(['umount', '-d', $mount_path]);
        };
        my $err = $@;
        return if !$err;

        die $err if !$noerr;
        warn $err;
    };

    foreach_mountpoint_reverse($conf, $unmount);
}
2164
# Activate all volumes of a container and mount its mountpoints below
# /var/lib/lxc/$vmid/rootfs. If any mount fails, everything is unmounted
# again (ignoring errors) and the original error is re-thrown.
# Returns the root directory.
sub mount_all {
    my ($vmid, $storage_cfg, $conf) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    File::Path::make_path($rootdir);

    my $volid_list = get_vm_volumes($conf);
    PVE::Storage::activate_volumes($storage_cfg, $volid_list);

    my $mount_one = sub {
        my (undef, $mountpoint) = @_;

        mountpoint_mount($mountpoint, $rootdir, $storage_cfg);
    };

    eval { foreach_mountpoint($conf, $mount_one); };
    my $err = $@;
    if ($err) {
        warn "mounting container failed\n";
        umount_all($vmid, $storage_cfg, $conf, 1);
        die $err;
    }

    return $rootdir;
}
2189
2190
# Call mountpoint_mount() without a root directory, i.e. only ask for the
# path belonging to the mountpoint instead of mounting it there.
sub mountpoint_mount_path {
    my ($mountpoint, $storage_cfg, $snapname) = @_;

    my $no_rootdir = undef;

    return mountpoint_mount($mountpoint, $no_rootdir, $storage_cfg, $snapname);
}
2196
# Die when resolving $path (after canonicalization) yields a different
# path, i.e. when a symlink is involved somewhere along the way.
my $check_mount_path = sub {
    my ($path) = @_;

    my $canonical = File::Spec->canonpath($path);
    my $resolved = Cwd::realpath($canonical);

    die "mount path modified by symlink: $canonical != $resolved"
        if $resolved ne $canonical;
};
2205
# Return the loop device (/dev/loopN) that 'losetup --associated' reports
# for $path, or undef when no matching output line is found. If several
# lines match, the last one wins.
sub query_loopdev {
    my ($path) = @_;

    my $loopdev;
    my $cmd = ['losetup', '--associated', $path];

    PVE::Tools::run_command($cmd, outfunc => sub {
        my ($line) = @_;
        $loopdev = $1 if $line =~ m@^(/dev/loop\d+):@;
    });

    return $loopdev;
}
2219
# Run a function with a file attached to a loop device.
# The loop device is always detached afterwards (or set to autoclear).
# Returns the loop device.
#
# $func is called with the loop device path as its only argument. If it
# dies, that error is re-thrown after the detach attempt; a failing detach
# is then only reported as a warning so it cannot hide the original error.
# If only the detach fails, its error is thrown.
sub run_with_loopdev {
    my ($func, $file) = @_;

    my $device;
    my $parser = sub {
        my $line = shift;
        if ($line =~ m@^(/dev/loop\d+)$@) {
            $device = $1;
        }
    };
    PVE::Tools::run_command(['losetup', '--show', '-f', $file], outfunc => $parser);
    die "failed to setup loop device for $file\n" if !$device;

    eval { &$func($device); };
    my $err = $@;

    eval { PVE::Tools::run_command(['losetup', '-d', $device]); };
    my $detach_err = $@;

    if ($err) {
        warn $detach_err if $detach_err;
        die $err;
    }
    die $detach_err if $detach_err;

    return $device;
}
2240
# Mount a single container mount point.
#
# Parameters:
#   $mountpoint  - parsed mount point hash; the keys read here are 'volume',
#                  'mp', 'type', 'ro', 'quota' and 'acl'
#   $rootdir     - directory below which 'mp' gets mounted; use
#                  $rootdir = undef to just return the corresponding mount
#                  path without mounting anything
#   $storage_cfg - storage configuration passed on to PVE::Storage
#   $snapname    - optional snapshot name (only valid for storage volumes)
#
# Returns nothing if the mount point has no volume or no 'mp'. Otherwise
# returns the source path in scalar context, and in list context:
#   subvol volume    : ($path, 0, undef)
#   raw/iso volume   : ($path, $use_loopdev, $mounted_dev)
#   device           : ($volid, 0, $volid)
#   bind mount       : ($volid, 0, undef)
#
# Dies on unsupported storage types / image formats, on read-only bind
# mounts, on snapshots of non-storage volumes and if a mount command fails.
sub mountpoint_mount {
    my ($mountpoint, $rootdir, $storage_cfg, $snapname) = @_;

    my $volid = $mountpoint->{volume};
    my $mount = $mountpoint->{mp};
    my $type = $mountpoint->{type};
    # quota is only requested for writable mounts of the live volume
    my $quota = !$snapname && !$mountpoint->{ro} && $mountpoint->{quota};
    my $mounted_dev;

    return if !$volid || !$mount;

    my $mount_path;

    if (defined($rootdir)) {
        $rootdir =~ s!/+$!!;
        $mount_path = "$rootdir/$mount";
        $mount_path =~ s!/+!/!g;
        $check_mount_path->($mount_path);
        File::Path::mkpath($mount_path);
    }

    my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1);

    die "unknown snapshot path for '$volid'" if !$storage && defined($snapname);

    my $optstring = '';
    if (defined($mountpoint->{acl})) {
        $optstring .= ($mountpoint->{acl} ? 'acl' : 'noacl');
    }
    if ($mountpoint->{ro}) {
        $optstring .= ',' if $optstring;
        $optstring .= 'ro';
    }

    # Only hand '-o' to mount when there really are options; otherwise every
    # mount command would carry a pointless empty "-o ''" argument.
    my @extra_opts;
    @extra_opts = ('-o', $optstring) if $optstring;

    if ($storage) {

        my $scfg = PVE::Storage::storage_config($storage_cfg, $storage);
        my $path = PVE::Storage::path($storage_cfg, $volid, $snapname);

        my ($vtype, undef, undef, undef, undef, $isBase, $format) =
            PVE::Storage::parse_volname($storage_cfg, $volid);

        $format = 'iso' if $vtype eq 'iso'; # allow to handle iso files

        if ($format eq 'subvol') {
            if ($mount_path) {
                if ($snapname) {
                    if ($scfg->{type} eq 'zfspool') {
                        # the zfs mount source is the path without leading slashes
                        my $path_arg = $path;
                        $path_arg =~ s!^/+!!;
                        PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, '-t', 'zfs', $path_arg, $mount_path]);
                    } else {
                        die "cannot mount subvol snapshots for storage type '$scfg->{type}'\n";
                    }
                } else {
                    if ($mountpoint->{ro}) {
                        die "read-only bind mounts not supported\n";
                    }
                    PVE::Tools::run_command(['mount', '-o', 'bind', @extra_opts, $path, $mount_path]);
                    warn "cannot enable quota control for bind mounted subvolumes\n" if $quota;
                }
            }
            return wantarray ? ($path, 0, $mounted_dev) : $path;
        } elsif ($format eq 'raw' || $format eq 'iso') {
            # Performs the actual mount; $path is either the volume path or
            # the device handed in by run_with_loopdev().
            my $domount = sub {
                my ($path) = @_;
                if ($mount_path) {
                    if ($format eq 'iso') {
                        PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, $path, $mount_path]);
                    } elsif ($isBase || defined($snapname)) {
                        PVE::Tools::run_command(['mount', '-o', 'ro,noload', @extra_opts, $path, $mount_path]);
                    } else {
                        # keep the quota options local instead of modifying
                        # the shared @extra_opts from inside the closure
                        my @quota_opts;
                        if ($quota) {
                            @quota_opts = ('-o', 'usrjquota=aquota.user,grpjquota=aquota.group,jqfmt=vfsv0');
                        }
                        PVE::Tools::run_command(['mount', @extra_opts, @quota_opts, $path, $mount_path]);
                    }
                }
            };
            my $use_loopdev = 0;
            if ($scfg->{path}) {
                # file based storage: the image has to go through a loop device
                $mounted_dev = run_with_loopdev($domount, $path);
                $use_loopdev = 1;
            } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' ||
                     $scfg->{type} eq 'rbd' || $scfg->{type} eq 'lvmthin') {
                $mounted_dev = $path;
                $domount->($path);
            } else {
                die "unsupported storage type '$scfg->{type}'\n";
            }
            return wantarray ? ($path, $use_loopdev, $mounted_dev) : $path;
        } else {
            die "unsupported image format '$format'\n";
        }
    } elsif ($type eq 'device') {
        PVE::Tools::run_command(['mount', @extra_opts, $volid, $mount_path]) if $mount_path;
        return wantarray ? ($volid, 0, $volid) : $volid;
    } elsif ($type eq 'bind') {
        if ($mountpoint->{ro}) {
            die "read-only bind mounts not supported\n";
            # Theoretically we'd have to execute both:
            # mount -o bind $a $b
            # mount -o bind,remount,ro $a $b
        }
        die "directory '$volid' does not exist\n" if ! -d $volid;
        $check_mount_path->($volid);
        PVE::Tools::run_command(['mount', '-o', 'bind', @extra_opts, $volid, $mount_path]) if $mount_path;
        warn "cannot enable quota control for bind mounts\n" if $quota;
        return wantarray ? ($volid, 0, undef) : $volid;
    }

    die "unsupported storage";
}
2357
# Collect the volume IDs of all storage backed mount points in $conf.
#
# Mount points whose type is not 'volume', that have no volume set, or whose
# volume ID cannot be parsed into a storage ID are skipped. If $excludes is
# given, the mount point with that key is skipped as well.
#
# Returns an array reference of volume IDs.
sub get_vm_volumes {
    my ($conf, $excludes) = @_;

    my @volumes;

    my $collect = sub {
        my ($ms, $mountpoint) = @_;

        return if $excludes && $ms eq $excludes;

        my $volid = $mountpoint->{volume};
        return unless $volid && $mountpoint->{type} eq 'volume';

        # only volumes living on a configured storage are of interest
        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return unless $sid;

        push @volumes, $volid;
    };

    foreach_mountpoint($conf, $collect);

    return \@volumes;
}
2380
# Create an ext4 file system (with the 'mmp' feature) on $dev.
#
# $rootuid/$rootgid become the owner of the root directory of the new file
# system. Both are optional and default to 0, so that callers which only
# pass a device do not interpolate undefined values into the mkfs option.
#
# Returns whatever PVE::Tools::run_command returns.
sub mkfs {
    my ($dev, $rootuid, $rootgid) = @_;

    $rootuid = 0 if !defined($rootuid);
    $rootgid = 0 if !defined($rootgid);

    PVE::Tools::run_command(['mkfs.ext4', '-O', 'mmp',
                             '-E', "root_owner=$rootuid:$rootgid",
                             $dev]);
}
2388
# Create a file system on the given volume.
#
# $volid is either a device path below /dev/, which is formatted directly,
# or a storage volume ID, which gets activated and must be in 'raw' format.
# $rootuid/$rootgid are handed to mkfs() in both cases, so the file system
# root is owned by the container's root user.
#
# Dies if the volume ID has no storage part or the volume is not 'raw'.
sub format_disk {
    my ($storage_cfg, $volid, $rootuid, $rootgid) = @_;

    if ($volid =~ m!^/dev/.+!) {
        # pass the owner here as well, otherwise devices would not get the
        # mapped root of id-mapped containers as owner
        mkfs($volid, $rootuid, $rootgid);
        return;
    }

    my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1);

    die "cannot format volume '$volid' with no storage\n" if !$storage;

    PVE::Storage::activate_volumes($storage_cfg, [$volid]);

    my $path = PVE::Storage::path($storage_cfg, $volid);

    my ($vtype, undef, undef, undef, undef, $isBase, $format) =
        PVE::Storage::parse_volname($storage_cfg, $volid);

    die "cannot format volume '$volid' (format == $format)\n"
        if $format ne 'raw';

    mkfs($path, $rootuid, $rootgid);
}
2413
# Free every volume listed in $vollist (array reference of volume IDs).
#
# This is best effort: a failure to free one volume is reported as a warning
# and the remaining volumes are still processed.
sub destroy_disks {
    my ($storecfg, $vollist) = @_;

    for my $volume (@$vollist) {
        eval { PVE::Storage::vdisk_free($storecfg, $volume); };
        my $err = $@;
        warn $err if $err;
    }
}
2422
# Allocate (and format, where needed) the volumes requested in $settings.
#
# Mount points in $settings whose volume has the form "<storage>:<size>"
# (size in GB, may be fractional) get a new volume allocated on that
# storage; all other mount points are taken over as they are. The resulting
# mount point strings are written to $conf, and $mountpoint->{volume} and
# $mountpoint->{size} are updated for newly allocated volumes.
#
# Returns an array reference with the IDs of all newly allocated volumes.
# On error, every volume allocated so far is freed again and the error is
# re-thrown.
sub create_disks {
    my ($storecfg, $vmid, $settings, $conf) = @_;

    my $vollist = [];

    # Allocate a volume and record it right away. Recording it only after
    # formatting would leak the volume whenever format_disk() fails, because
    # the cleanup below would not know about it.
    my $alloc_volume = sub {
        my ($storage, $format, $size_kb) = @_;
        my $volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, $format,
                                              undef, $size_kb);
        push @$vollist, $volid;
        return $volid;
    };

    eval {
        my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
        # subvolumes are not formatted, their root gets chown'ed instead
        my $chown_vollist = [];

        foreach_mountpoint($settings, sub {
            my ($ms, $mountpoint) = @_;

            my $volid = $mountpoint->{volume};

            my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1);

            if ($storage && ($volid =~ m/^([^:\s]+):(\d+(\.\d+)?)$/)) {
                my $size_gb = $2;

                my $size_kb = int(${size_gb}*1024) * 1024;

                my $scfg = PVE::Storage::storage_config($storecfg, $storage);
                # fixme: use better naming ct-$vmid-disk-X.raw?

                if ($scfg->{type} eq 'dir' || $scfg->{type} eq 'nfs') {
                    if ($size_kb > 0) {
                        $volid = $alloc_volume->($storage, 'raw', $size_kb);
                        format_disk($storecfg, $volid, $rootuid, $rootgid);
                    } else {
                        # size 0 means: use a plain subvolume
                        $volid = $alloc_volume->($storage, 'subvol', 0);
                        push @$chown_vollist, $volid;
                    }
                } elsif ($scfg->{type} eq 'zfspool') {

                    $volid = $alloc_volume->($storage, 'subvol', $size_kb);
                    push @$chown_vollist, $volid;
                } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' || $scfg->{type} eq 'lvmthin') {

                    $volid = $alloc_volume->($storage, 'raw', $size_kb);
                    format_disk($storecfg, $volid, $rootuid, $rootgid);

                } elsif ($scfg->{type} eq 'rbd') {

                    die "krbd option must be enabled on storage type '$scfg->{type}'\n" if !$scfg->{krbd};
                    $volid = $alloc_volume->($storage, 'raw', $size_kb);
                    format_disk($storecfg, $volid, $rootuid, $rootgid);
                } else {
                    die "unable to create containers on storage type '$scfg->{type}'\n";
                }
                $mountpoint->{volume} = $volid;
                $mountpoint->{size} = $size_kb * 1024;
                $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            } else {
                # use specified/existing volid/dir/device
                $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            }
        });

        PVE::Storage::activate_volumes($storecfg, $chown_vollist, undef);
        foreach my $volid (@$chown_vollist) {
            my $path = PVE::Storage::path($storecfg, $volid, undef);
            # report, but do not abort on, a failed ownership change
            chown($rootuid, $rootgid, $path)
                or warn "unable to change owner of '$path' to $rootuid:$rootgid - $!\n";
        }
        PVE::Storage::deactivate_volumes($storecfg, $chown_vollist, undef);
    };
    # free allocated images on error
    if (my $err = $@) {
        destroy_disks($storecfg, $vollist);
        die $err;
    }
    return $vollist;
}
2500
2501 # bash completion helper
2502
# Completion helper listing the volume IDs usable as OS template.
#
# If the value typed so far ($cvalue) starts with "<storage>:", only that
# storage is queried. For the 'restore' command backups are listed instead
# of container templates.
#
# Returns an array reference of volume IDs.
sub complete_os_templates {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    my $storeid = $cvalue =~ m/^([^:]+):/ ? $1 : undef;

    my $vtype = 'vztmpl';
    $vtype = 'backup' if $cmdname eq 'restore';

    my $data = PVE::Storage::template_list($cfg, $storeid, $vtype);

    my @volids;
    for my $items (values %$data) {
        for my $item (@$items) {
            my $volid = $item->{volid};
            push @volids, $volid if defined($volid);
        }
    }

    return \@volids;
}
2526
# Shared implementation of the container ID completion helpers.
#
# $running selects which IDs from vmstatus() are returned:
#   undef - all of them
#   true  - non-template containers listed by list_active_containers()
#   false - non-template containers not listed by list_active_containers()
#
# Returns an array reference of container IDs.
my $complete_ctid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();

    my $active_hash = list_active_containers();

    my @matching = grep {
        my $id = $_;
        my $status = $idlist->{$id};
        my $wanted = 1;
        if (defined($running)) {
            my $is_active = $active_hash->{$id};
            # drop templates and containers in the "wrong" run state
            $wanted = 0
                if $status->{template} || ($running ? !$is_active : $is_active);
        }
        $wanted;
    } keys %$idlist;

    return \@matching;
};
2548
# Completion helper: calls $complete_ctid_full without a run state filter
# and returns its result (an array reference of container IDs).
sub complete_ctid {
    return $complete_ctid_full->();
}
2552
# Completion helper: calls $complete_ctid_full with a false 'running' flag
# and returns its result (an array reference of container IDs).
sub complete_ctid_stopped {
    return $complete_ctid_full->(0);
}
2556
# Completion helper: calls $complete_ctid_full with a true 'running' flag
# and returns its result (an array reference of container IDs).
sub complete_ctid_running {
    return $complete_ctid_full->(1);
}
2560
# Extract the user namespace id mappings from a container configuration.
#
# Every 'lxc.id_map' entry in $conf->{lxc} must look like
# "<u|g> <container id> <host id> <length>"; anything else is fatal.
# Without any such entry, an unprivileged container gets a default mapping
# of 65536 ids starting at host id 100000.
#
# Returns ($id_map, $rootuid, $rootgid): $id_map is an array reference of
# [type, container id, host id, length] entries, $rootuid/$rootgid are the
# host ids that container id 0 is mapped to (0 if there is no mapping).
sub parse_id_maps {
    my ($conf) = @_;

    my @maps;
    my ($rootuid, $rootgid) = (0, 0);

    my $raw_entries = $conf->{lxc};
    for my $pair (@$raw_entries) {
        my ($key, $value) = @$pair;
        next unless $key eq 'lxc.id_map';

        my @fields = $value =~ /^([ug])\s+(\d+)\s+(\d+)\s+(\d+)\s*$/;
        die "failed to parse id_map: $value\n" if !@fields;

        push @maps, [@fields];

        # a mapping starting at container id 0 defines the container's root
        my ($kind, $ct_start, $host_start) = @fields;
        next if $ct_start != 0;
        $rootuid = $host_start if $kind eq 'u';
        $rootgid = $host_start if $kind eq 'g';
    }

    if (!@maps && $conf->{unprivileged}) {
        # Should we read them from /etc/subuid?
        @maps = (
            ['u', '0', '100000', '65536'],
            ['g', '0', '100000', '65536'],
        );
        $rootuid = $rootgid = 100000;
    }

    return (\@maps, $rootuid, $rootgid);
}
2593
# Build the command prefix that runs a command inside the user namespace
# described by $id_map (array reference as returned by parse_id_maps).
#
# Returns an array reference: 'lxc-usernsexec' with one '-m' option per
# mapping (fields joined by ':') followed by '--', or an empty array
# reference if there are no mappings.
sub userns_command {
    my ($id_map) = @_;

    return [] if !@$id_map;

    my @cmd = ('lxc-usernsexec');
    for my $mapping (@$id_map) {
        push @cmd, '-m', join(':', @$mapping);
    }
    push @cmd, '--';

    return \@cmd;
}
2601
2602 1;