git.proxmox.com Git - pve-container.git/blob - src/PVE/LXC.pm
add support for network trunks
[pve-container.git] / src / PVE / LXC.pm
1 package PVE::LXC;
2
3 use strict;
4 use warnings;
5 use POSIX qw(EINTR);
6
7 use File::Path;
8 use File::Spec;
9 use Cwd qw();
10 use Fcntl ':flock';
11
12 use PVE::Cluster qw(cfs_register_file cfs_read_file);
13 use PVE::Storage;
14 use PVE::SafeSyslog;
15 use PVE::INotify;
16 use PVE::JSONSchema qw(get_standard_option);
17 use PVE::Tools qw($IPV6RE $IPV4RE dir_glob_foreach);
18 use PVE::Network;
19 use PVE::AccessControl;
20 use PVE::ProcFSTools;
21 use Time::HiRes qw (gettimeofday);
22
23 use Data::Dumper;
24
# Name of the local node; used as the default node when building config paths.
my $nodename = PVE::INotify::nodename();

# Host CPU information, read once at module load (vmstatus uses 'cpus' and 'user_hz').
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();

# Common tar flags, exported as a package variable.
our $COMMON_TAR_FLAGS = [
    qw(--sparse --numeric-owner --acls --xattrs),
    '--xattrs-include=user.*',
    '--xattrs-include=security.capability',
    '--warning=no-xattr-write',
];

# Register the parser/writer pair for container config files below '/lxc/'.
cfs_register_file('/lxc/', \&parse_pct_config, \&write_pct_config);
36
# Property-string schema for the 'rootfs' option. It is also the base of the
# mount point schema ($mp_desc), which adds the 'mp' key.
my $rootfs_desc = {
    volume => {
        description => 'Volume, device or directory to mount into the container.',
        format_description => 'volume',
        type => 'string',
        default_key => 1,
    },
    backup => {
        description => 'Whether to include the mountpoint in backups.',
        format_description => '[1|0]',
        type => 'boolean',
        optional => 1,
    },
    size => {
        description => 'Volume size (read only value).',
        format_description => 'DiskSize',
        type => 'string',
        format => 'disk-size',
        optional => 1,
    },
};

# Standard option describing the container root file system.
PVE::JSONSchema::register_standard_option('pve-ct-rootfs', {
    description => "Use volume as container root.",
    type => 'string',
    format => $rootfs_desc,
    optional => 1,
});

# Standard option describing a snapshot name.
PVE::JSONSchema::register_standard_option('pve-lxc-snapshot-name', {
    description => "The name of the snapshot.",
    type => 'string',
    format => 'pve-configid',
    maxLength => 40,
});
70
# Schema of all options allowed in a container config file. check_type()
# validates values against it; the netN, mpN and unusedN entries are added
# further below in loops.
my $confdesc = {
    lock => {
        description => "Lock/unlock the VM.",
        type => 'string',
        enum => [qw(migrate backup snapshot rollback)],
        optional => 1,
    },
    onboot => {
        description => "Specifies whether a VM will be started during system bootup.",
        type => 'boolean',
        default => 0,
        optional => 1,
    },
    startup => get_standard_option('pve-startup-order'),
    template => {
        description => "Enable/disable Template.",
        type => 'boolean',
        default => 0,
        optional => 1,
    },
    arch => {
        description => "OS architecture type.",
        type => 'string',
        enum => [qw(amd64 i386)],
        default => 'amd64',
        optional => 1,
    },
    ostype => {
        description => "OS type. Corresponds to lxc setup scripts in /usr/share/lxc/config/<ostype>.common.conf.",
        type => 'string',
        enum => [qw(debian ubuntu centos fedora opensuse archlinux)],
        optional => 1,
    },
    console => {
        description => "Attach a console device (/dev/console) to the container.",
        type => 'boolean',
        default => 1,
        optional => 1,
    },
    tty => {
        description => "Specify the number of tty available to the container",
        type => 'integer',
        minimum => 0,
        maximum => 6,
        default => 2,
        optional => 1,
    },
    cpulimit => {
        description => "Limit of CPU usage. Note if the computer has 2 CPUs, it has total of '2' CPU time. Value '0' indicates no CPU limit.",
        type => 'number',
        minimum => 0,
        maximum => 128,
        default => 0,
        optional => 1,
    },
    cpuunits => {
        description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to weights of all the other running VMs.\n\nNOTE: You can disable fair-scheduler configuration by setting this to 0.",
        type => 'integer',
        minimum => 0,
        maximum => 500000,
        default => 1024,
        optional => 1,
    },
    memory => {
        description => "Amount of RAM for the VM in MB.",
        type => 'integer',
        minimum => 16,
        default => 512,
        optional => 1,
    },
    swap => {
        description => "Amount of SWAP for the VM in MB.",
        type => 'integer',
        minimum => 0,
        default => 512,
        optional => 1,
    },
    hostname => {
        description => "Set a host name for the container.",
        type => 'string',
        format => 'dns-name',
        maxLength => 255,
        optional => 1,
    },
    description => {
        description => "Container description. Only used on the configuration web interface.",
        type => 'string',
        optional => 1,
    },
    searchdomain => {
        description => "Sets DNS search domains for a container. Create will automatically use the setting from the host if you neither set searchdomain or nameserver.",
        type => 'string',
        format => 'dns-name-list',
        optional => 1,
    },
    nameserver => {
        description => "Sets DNS server IP address for a container. Create will automatically use the setting from the host if you neither set searchdomain or nameserver.",
        type => 'string',
        format => 'address-list',
        optional => 1,
    },
    rootfs => get_standard_option('pve-ct-rootfs'),
    parent => {
        description => "Parent snapshot name. This is used internally, and should not be modified.",
        type => 'string',
        format => 'pve-configid',
        maxLength => 40,
        optional => 1,
    },
    snaptime => {
        description => "Timestamp for snapshots.",
        type => 'integer',
        minimum => 0,
        optional => 1,
    },
    cmode => {
        description => "Console mode. By default, the console command tries to open a connection to one of the available tty devices. By setting cmode to 'console' it tries to attach to /dev/console instead. If you set cmode to 'shell', it simply invokes a shell inside the container (no login).",
        type => 'string',
        enum => [qw(shell console tty)],
        default => 'tty',
        optional => 1,
    },
    protection => {
        description => "Sets the protection flag of the container. This will prevent the remove operation. This will prevent the CT or CT's disk remove/update operation.",
        type => 'boolean',
        default => 0,
        optional => 1,
    },
    unprivileged => {
        description => "Makes the container run as unprivileged user. (Should not be modified manually.)",
        type => 'boolean',
        default => 0,
        optional => 1,
    },
};
202
# Raw 'lxc.*' keys that parse_pct_config() accepts and passes through
# unchanged. Keys below 'lxc.cgroup.' are not listed here; the parser accepts
# them by prefix match.
#
# The former placeholder entries with the bare key 'lxc.' were removed: they
# were duplicates of each other and could never be looked up, because the
# parser only recognizes keys with at least one character after the dot.
my $valid_lxc_conf_keys = {
    map { ($_ => 1) } qw(
        lxc.include
        lxc.arch
        lxc.utsname
        lxc.haltsignal
        lxc.rebootsignal
        lxc.stopsignal
        lxc.init_cmd
        lxc.network.type
        lxc.network.flags
        lxc.network.link
        lxc.network.mtu
        lxc.network.name
        lxc.network.hwaddr
        lxc.network.ipv4
        lxc.network.ipv4.gateway
        lxc.network.ipv6
        lxc.network.ipv6.gateway
        lxc.network.script.up
        lxc.network.script.down
        lxc.pts
        lxc.console.logfile
        lxc.console
        lxc.tty
        lxc.devttydir
        lxc.hook.autodev
        lxc.autodev
        lxc.kmsg
        lxc.mount
        lxc.mount.entry
        lxc.mount.auto
        lxc.rootfs
        lxc.rootfs.mount
        lxc.rootfs.options
        lxc.cap.drop
        lxc.cap.keep
        lxc.aa_profile
        lxc.aa_allow_incomplete
        lxc.se_context
        lxc.seccomp
        lxc.id_map
        lxc.hook.pre-start
        lxc.hook.pre-mount
        lxc.hook.mount
        lxc.hook.start
        lxc.hook.stop
        lxc.hook.post-stop
        lxc.hook.clone
        lxc.hook.destroy
        lxc.loglevel
        lxc.logfile
        lxc.start.auto
        lxc.start.delay
        lxc.start.order
        lxc.group
        lxc.environment
    )
};
265
# Property-string schema for the netN options (one network interface each).
# Keys annotated with '(lxc.network.*)' name the raw lxc setting they map to.
my $netconf_desc = {
    type => {
        type => 'string',
        optional => 1,
        description => "Network interface type.",
        enum => [qw(veth)],
    },
    name => {
        type => 'string',
        format_description => 'String',
        description => 'Name of the network device as seen from inside the container. (lxc.network.name)',
        pattern => '[-_.\w\d]+',
    },
    bridge => {
        type => 'string',
        format_description => 'vmbr<Number>',
        description => 'Bridge to attach the network device to.',
        pattern => '[-_.\w\d]+',
        optional => 1,
    },
    hwaddr => {
        type => 'string',
        format_description => 'MAC',
        # Fixed: this description used to be a copy of the 'bridge' one.
        description => 'MAC address of the network device. (lxc.network.hwaddr)',
        pattern => qr/(?:[a-f0-9]{2}:){5}[a-f0-9]{2}/i,
        optional => 1,
    },
    mtu => {
        type => 'integer',
        format_description => 'Number',
        description => 'Maximum transfer unit of the interface. (lxc.network.mtu)',
        minimum => 64, # minimum ethernet frame is 64 bytes
        optional => 1,
    },
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        description => 'IPv4 address in CIDR format.',
        optional => 1,
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        description => 'Default gateway for IPv4 traffic.',
        optional => 1,
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        description => 'IPv6 address in CIDR format.',
        optional => 1,
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        description => 'Default gateway for IPv6 traffic.',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        format_description => '[1|0]',
        description => "Controls whether this interface's firewall rules should be used.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        format_description => 'VlanNo',
        minimum => '2',
        maximum => '4094',
        description => "VLAN tag for this interface.",
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:;\d+)*/,
        format_description => 'vlanid[;vlanid...]',
        description => "VLAN ids to pass through the interface",
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-lxc-network', $netconf_desc);
351
# Register the options net0 .. net9, one per possible network interface.
my $MAX_LXC_NETWORKS = 10;
foreach my $index (0 .. $MAX_LXC_NETWORKS - 1) {
    $confdesc->{"net$index"} = {
        description => "Specifies network interfaces for the container.",
        type => 'string',
        format => $netconf_desc,
        optional => 1,
    };
}
360
# Mount point schema: everything the rootfs schema has, plus the target path
# inside the container.
my $mp_desc = {
    %$rootfs_desc,
    mp => {
        description => 'Path to the mountpoint as seen from inside the container.',
        format_description => 'Path',
        type => 'string',
    },
};
PVE::JSONSchema::register_format('pve-ct-mountpoint', $mp_desc);

# Schema shared by all unusedN options.
my $unuseddesc = {
    description => "Reference to unused volumes.",
    type => 'string',
    format => 'pve-volume-id',
    optional => 1,
};
376
# Register the options mp0 .. mp9, one per additional mount point.
my $MAX_MOUNT_POINTS = 10;
for (my $i = 0; $i < $MAX_MOUNT_POINTS; $i++) {
    $confdesc->{"mp$i"} = {
        optional => 1, # was listed twice in this hash
        type => 'string', format => $mp_desc,
        description => "Use volume as container mount point (experimental feature).",
    };
}

# Register the options unused0 .. unused9. The loop is bounded by
# $MAX_UNUSED_DISKS (same value as $MAX_MOUNT_POINTS), which is also the limit
# add_unused_volume() scans.
my $MAX_UNUSED_DISKS = $MAX_MOUNT_POINTS;
for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) {
    $confdesc->{"unused$i"} = $unuseddesc;
}
391
# Serialize a container configuration into the text form stored on disk.
#
# $filename - unused here.
# $conf     - config hash; 'snapstate' is deleted from it as a side effect.
#
# Returns the raw text: the main section followed by one "[name]" section per
# snapshot. Dies if a property value contains a newline.
sub write_pct_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    # keys that are never written as plain "key: value" lines
    my %skip = map { ($_ => 1) } qw(digest description pending snapshots snapname lxc);

    my $render_section = sub {
        my ($section) = @_;

        my $text = '';

        # the description goes first, one comment line per description line
        my $descr = $section->{description} || '';
        foreach my $descr_line (split(/\n/, $descr)) {
            $text .= '#' . PVE::Tools::encode_text($descr_line) . "\n";
        }

        foreach my $key (sort keys %$section) {
            next if $skip{$key};
            my $value = $section->{$key};
            die "detected invalid newline inside property '$key'\n" if $value =~ m/\n/;
            $text .= "$key: $value\n";
        }

        # raw lxc entries keep their stored order and come last
        if (my $lxcconf = $section->{lxc}) {
            foreach my $entry (@$lxcconf) {
                my ($lxc_key, $lxc_value) = @$entry;
                $text .= "$lxc_key: $lxc_value\n";
            }
        }

        return $text;
    };

    my $raw = $render_section->($conf);

    foreach my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= $render_section->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
435
# Validate $value against the schema type of option $key and return the
# normalized value (booleans become 1/0, integers are numified).
#
# Dies for unknown keys, undefined values, values containing CR/LF and values
# that do not match the declared type.
sub check_type {
    my ($key, $value) = @_;

    my $schema = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$schema;

    my $type = $schema->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if $value eq '1' || $value =~ m/^(on|yes|true)$/i;
        return 0 if $value eq '0' || $value =~ m/^(off|no|false)$/i;
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        # strings are only checked when the schema declares a format
        my $fmt = $schema->{format};
        PVE::JSONSchema::check_format($fmt, $value) if $fmt;
        return $value;
    }

    die "internal error"
}
471
# Parse the raw text of a container config file.
#
# $filename - path of the config file; must end in '/lxc/<vmid>.conf'.
# $raw      - file content, or undef.
#
# Returns undef if $raw is undefined. Otherwise returns a hash with the
# options of the main section, a 'digest' (SHA1 of $raw) and a 'snapshots'
# hash holding one sub-hash per "[name]" section. Lines that cannot be parsed
# are reported with warn() and skipped; values that fail check_type() are
# reported but still stored as read. Dies on an unexpected filename.
sub parse_pct_config {
    my ($filename, $raw) = @_;

    return undef if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
    };

    # the dot is escaped so that only a literal '.conf' suffix matches
    $filename =~ m|/lxc/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    my $conf = $res;   # section currently being filled
    my $descr = '';    # description collected for the current section
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            # a new snapshot section starts: store the pending description
            # in the previous section first
            $section = $1;
            $conf->{description} = $descr if $descr;
            $descr = '';
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)\s*$/) {
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(lxc\.[a-z0-9_\-\.]+)(:|\s*=)\s*(.*?)\s*$/) {
            my $key = $1;
            my $value = $3;
            if ($valid_lxc_conf_keys->{$key} || $key =~ m/^lxc\.cgroup\./) {
                push @{$conf->{lxc}}, [$key, $value];
            } else {
                warn "vm $vmid - unable to parse config: $line\n";
            }
        } elsif ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored at line start so that only a real 'snapstate' key
            # matches, not any line that happens to end in this text
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(\S.*)\s*$/) {
            my $key = $1;
            my $value = $2;
            eval { $value = check_type($key, $value); };
            warn "vm $vmid - unable to parse value of '$key' - $@" if $@;
            $conf->{$key} = $value;
        } else {
            warn "vm $vmid - unable to parse config: $line\n";
        }
    }

    $conf->{description} = $descr if $descr;

    delete $res->{snapstate}; # just to be sure

    return $res;
}
537
# Return a hash { vmid => { type => 'lxc' } } with all containers that the
# cluster vmlist assigns to the local node.
sub config_list {
    my $res = {};

    my $vmlist = PVE::Cluster::get_vmlist();
    return $res if !$vmlist || !$vmlist->{ids};

    my $ids = $vmlist->{ids};
    foreach my $vmid (keys %$ids) {
        next if !$vmid; # skip CT0

        my $entry = $ids->{$vmid};
        my $on_this_node = $entry->{node} && $entry->{node} eq $nodename;
        next if !$on_this_node;
        my $is_lxc = $entry->{type} && $entry->{type} eq 'lxc';
        next if !$is_lxc;

        $res->{$vmid}->{type} = 'lxc';
    }

    return $res;
}
553
# Return the config path of container $vmid, relative to the cluster file
# system root. $node defaults to the local node.
sub cfs_config_path {
    my ($vmid, $node) = @_;

    my $target_node = $node ? $node : $nodename;

    return "nodes/$target_node/lxc/$vmid.conf";
}
560
# Return the absolute path (below /etc/pve) of the config file of $vmid.
sub config_file {
    my ($vmid, $node) = @_;

    return "/etc/pve/" . cfs_config_path($vmid, $node);
}
567
# Read and return the parsed config of container $vmid ($node defaults to the
# local node). Dies if no config could be read.
sub load_config {
    my ($vmid, $node) = @_;

    my $cfspath = cfs_config_path($vmid, $node || $nodename);

    my $conf = PVE::Cluster::cfs_read_file($cfspath);
    return $conf if defined($conf);

    die "container $vmid does not exists\n";
}
579
# Create the lxc config directory of the local node (the mkdir result is not
# checked) and write $conf as the config of container $vmid.
sub create_config {
    my ($vmid, $conf) = @_;

    my $confdir = "/etc/pve/nodes/$nodename/lxc";
    mkdir $confdir;

    write_config($vmid, $conf);
}
588
# Remove the config file of container $vmid on the local node.
sub destroy_config {
    my ($vmid) = @_;

    my $path = config_file($vmid, $nodename);
    unlink $path;
}
594
# Write $conf as the config of container $vmid on the local node.
sub write_config {
    my ($vmid, $conf) = @_;

    PVE::Cluster::cfs_write_file(cfs_config_path($vmid), $conf);
}
602
# flock: each process keeps a single file handle per lock file, so the same
# process can request a lock repeatedly and succeed every time.

# bookkeeping: $lock_handles->{<pid>}->{<filename>} = { fh => ..., refcount => ... }
my $lock_handles = {};
my $lockdir = "/run/lock/lxc";

# Return the path of the lock file that guards the config of container $vmid.
sub lock_filename {
    my ($vmid) = @_;

    return $lockdir . "/pve-config-${vmid}.lock";
}
614
# Take the exclusive config lock of container $vmid.
#
# $timeout - seconds to wait for the lock; defaults to 10.
#
# The per-process file handle is opened on first use and reference counted,
# so nested calls from the same process succeed. Dies if the lock file cannot
# be opened or the lock is not obtained in time.
sub lock_aquire {
    my ($vmid, $timeout) = @_;

    $timeout = 10 if !$timeout;
    my $mode = LOCK_EX;

    my $filename = lock_filename($vmid);

    mkdir $lockdir if !-d $lockdir;

    my $lock_func = sub {
        if (!$lock_handles->{$$}->{$filename}) {
            # open in append mode, passing the mode separately from the name
            # (was: indirect-object "new IO::File" with the mode embedded in
            # the filename string)
            my $fh = IO::File->new($filename, 'a') ||
                die "can't open file - $!\n";
            $lock_handles->{$$}->{$filename} = { fh => $fh, refcount => 0};
        }

        # try without blocking first, so the message below is only printed
        # when we really have to wait
        if (!flock($lock_handles->{$$}->{$filename}->{fh}, $mode |LOCK_NB)) {
            print STDERR "trying to aquire lock...";
            my $success;
            while(1) {
                $success = flock($lock_handles->{$$}->{$filename}->{fh}, $mode);
                # try again on EINTR (see bug #273)
                if ($success || ($! != EINTR)) {
                    last;
                }
            }
            if (!$success) {
                print STDERR " failed\n";
                die "can't aquire lock - $!\n";
            }

            print STDERR " OK\n";
        }

        $lock_handles->{$$}->{$filename}->{refcount}++;
    };

    eval { PVE::Tools::run_with_timeout($timeout, $lock_func); };
    my $err = $@;
    if ($err) {
        die "can't lock file '$filename' - $err";
    }
}
659
# Drop one reference to the config lock of container $vmid. The file handle
# is closed, which releases the flock, once the reference count reaches zero.
# Does nothing if this process holds no handle for that lock.
sub lock_release {
    my ($vmid) = @_;

    my $filename = lock_filename($vmid);

    # Look the entry up without autovivifying it. An empty entry created here
    # would make a later lock_aquire() skip opening the lock file.
    my $handles = $lock_handles->{$$};
    my $entry = $handles ? $handles->{$filename} : undef;
    return if !$entry || !$entry->{fh};

    if (--$entry->{refcount} <= 0) {
        delete $handles->{$filename};
        close($entry->{fh});
    }

    return;
}
673
# Run $code->(@param) while holding the config lock of container $vmid.
#
# The lock is released again before an error raised by $code is re-thrown.
# Returns the (scalar) result of $code.
sub lock_container {
    my ($vmid, $timeout, $code, @param) = @_;

    lock_aquire($vmid, $timeout);

    my $res = eval { $code->(@param) };
    my $err = $@;

    lock_release($vmid);

    die $err if $err;

    return $res;
}
688
# Return true if $name is an option known to the config schema.
sub option_exists {
    my ($name) = @_;

    my $schema = $confdesc->{$name};
    return defined($schema);
}
694
# Add the JSON properties for the create and set functions: every config
# option except 'parent' and 'snaptime' is copied into $prop unless $prop
# already has a true entry of that name. Returns $prop.
sub json_config_properties {
    my $prop = shift;

    my %excluded = (parent => 1, snaptime => 1);

    foreach my $opt (grep { !$excluded{$_} } keys %$confdesc) {
        $prop->{$opt} = $confdesc->{$opt} if !$prop->{$opt};
    }

    return $prop;
}
707
# Same as json_config_properties(), but 'rootfs' is left out as well.
# Returns $prop.
sub json_config_properties_no_rootfs {
    my $prop = shift;

    my %excluded = (parent => 1, snaptime => 1, rootfs => 1);

    foreach my $opt (grep { !$excluded{$_} } keys %$confdesc) {
        $prop->{$opt} = $confdesc->{$opt} if !$prop->{$opt};
    }

    return $prop;
}
719
720 # container status helpers
721
# Return a hash { vmid => 1 } of containers that have a command socket
# '@/var/lib/lxc/<vmid>/command' listed in /proc/net/unix. Returns an empty
# hash if that file cannot be opened.
sub list_active_containers {

    my $filename = "/proc/net/unix";

    # a similar test is used by lxccontainer.c: list_active_containers
    my $res = {};

    my $fh = IO::File->new($filename, "r") or return $res;

    while (defined(my $line = <$fh>)) {
        # the socket path is the last column
        next if $line !~ m/^[a-f0-9]+:\s\S+\s\S+\s\S+\s\S+\s\S+\s\d+\s(\S+)$/;
        my $path = $1;

        next if $path !~ m!^@/var/lib/lxc/(\d+)/command$!;
        $res->{$1} = 1;
    }

    close($fh);

    return $res;
}
745
# warning: this is slow (it scans the list of all active containers)
# Return 1 if container $vmid is active, undef otherwise.
sub check_running {
    my ($vmid) = @_;

    my $active = list_active_containers();

    return defined($active->{$vmid}) ? 1 : undef;
}
756
# Return the PVE::Tools::df() result for the root file system of the process
# $pid, reached via /proc/<pid>/root/. $vmid is not used.
sub get_container_disk_usage {
    my ($vmid, $pid) = @_;

    my $rootdir = "/proc/$pid/root/";

    return PVE::Tools::df($rootdir, 1);
}
762
# CPU usage samples of the previous vmstatus() call, keyed by vmid.
my $last_proc_vmid_stat;

# Read the cpuacct.stat cgroup file of container $vmid and return a hash with
# 'utime' and 'stime'; the hash is empty if the content is not recognized.
my $parse_cpuacct_stat = sub {
    my ($vmid) = @_;

    my $content = read_cgroup_value('cpuacct', $vmid, 'cpuacct.stat', 1);

    return {} if $content !~ m/^user (\d+)\nsystem (\d+)\n/;

    return { utime => $1, stime => $2 };
};
781
# Collect status information for containers on the local node.
#
# $opt_vmid - optional; restrict the result to this container.
#
# Returns a hash keyed by vmid. Each entry holds: type, pid (running CTs
# only), status, name, cpus, disk, maxdisk, mem, maxmem, swap, maxswap,
# uptime, cpu, netin, netout, diskread, diskwrite and template.
#
# CPU usage is computed from the difference to the sample stored by the
# previous call, so the first call for a container reports cpu => 0.
sub vmstatus {
    my ($opt_vmid) = @_;

    my $list = $opt_vmid ? { $opt_vmid => { type => 'lxc' }} : config_list();

    my $active_hash = list_active_containers();

    my $cpucount = $cpuinfo->{cpus} || 1;

    my $cdtime = gettimeofday;

    # first pass: static values from the config, defaults for everything else
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};

        eval { $d->{pid} = find_lxc_pid($vmid) if defined($active_hash->{$vmid}); };
        warn $@ if $@; # ignore errors (consider them stopped)

        $d->{status} = $d->{pid} ? 'running' : 'stopped';

        my $cfspath = cfs_config_path($vmid);
        my $conf = PVE::Cluster::cfs_read_file($cfspath) || {};

        $d->{name} = $conf->{'hostname'} || "CT$vmid";
        $d->{name} =~ s/[\s]//g;

        $d->{cpus} = $conf->{cpulimit} || $cpucount;

        if ($d->{pid}) {
            my $res = get_container_disk_usage($vmid, $d->{pid});
            $d->{disk} = $res->{used};
            $d->{maxdisk} = $res->{total};
        } else {
            $d->{disk} = 0;
            # use 4GB by default ??
            my $default_maxdisk = 4*1024*1024*1024;
            if (my $rootfs = $conf->{rootfs}) {
                my $rootinfo = parse_ct_rootfs($rootfs);
                # NOTE(review): parse_ct_rootfs() stores the result of
                # PVE::JSONSchema::parse_size() in 'size'. That helper is
                # understood to return bytes, so the value is used as is
                # (it was multiplied by 1 GiB again before) - confirm.
                $d->{maxdisk} = $rootinfo->{size} || $default_maxdisk;
            } else {
                $d->{maxdisk} = $default_maxdisk;
            }
        }

        $d->{mem} = 0;
        $d->{swap} = 0;
        $d->{maxmem} = ($conf->{memory}||512)*1024*1024;
        $d->{maxswap} = ($conf->{swap}//0)*1024*1024;

        $d->{uptime} = 0;
        $d->{cpu} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $d->{template} = is_template($conf);
    }

    # second pass: runtime values of running containers, read from cgroups
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};
        my $pid = $d->{pid};

        next if !$pid; # skip stopped CTs

        my $ctime = (stat("/proc/$pid"))[10]; # 10 = ctime
        $d->{uptime} = time - $ctime; # the method lxcfs uses

        $d->{mem} = read_cgroup_value('memory', $vmid, 'memory.usage_in_bytes');
        $d->{swap} = read_cgroup_value('memory', $vmid, 'memory.memsw.usage_in_bytes') - $d->{mem};

        my $blkio_bytes = read_cgroup_value('blkio', $vmid, 'blkio.throttle.io_service_bytes', 1);
        my @bytes = split(/\n/, $blkio_bytes);
        foreach my $byte (@bytes) {
            if (my ($key, $value) = $byte =~ /(Read|Write)\s+(\d+)/) {
                $d->{diskread} = $value if $key eq 'Read';
                $d->{diskwrite} = $value if $key eq 'Write';
            }
        }

        my $pstat = &$parse_cpuacct_stat($vmid);

        my $used = $pstat->{utime} + $pstat->{stime};

        my $old = $last_proc_vmid_stat->{$vmid};
        if (!$old) {
            # no previous sample: store one and report cpu => 0 this time
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($cdtime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            # too little time has passed: repeat the last computed value
            $d->{cpu} = $old->{cpu};
        }
    }

    my $netdev = PVE::ProcFSTools::read_proc_net_dev();

    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^veth([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $list->{$vmid};

        next if !$d;

        # the device's receive counter is reported as the container's
        # outgoing traffic, and transmit as incoming
        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};

    }

    return $list;
}
910
# Classify a mount point source:
#   'device' - absolute path below /dev/
#   'bind'   - any other absolute path
#   'volume' - everything else
sub classify_mountpoint {
    my ($vol) = @_;

    return 'volume' if $vol !~ m!^/!;

    return $vol =~ m!^/dev/! ? 'device' : 'bind';
}
919
# Parse the property string $data according to schema $desc.
#
# Returns the parsed hash with 'size' replaced by the result of
# PVE::JSONSchema::parse_size() and 'type' set by classify_mountpoint().
# On a parse error it returns undef if $noerr is set and dies otherwise.
my $parse_ct_mountpoint_full = sub {
    my ($desc, $data, $noerr) = @_;

    $data //= '';

    my $res;
    eval { $res = PVE::JSONSchema::parse_property_string($desc, $data) };
    if ($@) {
        return undef if $noerr;
        die $@;
    }

    if (defined(my $raw_size = $res->{size})) {
        my $size = PVE::JSONSchema::parse_size($raw_size);
        if (!defined($size)) {
            return undef if $noerr;
            # report the original text; the parsed value is undef here
            die "invalid size: $raw_size\n";
        }
        $res->{size} = $size;
    }

    $res->{type} = classify_mountpoint($res->{volume});

    return $res;
};
945
# Parse a 'rootfs' property string. On success the result gets mp => '/'.
# Returns undef on parse errors if $noerr is set; dies otherwise.
sub parse_ct_rootfs {
    my ($data, $noerr) = @_;

    my $res = $parse_ct_mountpoint_full->($rootfs_desc, $data, $noerr);
    return $res if !defined($res);

    $res->{mp} = '/';

    return $res;
}
955
# Parse an 'mpN' property string using the mount point schema.
# Returns undef on parse errors if $noerr is set; dies otherwise.
sub parse_ct_mountpoint {
    my ($data, $noerr) = @_;

    my $res = $parse_ct_mountpoint_full->($mp_desc, $data, $noerr);

    return $res;
}
961
# Format a mount point hash as a property string. The 'type' key is always
# skipped; 'mp' is skipped as well if $nomp is set.
sub print_ct_mountpoint {
    my ($info, $nomp) = @_;

    my @skip = ('type', $nomp ? 'mp' : ());

    return PVE::JSONSchema::print_property_string($info, $mp_desc, \@skip);
}
968
# Format a network hash as a 'netN' property string.
sub print_lxc_network {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $netconf_desc);
}
973
# Parse a 'netN' property string. Returns an empty hash for empty input.
# Otherwise 'type' is forced to 'veth' and a random MAC address is filled in
# if 'hwaddr' is not set.
sub parse_lxc_network {
    my ($data) = @_;

    return {} if !$data;

    my $net = PVE::JSONSchema::parse_property_string($netconf_desc, $data);

    $net->{type} = 'veth';
    $net->{hwaddr} ||= PVE::Tools::random_ether_addr();

    return $net;
}
988
# Read the cgroup file $name of container $vmid in controller $group.
# Returns the whole content if $full is set, else only the first line.
sub read_cgroup_value {
    my ($group, $vmid, $name, $full) = @_;

    my $path = "/sys/fs/cgroup/$group/lxc/$vmid/$name";

    return $full
        ? PVE::Tools::file_get_contents($path)
        : PVE::Tools::file_read_firstline($path);
}
998
# Write $value to the cgroup file $name of container $vmid in controller
# $group. Nothing is written if the file does not exist.
sub write_cgroup_value {
    my ($group, $vmid, $name, $value) = @_;

    my $cgroup_file = "/sys/fs/cgroup/$group/lxc/$vmid/$name";

    PVE::ProcFSTools::write_proc_entry($cgroup_file, $value) if -e $cgroup_file;
}
1006
# Scan /proc for 'lxc-console -n <vmid>' processes.
# Returns a hash { vmid => [ pid, ... ] }.
sub find_lxc_console_pids {

    my $res = {};

    PVE::Tools::dir_glob_foreach('/proc', '\d+', sub {
        my ($pid) = @_;

        my $cmdline = PVE::Tools::file_read_firstline("/proc/$pid/cmdline");
        return if !$cmdline;

        my @args = split(/\0/, $cmdline);

        # search for lxc-console -n <vmid>
        return if scalar(@args) != 3;

        my ($prog, $flag, $vmid) = @args;
        return if $flag ne '-n';
        return if $vmid !~ m/^\d+$/;
        return if $prog !~ m|^(/usr/bin/)?lxc-console$|;

        push @{$res->{$vmid}}, $pid;
    });

    return $res;
}
1032
# Return the PID that 'lxc-info -n <vmid> -p' prints for container $vmid.
# Dies if no PID line is found in the output.
sub find_lxc_pid {
    my ($vmid) = @_;

    my $pid;

    my $cmd = ['lxc-info', '-n', $vmid, '-p'];
    PVE::Tools::run_command($cmd, outfunc => sub {
        my ($line) = @_;
        $pid = $1 if $line =~ m/^PID:\s+(\d+)$/;
    });

    return $pid if $pid;

    die "unable to get PID for CT $vmid (not running?)\n";
}
1047
# Note: Net::IP cannot be used here, because it only accepts strict CIDR
# networks.
#
# Parse 'address/prefix' with a prefix length from 8 to 32 and return
# { address => ..., netmask => ... }. On invalid input it returns undef if
# $noerr is set and dies otherwise.
sub parse_ipv4_cidr {
    my ($cidr, $noerr) = @_;

    if ($cidr =~ m!^($IPV4RE)(?:/(\d+))$!) {
        my ($address, $bits) = ($1, $2);
        if ($bits > 7 && $bits <= 32) {
            return {
                address => $address,
                netmask => $PVE::Network::ipv4_reverse_mask->[$bits],
            };
        }
    }

    die "unable to parse ipv4 address/mask\n" if !$noerr;

    return undef;
}
1061
# Die if the config $conf carries a 'lock' entry.
sub check_lock {
    my ($conf) = @_;

    my $lock = $conf->{'lock'};

    die "VM is locked ($lock)\n" if $lock;
}
1067
# Die with "$err_msg - protection mode enabled" if the config $vm_conf has
# the 'protection' flag set.
sub check_protection {
    my ($vm_conf, $err_msg) = @_;

    return if !$vm_conf->{protection};

    die "$err_msg - protection mode enabled\n";
}
1075
# Generate the lxc config file /var/lib/lxc/<vmid>/config from $conf.
#
# $storage_cfg - not used here.
# $vmid        - container id.
# $conf        - parsed container config.
#
# For templates the config file is removed and nothing is written. Dies if
# 'arch' or 'ostype' is missing, or if 'ostype' is not supported.
sub update_lxc_config {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $dir = "/var/lib/lxc/$vmid";

    if ($conf->{template}) {

        unlink "$dir/config";

        return;
    }

    my $raw = '';

    die "missing 'arch' - internal error" if !$conf->{arch};
    $raw .= "lxc.arch = $conf->{arch}\n";

    my $unprivileged = $conf->{unprivileged};
    # number of user supplied 'lxc.id_map' entries
    my $custom_idmap = grep { $_->[0] eq 'lxc.id_map' } @{$conf->{lxc}};

    my $ostype = $conf->{ostype} || die "missing 'ostype' - internal error";
    if ($ostype =~ /^(?:debian | ubuntu | centos | fedora | opensuse | archlinux)$/x) {
        $raw .= "lxc.include = /usr/share/lxc/config/$ostype.common.conf\n";
        if ($unprivileged || $custom_idmap) {
            $raw .= "lxc.include = /usr/share/lxc/config/$ostype.userns.conf\n"
        }
    } else {
        die "implement me (ostype $ostype)";
    }

    $raw .= "lxc.monitor.unshare = 1\n";

    # Should we read them from /etc/subuid?
    if ($unprivileged && !$custom_idmap) {
        $raw .= "lxc.id_map = u 0 100000 65536\n";
        $raw .= "lxc.id_map = g 0 100000 65536\n";
    }

    if (!has_dev_console($conf)) {
        $raw .= "lxc.console = none\n";
        $raw .= "lxc.cgroup.devices.deny = c 5:1 rwm\n";
    }

    my $ttycount = get_tty_count($conf);
    $raw .= "lxc.tty = $ttycount\n";

    # some init scripts expects a linux terminal (turnkey).
    $raw .= "lxc.environment = TERM=linux\n";

    my $utsname = $conf->{hostname} || "CT$vmid";
    $raw .= "lxc.utsname = $utsname\n";

    my $memory = $conf->{memory} || 512;
    my $swap = $conf->{swap} // 0;

    my $lxcmem = int($memory*1024*1024);
    $raw .= "lxc.cgroup.memory.limit_in_bytes = $lxcmem\n";

    # the memsw limit covers memory plus swap
    my $lxcswap = int(($memory + $swap)*1024*1024);
    $raw .= "lxc.cgroup.memory.memsw.limit_in_bytes = $lxcswap\n";

    if (my $cpulimit = $conf->{cpulimit}) {
        $raw .= "lxc.cgroup.cpu.cfs_period_us = 100000\n";
        my $value = int(100000*$cpulimit);
        $raw .= "lxc.cgroup.cpu.cfs_quota_us = $value\n";
    }

    my $shares = $conf->{cpuunits} || 1024;
    $raw .= "lxc.cgroup.cpu.shares = $shares\n";

    # The result is not used; the call is kept because parse_ct_rootfs()
    # dies on an invalid 'rootfs' value.
    parse_ct_rootfs($conf->{rootfs});

    $raw .= "lxc.rootfs = $dir/rootfs\n";

    # Sorted, so that the interfaces are written in the same order on every
    # run instead of in hash order.
    my $netcount = 0;
    foreach my $k (sort keys %$conf) {
        next if $k !~ m/^net(\d+)$/;
        my $ind = $1;
        my $d = parse_lxc_network($conf->{$k});
        $netcount++;
        $raw .= "lxc.network.type = veth\n";
        $raw .= "lxc.network.veth.pair = veth${vmid}i${ind}\n";
        $raw .= "lxc.network.hwaddr = $d->{hwaddr}\n" if defined($d->{hwaddr});
        $raw .= "lxc.network.name = $d->{name}\n" if defined($d->{name});
        $raw .= "lxc.network.mtu = $d->{mtu}\n" if defined($d->{mtu});
    }

    # raw lxc entries from the config are appended verbatim
    if (my $lxcconf = $conf->{lxc}) {
        foreach my $entry (@$lxcconf) {
            my ($k, $v) = @$entry;
            $netcount++ if $k eq 'lxc.network.type';
            $raw .= "$k = $v\n";
        }
    }

    $raw .= "lxc.network.type = empty\n" if !$netcount;

    File::Path::mkpath("$dir/rootfs");

    PVE::Tools::file_set_contents("$dir/config", $raw);
}
1177
# verify and cleanup nameserver list (replace \0 with ' ')
# Each entry is checked with pve_verify_ip(); the entries are returned joined
# by single spaces.
sub verify_nameserver_list {
    my ($nameserver_list) = @_;

    my @servers = PVE::Tools::split_list($nameserver_list);

    foreach my $server (@servers) {
        PVE::JSONSchema::pve_verify_ip($server);
    }

    return join(' ', @servers);
}
1190
# Split the search domain list and return the entries joined by single
# spaces. The entries themselves are not validated.
sub verify_searchdomain_list {
    my ($searchdomain_list) = @_;

    # todo: should we add checks for valid dns domains?
    my @domains = PVE::Tools::split_list($searchdomain_list);

    return join(' ', @domains);
}
1202
# Store $volid in the free 'unusedN' slot with the lowest index of $config.
#
# Returns the key that was used. Returns nothing, without changing $config,
# if the scan (highest index first) meets a slot that already holds $volid.
# Dies if no slot is free.
sub add_unused_volume {
    my ($config, $volid) = @_;

    my $free_key;
    foreach my $ind (reverse 0 .. $MAX_UNUSED_DISKS - 1) {
        my $slot = "unused$ind";
        my $vid = $config->{$slot};

        if (!$vid) {
            # remember it; a lower free index overrides it later
            $free_key = $slot;
            next;
        }

        return if $vid eq $volid; # do not add duplicates
    }

    die "To many unused volume - please delete them first.\n" if !$free_key;

    $config->{$free_key} = $volid;

    return $free_key;
}
1222
# Apply option deletions and updates to a container configuration.
#
# Parameters:
#   $vmid    - container ID
#   $conf    - current configuration hash, modified in place
#   $running - true if the container is running; changes are then written
#              to the config file after every option, and options that
#              cannot be changed live are collected instead of applied
#   $param   - hash ref of option => new value ('memory' and 'swap' are
#              extracted from it)
#   $delete  - optional array ref of option names to remove
#
# Dies on unknown or read-only options. If the container is running and some
# options could not be hot-plugged, dies at the very end with the list of
# those options (all other changes have been applied by then).
sub update_pct_config {
    my ($vmid, $conf, $running, $param, $delete) = @_;

    my @nohotplug;

    my $new_disks = 0;
    my @deleted_volumes;

    my $rootdir;
    if ($running) {
        my $pid = find_lxc_pid($vmid);
        $rootdir = "/proc/$pid/root";
    }

    # Returns true (and remembers the option) if the container is running,
    # i.e. when the caller has to skip an option that cannot be hot-plugged.
    my $hotplug_error = sub {
        if ($running) {
            push @nohotplug, @_;
            return 1;
        } else {
            return 0;
        }
    };

    if (defined($delete)) {
        foreach my $opt (@$delete) {
            if (!exists($conf->{$opt})) {
                warn "no such option: $opt\n";
                next;
            }

            if ($opt eq 'hostname' || $opt eq 'memory' || $opt eq 'rootfs') {
                die "unable to delete required option '$opt'\n";
            } elsif ($opt eq 'swap') {
                delete $conf->{$opt};
                write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", -1);
            } elsif ($opt eq 'description' || $opt eq 'onboot' || $opt eq 'startup') {
                delete $conf->{$opt};
            } elsif ($opt eq 'nameserver' || $opt eq 'searchdomain' ||
                     $opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
                next if $hotplug_error->($opt);
                delete $conf->{$opt};
            } elsif ($opt =~ m/^net(\d+)$/) {
                # \d+ (not \d) so that the same option names are accepted
                # here as in the update branch below.
                my $netid = $1;
                delete $conf->{$opt};
                next if !$running;
                PVE::Network::veth_delete("veth${vmid}i$netid");
            } elsif ($opt eq 'protection') {
                delete $conf->{$opt};
            } elsif ($opt =~ m/^unused(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                push @deleted_volumes, $conf->{$opt};
                delete $conf->{$opt};
            } elsif ($opt =~ m/^mp(\d+)$/) {
                next if $hotplug_error->($opt);
                check_protection($conf, "can't remove CT $vmid drive '$opt'");
                my $mountpoint = parse_ct_mountpoint($conf->{$opt});
                # Volumes are kept as 'unusedN' entries instead of being freed.
                if ($mountpoint->{type} eq 'volume') {
                    add_unused_volume($conf, $mountpoint->{volume});
                }
                delete $conf->{$opt};
            } elsif ($opt eq 'unprivileged') {
                die "unable to delete read-only option: '$opt'\n";
            } else {
                die "implement me (delete: $opt)"
            }
            write_config($vmid, $conf) if $running;
        }
    }

    # There's no separate swap size to configure, there's memory and "total"
    # memory (iow. memory+swap). This means we have to change them together.
    my $wanted_memory = PVE::Tools::extract_param($param, 'memory');
    my $wanted_swap = PVE::Tools::extract_param($param, 'swap');
    if (defined($wanted_memory) || defined($wanted_swap)) {

        $wanted_memory //= ($conf->{memory} || 512);
        $wanted_swap //= ($conf->{swap} || 0);

        my $total = $wanted_memory + $wanted_swap;
        if ($running) {
            write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
            write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
        }
        $conf->{memory} = $wanted_memory;
        $conf->{swap} = $wanted_swap;

        write_config($vmid, $conf) if $running;
    }

    foreach my $opt (keys %$param) {
        my $value = $param->{$opt};
        if ($opt eq 'hostname') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'onboot') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt eq 'startup') {
            $conf->{$opt} = $value;
        } elsif ($opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
            next if $hotplug_error->($opt);
            $conf->{$opt} = $value;
        } elsif ($opt eq 'nameserver') {
            next if $hotplug_error->($opt);
            my $list = verify_nameserver_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'searchdomain') {
            next if $hotplug_error->($opt);
            my $list = verify_searchdomain_list($value);
            $conf->{$opt} = $list;
        } elsif ($opt eq 'cpulimit') {
            next if $hotplug_error->($opt); # FIXME: hotplug
            $conf->{$opt} = $value;
        } elsif ($opt eq 'cpuunits') {
            $conf->{$opt} = $value;
            write_cgroup_value("cpu", $vmid, "cpu.shares", $value);
        } elsif ($opt eq 'description') {
            $conf->{$opt} = PVE::Tools::encode_text($value);
        } elsif ($opt =~ m/^net(\d+)$/) {
            my $netid = $1;
            my $net = parse_lxc_network($value);
            if (!$running) {
                $conf->{$opt} = print_lxc_network($net);
            } else {
                update_net($vmid, $conf, $opt, $net, $netid, $rootdir);
            }
        } elsif ($opt eq 'protection') {
            $conf->{$opt} = $value ? 1 : 0;
        } elsif ($opt =~ m/^mp(\d+)$/) {
            next if $hotplug_error->($opt);
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            $conf->{$opt} = $value;
            $new_disks = 1;
        } elsif ($opt eq 'rootfs') {
            check_protection($conf, "can't update CT $vmid drive '$opt'");
            die "implement me: $opt";
        } elsif ($opt eq 'unprivileged') {
            die "unable to modify read-only option: '$opt'\n";
        } else {
            die "implement me: $opt";
        }
        write_config($vmid, $conf) if $running;
    }

    if (@deleted_volumes) {
        my $storage_cfg = PVE::Storage::config();
        foreach my $volume (@deleted_volumes) {
            delete_mountpoint_volume($storage_cfg, $vmid, $volume);
        }
    }

    if ($new_disks) {
        my $storage_cfg = PVE::Storage::config();
        create_disks($storage_cfg, $vmid, $conf, $conf);
    }

    # This should be the last thing we do here
    if ($running && scalar(@nohotplug)) {
        die "unable to modify " . join(',', @nohotplug) . " while container is running\n";
    }
}
1383
# Returns true unless the 'console' option is explicitly set to a false
# value; an unset option counts as enabled.
sub has_dev_console {
    my ($conf) = @_;

    my $console = $conf->{console};

    return !defined($console) || !!$console;
}
1389
# Returns the configured 'tty' value, falling back to the default declared
# for 'tty' in $confdesc when the option is not defined.
sub get_tty_count {
    my ($conf) = @_;

    my $count = $conf->{tty};
    return $count if defined($count);

    return $confdesc->{tty}->{default};
}
1395
# Returns the configured 'cmode' value, falling back to the default declared
# for 'cmode' in $confdesc when the option is not defined.
sub get_cmode {
    my ($conf) = @_;

    my $mode = $conf->{cmode};
    return $mode if defined($mode);

    return $confdesc->{cmode}->{default};
}
1401
# Build the command (as an array ref) used to open a console for the
# container, depending on the console mode returned by get_cmode().
# Dies on an unknown mode.
sub get_console_command {
    my ($vmid, $conf) = @_;

    my $cmode = get_cmode($conf);

    return ['lxc-console', '-n', $vmid, '-t', 0] if $cmode eq 'console';
    return ['lxc-console', '-n', $vmid] if $cmode eq 'tty';
    return ['lxc-attach', '--clear-env', '-n', $vmid] if $cmode eq 'shell';

    die "internal error";
}
1417
# Returns ($ipv4, $ipv6) taken from the 'net0' entry of the configuration.
#
# Keyword values (dhcp/manual for IPv4; auto/dhcp/manual for IPv6) yield
# undef; otherwise a trailing "/<digits>" prefix length is stripped.
# Returns undef when there is no net0 entry.
sub get_primary_ips {
    my ($conf) = @_;

    # return data from net0
    my $net0 = $conf->{net0};
    return undef if !defined($net0);

    my $net = parse_lxc_network($net0);

    # Reduce one address setting to a bare address; false values are
    # passed through untouched.
    my $bare_address = sub {
        my ($addr, $keywords) = @_;

        return $addr if !$addr;
        return undef if $addr =~ $keywords;

        $addr =~ s!/\d+$!!;
        return $addr;
    };

    my $ipv4 = $bare_address->($net->{ip}, qr/^(dhcp|manual)$/);
    my $ipv6 = $bare_address->($net->{ip6}, qr/^(auto|dhcp|manual)$/);

    return ($ipv4, $ipv6);
}
1445
# Free the storage volume behind a mount point, but only if
# classify_mountpoint() reports it as a 'volume' and the owner reported by
# PVE::Storage::parse_volname equals $vmid.
sub delete_mountpoint_volume {
    my ($storage_cfg, $vmid, $volume) = @_;

    return if classify_mountpoint($volume) ne 'volume';

    my (undef, undef, $owner) = PVE::Storage::parse_volname($storage_cfg, $volume);

    if ($vmid == $owner) {
        PVE::Storage::vdisk_free($storage_cfg, $volume);
    }
}
1454
# Destroy a container: delete the volumes of all mount points, remove the
# container's directory entries below /var/lib/lxc/$vmid and finally drop
# its configuration via destroy_config().
sub destroy_lxc_container {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $free_volume = sub {
        my ($ms, $mountpoint) = @_;
        delete_mountpoint_volume($storage_cfg, $vmid, $mountpoint->{volume});
    };
    foreach_mountpoint($conf, $free_volume);

    # Failures of rmdir/unlink are ignored here.
    my $ctdir = "/var/lib/lxc/$vmid";
    rmdir "$ctdir/rootfs";
    unlink "$ctdir/config";
    rmdir $ctdir;
    destroy_config($vmid);

    #my $cmd = ['lxc-destroy', '-n', $vmid ];
    #PVE::Tools::run_command($cmd);
}
1471
# Clean up after a container was stopped: unless $keepActive is set,
# deactivate the container's volumes. Errors are only reported as warnings.
sub vm_stop_cleanup {
    my ($storage_cfg, $vmid, $conf, $keepActive) = @_;

    eval {
        return if $keepActive; # leaves the eval block only

        my $volumes = get_vm_volumes($conf);
        PVE::Storage::deactivate_volumes($storage_cfg, $volumes);
    };
    if (my $err = $@) {
        warn $err; # avoid errors - just warn
    }
}
1484
# Numeric "not equal" that tolerates undef: two undefined values are equal,
# an undefined and a defined value differ, otherwise '!=' decides.
my $safe_num_ne = sub {
    my ($lhs, $rhs) = @_;

    if (defined($lhs) && defined($rhs)) {
        return $lhs != $rhs;
    }

    return (defined($lhs) || defined($rhs)) ? 1 : 0;
};
1494
# String "not equal" that tolerates undef: two undefined values are equal,
# an undefined and a defined value differ, otherwise 'ne' decides.
my $safe_string_ne = sub {
    my ($lhs, $rhs) = @_;

    if (defined($lhs) && defined($rhs)) {
        return $lhs ne $rhs;
    }

    return (defined($lhs) || defined($rhs)) ? 1 : 0;
};
1504
# Apply a changed netX setting to a running container.
#
# Parameters:
#   $vmid    - container ID
#   $conf    - configuration hash (updated and written as changes succeed)
#   $opt     - option name, e.g. 'net0'
#   $newnet  - parsed new network settings
#   $netid   - numeric index of the interface
#   $rootdir - root directory of the running container, handed on to
#              update_ipconfig()
#
# A changed hwaddr or name replaces the veth pair; a changed bridge, tag,
# firewall or trunks setting re-plugs the existing device. IP settings are
# handled by update_ipconfig() afterwards. Only 'veth' interfaces are
# supported.
sub update_net {
    my ($vmid, $conf, $opt, $newnet, $netid, $rootdir) = @_;

    if ($newnet->{type} ne 'veth') {
        # for when there are physical interfaces
        die "cannot update interface of type $newnet->{type}";
    }

    my $veth = "veth${vmid}i${netid}";
    my $eth = $newnet->{name};

    # Settings which are passed to tap_plug and therefore have to be
    # compared and persisted together.
    my @plug_props = qw(bridge tag firewall trunks);

    if (my $oldnetcfg = $conf->{$opt}) {
        my $oldnet = parse_lxc_network($oldnetcfg);

        if (&$safe_string_ne($oldnet->{hwaddr}, $newnet->{hwaddr}) ||
            &$safe_string_ne($oldnet->{name}, $newnet->{name})) {

            PVE::Network::veth_delete($veth);
            delete $conf->{$opt};
            write_config($vmid, $conf);

            hotplug_net($vmid, $conf, $opt, $newnet, $netid);

        } elsif (&$safe_string_ne($oldnet->{bridge}, $newnet->{bridge}) ||
                 &$safe_num_ne($oldnet->{tag}, $newnet->{tag}) ||
                 &$safe_num_ne($oldnet->{firewall}, $newnet->{firewall}) ||
                 &$safe_string_ne($oldnet->{trunks}, $newnet->{trunks})) {

            if ($oldnet->{bridge}) {
                PVE::Network::tap_unplug($veth);
                # Persist the unplugged state first, so the config never
                # claims a bridge the device is not attached to.
                foreach (@plug_props) {
                    delete $oldnet->{$_};
                }
                $conf->{$opt} = print_lxc_network($oldnet);
                write_config($vmid, $conf);
            }

            PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});
            foreach (@plug_props) {
                $oldnet->{$_} = $newnet->{$_} if $newnet->{$_};
            }
            $conf->{$opt} = print_lxc_network($oldnet);
            write_config($vmid, $conf);
        }
    } else {
        hotplug_net($vmid, $conf, $opt, $newnet, $netid);
    }

    update_ipconfig($vmid, $conf, $opt, $eth, $newnet, $rootdir);
}
1554
# Create a new veth pair for a running container, plug the host side into
# the bridge, move the peer into the container and bring it up there.
#
# Afterwards the settings that were applied (type, bridge, tag, firewall,
# trunks, hwaddr, name) are stored under $opt and the config is written.
# IP settings are not handled here.
sub hotplug_net {
    my ($vmid, $conf, $opt, $newnet, $netid) = @_;

    my $veth = "veth${vmid}i${netid}";
    my $vethpeer = $veth . "p";
    my $eth = $newnet->{name};

    PVE::Network::veth_create($veth, $vethpeer, $newnet->{bridge}, $newnet->{hwaddr});
    PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});

    # attach peer in container
    my $cmd = ['lxc-device', '-n', $vmid, 'add', $vethpeer, "$eth" ];
    PVE::Tools::run_command($cmd);

    # link up peer in container
    $cmd = ['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', '/sbin/ip', 'link', 'set', $eth ,'up' ];
    PVE::Tools::run_command($cmd);

    # Record everything that was handed to veth_create/tap_plug above;
    # 'trunks' is included because it was applied by tap_plug as well.
    my $done = { type => 'veth' };
    foreach (qw(bridge tag firewall trunks hwaddr name)) {
        $done->{$_} = $newnet->{$_} if $newnet->{$_};
    }
    $conf->{$opt} = print_lxc_network($done);

    write_config($vmid, $conf);
}
1581
# Apply changed IP address / gateway settings of one interface to a running
# container, for IPv4 and then IPv6.
#
# Parameters:
#   $vmid    - container ID
#   $conf    - configuration hash; $conf->{$opt} holds the settings that
#              are currently active and is updated on success
#   $opt     - option name, e.g. 'net0'
#   $eth     - interface name inside the container
#   $newnet  - parsed new network settings
#   $rootdir - root directory of the running container (for PVE::LXC::Setup)
#
# Commands run inside the container's network namespace via lxc-attach.
# If adding the new address or replacing the gateway fails, a warning is
# printed and the configuration for that IP version is left unchanged.
sub update_ipconfig {
    my ($vmid, $conf, $opt, $eth, $newnet, $rootdir) = @_;

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);

    my $optdata = parse_lxc_network($conf->{$opt});
    my $nscmd = sub {
        my $cmdargs = shift;
        PVE::Tools::run_command(['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', @_], %$cmdargs);
    };
    my $ipcmd = sub { &$nscmd({}, '/sbin/ip', @_) };

    my $change_ip_config = sub {
        my ($ipversion) = @_;

        my $family_opt = "-$ipversion";
        # option names are ip/gw for IPv4 and ip6/gw6 for IPv6
        my $suffix = $ipversion == 4 ? '' : $ipversion;
        my $gw= "gw$suffix";
        my $ip= "ip$suffix";

        my $newip = $newnet->{$ip};
        my $newgw = $newnet->{$gw};
        my $oldip = $optdata->{$ip};

        my $change_ip = &$safe_string_ne($oldip, $newip);
        my $change_gw = &$safe_string_ne($optdata->{$gw}, $newgw);

        return if !$change_ip && !$change_gw;

        # step 1: add new IP, if this fails we cancel
        my $is_real_ip = ($newip && $newip !~ /^(?:auto|dhcp|manual)$/);
        if ($change_ip && $is_real_ip) {
            eval { &$ipcmd($family_opt, 'addr', 'add', $newip, 'dev', $eth); };
            if (my $err = $@) {
                warn $err;
                return;
            }
        }

        # step 2: replace gateway
        # If this fails we delete the added IP and cancel.
        # If it succeeds we save the config and delete the old IP, ignoring
        # errors. The config is then saved.
        # Note: 'ip route replace' can also add a route which did not
        # exist before.
        if ($change_gw) {
            if ($newgw) {
                eval {
                    if ($is_real_ip && !PVE::Network::is_ip_in_cidr($newgw, $newip, $ipversion)) {
                        &$ipcmd($family_opt, 'route', 'add', $newgw, 'dev', $eth);
                    }
                    &$ipcmd($family_opt, 'route', 'replace', 'default', 'via', $newgw);
                };
                if (my $err = $@) {
                    warn $err;
                    # the route was not replaced, the old IP is still available
                    # rollback (delete new IP) and cancel
                    # Only an address added in step 1 can be removed again;
                    # keywords like 'dhcp' were never added.
                    if ($change_ip && $is_real_ip) {
                        eval { &$ipcmd($family_opt, 'addr', 'del', $newip, 'dev', $eth); };
                        warn $@ if $@; # no need to die here
                    }
                    return;
                }
            } else {
                eval { &$ipcmd($family_opt, 'route', 'del', 'default'); };
                # if the route was not deleted, the guest might have deleted it manually
                # warn and continue
                warn $@ if $@;
            }
        }

        # from this point on we save the configuration
        # step 3: delete old IP ignoring errors
        # ('auto', 'dhcp' and 'manual' are keywords, not addresses)
        if ($change_ip && $oldip && $oldip !~ /^(?:auto|dhcp|manual)$/) {
            # We need to enable promote_secondaries, otherwise our newly added
            # address will be removed along with the old one.
            my $promote = 0;
            eval {
                if ($ipversion == 4) {
                    &$nscmd({ outfunc => sub { $promote = int(shift) } },
                            'cat', "/proc/sys/net/ipv4/conf/$eth/promote_secondaries");
                    &$nscmd({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=1");
                }
                &$ipcmd($family_opt, 'addr', 'del', $oldip, 'dev', $eth);
            };
            warn $@ if $@; # no need to die here

            # restore the previous promote_secondaries value
            if ($ipversion == 4) {
                &$nscmd({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=$promote");
            }
        }

        foreach my $property ($ip, $gw) {
            if ($newnet->{$property}) {
                $optdata->{$property} = $newnet->{$property};
            } else {
                delete $optdata->{$property};
            }
        }
        $conf->{$opt} = print_lxc_network($optdata);
        write_config($vmid, $conf);
        $lxc_setup->setup_network($conf);
    };

    &$change_ip_config(4);
    &$change_ip_config(6);

}
1691
1692 # Internal snapshots
1693
# NOTE: Creating or deleting a snapshot involves several non-atomic
# actions and can take a long time.
# So we try to avoid locking the file and use the 'lock' variable
# inside the config file instead.
1698
# Copy all settings from $source to $dest (shallow copy), except for the
# snapshot bookkeeping keys, 'lock', 'digest' and 'description'.
my $snapshot_copy_config = sub {
    my ($source, $dest) = @_;

    my %skip = map { $_ => 1 }
        qw(snapshots snapstate snaptime vmstate lock digest description);

    foreach my $key (grep { !$skip{$_} } keys %$source) {
        $dest->{$key} = $source->{$key};
    }
};
1714
# First phase of taking a snapshot: under the container lock, set the
# 'snapshot' lock in the config and add a snapshot entry in state 'prepare'
# holding a copy of the current settings. Returns the new snapshot entry.
#
# Dies for templates, locked containers, already used snapshot names and
# when has_feature() reports that snapshots are not available.
my $snapshot_prepare = sub {
    my ($vmid, $snapname, $comment) = @_;

    my $snap;

    my $prepare = sub {
        my $conf = load_config($vmid);

        if (is_template($conf)) {
            die "you can't take a snapshot if it's a template\n";
        }

        check_lock($conf);

        $conf->{lock} = 'snapshot';

        if (defined($conf->{snapshots}->{$snapname})) {
            die "snapshot name '$snapname' already used\n";
        }

        my $storecfg = PVE::Storage::config();
        has_feature('snapshot', $conf, $storecfg)
            or die "snapshot feature is not available\n";

        my $entry = {};
        $conf->{snapshots}->{$snapname} = $entry;
        $snap = $entry;

        $snapshot_copy_config->($conf, $entry);

        $entry->{snapstate} = "prepare";
        $entry->{snaptime} = time();
        $entry->{description} = $comment if $comment;

        write_config($vmid, $conf);
    };

    lock_container($vmid, 10, $prepare);

    return $snap;
};
1753
# Final phase of taking a snapshot: under the container lock, verify that
# the config carries the 'snapshot' lock and that the snapshot entry is in
# state 'prepare', then clear both and make the snapshot the new parent.
my $snapshot_commit = sub {
    my ($vmid, $snapname) = @_;

    my $commit = sub {
        my $conf = load_config($vmid);

        my $lock = $conf->{lock};
        die "missing snapshot lock\n"
            unless $lock && $lock eq 'snapshot';

        die "snapshot '$snapname' does not exist\n"
            unless defined($conf->{snapshots}->{$snapname});

        my $entry = $conf->{snapshots}->{$snapname};
        my $state = $entry->{snapstate};
        die "wrong snapshot state\n"
            unless $state && $state eq "prepare";

        delete $entry->{snapstate};
        delete $conf->{lock};
        $conf->{parent} = $snapname;

        write_config($vmid, $conf);
    };

    lock_container($vmid, 10, $commit);
};
1780
# Returns 1 if PVE::Storage::volume_has_feature reports $feature for the
# container's volumes, 0 otherwise.
#
# Dies for any mount point other than 'rootfs', since those are not
# supported here yet.
sub has_feature {
    my ($feature, $conf, $storecfg, $snapname) = @_;

    my $unsupported;

    my $check = sub {
        my ($ms, $mountpoint) = @_;

        return if $unsupported; # skip further test

        my $volid = $mountpoint->{volume};
        if (!PVE::Storage::volume_has_feature($storecfg, $feature, $volid, $snapname)) {
            $unsupported = 1;
        }

        # TODO: implement support for mountpoints
        if ($ms ne 'rootfs') {
            die "unable to handle mountpoint '$ms' - feature not implemented\n";
        }
    };
    foreach_mountpoint($conf, $check);

    return $unsupported ? 0 : 1;
}
1800
# Create a snapshot of the container's root volume.
#
# The snapshot is first registered in the config (state 'prepare'), then a
# running container is frozen and synced, the storage snapshot is taken and
# the config entry is committed. A frozen container is always unfrozen
# again. On failure the snapshot entry is removed (best effort) and the
# original error is re-thrown.
sub snapshot_create {
    my ($vmid, $snapname, $comment) = @_;

    my $snap = &$snapshot_prepare($vmid, $snapname, $comment);

    my $conf = load_config($vmid);

    my $running = check_running($vmid);

    my $unfreeze = 0;

    eval {
        if ($running) {
            PVE::Tools::run_command(['/usr/bin/lxc-freeze', '-n', $vmid]);
            $unfreeze = 1;
            PVE::Tools::run_command(['/bin/sync']);
        }

        my $storecfg = PVE::Storage::config();
        my $rootinfo = parse_ct_rootfs($conf->{rootfs});
        my $volid = $rootinfo->{volume};

        PVE::Storage::volume_snapshot($storecfg, $volid, $snapname);
        &$snapshot_commit($vmid, $snapname);
    };
    my $err = $@;

    if ($unfreeze) {
        eval { PVE::Tools::run_command(['/usr/bin/lxc-unfreeze', '-n', $vmid]); };
        warn $@ if $@;
    }

    if ($err) {
        # The cleanup itself may fail (e.g. because the storage snapshot was
        # never created). Only warn about that, so the caller gets to see
        # the error which actually made the snapshot fail.
        eval { snapshot_delete($vmid, $snapname, 1); };
        warn $@ if $@;
        die "$err\n";
    }
}
1838
# Delete a snapshot.
#
# The snapshot entry is first marked with snapstate 'delete', then the
# storage snapshot of the root volume is removed and finally the entry is
# dropped from the config. If the deleted snapshot was the current parent,
# the parent pointer moves on to the snapshot's own parent (if any).
#
# If removing the storage snapshot fails, the function dies. With $force
# set, the config entry is removed nevertheless before dying.
sub snapshot_delete {
    my ($vmid, $snapname, $force) = @_;

    my $snap;

    my $conf;

    my $updatefn = sub {

        $conf = load_config($vmid);

        die "you can't delete a snapshot if vm is a template\n"
            if is_template($conf);

        $snap = $conf->{snapshots}->{$snapname};

        check_lock($conf);

        die "snapshot '$snapname' does not exist\n" if !defined($snap);

        $snap->{snapstate} = 'delete';

        write_config($vmid, $conf);
    };

    lock_container($vmid, 10, $updatefn);

    my $storecfg = PVE::Storage::config();

    my $del_snap = sub {

        check_lock($conf);

        # 'parent' may be unset, e.g. when the snapshot was never committed.
        if (defined($conf->{parent}) && $conf->{parent} eq $snapname) {
            # Re-link to the parent of the snapshot being removed.
            if (my $grandparent = $conf->{snapshots}->{$snapname}->{parent}) {
                $conf->{parent} = $grandparent;
            } else {
                delete $conf->{parent};
            }
        }

        delete $conf->{snapshots}->{$snapname};

        write_config($vmid, $conf);
    };

    my $rootfs = $conf->{snapshots}->{$snapname}->{rootfs};
    my $rootinfo = parse_ct_rootfs($rootfs);
    my $volid = $rootinfo->{volume};

    eval {
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname);
    };
    my $err = $@;

    # Without $force a storage error leaves the config entry in place, but
    # the failure must still be reported to the caller.
    if ($err && !$force) {
        die "Can't delete snapshot: $vmid $snapname $err\n";
    }

    lock_container($vmid, 10, $del_snap);

    if ($err) {
        die "Can't delete snapshot: $vmid $snapname $err\n";
    }
}
1901
# Roll the container back to a snapshot.
#
# Under the container lock the container is stopped if it is running, the
# config gets the 'rollback' lock and the snapshot's settings are copied
# over the current ones. Then the root volume is rolled back on the storage
# and the lock is removed again.
#
# Dies for templates, unknown or incomplete snapshots, and if the container
# is still running after the stop attempt.
sub snapshot_rollback {
    my ($vmid, $snapname) = @_;

    my $storecfg = PVE::Storage::config();

    my $conf = load_config($vmid);

    die "you can't rollback if vm is a template\n" if is_template($conf);

    my $snap = $conf->{snapshots}->{$snapname};

    die "snapshot '$snapname' does not exist\n" if !defined($snap);

    my $rootfs = $snap->{rootfs};
    my $rootinfo = parse_ct_rootfs($rootfs);
    my $volid = $rootinfo->{volume};

    PVE::Storage::volume_rollback_is_possible($storecfg, $volid, $snapname);

    my $updatefn = sub {

        die "unable to rollback to incomplete snapshot (snapstate = $snap->{snapstate})\n"
            if $snap->{snapstate};

        check_lock($conf);

        # List form of system(): the arguments never pass through a shell.
        system('lxc-stop', '-n', $vmid, '--kill') if check_running($vmid);

        die "unable to rollback vm $vmid: vm is running\n"
            if check_running($vmid);

        $conf->{lock} = 'rollback';

        # copy snapshot config to current config
        # ($snap is the entry of $conf->{snapshots}; the 'snapshots' key
        # itself is never touched by the copy)
        &$snapshot_copy_config($snap, $conf);
        delete $conf->{snaptime};
        delete $conf->{snapname};
        $conf->{parent} = $snapname;

        write_config($vmid, $conf);
    };

    my $unlockfn = sub {
        delete $conf->{lock};
        write_config($vmid, $conf);
    };

    lock_container($vmid, 10, $updatefn);

    PVE::Storage::volume_snapshot_rollback($storecfg, $volid, $snapname);

    lock_container($vmid, 5, $unlockfn);
}
1960
# Convert the container's root volume into a base volume and store the new
# volume ID in the 'rootfs' setting of the config.
#
# Dies if the storage does not report the 'template' feature for the volume.
sub template_create {
    my ($vmid, $conf) = @_;

    my $storecfg = PVE::Storage::config();

    my $rootinfo = parse_ct_rootfs($conf->{rootfs});
    my $volid = $rootinfo->{volume};

    PVE::Storage::volume_has_feature($storecfg, 'template', $volid)
        or die "Template feature is not available for '$volid'\n";

    PVE::Storage::activate_volumes($storecfg, [$volid]);

    $rootinfo->{volume} = PVE::Storage::vdisk_create_base($storecfg, $volid);
    $conf->{rootfs} = print_ct_mountpoint($rootinfo, 1);

    write_config($vmid, $conf);
}
1980
# Returns 1 if the 'template' setting is defined and numerically equal to 1,
# a false value otherwise.
sub is_template {
    my ($conf) = @_;

    my $flag = $conf->{template};

    return 1 if defined($flag) && $flag == 1;
}
1986
# Returns the list of mount point option names: 'rootfs' followed by
# 'mp0' .. 'mp<N-1>' for N = $MAX_MOUNT_POINTS, reversed if $reverse is set.
sub mountpoint_names {
    my ($reverse) = @_;

    my @names = ('rootfs', map { "mp$_" } 0 .. $MAX_MOUNT_POINTS - 1);

    return $reverse ? reverse @names : @names;
}
1998
# Normalize a mount point path textually and return it.
#
# The container might have *different* symlinks than the host. realpath/abs_path
# use the actual filesystem to resolve links, so the path is only cleaned up
# as a string here: it is made absolute, runs of slashes and '/./' components
# are collapsed, and '..' components are folded without regard for symlinks.
sub sanitize_mountpoint {
    my ($mp) = @_;
    $mp = '/' . $mp; # we always start with a slash
    $mp =~ s@/{2,}@/@g; # collapse sequences of slashes
    # collapse /./ to a single slash; repeated, since in '/././' the
    # second occurrence shares its leading slash with the first one
    1 while $mp =~ s@/\./@/@;
    $mp =~ s@/\.(/?)$@$1@; # collapse a trailing /. or /./
    $mp =~ s@(.*)/[^/]+/\.\./@$1/@g; # collapse /../ without regard for symlinks
    $mp =~ s@/\.\.(/?)$@$1@; # collapse trailing /.. or /../ disregarding symlinks
    return $mp;
}
2011
# Call $func->($key, $mountpoint) for every mount point option that is set
# in $conf, in the order given by mountpoint_names($reverse).
#
# Entries that fail to parse are skipped. The 'mp' path, and the 'volume'
# if it starts with a slash, are passed through sanitize_mountpoint() first.
sub foreach_mountpoint_full {
    my ($conf, $reverse, $func) = @_;

    foreach my $name (mountpoint_names($reverse)) {
        my $raw = $conf->{$name};
        next if !defined($raw);

        my $mountpoint;
        if ($name eq 'rootfs') {
            $mountpoint = parse_ct_rootfs($raw, 1);
        } else {
            $mountpoint = parse_ct_mountpoint($raw, 1);
        }
        next if !defined($mountpoint);

        $mountpoint->{mp} = sanitize_mountpoint($mountpoint->{mp});

        my $source = $mountpoint->{volume};
        if ($source =~ m|^/|) {
            $mountpoint->{volume} = sanitize_mountpoint($source);
        }

        $func->($name, $mountpoint);
    }
}
2029
# Iterate over the mount points of $conf in forward order, see
# foreach_mountpoint_full().
sub foreach_mountpoint {
    my ($conf, $func) = @_;

    my $reverse = 0;
    return foreach_mountpoint_full($conf, $reverse, $func);
}
2035
# Iterate over the mount points of $conf in reverse order, see
# foreach_mountpoint_full().
sub foreach_mountpoint_reverse {
    my ($conf, $func) = @_;

    my $reverse = 1;
    return foreach_mountpoint_full($conf, $reverse, $func);
}
2041
# Check that $authuser is allowed to modify every option in $key_list.
#
# Parameters:
#   $rpcenv   - object providing check_vm_perm(), which is called once per
#               option with the privilege required for it
#   $authuser - user ID; 'root@pam' bypasses all checks
#   $vmid     - container ID
#   $pool     - pool passed on to check_vm_perm()
#   $key_list - array ref of option names
#
# Returns 1 if all checks were passed; a failing check is expected to be
# raised by check_vm_perm() itself.
sub check_ct_modify_config_perm {
    my ($rpcenv, $authuser, $vmid, $pool, $key_list) = @_;

    # Only the superuser may skip the per-option checks.
    return 1 if $authuser eq 'root@pam';

    foreach my $opt (@$key_list) {

        if ($opt eq 'cpus' || $opt eq 'cpuunits' || $opt eq 'cpulimit') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.CPU']);
        } elsif ($opt eq 'rootfs' || $opt =~ /^mp\d+$/) {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk']);
        } elsif ($opt eq 'memory' || $opt eq 'swap') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Memory']);
        } elsif ($opt =~ m/^net\d+$/ || $opt eq 'nameserver' ||
                 $opt eq 'searchdomain' || $opt eq 'hostname') {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Network']);
        } else {
            $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Options']);
        }
    }

    return 1;
}
2065
# Unmount all mount points of a container below /var/lib/lxc/$vmid/rootfs,
# walking them in reverse order. Mount points that are not mounted are
# skipped. With $noerr set, umount failures only produce a warning.
sub umount_all {
    my ($vmid, $storage_cfg, $conf, $noerr) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    my $volid_list = get_vm_volumes($conf); # result is not used below

    my $unmount = sub {
        my ($ms, $mountpoint) = @_;

        my $volid = $mountpoint->{volume};
        my $mount = $mountpoint->{mp};

        return if !$volid || !$mount;

        (my $target = "$rootdir/$mount") =~ s!/+!/!g;

        return if !PVE::ProcFSTools::is_mounted($target);

        eval {
            PVE::Tools::run_command(['umount', '-d', $target]);
        };
        my $err = $@;
        return if !$err;

        die $err if !$noerr;
        warn $err;
    };

    foreach_mountpoint_reverse($conf, $unmount);
}
2097
# Activate the container's volumes and mount all of its mount points below
# /var/lib/lxc/$vmid/rootfs. Returns that root directory.
#
# If mounting fails, everything is unmounted again (errors of the unmount
# only warn) and the original error is re-thrown.
sub mount_all {
    my ($vmid, $storage_cfg, $conf) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    File::Path::make_path($rootdir);

    PVE::Storage::activate_volumes($storage_cfg, get_vm_volumes($conf));

    my $mount_one = sub {
        my ($ms, $mountpoint) = @_;

        mountpoint_mount($mountpoint, $rootdir, $storage_cfg);
    };

    eval { foreach_mountpoint($conf, $mount_one); };
    my $err = $@;
    if ($err) {
        warn "mounting container failed\n";
        umount_all($vmid, $storage_cfg, $conf, 1);
        die $err;
    }

    return $rootdir;
}
2122
2123
# Resolve the source path of a mount point without mounting anything, by
# calling mountpoint_mount() with an undefined root directory.
sub mountpoint_mount_path {
    my ($mountpoint, $storage_cfg, $snapname) = @_;

    my $rootdir = undef; # undef => only return the path
    return mountpoint_mount($mountpoint, $rootdir, $storage_cfg, $snapname);
}
2129
# Die if resolving $path through the filesystem (Cwd::realpath) yields
# something different from its canonical textual form, i.e. if a symlink
# changes where the path points to.
my $check_mount_path = sub {
    my ($path) = @_;

    my $canonical = File::Spec->canonpath($path);
    my $resolved = Cwd::realpath($canonical);

    die "mount path modified by symlink: $canonical != $resolved"
        if $resolved ne $canonical;
};
2138
# Run 'losetup --associated $path' and return the /dev/loopN device named
# in its output (the last one, if several lines match), or undef if no
# line matches.
sub query_loopdev {
    my ($path) = @_;

    my $loopdev;

    PVE::Tools::run_command(
        ['losetup', '--associated', $path],
        outfunc => sub {
            my ($line) = @_;
            $loopdev = $1 if $line =~ m@^(/dev/loop\d+):@;
        },
    );

    return $loopdev;
}
2152
# Mount one mount point below $rootdir.
#
# use $rootdir = undef to just return the corresponding mount path
#
# Returns the source path, or ($path, $use_loopdev) in list context, where
# $use_loopdev is 1 if the mount uses the 'loop' option. Returns nothing if
# the mount point has no volume or no target path. Dies on unsupported
# storage types and image formats.
sub mountpoint_mount {
    my ($mountpoint, $rootdir, $storage_cfg, $snapname) = @_;

    my $volid = $mountpoint->{volume};
    my $mount = $mountpoint->{mp};
    my $type = $mountpoint->{type};

    return if !$volid || !$mount;

    my $mount_path;

    if (defined($rootdir)) {
        $rootdir =~ s!/+$!!;
        $mount_path = "$rootdir/$mount";
        $mount_path =~ s!/+!/!g;
        $check_mount_path->($mount_path);
        File::Path::mkpath($mount_path);
    }

    my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1);

    die "unknown snapshot path for '$volid'" if !$storage && defined($snapname);

    # Sources which are not storage volumes: plain devices and directories.
    if (!$storage) {
        if ($type eq 'device') {
            PVE::Tools::run_command(['mount', $volid, $mount_path]) if $mount_path;
            return wantarray ? ($volid, 0) : $volid;
        }

        if ($type eq 'bind') {
            die "directory '$volid' does not exist\n" if ! -d $volid;
            $check_mount_path->($volid);
            PVE::Tools::run_command(['mount', '-o', 'bind', $volid, $mount_path]) if $mount_path;
            return wantarray ? ($volid, 0) : $volid;
        }

        die "unsupported storage";
    }

    my $scfg = PVE::Storage::storage_config($storage_cfg, $storage);
    my $path = PVE::Storage::path($storage_cfg, $volid, $snapname);

    my ($vtype, undef, undef, undef, undef, $isBase, $format) =
        PVE::Storage::parse_volname($storage_cfg, $volid);

    $format = 'iso' if $vtype eq 'iso'; # allow to handle iso files

    if ($format eq 'subvol') {
        if ($mount_path) {
            if (!$snapname) {
                PVE::Tools::run_command(['mount', '-o', 'bind', $path, $mount_path]);
            } elsif ($scfg->{type} eq 'zfspool') {
                # the zfs mount source is given without leading slashes
                (my $path_arg = $path) =~ s!^/+!!;
                PVE::Tools::run_command(['mount', '-o', 'ro', '-t', 'zfs', $path_arg, $mount_path]);
            } else {
                die "cannot mount subvol snapshots for storage type '$scfg->{type}'\n";
            }
        }
        return wantarray ? ($path, 0) : $path;
    }

    if ($format eq 'raw' || $format eq 'iso') {
        my $use_loopdev = 0;
        my @extra_opts;
        if ($scfg->{path}) {
            @extra_opts = ('-o', 'loop');
            $use_loopdev = 1;
        } elsif (grep { $scfg->{type} eq $_ } qw(drbd lvm rbd lvmthin)) {
            # do nothing
        } else {
            die "unsupported storage type '$scfg->{type}'\n";
        }

        if ($mount_path) {
            my @mode_opts;
            if ($format eq 'iso') {
                @mode_opts = ('-o', 'ro');
            } elsif ($isBase || defined($snapname)) {
                @mode_opts = ('-o', 'ro,noload');
            }
            PVE::Tools::run_command(['mount', @mode_opts, @extra_opts, $path, $mount_path]);
        }
        return wantarray ? ($path, $use_loopdev) : $path;
    }

    die "unsupported image format '$format'\n";
}
2239
# Returns an array ref with the volume IDs of all mount points of type
# 'volume' whose ID parses as a storage volume. The mount point named by
# $excludes (if given) is left out.
sub get_vm_volumes {
    my ($conf, $excludes) = @_;

    my @volids;

    my $collect = sub {
        my ($ms, $mountpoint) = @_;

        return if $excludes && $ms eq $excludes;

        my $volid = $mountpoint->{volume};
        return if !$volid || $mountpoint->{type} ne 'volume';

        my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        push @volids, $volid;
    };
    foreach_mountpoint($conf, $collect);

    return \@volids;
}
2262
# Create an ext4 filesystem on $dev with the 'mmp' feature enabled and the
# root directory owned by $rootuid:$rootgid.
sub mkfs {
    my ($dev, $rootuid, $rootgid) = @_;

    my $cmd = [
        'mkfs.ext4',
        '-O', 'mmp',
        '-E', "root_owner=$rootuid:$rootgid",
        $dev,
    ];

    PVE::Tools::run_command($cmd);
}
2270
# Create a filesystem on a volume via mkfs(), with the root directory owned
# by $rootuid:$rootgid.
#
# $volid is either a device path below /dev/, which is formatted directly,
# or a storage volume ID. Storage volumes are activated first and must be
# of format 'raw'; dies otherwise.
sub format_disk {
    my ($storage_cfg, $volid, $rootuid, $rootgid) = @_;

    if ($volid =~ m!^/dev/.+!) {
        # Pass the owner on here as well, so that devices get the same
        # root directory ownership as storage volumes.
        mkfs($volid, $rootuid, $rootgid);
        return;
    }

    my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1);

    die "cannot format volume '$volid' with no storage\n" if !$storage;

    PVE::Storage::activate_volumes($storage_cfg, [$volid]);

    my $path = PVE::Storage::path($storage_cfg, $volid);

    my ($vtype, undef, undef, undef, undef, $isBase, $format) =
        PVE::Storage::parse_volname($storage_cfg, $volid);

    die "cannot format volume '$volid' (format == $format)\n"
        if $format ne 'raw';

    mkfs($path, $rootuid, $rootgid);
}
2295
# Free every volume in $vollist. A failure to free one volume only produces
# a warning, the remaining volumes are still processed.
sub destroy_disks {
    my ($storecfg, $vollist) = @_;

    for my $volid (@$vollist) {
        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        if (my $err = $@) {
            warn $err;
        }
    }
}
2304
# Allocate (and, for raw images, format) the volumes requested in $settings
# and record the resulting mountpoint strings in $conf.
#
# $storecfg - storage configuration
# $vmid     - container ID the new volumes are allocated for
# $settings - configuration-like hash whose mountpoints are walked with
#             foreach_mountpoint(); a volume of the form '<storage>:<size>'
#             (size in GB, may be fractional) requests a new allocation,
#             anything else is taken over unchanged
# $conf     - container configuration; receives one entry per mountpoint and
#             is also the source of the ID mapping for the root owner
#
# Returns an array reference with the volume IDs that were newly allocated.
# On any error all volumes allocated so far are freed again and the error is
# re-thrown.
sub create_disks {
    my ($storecfg, $vmid, $settings, $conf) = @_;

    my $vollist = [];

    eval {
        my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
        my $chown_vollist = [];

        foreach_mountpoint($settings, sub {
            my ($ms, $mountpoint) = @_;

            my $volid = $mountpoint->{volume};

            my ($storage) = PVE::Storage::parse_volume_id($volid, 1);

            if ($storage && ($volid =~ m/^([^:\s]+):(\d+(\.\d+)?)$/)) {
                my $size_gb = $2;

                my $size_kb = int(${size_gb}*1024) * 1024;

                my $scfg = PVE::Storage::storage_config($storecfg, $storage);
                my $type = $scfg->{type};
                # fixme: use better naming ct-$vmid-disk-X.raw?

                # Decide which volume format the storage type gets: 'raw'
                # images are formatted with ext4, 'subvol' volumes only need
                # their ownership adjusted afterwards.
                my $format;
                if ($type eq 'dir' || $type eq 'nfs') {
                    # size 0 selects a subvol on directory based storages
                    $format = $size_kb > 0 ? 'raw' : 'subvol';
                } elsif ($type eq 'zfspool') {
                    $format = 'subvol';
                } elsif ($type eq 'drbd' || $type eq 'lvm' || $type eq 'lvmthin') {
                    $format = 'raw';
                } elsif ($type eq 'rbd') {
                    die "krbd option must be enabled on storage type '$type'\n" if !$scfg->{krbd};
                    $format = 'raw';
                } else {
                    die "unable to create containers on storage type '$type'\n";
                }

                $volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, $format,
                                                   undef, $size_kb);

                # Register the volume before doing anything that may fail, so
                # the error handler below frees it even if formatting dies.
                push @$vollist, $volid;

                if ($format eq 'raw') {
                    format_disk($storecfg, $volid, $rootuid, $rootgid);
                } else {
                    push @$chown_vollist, $volid;
                }

                $mountpoint->{volume} = $volid;
                $mountpoint->{size} = $size_kb * 1024;
            }
            # else: use specified/existing volid/dir/device

            $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
        });

        # Subvolumes are not formatted, so hand their top-level directory to
        # the container's root user explicitly.
        PVE::Storage::activate_volumes($storecfg, $chown_vollist, undef);
        foreach my $volid (@$chown_vollist) {
            my $path = PVE::Storage::path($storecfg, $volid, undef);
            chown($rootuid, $rootgid, $path);
        }
        PVE::Storage::deactivate_volumes($storecfg, $chown_vollist, undef);
    };
    # free allocated images on error
    if (my $err = $@) {
        destroy_disks($storecfg, $vollist);
        die $err;
    }
    return $vollist;
}
2382
2383 # bash completion helper
2384
# Bash completion helper: list the volume IDs offered as completion for a
# template/archive argument.
#
# $cmdname - name of the command being completed; 'restore' selects content
#            type 'backup', every other command selects 'vztmpl'
# $pname   - parameter name (unused)
# $cvalue  - text typed so far; a leading '<storage>:' restricts the listing
#            to that storage
#
# Returns an array reference of volume ID strings.
sub complete_os_templates {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    my $storeid;
    $storeid = $1 if $cvalue =~ m/^([^:]+):/;

    my $vtype = 'vztmpl';
    $vtype = 'backup' if $cmdname eq 'restore';

    my $data = PVE::Storage::template_list($cfg, $storeid, $vtype);

    my @volids;
    for my $items (values %$data) {
        for my $item (@$items) {
            my $volid = $item->{volid};
            next if !defined($volid);
            push @volids, $volid;
        }
    }

    return \@volids;
}
2408
# Shared worker for the complete_ctid* bash completion helpers.
#
# $running - undef: return every ID reported by vmstatus()
#            true:  only non-template containers that are currently active
#            false (but defined): only non-template containers that are not
#            active
#
# Returns an array reference of container IDs.
my $complete_ctid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();

    my $active_hash = list_active_containers();

    my $wanted = sub {
        my ($id) = @_;

        my $d = $idlist->{$id};

        # Without a requested state there is nothing to filter on.
        return 1 if !defined($running);

        return 0 if $d->{template};

        my $is_active = $active_hash->{$id} ? 1 : 0;
        my $want_active = $running ? 1 : 0;

        return $is_active == $want_active;
    };

    my @ids = grep { $wanted->($_) } keys %$idlist;

    return \@ids;
};
2430
# Bash completion helper: all container IDs, without any state filter.
sub complete_ctid {
    return $complete_ctid_full->();
}
2434
# Bash completion helper: IDs of non-template containers that are not active.
sub complete_ctid_stopped {
    return $complete_ctid_full->(0);
}
2438
# Bash completion helper: IDs of non-template containers that are active.
sub complete_ctid_running {
    return $complete_ctid_full->(1);
}
2442
# Extract the user/group ID mappings from a container configuration.
#
# $conf - container configuration; $conf->{lxc} is expected to be a list of
#         [key, value] pairs, of which only 'lxc.id_map' entries are used
#
# Returns the list ($id_map, $rootuid, $rootgid):
#   $id_map  - array reference of [type, container-id, host-id, length]
#   $rootuid - host ID that container uid 0 is mapped to (0 if unmapped)
#   $rootgid - host ID that container gid 0 is mapped to (0 if unmapped)
#
# Dies if an 'lxc.id_map' value does not have the form
# '<u|g> <ct> <host> <length>'.
sub parse_id_maps {
    my ($conf) = @_;

    my @maps;
    my %root = (u => 0, g => 0);

    for my $entry (@{ $conf->{lxc} || [] }) {
        my ($key, $value) = @$entry;
        next unless $key eq 'lxc.id_map';

        my ($type, $ct, $host, $length) =
            $value =~ /^([ug])\s+(\d+)\s+(\d+)\s+(\d+)\s*$/
            or die "failed to parse id_map: $value\n";

        push @maps, [$type, $ct, $host, $length];

        # Only a mapping that starts at container ID 0 names the host ID of
        # the container's root user/group.
        $root{$type} = $host if $ct == 0;
    }

    if (!@maps && $conf->{unprivileged}) {
        # Should we read them from /etc/subuid?
        @maps = (['u', '0', '100000', '65536'],
                 ['g', '0', '100000', '65536']);
        $root{u} = $root{g} = 100000;
    }

    return (\@maps, $root{u}, $root{g});
}
2475
# Build the command prefix that runs a command through lxc-usernsexec with
# the given ID mappings.
#
# $id_map - array reference of mapping entries, each an array reference whose
#           elements are joined with ':' to form one '-m' argument
#
# Returns an array reference: ['lxc-usernsexec', '-m', <map>, ..., '--'], or
# an empty array reference when there are no mappings.
sub userns_command {
    my ($id_map) = @_;

    return [] if !@$id_map;

    my @cmd = ('lxc-usernsexec');
    foreach my $entry (@$id_map) {
        push @cmd, '-m', join(':', @$entry);
    }
    push @cmd, '--';

    return \@cmd;
}
2483
2484 1;