]>
Commit | Line | Data |
---|---|---|
1 | package PVE::LXC; | |
2 | ||
3 | use strict; | |
4 | use warnings; | |
5 | ||
6 | use POSIX qw(EINTR); | |
7 | ||
8 | use Socket; | |
9 | ||
10 | use File::Path; | |
11 | use File::Spec; | |
12 | use Cwd qw(); | |
13 | use Fcntl qw(O_RDONLY); | |
14 | ||
15 | use PVE::Exception qw(raise_perm_exc); | |
16 | use PVE::Storage; | |
17 | use PVE::SafeSyslog; | |
18 | use PVE::INotify; | |
19 | use PVE::Tools qw($IPV6RE $IPV4RE dir_glob_foreach lock_file lock_file_full); | |
20 | use PVE::Network; | |
21 | use PVE::AccessControl; | |
22 | use PVE::ProcFSTools; | |
23 | use PVE::LXC::Config; | |
24 | use Time::HiRes qw (gettimeofday); | |
25 | ||
26 | use Data::Dumper; | |
27 | ||
# Name of the local node, looked up once when the module is loaded.
my $nodename = PVE::INotify::nodename();

# CPU information of the host, read once when the module is loaded.
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();

# Common tar command line flags; declared with 'our' so the list is
# reachable as $PVE::LXC::COMMON_TAR_FLAGS from other packages.
our $COMMON_TAR_FLAGS = [
    '--sparse',
    '--numeric-owner',
    '--acls',
    '--xattrs',
    '--xattrs-include=user.*',
    '--xattrs-include=security.capability',
    '--warning=no-xattr-write',
];
37 | ||
# List the containers that are configured on the local node.
#
# Returns a hash reference of the form { $vmid => { type => 'lxc' } }.
# The result is empty when the cluster VM list (or its 'ids' member) is
# not available.
sub config_list {
    my $result = {};

    my $vmlist = PVE::Cluster::get_vmlist();
    return $result if !($vmlist && $vmlist->{ids});

    my $ids = $vmlist->{ids};
    for my $vmid (keys %$ids) {
        next if !$vmid; # skip CT0

        my $entry = $ids->{$vmid};
        my $node = $entry->{node};
        my $type = $entry->{type};

        # only guests of type 'lxc' that live on this node are of interest
        next unless $node && $node eq $nodename;
        next unless $type && $type eq 'lxc';

        $result->{$vmid}->{type} = 'lxc';
    }

    return $result;
}
53 | ||
# Remove the configuration file of container $vmid on the local node.
# Returns whatever unlink() returns (the number of files removed).
sub destroy_config {
    my ($vmid) = @_;

    my @files = PVE::LXC::Config->config_file($vmid, $nodename);

    return unlink @files;
}
59 | ||
60 | # container status helpers | |
61 | ||
# Determine which containers are currently active.
#
# Scans /proc/net/unix for socket paths of the form
# "@/var/lib/lxc/<vmid>/command" and collects the <vmid> part.
#
# Returns a hash reference { $vmid => 1, ... }. An empty hash is returned
# when /proc/net/unix cannot be opened.
sub list_active_containers {

    my $filename = "/proc/net/unix";

    # similar test is used by lxccontainers.c: list_active_containers
    my $res = {};

    # Use the builtin open() instead of IO::File: this file never loads
    # IO::File itself, so IO::File->new only worked when some other module
    # happened to have loaded it already.
    open(my $fh, '<', $filename) or return $res;

    while (defined(my $line = <$fh>)) {
        # the socket path is the last whitespace separated column
        if ($line =~ m/^[a-f0-9]+:\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+\d+\s+(\S+)$/) {
            my $path = $1;
            if ($path =~ m!^@/var/lib/lxc/(\d+)/command$!) {
                $res->{$1} = 1;
            }
        }
    }

    close($fh);

    return $res;
}
85 | ||
# warning: this is slow
#
# Check whether container $vmid shows up in list_active_containers().
# Returns 1 if it does and undef otherwise.
sub check_running {
    my ($vmid) = @_;

    my $running = list_active_containers();

    return defined($running->{$vmid}) ? 1 : undef;
}
96 | ||
# Query disk usage of a running container through the root directory of
# process $pid ("/proc/$pid/root/"). $vmid is accepted but not used.
# Returns the result of PVE::Tools::df() called with the value 1 as its
# second argument.
sub get_container_disk_usage {
    my ($vmid, $pid) = @_;

    my $rootdir = "/proc/$pid/root/";

    return PVE::Tools::df($rootdir, 1);
}
102 | ||
# Per-container CPU accounting remembered between vmstatus() calls.
my $last_proc_vmid_stat;

# Read 'cpuacct.stat' of container $vmid and extract the user and system
# counters.
#
# Returns a hash reference with the keys 'utime' and 'stime'; the hash is
# empty when the file content does not have the expected layout.
my $parse_cpuacct_stat = sub {
    my ($vmid) = @_;

    my $raw = read_cgroup_value('cpuacct', $vmid, 'cpuacct.stat', 1);

    my %stat;
    if (my ($user, $system) = $raw =~ m/^user (\d+)\nsystem (\d+)\n/) {
        @stat{qw(utime stime)} = ($user, $system);
    }

    return \%stat;
};
121 | ||
# Collect status information about containers.
#
# Parameters:
#   $opt_vmid - optional; when given, only this container is examined,
#               otherwise all containers returned by config_list().
#
# Returns a hash reference keyed by vmid. Each entry carries: type, pid
# (running containers only), status ('running'/'stopped'), name, cpus,
# lock, disk, maxdisk, mem, swap, maxmem, maxswap, uptime, cpu, netout,
# netin, diskread, diskwrite and template.
sub vmstatus {
    my ($opt_vmid) = @_;

    my $list = $opt_vmid ? { $opt_vmid => { type => 'lxc' }} : config_list();

    my $active_hash = list_active_containers();

    my $cpucount = $cpuinfo->{cpus} || 1;

    my $cdtime = gettimeofday;

    # first pass: static data from the configuration, plus defaults for all
    # dynamic counters
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};

        eval { $d->{pid} = find_lxc_pid($vmid) if defined($active_hash->{$vmid}); };
        warn $@ if $@; # ignore errors (consider them stopped)

        $d->{status} = $d->{pid} ? 'running' : 'stopped';

        my $cfspath = PVE::LXC::Config->cfs_config_path($vmid);
        my $conf = PVE::Cluster::cfs_read_file($cfspath) || {};

        $d->{name} = $conf->{'hostname'} || "CT$vmid";
        $d->{name} =~ s/[\s]//g;

        $d->{cpus} = $conf->{cpulimit} || $cpucount;

        $d->{lock} = $conf->{lock} || '';

        if ($d->{pid}) {
            my $res = get_container_disk_usage($vmid, $d->{pid});
            $d->{disk} = $res->{used};
            $d->{maxdisk} = $res->{total};
        } else {
            $d->{disk} = 0;
            # use 4GB by default ??
            if (my $rootfs = $conf->{rootfs}) {
                my $rootinfo = PVE::LXC::Config->parse_ct_rootfs($rootfs);
                $d->{maxdisk} = $rootinfo->{size} || (4*1024*1024*1024);
            } else {
                $d->{maxdisk} = 4*1024*1024*1024;
            }
        }

        $d->{mem} = 0;
        $d->{swap} = 0;
        $d->{maxmem} = ($conf->{memory}||512)*1024*1024;
        $d->{maxswap} = ($conf->{swap}//0)*1024*1024;

        $d->{uptime} = 0;
        $d->{cpu} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $d->{template} = PVE::LXC::Config->is_template($conf);
    }

    # second pass: live counters of running containers
    foreach my $vmid (keys %$list) {
        my $d = $list->{$vmid};
        my $pid = $d->{pid};

        next if !$pid; # skip stopped CTs

        # stat() returns an empty list when /proc/$pid is gone (the process
        # exited in the meantime); keep the default uptime of 0 in that case
        # instead of computing "time - undef".
        my $ctime = (stat("/proc/$pid"))[10]; # 10 = ctime
        $d->{uptime} = time - $ctime if defined($ctime); # the method lxcfs uses

        $d->{mem} = read_cgroup_value('memory', $vmid, 'memory.usage_in_bytes');
        $d->{swap} = read_cgroup_value('memory', $vmid, 'memory.memsw.usage_in_bytes') - $d->{mem};

        my $blkio_bytes = read_cgroup_value('blkio', $vmid, 'blkio.throttle.io_service_bytes', 1);
        my @bytes = split(/\n/, $blkio_bytes);
        foreach my $byte (@bytes) {
            if (my ($key, $value) = $byte =~ /(Read|Write)\s+(\d+)/) {
                $d->{diskread} = $value if $key eq 'Read';
                $d->{diskwrite} = $value if $key eq 'Write';
            }
        }

        my $pstat = &$parse_cpuacct_stat($vmid);

        # the counters are missing when cpuacct.stat could not be parsed
        my $used = ($pstat->{utime} // 0) + ($pstat->{stime} // 0);

        my $old = $last_proc_vmid_stat->{$vmid};
        if (!$old) {
            # first sample for this container: nothing to compare against yet
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        # elapsed time since the previous sample, scaled by the CPU count and
        # the user_hz value from the CPU info
        my $dtime = ($cdtime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_vmid_stat->{$vmid} = {
                time => $cdtime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            # too little time has passed, report the previous value again
            $d->{cpu} = $old->{cpu};
        }
    }

    my $netdev = PVE::ProcFSTools::read_proc_net_dev();

    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^veth([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $list->{$vmid};

        next if !$d;

        # NOTE(review): receive is booked as 'netout' and transmit as 'netin',
        # presumably because these are the host-side veth counters - confirm.
        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};

    }

    return $list;
}
252 | ||
# Read the cgroup attribute "/sys/fs/cgroup/$group/lxc/$vmid/$name".
#
# With a true $full the value of PVE::Tools::file_get_contents() is
# returned, otherwise that of PVE::Tools::file_read_firstline().
sub read_cgroup_value {
    my ($group, $vmid, $name, $full) = @_;

    my $path = "/sys/fs/cgroup/$group/lxc/$vmid/$name";

    if ($full) {
        return PVE::Tools::file_get_contents($path);
    }

    return PVE::Tools::file_read_firstline($path);
}
262 | ||
# Write $value to the cgroup attribute
# "/sys/fs/cgroup/$group/lxc/$vmid/$name". Nothing is written when that
# path does not exist.
sub write_cgroup_value {
    my ($group, $vmid, $name, $value) = @_;

    my $path = "/sys/fs/cgroup/$group/lxc/$vmid/$name";

    if (-e $path) {
        PVE::ProcFSTools::write_proc_entry($path, $value);
    }
}
270 | ||
# Find processes whose command line is exactly "lxc-console -n <vmid>"
# (optionally with a /usr/bin/ prefix).
#
# Returns a hash reference mapping each vmid to an array reference of the
# matching process ids.
sub find_lxc_console_pids {

    my $pids_for = {};

    my $inspect = sub {
        my ($pid) = @_;

        my $cmdline = PVE::Tools::file_read_firstline("/proc/$pid/cmdline");
        return if !$cmdline;

        # arguments in /proc/<pid>/cmdline are separated by NUL bytes
        my @args = split(/\0/, $cmdline);

        # search for lxc-console -n <vmid>
        return unless scalar(@args) == 3
            && $args[1] eq '-n'
            && $args[2] =~ m/^\d+$/
            && $args[0] =~ m|^(/usr/bin/)?lxc-console$|;

        push @{$pids_for->{$args[2]}}, $pid;
    };

    PVE::Tools::dir_glob_foreach('/proc', '\d+', $inspect);

    return $pids_for;
}
296 | ||
# Ask 'lxc-info -n $vmid -p' for the PID of container $vmid.
#
# Returns the PID taken from the "PID: <number>" output line.
# Dies when no such line was seen.
sub find_lxc_pid {
    my ($vmid) = @_;

    my $pid;

    PVE::Tools::run_command(
        ['lxc-info', '-n', $vmid, '-p'],
        outfunc => sub {
            my ($line) = @_;
            $pid = $1 if $line =~ m/^PID:\s+(\d+)$/;
        },
    );

    die "unable to get PID for CT $vmid (not running?)\n" if !$pid;

    return $pid;
}
311 | ||
# Note: we cannot use Net::IP, because that only allows strict
# CIDR networks
#
# Parse an "address/prefix" string with a prefix length from 8 to 32.
#
# Returns a hash reference with the keys 'address' and 'netmask' (looked
# up in $PVE::Network::ipv4_reverse_mask). On failure undef is returned
# if $noerr is true, otherwise the function dies.
sub parse_ipv4_cidr {
    my ($cidr, $noerr) = @_;

    if ($cidr =~ m!^($IPV4RE)(?:/(\d+))$!) {
        my ($address, $prefix) = ($1, $2);

        if ($prefix > 7 && $prefix <= 32) {
            return {
                address => $address,
                netmask => $PVE::Network::ipv4_reverse_mask->[$prefix],
            };
        }
    }

    return undef if $noerr;

    die "unable to parse ipv4 address/mask\n";
}
325 | ||
326 | ||
# (Re)generate "/var/lib/lxc/$vmid/config" from the container
# configuration $conf.
#
# For templates the file is removed and nothing else happens. Otherwise
# the settings are assembled in order, "/var/lib/lxc/$vmid/rootfs" is
# created and the file is written. $storage_cfg is accepted but not used.
#
# Dies on a missing 'arch', 'ostype' or 'rootfs' setting and on an
# unsupported ostype.
sub update_lxc_config {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $dir = "/var/lib/lxc/$vmid";

    if ($conf->{template}) {

        unlink "$dir/config";

        return;
    }

    # the lines of the resulting file, in output order
    my @lines;
    my $set = sub {
        my ($key, $value) = @_;
        push @lines, "$key = $value\n";
    };

    die "missing 'arch' - internal error" if !$conf->{arch};
    $set->('lxc.arch', $conf->{arch});

    my $unprivileged = $conf->{unprivileged};
    my $custom_idmap = grep { $_->[0] eq 'lxc.id_map' } @{$conf->{lxc}};

    my $ostype = $conf->{ostype} || die "missing 'ostype' - internal error";
    if ($ostype !~ /^(?:debian | ubuntu | centos | fedora | opensuse | archlinux | alpine | gentoo | unmanaged)$/x) {
        die "implement me (ostype $ostype)";
    }

    # prefer the distribution specific include, fall back to the generic one
    my $inc = "/usr/share/lxc/config/$ostype.common.conf";
    $inc = "/usr/share/lxc/config/common.conf" if !-f $inc;
    $set->('lxc.include', $inc);
    if ($unprivileged || $custom_idmap) {
        $inc = "/usr/share/lxc/config/$ostype.userns.conf";
        $inc = "/usr/share/lxc/config/userns.conf" if !-f $inc;
        $set->('lxc.include', $inc);
    }

    # WARNING: DO NOT REMOVE this without making sure that loop device nodes
    # cannot be exposed to the container with r/w access (cgroup perms).
    # When this is enabled mounts will still remain in the monitor's namespace
    # after the container unmounted them and thus will not detach from their
    # files while the container is running!
    $set->('lxc.monitor.unshare', 1);

    # Should we read them from /etc/subuid?
    if ($unprivileged && !$custom_idmap) {
        $set->('lxc.id_map', 'u 0 100000 65536');
        $set->('lxc.id_map', 'g 0 100000 65536');
    }

    if (!PVE::LXC::Config->has_dev_console($conf)) {
        $set->('lxc.console', 'none');
        $set->('lxc.cgroup.devices.deny', 'c 5:1 rwm');
    }

    my $ttycount = PVE::LXC::Config->get_tty_count($conf);
    $set->('lxc.tty', $ttycount);

    # some init scripts expect a linux terminal (turnkey).
    $set->('lxc.environment', 'TERM=linux');

    my $utsname = $conf->{hostname} || "CT$vmid";
    $set->('lxc.utsname', $utsname);

    # memory and swap are multiplied by 1024*1024 to get bytes; the memsw
    # limit covers memory plus swap
    my $memory = $conf->{memory} || 512;
    my $swap = $conf->{swap} // 0;
    $set->('lxc.cgroup.memory.limit_in_bytes', int($memory*1024*1024));
    $set->('lxc.cgroup.memory.memsw.limit_in_bytes', int(($memory + $swap)*1024*1024));

    if (my $cpulimit = $conf->{cpulimit}) {
        $set->('lxc.cgroup.cpu.cfs_period_us', 100000);
        $set->('lxc.cgroup.cpu.cfs_quota_us', int(100000*$cpulimit));
    }

    my $shares = $conf->{cpuunits} || 1024;
    $set->('lxc.cgroup.cpu.shares', $shares);

    die "missing 'rootfs' configuration\n"
        if !defined($conf->{rootfs});

    # NOTE(review): the parsed value is not used below; presumably the call
    # is kept so that an invalid rootfs setting is rejected - confirm.
    my $mountpoint = PVE::LXC::Config->parse_ct_rootfs($conf->{rootfs});

    $set->('lxc.rootfs', "$dir/rootfs");

    my $netcount = 0;
    for my $k (keys %$conf) {
        my ($ind) = $k =~ m/^net(\d+)$/;
        next if !defined($ind);

        my $d = PVE::LXC::Config->parse_lxc_network($conf->{$k});
        $netcount++;
        $set->('lxc.network.type', 'veth');
        $set->('lxc.network.veth.pair', "veth${vmid}i${ind}");
        for my $prop (qw(hwaddr name mtu)) {
            $set->("lxc.network.$prop", $d->{$prop}) if defined($d->{$prop});
        }
    }

    # raw lxc.* entries from the configuration are passed through unchanged
    if (my $lxcconf = $conf->{lxc}) {
        for my $entry (@$lxcconf) {
            my ($k, $v) = @$entry;
            $netcount++ if $k eq 'lxc.network.type';
            $set->($k, $v);
        }
    }

    $set->('lxc.network.type', 'empty') if !$netcount;

    File::Path::mkpath("$dir/rootfs");

    PVE::Tools::file_set_contents("$dir/config", join('', @lines));
}
440 | ||
# verify and cleanup nameserver list (replace \0 with ' ')
#
# Splits the list with PVE::Tools::split_list(), passes every entry to
# PVE::JSONSchema::pve_verify_ip() and returns the entries joined by a
# single space.
sub verify_nameserver_list {
    my ($nameserver_list) = @_;

    my @verified;
    for my $server (PVE::Tools::split_list($nameserver_list)) {
        PVE::JSONSchema::pve_verify_ip($server);
        push @verified, $server;
    }

    return join(' ', @verified);
}
453 | ||
# Normalize a search domain list: split it with PVE::Tools::split_list()
# and return the entries joined by a single space.
sub verify_searchdomain_list {
    my ($searchdomain_list) = @_;

    # todo: should we add checks for valid dns domains?
    my @domains = PVE::Tools::split_list($searchdomain_list);

    return join(' ', @domains);
}
465 | ||
# Build the command used to open a console for container $vmid.
#
# The console mode is taken from PVE::LXC::Config->get_cmode($conf).
# Returns an array reference holding the command and its arguments.
# Dies with "internal error" for an unknown mode.
sub get_console_command {
    my ($vmid, $conf) = @_;

    my $cmode = PVE::LXC::Config->get_cmode($conf);

    my %command_for = (
        console => ['lxc-console', '-n', $vmid, '-t', 0],
        tty     => ['lxc-console', '-n', $vmid],
        shell   => ['lxc-attach', '--clear-env', '-n', $vmid],
    );

    my $cmd = $command_for{$cmode};
    die "internal error" if !$cmd;

    return $cmd;
}
481 | ||
# Return the addresses configured on net0 as the list ($ipv4, $ipv6).
#
# A trailing "/<digits>" is removed from each address. The keywords
# dhcp/manual (IPv4) and auto/dhcp/manual (IPv6) yield undef for the
# respective entry. Returns undef when $conf has no net0.
sub get_primary_ips {
    my ($conf) = @_;

    # return data from net0

    return undef if !defined($conf->{net0});
    my $net = PVE::LXC::Config->parse_lxc_network($conf->{net0});

    # Turn a configured value into a plain address: false values are passed
    # through untouched, keywords become undef, the prefix length is cut off.
    my $plain_address = sub {
        my ($value, $keywords) = @_;

        return $value if !$value;
        return undef if $value =~ $keywords;

        $value =~ s!/\d+$!!;
        return $value;
    };

    my $ipv4 = $plain_address->($net->{ip}, qr/^(dhcp|manual)$/);
    my $ipv6 = $plain_address->($net->{ip6}, qr/^(auto|dhcp|manual)$/);

    return ($ipv4, $ipv6);
}
509 | ||
# Free the storage volume behind a mountpoint.
#
# Does nothing unless PVE::LXC::Config->classify_mountpoint() reports
# 'volume'. The volume is only freed when its owner, as reported by
# PVE::Storage::parse_volname(), equals $vmid.
sub delete_mountpoint_volume {
    my ($storage_cfg, $vmid, $volume) = @_;

    my $class = PVE::LXC::Config->classify_mountpoint($volume);
    return if $class ne 'volume';

    # only the third value (the owner) is needed here
    my (undef, undef, $owner) = PVE::Storage::parse_volname($storage_cfg, $volume);

    PVE::Storage::vdisk_free($storage_cfg, $volume) if $vmid == $owner;
}
518 | ||
# Destroy container $vmid: free the volumes of all mountpoints, remove
# the files and directories below "/var/lib/lxc/$vmid" and finally delete
# the container configuration.
sub destroy_lxc_container {
    my ($storage_cfg, $vmid, $conf) = @_;

    my $free_volume = sub {
        my ($ms, $mountpoint) = @_;
        delete_mountpoint_volume($storage_cfg, $vmid, $mountpoint->{volume});
    };
    PVE::LXC::Config->foreach_mountpoint($conf, $free_volume);

    my $ctdir = "/var/lib/lxc/$vmid";
    rmdir "$ctdir/rootfs";
    unlink "$ctdir/config";
    rmdir $ctdir;
    destroy_config($vmid);

    #my $cmd = ['lxc-destroy', '-n', $vmid ];
    #PVE::Tools::run_command($cmd);
}
535 | ||
# Cleanup after a container was stopped: deactivate all of its volumes
# unless $keepActive is true. Errors are reported with warn() only.
# $vmid is accepted but not used.
sub vm_stop_cleanup {
    my ($storage_cfg, $vmid, $conf, $keepActive) = @_;

    eval {
        # this only leaves the eval block
        return if $keepActive;

        my $vollist = PVE::LXC::Config->get_vm_volumes($conf);
        PVE::Storage::deactivate_volumes($storage_cfg, $vollist);
    };
    if (my $err = $@) {
        warn $err; # avoid errors - just warn
    }
}
548 | ||
# Numeric "not equal" that tolerates undefined operands: two undefined
# values are equal (0), an undefined and a defined value differ (1),
# two defined values are compared with '!='.
my $safe_num_ne = sub {
    my ($left, $right) = @_;

    if (defined($left) && defined($right)) {
        return $left != $right;
    }

    return (defined($left) || defined($right)) ? 1 : 0;
};
558 | ||
# String "not equal" that tolerates undefined operands: two undefined
# values are equal (0), an undefined and a defined value differ (1),
# two defined values are compared with 'ne'.
my $safe_string_ne = sub {
    my ($left, $right) = @_;

    if (defined($left) && defined($right)) {
        return $left ne $right;
    }

    return (defined($left) || defined($right)) ? 1 : 0;
};
568 | ||
# Apply a changed network device configuration ($newnet) to container
# $vmid and record every completed step in $conf->{$opt}.
#
#  * no previous configuration: the interface is hot-plugged
#  * hwaddr or name changed: the veth device is deleted and re-plugged
#  * bridge, tag or firewall changed: the veth device is re-plugged
#  * only the rate changed: the rate limit alone is updated
#
# update_ipconfig() is called at the end in all cases.
# Dies for interface types other than 'veth'.
sub update_net {
    my ($vmid, $conf, $opt, $newnet, $netid, $rootdir) = @_;

    # for when there are physical interfaces
    die "cannot update interface of type $newnet->{type}"
        if $newnet->{type} ne 'veth';

    my $veth = "veth${vmid}i${netid}";
    my $eth = $newnet->{name};

    my $oldnetcfg = $conf->{$opt};
    if (!$oldnetcfg) {
        hotplug_net($vmid, $conf, $opt, $newnet, $netid);
    } else {
        my $oldnet = PVE::LXC::Config->parse_lxc_network($oldnetcfg);

        my $needs_new_device = $safe_string_ne->($oldnet->{hwaddr}, $newnet->{hwaddr})
            || $safe_string_ne->($oldnet->{name}, $newnet->{name});

        if ($needs_new_device) {

            PVE::Network::veth_delete($veth);
            delete $conf->{$opt};
            PVE::LXC::Config->write_config($vmid, $conf);

            hotplug_net($vmid, $conf, $opt, $newnet, $netid);

        } else {
            my $needs_replug = $safe_string_ne->($oldnet->{bridge}, $newnet->{bridge})
                || $safe_num_ne->($oldnet->{tag}, $newnet->{tag})
                || $safe_num_ne->($oldnet->{firewall}, $newnet->{firewall});

            if ($needs_replug) {

                if ($oldnet->{bridge}) {
                    PVE::Network::tap_unplug($veth);
                    # persist the unplugged state before plugging again
                    delete @{$oldnet}{qw(bridge tag firewall)};
                    $conf->{$opt} = PVE::LXC::Config->print_lxc_network($oldnet);
                    PVE::LXC::Config->write_config($vmid, $conf);
                }

                PVE::Network::tap_plug($veth, @{$newnet}{qw(bridge tag firewall trunks rate)});
                # This includes the rate:
                for my $key (qw(bridge tag firewall rate)) {
                    $oldnet->{$key} = $newnet->{$key} if $newnet->{$key};
                }
            } elsif ($safe_string_ne->($oldnet->{rate}, $newnet->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($veth, $newnet->{rate});
                $oldnet->{rate} = $newnet->{rate};
            }
            $conf->{$opt} = PVE::LXC::Config->print_lxc_network($oldnet);
            PVE::LXC::Config->write_config($vmid, $conf);
        }
    }

    update_ipconfig($vmid, $conf, $opt, $eth, $newnet, $rootdir);
}
626 | ||
# Hot-plug a veth network interface into the running container $vmid.
#
# Creates the veth pair "veth${vmid}i${netid}" / "...p", plugs the host
# side, hands the peer to the container with lxc-device and sets the link
# up inside the container. Afterwards the bridge, tag, firewall, hwaddr
# and name settings are stored in $conf->{$opt} and the configuration is
# written.
sub hotplug_net {
    my ($vmid, $conf, $opt, $newnet, $netid) = @_;

    my $veth = "veth${vmid}i${netid}";
    my $vethpeer = "${veth}p";
    my $eth = $newnet->{name};

    my ($bridge, $hwaddr) = @{$newnet}{qw(bridge hwaddr)};
    PVE::Network::veth_create($veth, $vethpeer, $bridge, $hwaddr);
    PVE::Network::tap_plug($veth, $bridge, @{$newnet}{qw(tag firewall trunks rate)});

    # attach peer in container
    PVE::Tools::run_command(['lxc-device', '-n', $vmid, 'add', $vethpeer, "$eth" ]);

    # link up peer in container
    PVE::Tools::run_command(
        ['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', '/sbin/ip', 'link', 'set', $eth ,'up' ]);

    # only settings with a true value are recorded
    my $done = { type => 'veth' };
    for my $key (qw(bridge tag firewall hwaddr name)) {
        $done->{$key} = $newnet->{$key} if $newnet->{$key};
    }
    $conf->{$opt} = PVE::LXC::Config->print_lxc_network($done);

    PVE::LXC::Config->write_config($vmid, $conf);
}
653 | ||
# Apply changed IP address / gateway settings of interface $eth to the
# running container $vmid, for IPv4 first and then for IPv6.
#
# Parameters:
#   $vmid    - container id
#   $conf    - container configuration; $conf->{$opt} is updated and saved
#   $opt     - name of the network option
#   $eth     - interface name inside the container
#   $newnet  - parsed new network settings
#   $rootdir - passed on to PVE::LXC::Setup->new()
#
# Per address family the steps are: add the new address, replace the
# default route, remove the old address, then save the configuration and
# call setup_network(). A failure in one of the first two steps is
# reported with warn() and cancels the change for that family.
sub update_ipconfig {
    my ($vmid, $conf, $opt, $eth, $newnet, $rootdir) = @_;

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);

    my $optdata = PVE::LXC::Config->parse_lxc_network($conf->{$opt});

    # run a command inside the network namespace of the container
    my $nscmd = sub {
        my $cmdargs = shift;
        PVE::Tools::run_command(['lxc-attach', '-n', $vmid, '-s', 'NETWORK', '--', @_], %$cmdargs);
    };
    my $ipcmd = sub { $nscmd->({}, '/sbin/ip', @_) };

    # values of the ip/ip6 settings that are keywords, not real addresses
    my $ip_keyword_re = qr/^(?:auto|dhcp|manual)$/;

    my $change_ip_config = sub {
        my ($ipversion) = @_;

        my $family_opt = "-$ipversion";
        my $suffix = $ipversion == 4 ? '' : $ipversion;
        my $gw= "gw$suffix";
        my $ip= "ip$suffix";

        my $newip = $newnet->{$ip};
        my $newgw = $newnet->{$gw};
        my $oldip = $optdata->{$ip};

        my $change_ip = $safe_string_ne->($oldip, $newip);
        my $change_gw = $safe_string_ne->($optdata->{$gw}, $newgw);

        return if !$change_ip && !$change_gw;

        # step 1: add new IP, if this fails we cancel
        my $is_real_ip = ($newip && $newip !~ $ip_keyword_re);
        if ($change_ip && $is_real_ip) {
            eval { $ipcmd->($family_opt, 'addr', 'add', $newip, 'dev', $eth); };
            if (my $err = $@) {
                warn $err;
                return;
            }
        }

        # step 2: replace gateway
        # If this fails we delete the added IP and cancel.
        # If it succeeds we save the config and delete the old IP, ignoring
        # errors. The config is then saved.
        # Note: 'ip route replace' can add
        if ($change_gw) {
            if ($newgw) {
                eval {
                    if ($is_real_ip && !PVE::Network::is_ip_in_cidr($newgw, $newip, $ipversion)) {
                        $ipcmd->($family_opt, 'route', 'add', $newgw, 'dev', $eth);
                    }
                    $ipcmd->($family_opt, 'route', 'replace', 'default', 'via', $newgw);
                };
                if (my $err = $@) {
                    warn $err;
                    # the route was not replaced, the old IP is still available
                    # rollback (delete new IP) and cancel
                    # Only an address that step 1 actually added can be
                    # removed again; keywords were never added.
                    if ($change_ip && $is_real_ip) {
                        eval { $ipcmd->($family_opt, 'addr', 'del', $newip, 'dev', $eth); };
                        warn $@ if $@; # no need to die here
                    }
                    return;
                }
            } else {
                eval { $ipcmd->($family_opt, 'route', 'del', 'default'); };
                # if the route was not deleted, the guest might have deleted it manually
                # warn and continue
                warn $@ if $@;
            }
        }

        # from this point on we save the configuration
        # step 3: delete old IP ignoring errors
        # The same keywords as in step 1 are skipped here, including 'manual',
        # as none of them is an address that could be deleted.
        if ($change_ip && $oldip && $oldip !~ $ip_keyword_re) {
            # We need to enable promote_secondaries, otherwise our newly added
            # address will be removed along with the old one.
            my $promote = 0;
            eval {
                if ($ipversion == 4) {
                    $nscmd->({ outfunc => sub { $promote = int(shift) } },
                        'cat', "/proc/sys/net/ipv4/conf/$eth/promote_secondaries");
                    $nscmd->({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=1");
                }
                $ipcmd->($family_opt, 'addr', 'del', $oldip, 'dev', $eth);
            };
            warn $@ if $@; # no need to die here

            # restore the previous promote_secondaries setting
            if ($ipversion == 4) {
                $nscmd->({}, 'sysctl', "net.ipv4.conf.$eth.promote_secondaries=$promote");
            }
        }

        foreach my $property ($ip, $gw) {
            if ($newnet->{$property}) {
                $optdata->{$property} = $newnet->{$property};
            } else {
                delete $optdata->{$property};
            }
        }
        $conf->{$opt} = PVE::LXC::Config->print_lxc_network($optdata);
        PVE::LXC::Config->write_config($vmid, $conf);
        $lxc_setup->setup_network($conf);
    };

    $change_ip_config->(4);
    $change_ip_config->(6);

}
763 | ||
# Move the current process into namespace $which of process $pid.
#
# Opens "/proc/$pid/ns/$which" read-only and hands the file descriptor
# together with $type to PVE::Tools::setns(). $vmid is only used in the
# error messages. Dies when the open or the setns call fails.
my $enter_namespace = sub {
    my ($vmid, $pid, $which, $type) = @_;

    my $nspath = "/proc/$pid/ns/$which";

    sysopen(my $fd, $nspath, O_RDONLY)
        or die "failed to open $which namespace of container $vmid: $!\n";

    PVE::Tools::setns(fileno($fd), $type)
        or die "failed to enter $which namespace of container $vmid: $!\n";

    close $fd;
};
772 | ||
# Child side of sync_container_namespace(): enter the mount namespace of
# process $pid, signal the parent over $socket, read the mount table the
# parent sends back and sync every listed mountpoint except those of type
# 'fuse.lxcfs'. Errors while syncing a mountpoint are reported with
# warn() only.
my $do_syncfs = sub {
    my ($vmid, $pid, $socket) = @_;

    $enter_namespace->($vmid, $pid, 'mnt', PVE::Tools::CLONE_NEWNS);

    # Tell the parent process to start reading our /proc/mounts
    print {$socket} "go\n";
    $socket->flush();

    # Receive /proc/self/mounts
    my $mountdata = do { local $/ = undef; <$socket> };
    close $socket;

    # Now sync all mountpoints...
    my $mounts = PVE::ProcFSTools::parse_mounts($mountdata);
    for my $entry (@$mounts) {
        my (undef, $dir, $fs) = @$entry;
        next if $fs eq 'fuse.lxcfs';

        eval { PVE::Tools::sync_mountpoint($dir); };
        if (my $err = $@) {
            warn $err;
        }
    }
};
795 | ||
# Sync all filesystems mounted inside container $vmid.
#
# A child process is forked which enters the mount namespace of the
# container (see $do_syncfs). As soon as the child reports "go", the
# parent reads /proc/<child>/mounts and sends its content to the child
# through a socket pair; the child then syncs the mountpoints.
#
# Dies when the socket pair or the fork fails, when the child does not
# report "go", when the mount table cannot be read, or when the child
# exits with a non-zero status. The child is always reaped before an
# error is raised once it has been forked.
sub sync_container_namespace {
    my ($vmid) = @_;
    my $pid = find_lxc_pid($vmid);

    # SOCK_DGRAM is nicer for barriers but cannot be slurped
    socketpair my $pfd, my $cfd, AF_UNIX, SOCK_STREAM, PF_UNSPEC
        or die "failed to create socketpair: $!\n";

    my $child = fork();
    die "fork failed: $!\n" if !defined($child);

    if (!$child) {
        eval {
            close $pfd;
            &$do_syncfs($vmid, $pid, $cfd);
        };
        if (my $err = $@) {
            warn $err;
            POSIX::_exit(1);
        }
        POSIX::_exit(0);
    }
    close $cfd;

    # Error exit for the parent: closing our end of the socket gives a
    # child that still waits for the mount table an EOF, then the child is
    # reaped so that no zombie is left behind.
    my $abort = sub {
        my ($msg) = @_;
        close $pfd;
        while (waitpid($child, 0) != $child) {}
        die $msg;
    };

    # $go is undef when the child went away without sending anything
    my $go = <$pfd>;
    $abort->("failed to enter container namespace\n")
        if !defined($go) || $go ne "go\n";

    open(my $mounts, '<', "/proc/$child/mounts")
        or $abort->("failed to open container's /proc/mounts: $!\n");
    my $mountdata = do { local $/ = undef; <$mounts> };
    close $mounts;
    print {$pfd} $mountdata;
    close $pfd;

    while (waitpid($child, 0) != $child) {}
    die "failed to sync container namespace\n" if $? != 0;
}
832 | ||
# Convert the root filesystem volume of container $vmid into a base
# volume and store the resulting volume id in $conf->{rootfs}.
#
# Dies when the storage does not provide the 'template' feature for the
# volume.
sub template_create {
    my ($vmid, $conf) = @_;

    my $storecfg = PVE::Storage::config();

    my $rootinfo = PVE::LXC::Config->parse_ct_rootfs($conf->{rootfs});
    my $volid = $rootinfo->{volume};

    if (!PVE::Storage::volume_has_feature($storecfg, 'template', $volid)) {
        die "Template feature is not available for '$volid'\n";
    }

    PVE::Storage::activate_volumes($storecfg, [$volid]);

    # from here on the configuration refers to the base volume
    $rootinfo->{volume} = PVE::Storage::vdisk_create_base($storecfg, $volid);
    $conf->{rootfs} = PVE::LXC::Config->print_ct_mountpoint($rootinfo, 1);

    PVE::LXC::Config->write_config($vmid, $conf);
}
852 | ||
# Check that $authuser may set the options in $newconf and delete the
# options listed in $delete.
#
# Every option is mapped to one privilege which is checked through
# $rpcenv->check_vm_perm():
#   cpus, cpuunits, cpulimit                   -> VM.Config.CPU
#   rootfs, mp<N>                              -> VM.Config.Disk
#   memory, swap                               -> VM.Config.Memory
#   net<N>, nameserver, searchdomain, hostname -> VM.Config.Network
#   anything else                              -> VM.Config.Options
# A rootfs/mp<N> value that is set (not deleted) must additionally be of
# type 'volume', otherwise raise_perm_exc() is called.
#
# 'root@pam' passes without any check. Returns 1.
sub check_ct_modify_config_perm {
    my ($rpcenv, $authuser, $vmid, $pool, $newconf, $delete) = @_;

    return 1 if $authuser eq 'root@pam';

    my $require = sub {
        my ($privilege) = @_;
        $rpcenv->check_vm_perm($authuser, $vmid, $pool, [$privilege]);
    };

    my $check = sub {
        my ($opt, $delete) = @_;

        if ($opt eq 'cpus' || $opt eq 'cpuunits' || $opt eq 'cpulimit') {
            $require->('VM.Config.CPU');
        } elsif ($opt eq 'rootfs' || $opt =~ /^mp\d+$/) {
            $require->('VM.Config.Disk');
            return if $delete;

            my $data;
            if ($opt eq 'rootfs') {
                $data = PVE::LXC::Config->parse_ct_rootfs($newconf->{$opt});
            } else {
                $data = PVE::LXC::Config->parse_ct_mountpoint($newconf->{$opt});
            }
            raise_perm_exc("mountpoint type $data->{type}") if $data->{type} ne 'volume';
        } elsif ($opt eq 'memory' || $opt eq 'swap') {
            $require->('VM.Config.Memory');
        } elsif ($opt =~ m/^net\d+$/ || $opt eq 'nameserver' ||
                 $opt eq 'searchdomain' || $opt eq 'hostname') {
            $require->('VM.Config.Network');
        } else {
            $require->('VM.Config.Options');
        }
    };

    for my $opt (keys %$newconf) {
        $check->($opt, 0);
    }
    for my $opt (@$delete) {
        $check->($opt, 1);
    }

    return 1;
}
887 | ||
# Unmount all mountpoints of container $vmid below
# "/var/lib/lxc/$vmid/rootfs", visiting them through
# foreach_mountpoint_reverse().
#
# Mountpoints without a volume or mount path, and paths that are not
# mounted, are skipped. A failing umount is fatal unless $noerr is true,
# in which case it is only reported with warn().
sub umount_all {
    my ($vmid, $storage_cfg, $conf, $noerr) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    my $volid_list = PVE::LXC::Config->get_vm_volumes($conf);

    my $unmount = sub {
        my ($ms, $mountpoint) = @_;

        my ($volid, $mount) = @{$mountpoint}{qw(volume mp)};
        return if !$volid || !$mount;

        # collapse repeated slashes
        (my $mount_path = "$rootdir/$mount") =~ s!/+!/!g;

        return if !PVE::ProcFSTools::is_mounted($mount_path);

        eval {
            PVE::Tools::run_command(['umount', '-d', $mount_path]);
        };
        my $err = $@;
        return if !$err;

        die $err if !$noerr;
        warn $err;
    };

    PVE::LXC::Config->foreach_mountpoint_reverse($conf, $unmount);
}
919 | ||
# Activate the volumes of container $vmid and mount all of its
# mountpoints below "/var/lib/lxc/$vmid/rootfs".
#
# Returns the root directory. When mounting fails, everything is
# unmounted again (errors of that cleanup are only warned about) and the
# original error is re-thrown.
sub mount_all {
    my ($vmid, $storage_cfg, $conf) = @_;

    my $rootdir = "/var/lib/lxc/$vmid/rootfs";
    File::Path::make_path($rootdir);

    my $volid_list = PVE::LXC::Config->get_vm_volumes($conf);
    PVE::Storage::activate_volumes($storage_cfg, $volid_list);

    my $mount_one = sub {
        my ($ms, $mountpoint) = @_;

        mountpoint_mount($mountpoint, $rootdir, $storage_cfg);
    };

    eval { PVE::LXC::Config->foreach_mountpoint($conf, $mount_one); };
    my $err = $@;
    if ($err) {
        warn "mounting container failed\n";
        umount_all($vmid, $storage_cfg, $conf, 1);
        die $err;
    }

    return $rootdir;
}
944 | ||
945 | ||
# Convenience wrapper: call mountpoint_mount() with an undefined root
# directory and return its result.
sub mountpoint_mount_path {
    my ($mountpoint, $storage_cfg, $snapname) = @_;

    my $rootdir = undef;

    return mountpoint_mount($mountpoint, $rootdir, $storage_cfg, $snapname);
}
951 | ||
# Refuse mount paths that are reached through a symlink: the canonicalized
# path has to be identical to its fully resolved form, otherwise we die.
my $check_mount_path = sub {
    my ($path) = @_;
    $path = File::Spec->canonpath($path);
    my $real = Cwd::realpath($path);
    # realpath() returns undef when the path cannot be resolved; report that
    # explicitly instead of comparing against undef (which warns and yields a
    # misleading "modified by symlink" message).
    die "unable to resolve mount path '$path' - $!\n" if !defined($real);
    if ($real ne $path) {
        die "mount path modified by symlink: $path != $real";
    }
};
960 | ||
# Look up the loop device that is backed by file $path.
# Parses the output of `losetup --associated` and returns the /dev/loopN
# name found there (the last one if several lines match), or undef when
# no line matches.
sub query_loopdev {
    my ($path) = @_;

    my $loopdev;
    PVE::Tools::run_command(
        ['losetup', '--associated', $path],
        outfunc => sub {
            my ($line) = @_;
            $loopdev = $1 if $line =~ m@^(/dev/loop\d+):@;
        },
    );

    return $loopdev;
}
974 | ||
# Run a function with a file attached to a loop device.
# The loop device is always detached afterwards (or set to autoclear).
# Returns the loop device.
#
# Parameters:
#   $func - code reference, called with the loop device path
#   $file - path of the file to attach
#
# An error thrown by $func is re-thrown after the detach attempt. If the
# detach itself fails as well, that failure is only warned about so that it
# cannot hide the original error; a detach failure after a successful $func
# is still fatal.
sub run_with_loopdev {
    my ($func, $file) = @_;
    my $device = query_loopdev($file);
    # Try to reuse an existing device
    if ($device) {
        # We assume that whoever setup the loop device is responsible for
        # detaching it.
        $func->($device);
        return $device;
    }

    my $parser = sub {
        my $line = shift;
        if ($line =~ m@^(/dev/loop\d+)$@) {
            $device = $1;
        }
    };
    PVE::Tools::run_command(['losetup', '--show', '-f', $file], outfunc => $parser);
    die "failed to setup loop device for $file\n" if !$device;

    eval { $func->($device); };
    my $err = $@;

    eval { PVE::Tools::run_command(['losetup', '-d', $device]); };
    my $detach_err = $@;

    if ($err) {
        warn $detach_err if $detach_err;
        die $err;
    }
    die $detach_err if $detach_err;

    return $device;
}
1003 | ||
# Bind-mount $dir onto $dest, passing @extra_opts to mount(8).
# With a true $ro the bind mount is remounted read-only afterwards; if that
# remount fails the mount is removed again and the remount error re-thrown.
sub bindmount {
    my ($dir, $dest, $ro, @extra_opts) = @_;

    my $bind_cmd = ['mount', '-o', 'bind', @extra_opts, $dir, $dest];
    PVE::Tools::run_command($bind_cmd);

    if ($ro) {
        my $remount_cmd = ['mount', '-o', 'bind,remount,ro', $dest];
        eval { PVE::Tools::run_command($remount_cmd); };
        my $err = $@;
        if ($err) {
            warn "bindmount error\n";
            # don't leave writable bind-mounts behind...
            PVE::Tools::run_command(['umount', $dest]);
            die $err;
        }
    }
}
1017 | ||
# Mount a single mount point below $rootdir, or only resolve its source path.
#
# Parameters:
#   $mountpoint  - hash; the keys read here are volume, mp, type, ro, quota
#                  and acl
#   $rootdir     - directory below which the mount point is mounted;
#                  use $rootdir = undef to just return the corresponding
#                  mount path
#   $storage_cfg - storage configuration handed to the PVE::Storage helpers
#   $snapname    - optional snapshot name (snapshots are mounted read-only)
#
# Returns nothing when the mount point has no volume or no mp. Otherwise
# returns the source path in scalar context and
# ($path, $use_loopdev, $mounted_dev) in list context.
# Dies on unsupported storage types or image formats, on a mount path that
# fails the symlink check, and when a mount command fails.
sub mountpoint_mount {
    my ($mountpoint, $rootdir, $storage_cfg, $snapname) = @_;

    my $volid = $mountpoint->{volume};
    my $mount = $mountpoint->{mp};
    my $type = $mountpoint->{type};
    # quota is only honoured for writable, non-snapshot mounts
    my $quota = !$snapname && !$mountpoint->{ro} && $mountpoint->{quota};
    my $mounted_dev;

    return if !$volid || !$mount;

    my $mount_path;

    if (defined($rootdir)) {
        $rootdir =~ s!/+$!!;
        $mount_path = "$rootdir/$mount";
        $mount_path =~ s!/+!/!g;
        &$check_mount_path($mount_path);
        File::Path::mkpath($mount_path);
    }

    my ($storage) = PVE::Storage::parse_volume_id($volid, 1);

    die "unknown snapshot path for '$volid'" if !$storage && defined($snapname);

    my $optstring = '';
    my $acl = $mountpoint->{acl};
    if (defined($acl)) {
        $optstring .= ($acl ? 'acl' : 'noacl');
    }
    my $readonly = $mountpoint->{ro};

    # Declared unconditionally: "my ... if COND" has undefined behaviour in
    # Perl (see perlsyn, "Statement Modifiers").
    my @extra_opts;
    @extra_opts = ('-o', $optstring) if $optstring;

    if ($storage) {

        my $scfg = PVE::Storage::storage_config($storage_cfg, $storage);
        my $path = PVE::Storage::path($storage_cfg, $volid, $snapname);

        my ($vtype, undef, undef, undef, undef, $isBase, $format) =
            PVE::Storage::parse_volname($storage_cfg, $volid);

        $format = 'iso' if $vtype eq 'iso'; # allow to handle iso files

        if ($format eq 'subvol') {
            if ($mount_path) {
                if ($snapname) {
                    if ($scfg->{type} eq 'zfspool') {
                        my $path_arg = $path;
                        $path_arg =~ s!^/+!!;
                        PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, '-t', 'zfs', $path_arg, $mount_path]);
                    } else {
                        die "cannot mount subvol snapshots for storage type '$scfg->{type}'\n";
                    }
                } else {
                    if (defined($acl) && $scfg->{type} eq 'zfspool') {
                        my $acltype = ($acl ? 'acltype=posixacl' : 'acltype=noacl');
                        my (undef, $name) = PVE::Storage::parse_volname($storage_cfg, $volid);
                        $name .= "\@$snapname" if defined($snapname);
                        PVE::Tools::run_command(['zfs', 'set', $acltype, "$scfg->{pool}/$name"]);
                    }
                    bindmount($path, $mount_path, $readonly, @extra_opts);
                    warn "cannot enable quota control for bind mounted subvolumes\n" if $quota;
                }
            }
            return wantarray ? ($path, 0, undef) : $path;
        } elsif ($format eq 'raw' || $format eq 'iso') {
            # NOTE: 'mount' performs canonicalization without the '-c' switch, which for
            # device-mapper devices is special-cased to use the /dev/mapper symlinks.
            # Our autodev hook expects the /dev/dm-* device currently
            # and will create the /dev/mapper symlink accordingly
            ($path) = (Cwd::realpath($path) =~ /^(.*)$/s); # realpath() taints
            my $domount = sub {
                my ($path) = @_;
                if ($mount_path) {
                    if ($format eq 'iso') {
                        PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, $path, $mount_path]);
                    } elsif ($isBase || defined($snapname)) {
                        PVE::Tools::run_command(['mount', '-o', 'ro,noload', @extra_opts, $path, $mount_path]);
                    } else {
                        if ($quota) {
                            push @extra_opts, '-o', 'usrjquota=aquota.user,grpjquota=aquota.group,jqfmt=vfsv0';
                        }
                        push @extra_opts, '-o', 'ro' if $readonly;
                        PVE::Tools::run_command(['mount', @extra_opts, $path, $mount_path]);
                    }
                }
            };
            my $use_loopdev = 0;
            if ($scfg->{path}) {
                # storage with a 'path' setting: attach the image to a loop device
                $mounted_dev = run_with_loopdev($domount, $path);
                $use_loopdev = 1;
            } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' ||
                     $scfg->{type} eq 'rbd' || $scfg->{type} eq 'lvmthin') {
                $mounted_dev = $path;
                &$domount($path);
            } else {
                die "unsupported storage type '$scfg->{type}'\n";
            }
            return wantarray ? ($path, $use_loopdev, $mounted_dev) : $path;
        } else {
            die "unsupported image format '$format'\n";
        }
    } elsif ($type eq 'device') {
        push @extra_opts, '-o', 'ro' if $readonly;
        push @extra_opts, '-o', 'usrjquota=aquota.user,grpjquota=aquota.group,jqfmt=vfsv0' if $quota;
        # See the NOTE above about devicemapper canonicalization
        my ($devpath) = (Cwd::realpath($volid) =~ /^(.*)$/s); # realpath() taints
        PVE::Tools::run_command(['mount', @extra_opts, $volid, $mount_path]) if $mount_path;
        return wantarray ? ($volid, 0, $devpath) : $volid;
    } elsif ($type eq 'bind') {
        die "directory '$volid' does not exist\n" if ! -d $volid;
        &$check_mount_path($volid);
        bindmount($volid, $mount_path, $readonly, @extra_opts) if $mount_path;
        warn "cannot enable quota control for bind mounts\n" if $quota;
        return wantarray ? ($volid, 0, undef) : $volid;
    }

    die "unsupported storage";
}
1139 | ||
# Create an ext4 filesystem (with the 'mmp' feature) on $dev whose root
# directory is owned by $rootuid:$rootgid.
# Undefined IDs default to 0, so a call without IDs no longer produces the
# malformed option "root_owner=:" (plus "uninitialized" warnings).
sub mkfs {
    my ($dev, $rootuid, $rootgid) = @_;

    $rootuid = 0 if !defined($rootuid);
    $rootgid = 0 if !defined($rootgid);

    PVE::Tools::run_command(['mkfs.ext4', '-O', 'mmp',
                             '-E', "root_owner=$rootuid:$rootgid",
                             $dev]);
}
1147 | ||
# Format volume $volid with mkfs(), giving the filesystem root to
# $rootuid:$rootgid.
# A $volid below /dev/ is formatted directly. Any other volume must belong
# to a storage and be of format 'raw'; it is activated and its path is
# formatted. Dies otherwise.
sub format_disk {
    my ($storage_cfg, $volid, $rootuid, $rootgid) = @_;

    if ($volid =~ m!^/dev/.+!) {
        # pass the owner along here too, the same way as for storage volumes
        mkfs($volid, $rootuid, $rootgid);
        return;
    }

    my ($storage) = PVE::Storage::parse_volume_id($volid, 1);

    die "cannot format volume '$volid' with no storage\n" if !$storage;

    PVE::Storage::activate_volumes($storage_cfg, [$volid]);

    my $path = PVE::Storage::path($storage_cfg, $volid);

    # only the format (7th element returned by parse_volname) matters here
    my (undef, undef, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storage_cfg, $volid);

    die "cannot format volume '$volid' (format == $format)\n"
        if $format ne 'raw';

    mkfs($path, $rootuid, $rootgid);
}
1172 | ||
# Free every volume in $vollist via PVE::Storage::vdisk_free.
# Failures are reported with warn and do not stop the loop.
sub destroy_disks {
    my ($storecfg, $vollist) = @_;

    for my $volid (@$vollist) {
        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        my $err = $@;
        warn $err if $err;
    }
}
1181 | ||
# Allocate the volumes requested in $settings and store the resulting mount
# point strings in $conf.
#
# A mount point whose volume has the form "<storage>:<size in GB>" gets a
# new volume allocated on that storage ('raw' volumes are formatted,
# 'subvol' volumes are chown'ed to the container's root uid/gid). Any other
# volume specification is written to $conf unchanged.
#
# Parameters:
#   $storecfg - storage configuration
#   $vmid     - container ID the volumes are allocated for
#   $settings - mount point settings to process
#   $conf     - container configuration; updated in place
#
# Returns an array reference with the newly allocated volume IDs.
# On error all volumes allocated so far are freed and the error re-thrown.
sub create_disks {
    my ($storecfg, $vmid, $settings, $conf) = @_;

    my $vollist = [];

    eval {
        my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
        my $chown_vollist = [];

        PVE::LXC::Config->foreach_mountpoint($settings, sub {
            my ($ms, $mountpoint) = @_;

            my $volid = $mountpoint->{volume};

            my ($storage) = PVE::Storage::parse_volume_id($volid, 1);

            if ($storage && ($volid =~ m/^([^:\s]+):(\d+(\.\d+)?)$/)) {
                my $size_gb = $2;

                my $size_kb = int(${size_gb}*1024) * 1024;

                my $scfg = PVE::Storage::storage_config($storecfg, $storage);
                # fixme: use better naming ct-$vmid-disk-X.raw?

                # Allocate a volume and register it right away, so the error
                # path below also frees it when a later step (e.g. formatting)
                # fails.
                my $alloc = sub {
                    my ($format, $size) = @_;
                    my $new_volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, $format,
                                                              undef, $size);
                    push @$vollist, $new_volid;
                    return $new_volid;
                };

                if ($scfg->{type} eq 'dir' || $scfg->{type} eq 'nfs') {
                    if ($size_kb > 0) {
                        $volid = $alloc->('raw', $size_kb);
                        format_disk($storecfg, $volid, $rootuid, $rootgid);
                    } else {
                        # size 0 requests a subvol (plain directory) instead of an image
                        $volid = $alloc->('subvol', 0);
                        push @$chown_vollist, $volid;
                    }
                } elsif ($scfg->{type} eq 'zfspool') {

                    $volid = $alloc->('subvol', $size_kb);
                    push @$chown_vollist, $volid;
                } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' || $scfg->{type} eq 'lvmthin') {

                    $volid = $alloc->('raw', $size_kb);
                    format_disk($storecfg, $volid, $rootuid, $rootgid);

                } elsif ($scfg->{type} eq 'rbd') {

                    die "krbd option must be enabled on storage type '$scfg->{type}'\n" if !$scfg->{krbd};
                    $volid = $alloc->('raw', $size_kb);
                    format_disk($storecfg, $volid, $rootuid, $rootgid);
                } else {
                    die "unable to create containers on storage type '$scfg->{type}'\n";
                }
                $mountpoint->{volume} = $volid;
                $mountpoint->{size} = $size_kb * 1024;
                $conf->{$ms} = PVE::LXC::Config->print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            } else {
                # use specified/existing volid/dir/device
                $conf->{$ms} = PVE::LXC::Config->print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
            }
        });

        PVE::Storage::activate_volumes($storecfg, $chown_vollist, undef);
        foreach my $volid (@$chown_vollist) {
            my $path = PVE::Storage::path($storecfg, $volid, undef);
            chown($rootuid, $rootgid, $path)
                or warn "unable to change owner of '$path' to $rootuid:$rootgid - $!\n";
        }
        PVE::Storage::deactivate_volumes($storecfg, $chown_vollist, undef);
    };
    # free allocated images on error
    if (my $err = $@) {
        destroy_disks($storecfg, $vollist);
        die $err;
    }
    return $vollist;
}
1259 | ||
1260 | # bash completion helper | |
1261 | ||
# Completion helper: list the volume IDs offered for the current word.
# For the 'restore' command volumes of type 'backup' are listed, for every
# other command those of type 'vztmpl'. When $cvalue already starts with
# "<storage>:", the listing is restricted to that storage.
# Returns an array reference of volume IDs.
sub complete_os_templates {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    # stays undef unless the value carries a storage prefix
    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $vtype = 'vztmpl';
    $vtype = 'backup' if $cmdname eq 'restore';

    my $data = PVE::Storage::template_list($cfg, $storeid, $vtype);

    my $res = [];
    for my $id (keys %$data) {
        for my $item (@{$data->{$id}}) {
            my $volid = $item->{volid};
            next if !defined($volid);
            push @$res, $volid;
        }
    }

    return $res;
}
1285 | ||
# Collect the container IDs reported by vmstatus().
# With $running undefined all IDs are returned. Otherwise templates are
# skipped and only containers whose presence in list_active_containers()
# matches the truth value of $running are kept.
# Returns an array reference of IDs.
my $complete_ctid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();
    my $active_hash = list_active_containers();

    my $filter_by_state = defined($running);

    my $res = [];
    for my $id (keys %$idlist) {
        my $d = $idlist->{$id};
        if ($filter_by_state) {
            next if $d->{template};
            my $is_active = $active_hash->{$id} ? 1 : 0;
            my $want_active = $running ? 1 : 0;
            next if $is_active != $want_active;
        }
        push @$res, $id;
    }

    return $res;
};
1307 | ||
# Completion helper: every container ID, regardless of state.
sub complete_ctid {
    return $complete_ctid_full->();
}
1311 | ||
# Completion helper: IDs of non-template containers that are not active.
sub complete_ctid_stopped {
    my $running = 0;
    return $complete_ctid_full->($running);
}
1315 | ||
# Completion helper: IDs of non-template containers that are active.
sub complete_ctid_running {
    my $running = 1;
    return $complete_ctid_full->($running);
}
1319 | ||
# Extract the uid/gid mappings from the 'lxc.id_map' entries in $conf->{lxc}.
# Each value must look like "<u|g> <ct id> <host id> <length>"; anything
# else is fatal. The host id of a mapping starting at container id 0 is
# taken as the root uid (type 'u') or root gid (type 'g').
# When no mapping is configured and $conf->{unprivileged} is set, a default
# mapping of 65536 ids starting at host id 100000 is used.
# Returns ($id_map, $rootuid, $rootgid).
sub parse_id_maps {
    my ($conf) = @_;

    my $id_map = [];
    my ($rootuid, $rootgid) = (0, 0);

    my $lxc = $conf->{lxc};
    foreach my $entry (@$lxc) {
        my ($key, $value) = @$entry;
        next if $key ne 'lxc.id_map';

        my ($type, $ct, $host, $length) =
            $value =~ /^([ug])\s+(\d+)\s+(\d+)\s+(\d+)\s*$/
            or die "failed to parse id_map: $value\n";

        push @$id_map, [$type, $ct, $host, $length];

        next if $ct != 0;
        # the pattern only admits 'u' and 'g'
        if ($type eq 'u') {
            $rootuid = $host;
        } else {
            $rootgid = $host;
        }
    }

    if ($conf->{unprivileged} && !@$id_map) {
        # Should we read them from /etc/subuid?
        $id_map = [ ['u', '0', '100000', '65536'],
                    ['g', '0', '100000', '65536'] ];
        $rootuid = $rootgid = 100000;
    }

    return ($id_map, $rootuid, $rootgid);
}
1352 | ||
# Build the 'lxc-usernsexec' command prefix for the given id map: one
# "-m <colon-joined mapping>" pair per entry, terminated by '--'.
# Returns an empty array reference when the map is empty.
sub userns_command {
    my ($id_map) = @_;

    return [] if !@$id_map;

    my @cmd = ('lxc-usernsexec');
    for my $mapping (@$id_map) {
        push @cmd, '-m', join(':', @$mapping);
    }
    push @cmd, '--';

    return \@cmd;
}
1360 | ||
1361 | ||
1362 | 1; |