]>
Commit | Line | Data |
---|---|---|
deaf7667 AD |
1 | #!/usr/bin/perl |
2 | ||
b056d074 DM |
3 | package lxc_pve_prestart_hook; |
4 | ||
deaf7667 AD |
5 | use strict; |
6 | use warnings; | |
4ed2b825 | 7 | |
3bd97c10 | 8 | use Fcntl qw(O_DIRECTORY :mode); |
deaf7667 | 9 | use File::Path; |
3bd97c10 | 10 | use POSIX; |
deaf7667 | 11 | |
36def186 | 12 | use PVE::CGroup; |
deaf7667 | 13 | use PVE::Cluster; |
0a49c44e | 14 | use PVE::LXC::Config; |
c9a5774b | 15 | use PVE::LXC::Setup; |
0a49c44e WB |
16 | use PVE::LXC::Tools; |
17 | use PVE::LXC; | |
f7073b99 | 18 | use PVE::SafeSyslog; |
0a49c44e | 19 | use PVE::Storage; |
3bd97c10 WB |
20 | use PVE::Syscall qw(:fsmount); |
21 | use PVE::Tools qw(AT_FDCWD O_PATH); | |
deaf7667 | 22 | |
0a49c44e WB |
# LXC 'pre-start' hook.
#
# The callback receives the container id ($vmid) and the hook variables
# ($vars, of which only ROOTFS_PATH is used here). In order, it:
#   - consumes the per-container skiplock flag file,
#   - requires cluster quorum and checks the config lock,
#   - removes leftover cgroups and stale state files,
#   - mounts every configured volume below the container root directory,
#   - calls PVE::LXC::Setup's pre_start_hook(),
#   - writes the list of quota-enabled block devices to the devices file.
PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
    my ($vmid, $vars, undef, undef) = @_;

    # The presence of this flag file disables the lock check below; the flag
    # is consumed (removed) as soon as it has been seen.
    my $skiplock_flag_fn = "/run/lxc/skiplock-$vmid";
    # Always initialize explicitly: the behaviour of `my $x = ... if COND;`
    # is undefined in perl, so it must not be used to declare a variable.
    my $skiplock = (-e $skiplock_flag_fn) ? 1 : 0;
    unlink $skiplock_flag_fn if $skiplock;

    PVE::Cluster::check_cfs_quorum(); # only start if we have quorum

    # Nothing to do if there is no config file for this container.
    return undef if ! -f PVE::LXC::Config->config_file($vmid);

    my $conf = PVE::LXC::Config->load_config($vmid);
    # The lock check is skipped when requested via the flag file or when the
    # config carries the 'mounted' lock.
    if (!$skiplock && !PVE::LXC::Config->has_lock($conf, 'mounted')) {
        PVE::LXC::Config->check_lock($conf);
    }

    cleanup_cgroups($vmid);

    my $storage_cfg = PVE::Storage::config();

    my $rootdir = $vars->{ROOTFS_PATH};

    # Delete any leftover reboot-trigger file
    unlink("/var/lib/lxc/$vmid/reboot");

    # The device list is rebuilt from scratch on every start.
    my $devlist_file = "/var/lib/lxc/$vmid/devices";
    unlink $devlist_file;
    my $devices = [];

    my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);

    # Unmount first when the user mounted the container with "pct mount".
    # Best effort: output is discarded and failures are ignored on purpose.
    eval {
        PVE::Tools::run_command(['umount', '--recursive', $rootdir], outfunc => sub {}, errfunc => sub {});
    };

    # Per-volume callback, invoked by foreach_volume() below with the config
    # key ($opt) and the parsed mount point.
    my $setup_mountpoint;
    if (!PVE::LXC::Tools::can_use_new_mount_api()) {
        # Legacy mode for old kernels:
        $setup_mountpoint = sub {
            my ($opt, $mountpoint) = @_;

            my (undef, undef, $dev) = PVE::LXC::mountpoint_mount(
                $mountpoint,
                $rootdir,
                $storage_cfg,
                undef,
                $rootuid,
                $rootgid,
            );
            # Only devices of quota-enabled mount points are recorded.
            push @$devices, $dev if $dev && $mountpoint->{quota};
        };
    } else {
        # With newer kernels we stage mount points and then use move_mount().
        # Stays undef until the rootfs has been inserted; afterwards it holds
        # the rootfs mount handle that all further mount points are placed
        # relative to.
        my $rootdir_fd = undef;
        $setup_mountpoint = sub {
            my ($opt, $mountpoint) = @_;

            my $dir = PVE::LXC::get_staging_mount_path($opt);
            my (undef, undef, $dev, $mount_fd) = PVE::LXC::mountpoint_stage(
                $mountpoint,
                $dir,
                $storage_cfg,
                undef,
                $rootuid,
                $rootgid,
            );

            my ($dest_dir, $dest_base_fd);
            if ($rootdir_fd) {
                # Mount relative to the rootdir fd.
                $dest_base_fd = $rootdir_fd;
                $dest_dir = './' . $mountpoint->{mp};
            } else {
                # Assert that 'rootfs' is the first one:
                die "foreach_mount() error\n" if $opt ne 'rootfs';

                # Mount the rootfs absolutely.
                # $rootdir is not controlled by the container, so this is fine.
                sysopen($dest_base_fd, '/', O_PATH | O_DIRECTORY)
                    or die "failed to open '/': $!\n";
                $dest_dir = $rootdir;
            }

            PVE::LXC::mountpoint_insert_staged(
                $mount_fd,
                $dest_base_fd,
                $dest_dir,
                $opt,
                $rootuid,
                $rootgid,
            );

            # From now on we mount inside our rootfs:
            if (!$rootdir_fd) {
                $rootdir_fd = $mount_fd;
            }

            # Only devices of quota-enabled mount points are recorded.
            push @$devices, $dev if $dev && $mountpoint->{quota};
        };
    }

    PVE::LXC::Config->foreach_volume($conf, $setup_mountpoint);

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);
    $lxc_setup->pre_start_hook();

    # Only logged, not fatal: the start continues even without cgroupv2
    # support in the container.
    if (PVE::CGroup::cgroup_mode() == 2) {
        if (!$lxc_setup->unified_cgroupv2_support()) {
            syslog('err', "CT $vmid does not support running in a pure cgroupv2 environment\n");
        }
    }

    if (@$devices) {
        # One "b:MAJOR:MINOR:PATH" line per collected block device.
        my $devlist = '';
        foreach my $dev (@$devices) {
            my ($mode, $rdev) = (stat($dev))[2,6];
            # Skip anything that cannot be stat()ed or is not a block device.
            next if !$mode || !S_ISBLK($mode) || !$rdev;
            my $major = PVE::Tools::dev_t_major($rdev);
            my $minor = PVE::Tools::dev_t_minor($rdev);
            $devlist .= "b:$major:$minor:$dev\n";
        }
        PVE::Tools::file_set_contents($devlist_file, $devlist);
    }
});
51ae28ec WB |
148 | |
# Leftover cgroups prevent lxc from starting without any useful information
# showing up in the journal, it is also often unable to properly clean them up
# at shutdown, so we do this here.
#
# Removes the 'lxc/$vmid' and 'lxc.monitor/$vmid' cgroup directories of the
# given container below /sys/fs/cgroup. Dies if a directory cannot be removed
# (see rmdir_recursive).
#
# Deliberately declared without a prototype: the only caller is compiled
# before this definition, so a prototype would never be applied to it and
# would merely trigger perl's "called too early to check prototype" warning.
sub cleanup_cgroups {
    my ($vmid) = @_;

    if (PVE::CGroup::cgroup_mode() == 2) {
        # Pure cgroupv2: a single hierarchy directly below /sys/fs/cgroup.
        rmdir_recursive("/sys/fs/cgroup/lxc/$vmid");
        rmdir_recursive("/sys/fs/cgroup/lxc.monitor/$vmid");
        return;
    }

    # Otherwise: one hierarchy per v1 controller, plus 'unified' if $v2 is set.
    my ($v1, $v2) = PVE::CGroup::get_cgroup_controllers();

    foreach my $controller (keys %$v1) {
        # `name=systemd` is mounted just as `systemd`
        (my $mount_name = $controller) =~ s/^name=//;
        rmdir_recursive("/sys/fs/cgroup/$mount_name/lxc/$vmid");
        rmdir_recursive("/sys/fs/cgroup/$mount_name/lxc.monitor/$vmid");
    }

    if ($v2) {
        rmdir_recursive("/sys/fs/cgroup/unified/lxc/$vmid");
        rmdir_recursive("/sys/fs/cgroup/unified/lxc.monitor/$vmid");
    }
}
174 | ||
# FIXME: This is an ugly version without openat() because perl has no equivalent
# of fdopendir() so we cannot readdir from an openat() opened handle.
#
# Removes the directory $path and all directories nested below it, using only
# rmdir(). Non-directory entries are never unlinked, so a tree containing any
# makes the final rmdir() fail.
#
# Returns silently if $path does not exist. Dies if $path cannot be opened for
# another reason or if a directory cannot be removed.
sub rmdir_recursive {
    my ($path) = @_;

    my $dh;
    if (!opendir($dh, $path)) {
        return if $!{ENOENT};
        die "failed to open directory '$path': $!\n";
    }

    while (defined(my $entry = readdir($dh))) {
        next if $entry eq '.' || $entry eq '..';
        my $next = "$path/$entry";
        # -d follows symlinks; never descend through one, otherwise
        # directories outside of $path could get removed.
        next if -l $next;
        next if ! -d $next;
        rmdir_recursive($next);
    }
    closedir($dh);

    rmdir($path) or die "failed to remove directory '$path': $!\n";
}