]>
Commit | Line | Data |
---|---|---|
deaf7667 AD |
1 | #!/usr/bin/perl |
2 | ||
b056d074 DM |
3 | package lxc_pve_prestart_hook; |
4 | ||
deaf7667 AD |
5 | use strict; |
6 | use warnings; | |
4ed2b825 | 7 | |
3bd97c10 | 8 | use Fcntl qw(O_DIRECTORY :mode); |
deaf7667 | 9 | use File::Path; |
3bd97c10 | 10 | use POSIX; |
deaf7667 | 11 | |
deaf7667 | 12 | use PVE::Cluster; |
0a49c44e | 13 | use PVE::LXC::Config; |
c9a5774b | 14 | use PVE::LXC::Setup; |
0a49c44e WB |
15 | use PVE::LXC::Tools; |
16 | use PVE::LXC; | |
17 | use PVE::Storage; | |
3bd97c10 WB |
18 | use PVE::Syscall qw(:fsmount); |
19 | use PVE::Tools qw(AT_FDCWD O_PATH); | |
deaf7667 | 20 | |
0a49c44e WB |
# Register the 'pre-start' hook for containers of type 'lxc'.
#
# The callback receives the container id ($vmid) and a hash of hook variables
# ($vars); only $vars->{ROOTFS_PATH} is used here. It performs, in order:
#   1. consume the optional skiplock flag file,
#   2. require cluster quorum and a present container config,
#   3. honour config locks (unless skiplock is set or the lock is 'mounted'),
#   4. remove leftover cgroup directories and stale state files,
#   5. mount all configured volumes below the rootfs path,
#   6. run the distribution specific pre-start setup,
#   7. write the list of quota-enabled block devices to the devices file.
# Any failure is reported by die()ing.
PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
    my ($vmid, $vars, undef, undef) = @_;

    # The flag file is a one-shot marker: remember that it existed, then remove it.
    # NOTE: do not write `my $x = 1 if COND;` here - a `my` declaration combined
    # with a statement modifier has undefined behaviour (see perlsyn).
    my $skiplock_flag_fn = "/run/lxc/skiplock-$vmid";
    my $skiplock = -e $skiplock_flag_fn ? 1 : 0;
    unlink $skiplock_flag_fn if $skiplock;

    PVE::Cluster::check_cfs_quorum(); # only start if we have quorum

    # Nothing to do when there is no config file for this vmid.
    return undef if ! -f PVE::LXC::Config->config_file($vmid);

    my $conf = PVE::LXC::Config->load_config($vmid);
    if (!$skiplock && !PVE::LXC::Config->has_lock($conf, 'mounted')) {
        PVE::LXC::Config->check_lock($conf);
    }

    cleanup_cgroups($vmid);

    my $storage_cfg = PVE::Storage::config();

    my $rootdir = $vars->{ROOTFS_PATH};

    # Delete any leftover reboot-trigger file
    unlink("/var/lib/lxc/$vmid/reboot");

    # The device list is rebuilt from scratch further below.
    my $devlist_file = "/var/lib/lxc/$vmid/devices";
    unlink $devlist_file;
    my $devices = [];

    my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);

    # Unmount first when the user mounted the container with "pct mount".
    # Best effort on purpose: output is discarded and errors are ignored, since
    # in the common case nothing is mounted there.
    eval {
        PVE::Tools::run_command(
            ['umount', '--recursive', $rootdir],
            outfunc => sub {},
            errfunc => sub {},
        );
    };

    # Callback invoked once per configured volume with ($opt, $mountpoint).
    my $setup_mountpoint;
    if (!PVE::LXC::Tools::can_use_new_mount_api()) {
        # Legacy mode for old kernels:
        $setup_mountpoint = sub {
            my ($opt, $mountpoint) = @_;

            my (undef, undef, $dev) = PVE::LXC::mountpoint_mount(
                $mountpoint,
                $rootdir,
                $storage_cfg,
                undef,
                $rootuid,
                $rootgid,
            );
            push @$devices, $dev if $dev && $mountpoint->{quota};
        };
    } else {
        # With newer kernels we stage mount points and then use move_mount().
        # $rootdir_fd stays undef until the rootfs has been mounted; afterwards
        # it holds the rootfs mount fd and all further mounts go relative to it.
        my $rootdir_fd = undef;
        $setup_mountpoint = sub {
            my ($opt, $mountpoint) = @_;

            my $dir = PVE::LXC::get_staging_mount_path($opt);
            my (undef, undef, $dev, $mount_fd) = PVE::LXC::mountpoint_stage(
                $mountpoint,
                $dir,
                $storage_cfg,
                undef,
                $rootuid,
                $rootgid,
            );

            my ($dest_dir, $dest_base_fd);
            if ($rootdir_fd) {
                # Mount relative to the rootdir fd.
                $dest_base_fd = $rootdir_fd;
                $dest_dir = './' . $mountpoint->{mp};
            } else {
                # Assert that 'rootfs' is the first one:
                die "foreach_mount() error\n" if $opt ne 'rootfs';

                # Mount the rootfs absolutely.
                # $rootdir is not controlled by the container, so this is fine.
                sysopen($dest_base_fd, '/', O_PATH | O_DIRECTORY)
                    or die "failed to open '/': $!\n";
                $dest_dir = $rootdir;
            }

            PVE::LXC::mountpoint_insert_staged(
                $mount_fd,
                $dest_base_fd,
                $dest_dir,
                $opt,
                $rootuid,
                $rootgid,
            );

            # From now on we mount inside our rootfs:
            if (!$rootdir_fd) {
                $rootdir_fd = $mount_fd;
            }

            push @$devices, $dev if $dev && $mountpoint->{quota};
        };
    }

    PVE::LXC::Config->foreach_volume($conf, $setup_mountpoint);

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);
    $lxc_setup->pre_start_hook();

    # Record every quota-enabled block device as "b:<major>:<minor>:<path>".
    # Entries that cannot be stat()ed or are not block devices are skipped.
    if (@$devices) {
        my $devlist = '';
        foreach my $dev (@$devices) {
            my ($mode, $rdev) = (stat($dev))[2,6];
            next if !$mode || !S_ISBLK($mode) || !$rdev;
            my $major = PVE::Tools::dev_t_major($rdev);
            my $minor = PVE::Tools::dev_t_minor($rdev);
            $devlist .= "b:$major:$minor:$dev\n";
        }
        PVE::Tools::file_set_contents($devlist_file, $devlist);
    }
});
51ae28ec WB |
140 | |
# Leftover cgroups prevent lxc from starting without any useful information
# showing up in the journal, it is also often unable to properly clean them up
# at shutdown, so we do this here.
#
# Removes the 'lxc/$vmid' and 'lxc.monitor/$vmid' directories below every
# relevant cgroup base directory:
#   - cgroup mode 2: directly below /sys/fs/cgroup
#   - otherwise:     below each v1 controller directory and, if a v2 hierarchy
#                    is reported as well, below /sys/fs/cgroup/unified
#
# Takes the container id as its only argument. Dies if a directory cannot be
# removed (see rmdir_recursive()).
#
# Deliberately declared without a prototype: the only call site is compiled
# before this definition, so a prototype would never be applied and would only
# produce a "called too early to check prototype" warning.
sub cleanup_cgroups {
    my ($vmid) = @_;

    my @base_dirs;
    if (PVE::CGroup::cgroup_mode() == 2) {
        push @base_dirs, '/sys/fs/cgroup';
    } else {
        my ($v1, $v2) = PVE::LXC::get_cgroup_subsystems();

        foreach my $controller (keys %$v1) {
            # `name=systemd` is mounted just as `systemd`
            (my $dirname = $controller) =~ s/^name=//;
            push @base_dirs, "/sys/fs/cgroup/$dirname";
        }

        push @base_dirs, '/sys/fs/cgroup/unified' if $v2;
    }

    foreach my $base (@base_dirs) {
        rmdir_recursive("$base/lxc/$vmid");
        rmdir_recursive("$base/lxc.monitor/$vmid");
    }
}
166 | ||
# FIXME: This is an ugly version without openat() because perl has no equivalent
# of fdopendir() so we cannot readdir from an openat() opened handle.
#
# Remove the directory $path together with all of its sub directories, depth
# first, using rmdir() only. Non-directory entries are never unlinked, so this
# only succeeds on trees whose remaining entries do not block rmdir().
#
# A directory that does not exist (or disappears while we are working on it) is
# not an error, since the goal is merely that it is gone afterwards. Any other
# failure to open or remove a directory is fatal.
sub rmdir_recursive {
    my ($path) = @_;

    my $dh;
    if (!opendir($dh, $path)) {
        return if $!{ENOENT};
        die "failed to open directory '$path': $!\n";
    }

    # Collect the entries first so the handle can be closed before recursing;
    # this avoids keeping one open directory handle per nesting level.
    my @children;
    while (defined(my $entry = readdir($dh))) {
        next if $entry eq '.' || $entry eq '..';
        push @children, "$path/$entry";
    }
    closedir($dh);

    foreach my $child (@children) {
        next if ! -d $child;
        rmdir_recursive($child);
    }

    # ENOENT here means somebody else removed the directory in the meantime,
    # which is handled the same way as a missing directory at opendir() time.
    rmdir($path)
        or $!{ENOENT}
        or die "failed to remove directory '$path': $!\n";
}
186 | } |