]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blob - fs/aufs/plink.c
tracing/histogram: Fix sorting on old "cpu" value
[mirror_ubuntu-jammy-kernel.git] / fs / aufs / plink.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2005-2021 Junjiro R. Okajima
4 *
5 * This program, aufs is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 /*
20 * pseudo-link
21 */
22
23 #include "aufs.h"
24
25 /*
26 * the pseudo-link maintenance mode.
27 * during a user process maintains the pseudo-links,
28 * prohibit adding a new plink and branch manipulation.
29 *
30 * Flags
31 * NOPLM:
32 * For entry functions which will handle plink, and i_mutex is already held
33 * in VFS.
34 * They cannot wait and should return an error at once.
35 * Callers has to check the error.
36 * NOPLMW:
37 * For entry functions which will handle plink, but i_mutex is not held
38 * in VFS.
39 * They can wait the plink maintenance mode to finish.
40 *
41 * They behave like F_SETLK and F_SETLKW.
42 * If the caller never handle plink, then both flags are unnecessary.
43 */
44
/*
 * test whether somebody else holds the plink maintenance mode.
 * returns 0 when plink handling may proceed, or -EAGAIN for NOPLM callers
 * while the mode is active (see the NOPLM/NOPLMW comment above).
 */
int au_plink_maint(struct super_block *sb, int flags)
{
	int err;
	pid_t pid, ppid;
	struct task_struct *parent, *prev;
	struct au_sbinfo *sbi;

	SiMustAnyLock(sb);

	err = 0;
	/* nothing to do unless the plink mount option is enabled */
	if (!au_opt_test(au_mntflags(sb), PLINK))
		goto out;

	sbi = au_sbi(sb);
	/* lockless read; re-checked in the wait loop below */
	pid = sbi->si_plink_maint_pid;
	/* no maintainer at all, or I am the maintainer myself */
	if (!pid || pid == current->pid)
		goto out;

	/*
	 * walk my ancestors: if the maintainer is one of them, allow the
	 * operation (presumably a process spawned by the maintainer itself).
	 */
	/* todo: it highly depends upon /sbin/mount.aufs */
	prev = NULL;
	parent = current;
	ppid = 0;
	rcu_read_lock();
	while (1) {
		parent = rcu_dereference(parent->real_parent);
		if (parent == prev)
			break;	/* reached the top of the process tree */
		ppid = task_pid_vnr(parent);
		if (pid == ppid) {
			/* I am a descendant of the maintainer */
			rcu_read_unlock();
			goto out;
		}
		prev = parent;
	}
	rcu_read_unlock();

	if (au_ftest_lock(flags, NOPLMW)) {
		/* if there is no i_mutex lock in VFS, we don't need to wait */
		/* AuDebugOn(!lockdep_depth(current)); */
		/*
		 * drop the superblock lock while sleeping so the maintainer
		 * can make progress, then re-acquire and re-check.
		 */
		while (sbi->si_plink_maint_pid) {
			si_read_unlock(sb);
			/* gave up wake_up_bit() */
			wait_event(sbi->si_plink_wq, !sbi->si_plink_maint_pid);

			if (au_ftest_lock(flags, FLUSH))
				au_nwt_flush(&sbi->si_nowait);
			si_noflush_read_lock(sb);
		}
	} else if (au_ftest_lock(flags, NOPLM)) {
		/* i_mutex is held in VFS; cannot sleep here */
		AuDbg("ppid %d, pid %d\n", ppid, pid);
		err = -EAGAIN;
	}

out:
	return err;
}
101
102 void au_plink_maint_leave(struct au_sbinfo *sbinfo)
103 {
104 spin_lock(&sbinfo->si_plink_maint_lock);
105 sbinfo->si_plink_maint_pid = 0;
106 spin_unlock(&sbinfo->si_plink_maint_lock);
107 wake_up_all(&sbinfo->si_plink_wq);
108 }
109
110 int au_plink_maint_enter(struct super_block *sb)
111 {
112 int err;
113 struct au_sbinfo *sbinfo;
114
115 err = 0;
116 sbinfo = au_sbi(sb);
117 /* make sure i am the only one in this fs */
118 si_write_lock(sb, AuLock_FLUSH);
119 if (au_opt_test(au_mntflags(sb), PLINK)) {
120 spin_lock(&sbinfo->si_plink_maint_lock);
121 if (!sbinfo->si_plink_maint_pid)
122 sbinfo->si_plink_maint_pid = current->pid;
123 else
124 err = -EBUSY;
125 spin_unlock(&sbinfo->si_plink_maint_lock);
126 }
127 si_write_unlock(sb);
128
129 return err;
130 }
131
132 /* ---------------------------------------------------------------------- */
133
134 #ifdef CONFIG_AUFS_DEBUG
135 void au_plink_list(struct super_block *sb)
136 {
137 int i;
138 struct au_sbinfo *sbinfo;
139 struct hlist_bl_head *hbl;
140 struct hlist_bl_node *pos;
141 struct au_icntnr *icntnr;
142
143 SiMustAnyLock(sb);
144
145 sbinfo = au_sbi(sb);
146 AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
147 AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
148
149 for (i = 0; i < AuPlink_NHASH; i++) {
150 hbl = sbinfo->si_plink + i;
151 hlist_bl_lock(hbl);
152 hlist_bl_for_each_entry(icntnr, pos, hbl, plink)
153 AuDbg("%lu\n", icntnr->vfs_inode.i_ino);
154 hlist_bl_unlock(hbl);
155 }
156 }
157 #endif
158
159 /* is the inode pseudo-linked? */
160 int au_plink_test(struct inode *inode)
161 {
162 int found, i;
163 struct au_sbinfo *sbinfo;
164 struct hlist_bl_head *hbl;
165 struct hlist_bl_node *pos;
166 struct au_icntnr *icntnr;
167
168 sbinfo = au_sbi(inode->i_sb);
169 AuRwMustAnyLock(&sbinfo->si_rwsem);
170 AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK));
171 AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
172
173 found = 0;
174 i = au_plink_hash(inode->i_ino);
175 hbl = sbinfo->si_plink + i;
176 hlist_bl_lock(hbl);
177 hlist_bl_for_each_entry(icntnr, pos, hbl, plink)
178 if (&icntnr->vfs_inode == inode) {
179 found = 1;
180 break;
181 }
182 hlist_bl_unlock(hbl);
183 return found;
184 }
185
186 /* ---------------------------------------------------------------------- */
187
188 /*
189 * generate a name for plink.
190 * the file will be stored under AUFS_WH_PLINKDIR.
191 */
192 /* 20 is max digits length of ulong 64 */
193 #define PLINK_NAME_LEN ((20 + 1) * 2)
194
195 static int plink_name(char *name, int len, struct inode *inode,
196 aufs_bindex_t bindex)
197 {
198 int rlen;
199 struct inode *h_inode;
200
201 h_inode = au_h_iptr(inode, bindex);
202 rlen = snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino);
203 return rlen;
204 }
205
/* arguments for au_do_plink_lkup() when it is executed on the workqueue */
struct au_do_plink_lkup_args {
	struct dentry **errp;	/* out: looked-up dentry or ERR_PTR */
	struct qstr *tgtname;	/* name of the plink entry */
	struct path *h_ppath;	/* the branch's plink dir (dentry + mount) */
};
211
212 static struct dentry *au_do_plink_lkup(struct qstr *tgtname,
213 struct path *h_ppath)
214 {
215 struct dentry *h_dentry;
216 struct inode *h_inode;
217
218 h_inode = d_inode(h_ppath->dentry);
219 inode_lock_shared_nested(h_inode, AuLsc_I_CHILD2);
220 h_dentry = vfsub_lkup_one(tgtname, h_ppath);
221 inode_unlock_shared(h_inode);
222
223 return h_dentry;
224 }
225
226 static void au_call_do_plink_lkup(void *args)
227 {
228 struct au_do_plink_lkup_args *a = args;
229 *a->errp = au_do_plink_lkup(a->tgtname, a->h_ppath);
230 }
231
232 /* lookup the plink-ed @inode under the branch at @bindex */
233 struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex)
234 {
235 struct dentry *h_dentry;
236 struct au_branch *br;
237 struct path h_ppath;
238 int wkq_err;
239 char a[PLINK_NAME_LEN];
240 struct qstr tgtname = QSTR_INIT(a, 0);
241
242 AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
243
244 br = au_sbr(inode->i_sb, bindex);
245 h_ppath.dentry = br->br_wbr->wbr_plink;
246 h_ppath.mnt = au_br_mnt(br);
247 tgtname.len = plink_name(a, sizeof(a), inode, bindex);
248
249 if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
250 struct au_do_plink_lkup_args args = {
251 .errp = &h_dentry,
252 .tgtname = &tgtname,
253 .h_ppath = &h_ppath
254 };
255
256 wkq_err = au_wkq_wait(au_call_do_plink_lkup, &args);
257 if (unlikely(wkq_err))
258 h_dentry = ERR_PTR(wkq_err);
259 } else
260 h_dentry = au_do_plink_lkup(&tgtname, &h_ppath);
261
262 return h_dentry;
263 }
264
/* create a pseudo-link */
/*
 * hard-link @h_dentry under the branch's plink dir @h_ppath as @tgt.
 * a stale entry pointing at a different inode is unlinked first and the
 * lookup is retried.  returns 0 on success or a negative errno.
 */
static int do_whplink(struct qstr *tgt, struct path *h_ppath,
		      struct dentry *h_dentry)
{
	int err;
	struct path h_path;
	struct inode *h_dir, *delegated;

	h_dir = d_inode(h_ppath->dentry);
	inode_lock_nested(h_dir, AuLsc_I_CHILD2);
	h_path.mnt = h_ppath->mnt;
again:
	h_path.dentry = vfsub_lkup_one(tgt, h_ppath);
	err = PTR_ERR(h_path.dentry);
	if (IS_ERR(h_path.dentry))
		goto out;

	err = 0;
	/* wh.plink dir is not monitored */
	/* todo: is it really safe? */
	if (d_is_positive(h_path.dentry)
	    && d_inode(h_path.dentry) != d_inode(h_dentry)) {
		/* stale entry for another inode: remove it and retry */
		delegated = NULL;
		err = vfsub_unlink(h_dir, &h_path, &delegated, /*force*/0);
		if (unlikely(err == -EWOULDBLOCK)) {
			pr_warn("cannot retry for NFSv4 delegation"
				" for an internal unlink\n");
			iput(delegated);
		}
		dput(h_path.dentry);
		h_path.dentry = NULL;
		if (!err)
			goto again;
	}
	if (!err && d_is_negative(h_path.dentry)) {
		/* the name is free; create the link */
		delegated = NULL;
		err = vfsub_link(h_dentry, h_dir, &h_path, &delegated);
		if (unlikely(err == -EWOULDBLOCK)) {
			pr_warn("cannot retry for NFSv4 delegation"
				" for an internal link\n");
			iput(delegated);
		}
	}
	dput(h_path.dentry);

out:
	inode_unlock(h_dir);
	return err;
}
314
/* arguments for do_whplink() when it is executed on the workqueue */
struct do_whplink_args {
	int *errp;		/* out: result of do_whplink() */
	struct qstr *tgt;	/* name of the plink entry */
	struct path *h_ppath;	/* the branch's plink dir (dentry + mount) */
	struct dentry *h_dentry;	/* branch dentry to be linked */
};
321
322 static void call_do_whplink(void *args)
323 {
324 struct do_whplink_args *a = args;
325 *a->errp = do_whplink(a->tgt, a->h_ppath, a->h_dentry);
326 }
327
328 static int whplink(struct dentry *h_dentry, struct inode *inode,
329 aufs_bindex_t bindex)
330 {
331 int err, wkq_err;
332 struct au_branch *br;
333 struct au_wbr *wbr;
334 struct path h_ppath;
335 char a[PLINK_NAME_LEN];
336 struct qstr tgtname = QSTR_INIT(a, 0);
337
338 br = au_sbr(inode->i_sb, bindex);
339 wbr = br->br_wbr;
340 h_ppath.dentry = wbr->wbr_plink;
341 h_ppath.mnt = au_br_mnt(br);
342 tgtname.len = plink_name(a, sizeof(a), inode, bindex);
343
344 /* always superio. */
345 if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
346 struct do_whplink_args args = {
347 .errp = &err,
348 .tgt = &tgtname,
349 .h_ppath = &h_ppath,
350 .h_dentry = h_dentry
351 };
352 wkq_err = au_wkq_wait(call_do_whplink, &args);
353 if (unlikely(wkq_err))
354 err = wkq_err;
355 } else
356 err = do_whplink(&tgtname, &h_ppath, h_dentry);
357
358 return err;
359 }
360
/*
 * create a new pseudo-link for @h_dentry on @bindex.
 * the linked inode is held in aufs @inode.
 */
void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
		     struct dentry *h_dentry)
{
	struct super_block *sb;
	struct au_sbinfo *sbinfo;
	struct hlist_bl_head *hbl;
	struct hlist_bl_node *pos;
	struct au_icntnr *icntnr;
	int found, err, cnt, i;

	sb = inode->i_sb;
	sbinfo = au_sbi(sb);
	AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
	AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));

	/* fast path: already pseudo-linked */
	found = au_plink_test(inode);
	if (found)
		return;

	i = au_plink_hash(inode->i_ino);
	hbl = sbinfo->si_plink + i;
	/* hold a ref for the list; dropped below if we lose the race */
	au_igrab(inode);

	/* re-check under the bucket lock to prevent a double insertion */
	hlist_bl_lock(hbl);
	hlist_bl_for_each_entry(icntnr, pos, hbl, plink) {
		if (&icntnr->vfs_inode == inode) {
			found = 1;
			break;
		}
	}
	if (!found) {
		icntnr = container_of(inode, struct au_icntnr, vfs_inode);
		hlist_bl_add_head(&icntnr->plink, hbl);
	}
	hlist_bl_unlock(hbl);
	if (!found) {
		cnt = au_hbl_count(hbl);
#define msg "unexpectedly unbalanced or too many pseudo-links"
		if (cnt > AUFS_PLINK_WARN)
			AuWarn1(msg ", %d\n", cnt);
#undef msg
		/* materialize the link under the branch's plink dir */
		err = whplink(h_dentry, inode, bindex);
		if (unlikely(err)) {
			pr_warn("err %d, damaged pseudo link.\n", err);
			/* roll back the insertion and its inode reference */
			au_hbl_del(&icntnr->plink, hbl);
			iput(&icntnr->vfs_inode);
		}
	} else
		iput(&icntnr->vfs_inode);	/* lost the race; drop our ref */
}
415
416 /* free all plinks */
417 void au_plink_put(struct super_block *sb, int verbose)
418 {
419 int i, warned;
420 struct au_sbinfo *sbinfo;
421 struct hlist_bl_head *hbl;
422 struct hlist_bl_node *pos, *tmp;
423 struct au_icntnr *icntnr;
424
425 SiMustWriteLock(sb);
426
427 sbinfo = au_sbi(sb);
428 AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
429 AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
430
431 /* no spin_lock since sbinfo is write-locked */
432 warned = 0;
433 for (i = 0; i < AuPlink_NHASH; i++) {
434 hbl = sbinfo->si_plink + i;
435 if (!warned && verbose && !hlist_bl_empty(hbl)) {
436 pr_warn("pseudo-link is not flushed");
437 warned = 1;
438 }
439 hlist_bl_for_each_entry_safe(icntnr, pos, tmp, hbl, plink)
440 iput(&icntnr->vfs_inode);
441 INIT_HLIST_BL_HEAD(hbl);
442 }
443 }
444
445 void au_plink_clean(struct super_block *sb, int verbose)
446 {
447 struct dentry *root;
448
449 root = sb->s_root;
450 aufs_write_lock(root);
451 if (au_opt_test(au_mntflags(sb), PLINK))
452 au_plink_put(sb, verbose);
453 aufs_write_unlock(root);
454 }
455
456 static int au_plink_do_half_refresh(struct inode *inode, aufs_bindex_t br_id)
457 {
458 int do_put;
459 aufs_bindex_t btop, bbot, bindex;
460
461 do_put = 0;
462 btop = au_ibtop(inode);
463 bbot = au_ibbot(inode);
464 if (btop >= 0) {
465 for (bindex = btop; bindex <= bbot; bindex++) {
466 if (!au_h_iptr(inode, bindex)
467 || au_ii_br_id(inode, bindex) != br_id)
468 continue;
469 au_set_h_iptr(inode, bindex, NULL, 0);
470 do_put = 1;
471 break;
472 }
473 if (do_put)
474 for (bindex = btop; bindex <= bbot; bindex++)
475 if (au_h_iptr(inode, bindex)) {
476 do_put = 0;
477 break;
478 }
479 } else
480 do_put = 1;
481
482 return do_put;
483 }
484
/* free the plinks on a branch specified by @br_id */
void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id)
{
	struct au_sbinfo *sbinfo;
	struct hlist_bl_head *hbl;
	struct hlist_bl_node *pos, *tmp;
	struct au_icntnr *icntnr;
	struct inode *inode;
	int i, do_put;

	SiMustWriteLock(sb);

	sbinfo = au_sbi(sb);
	AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
	AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));

	/* no bit_lock since sbinfo is write-locked */
	for (i = 0; i < AuPlink_NHASH; i++) {
		hbl = sbinfo->si_plink + i;
		hlist_bl_for_each_entry_safe(icntnr, pos, tmp, hbl, plink) {
			/* temporary ref while working on the inode */
			inode = au_igrab(&icntnr->vfs_inode);
			ii_write_lock_child(inode);
			do_put = au_plink_do_half_refresh(inode, br_id);
			if (do_put) {
				hlist_bl_del(&icntnr->plink);
				iput(inode);	/* drop the list's reference */
			}
			ii_write_unlock(inode);
			iput(inode);	/* pairs with the au_igrab() above */
		}
	}
}