]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - fs/aufs/plink.c
x86/mm: Add TLB purge to free pmd/pte page interfaces
[mirror_ubuntu-bionic-kernel.git] / fs / aufs / plink.c
CommitLineData
c088e31d
SF
1/*
2 * Copyright (C) 2005-2017 Junjiro R. Okajima
3 *
4 * This program, aufs is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18/*
19 * pseudo-link
20 */
21
22#include "aufs.h"
23
/*
 * the pseudo-link maintenance mode.
 * while a user process is maintaining the pseudo-links,
 * adding a new plink and branch manipulation are prohibited.
 *
 * Flags
 * NOPLM:
 *	For entry functions which will handle plink, and i_mutex is already held
 *	in VFS.
 *	They cannot wait and should return an error at once.
 *	Callers have to check the error.
 * NOPLMW:
 *	For entry functions which will handle plink, but i_mutex is not held
 *	in VFS.
 *	They can wait for the plink maintenance mode to finish.
 *
 * They behave like F_SETLK and F_SETLKW.
 * If the caller never handles plink, then both flags are unnecessary.
 */
43
44int au_plink_maint(struct super_block *sb, int flags)
45{
46 int err;
47 pid_t pid, ppid;
48 struct task_struct *parent, *prev;
49 struct au_sbinfo *sbi;
50
51 SiMustAnyLock(sb);
52
53 err = 0;
54 if (!au_opt_test(au_mntflags(sb), PLINK))
55 goto out;
56
57 sbi = au_sbi(sb);
58 pid = sbi->si_plink_maint_pid;
59 if (!pid || pid == current->pid)
60 goto out;
61
62 /* todo: it highly depends upon /sbin/mount.aufs */
63 prev = NULL;
64 parent = current;
65 ppid = 0;
66 rcu_read_lock();
67 while (1) {
68 parent = rcu_dereference(parent->real_parent);
69 if (parent == prev)
70 break;
71 ppid = task_pid_vnr(parent);
72 if (pid == ppid) {
73 rcu_read_unlock();
74 goto out;
75 }
76 prev = parent;
77 }
78 rcu_read_unlock();
79
80 if (au_ftest_lock(flags, NOPLMW)) {
81 /* if there is no i_mutex lock in VFS, we don't need to wait */
82 /* AuDebugOn(!lockdep_depth(current)); */
83 while (sbi->si_plink_maint_pid) {
84 si_read_unlock(sb);
85 /* gave up wake_up_bit() */
86 wait_event(sbi->si_plink_wq, !sbi->si_plink_maint_pid);
87
88 if (au_ftest_lock(flags, FLUSH))
89 au_nwt_flush(&sbi->si_nowait);
90 si_noflush_read_lock(sb);
91 }
92 } else if (au_ftest_lock(flags, NOPLM)) {
93 AuDbg("ppid %d, pid %d\n", ppid, pid);
94 err = -EAGAIN;
95 }
96
97out:
98 return err;
99}
100
101void au_plink_maint_leave(struct au_sbinfo *sbinfo)
102{
103 spin_lock(&sbinfo->si_plink_maint_lock);
104 sbinfo->si_plink_maint_pid = 0;
105 spin_unlock(&sbinfo->si_plink_maint_lock);
106 wake_up_all(&sbinfo->si_plink_wq);
107}
108
109int au_plink_maint_enter(struct super_block *sb)
110{
111 int err;
112 struct au_sbinfo *sbinfo;
113
114 err = 0;
115 sbinfo = au_sbi(sb);
116 /* make sure i am the only one in this fs */
117 si_write_lock(sb, AuLock_FLUSH);
118 if (au_opt_test(au_mntflags(sb), PLINK)) {
119 spin_lock(&sbinfo->si_plink_maint_lock);
120 if (!sbinfo->si_plink_maint_pid)
121 sbinfo->si_plink_maint_pid = current->pid;
122 else
123 err = -EBUSY;
124 spin_unlock(&sbinfo->si_plink_maint_lock);
125 }
126 si_write_unlock(sb);
127
128 return err;
129}
130
131/* ---------------------------------------------------------------------- */
132
#ifdef CONFIG_AUFS_DEBUG
/*
 * debug only: print the inode number of every entry on every plink hash
 * bucket. each bucket is walked under its own hlist_bl lock.
 */
void au_plink_list(struct super_block *sb)
{
	struct au_sbinfo *sbinfo;
	struct hlist_bl_head *bucket, *end;
	struct hlist_bl_node *node;
	struct au_icntnr *entry;

	SiMustAnyLock(sb);

	sbinfo = au_sbi(sb);
	AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
	AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));

	end = sbinfo->si_plink + AuPlink_NHASH;
	for (bucket = sbinfo->si_plink; bucket < end; bucket++) {
		hlist_bl_lock(bucket);
		hlist_bl_for_each_entry(entry, node, bucket, plink)
			AuDbg("%lu\n", entry->vfs_inode.i_ino);
		hlist_bl_unlock(bucket);
	}
}
#endif
157
158/* is the inode pseudo-linked? */
159int au_plink_test(struct inode *inode)
160{
161 int found, i;
162 struct au_sbinfo *sbinfo;
163 struct hlist_bl_head *hbl;
164 struct hlist_bl_node *pos;
165 struct au_icntnr *icntnr;
166
167 sbinfo = au_sbi(inode->i_sb);
168 AuRwMustAnyLock(&sbinfo->si_rwsem);
169 AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK));
170 AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
171
172 found = 0;
173 i = au_plink_hash(inode->i_ino);
174 hbl = sbinfo->si_plink + i;
175 hlist_bl_lock(hbl);
176 hlist_bl_for_each_entry(icntnr, pos, hbl, plink)
177 if (&icntnr->vfs_inode == inode) {
178 found = 1;
179 break;
180 }
181 hlist_bl_unlock(hbl);
182 return found;
183}
184
185/* ---------------------------------------------------------------------- */
186
187/*
188 * generate a name for plink.
189 * the file will be stored under AUFS_WH_PLINKDIR.
190 */
191/* 20 is max digits length of ulong 64 */
192#define PLINK_NAME_LEN ((20 + 1) * 2)
193
194static int plink_name(char *name, int len, struct inode *inode,
195 aufs_bindex_t bindex)
196{
197 int rlen;
198 struct inode *h_inode;
199
200 h_inode = au_h_iptr(inode, bindex);
201 rlen = snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino);
202 return rlen;
203}
204
205struct au_do_plink_lkup_args {
206 struct dentry **errp;
207 struct qstr *tgtname;
208 struct dentry *h_parent;
209 struct au_branch *br;
210};
211
212static struct dentry *au_do_plink_lkup(struct qstr *tgtname,
213 struct dentry *h_parent,
214 struct au_branch *br)
215{
216 struct dentry *h_dentry;
217 struct inode *h_inode;
218
219 h_inode = d_inode(h_parent);
220 vfsub_inode_lock_shared_nested(h_inode, AuLsc_I_CHILD2);
221 h_dentry = vfsub_lkup_one(tgtname, h_parent);
222 inode_unlock_shared(h_inode);
223 return h_dentry;
224}
225
226static void au_call_do_plink_lkup(void *args)
227{
228 struct au_do_plink_lkup_args *a = args;
229 *a->errp = au_do_plink_lkup(a->tgtname, a->h_parent, a->br);
230}
231
232/* lookup the plink-ed @inode under the branch at @bindex */
233struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex)
234{
235 struct dentry *h_dentry, *h_parent;
236 struct au_branch *br;
237 int wkq_err;
238 char a[PLINK_NAME_LEN];
239 struct qstr tgtname = QSTR_INIT(a, 0);
240
241 AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
242
243 br = au_sbr(inode->i_sb, bindex);
244 h_parent = br->br_wbr->wbr_plink;
245 tgtname.len = plink_name(a, sizeof(a), inode, bindex);
246
247 if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
248 struct au_do_plink_lkup_args args = {
249 .errp = &h_dentry,
250 .tgtname = &tgtname,
251 .h_parent = h_parent,
252 .br = br
253 };
254
255 wkq_err = au_wkq_wait(au_call_do_plink_lkup, &args);
256 if (unlikely(wkq_err))
257 h_dentry = ERR_PTR(wkq_err);
258 } else
259 h_dentry = au_do_plink_lkup(&tgtname, h_parent, br);
260
261 return h_dentry;
262}
263
264/* create a pseudo-link */
265static int do_whplink(struct qstr *tgt, struct dentry *h_parent,
266 struct dentry *h_dentry, struct au_branch *br)
267{
268 int err;
269 struct path h_path = {
270 .mnt = au_br_mnt(br)
271 };
272 struct inode *h_dir, *delegated;
273
274 h_dir = d_inode(h_parent);
275 inode_lock_nested(h_dir, AuLsc_I_CHILD2);
276again:
277 h_path.dentry = vfsub_lkup_one(tgt, h_parent);
278 err = PTR_ERR(h_path.dentry);
279 if (IS_ERR(h_path.dentry))
280 goto out;
281
282 err = 0;
283 /* wh.plink dir is not monitored */
284 /* todo: is it really safe? */
285 if (d_is_positive(h_path.dentry)
286 && d_inode(h_path.dentry) != d_inode(h_dentry)) {
287 delegated = NULL;
288 err = vfsub_unlink(h_dir, &h_path, &delegated, /*force*/0);
289 if (unlikely(err == -EWOULDBLOCK)) {
290 pr_warn("cannot retry for NFSv4 delegation"
291 " for an internal unlink\n");
292 iput(delegated);
293 }
294 dput(h_path.dentry);
295 h_path.dentry = NULL;
296 if (!err)
297 goto again;
298 }
299 if (!err && d_is_negative(h_path.dentry)) {
300 delegated = NULL;
301 err = vfsub_link(h_dentry, h_dir, &h_path, &delegated);
302 if (unlikely(err == -EWOULDBLOCK)) {
303 pr_warn("cannot retry for NFSv4 delegation"
304 " for an internal link\n");
305 iput(delegated);
306 }
307 }
308 dput(h_path.dentry);
309
310out:
311 inode_unlock(h_dir);
312 return err;
313}
314
/* arguments to pass do_whplink() through a void pointer callback */
struct do_whplink_args {
	int *errp;			/* where the return value is stored */
	struct qstr *tgt;
	struct dentry *h_parent;
	struct dentry *h_dentry;
	struct au_branch *br;
};

/* unpack @args and store the result of do_whplink() via ->errp */
static void call_do_whplink(void *args)
{
	struct do_whplink_args *wh = args;

	*wh->errp = do_whplink(wh->tgt, wh->h_parent, wh->h_dentry, wh->br);
}
328
329static int whplink(struct dentry *h_dentry, struct inode *inode,
330 aufs_bindex_t bindex, struct au_branch *br)
331{
332 int err, wkq_err;
333 struct au_wbr *wbr;
334 struct dentry *h_parent;
335 char a[PLINK_NAME_LEN];
336 struct qstr tgtname = QSTR_INIT(a, 0);
337
338 wbr = au_sbr(inode->i_sb, bindex)->br_wbr;
339 h_parent = wbr->wbr_plink;
340 tgtname.len = plink_name(a, sizeof(a), inode, bindex);
341
342 /* always superio. */
343 if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
344 struct do_whplink_args args = {
345 .errp = &err,
346 .tgt = &tgtname,
347 .h_parent = h_parent,
348 .h_dentry = h_dentry,
349 .br = br
350 };
351 wkq_err = au_wkq_wait(call_do_whplink, &args);
352 if (unlikely(wkq_err))
353 err = wkq_err;
354 } else
355 err = do_whplink(&tgtname, h_parent, h_dentry, br);
356
357 return err;
358}
359
360/*
361 * create a new pseudo-link for @h_dentry on @bindex.
362 * the linked inode is held in aufs @inode.
363 */
364void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
365 struct dentry *h_dentry)
366{
367 struct super_block *sb;
368 struct au_sbinfo *sbinfo;
369 struct hlist_bl_head *hbl;
370 struct hlist_bl_node *pos;
371 struct au_icntnr *icntnr;
372 int found, err, cnt, i;
373
374 sb = inode->i_sb;
375 sbinfo = au_sbi(sb);
376 AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
377 AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
378
379 found = au_plink_test(inode);
380 if (found)
381 return;
382
383 i = au_plink_hash(inode->i_ino);
384 hbl = sbinfo->si_plink + i;
385 au_igrab(inode);
386
387 hlist_bl_lock(hbl);
388 hlist_bl_for_each_entry(icntnr, pos, hbl, plink) {
389 if (&icntnr->vfs_inode == inode) {
390 found = 1;
391 break;
392 }
393 }
394 if (!found) {
395 icntnr = container_of(inode, struct au_icntnr, vfs_inode);
396 hlist_bl_add_head(&icntnr->plink, hbl);
397 }
398 hlist_bl_unlock(hbl);
399 if (!found) {
400 cnt = au_hbl_count(hbl);
401#define msg "unexpectedly unblanced or too many pseudo-links"
402 if (cnt > AUFS_PLINK_WARN)
403 AuWarn1(msg ", %d\n", cnt);
404#undef msg
405 err = whplink(h_dentry, inode, bindex, au_sbr(sb, bindex));
406 if (unlikely(err)) {
407 pr_warn("err %d, damaged pseudo link.\n", err);
408 au_hbl_del(&icntnr->plink, hbl);
409 iput(&icntnr->vfs_inode);
410 }
411 } else
412 iput(&icntnr->vfs_inode);
413}
414
415/* free all plinks */
416void au_plink_put(struct super_block *sb, int verbose)
417{
418 int i, warned;
419 struct au_sbinfo *sbinfo;
420 struct hlist_bl_head *hbl;
421 struct hlist_bl_node *pos, *tmp;
422 struct au_icntnr *icntnr;
423
424 SiMustWriteLock(sb);
425
426 sbinfo = au_sbi(sb);
427 AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
428 AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
429
430 /* no spin_lock since sbinfo is write-locked */
431 warned = 0;
432 for (i = 0; i < AuPlink_NHASH; i++) {
433 hbl = sbinfo->si_plink + i;
434 if (!warned && verbose && !hlist_bl_empty(hbl)) {
435 pr_warn("pseudo-link is not flushed");
436 warned = 1;
437 }
438 hlist_bl_for_each_entry_safe(icntnr, pos, tmp, hbl, plink)
439 iput(&icntnr->vfs_inode);
440 INIT_HLIST_BL_HEAD(hbl);
441 }
442}
443
444void au_plink_clean(struct super_block *sb, int verbose)
445{
446 struct dentry *root;
447
448 root = sb->s_root;
449 aufs_write_lock(root);
450 if (au_opt_test(au_mntflags(sb), PLINK))
451 au_plink_put(sb, verbose);
452 aufs_write_unlock(root);
453}
454
455static int au_plink_do_half_refresh(struct inode *inode, aufs_bindex_t br_id)
456{
457 int do_put;
458 aufs_bindex_t btop, bbot, bindex;
459
460 do_put = 0;
461 btop = au_ibtop(inode);
462 bbot = au_ibbot(inode);
463 if (btop >= 0) {
464 for (bindex = btop; bindex <= bbot; bindex++) {
465 if (!au_h_iptr(inode, bindex)
466 || au_ii_br_id(inode, bindex) != br_id)
467 continue;
468 au_set_h_iptr(inode, bindex, NULL, 0);
469 do_put = 1;
470 break;
471 }
472 if (do_put)
473 for (bindex = btop; bindex <= bbot; bindex++)
474 if (au_h_iptr(inode, bindex)) {
475 do_put = 0;
476 break;
477 }
478 } else
479 do_put = 1;
480
481 return do_put;
482}
483
484/* free the plinks on a branch specified by @br_id */
485void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id)
486{
487 struct au_sbinfo *sbinfo;
488 struct hlist_bl_head *hbl;
489 struct hlist_bl_node *pos, *tmp;
490 struct au_icntnr *icntnr;
491 struct inode *inode;
492 int i, do_put;
493
494 SiMustWriteLock(sb);
495
496 sbinfo = au_sbi(sb);
497 AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
498 AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
499
500 /* no bit_lock since sbinfo is write-locked */
501 for (i = 0; i < AuPlink_NHASH; i++) {
502 hbl = sbinfo->si_plink + i;
503 hlist_bl_for_each_entry_safe(icntnr, pos, tmp, hbl, plink) {
504 inode = au_igrab(&icntnr->vfs_inode);
505 ii_write_lock_child(inode);
506 do_put = au_plink_do_half_refresh(inode, br_id);
507 if (do_put) {
508 hlist_bl_del(&icntnr->plink);
509 iput(inode);
510 }
511 ii_write_unlock(inode);
512 iput(inode);
513 }
514 }
515}