]>
Commit | Line | Data |
---|---|---|
a3a49a17 SF |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
e4a3f096 | 3 | * Copyright (C) 2005-2020 Junjiro R. Okajima |
a3a49a17 SF |
4 | * |
5 | * This program, aufs is free software; you can redistribute it and/or modify | |
6 | * it under the terms of the GNU General Public License as published by | |
7 | * the Free Software Foundation; either version 2 of the License, or | |
8 | * (at your option) any later version. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, | |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | * GNU General Public License for more details. | |
14 | * | |
15 | * You should have received a copy of the GNU General Public License | |
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
17 | */ | |
18 | ||
19 | /* | |
20 | * whiteout for logical deletion and opaque directory | |
21 | */ | |
22 | ||
23 | #include "aufs.h" | |
24 | ||
25 | #define WH_MASK 0444 | |
26 | ||
27 | /* | |
28 | * If a directory contains this file, then it is opaque. We start with the | |
29 | * .wh. flag so that it is blocked by lookup. | |
30 | */ | |
31 | static struct qstr diropq_name = QSTR_INIT(AUFS_WH_DIROPQ, | |
32 | sizeof(AUFS_WH_DIROPQ) - 1); | |
33 | ||
34 | /* | |
35 | * generate whiteout name, which is NOT terminated by NULL. | |
36 | * @name: original d_name.name | |
37 | * @len: original d_name.len | |
38 | * @wh: whiteout qstr | |
39 | * returns zero when succeeds, otherwise error. | |
40 | * succeeded value as wh->name should be freed by kfree(). | |
41 | */ | |
42 | int au_wh_name_alloc(struct qstr *wh, const struct qstr *name) | |
43 | { | |
44 | char *p; | |
45 | ||
46 | if (unlikely(name->len > PATH_MAX - AUFS_WH_PFX_LEN)) | |
47 | return -ENAMETOOLONG; | |
48 | ||
49 | wh->len = name->len + AUFS_WH_PFX_LEN; | |
50 | p = kmalloc(wh->len, GFP_NOFS); | |
51 | wh->name = p; | |
52 | if (p) { | |
53 | memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN); | |
54 | memcpy(p + AUFS_WH_PFX_LEN, name->name, name->len); | |
55 | /* smp_mb(); */ | |
56 | return 0; | |
57 | } | |
58 | return -ENOMEM; | |
59 | } | |
60 | ||
61 | /* ---------------------------------------------------------------------- */ | |
62 | ||
63 | /* | |
64 | * test if the @wh_name exists under @h_parent. | |
65 | * @try_sio specifies the necessary of super-io. | |
66 | */ | |
67 | int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio) | |
68 | { | |
69 | int err; | |
70 | struct dentry *wh_dentry; | |
71 | ||
72 | if (!try_sio) | |
73 | wh_dentry = vfsub_lkup_one(wh_name, h_parent); | |
74 | else | |
75 | wh_dentry = au_sio_lkup_one(wh_name, h_parent); | |
76 | err = PTR_ERR(wh_dentry); | |
77 | if (IS_ERR(wh_dentry)) { | |
78 | if (err == -ENAMETOOLONG) | |
79 | err = 0; | |
80 | goto out; | |
81 | } | |
82 | ||
83 | err = 0; | |
84 | if (d_is_negative(wh_dentry)) | |
85 | goto out_wh; /* success */ | |
86 | ||
87 | err = 1; | |
88 | if (d_is_reg(wh_dentry)) | |
89 | goto out_wh; /* success */ | |
90 | ||
91 | err = -EIO; | |
92 | AuIOErr("%pd Invalid whiteout entry type 0%o.\n", | |
93 | wh_dentry, d_inode(wh_dentry)->i_mode); | |
94 | ||
95 | out_wh: | |
96 | dput(wh_dentry); | |
97 | out: | |
98 | return err; | |
99 | } | |
100 | ||
101 | /* | |
102 | * test if the @h_dentry sets opaque or not. | |
103 | */ | |
104 | int au_diropq_test(struct dentry *h_dentry) | |
105 | { | |
106 | int err; | |
107 | struct inode *h_dir; | |
108 | ||
109 | h_dir = d_inode(h_dentry); | |
110 | err = au_wh_test(h_dentry, &diropq_name, | |
111 | au_test_h_perm_sio(h_dir, MAY_EXEC)); | |
112 | return err; | |
113 | } | |
114 | ||
115 | /* | |
116 | * returns a negative dentry whose name is unique and temporary. | |
117 | */ | |
118 | struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br, | |
119 | struct qstr *prefix) | |
120 | { | |
121 | struct dentry *dentry; | |
122 | int i; | |
123 | char defname[NAME_MAX - AUFS_MAX_NAMELEN + DNAME_INLINE_LEN + 1], | |
124 | *name, *p; | |
125 | /* strict atomic_t is unnecessary here */ | |
126 | static unsigned short cnt; | |
127 | struct qstr qs; | |
128 | ||
129 | BUILD_BUG_ON(sizeof(cnt) * 2 > AUFS_WH_TMP_LEN); | |
130 | ||
131 | name = defname; | |
132 | qs.len = sizeof(defname) - DNAME_INLINE_LEN + prefix->len - 1; | |
133 | if (unlikely(prefix->len > DNAME_INLINE_LEN)) { | |
134 | dentry = ERR_PTR(-ENAMETOOLONG); | |
135 | if (unlikely(qs.len > NAME_MAX)) | |
136 | goto out; | |
137 | dentry = ERR_PTR(-ENOMEM); | |
138 | name = kmalloc(qs.len + 1, GFP_NOFS); | |
139 | if (unlikely(!name)) | |
140 | goto out; | |
141 | } | |
142 | ||
143 | /* doubly whiteout-ed */ | |
144 | memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2); | |
145 | p = name + AUFS_WH_PFX_LEN * 2; | |
146 | memcpy(p, prefix->name, prefix->len); | |
147 | p += prefix->len; | |
148 | *p++ = '.'; | |
149 | AuDebugOn(name + qs.len + 1 - p <= AUFS_WH_TMP_LEN); | |
150 | ||
151 | qs.name = name; | |
152 | for (i = 0; i < 3; i++) { | |
153 | sprintf(p, "%.*x", AUFS_WH_TMP_LEN, cnt++); | |
154 | dentry = au_sio_lkup_one(&qs, h_parent); | |
155 | if (IS_ERR(dentry) || d_is_negative(dentry)) | |
156 | goto out_name; | |
157 | dput(dentry); | |
158 | } | |
159 | /* pr_warn("could not get random name\n"); */ | |
160 | dentry = ERR_PTR(-EEXIST); | |
161 | AuDbg("%.*s\n", AuLNPair(&qs)); | |
162 | BUG(); | |
163 | ||
164 | out_name: | |
165 | if (name != defname) | |
166 | au_kfree_try_rcu(name); | |
167 | out: | |
168 | AuTraceErrPtr(dentry); | |
169 | return dentry; | |
170 | } | |
171 | ||
172 | /* | |
173 | * rename the @h_dentry on @br to the whiteouted temporary name. | |
174 | */ | |
175 | int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br) | |
176 | { | |
177 | int err; | |
178 | struct path h_path = { | |
179 | .mnt = au_br_mnt(br) | |
180 | }; | |
181 | struct inode *h_dir, *delegated; | |
182 | struct dentry *h_parent; | |
183 | ||
184 | h_parent = h_dentry->d_parent; /* dir inode is locked */ | |
185 | h_dir = d_inode(h_parent); | |
186 | IMustLock(h_dir); | |
187 | ||
188 | h_path.dentry = au_whtmp_lkup(h_parent, br, &h_dentry->d_name); | |
189 | err = PTR_ERR(h_path.dentry); | |
190 | if (IS_ERR(h_path.dentry)) | |
191 | goto out; | |
192 | ||
193 | /* under the same dir, no need to lock_rename() */ | |
194 | delegated = NULL; | |
195 | err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path, &delegated, | |
196 | /*flags*/0); | |
197 | AuTraceErr(err); | |
198 | if (unlikely(err == -EWOULDBLOCK)) { | |
199 | pr_warn("cannot retry for NFSv4 delegation" | |
200 | " for an internal rename\n"); | |
201 | iput(delegated); | |
202 | } | |
203 | dput(h_path.dentry); | |
204 | ||
205 | out: | |
206 | AuTraceErr(err); | |
207 | return err; | |
208 | } | |
209 | ||
210 | /* ---------------------------------------------------------------------- */ | |
211 | /* | |
212 | * functions for removing a whiteout | |
213 | */ | |
214 | ||
215 | static int do_unlink_wh(struct inode *h_dir, struct path *h_path) | |
216 | { | |
217 | int err, force; | |
218 | struct inode *delegated; | |
219 | ||
220 | /* | |
221 | * forces superio when the dir has a sticky bit. | |
222 | * this may be a violation of unix fs semantics. | |
223 | */ | |
224 | force = (h_dir->i_mode & S_ISVTX) | |
225 | && !uid_eq(current_fsuid(), d_inode(h_path->dentry)->i_uid); | |
226 | delegated = NULL; | |
227 | err = vfsub_unlink(h_dir, h_path, &delegated, force); | |
228 | if (unlikely(err == -EWOULDBLOCK)) { | |
229 | pr_warn("cannot retry for NFSv4 delegation" | |
230 | " for an internal unlink\n"); | |
231 | iput(delegated); | |
232 | } | |
233 | return err; | |
234 | } | |
235 | ||
/*
 * Unlink the whiteout @h_path under @h_dir and, when that succeeds and
 * @dentry is given, reset its whiteout branch index to -1 via au_set_dbwh().
 * Returns the result of do_unlink_wh().
 */
int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
			struct dentry *dentry)
{
	int err = do_unlink_wh(h_dir, h_path);

	if (err)
		return err;

	if (dentry)
		au_set_dbwh(dentry, -1);
	return 0;
}
247 | ||
248 | static int unlink_wh_name(struct dentry *h_parent, struct qstr *wh, | |
249 | struct au_branch *br) | |
250 | { | |
251 | int err; | |
252 | struct path h_path = { | |
253 | .mnt = au_br_mnt(br) | |
254 | }; | |
255 | ||
256 | err = 0; | |
257 | h_path.dentry = vfsub_lkup_one(wh, h_parent); | |
258 | if (IS_ERR(h_path.dentry)) | |
259 | err = PTR_ERR(h_path.dentry); | |
260 | else { | |
261 | if (d_is_reg(h_path.dentry)) | |
262 | err = do_unlink_wh(d_inode(h_parent), &h_path); | |
263 | dput(h_path.dentry); | |
264 | } | |
265 | ||
266 | return err; | |
267 | } | |
268 | ||
269 | /* ---------------------------------------------------------------------- */ | |
270 | /* | |
271 | * initialize/clean whiteout for a branch | |
272 | */ | |
273 | ||
274 | static void au_wh_clean(struct inode *h_dir, struct path *whpath, | |
275 | const int isdir) | |
276 | { | |
277 | int err; | |
278 | struct inode *delegated; | |
279 | ||
280 | if (d_is_negative(whpath->dentry)) | |
281 | return; | |
282 | ||
283 | if (isdir) | |
284 | err = vfsub_rmdir(h_dir, whpath); | |
285 | else { | |
286 | delegated = NULL; | |
287 | err = vfsub_unlink(h_dir, whpath, &delegated, /*force*/0); | |
288 | if (unlikely(err == -EWOULDBLOCK)) { | |
289 | pr_warn("cannot retry for NFSv4 delegation" | |
290 | " for an internal unlink\n"); | |
291 | iput(delegated); | |
292 | } | |
293 | } | |
294 | if (unlikely(err)) | |
295 | pr_warn("failed removing %pd (%d), ignored.\n", | |
296 | whpath->dentry, err); | |
297 | } | |
298 | ||
299 | static int test_linkable(struct dentry *h_root) | |
300 | { | |
301 | struct inode *h_dir = d_inode(h_root); | |
302 | ||
303 | if (h_dir->i_op->link) | |
304 | return 0; | |
305 | ||
306 | pr_err("%pd (%s) doesn't support link(2), use noplink and rw+nolwh\n", | |
307 | h_root, au_sbtype(h_root->d_sb)); | |
e4a3f096 | 308 | return -ENOSYS; /* the branch doesn't have its ->link() */ |
a3a49a17 SF |
309 | } |
310 | ||
311 | /* todo: should this mkdir be done in /sbin/mount.aufs helper? */ | |
312 | static int au_whdir(struct inode *h_dir, struct path *path) | |
313 | { | |
314 | int err; | |
315 | ||
316 | err = -EEXIST; | |
317 | if (d_is_negative(path->dentry)) { | |
318 | int mode = 0700; | |
319 | ||
320 | if (au_test_nfs(path->dentry->d_sb)) | |
321 | mode |= 0111; | |
322 | err = vfsub_mkdir(h_dir, path, mode); | |
323 | } else if (d_is_dir(path->dentry)) | |
324 | err = 0; | |
325 | else | |
326 | pr_err("unknown %pd exists\n", path->dentry); | |
327 | ||
328 | return err; | |
329 | } | |
330 | ||
/* one whiteout base entry of a branch, used while initializing it */
struct au_wh_base {
	const struct qstr *name;	/* un-prefixed name of the entry */
	struct dentry *dentry;		/* looked-up dentry, NULL until then */
};
335 | ||
336 | static void au_wh_init_ro(struct inode *h_dir, struct au_wh_base base[], | |
337 | struct path *h_path) | |
338 | { | |
339 | h_path->dentry = base[AuBrWh_BASE].dentry; | |
340 | au_wh_clean(h_dir, h_path, /*isdir*/0); | |
341 | h_path->dentry = base[AuBrWh_PLINK].dentry; | |
342 | au_wh_clean(h_dir, h_path, /*isdir*/1); | |
343 | h_path->dentry = base[AuBrWh_ORPH].dentry; | |
344 | au_wh_clean(h_dir, h_path, /*isdir*/1); | |
345 | } | |
346 | ||
347 | /* | |
348 | * returns tri-state, | |
349 | * minus: error, caller should print the message | |
350 | * zero: success | |
351 | * plus: error, caller should NOT print the message | |
352 | */ | |
353 | static int au_wh_init_rw_nolink(struct dentry *h_root, struct au_wbr *wbr, | |
354 | int do_plink, struct au_wh_base base[], | |
355 | struct path *h_path) | |
356 | { | |
357 | int err; | |
358 | struct inode *h_dir; | |
359 | ||
360 | h_dir = d_inode(h_root); | |
361 | h_path->dentry = base[AuBrWh_BASE].dentry; | |
362 | au_wh_clean(h_dir, h_path, /*isdir*/0); | |
363 | h_path->dentry = base[AuBrWh_PLINK].dentry; | |
364 | if (do_plink) { | |
365 | err = test_linkable(h_root); | |
366 | if (unlikely(err)) { | |
367 | err = 1; | |
368 | goto out; | |
369 | } | |
370 | ||
371 | err = au_whdir(h_dir, h_path); | |
372 | if (unlikely(err)) | |
373 | goto out; | |
374 | wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry); | |
375 | } else | |
376 | au_wh_clean(h_dir, h_path, /*isdir*/1); | |
377 | h_path->dentry = base[AuBrWh_ORPH].dentry; | |
378 | err = au_whdir(h_dir, h_path); | |
379 | if (unlikely(err)) | |
380 | goto out; | |
381 | wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry); | |
382 | ||
383 | out: | |
384 | return err; | |
385 | } | |
386 | ||
387 | /* | |
388 | * for the moment, aufs supports the branch filesystem which does not support | |
389 | * link(2). testing on FAT which does not support i_op->setattr() fully either, | |
390 | * copyup failed. finally, such filesystem will not be used as the writable | |
391 | * branch. | |
392 | * | |
393 | * returns tri-state, see above. | |
394 | */ | |
395 | static int au_wh_init_rw(struct dentry *h_root, struct au_wbr *wbr, | |
396 | int do_plink, struct au_wh_base base[], | |
397 | struct path *h_path) | |
398 | { | |
399 | int err; | |
400 | struct inode *h_dir; | |
401 | ||
402 | WbrWhMustWriteLock(wbr); | |
403 | ||
404 | err = test_linkable(h_root); | |
405 | if (unlikely(err)) { | |
406 | err = 1; | |
407 | goto out; | |
408 | } | |
409 | ||
410 | /* | |
411 | * todo: should this create be done in /sbin/mount.aufs helper? | |
412 | */ | |
413 | err = -EEXIST; | |
414 | h_dir = d_inode(h_root); | |
415 | if (d_is_negative(base[AuBrWh_BASE].dentry)) { | |
416 | h_path->dentry = base[AuBrWh_BASE].dentry; | |
417 | err = vfsub_create(h_dir, h_path, WH_MASK, /*want_excl*/true); | |
418 | } else if (d_is_reg(base[AuBrWh_BASE].dentry)) | |
419 | err = 0; | |
420 | else | |
421 | pr_err("unknown %pd2 exists\n", base[AuBrWh_BASE].dentry); | |
422 | if (unlikely(err)) | |
423 | goto out; | |
424 | ||
425 | h_path->dentry = base[AuBrWh_PLINK].dentry; | |
426 | if (do_plink) { | |
427 | err = au_whdir(h_dir, h_path); | |
428 | if (unlikely(err)) | |
429 | goto out; | |
430 | wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry); | |
431 | } else | |
432 | au_wh_clean(h_dir, h_path, /*isdir*/1); | |
433 | wbr->wbr_whbase = dget(base[AuBrWh_BASE].dentry); | |
434 | ||
435 | h_path->dentry = base[AuBrWh_ORPH].dentry; | |
436 | err = au_whdir(h_dir, h_path); | |
437 | if (unlikely(err)) | |
438 | goto out; | |
439 | wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry); | |
440 | ||
441 | out: | |
442 | return err; | |
443 | } | |
444 | ||
445 | /* | |
446 | * initialize the whiteout base file/dir for @br. | |
447 | */ | |
448 | int au_wh_init(struct au_branch *br, struct super_block *sb) | |
449 | { | |
450 | int err, i; | |
451 | const unsigned char do_plink | |
452 | = !!au_opt_test(au_mntflags(sb), PLINK); | |
453 | struct inode *h_dir; | |
454 | struct path path = br->br_path; | |
455 | struct dentry *h_root = path.dentry; | |
456 | struct au_wbr *wbr = br->br_wbr; | |
457 | static const struct qstr base_name[] = { | |
458 | [AuBrWh_BASE] = QSTR_INIT(AUFS_BASE_NAME, | |
459 | sizeof(AUFS_BASE_NAME) - 1), | |
460 | [AuBrWh_PLINK] = QSTR_INIT(AUFS_PLINKDIR_NAME, | |
461 | sizeof(AUFS_PLINKDIR_NAME) - 1), | |
462 | [AuBrWh_ORPH] = QSTR_INIT(AUFS_ORPHDIR_NAME, | |
463 | sizeof(AUFS_ORPHDIR_NAME) - 1) | |
464 | }; | |
465 | struct au_wh_base base[] = { | |
466 | [AuBrWh_BASE] = { | |
467 | .name = base_name + AuBrWh_BASE, | |
468 | .dentry = NULL | |
469 | }, | |
470 | [AuBrWh_PLINK] = { | |
471 | .name = base_name + AuBrWh_PLINK, | |
472 | .dentry = NULL | |
473 | }, | |
474 | [AuBrWh_ORPH] = { | |
475 | .name = base_name + AuBrWh_ORPH, | |
476 | .dentry = NULL | |
477 | } | |
478 | }; | |
479 | ||
480 | if (wbr) | |
481 | WbrWhMustWriteLock(wbr); | |
482 | ||
483 | for (i = 0; i < AuBrWh_Last; i++) { | |
484 | /* doubly whiteouted */ | |
485 | struct dentry *d; | |
486 | ||
487 | d = au_wh_lkup(h_root, (void *)base[i].name, br); | |
488 | err = PTR_ERR(d); | |
489 | if (IS_ERR(d)) | |
490 | goto out; | |
491 | ||
492 | base[i].dentry = d; | |
493 | AuDebugOn(wbr | |
494 | && wbr->wbr_wh[i] | |
495 | && wbr->wbr_wh[i] != base[i].dentry); | |
496 | } | |
497 | ||
498 | if (wbr) | |
499 | for (i = 0; i < AuBrWh_Last; i++) { | |
500 | dput(wbr->wbr_wh[i]); | |
501 | wbr->wbr_wh[i] = NULL; | |
502 | } | |
503 | ||
504 | err = 0; | |
505 | if (!au_br_writable(br->br_perm)) { | |
506 | h_dir = d_inode(h_root); | |
507 | au_wh_init_ro(h_dir, base, &path); | |
508 | } else if (!au_br_wh_linkable(br->br_perm)) { | |
509 | err = au_wh_init_rw_nolink(h_root, wbr, do_plink, base, &path); | |
510 | if (err > 0) | |
511 | goto out; | |
512 | else if (err) | |
513 | goto out_err; | |
514 | } else { | |
515 | err = au_wh_init_rw(h_root, wbr, do_plink, base, &path); | |
516 | if (err > 0) | |
517 | goto out; | |
518 | else if (err) | |
519 | goto out_err; | |
520 | } | |
521 | goto out; /* success */ | |
522 | ||
523 | out_err: | |
524 | pr_err("an error(%d) on the writable branch %pd(%s)\n", | |
525 | err, h_root, au_sbtype(h_root->d_sb)); | |
526 | out: | |
527 | for (i = 0; i < AuBrWh_Last; i++) | |
528 | dput(base[i].dentry); | |
529 | return err; | |
530 | } | |
531 | ||
532 | /* ---------------------------------------------------------------------- */ | |
533 | /* | |
534 | * whiteouts are all hard-linked usually. | |
535 | * when its link count reaches a ceiling, we create a new whiteout base | |
536 | * asynchronously. | |
537 | */ | |
538 | ||
/* argument handed to the asynchronous reinit_br_wh() work */
struct reinit_br_wh {
	struct super_block *sb;	/* aufs superblock */
	struct au_branch *br;	/* branch whose whiteout base is re-created */
};
543 | ||
544 | static void reinit_br_wh(void *arg) | |
545 | { | |
546 | int err; | |
547 | aufs_bindex_t bindex; | |
548 | struct path h_path; | |
549 | struct reinit_br_wh *a = arg; | |
550 | struct au_wbr *wbr; | |
551 | struct inode *dir, *delegated; | |
552 | struct dentry *h_root; | |
553 | struct au_hinode *hdir; | |
554 | ||
555 | err = 0; | |
556 | wbr = a->br->br_wbr; | |
557 | /* big aufs lock */ | |
558 | si_noflush_write_lock(a->sb); | |
559 | if (!au_br_writable(a->br->br_perm)) | |
560 | goto out; | |
561 | bindex = au_br_index(a->sb, a->br->br_id); | |
562 | if (unlikely(bindex < 0)) | |
563 | goto out; | |
564 | ||
565 | di_read_lock_parent(a->sb->s_root, AuLock_IR); | |
566 | dir = d_inode(a->sb->s_root); | |
567 | hdir = au_hi(dir, bindex); | |
568 | h_root = au_h_dptr(a->sb->s_root, bindex); | |
569 | AuDebugOn(h_root != au_br_dentry(a->br)); | |
570 | ||
571 | au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT); | |
572 | wbr_wh_write_lock(wbr); | |
573 | err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode, | |
574 | h_root, a->br); | |
575 | if (!err) { | |
576 | h_path.dentry = wbr->wbr_whbase; | |
577 | h_path.mnt = au_br_mnt(a->br); | |
578 | delegated = NULL; | |
579 | err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated, | |
580 | /*force*/0); | |
581 | if (unlikely(err == -EWOULDBLOCK)) { | |
582 | pr_warn("cannot retry for NFSv4 delegation" | |
583 | " for an internal unlink\n"); | |
584 | iput(delegated); | |
585 | } | |
586 | } else { | |
587 | pr_warn("%pd is moved, ignored\n", wbr->wbr_whbase); | |
588 | err = 0; | |
589 | } | |
590 | dput(wbr->wbr_whbase); | |
591 | wbr->wbr_whbase = NULL; | |
592 | if (!err) | |
593 | err = au_wh_init(a->br, a->sb); | |
594 | wbr_wh_write_unlock(wbr); | |
595 | au_hn_inode_unlock(hdir); | |
596 | di_read_unlock(a->sb->s_root, AuLock_IR); | |
597 | if (!err) | |
598 | au_fhsm_wrote(a->sb, bindex, /*force*/0); | |
599 | ||
600 | out: | |
601 | if (wbr) | |
602 | atomic_dec(&wbr->wbr_wh_running); | |
603 | au_lcnt_dec(&a->br->br_count); | |
604 | si_write_unlock(a->sb); | |
605 | au_nwt_done(&au_sbi(a->sb)->si_nowait); | |
606 | au_kfree_rcu(a); | |
607 | if (unlikely(err)) | |
608 | AuIOErr("err %d\n", err); | |
609 | } | |
610 | ||
611 | static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br) | |
612 | { | |
613 | int do_dec, wkq_err; | |
614 | struct reinit_br_wh *arg; | |
615 | ||
616 | do_dec = 1; | |
617 | if (atomic_inc_return(&br->br_wbr->wbr_wh_running) != 1) | |
618 | goto out; | |
619 | ||
620 | /* ignore ENOMEM */ | |
621 | arg = kmalloc(sizeof(*arg), GFP_NOFS); | |
622 | if (arg) { | |
623 | /* | |
624 | * dec(wh_running), kfree(arg) and dec(br_count) | |
625 | * in reinit function | |
626 | */ | |
627 | arg->sb = sb; | |
628 | arg->br = br; | |
629 | au_lcnt_inc(&br->br_count); | |
630 | wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb, /*flags*/0); | |
631 | if (unlikely(wkq_err)) { | |
632 | atomic_dec(&br->br_wbr->wbr_wh_running); | |
633 | au_lcnt_dec(&br->br_count); | |
634 | au_kfree_rcu(arg); | |
635 | } | |
636 | do_dec = 0; | |
637 | } | |
638 | ||
639 | out: | |
640 | if (do_dec) | |
641 | atomic_dec(&br->br_wbr->wbr_wh_running); | |
642 | } | |
643 | ||
644 | /* ---------------------------------------------------------------------- */ | |
645 | ||
646 | /* | |
647 | * create the whiteout @wh. | |
648 | */ | |
649 | static int link_or_create_wh(struct super_block *sb, aufs_bindex_t bindex, | |
650 | struct dentry *wh) | |
651 | { | |
652 | int err; | |
653 | struct path h_path = { | |
654 | .dentry = wh | |
655 | }; | |
656 | struct au_branch *br; | |
657 | struct au_wbr *wbr; | |
658 | struct dentry *h_parent; | |
659 | struct inode *h_dir, *delegated; | |
660 | ||
661 | h_parent = wh->d_parent; /* dir inode is locked */ | |
662 | h_dir = d_inode(h_parent); | |
663 | IMustLock(h_dir); | |
664 | ||
665 | br = au_sbr(sb, bindex); | |
666 | h_path.mnt = au_br_mnt(br); | |
667 | wbr = br->br_wbr; | |
668 | wbr_wh_read_lock(wbr); | |
669 | if (wbr->wbr_whbase) { | |
670 | delegated = NULL; | |
671 | err = vfsub_link(wbr->wbr_whbase, h_dir, &h_path, &delegated); | |
672 | if (unlikely(err == -EWOULDBLOCK)) { | |
673 | pr_warn("cannot retry for NFSv4 delegation" | |
674 | " for an internal link\n"); | |
675 | iput(delegated); | |
676 | } | |
677 | if (!err || err != -EMLINK) | |
678 | goto out; | |
679 | ||
680 | /* link count full. re-initialize br_whbase. */ | |
681 | kick_reinit_br_wh(sb, br); | |
682 | } | |
683 | ||
684 | /* return this error in this context */ | |
685 | err = vfsub_create(h_dir, &h_path, WH_MASK, /*want_excl*/true); | |
686 | if (!err) | |
687 | au_fhsm_wrote(sb, bindex, /*force*/0); | |
688 | ||
689 | out: | |
690 | wbr_wh_read_unlock(wbr); | |
691 | return err; | |
692 | } | |
693 | ||
694 | /* ---------------------------------------------------------------------- */ | |
695 | ||
696 | /* | |
697 | * create or remove the diropq. | |
698 | */ | |
699 | static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex, | |
700 | unsigned int flags) | |
701 | { | |
702 | struct dentry *opq_dentry, *h_dentry; | |
703 | struct super_block *sb; | |
704 | struct au_branch *br; | |
705 | int err; | |
706 | ||
707 | sb = dentry->d_sb; | |
708 | br = au_sbr(sb, bindex); | |
709 | h_dentry = au_h_dptr(dentry, bindex); | |
710 | opq_dentry = vfsub_lkup_one(&diropq_name, h_dentry); | |
711 | if (IS_ERR(opq_dentry)) | |
712 | goto out; | |
713 | ||
714 | if (au_ftest_diropq(flags, CREATE)) { | |
715 | err = link_or_create_wh(sb, bindex, opq_dentry); | |
716 | if (!err) { | |
717 | au_set_dbdiropq(dentry, bindex); | |
718 | goto out; /* success */ | |
719 | } | |
720 | } else { | |
721 | struct path tmp = { | |
722 | .dentry = opq_dentry, | |
723 | .mnt = au_br_mnt(br) | |
724 | }; | |
725 | err = do_unlink_wh(au_h_iptr(d_inode(dentry), bindex), &tmp); | |
726 | if (!err) | |
727 | au_set_dbdiropq(dentry, -1); | |
728 | } | |
729 | dput(opq_dentry); | |
730 | opq_dentry = ERR_PTR(err); | |
731 | ||
732 | out: | |
733 | return opq_dentry; | |
734 | } | |
735 | ||
736 | struct do_diropq_args { | |
737 | struct dentry **errp; | |
738 | struct dentry *dentry; | |
739 | aufs_bindex_t bindex; | |
740 | unsigned int flags; | |
741 | }; | |
742 | ||
743 | static void call_do_diropq(void *args) | |
744 | { | |
745 | struct do_diropq_args *a = args; | |
746 | *a->errp = do_diropq(a->dentry, a->bindex, a->flags); | |
747 | } | |
748 | ||
749 | struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex, | |
750 | unsigned int flags) | |
751 | { | |
752 | struct dentry *diropq, *h_dentry; | |
753 | ||
754 | h_dentry = au_h_dptr(dentry, bindex); | |
755 | if (!au_test_h_perm_sio(d_inode(h_dentry), MAY_EXEC | MAY_WRITE)) | |
756 | diropq = do_diropq(dentry, bindex, flags); | |
757 | else { | |
758 | int wkq_err; | |
759 | struct do_diropq_args args = { | |
760 | .errp = &diropq, | |
761 | .dentry = dentry, | |
762 | .bindex = bindex, | |
763 | .flags = flags | |
764 | }; | |
765 | ||
766 | wkq_err = au_wkq_wait(call_do_diropq, &args); | |
767 | if (unlikely(wkq_err)) | |
768 | diropq = ERR_PTR(wkq_err); | |
769 | } | |
770 | ||
771 | return diropq; | |
772 | } | |
773 | ||
774 | /* ---------------------------------------------------------------------- */ | |
775 | ||
776 | /* | |
777 | * lookup whiteout dentry. | |
778 | * @h_parent: lower parent dentry which must exist and be locked | |
779 | * @base_name: name of dentry which will be whiteouted | |
780 | * returns dentry for whiteout. | |
781 | */ | |
782 | struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name, | |
783 | struct au_branch *br) | |
784 | { | |
785 | int err; | |
786 | struct qstr wh_name; | |
787 | struct dentry *wh_dentry; | |
788 | ||
789 | err = au_wh_name_alloc(&wh_name, base_name); | |
790 | wh_dentry = ERR_PTR(err); | |
791 | if (!err) { | |
792 | wh_dentry = vfsub_lkup_one(&wh_name, h_parent); | |
793 | au_kfree_try_rcu(wh_name.name); | |
794 | } | |
795 | return wh_dentry; | |
796 | } | |
797 | ||
798 | /* | |
799 | * link/create a whiteout for @dentry on @bindex. | |
800 | */ | |
801 | struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex, | |
802 | struct dentry *h_parent) | |
803 | { | |
804 | struct dentry *wh_dentry; | |
805 | struct super_block *sb; | |
806 | int err; | |
807 | ||
808 | sb = dentry->d_sb; | |
809 | wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, au_sbr(sb, bindex)); | |
810 | if (!IS_ERR(wh_dentry) && d_is_negative(wh_dentry)) { | |
811 | err = link_or_create_wh(sb, bindex, wh_dentry); | |
812 | if (!err) { | |
813 | au_set_dbwh(dentry, bindex); | |
814 | au_fhsm_wrote(sb, bindex, /*force*/0); | |
815 | } else { | |
816 | dput(wh_dentry); | |
817 | wh_dentry = ERR_PTR(err); | |
818 | } | |
819 | } | |
820 | ||
821 | return wh_dentry; | |
822 | } | |
823 | ||
824 | /* ---------------------------------------------------------------------- */ | |
825 | ||
826 | /* Delete all whiteouts in this directory on branch bindex. */ | |
827 | static int del_wh_children(struct dentry *h_dentry, struct au_nhash *whlist, | |
828 | aufs_bindex_t bindex, struct au_branch *br) | |
829 | { | |
830 | int err; | |
831 | unsigned long ul, n; | |
832 | struct qstr wh_name; | |
833 | char *p; | |
834 | struct hlist_head *head; | |
835 | struct au_vdir_wh *pos; | |
836 | struct au_vdir_destr *str; | |
837 | ||
838 | err = -ENOMEM; | |
839 | p = (void *)__get_free_page(GFP_NOFS); | |
840 | wh_name.name = p; | |
841 | if (unlikely(!wh_name.name)) | |
842 | goto out; | |
843 | ||
844 | err = 0; | |
845 | memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN); | |
846 | p += AUFS_WH_PFX_LEN; | |
847 | n = whlist->nh_num; | |
848 | head = whlist->nh_head; | |
849 | for (ul = 0; !err && ul < n; ul++, head++) { | |
850 | hlist_for_each_entry(pos, head, wh_hash) { | |
851 | if (pos->wh_bindex != bindex) | |
852 | continue; | |
853 | ||
854 | str = &pos->wh_str; | |
855 | if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) { | |
856 | memcpy(p, str->name, str->len); | |
857 | wh_name.len = AUFS_WH_PFX_LEN + str->len; | |
858 | err = unlink_wh_name(h_dentry, &wh_name, br); | |
859 | if (!err) | |
860 | continue; | |
861 | break; | |
862 | } | |
863 | AuIOErr("whiteout name too long %.*s\n", | |
864 | str->len, str->name); | |
865 | err = -EIO; | |
866 | break; | |
867 | } | |
868 | } | |
869 | free_page((unsigned long)wh_name.name); | |
870 | ||
871 | out: | |
872 | return err; | |
873 | } | |
874 | ||
875 | struct del_wh_children_args { | |
876 | int *errp; | |
877 | struct dentry *h_dentry; | |
878 | struct au_nhash *whlist; | |
879 | aufs_bindex_t bindex; | |
880 | struct au_branch *br; | |
881 | }; | |
882 | ||
883 | static void call_del_wh_children(void *args) | |
884 | { | |
885 | struct del_wh_children_args *a = args; | |
886 | *a->errp = del_wh_children(a->h_dentry, a->whlist, a->bindex, a->br); | |
887 | } | |
888 | ||
889 | /* ---------------------------------------------------------------------- */ | |
890 | ||
891 | struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp) | |
892 | { | |
893 | struct au_whtmp_rmdir *whtmp; | |
894 | int err; | |
895 | unsigned int rdhash; | |
896 | ||
897 | SiMustAnyLock(sb); | |
898 | ||
899 | whtmp = kzalloc(sizeof(*whtmp), gfp); | |
900 | if (unlikely(!whtmp)) { | |
901 | whtmp = ERR_PTR(-ENOMEM); | |
902 | goto out; | |
903 | } | |
904 | ||
905 | /* no estimation for dir size */ | |
906 | rdhash = au_sbi(sb)->si_rdhash; | |
907 | if (!rdhash) | |
908 | rdhash = AUFS_RDHASH_DEF; | |
909 | err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp); | |
910 | if (unlikely(err)) { | |
911 | au_kfree_rcu(whtmp); | |
912 | whtmp = ERR_PTR(err); | |
913 | } | |
914 | ||
915 | out: | |
916 | return whtmp; | |
917 | } | |
918 | ||
919 | void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp) | |
920 | { | |
921 | if (whtmp->br) | |
922 | au_lcnt_dec(&whtmp->br->br_count); | |
923 | dput(whtmp->wh_dentry); | |
924 | iput(whtmp->dir); | |
925 | au_nhash_wh_free(&whtmp->whlist); | |
926 | au_kfree_rcu(whtmp); | |
927 | } | |
928 | ||
929 | /* | |
930 | * rmdir the whiteouted temporary named dir @h_dentry. | |
931 | * @whlist: whiteouted children. | |
932 | */ | |
933 | int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex, | |
934 | struct dentry *wh_dentry, struct au_nhash *whlist) | |
935 | { | |
936 | int err; | |
937 | unsigned int h_nlink; | |
938 | struct path h_tmp; | |
939 | struct inode *wh_inode, *h_dir; | |
940 | struct au_branch *br; | |
941 | ||
942 | h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */ | |
943 | IMustLock(h_dir); | |
944 | ||
945 | br = au_sbr(dir->i_sb, bindex); | |
946 | wh_inode = d_inode(wh_dentry); | |
947 | inode_lock_nested(wh_inode, AuLsc_I_CHILD); | |
948 | ||
949 | /* | |
950 | * someone else might change some whiteouts while we were sleeping. | |
951 | * it means this whlist may have an obsoleted entry. | |
952 | */ | |
953 | if (!au_test_h_perm_sio(wh_inode, MAY_EXEC | MAY_WRITE)) | |
954 | err = del_wh_children(wh_dentry, whlist, bindex, br); | |
955 | else { | |
956 | int wkq_err; | |
957 | struct del_wh_children_args args = { | |
958 | .errp = &err, | |
959 | .h_dentry = wh_dentry, | |
960 | .whlist = whlist, | |
961 | .bindex = bindex, | |
962 | .br = br | |
963 | }; | |
964 | ||
965 | wkq_err = au_wkq_wait(call_del_wh_children, &args); | |
966 | if (unlikely(wkq_err)) | |
967 | err = wkq_err; | |
968 | } | |
969 | inode_unlock(wh_inode); | |
970 | ||
971 | if (!err) { | |
972 | h_tmp.dentry = wh_dentry; | |
973 | h_tmp.mnt = au_br_mnt(br); | |
974 | h_nlink = h_dir->i_nlink; | |
975 | err = vfsub_rmdir(h_dir, &h_tmp); | |
976 | /* some fs doesn't change the parent nlink in some cases */ | |
977 | h_nlink -= h_dir->i_nlink; | |
978 | } | |
979 | ||
980 | if (!err) { | |
981 | if (au_ibtop(dir) == bindex) { | |
982 | /* todo: dir->i_mutex is necessary */ | |
983 | au_cpup_attr_timesizes(dir); | |
984 | if (h_nlink) | |
985 | vfsub_drop_nlink(dir); | |
986 | } | |
987 | return 0; /* success */ | |
988 | } | |
989 | ||
990 | pr_warn("failed removing %pd(%d), ignored\n", wh_dentry, err); | |
991 | return err; | |
992 | } | |
993 | ||
994 | static void call_rmdir_whtmp(void *args) | |
995 | { | |
996 | int err; | |
997 | aufs_bindex_t bindex; | |
998 | struct au_whtmp_rmdir *a = args; | |
999 | struct super_block *sb; | |
1000 | struct dentry *h_parent; | |
1001 | struct inode *h_dir; | |
1002 | struct au_hinode *hdir; | |
1003 | ||
1004 | /* rmdir by nfsd may cause deadlock with this i_mutex */ | |
1005 | /* inode_lock(a->dir); */ | |
1006 | err = -EROFS; | |
1007 | sb = a->dir->i_sb; | |
1008 | si_read_lock(sb, !AuLock_FLUSH); | |
1009 | if (!au_br_writable(a->br->br_perm)) | |
1010 | goto out; | |
1011 | bindex = au_br_index(sb, a->br->br_id); | |
1012 | if (unlikely(bindex < 0)) | |
1013 | goto out; | |
1014 | ||
1015 | err = -EIO; | |
1016 | ii_write_lock_parent(a->dir); | |
1017 | h_parent = dget_parent(a->wh_dentry); | |
1018 | h_dir = d_inode(h_parent); | |
1019 | hdir = au_hi(a->dir, bindex); | |
1020 | err = vfsub_mnt_want_write(au_br_mnt(a->br)); | |
1021 | if (unlikely(err)) | |
1022 | goto out_mnt; | |
1023 | au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT); | |
1024 | err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent, | |
1025 | a->br); | |
1026 | if (!err) | |
1027 | err = au_whtmp_rmdir(a->dir, bindex, a->wh_dentry, &a->whlist); | |
1028 | au_hn_inode_unlock(hdir); | |
1029 | vfsub_mnt_drop_write(au_br_mnt(a->br)); | |
1030 | ||
1031 | out_mnt: | |
1032 | dput(h_parent); | |
1033 | ii_write_unlock(a->dir); | |
1034 | out: | |
1035 | /* inode_unlock(a->dir); */ | |
1036 | au_whtmp_rmdir_free(a); | |
1037 | si_read_unlock(sb); | |
1038 | au_nwt_done(&au_sbi(sb)->si_nowait); | |
1039 | if (unlikely(err)) | |
1040 | AuIOErr("err %d\n", err); | |
1041 | } | |
1042 | ||
1043 | void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex, | |
1044 | struct dentry *wh_dentry, struct au_whtmp_rmdir *args) | |
1045 | { | |
1046 | int wkq_err; | |
1047 | struct super_block *sb; | |
1048 | ||
1049 | IMustLock(dir); | |
1050 | ||
1051 | /* all post-process will be done in do_rmdir_whtmp(). */ | |
1052 | sb = dir->i_sb; | |
1053 | args->dir = au_igrab(dir); | |
1054 | args->br = au_sbr(sb, bindex); | |
1055 | au_lcnt_inc(&args->br->br_count); | |
1056 | args->wh_dentry = dget(wh_dentry); | |
1057 | wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, sb, /*flags*/0); | |
1058 | if (unlikely(wkq_err)) { | |
1059 | pr_warn("rmdir error %pd (%d), ignored\n", wh_dentry, wkq_err); | |
1060 | au_whtmp_rmdir_free(args); | |
1061 | } | |
1062 | } |