git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blob - fs/aufs/inode.c
NFSv4: Fix reboot recovery in copy offload
[mirror_ubuntu-zesty-kernel.git] / fs / aufs / inode.c
1 /*
2 * Copyright (C) 2005-2016 Junjiro R. Okajima
3 *
4 * This program, aufs is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 /*
19 * inode functions
20 */
21
22 #include "aufs.h"
23
24 struct inode *au_igrab(struct inode *inode)
25 {
26 if (inode) {
27 AuDebugOn(!atomic_read(&inode->i_count));
28 ihold(inode);
29 }
30 return inode;
31 }
32
33 static void au_refresh_hinode_attr(struct inode *inode, int do_version)
34 {
35 au_cpup_attr_all(inode, /*force*/0);
36 au_update_iigen(inode, /*half*/1);
37 if (do_version)
38 inode->i_version++;
39 }
40
41 static int au_ii_refresh(struct inode *inode, int *update)
42 {
43 int err, e, nbr;
44 umode_t type;
45 aufs_bindex_t bindex, new_bindex;
46 struct super_block *sb;
47 struct au_iinfo *iinfo;
48 struct au_hinode *p, *q, tmp;
49
50 AuDebugOn(au_is_bad_inode(inode));
51 IiMustWriteLock(inode);
52
53 *update = 0;
54 sb = inode->i_sb;
55 nbr = au_sbbot(sb) + 1;
56 type = inode->i_mode & S_IFMT;
57 iinfo = au_ii(inode);
58 err = au_hinode_realloc(iinfo, nbr, /*may_shrink*/0);
59 if (unlikely(err))
60 goto out;
61
62 AuDebugOn(iinfo->ii_btop < 0);
63 p = au_hinode(iinfo, iinfo->ii_btop);
64 for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot;
65 bindex++, p++) {
66 if (!p->hi_inode)
67 continue;
68
69 AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT));
70 new_bindex = au_br_index(sb, p->hi_id);
71 if (new_bindex == bindex)
72 continue;
73
74 if (new_bindex < 0) {
75 *update = 1;
76 au_hiput(p);
77 p->hi_inode = NULL;
78 continue;
79 }
80
81 if (new_bindex < iinfo->ii_btop)
82 iinfo->ii_btop = new_bindex;
83 if (iinfo->ii_bbot < new_bindex)
84 iinfo->ii_bbot = new_bindex;
85 /* swap two lower inode, and loop again */
86 q = au_hinode(iinfo, new_bindex);
87 tmp = *q;
88 *q = *p;
89 *p = tmp;
90 if (tmp.hi_inode) {
91 bindex--;
92 p--;
93 }
94 }
95 au_update_ibrange(inode, /*do_put_zero*/0);
96 au_hinode_realloc(iinfo, nbr, /*may_shrink*/1); /* harmless if err */
97 e = au_dy_irefresh(inode);
98 if (unlikely(e && !err))
99 err = e;
100
101 out:
102 AuTraceErr(err);
103 return err;
104 }
105
106 void au_refresh_iop(struct inode *inode, int force_getattr)
107 {
108 int type;
109 struct au_sbinfo *sbi = au_sbi(inode->i_sb);
110 const struct inode_operations *iop
111 = force_getattr ? aufs_iop : sbi->si_iop_array;
112
113 if (inode->i_op == iop)
114 return;
115
116 switch (inode->i_mode & S_IFMT) {
117 case S_IFDIR:
118 type = AuIop_DIR;
119 break;
120 case S_IFLNK:
121 type = AuIop_SYMLINK;
122 break;
123 default:
124 type = AuIop_OTHER;
125 break;
126 }
127
128 inode->i_op = iop + type;
129 /* unnecessary smp_wmb() */
130 }
131
132 int au_refresh_hinode_self(struct inode *inode)
133 {
134 int err, update;
135
136 err = au_ii_refresh(inode, &update);
137 if (!err)
138 au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode));
139
140 AuTraceErr(err);
141 return err;
142 }
143
144 int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
145 {
146 int err, e, update;
147 unsigned int flags;
148 umode_t mode;
149 aufs_bindex_t bindex, bbot;
150 unsigned char isdir;
151 struct au_hinode *p;
152 struct au_iinfo *iinfo;
153
154 err = au_ii_refresh(inode, &update);
155 if (unlikely(err))
156 goto out;
157
158 update = 0;
159 iinfo = au_ii(inode);
160 p = au_hinode(iinfo, iinfo->ii_btop);
161 mode = (inode->i_mode & S_IFMT);
162 isdir = S_ISDIR(mode);
163 flags = au_hi_flags(inode, isdir);
164 bbot = au_dbbot(dentry);
165 for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++) {
166 struct inode *h_i, *h_inode;
167 struct dentry *h_d;
168
169 h_d = au_h_dptr(dentry, bindex);
170 if (!h_d || d_is_negative(h_d))
171 continue;
172
173 h_inode = d_inode(h_d);
174 AuDebugOn(mode != (h_inode->i_mode & S_IFMT));
175 if (iinfo->ii_btop <= bindex && bindex <= iinfo->ii_bbot) {
176 h_i = au_h_iptr(inode, bindex);
177 if (h_i) {
178 if (h_i == h_inode)
179 continue;
180 err = -EIO;
181 break;
182 }
183 }
184 if (bindex < iinfo->ii_btop)
185 iinfo->ii_btop = bindex;
186 if (iinfo->ii_bbot < bindex)
187 iinfo->ii_bbot = bindex;
188 au_set_h_iptr(inode, bindex, au_igrab(h_inode), flags);
189 update = 1;
190 }
191 au_update_ibrange(inode, /*do_put_zero*/0);
192 e = au_dy_irefresh(inode);
193 if (unlikely(e && !err))
194 err = e;
195 if (!err)
196 au_refresh_hinode_attr(inode, update && isdir);
197
198 out:
199 AuTraceErr(err);
200 return err;
201 }
202
203 static int set_inode(struct inode *inode, struct dentry *dentry)
204 {
205 int err;
206 unsigned int flags;
207 umode_t mode;
208 aufs_bindex_t bindex, btop, btail;
209 unsigned char isdir;
210 struct dentry *h_dentry;
211 struct inode *h_inode;
212 struct au_iinfo *iinfo;
213 struct inode_operations *iop;
214
215 IiMustWriteLock(inode);
216
217 err = 0;
218 isdir = 0;
219 iop = au_sbi(inode->i_sb)->si_iop_array;
220 btop = au_dbtop(dentry);
221 h_dentry = au_h_dptr(dentry, btop);
222 h_inode = d_inode(h_dentry);
223 mode = h_inode->i_mode;
224 switch (mode & S_IFMT) {
225 case S_IFREG:
226 btail = au_dbtail(dentry);
227 inode->i_op = iop + AuIop_OTHER;
228 inode->i_fop = &aufs_file_fop;
229 err = au_dy_iaop(inode, btop, h_inode);
230 if (unlikely(err))
231 goto out;
232 break;
233 case S_IFDIR:
234 isdir = 1;
235 btail = au_dbtaildir(dentry);
236 inode->i_op = iop + AuIop_DIR;
237 inode->i_fop = &aufs_dir_fop;
238 break;
239 case S_IFLNK:
240 btail = au_dbtail(dentry);
241 inode->i_op = iop + AuIop_SYMLINK;
242 break;
243 case S_IFBLK:
244 case S_IFCHR:
245 case S_IFIFO:
246 case S_IFSOCK:
247 btail = au_dbtail(dentry);
248 inode->i_op = iop + AuIop_OTHER;
249 init_special_inode(inode, mode, h_inode->i_rdev);
250 break;
251 default:
252 AuIOErr("Unknown file type 0%o\n", mode);
253 err = -EIO;
254 goto out;
255 }
256
257 /* do not set hnotify for whiteouted dirs (SHWH mode) */
258 flags = au_hi_flags(inode, isdir);
259 if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)
260 && au_ftest_hi(flags, HNOTIFY)
261 && dentry->d_name.len > AUFS_WH_PFX_LEN
262 && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))
263 au_fclr_hi(flags, HNOTIFY);
264 iinfo = au_ii(inode);
265 iinfo->ii_btop = btop;
266 iinfo->ii_bbot = btail;
267 for (bindex = btop; bindex <= btail; bindex++) {
268 h_dentry = au_h_dptr(dentry, bindex);
269 if (h_dentry)
270 au_set_h_iptr(inode, bindex,
271 au_igrab(d_inode(h_dentry)), flags);
272 }
273 au_cpup_attr_all(inode, /*force*/1);
274 /*
275 * to force calling aufs_get_acl() every time,
276 * do not call cache_no_acl() for aufs inode.
277 */
278
279 out:
280 return err;
281 }
282
283 /*
284 * successful returns with iinfo write_locked
285 * minus: errno
286 * zero: success, matched
287 * plus: no error, but unmatched
288 */
289 static int reval_inode(struct inode *inode, struct dentry *dentry)
290 {
291 int err;
292 unsigned int gen, igflags;
293 aufs_bindex_t bindex, bbot;
294 struct inode *h_inode, *h_dinode;
295 struct dentry *h_dentry;
296
297 /*
298 * before this function, if aufs got any iinfo lock, it must be only
299 * one, the parent dir.
300 * it can happen by UDBA and the obsoleted inode number.
301 */
302 err = -EIO;
303 if (unlikely(inode->i_ino == parent_ino(dentry)))
304 goto out;
305
306 err = 1;
307 ii_write_lock_new_child(inode);
308 h_dentry = au_h_dptr(dentry, au_dbtop(dentry));
309 h_dinode = d_inode(h_dentry);
310 bbot = au_ibbot(inode);
311 for (bindex = au_ibtop(inode); bindex <= bbot; bindex++) {
312 h_inode = au_h_iptr(inode, bindex);
313 if (!h_inode || h_inode != h_dinode)
314 continue;
315
316 err = 0;
317 gen = au_iigen(inode, &igflags);
318 if (gen == au_digen(dentry)
319 && !au_ig_ftest(igflags, HALF_REFRESHED))
320 break;
321
322 /* fully refresh inode using dentry */
323 err = au_refresh_hinode(inode, dentry);
324 if (!err)
325 au_update_iigen(inode, /*half*/0);
326 break;
327 }
328
329 if (unlikely(err))
330 ii_write_unlock(inode);
331 out:
332 return err;
333 }
334
335 int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
336 unsigned int d_type, ino_t *ino)
337 {
338 int err;
339 struct mutex *mtx;
340
341 /* prevent hardlinked inode number from race condition */
342 mtx = NULL;
343 if (d_type != DT_DIR) {
344 mtx = &au_sbr(sb, bindex)->br_xino.xi_nondir_mtx;
345 mutex_lock(mtx);
346 }
347 err = au_xino_read(sb, bindex, h_ino, ino);
348 if (unlikely(err))
349 goto out;
350
351 if (!*ino) {
352 err = -EIO;
353 *ino = au_xino_new_ino(sb);
354 if (unlikely(!*ino))
355 goto out;
356 err = au_xino_write(sb, bindex, h_ino, *ino);
357 if (unlikely(err))
358 goto out;
359 }
360
361 out:
362 if (mtx)
363 mutex_unlock(mtx);
364 return err;
365 }
366
367 /* successful returns with iinfo write_locked */
368 /* todo: return with unlocked? */
369 struct inode *au_new_inode(struct dentry *dentry, int must_new)
370 {
371 struct inode *inode, *h_inode;
372 struct dentry *h_dentry;
373 struct super_block *sb;
374 struct mutex *mtx;
375 ino_t h_ino, ino;
376 int err;
377 aufs_bindex_t btop;
378
379 sb = dentry->d_sb;
380 btop = au_dbtop(dentry);
381 h_dentry = au_h_dptr(dentry, btop);
382 h_inode = d_inode(h_dentry);
383 h_ino = h_inode->i_ino;
384
385 /*
386 * stop 'race'-ing between hardlinks under different
387 * parents.
388 */
389 mtx = NULL;
390 if (!d_is_dir(h_dentry))
391 mtx = &au_sbr(sb, btop)->br_xino.xi_nondir_mtx;
392
393 new_ino:
394 if (mtx)
395 mutex_lock(mtx);
396 err = au_xino_read(sb, btop, h_ino, &ino);
397 inode = ERR_PTR(err);
398 if (unlikely(err))
399 goto out;
400
401 if (!ino) {
402 ino = au_xino_new_ino(sb);
403 if (unlikely(!ino)) {
404 inode = ERR_PTR(-EIO);
405 goto out;
406 }
407 }
408
409 AuDbg("i%lu\n", (unsigned long)ino);
410 inode = au_iget_locked(sb, ino);
411 err = PTR_ERR(inode);
412 if (IS_ERR(inode))
413 goto out;
414
415 AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
416 if (inode->i_state & I_NEW) {
417 ii_write_lock_new_child(inode);
418 err = set_inode(inode, dentry);
419 if (!err) {
420 unlock_new_inode(inode);
421 goto out; /* success */
422 }
423
424 /*
425 * iget_failed() calls iput(), but we need to call
426 * ii_write_unlock() after iget_failed(). so dirty hack for
427 * i_count.
428 */
429 atomic_inc(&inode->i_count);
430 iget_failed(inode);
431 ii_write_unlock(inode);
432 au_xino_write(sb, btop, h_ino, /*ino*/0);
433 /* ignore this error */
434 goto out_iput;
435 } else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) {
436 /*
437 * horrible race condition between lookup, readdir and copyup
438 * (or something).
439 */
440 if (mtx)
441 mutex_unlock(mtx);
442 err = reval_inode(inode, dentry);
443 if (unlikely(err < 0)) {
444 mtx = NULL;
445 goto out_iput;
446 }
447
448 if (!err) {
449 mtx = NULL;
450 goto out; /* success */
451 } else if (mtx)
452 mutex_lock(mtx);
453 }
454
455 if (unlikely(au_test_fs_unique_ino(h_inode)))
456 AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir,"
457 " b%d, %s, %pd, hi%lu, i%lu.\n",
458 btop, au_sbtype(h_dentry->d_sb), dentry,
459 (unsigned long)h_ino, (unsigned long)ino);
460 ino = 0;
461 err = au_xino_write(sb, btop, h_ino, /*ino*/0);
462 if (!err) {
463 iput(inode);
464 if (mtx)
465 mutex_unlock(mtx);
466 goto new_ino;
467 }
468
469 out_iput:
470 iput(inode);
471 inode = ERR_PTR(err);
472 out:
473 if (mtx)
474 mutex_unlock(mtx);
475 return inode;
476 }
477
478 /* ---------------------------------------------------------------------- */
479
480 int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
481 struct inode *inode)
482 {
483 int err;
484 struct inode *hi;
485
486 err = au_br_rdonly(au_sbr(sb, bindex));
487
488 /* pseudo-link after flushed may happen out of bounds */
489 if (!err
490 && inode
491 && au_ibtop(inode) <= bindex
492 && bindex <= au_ibbot(inode)) {
493 /*
494 * permission check is unnecessary since vfsub routine
495 * will be called later
496 */
497 hi = au_h_iptr(inode, bindex);
498 if (hi)
499 err = IS_IMMUTABLE(hi) ? -EROFS : 0;
500 }
501
502 return err;
503 }
504
505 int au_test_h_perm(struct inode *h_inode, int mask)
506 {
507 if (uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
508 return 0;
509 return inode_permission(h_inode, mask);
510 }
511
512 int au_test_h_perm_sio(struct inode *h_inode, int mask)
513 {
514 if (au_test_nfs(h_inode->i_sb)
515 && (mask & MAY_WRITE)
516 && S_ISDIR(h_inode->i_mode))
517 mask |= MAY_READ; /* force permission check */
518 return au_test_h_perm(h_inode, mask);
519 }