]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blob - fs/aufs/inode.c
UBUNTU: SAUCE: AUFS
[mirror_ubuntu-jammy-kernel.git] / fs / aufs / inode.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2005-2021 Junjiro R. Okajima
4 *
5 * This program, aufs is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 /*
20 * inode functions
21 */
22
23 #include <linux/iversion.h>
24 #include "aufs.h"
25
26 struct inode *au_igrab(struct inode *inode)
27 {
28 if (inode) {
29 AuDebugOn(!atomic_read(&inode->i_count));
30 ihold(inode);
31 }
32 return inode;
33 }
34
/*
 * Re-copy the attributes to the aufs inode (non-forced), mark its
 * generation as half-refreshed, and bump i_version when @do_version is set.
 */
static void au_refresh_hinode_attr(struct inode *inode, int do_version)
{
	au_cpup_attr_all(inode, /*force*/0);
	au_update_iigen(inode, /*half*/1);
	if (!do_version)
		return;

	inode_inc_iversion(inode);
}
42
43 static int au_ii_refresh(struct inode *inode, int *update)
44 {
45 int err, e, nbr;
46 umode_t type;
47 aufs_bindex_t bindex, new_bindex;
48 struct super_block *sb;
49 struct au_iinfo *iinfo;
50 struct au_hinode *p, *q, tmp;
51
52 AuDebugOn(au_is_bad_inode(inode));
53 IiMustWriteLock(inode);
54
55 *update = 0;
56 sb = inode->i_sb;
57 nbr = au_sbbot(sb) + 1;
58 type = inode->i_mode & S_IFMT;
59 iinfo = au_ii(inode);
60 err = au_hinode_realloc(iinfo, nbr, /*may_shrink*/0);
61 if (unlikely(err))
62 goto out;
63
64 AuDebugOn(iinfo->ii_btop < 0);
65 p = au_hinode(iinfo, iinfo->ii_btop);
66 for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot;
67 bindex++, p++) {
68 if (!p->hi_inode)
69 continue;
70
71 AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT));
72 new_bindex = au_br_index(sb, p->hi_id);
73 if (new_bindex == bindex)
74 continue;
75
76 if (new_bindex < 0) {
77 *update = 1;
78 au_hiput(p);
79 p->hi_inode = NULL;
80 continue;
81 }
82
83 if (new_bindex < iinfo->ii_btop)
84 iinfo->ii_btop = new_bindex;
85 if (iinfo->ii_bbot < new_bindex)
86 iinfo->ii_bbot = new_bindex;
87 /* swap two lower inode, and loop again */
88 q = au_hinode(iinfo, new_bindex);
89 tmp = *q;
90 *q = *p;
91 *p = tmp;
92 if (tmp.hi_inode) {
93 bindex--;
94 p--;
95 }
96 }
97 au_update_ibrange(inode, /*do_put_zero*/0);
98 au_hinode_realloc(iinfo, nbr, /*may_shrink*/1); /* harmless if err */
99 e = au_dy_irefresh(inode);
100 if (unlikely(e && !err))
101 err = e;
102
103 out:
104 AuTraceErr(err);
105 return err;
106 }
107
108 void au_refresh_iop(struct inode *inode, int force_getattr)
109 {
110 int type;
111 struct au_sbinfo *sbi = au_sbi(inode->i_sb);
112 const struct inode_operations *iop
113 = force_getattr ? aufs_iop : sbi->si_iop_array;
114
115 if (inode->i_op == iop)
116 return;
117
118 switch (inode->i_mode & S_IFMT) {
119 case S_IFDIR:
120 type = AuIop_DIR;
121 break;
122 case S_IFLNK:
123 type = AuIop_SYMLINK;
124 break;
125 default:
126 type = AuIop_OTHER;
127 break;
128 }
129
130 inode->i_op = iop + type;
131 /* unnecessary smp_wmb() */
132 }
133
134 int au_refresh_hinode_self(struct inode *inode)
135 {
136 int err, update;
137
138 err = au_ii_refresh(inode, &update);
139 if (!err)
140 au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode));
141
142 AuTraceErr(err);
143 return err;
144 }
145
146 int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
147 {
148 int err, e, update;
149 unsigned int flags;
150 umode_t mode;
151 aufs_bindex_t bindex, bbot;
152 unsigned char isdir;
153 struct au_hinode *p;
154 struct au_iinfo *iinfo;
155
156 err = au_ii_refresh(inode, &update);
157 if (unlikely(err))
158 goto out;
159
160 update = 0;
161 iinfo = au_ii(inode);
162 p = au_hinode(iinfo, iinfo->ii_btop);
163 mode = (inode->i_mode & S_IFMT);
164 isdir = S_ISDIR(mode);
165 flags = au_hi_flags(inode, isdir);
166 bbot = au_dbbot(dentry);
167 for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++) {
168 struct inode *h_i, *h_inode;
169 struct dentry *h_d;
170
171 h_d = au_h_dptr(dentry, bindex);
172 if (!h_d || d_is_negative(h_d))
173 continue;
174
175 h_inode = d_inode(h_d);
176 AuDebugOn(mode != (h_inode->i_mode & S_IFMT));
177 if (iinfo->ii_btop <= bindex && bindex <= iinfo->ii_bbot) {
178 h_i = au_h_iptr(inode, bindex);
179 if (h_i) {
180 if (h_i == h_inode)
181 continue;
182 err = -EIO;
183 break;
184 }
185 }
186 if (bindex < iinfo->ii_btop)
187 iinfo->ii_btop = bindex;
188 if (iinfo->ii_bbot < bindex)
189 iinfo->ii_bbot = bindex;
190 au_set_h_iptr(inode, bindex, au_igrab(h_inode), flags);
191 update = 1;
192 }
193 au_update_ibrange(inode, /*do_put_zero*/0);
194 e = au_dy_irefresh(inode);
195 if (unlikely(e && !err))
196 err = e;
197 if (!err)
198 au_refresh_hinode_attr(inode, update && isdir);
199
200 out:
201 AuTraceErr(err);
202 return err;
203 }
204
205 static int set_inode(struct inode *inode, struct dentry *dentry)
206 {
207 int err;
208 unsigned int flags;
209 umode_t mode;
210 aufs_bindex_t bindex, btop, btail;
211 unsigned char isdir;
212 struct dentry *h_dentry;
213 struct inode *h_inode;
214 struct au_iinfo *iinfo;
215 const struct inode_operations *iop;
216
217 IiMustWriteLock(inode);
218
219 err = 0;
220 isdir = 0;
221 iop = au_sbi(inode->i_sb)->si_iop_array;
222 btop = au_dbtop(dentry);
223 h_dentry = au_h_dptr(dentry, btop);
224 h_inode = d_inode(h_dentry);
225 mode = h_inode->i_mode;
226 switch (mode & S_IFMT) {
227 case S_IFREG:
228 btail = au_dbtail(dentry);
229 inode->i_op = iop + AuIop_OTHER;
230 inode->i_fop = &aufs_file_fop;
231 err = au_dy_iaop(inode, btop, h_inode);
232 if (unlikely(err))
233 goto out;
234 break;
235 case S_IFDIR:
236 isdir = 1;
237 btail = au_dbtaildir(dentry);
238 inode->i_op = iop + AuIop_DIR;
239 inode->i_fop = &aufs_dir_fop;
240 break;
241 case S_IFLNK:
242 btail = au_dbtail(dentry);
243 inode->i_op = iop + AuIop_SYMLINK;
244 break;
245 case S_IFBLK:
246 case S_IFCHR:
247 case S_IFIFO:
248 case S_IFSOCK:
249 btail = au_dbtail(dentry);
250 inode->i_op = iop + AuIop_OTHER;
251 init_special_inode(inode, mode, h_inode->i_rdev);
252 break;
253 default:
254 AuIOErr("Unknown file type 0%o\n", mode);
255 err = -EIO;
256 goto out;
257 }
258
259 /* do not set hnotify for whiteouted dirs (SHWH mode) */
260 flags = au_hi_flags(inode, isdir);
261 if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)
262 && au_ftest_hi(flags, HNOTIFY)
263 && dentry->d_name.len > AUFS_WH_PFX_LEN
264 && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))
265 au_fclr_hi(flags, HNOTIFY);
266 iinfo = au_ii(inode);
267 iinfo->ii_btop = btop;
268 iinfo->ii_bbot = btail;
269 for (bindex = btop; bindex <= btail; bindex++) {
270 h_dentry = au_h_dptr(dentry, bindex);
271 if (h_dentry)
272 au_set_h_iptr(inode, bindex,
273 au_igrab(d_inode(h_dentry)), flags);
274 }
275 au_cpup_attr_all(inode, /*force*/1);
276 /*
277 * to force calling aufs_get_acl() every time,
278 * do not call cache_no_acl() for aufs inode.
279 */
280
281 out:
282 return err;
283 }
284
285 /*
286 * successful returns with iinfo write_locked
287 * minus: errno
288 * zero: success, matched
289 * plus: no error, but unmatched
290 */
291 static int reval_inode(struct inode *inode, struct dentry *dentry)
292 {
293 int err;
294 unsigned int gen, igflags;
295 aufs_bindex_t bindex, bbot;
296 struct inode *h_inode, *h_dinode;
297 struct dentry *h_dentry;
298
299 /*
300 * before this function, if aufs got any iinfo lock, it must be only
301 * one, the parent dir.
302 * it can happen by UDBA and the obsoleted inode number.
303 */
304 err = -EIO;
305 if (unlikely(inode->i_ino == parent_ino(dentry)))
306 goto out;
307
308 err = 1;
309 ii_write_lock_new_child(inode);
310 h_dentry = au_h_dptr(dentry, au_dbtop(dentry));
311 h_dinode = d_inode(h_dentry);
312 bbot = au_ibbot(inode);
313 for (bindex = au_ibtop(inode); bindex <= bbot; bindex++) {
314 h_inode = au_h_iptr(inode, bindex);
315 if (!h_inode || h_inode != h_dinode)
316 continue;
317
318 err = 0;
319 gen = au_iigen(inode, &igflags);
320 if (gen == au_digen(dentry)
321 && !au_ig_ftest(igflags, HALF_REFRESHED))
322 break;
323
324 /* fully refresh inode using dentry */
325 err = au_refresh_hinode(inode, dentry);
326 if (!err)
327 au_update_iigen(inode, /*half*/0);
328 break;
329 }
330
331 if (unlikely(err))
332 ii_write_unlock(inode);
333 out:
334 return err;
335 }
336
337 int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
338 unsigned int d_type, ino_t *ino)
339 {
340 int err, idx;
341 const int isnondir = d_type != DT_DIR;
342
343 /* prevent hardlinked inode number from race condition */
344 if (isnondir) {
345 err = au_xinondir_enter(sb, bindex, h_ino, &idx);
346 if (unlikely(err))
347 goto out;
348 }
349
350 err = au_xino_read(sb, bindex, h_ino, ino);
351 if (unlikely(err))
352 goto out_xinondir;
353
354 if (!*ino) {
355 err = -EIO;
356 *ino = au_xino_new_ino(sb);
357 if (unlikely(!*ino))
358 goto out_xinondir;
359 err = au_xino_write(sb, bindex, h_ino, *ino);
360 if (unlikely(err))
361 goto out_xinondir;
362 }
363
364 out_xinondir:
365 if (isnondir && idx >= 0)
366 au_xinondir_leave(sb, bindex, h_ino, idx);
367 out:
368 return err;
369 }
370
371 /* successful returns with iinfo write_locked */
372 /* todo: return with unlocked? */
373 struct inode *au_new_inode(struct dentry *dentry, int must_new)
374 {
375 struct inode *inode, *h_inode;
376 struct dentry *h_dentry;
377 struct super_block *sb;
378 ino_t h_ino, ino;
379 int err, idx, hlinked;
380 aufs_bindex_t btop;
381
382 sb = dentry->d_sb;
383 btop = au_dbtop(dentry);
384 h_dentry = au_h_dptr(dentry, btop);
385 h_inode = d_inode(h_dentry);
386 h_ino = h_inode->i_ino;
387 hlinked = !d_is_dir(h_dentry) && h_inode->i_nlink > 1;
388
389 new_ino:
390 /*
391 * stop 'race'-ing between hardlinks under different
392 * parents.
393 */
394 if (hlinked) {
395 err = au_xinondir_enter(sb, btop, h_ino, &idx);
396 inode = ERR_PTR(err);
397 if (unlikely(err))
398 goto out;
399 }
400
401 err = au_xino_read(sb, btop, h_ino, &ino);
402 inode = ERR_PTR(err);
403 if (unlikely(err))
404 goto out_xinondir;
405
406 if (!ino) {
407 ino = au_xino_new_ino(sb);
408 if (unlikely(!ino)) {
409 inode = ERR_PTR(-EIO);
410 goto out_xinondir;
411 }
412 }
413
414 AuDbg("i%lu\n", (unsigned long)ino);
415 inode = au_iget_locked(sb, ino);
416 err = PTR_ERR(inode);
417 if (IS_ERR(inode))
418 goto out_xinondir;
419
420 AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
421 if (inode->i_state & I_NEW) {
422 ii_write_lock_new_child(inode);
423 err = set_inode(inode, dentry);
424 if (!err) {
425 unlock_new_inode(inode);
426 goto out_xinondir; /* success */
427 }
428
429 /*
430 * iget_failed() calls iput(), but we need to call
431 * ii_write_unlock() after iget_failed(). so dirty hack for
432 * i_count.
433 */
434 atomic_inc(&inode->i_count);
435 iget_failed(inode);
436 ii_write_unlock(inode);
437 au_xino_write(sb, btop, h_ino, /*ino*/0);
438 /* ignore this error */
439 goto out_iput;
440 } else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) {
441 /*
442 * horrible race condition between lookup, readdir and copyup
443 * (or something).
444 */
445 if (hlinked && idx >= 0)
446 au_xinondir_leave(sb, btop, h_ino, idx);
447 err = reval_inode(inode, dentry);
448 if (unlikely(err < 0)) {
449 hlinked = 0;
450 goto out_iput;
451 }
452 if (!err)
453 goto out; /* success */
454 else if (hlinked && idx >= 0) {
455 err = au_xinondir_enter(sb, btop, h_ino, &idx);
456 if (unlikely(err)) {
457 iput(inode);
458 inode = ERR_PTR(err);
459 goto out;
460 }
461 }
462 }
463
464 if (unlikely(au_test_fs_unique_ino(h_inode)))
465 AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir,"
466 " b%d, %s, %pd, hi%lu, i%lu.\n",
467 btop, au_sbtype(h_dentry->d_sb), dentry,
468 (unsigned long)h_ino, (unsigned long)ino);
469 ino = 0;
470 err = au_xino_write(sb, btop, h_ino, /*ino*/0);
471 if (!err) {
472 iput(inode);
473 if (hlinked && idx >= 0)
474 au_xinondir_leave(sb, btop, h_ino, idx);
475 goto new_ino;
476 }
477
478 out_iput:
479 iput(inode);
480 inode = ERR_PTR(err);
481 out_xinondir:
482 if (hlinked && idx >= 0)
483 au_xinondir_leave(sb, btop, h_ino, idx);
484 out:
485 return inode;
486 }
487
488 /* ---------------------------------------------------------------------- */
489
490 int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
491 struct inode *inode)
492 {
493 int err;
494 struct inode *hi;
495
496 err = au_br_rdonly(au_sbr(sb, bindex));
497
498 /* pseudo-link after flushed may happen out of bounds */
499 if (!err
500 && inode
501 && au_ibtop(inode) <= bindex
502 && bindex <= au_ibbot(inode)) {
503 /*
504 * permission check is unnecessary since vfsub routine
505 * will be called later
506 */
507 hi = au_h_iptr(inode, bindex);
508 if (hi)
509 err = IS_IMMUTABLE(hi) ? -EROFS : 0;
510 }
511
512 return err;
513 }
514
515 int au_test_h_perm(struct user_namespace *h_userns, struct inode *h_inode,
516 int mask)
517 {
518 if (uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
519 return 0;
520 return inode_permission(h_userns, h_inode, mask);
521 }
522
523 int au_test_h_perm_sio(struct user_namespace *h_userns, struct inode *h_inode,
524 int mask)
525 {
526 if (au_test_nfs(h_inode->i_sb)
527 && (mask & MAY_WRITE)
528 && S_ISDIR(h_inode->i_mode))
529 mask |= MAY_READ; /* force permission check */
530 return au_test_h_perm(h_userns, h_inode, mask);
531 }