]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - fs/kernfs/dir.c
kernfs: remove unnecessary NULL check in __kernfs_remove()
[mirror_ubuntu-bionic-kernel.git] / fs / kernfs / dir.c
1 /*
2 * fs/kernfs/dir.c - kernfs directory implementation
3 *
4 * Copyright (c) 2001-3 Patrick Mochel
5 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
7 *
8 * This file is released under the GPLv2.
9 */
10
11 #include <linux/sched.h>
12 #include <linux/fs.h>
13 #include <linux/namei.h>
14 #include <linux/idr.h>
15 #include <linux/slab.h>
16 #include <linux/security.h>
17 #include <linux/hash.h>
18
19 #include "kernfs-internal.h"
20
/* protects hierarchy topology, node flags and the sibling rbtrees */
21 DEFINE_MUTEX(kernfs_mutex);
22
/* map an rb_node embedded in a kernfs_node back to its kernfs_node */
23 #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
24
25 static bool kernfs_lockdep(struct kernfs_node *kn)
26 {
27 #ifdef CONFIG_DEBUG_LOCK_ALLOC
28 return kn->flags & KERNFS_LOCKDEP;
29 #else
30 return false;
31 #endif
32 }
33
34 /**
35 * kernfs_name_hash
36 * @name: Null terminated string to hash
37 * @ns: Namespace tag to hash
38 *
39 * Returns 31 bit hash of ns + name (so it fits in an off_t )
40 */
41 static unsigned int kernfs_name_hash(const char *name, const void *ns)
42 {
43 unsigned long hash = init_name_hash();
44 unsigned int len = strlen(name);
45 while (len--)
46 hash = partial_name_hash(*name++, hash);
47 hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31));
48 hash &= 0x7fffffffU;
49 /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
50 if (hash < 1)
51 hash += 2;
52 if (hash >= INT_MAX)
53 hash = INT_MAX - 1;
54 return hash;
55 }
56
57 static int kernfs_name_compare(unsigned int hash, const char *name,
58 const void *ns, const struct kernfs_node *kn)
59 {
60 if (hash != kn->hash)
61 return hash - kn->hash;
62 if (ns != kn->ns)
63 return ns - kn->ns;
64 return strcmp(name, kn->name);
65 }
66
/* rbtree ordering callback: compare @left's (hash, ns, name) key to @right */
67 static int kernfs_sd_compare(const struct kernfs_node *left,
68 const struct kernfs_node *right)
69 {
70 return kernfs_name_compare(left->hash, left->name, left->ns, right);
71 }
72
73 /**
74 * kernfs_link_sibling - link kernfs_node into sibling rbtree
75 * @kn: kernfs_node of interest
76 *
77 * Link @kn into its sibling rbtree which starts from
78 * @kn->parent->dir.children.
79 *
80 * Locking:
81 * mutex_lock(kernfs_mutex)
82 *
83 * RETURNS:
84 * 0 on susccess -EEXIST on failure.
85 */
86 static int kernfs_link_sibling(struct kernfs_node *kn)
87 {
88 struct rb_node **node = &kn->parent->dir.children.rb_node;
89 struct rb_node *parent = NULL;
90
91 if (kernfs_type(kn) == KERNFS_DIR)
92 kn->parent->dir.subdirs++;
93
94 while (*node) {
95 struct kernfs_node *pos;
96 int result;
97
98 pos = rb_to_kn(*node);
99 parent = *node;
100 result = kernfs_sd_compare(kn, pos);
101 if (result < 0)
102 node = &pos->rb.rb_left;
103 else if (result > 0)
104 node = &pos->rb.rb_right;
105 else
106 return -EEXIST;
107 }
108 /* add new node and rebalance the tree */
109 rb_link_node(&kn->rb, parent, node);
110 rb_insert_color(&kn->rb, &kn->parent->dir.children);
111 return 0;
112 }
113
115 /**
116 * kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree
117 * @kn: kernfs_node of interest
118 *
119 * Unlink @kn from its sibling rbtree which starts from
120 * kn->parent->dir.children.
121 *
122 * Locking:
123 * mutex_lock(kernfs_mutex)
 *
 * RETURNS:
 * %true if this call actually unlinked @kn, %false if it was already
 * unlinked.  Succeeds at most once per node; removers use this to pick
 * a single cleanup owner.
124 */
125 static bool kernfs_unlink_sibling(struct kernfs_node *kn)
126 {
127 if (RB_EMPTY_NODE(&kn->rb))
128 return false;
129
130 if (kernfs_type(kn) == KERNFS_DIR)
131 kn->parent->dir.subdirs--;
132
133 rb_erase(&kn->rb, &kn->parent->dir.children);
/* mark unlinked so later RB_EMPTY_NODE() checks see it */
134 RB_CLEAR_NODE(&kn->rb);
135 return true;
136 }
136
137 /**
138 * kernfs_get_active - get an active reference to kernfs_node
139 * @kn: kernfs_node to get an active reference to
140 *
141 * Get an active reference of @kn. This function is noop if @kn
142 * is NULL.
143 *
144 * RETURNS:
145 * Pointer to @kn on success, NULL on failure.
146 */
147 struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
148 {
149 if (unlikely(!kn))
150 return NULL;
151
152 if (kernfs_lockdep(kn))
153 rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_);
154
155 /*
156 * Try to obtain an active ref. If @kn is deactivated, we block
157 * till either it's reactivated or killed.
158 */
159 do {
160 if (atomic_inc_unless_negative(&kn->active))
161 return kn;
162
163 wait_event(kernfs_root(kn)->deactivate_waitq,
164 atomic_read(&kn->active) >= 0 ||
165 RB_EMPTY_NODE(&kn->rb));
166 } while (!RB_EMPTY_NODE(&kn->rb));
167
/* @kn was removed while we waited; undo the lockdep acquisition */
168 if (kernfs_lockdep(kn))
169 rwsem_release(&kn->dep_map, 1, _RET_IP_);
170 return NULL;
171 }
172
173 /**
174 * kernfs_put_active - put an active reference to kernfs_node
175 * @kn: kernfs_node to put an active reference to
176 *
177 * Put an active reference to @kn. This function is noop if @kn
178 * is NULL.
179 */
180 void kernfs_put_active(struct kernfs_node *kn)
181 {
182 struct kernfs_root *root = kernfs_root(kn);
183 int v;
184
185 if (unlikely(!kn))
186 return;
187
188 if (kernfs_lockdep(kn))
189 rwsem_release(&kn->dep_map, 1, _RET_IP_);
190 v = atomic_dec_return(&kn->active);
191 if (likely(v != KN_DEACTIVATED_BIAS))
192 return;
193
194 wake_up_all(&root->deactivate_waitq);
195 }
196
197 /**
198 * kernfs_drain - drain kernfs_node
199 * @kn: kernfs_node to drain
200 *
201 * Drain existing usages of @kn. Multiple removers may invoke this function
202 * concurrently on @kn and all will return after draining is complete.
203 * Returns %true if drain is performed and kernfs_mutex was temporarily
204 * released. %false if @kn was already drained and no operation was
205 * necessary.
206 *
207 * The caller is responsible for ensuring @kn stays pinned while this
208 * function is in progress even if it gets removed by someone else.
209 */
210 static bool kernfs_drain(struct kernfs_node *kn)
211 __releases(&kernfs_mutex) __acquires(&kernfs_mutex)
212 {
213 struct kernfs_root *root = kernfs_root(kn);
214
215 lockdep_assert_held(&kernfs_mutex);
216 WARN_ON_ONCE(atomic_read(&kn->active) >= 0);
217
218 /*
219 * We want to go through the active ref lockdep annotation at least
220 * once for all node removals, but the lockdep annotation can't be
221 * nested inside kernfs_mutex and deactivation can't make forward
222 * progress if we keep dropping the mutex. Use KERNFS_JUST_DEACTIVATED
223 * to force the slow path once for each deactivation if lockdep is
224 * enabled.
225 */
226 if ((!kernfs_lockdep(kn) || !(kn->flags & KERNFS_JUST_DEACTIVATED)) &&
227 atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
228 return false;
229
230 kn->flags &= ~KERNFS_JUST_DEACTIVATED;
/* drop the mutex so in-flight users can finish and put their refs */
231 mutex_unlock(&kernfs_mutex);
232
233 if (kernfs_lockdep(kn)) {
234 rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
235 if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS)
236 lock_contended(&kn->dep_map, _RET_IP_);
237 }
238
/* block until every active reference has been put */
239 wait_event(root->deactivate_waitq,
240 atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);
241
242 if (kernfs_lockdep(kn)) {
243 lock_acquired(&kn->dep_map, _RET_IP_);
244 rwsem_release(&kn->dep_map, 1, _RET_IP_);
245 }
246
247 mutex_lock(&kernfs_mutex);
248 return true;
249 }
250
251 /**
252 * kernfs_get - get a reference count on a kernfs_node
253 * @kn: the target kernfs_node
254 */
255 void kernfs_get(struct kernfs_node *kn)
256 {
257 if (kn) {
258 WARN_ON(!atomic_read(&kn->count));
259 atomic_inc(&kn->count);
260 }
261 }
262 EXPORT_SYMBOL_GPL(kernfs_get);
263
264 /**
265 * kernfs_put - put a reference count on a kernfs_node
266 * @kn: the target kernfs_node
267 *
268 * Put a reference count of @kn and destroy it if it reached zero.
 * Destruction walks up the ancestor chain, releasing each parent ref
 * taken at link time; releasing the root node also frees its kernfs_root.
269 */
270 void kernfs_put(struct kernfs_node *kn)
271 {
272 struct kernfs_node *parent;
273 struct kernfs_root *root;
274
275 if (!kn || !atomic_dec_and_test(&kn->count))
276 return;
277 root = kernfs_root(kn);
278 repeat:
279 /*
280 * Moving/renaming is always done while holding reference.
281 * kn->parent won't change beneath us.
282 */
283 parent = kn->parent;
284
285 WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS,
286 "kernfs_put: %s/%s: released with incorrect active_ref %d\n",
287 parent ? parent->name : "", kn->name, atomic_read(&kn->active));
288
289 if (kernfs_type(kn) == KERNFS_LINK)
290 kernfs_put(kn->symlink.target_kn);
291 if (!(kn->flags & KERNFS_STATIC_NAME))
292 kfree(kn->name);
293 if (kn->iattr) {
294 if (kn->iattr->ia_secdata)
295 security_release_secctx(kn->iattr->ia_secdata,
296 kn->iattr->ia_secdata_len);
297 simple_xattrs_free(&kn->iattr->xattrs);
298 }
299 kfree(kn->iattr);
300 ida_simple_remove(&root->ino_ida, kn->ino);
301 kmem_cache_free(kernfs_node_cache, kn);
302
/* drop the ref this node held on its parent; may cascade upward */
303 kn = parent;
304 if (kn) {
305 if (atomic_dec_and_test(&kn->count))
306 goto repeat;
307 } else {
308 /* just released the root kn, free @root too */
309 ida_destroy(&root->ino_ida);
310 kfree(root);
311 }
312 }
313 EXPORT_SYMBOL_GPL(kernfs_put);
314
/*
 * d_revalidate: decide whether a cached dentry still matches the kernfs
 * node it was instantiated from.  Returns 1 if still valid, 0 to force
 * a fresh lookup, -ECHILD to bail out of RCU-walk mode.
 */
315 static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
316 {
317 struct kernfs_node *kn;
318
/* we take kernfs_mutex below and may sleep; punt in RCU walk */
319 if (flags & LOOKUP_RCU)
320 return -ECHILD;
321
322 /* Always perform fresh lookup for negatives */
323 if (!dentry->d_inode)
324 goto out_bad_unlocked;
325
326 kn = dentry->d_fsdata;
327 mutex_lock(&kernfs_mutex);
328
329 /* Force fresh lookup if removed */
330 if (kn->parent && RB_EMPTY_NODE(&kn->rb))
331 goto out_bad;
332
333 /* The kernfs node has been moved? */
334 if (dentry->d_parent->d_fsdata != kn->parent)
335 goto out_bad;
336
337 /* The kernfs node has been renamed */
338 if (strcmp(dentry->d_name.name, kn->name) != 0)
339 goto out_bad;
340
341 /* The kernfs node has been moved to a different namespace */
342 if (kn->parent && kernfs_ns_enabled(kn->parent) &&
343 kernfs_info(dentry->d_sb)->ns != kn->ns)
344 goto out_bad;
345
346 mutex_unlock(&kernfs_mutex);
347 out_valid:
348 return 1;
349 out_bad:
350 mutex_unlock(&kernfs_mutex);
351 out_bad_unlocked:
352 /*
353 * @dentry doesn't match the underlying kernfs node, drop the
354 * dentry and force lookup. If we have submounts we must allow the
355 * vfs caches to lie about the state of the filesystem to prevent
356 * leaks and other nasty things, so use check_submounts_and_drop()
357 * instead of d_drop().
358 */
359 if (check_submounts_and_drop(dentry) != 0)
360 goto out_valid;
361
362 return 0;
363 }
364
365 static void kernfs_dop_release(struct dentry *dentry)
366 {
367 kernfs_put(dentry->d_fsdata);
368 }
369
/* dentry_operations shared by every kernfs-based filesystem */
370 const struct dentry_operations kernfs_dops = {
371 .d_revalidate = kernfs_dop_revalidate,
372 .d_release = kernfs_dop_release,
373 };
374
/**
 * kernfs_new_node - allocate and initialize a new kernfs_node
 * @root: kernfs root the node will belong to (supplies the inode ida)
 * @name: name of the node; duplicated unless KERNFS_STATIC_NAME is set
 * @mode: file mode for the node
 * @flags: KERNFS_* type and behavior flags
 *
 * The new node starts with one reference, deactivated (active count at
 * KN_DEACTIVATED_BIAS, deact_depth 1) and unlinked; kernfs_add_one()
 * performs activation and linking.  Returns the node or %NULL on
 * allocation failure.
 */
375 struct kernfs_node *kernfs_new_node(struct kernfs_root *root, const char *name,
376 umode_t mode, unsigned flags)
377 {
378 char *dup_name = NULL;
379 struct kernfs_node *kn;
380 int ret;
381
382 if (!(flags & KERNFS_STATIC_NAME)) {
383 name = dup_name = kstrdup(name, GFP_KERNEL);
384 if (!name)
385 return NULL;
386 }
387
388 kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL);
389 if (!kn)
390 goto err_out1;
391
392 ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL);
393 if (ret < 0)
394 goto err_out2;
395 kn->ino = ret;
396
397 atomic_set(&kn->count, 1);
398 atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
399 kn->deact_depth = 1;
400 RB_CLEAR_NODE(&kn->rb);
401
402 kn->name = name;
403 kn->mode = mode;
404 kn->flags = flags;
405
406 return kn;
407
408 err_out2:
409 kmem_cache_free(kernfs_node_cache, kn);
410 err_out1:
411 kfree(dup_name);
412 return NULL;
413 }
414
415 /**
416 * kernfs_add_one - add kernfs_node to parent without warning
417 * @kn: kernfs_node to be added
418 * @parent: the parent kernfs_node to add @kn to
419 *
420 * Get @parent and set @kn->parent to it and increment nlink of the
421 * parent inode if @kn is a directory and link into the children list
422 * of the parent.
423 *
424 * RETURNS:
425 * 0 on success, -EEXIST if entry with the given name already
426 * exists.
427 */
428 int kernfs_add_one(struct kernfs_node *kn, struct kernfs_node *parent)
429 {
430 struct kernfs_iattrs *ps_iattr;
431 bool has_ns;
432 int ret;
433
/* pin @parent active so it can't be removed while we link under it */
434 if (!kernfs_get_active(parent))
435 return -ENOENT;
436
437 mutex_lock(&kernfs_mutex);
438
439 ret = -EINVAL;
440 has_ns = kernfs_ns_enabled(parent);
441 if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
442 has_ns ? "required" : "invalid", parent->name, kn->name))
443 goto out_unlock;
444
445 if (kernfs_type(parent) != KERNFS_DIR)
446 goto out_unlock;
447
448 kn->hash = kernfs_name_hash(kn->name, kn->ns);
449 kn->parent = parent;
/* ref dropped by kernfs_put() when @kn itself is destroyed */
450 kernfs_get(parent);
451
452 ret = kernfs_link_sibling(kn);
453 if (ret)
454 goto out_unlock;
455
456 /* Update timestamps on the parent */
457 ps_iattr = parent->iattr;
458 if (ps_iattr) {
459 struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
460 ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
461 }
462
463 /* Mark the entry added into directory tree */
464 atomic_sub(KN_DEACTIVATED_BIAS, &kn->active);
465 kn->deact_depth--;
466 ret = 0;
467 out_unlock:
468 mutex_unlock(&kernfs_mutex);
469 kernfs_put_active(parent);
470 return ret;
471 }
472
473 /**
474 * kernfs_find_ns - find kernfs_node with the given name
475 * @parent: kernfs_node to search under
476 * @name: name to look for
477 * @ns: the namespace tag to use
478 *
479 * Look for kernfs_node with name @name under @parent. Returns pointer to
480 * the found kernfs_node on success, %NULL on failure.
481 */
482 static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
483 const unsigned char *name,
484 const void *ns)
485 {
486 struct rb_node *node = parent->dir.children.rb_node;
487 bool has_ns = kernfs_ns_enabled(parent);
488 unsigned int hash;
489
490 lockdep_assert_held(&kernfs_mutex);
491
492 if (has_ns != (bool)ns) {
493 WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
494 has_ns ? "required" : "invalid", parent->name, name);
495 return NULL;
496 }
497
498 hash = kernfs_name_hash(name, ns);
499 while (node) {
500 struct kernfs_node *kn;
501 int result;
502
503 kn = rb_to_kn(node);
504 result = kernfs_name_compare(hash, name, ns, kn);
505 if (result < 0)
506 node = node->rb_left;
507 else if (result > 0)
508 node = node->rb_right;
509 else
510 return kn;
511 }
512 return NULL;
513 }
514
515 /**
516 * kernfs_find_and_get_ns - find and get kernfs_node with the given name
517 * @parent: kernfs_node to search under
518 * @name: name to look for
519 * @ns: the namespace tag to use
520 *
521 * Look for kernfs_node with name @name under @parent and get a reference
522 * if found. This function may sleep and returns pointer to the found
523 * kernfs_node on success, %NULL on failure.
524 */
525 struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
526 const char *name, const void *ns)
527 {
528 struct kernfs_node *kn;
529
530 mutex_lock(&kernfs_mutex);
531 kn = kernfs_find_ns(parent, name, ns);
532 kernfs_get(kn);
533 mutex_unlock(&kernfs_mutex);
534
535 return kn;
536 }
537 EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);
538
539 /**
540 * kernfs_create_root - create a new kernfs hierarchy
541 * @kdops: optional directory syscall operations for the hierarchy
542 * @priv: opaque data associated with the new directory
543 *
544 * Returns the root of the new hierarchy on success, ERR_PTR() value on
545 * failure.
546 */
547 struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv)
548 {
549 struct kernfs_root *root;
550 struct kernfs_node *kn;
551
552 root = kzalloc(sizeof(*root), GFP_KERNEL);
553 if (!root)
554 return ERR_PTR(-ENOMEM);
555
556 ida_init(&root->ino_ida);
557
558 kn = kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO, KERNFS_DIR);
559 if (!kn) {
560 ida_destroy(&root->ino_ida);
561 kfree(root);
562 return ERR_PTR(-ENOMEM);
563 }
564
/* new nodes are born deactivated; activate the root immediately */
565 atomic_sub(KN_DEACTIVATED_BIAS, &kn->active);
566 kn->deact_depth--;
567 kn->priv = priv;
568 kn->dir.root = root;
569
570 root->dir_ops = kdops;
571 root->kn = kn;
572 init_waitqueue_head(&root->deactivate_waitq);
573
574 return root;
575 }
576
577 /**
578 * kernfs_destroy_root - destroy a kernfs hierarchy
579 * @root: root of the hierarchy to destroy
580 *
581 * Destroy the hierarchy anchored at @root by removing all existing
582 * directories and destroying @root.
 *
 * Dropping the root node's last reference inside kernfs_put() also
 * frees the kernfs_root structure itself.
583 */
584 void kernfs_destroy_root(struct kernfs_root *root)
585 {
586 kernfs_remove(root->kn); /* will also free @root */
587 }
588
589 /**
590 * kernfs_create_dir_ns - create a directory
591 * @parent: parent in which to create a new directory
592 * @name: name of the new directory
593 * @mode: mode of the new directory
594 * @priv: opaque data associated with the new directory
595 * @ns: optional namespace tag of the directory
596 *
597 * Returns the created node on success, ERR_PTR() value on failure.
598 */
599 struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
600 const char *name, umode_t mode,
601 void *priv, const void *ns)
602 {
603 struct kernfs_node *kn;
604 int rc;
605
606 /* allocate */
607 kn = kernfs_new_node(kernfs_root(parent), name, mode | S_IFDIR,
608 KERNFS_DIR);
609 if (!kn)
610 return ERR_PTR(-ENOMEM);
611
612 kn->dir.root = parent->dir.root;
613 kn->ns = ns;
614 kn->priv = priv;
615
616 /* link in */
617 rc = kernfs_add_one(kn, parent);
618 if (!rc)
619 return kn;
620
/* linking failed; drop the initial ref, destroying the node */
621 kernfs_put(kn);
622 return ERR_PTR(rc);
623 }
624
/*
 * .lookup: resolve @dentry under directory @dir by searching the kernfs
 * children rbtree, then attach an inode.  Returns %NULL when no entry
 * exists (the VFS treats that as a negative dentry).
 */
625 static struct dentry *kernfs_iop_lookup(struct inode *dir,
626 struct dentry *dentry,
627 unsigned int flags)
628 {
629 struct dentry *ret;
630 struct kernfs_node *parent = dentry->d_parent->d_fsdata;
631 struct kernfs_node *kn;
632 struct inode *inode;
633 const void *ns = NULL;
634
635 mutex_lock(&kernfs_mutex);
636
637 if (kernfs_ns_enabled(parent))
638 ns = kernfs_info(dir->i_sb)->ns;
639
640 kn = kernfs_find_ns(parent, dentry->d_name.name, ns);
641
642 /* no such entry */
643 if (!kn) {
644 ret = NULL;
645 goto out_unlock;
646 }
/* dentry pins the node; ref dropped in kernfs_dop_release() */
647 kernfs_get(kn);
648 dentry->d_fsdata = kn;
649
650 /* attach dentry and inode */
651 inode = kernfs_get_inode(dir->i_sb, kn);
652 if (!inode) {
653 ret = ERR_PTR(-ENOMEM);
654 goto out_unlock;
655 }
656
657 /* instantiate and hash dentry */
658 ret = d_materialise_unique(dentry, inode);
659 out_unlock:
660 mutex_unlock(&kernfs_mutex);
661 return ret;
662 }
663
664 static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry,
665 umode_t mode)
666 {
667 struct kernfs_node *parent = dir->i_private;
668 struct kernfs_dir_ops *kdops = kernfs_root(parent)->dir_ops;
669
670 if (!kdops || !kdops->mkdir)
671 return -EPERM;
672
673 return kdops->mkdir(parent, dentry->d_name.name, mode);
674 }
675
676 static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
677 {
678 struct kernfs_node *kn = dentry->d_fsdata;
679 struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops;
680
681 if (!kdops || !kdops->rmdir)
682 return -EPERM;
683
684 return kdops->rmdir(kn);
685 }
686
687 static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry,
688 struct inode *new_dir, struct dentry *new_dentry)
689 {
690 struct kernfs_node *kn = old_dentry->d_fsdata;
691 struct kernfs_node *new_parent = new_dir->i_private;
692 struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops;
693
694 if (!kdops || !kdops->rename)
695 return -EPERM;
696
697 return kdops->rename(kn, new_parent, new_dentry->d_name.name);
698 }
699
/* inode_operations for kernfs directories */
700 const struct inode_operations kernfs_dir_iops = {
701 .lookup = kernfs_iop_lookup,
702 .permission = kernfs_iop_permission,
703 .setattr = kernfs_iop_setattr,
704 .getattr = kernfs_iop_getattr,
705 .setxattr = kernfs_iop_setxattr,
706 .removexattr = kernfs_iop_removexattr,
707 .getxattr = kernfs_iop_getxattr,
708 .listxattr = kernfs_iop_listxattr,
709
/* directory-modifying ops are forwarded to the hierarchy's dir_ops */
710 .mkdir = kernfs_iop_mkdir,
711 .rmdir = kernfs_iop_rmdir,
712 .rename = kernfs_iop_rename,
713 };
714
715 static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos)
716 {
717 struct kernfs_node *last;
718
719 while (true) {
720 struct rb_node *rbn;
721
722 last = pos;
723
724 if (kernfs_type(pos) != KERNFS_DIR)
725 break;
726
727 rbn = rb_first(&pos->dir.children);
728 if (!rbn)
729 break;
730
731 pos = rb_to_kn(rbn);
732 }
733
734 return last;
735 }
736
737 /**
738 * kernfs_next_descendant_post - find the next descendant for post-order walk
739 * @pos: the current position (%NULL to initiate traversal)
740 * @root: kernfs_node whose descendants to walk
741 *
742 * Find the next descendant to visit for post-order traversal of @root's
743 * descendants. @root is included in the iteration and the last node to be
744 * visited.
745 */
746 static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
747 struct kernfs_node *root)
748 {
749 struct rb_node *rbn;
750
751 lockdep_assert_held(&kernfs_mutex);
752
753 /* if first iteration, visit leftmost descendant which may be root */
754 if (!pos)
755 return kernfs_leftmost_descendant(root);
756
757 /* if we visited @root, we're done */
758 if (pos == root)
759 return NULL;
760
761 /* if there's an unvisited sibling, visit its leftmost descendant */
762 rbn = rb_next(&pos->rb);
763 if (rbn)
764 return kernfs_leftmost_descendant(rb_to_kn(rbn));
765
766 /* no sibling left, visit parent */
767 return pos->parent;
768 }
769
/*
 * Deactivate every node in @kn's subtree and drain in-flight active
 * references.  Nesting is tracked via ->deact_depth so only the first
 * deactivation applies the bias.  Caller must hold kernfs_mutex; it may
 * be temporarily released while draining.
 */
770 static void __kernfs_deactivate(struct kernfs_node *kn)
771 {
772 struct kernfs_node *pos;
773
774 lockdep_assert_held(&kernfs_mutex);
775
776 /* prevent any new usage under @kn by deactivating all nodes */
777 pos = NULL;
778 while ((pos = kernfs_next_descendant_post(pos, kn))) {
779 if (!pos->deact_depth++) {
780 WARN_ON_ONCE(atomic_read(&pos->active) < 0);
781 atomic_add(KN_DEACTIVATED_BIAS, &pos->active);
782 pos->flags |= KERNFS_JUST_DEACTIVATED;
783 }
784 }
785
786 /*
787 * Drain the subtree. If kernfs_drain() blocked to drain, which is
788 * indicated by %true return, it temporarily released kernfs_mutex
789 * and the rbtree might have been modified inbetween breaking our
790 * future walk. Restart the walk after each %true return.
791 */
792 pos = NULL;
793 while ((pos = kernfs_next_descendant_post(pos, kn))) {
794 bool drained;
795
/* pin @pos because kernfs_drain() may drop kernfs_mutex */
796 kernfs_get(pos);
797 drained = kernfs_drain(pos);
798 kernfs_put(pos);
799 if (drained)
800 pos = NULL;
801 }
802 }
803
/*
 * Undo one level of __kernfs_deactivate() over @kn's subtree; the bias
 * is removed only when a node's ->deact_depth drops back to zero.
 * Caller must hold kernfs_mutex.
 */
804 static void __kernfs_reactivate(struct kernfs_node *kn)
805 {
806 struct kernfs_node *pos;
807
808 lockdep_assert_held(&kernfs_mutex);
809
810 pos = NULL;
811 while ((pos = kernfs_next_descendant_post(pos, kn))) {
812 if (!--pos->deact_depth) {
813 WARN_ON_ONCE(atomic_read(&pos->active) >= 0);
814 atomic_sub(KN_DEACTIVATED_BIAS, &pos->active);
815 }
816 WARN_ON_ONCE(pos->deact_depth < 0);
817 }
818
819 /* some nodes reactivated, kick get_active waiters */
820 wake_up_all(&kernfs_root(kn)->deactivate_waitq);
821 }
822
823 static void __kernfs_deactivate_self(struct kernfs_node *kn)
824 {
825 /*
826 * Take ourself out of the active ref dependency chain and
827 * deactivate. If we're called without an active ref, lockdep will
828 * complain.
829 */
830 kernfs_put_active(kn);
831 __kernfs_deactivate(kn);
832 }
833
834 static void __kernfs_reactivate_self(struct kernfs_node *kn)
835 {
836 __kernfs_reactivate(kn);
837 /*
838 * Restore the active ref dropped by __kernfs_deactivate_self() so
839 * that it's balanced on return. put_active() will soon be called
840 * on @kn, so this can't break anything regardless of @kn's state.
841 */
842 atomic_inc(&kn->active);
843 if (kernfs_lockdep(kn))
844 rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_);
845 }
846
847 /**
848 * kernfs_deactivate - deactivate subtree of a node
849 * @kn: kernfs_node to deactivate subtree of
850 *
851 * Deactivate the subtree of @kn. On return, there's no active operation
852 * going on under @kn and creation or renaming of a node under @kn is
853 * blocked until @kn is reactivated or removed. This function can be
854 * called multiple times and nests properly. Each invocation should be
855 * paired with kernfs_reactivate().
856 *
857 * For a kernfs user which uses simple locking, the subsystem lock would
858 * nest inside active reference. This becomes problematic if the user
859 * tries to remove nodes while holding the subsystem lock as it would create
860 * a reverse locking dependency from the subsystem lock to active ref.
861 * This function can be used to break such reverse dependency. The user
862 * can call this function outside the subsystem lock and then proceed to
863 * invoke kernfs_remove() while holding the subsystem lock without
864 * introducing such reverse dependency.
865 */
866 void kernfs_deactivate(struct kernfs_node *kn)
867 {
868 mutex_lock(&kernfs_mutex);
869 __kernfs_deactivate(kn);
870 mutex_unlock(&kernfs_mutex);
871 }
872
873 /**
874 * kernfs_reactivate - reactivate subtree of a node
875 * @kn: kernfs_node to reactivate subtree of
876 *
877 * Undo kernfs_deactivate().
 * Each call balances exactly one earlier kernfs_deactivate() on @kn.
878 */
879 void kernfs_reactivate(struct kernfs_node *kn)
880 {
881 mutex_lock(&kernfs_mutex);
882 __kernfs_reactivate(kn);
883 mutex_unlock(&kernfs_mutex);
884 }
885
886 /**
887 * kernfs_deactivate_self - deactivate subtree of a node from its own method
888 * @kn: the self kernfs_node to deactivate subtree of
889 *
890 * The caller must be running off of a kernfs operation which is invoked
891 * with an active reference - e.g. one of kernfs_ops. Once this function
892 * is called, @kn may be removed by someone else while the enclosing method
893 * is in progress. Other than that, this function is equivalent to
894 * kernfs_deactivate() and should be paired with kernfs_reactivate_self().
895 */
896 void kernfs_deactivate_self(struct kernfs_node *kn)
897 {
898 mutex_lock(&kernfs_mutex);
/* drops the caller's active ref before deactivating - see helper */
899 __kernfs_deactivate_self(kn);
900 mutex_unlock(&kernfs_mutex);
901 }
902
903 /**
904 * kernfs_reactivate_self - reactivate subtree of a node from its own method
905 * @kn: the self kernfs_node to reactivate subtree of
906 *
907 * Undo kernfs_deactivate_self().
 * The caller's active reference, dropped by kernfs_deactivate_self(),
 * is restored before returning.
908 */
909 void kernfs_reactivate_self(struct kernfs_node *kn)
910 {
911 mutex_lock(&kernfs_mutex);
912 __kernfs_reactivate_self(kn);
913 mutex_unlock(&kernfs_mutex);
914 }
915
/*
 * Deactivate, drain and unlink @kn's entire subtree in post-order.
 * Caller must hold kernfs_mutex; it is dropped temporarily to unmap
 * mmapped files.
 */
916 static void __kernfs_remove(struct kernfs_node *kn)
917 {
918 struct kernfs_root *root = kernfs_root(kn);
919 struct kernfs_node *pos;
920
921 lockdep_assert_held(&kernfs_mutex);
922
923 pr_debug("kernfs %s: removing\n", kn->name);
924
925 __kernfs_deactivate(kn);
926
927 /* unlink the subtree node-by-node */
928 do {
929 pos = kernfs_leftmost_descendant(kn);
930
931 /*
932 * We're gonna release kernfs_mutex to unmap bin files,
933 * Make sure @pos doesn't go away inbetween.
934 */
935 kernfs_get(pos);
936
937 /*
938 * This must come before unlinking; otherwise, when
939 * there are multiple removers, some may finish before
940 * unmapping is complete.
941 */
942 if (pos->flags & KERNFS_HAS_MMAP) {
943 mutex_unlock(&kernfs_mutex);
944 kernfs_unmap_file(pos);
945 mutex_lock(&kernfs_mutex);
946 }
947
948 /*
949 * kernfs_unlink_sibling() succeeds once per node. Use it
950 * to decide who's responsible for cleanups.
951 */
952 if (!pos->parent || kernfs_unlink_sibling(pos)) {
953 struct kernfs_iattrs *ps_iattr =
954 pos->parent ? pos->parent->iattr : NULL;
955
956 /* update timestamps on the parent */
957 if (ps_iattr) {
958 ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME;
959 ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME;
960 }
961
/* drop the base ref now that the node is gone from the tree */
962 kernfs_put(pos);
963 }
964
/* drop the temporary pin taken above */
965 kernfs_put(pos);
966 } while (pos != kn);
967
968 /* some nodes killed, kick get_active waiters */
969 wake_up_all(&root->deactivate_waitq);
970 }
971
972 /**
973 * kernfs_remove - remove a kernfs_node recursively
974 * @kn: the kernfs_node to remove
975 *
976 * Remove @kn along with all its subdirectories and files.
 * May sleep: grabs kernfs_mutex and drains active references.
977 */
978 void kernfs_remove(struct kernfs_node *kn)
979 {
980 mutex_lock(&kernfs_mutex);
981 __kernfs_remove(kn);
982 mutex_unlock(&kernfs_mutex);
983 }
984
985 /**
986 * kernfs_remove_self - remove a kernfs_node from its own method
987 * @kn: the self kernfs_node to remove
988 *
989 * The caller must be running off of a kernfs operation which is invoked
990 * with an active reference - e.g. one of kernfs_ops. This can be used to
991 * implement a file operation which deletes itself.
992 *
993 * For example, the "delete" file for a sysfs device directory can be
994 * implemented by invoking kernfs_remove_self() on the "delete" file
995 * itself. This function breaks the circular dependency of trying to
996 * deactivate self while holding an active ref itself. It isn't necessary
997 * to modify the usual removal path to use kernfs_remove_self(). The
998 * "delete" implementation can simply invoke kernfs_remove_self() on self
999 * before proceeding with the usual removal path. kernfs will ignore later
1000 * kernfs_remove() on self.
1001 *
1002 * kernfs_remove_self() can be called multiple times concurrently on the
1003 * same kernfs_node. Only the first one actually performs removal and
1004 * returns %true. All others will wait until the kernfs operation which
1005 * won self-removal finishes and return %false. Note that the losers wait
1006 * for the completion of not only the winning kernfs_remove_self() but also
1007 * the whole kernfs_ops which won the arbitration. This can be used to
1008 * guarantee, for example, all concurrent writes to a "delete" file to
1009 * finish only after the whole operation is complete.
1010 */
1011 bool kernfs_remove_self(struct kernfs_node *kn)
1012 {
1013 bool ret;
1014
1015 mutex_lock(&kernfs_mutex);
1016 __kernfs_deactivate_self(kn);
1017
1018 /*
1019 * SUICIDAL is used to arbitrate among competing invocations. Only
1020 * the first one will actually perform removal. When the removal
1021 * is complete, SUICIDED is set and the active ref is restored
1022 * while holding kernfs_mutex. The ones which lost arbitration
1023 * waits for SUICIDED && drained which can happen only after the
1024 * enclosing kernfs operation which executed the winning instance
1025 * of kernfs_remove_self() finished.
1026 */
1027 if (!(kn->flags & KERNFS_SUICIDAL)) {
1028 kn->flags |= KERNFS_SUICIDAL;
1029 __kernfs_remove(kn);
1030 kn->flags |= KERNFS_SUICIDED;
1031 ret = true;
1032 } else {
1033 wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq;
1034 DEFINE_WAIT(wait);
1035
1036 while (true) {
1037 prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE);
1038
1039 if ((kn->flags & KERNFS_SUICIDED) &&
1040 atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
1041 break;
1042
/* drop the mutex so the winning remover can make progress */
1043 mutex_unlock(&kernfs_mutex);
1044 schedule();
1045 mutex_lock(&kernfs_mutex);
1046 }
1047 finish_wait(waitq, &wait);
1048 WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb));
1049 ret = false;
1050 }
1051
/* restore the caller's active ref so it's balanced on return */
1052 __kernfs_reactivate_self(kn);
1053 mutex_unlock(&kernfs_mutex);
1054 return ret;
1055 }
1056
1057 /**
1058 * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it
1059 * @parent: parent of the target
1060 * @name: name of the kernfs_node to remove
1061 * @ns: namespace tag of the kernfs_node to remove
1062 *
1063 * Look for the kernfs_node with @name and @ns under @parent and remove it.
1064 * Returns 0 on success, -ENOENT if such entry doesn't exist.
1065 */
1066 int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
1067 const void *ns)
1068 {
1069 struct kernfs_node *kn;
1070
1071 if (!parent) {
1072 WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n",
1073 name);
1074 return -ENOENT;
1075 }
1076
1077 mutex_lock(&kernfs_mutex);
1078
1079 kn = kernfs_find_ns(parent, name, ns);
1080 if (kn)
1081 __kernfs_remove(kn);
1082
1083 mutex_unlock(&kernfs_mutex);
1084
1085 if (kn)
1086 return 0;
1087 else
1088 return -ENOENT;
1089 }
1090
1091 /**
1092 * kernfs_rename_ns - move and rename a kernfs_node
1093 * @kn: target node
1094 * @new_parent: new parent to put @kn under
1095 * @new_name: new name
1096 * @new_ns: new namespace tag
 *
 * RETURNS:
 * 0 on success, -ENOENT if @kn or @new_parent couldn't be pinned active,
 * -EEXIST if @new_name already exists under @new_parent, -ENOMEM if the
 * new name couldn't be duplicated.
1097 */
1098 int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
1099 const char *new_name, const void *new_ns)
1100 {
1101 int error;
1102
1103 error = -ENOENT;
1104 if (!kernfs_get_active(new_parent))
1105 goto out;
1106 if (!kernfs_get_active(kn))
1107 goto out_put_new_parent;
1108
1109 mutex_lock(&kernfs_mutex);
1110
1111 error = 0;
1112 if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
1113 (strcmp(kn->name, new_name) == 0))
1114 goto out_unlock; /* nothing to rename */
1115
1116 error = -EEXIST;
1117 if (kernfs_find_ns(new_parent, new_name, new_ns))
1118 goto out_unlock;
1119
1120 /* rename kernfs_node */
1121 if (strcmp(kn->name, new_name) != 0) {
1122 error = -ENOMEM;
1123 new_name = kstrdup(new_name, GFP_KERNEL);
1124 if (!new_name)
1125 goto out_unlock;
1126
/* static names are never freed; just drop the flag and switch over */
1127 if (kn->flags & KERNFS_STATIC_NAME)
1128 kn->flags &= ~KERNFS_STATIC_NAME;
1129 else
1130 kfree(kn->name);
1131
1132 kn->name = new_name;
1133 }
1134
1135 /*
1136 * Move to the appropriate place in the appropriate directories rbtree.
1137 */
1138 kernfs_unlink_sibling(kn);
1139 kernfs_get(new_parent);
1140 kernfs_put(kn->parent);
1141 kn->ns = new_ns;
1142 kn->hash = kernfs_name_hash(kn->name, kn->ns);
1143 kn->parent = new_parent;
1144 kernfs_link_sibling(kn);
1145
1146 error = 0;
1147 out_unlock:
1148 mutex_unlock(&kernfs_mutex);
1149 kernfs_put_active(kn);
1150 out_put_new_parent:
1151 kernfs_put_active(new_parent);
1152 out:
1153 return error;
1154 }
1155
1156 /* Relationship between s_mode and the DT_xxx types */
1157 static inline unsigned char dt_type(struct kernfs_node *kn)
1158 {
1159 return (kn->mode >> 12) & 15;
1160 }
1161
1162 static int kernfs_dir_fop_release(struct inode *inode, struct file *filp)
1163 {
1164 kernfs_put(filp->private_data);
1165 return 0;
1166 }
1167
1168 static struct kernfs_node *kernfs_dir_pos(const void *ns,
1169 struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos)
1170 {
1171 if (pos) {
1172 int valid = pos->parent == parent && hash == pos->hash;
1173 kernfs_put(pos);
1174 if (!valid)
1175 pos = NULL;
1176 }
1177 if (!pos && (hash > 1) && (hash < INT_MAX)) {
1178 struct rb_node *node = parent->dir.children.rb_node;
1179 while (node) {
1180 pos = rb_to_kn(node);
1181
1182 if (hash < pos->hash)
1183 node = node->rb_left;
1184 else if (hash > pos->hash)
1185 node = node->rb_right;
1186 else
1187 break;
1188 }
1189 }
1190 /* Skip over entries in the wrong namespace */
1191 while (pos && pos->ns != ns) {
1192 struct rb_node *node = rb_next(&pos->rb);
1193 if (!node)
1194 pos = NULL;
1195 else
1196 pos = rb_to_kn(node);
1197 }
1198 return pos;
1199 }
1200
1201 static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
1202 struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos)
1203 {
1204 pos = kernfs_dir_pos(ns, parent, ino, pos);
1205 if (pos)
1206 do {
1207 struct rb_node *node = rb_next(&pos->rb);
1208 if (!node)
1209 pos = NULL;
1210 else
1211 pos = rb_to_kn(node);
1212 } while (pos && pos->ns != ns);
1213 return pos;
1214 }
1215
/* Emit directory entries for @file, resuming from the cursor stashed in
 * ->private_data and the hash stored in ctx->pos by a previous call. */
static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
{
	struct dentry *dentry = file->f_path.dentry;
	struct kernfs_node *parent = dentry->d_fsdata;
	struct kernfs_node *pos = file->private_data;
	const void *ns = NULL;

	if (!dir_emit_dots(file, ctx))
		return 0;
	mutex_lock(&kernfs_mutex);

	/* only show entries tagged with the superblock's namespace, if any */
	if (kernfs_ns_enabled(parent))
		ns = kernfs_info(dentry->d_sb)->ns;

	for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos);
	     pos;
	     pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) {
		const char *name = pos->name;
		unsigned int type = dt_type(pos);
		int len = strlen(name);
		ino_t ino = pos->ino;

		/*
		 * Use the name hash as the directory offset and pin @pos
		 * into ->private_data so the walk can be revalidated and
		 * resumed after the mutex is dropped (or on a later
		 * syscall).  On early return the pinned ref is dropped by
		 * the next kernfs_dir_pos() call or by
		 * kernfs_dir_fop_release().
		 */
		ctx->pos = pos->hash;
		file->private_data = pos;
		kernfs_get(pos);

		/* don't hold kernfs_mutex across the copy-out to userspace */
		mutex_unlock(&kernfs_mutex);
		if (!dir_emit(ctx, name, len, ino, type))
			return 0;
		mutex_lock(&kernfs_mutex);
	}
	mutex_unlock(&kernfs_mutex);
	file->private_data = NULL;
	/* INT_MAX is never a valid hash - see kernfs_name_hash() - so it
	 * serves as the EOF offset */
	ctx->pos = INT_MAX;
	return 0;
}
1252
1253 static loff_t kernfs_dir_fop_llseek(struct file *file, loff_t offset,
1254 int whence)
1255 {
1256 struct inode *inode = file_inode(file);
1257 loff_t ret;
1258
1259 mutex_lock(&inode->i_mutex);
1260 ret = generic_file_llseek(file, offset, whence);
1261 mutex_unlock(&inode->i_mutex);
1262
1263 return ret;
1264 }
1265
/*
 * File operations for kernfs directories.  readdir keeps its cursor (a
 * pinned kernfs_node) in file->private_data; release drops that reference.
 */
const struct file_operations kernfs_dir_fops = {
	.read		= generic_read_dir,
	.iterate	= kernfs_fop_readdir,
	.release	= kernfs_dir_fop_release,
	.llseek		= kernfs_dir_fop_llseek,
};