/*
 * fs/kernfs/dir.c - kernfs directory implementation
 *
 * Copyright (c) 2001-3 Patrick Mochel
 * Copyright (c) 2007 SUSE Linux Products GmbH
 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2.
 */
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/idr.h>
#include <linux/slab.h>
#include <linux/security.h>
#include <linux/hash.h>

#include "kernfs-internal.h"
/* serializes all modifications to the kernfs hierarchy */
DEFINE_MUTEX(kernfs_mutex);

/* rbtree node -> enclosing kernfs_node */
#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
25 static bool kernfs_lockdep(struct kernfs_node
*kn
)
27 #ifdef CONFIG_DEBUG_LOCK_ALLOC
28 return kn
->flags
& KERNFS_LOCKDEP
;
36 * @name: Null terminated string to hash
37 * @ns: Namespace tag to hash
39 * Returns 31 bit hash of ns + name (so it fits in an off_t )
41 static unsigned int kernfs_name_hash(const char *name
, const void *ns
)
43 unsigned long hash
= init_name_hash();
44 unsigned int len
= strlen(name
);
46 hash
= partial_name_hash(*name
++, hash
);
47 hash
= (end_name_hash(hash
) ^ hash_ptr((void *)ns
, 31));
49 /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
57 static int kernfs_name_compare(unsigned int hash
, const char *name
,
58 const void *ns
, const struct kernfs_node
*kn
)
61 return hash
- kn
->hash
;
64 return strcmp(name
, kn
->name
);
67 static int kernfs_sd_compare(const struct kernfs_node
*left
,
68 const struct kernfs_node
*right
)
70 return kernfs_name_compare(left
->hash
, left
->name
, left
->ns
, right
);
74 * kernfs_link_sibling - link kernfs_node into sibling rbtree
75 * @kn: kernfs_node of interest
77 * Link @kn into its sibling rbtree which starts from
78 * @kn->parent->dir.children.
81 * mutex_lock(kernfs_mutex)
84 * 0 on susccess -EEXIST on failure.
86 static int kernfs_link_sibling(struct kernfs_node
*kn
)
88 struct rb_node
**node
= &kn
->parent
->dir
.children
.rb_node
;
89 struct rb_node
*parent
= NULL
;
91 if (kernfs_type(kn
) == KERNFS_DIR
)
92 kn
->parent
->dir
.subdirs
++;
95 struct kernfs_node
*pos
;
98 pos
= rb_to_kn(*node
);
100 result
= kernfs_sd_compare(kn
, pos
);
102 node
= &pos
->rb
.rb_left
;
104 node
= &pos
->rb
.rb_right
;
108 /* add new node and rebalance the tree */
109 rb_link_node(&kn
->rb
, parent
, node
);
110 rb_insert_color(&kn
->rb
, &kn
->parent
->dir
.children
);
115 * kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree
116 * @kn: kernfs_node of interest
118 * Unlink @kn from its sibling rbtree which starts from
119 * kn->parent->dir.children.
122 * mutex_lock(kernfs_mutex)
124 static bool kernfs_unlink_sibling(struct kernfs_node
*kn
)
126 if (RB_EMPTY_NODE(&kn
->rb
))
129 if (kernfs_type(kn
) == KERNFS_DIR
)
130 kn
->parent
->dir
.subdirs
--;
132 rb_erase(&kn
->rb
, &kn
->parent
->dir
.children
);
133 RB_CLEAR_NODE(&kn
->rb
);
138 * kernfs_get_active - get an active reference to kernfs_node
139 * @kn: kernfs_node to get an active reference to
141 * Get an active reference of @kn. This function is noop if @kn
145 * Pointer to @kn on success, NULL on failure.
147 struct kernfs_node
*kernfs_get_active(struct kernfs_node
*kn
)
152 if (kernfs_lockdep(kn
))
153 rwsem_acquire_read(&kn
->dep_map
, 0, 1, _RET_IP_
);
156 * Try to obtain an active ref. If @kn is deactivated, we block
157 * till either it's reactivated or killed.
160 if (atomic_inc_unless_negative(&kn
->active
))
163 wait_event(kernfs_root(kn
)->deactivate_waitq
,
164 atomic_read(&kn
->active
) >= 0 ||
165 RB_EMPTY_NODE(&kn
->rb
));
166 } while (!RB_EMPTY_NODE(&kn
->rb
));
168 if (kernfs_lockdep(kn
))
169 rwsem_release(&kn
->dep_map
, 1, _RET_IP_
);
174 * kernfs_put_active - put an active reference to kernfs_node
175 * @kn: kernfs_node to put an active reference to
177 * Put an active reference to @kn. This function is noop if @kn
180 void kernfs_put_active(struct kernfs_node
*kn
)
182 struct kernfs_root
*root
= kernfs_root(kn
);
188 if (kernfs_lockdep(kn
))
189 rwsem_release(&kn
->dep_map
, 1, _RET_IP_
);
190 v
= atomic_dec_return(&kn
->active
);
191 if (likely(v
!= KN_DEACTIVATED_BIAS
))
194 wake_up_all(&root
->deactivate_waitq
);
198 * kernfs_drain - drain kernfs_node
199 * @kn: kernfs_node to drain
201 * Drain existing usages of @kn. Mutiple removers may invoke this function
202 * concurrently on @kn and all will return after draining is complete.
203 * Returns %true if drain is performed and kernfs_mutex was temporarily
204 * released. %false if @kn was already drained and no operation was
207 * The caller is responsible for ensuring @kn stays pinned while this
208 * function is in progress even if it gets removed by someone else.
210 static bool kernfs_drain(struct kernfs_node
*kn
)
211 __releases(&kernfs_mutex
) __acquires(&kernfs_mutex
)
213 struct kernfs_root
*root
= kernfs_root(kn
);
215 lockdep_assert_held(&kernfs_mutex
);
216 WARN_ON_ONCE(atomic_read(&kn
->active
) >= 0);
219 * We want to go through the active ref lockdep annotation at least
220 * once for all node removals, but the lockdep annotation can't be
221 * nested inside kernfs_mutex and deactivation can't make forward
222 * progress if we keep dropping the mutex. Use JUST_ACTIVATED to
223 * force the slow path once for each deactivation if lockdep is
226 if ((!kernfs_lockdep(kn
) || !(kn
->flags
& KERNFS_JUST_DEACTIVATED
)) &&
227 atomic_read(&kn
->active
) == KN_DEACTIVATED_BIAS
)
230 kn
->flags
&= ~KERNFS_JUST_DEACTIVATED
;
231 mutex_unlock(&kernfs_mutex
);
233 if (kernfs_lockdep(kn
)) {
234 rwsem_acquire(&kn
->dep_map
, 0, 0, _RET_IP_
);
235 if (atomic_read(&kn
->active
) != KN_DEACTIVATED_BIAS
)
236 lock_contended(&kn
->dep_map
, _RET_IP_
);
239 wait_event(root
->deactivate_waitq
,
240 atomic_read(&kn
->active
) == KN_DEACTIVATED_BIAS
);
242 if (kernfs_lockdep(kn
)) {
243 lock_acquired(&kn
->dep_map
, _RET_IP_
);
244 rwsem_release(&kn
->dep_map
, 1, _RET_IP_
);
247 mutex_lock(&kernfs_mutex
);
252 * kernfs_get - get a reference count on a kernfs_node
253 * @kn: the target kernfs_node
255 void kernfs_get(struct kernfs_node
*kn
)
258 WARN_ON(!atomic_read(&kn
->count
));
259 atomic_inc(&kn
->count
);
262 EXPORT_SYMBOL_GPL(kernfs_get
);
265 * kernfs_put - put a reference count on a kernfs_node
266 * @kn: the target kernfs_node
268 * Put a reference count of @kn and destroy it if it reached zero.
270 void kernfs_put(struct kernfs_node
*kn
)
272 struct kernfs_node
*parent
;
273 struct kernfs_root
*root
;
275 if (!kn
|| !atomic_dec_and_test(&kn
->count
))
277 root
= kernfs_root(kn
);
280 * Moving/renaming is always done while holding reference.
281 * kn->parent won't change beneath us.
285 WARN_ONCE(atomic_read(&kn
->active
) != KN_DEACTIVATED_BIAS
,
286 "kernfs_put: %s/%s: released with incorrect active_ref %d\n",
287 parent
? parent
->name
: "", kn
->name
, atomic_read(&kn
->active
));
289 if (kernfs_type(kn
) == KERNFS_LINK
)
290 kernfs_put(kn
->symlink
.target_kn
);
291 if (!(kn
->flags
& KERNFS_STATIC_NAME
))
294 if (kn
->iattr
->ia_secdata
)
295 security_release_secctx(kn
->iattr
->ia_secdata
,
296 kn
->iattr
->ia_secdata_len
);
297 simple_xattrs_free(&kn
->iattr
->xattrs
);
300 ida_simple_remove(&root
->ino_ida
, kn
->ino
);
301 kmem_cache_free(kernfs_node_cache
, kn
);
305 if (atomic_dec_and_test(&kn
->count
))
308 /* just released the root kn, free @root too */
309 ida_destroy(&root
->ino_ida
);
313 EXPORT_SYMBOL_GPL(kernfs_put
);
315 static int kernfs_dop_revalidate(struct dentry
*dentry
, unsigned int flags
)
317 struct kernfs_node
*kn
;
319 if (flags
& LOOKUP_RCU
)
322 /* Always perform fresh lookup for negatives */
323 if (!dentry
->d_inode
)
324 goto out_bad_unlocked
;
326 kn
= dentry
->d_fsdata
;
327 mutex_lock(&kernfs_mutex
);
329 /* Force fresh lookup if removed */
330 if (kn
->parent
&& RB_EMPTY_NODE(&kn
->rb
))
333 /* The kernfs node has been moved? */
334 if (dentry
->d_parent
->d_fsdata
!= kn
->parent
)
337 /* The kernfs node has been renamed */
338 if (strcmp(dentry
->d_name
.name
, kn
->name
) != 0)
341 /* The kernfs node has been moved to a different namespace */
342 if (kn
->parent
&& kernfs_ns_enabled(kn
->parent
) &&
343 kernfs_info(dentry
->d_sb
)->ns
!= kn
->ns
)
346 mutex_unlock(&kernfs_mutex
);
350 mutex_unlock(&kernfs_mutex
);
353 * @dentry doesn't match the underlying kernfs node, drop the
354 * dentry and force lookup. If we have submounts we must allow the
355 * vfs caches to lie about the state of the filesystem to prevent
356 * leaks and other nasty things, so use check_submounts_and_drop()
357 * instead of d_drop().
359 if (check_submounts_and_drop(dentry
) != 0)
365 static void kernfs_dop_release(struct dentry
*dentry
)
367 kernfs_put(dentry
->d_fsdata
);
370 const struct dentry_operations kernfs_dops
= {
371 .d_revalidate
= kernfs_dop_revalidate
,
372 .d_release
= kernfs_dop_release
,
375 struct kernfs_node
*kernfs_new_node(struct kernfs_root
*root
, const char *name
,
376 umode_t mode
, unsigned flags
)
378 char *dup_name
= NULL
;
379 struct kernfs_node
*kn
;
382 if (!(flags
& KERNFS_STATIC_NAME
)) {
383 name
= dup_name
= kstrdup(name
, GFP_KERNEL
);
388 kn
= kmem_cache_zalloc(kernfs_node_cache
, GFP_KERNEL
);
392 ret
= ida_simple_get(&root
->ino_ida
, 1, 0, GFP_KERNEL
);
397 atomic_set(&kn
->count
, 1);
398 atomic_set(&kn
->active
, KN_DEACTIVATED_BIAS
);
400 RB_CLEAR_NODE(&kn
->rb
);
409 kmem_cache_free(kernfs_node_cache
, kn
);
416 * kernfs_add_one - add kernfs_node to parent without warning
417 * @kn: kernfs_node to be added
418 * @parent: the parent kernfs_node to add @kn to
420 * Get @parent and set @kn->parent to it and increment nlink of the
421 * parent inode if @kn is a directory and link into the children list
425 * 0 on success, -EEXIST if entry with the given name already
428 int kernfs_add_one(struct kernfs_node
*kn
, struct kernfs_node
*parent
)
430 struct kernfs_iattrs
*ps_iattr
;
434 if (!kernfs_get_active(parent
))
437 mutex_lock(&kernfs_mutex
);
440 has_ns
= kernfs_ns_enabled(parent
);
441 if (WARN(has_ns
!= (bool)kn
->ns
, KERN_WARNING
"kernfs: ns %s in '%s' for '%s'\n",
442 has_ns
? "required" : "invalid", parent
->name
, kn
->name
))
445 if (kernfs_type(parent
) != KERNFS_DIR
)
448 kn
->hash
= kernfs_name_hash(kn
->name
, kn
->ns
);
452 ret
= kernfs_link_sibling(kn
);
456 /* Update timestamps on the parent */
457 ps_iattr
= parent
->iattr
;
459 struct iattr
*ps_iattrs
= &ps_iattr
->ia_iattr
;
460 ps_iattrs
->ia_ctime
= ps_iattrs
->ia_mtime
= CURRENT_TIME
;
463 /* Mark the entry added into directory tree */
464 atomic_sub(KN_DEACTIVATED_BIAS
, &kn
->active
);
468 mutex_unlock(&kernfs_mutex
);
469 kernfs_put_active(parent
);
474 * kernfs_find_ns - find kernfs_node with the given name
475 * @parent: kernfs_node to search under
476 * @name: name to look for
477 * @ns: the namespace tag to use
479 * Look for kernfs_node with name @name under @parent. Returns pointer to
480 * the found kernfs_node on success, %NULL on failure.
482 static struct kernfs_node
*kernfs_find_ns(struct kernfs_node
*parent
,
483 const unsigned char *name
,
486 struct rb_node
*node
= parent
->dir
.children
.rb_node
;
487 bool has_ns
= kernfs_ns_enabled(parent
);
490 lockdep_assert_held(&kernfs_mutex
);
492 if (has_ns
!= (bool)ns
) {
493 WARN(1, KERN_WARNING
"kernfs: ns %s in '%s' for '%s'\n",
494 has_ns
? "required" : "invalid", parent
->name
, name
);
498 hash
= kernfs_name_hash(name
, ns
);
500 struct kernfs_node
*kn
;
504 result
= kernfs_name_compare(hash
, name
, ns
, kn
);
506 node
= node
->rb_left
;
508 node
= node
->rb_right
;
516 * kernfs_find_and_get_ns - find and get kernfs_node with the given name
517 * @parent: kernfs_node to search under
518 * @name: name to look for
519 * @ns: the namespace tag to use
521 * Look for kernfs_node with name @name under @parent and get a reference
522 * if found. This function may sleep and returns pointer to the found
523 * kernfs_node on success, %NULL on failure.
525 struct kernfs_node
*kernfs_find_and_get_ns(struct kernfs_node
*parent
,
526 const char *name
, const void *ns
)
528 struct kernfs_node
*kn
;
530 mutex_lock(&kernfs_mutex
);
531 kn
= kernfs_find_ns(parent
, name
, ns
);
533 mutex_unlock(&kernfs_mutex
);
537 EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns
);
540 * kernfs_create_root - create a new kernfs hierarchy
541 * @kdops: optional directory syscall operations for the hierarchy
542 * @priv: opaque data associated with the new directory
544 * Returns the root of the new hierarchy on success, ERR_PTR() value on
547 struct kernfs_root
*kernfs_create_root(struct kernfs_dir_ops
*kdops
, void *priv
)
549 struct kernfs_root
*root
;
550 struct kernfs_node
*kn
;
552 root
= kzalloc(sizeof(*root
), GFP_KERNEL
);
554 return ERR_PTR(-ENOMEM
);
556 ida_init(&root
->ino_ida
);
558 kn
= kernfs_new_node(root
, "", S_IFDIR
| S_IRUGO
| S_IXUGO
, KERNFS_DIR
);
560 ida_destroy(&root
->ino_ida
);
562 return ERR_PTR(-ENOMEM
);
565 atomic_sub(KN_DEACTIVATED_BIAS
, &kn
->active
);
570 root
->dir_ops
= kdops
;
572 init_waitqueue_head(&root
->deactivate_waitq
);
578 * kernfs_destroy_root - destroy a kernfs hierarchy
579 * @root: root of the hierarchy to destroy
581 * Destroy the hierarchy anchored at @root by removing all existing
582 * directories and destroying @root.
584 void kernfs_destroy_root(struct kernfs_root
*root
)
586 kernfs_remove(root
->kn
); /* will also free @root */
590 * kernfs_create_dir_ns - create a directory
591 * @parent: parent in which to create a new directory
592 * @name: name of the new directory
593 * @mode: mode of the new directory
594 * @priv: opaque data associated with the new directory
595 * @ns: optional namespace tag of the directory
597 * Returns the created node on success, ERR_PTR() value on failure.
599 struct kernfs_node
*kernfs_create_dir_ns(struct kernfs_node
*parent
,
600 const char *name
, umode_t mode
,
601 void *priv
, const void *ns
)
603 struct kernfs_node
*kn
;
607 kn
= kernfs_new_node(kernfs_root(parent
), name
, mode
| S_IFDIR
,
610 return ERR_PTR(-ENOMEM
);
612 kn
->dir
.root
= parent
->dir
.root
;
617 rc
= kernfs_add_one(kn
, parent
);
625 static struct dentry
*kernfs_iop_lookup(struct inode
*dir
,
626 struct dentry
*dentry
,
630 struct kernfs_node
*parent
= dentry
->d_parent
->d_fsdata
;
631 struct kernfs_node
*kn
;
633 const void *ns
= NULL
;
635 mutex_lock(&kernfs_mutex
);
637 if (kernfs_ns_enabled(parent
))
638 ns
= kernfs_info(dir
->i_sb
)->ns
;
640 kn
= kernfs_find_ns(parent
, dentry
->d_name
.name
, ns
);
648 dentry
->d_fsdata
= kn
;
650 /* attach dentry and inode */
651 inode
= kernfs_get_inode(dir
->i_sb
, kn
);
653 ret
= ERR_PTR(-ENOMEM
);
657 /* instantiate and hash dentry */
658 ret
= d_materialise_unique(dentry
, inode
);
660 mutex_unlock(&kernfs_mutex
);
664 static int kernfs_iop_mkdir(struct inode
*dir
, struct dentry
*dentry
,
667 struct kernfs_node
*parent
= dir
->i_private
;
668 struct kernfs_dir_ops
*kdops
= kernfs_root(parent
)->dir_ops
;
670 if (!kdops
|| !kdops
->mkdir
)
673 return kdops
->mkdir(parent
, dentry
->d_name
.name
, mode
);
676 static int kernfs_iop_rmdir(struct inode
*dir
, struct dentry
*dentry
)
678 struct kernfs_node
*kn
= dentry
->d_fsdata
;
679 struct kernfs_dir_ops
*kdops
= kernfs_root(kn
)->dir_ops
;
681 if (!kdops
|| !kdops
->rmdir
)
684 return kdops
->rmdir(kn
);
687 static int kernfs_iop_rename(struct inode
*old_dir
, struct dentry
*old_dentry
,
688 struct inode
*new_dir
, struct dentry
*new_dentry
)
690 struct kernfs_node
*kn
= old_dentry
->d_fsdata
;
691 struct kernfs_node
*new_parent
= new_dir
->i_private
;
692 struct kernfs_dir_ops
*kdops
= kernfs_root(kn
)->dir_ops
;
694 if (!kdops
|| !kdops
->rename
)
697 return kdops
->rename(kn
, new_parent
, new_dentry
->d_name
.name
);
700 const struct inode_operations kernfs_dir_iops
= {
701 .lookup
= kernfs_iop_lookup
,
702 .permission
= kernfs_iop_permission
,
703 .setattr
= kernfs_iop_setattr
,
704 .getattr
= kernfs_iop_getattr
,
705 .setxattr
= kernfs_iop_setxattr
,
706 .removexattr
= kernfs_iop_removexattr
,
707 .getxattr
= kernfs_iop_getxattr
,
708 .listxattr
= kernfs_iop_listxattr
,
710 .mkdir
= kernfs_iop_mkdir
,
711 .rmdir
= kernfs_iop_rmdir
,
712 .rename
= kernfs_iop_rename
,
715 static struct kernfs_node
*kernfs_leftmost_descendant(struct kernfs_node
*pos
)
717 struct kernfs_node
*last
;
724 if (kernfs_type(pos
) != KERNFS_DIR
)
727 rbn
= rb_first(&pos
->dir
.children
);
738 * kernfs_next_descendant_post - find the next descendant for post-order walk
739 * @pos: the current position (%NULL to initiate traversal)
740 * @root: kernfs_node whose descendants to walk
742 * Find the next descendant to visit for post-order traversal of @root's
743 * descendants. @root is included in the iteration and the last node to be
746 static struct kernfs_node
*kernfs_next_descendant_post(struct kernfs_node
*pos
,
747 struct kernfs_node
*root
)
751 lockdep_assert_held(&kernfs_mutex
);
753 /* if first iteration, visit leftmost descendant which may be root */
755 return kernfs_leftmost_descendant(root
);
757 /* if we visited @root, we're done */
761 /* if there's an unvisited sibling, visit its leftmost descendant */
762 rbn
= rb_next(&pos
->rb
);
764 return kernfs_leftmost_descendant(rb_to_kn(rbn
));
766 /* no sibling left, visit parent */
770 static void __kernfs_deactivate(struct kernfs_node
*kn
)
772 struct kernfs_node
*pos
;
774 lockdep_assert_held(&kernfs_mutex
);
776 /* prevent any new usage under @kn by deactivating all nodes */
778 while ((pos
= kernfs_next_descendant_post(pos
, kn
))) {
779 if (!pos
->deact_depth
++) {
780 WARN_ON_ONCE(atomic_read(&pos
->active
) < 0);
781 atomic_add(KN_DEACTIVATED_BIAS
, &pos
->active
);
782 pos
->flags
|= KERNFS_JUST_DEACTIVATED
;
787 * Drain the subtree. If kernfs_drain() blocked to drain, which is
788 * indicated by %true return, it temporarily released kernfs_mutex
789 * and the rbtree might have been modified inbetween breaking our
790 * future walk. Restart the walk after each %true return.
793 while ((pos
= kernfs_next_descendant_post(pos
, kn
))) {
797 drained
= kernfs_drain(pos
);
804 static void __kernfs_reactivate(struct kernfs_node
*kn
)
806 struct kernfs_node
*pos
;
808 lockdep_assert_held(&kernfs_mutex
);
811 while ((pos
= kernfs_next_descendant_post(pos
, kn
))) {
812 if (!--pos
->deact_depth
) {
813 WARN_ON_ONCE(atomic_read(&pos
->active
) >= 0);
814 atomic_sub(KN_DEACTIVATED_BIAS
, &pos
->active
);
816 WARN_ON_ONCE(pos
->deact_depth
< 0);
819 /* some nodes reactivated, kick get_active waiters */
820 wake_up_all(&kernfs_root(kn
)->deactivate_waitq
);
/* deactivate @kn's subtree from inside one of @kn's own kernfs operations */
static void __kernfs_deactivate_self(struct kernfs_node *kn)
{
	/*
	 * Take out ourself out of the active ref dependency chain and
	 * deactivate.  If we're called without an active ref, lockdep will
	 * complain.
	 */
	kernfs_put_active(kn);
	__kernfs_deactivate(kn);
}
834 static void __kernfs_reactivate_self(struct kernfs_node
*kn
)
836 __kernfs_reactivate(kn
);
838 * Restore active ref dropped by deactivate_self() so that it's
839 * balanced on return. put_active() will soon be called on @kn, so
840 * this can't break anything regardless of @kn's state.
842 atomic_inc(&kn
->active
);
843 if (kernfs_lockdep(kn
))
844 rwsem_acquire(&kn
->dep_map
, 0, 1, _RET_IP_
);
848 * kernfs_deactivate - deactivate subtree of a node
849 * @kn: kernfs_node to deactivate subtree of
851 * Deactivate the subtree of @kn. On return, there's no active operation
852 * going on under @kn and creation or renaming of a node under @kn is
853 * blocked until @kn is reactivated or removed. This function can be
854 * called multiple times and nests properly. Each invocation should be
855 * paired with kernfs_reactivate().
857 * For a kernfs user which uses simple locking, the subsystem lock would
858 * nest inside active reference. This becomes problematic if the user
859 * tries to remove nodes while holding the subystem lock as it would create
860 * a reverse locking dependency from the subsystem lock to active ref.
861 * This function can be used to break such reverse dependency. The user
862 * can call this function outside the subsystem lock and then proceed to
863 * invoke kernfs_remove() while holding the subsystem lock without
864 * introducing such reverse dependency.
866 void kernfs_deactivate(struct kernfs_node
*kn
)
868 mutex_lock(&kernfs_mutex
);
869 __kernfs_deactivate(kn
);
870 mutex_unlock(&kernfs_mutex
);
874 * kernfs_reactivate - reactivate subtree of a node
875 * @kn: kernfs_node to reactivate subtree of
877 * Undo kernfs_deactivate().
879 void kernfs_reactivate(struct kernfs_node
*kn
)
881 mutex_lock(&kernfs_mutex
);
882 __kernfs_reactivate(kn
);
883 mutex_unlock(&kernfs_mutex
);
887 * kernfs_deactivate_self - deactivate subtree of a node from its own method
888 * @kn: the self kernfs_node to deactivate subtree of
890 * The caller must be running off of a kernfs operation which is invoked
891 * with an active reference - e.g. one of kernfs_ops. Once this function
892 * is called, @kn may be removed by someone else while the enclosing method
893 * is in progress. Other than that, this function is equivalent to
894 * kernfs_deactivate() and should be paired with kernfs_reactivate_self().
896 void kernfs_deactivate_self(struct kernfs_node
*kn
)
898 mutex_lock(&kernfs_mutex
);
899 __kernfs_deactivate_self(kn
);
900 mutex_unlock(&kernfs_mutex
);
904 * kernfs_reactivate_self - reactivate subtree of a node from its own method
905 * @kn: the self kernfs_node to reactivate subtree of
907 * Undo kernfs_deactivate_self().
909 void kernfs_reactivate_self(struct kernfs_node
*kn
)
911 mutex_lock(&kernfs_mutex
);
912 __kernfs_reactivate_self(kn
);
913 mutex_unlock(&kernfs_mutex
);
916 static void __kernfs_remove(struct kernfs_node
*kn
)
918 struct kernfs_root
*root
= kernfs_root(kn
);
919 struct kernfs_node
*pos
;
921 lockdep_assert_held(&kernfs_mutex
);
923 pr_debug("kernfs %s: removing\n", kn
->name
);
925 __kernfs_deactivate(kn
);
927 /* unlink the subtree node-by-node */
929 pos
= kernfs_leftmost_descendant(kn
);
932 * We're gonna release kernfs_mutex to unmap bin files,
933 * Make sure @pos doesn't go away inbetween.
938 * This must be come before unlinking; otherwise, when
939 * there are multiple removers, some may finish before
940 * unmapping is complete.
942 if (pos
->flags
& KERNFS_HAS_MMAP
) {
943 mutex_unlock(&kernfs_mutex
);
944 kernfs_unmap_file(pos
);
945 mutex_lock(&kernfs_mutex
);
949 * kernfs_unlink_sibling() succeeds once per node. Use it
950 * to decide who's responsible for cleanups.
952 if (!pos
->parent
|| kernfs_unlink_sibling(pos
)) {
953 struct kernfs_iattrs
*ps_iattr
=
954 pos
->parent
? pos
->parent
->iattr
: NULL
;
956 /* update timestamps on the parent */
958 ps_iattr
->ia_iattr
.ia_ctime
= CURRENT_TIME
;
959 ps_iattr
->ia_iattr
.ia_mtime
= CURRENT_TIME
;
968 /* some nodes killed, kick get_active waiters */
969 wake_up_all(&root
->deactivate_waitq
);
973 * kernfs_remove - remove a kernfs_node recursively
974 * @kn: the kernfs_node to remove
976 * Remove @kn along with all its subdirectories and files.
978 void kernfs_remove(struct kernfs_node
*kn
)
980 mutex_lock(&kernfs_mutex
);
982 mutex_unlock(&kernfs_mutex
);
986 * kernfs_remove_self - remove a kernfs_node from its own method
987 * @kn: the self kernfs_node to remove
989 * The caller must be running off of a kernfs operation which is invoked
990 * with an active reference - e.g. one of kernfs_ops. This can be used to
991 * implement a file operation which deletes itself.
993 * For example, the "delete" file for a sysfs device directory can be
994 * implemented by invoking kernfs_remove_self() on the "delete" file
995 * itself. This function breaks the circular dependency of trying to
996 * deactivate self while holding an active ref itself. It isn't necessary
997 * to modify the usual removal path to use kernfs_remove_self(). The
998 * "delete" implementation can simply invoke kernfs_remove_self() on self
999 * before proceeding with the usual removal path. kernfs will ignore later
1000 * kernfs_remove() on self.
1002 * kernfs_remove_self() can be called multiple times concurrently on the
1003 * same kernfs_node. Only the first one actually performs removal and
1004 * returns %true. All others will wait until the kernfs operation which
1005 * won self-removal finishes and return %false. Note that the losers wait
1006 * for the completion of not only the winning kernfs_remove_self() but also
1007 * the whole kernfs_ops which won the arbitration. This can be used to
1008 * guarantee, for example, all concurrent writes to a "delete" file to
1009 * finish only after the whole operation is complete.
1011 bool kernfs_remove_self(struct kernfs_node
*kn
)
1015 mutex_lock(&kernfs_mutex
);
1016 __kernfs_deactivate_self(kn
);
1019 * SUICIDAL is used to arbitrate among competing invocations. Only
1020 * the first one will actually perform removal. When the removal
1021 * is complete, SUICIDED is set and the active ref is restored
1022 * while holding kernfs_mutex. The ones which lost arbitration
1023 * waits for SUICDED && drained which can happen only after the
1024 * enclosing kernfs operation which executed the winning instance
1025 * of kernfs_remove_self() finished.
1027 if (!(kn
->flags
& KERNFS_SUICIDAL
)) {
1028 kn
->flags
|= KERNFS_SUICIDAL
;
1029 __kernfs_remove(kn
);
1030 kn
->flags
|= KERNFS_SUICIDED
;
1033 wait_queue_head_t
*waitq
= &kernfs_root(kn
)->deactivate_waitq
;
1037 prepare_to_wait(waitq
, &wait
, TASK_UNINTERRUPTIBLE
);
1039 if ((kn
->flags
& KERNFS_SUICIDED
) &&
1040 atomic_read(&kn
->active
) == KN_DEACTIVATED_BIAS
)
1043 mutex_unlock(&kernfs_mutex
);
1045 mutex_lock(&kernfs_mutex
);
1047 finish_wait(waitq
, &wait
);
1048 WARN_ON_ONCE(!RB_EMPTY_NODE(&kn
->rb
));
1052 __kernfs_reactivate_self(kn
);
1053 mutex_unlock(&kernfs_mutex
);
1058 * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it
1059 * @parent: parent of the target
1060 * @name: name of the kernfs_node to remove
1061 * @ns: namespace tag of the kernfs_node to remove
1063 * Look for the kernfs_node with @name and @ns under @parent and remove it.
1064 * Returns 0 on success, -ENOENT if such entry doesn't exist.
1066 int kernfs_remove_by_name_ns(struct kernfs_node
*parent
, const char *name
,
1069 struct kernfs_node
*kn
;
1072 WARN(1, KERN_WARNING
"kernfs: can not remove '%s', no directory\n",
1077 mutex_lock(&kernfs_mutex
);
1079 kn
= kernfs_find_ns(parent
, name
, ns
);
1081 __kernfs_remove(kn
);
1083 mutex_unlock(&kernfs_mutex
);
1092 * kernfs_rename_ns - move and rename a kernfs_node
1094 * @new_parent: new parent to put @sd under
1095 * @new_name: new name
1096 * @new_ns: new namespace tag
1098 int kernfs_rename_ns(struct kernfs_node
*kn
, struct kernfs_node
*new_parent
,
1099 const char *new_name
, const void *new_ns
)
1104 if (!kernfs_get_active(new_parent
))
1106 if (!kernfs_get_active(kn
))
1107 goto out_put_new_parent
;
1109 mutex_lock(&kernfs_mutex
);
1112 if ((kn
->parent
== new_parent
) && (kn
->ns
== new_ns
) &&
1113 (strcmp(kn
->name
, new_name
) == 0))
1114 goto out_unlock
; /* nothing to rename */
1117 if (kernfs_find_ns(new_parent
, new_name
, new_ns
))
1120 /* rename kernfs_node */
1121 if (strcmp(kn
->name
, new_name
) != 0) {
1123 new_name
= kstrdup(new_name
, GFP_KERNEL
);
1127 if (kn
->flags
& KERNFS_STATIC_NAME
)
1128 kn
->flags
&= ~KERNFS_STATIC_NAME
;
1132 kn
->name
= new_name
;
1136 * Move to the appropriate place in the appropriate directories rbtree.
1138 kernfs_unlink_sibling(kn
);
1139 kernfs_get(new_parent
);
1140 kernfs_put(kn
->parent
);
1142 kn
->hash
= kernfs_name_hash(kn
->name
, kn
->ns
);
1143 kn
->parent
= new_parent
;
1144 kernfs_link_sibling(kn
);
1148 mutex_unlock(&kernfs_mutex
);
1149 kernfs_put_active(kn
);
1151 kernfs_put_active(new_parent
);
1156 /* Relationship between s_mode and the DT_xxx types */
1157 static inline unsigned char dt_type(struct kernfs_node
*kn
)
1159 return (kn
->mode
>> 12) & 15;
1162 static int kernfs_dir_fop_release(struct inode
*inode
, struct file
*filp
)
1164 kernfs_put(filp
->private_data
);
1168 static struct kernfs_node
*kernfs_dir_pos(const void *ns
,
1169 struct kernfs_node
*parent
, loff_t hash
, struct kernfs_node
*pos
)
1172 int valid
= pos
->parent
== parent
&& hash
== pos
->hash
;
1177 if (!pos
&& (hash
> 1) && (hash
< INT_MAX
)) {
1178 struct rb_node
*node
= parent
->dir
.children
.rb_node
;
1180 pos
= rb_to_kn(node
);
1182 if (hash
< pos
->hash
)
1183 node
= node
->rb_left
;
1184 else if (hash
> pos
->hash
)
1185 node
= node
->rb_right
;
1190 /* Skip over entries in the wrong namespace */
1191 while (pos
&& pos
->ns
!= ns
) {
1192 struct rb_node
*node
= rb_next(&pos
->rb
);
1196 pos
= rb_to_kn(node
);
1201 static struct kernfs_node
*kernfs_dir_next_pos(const void *ns
,
1202 struct kernfs_node
*parent
, ino_t ino
, struct kernfs_node
*pos
)
1204 pos
= kernfs_dir_pos(ns
, parent
, ino
, pos
);
1207 struct rb_node
*node
= rb_next(&pos
->rb
);
1211 pos
= rb_to_kn(node
);
1212 } while (pos
&& pos
->ns
!= ns
);
1216 static int kernfs_fop_readdir(struct file
*file
, struct dir_context
*ctx
)
1218 struct dentry
*dentry
= file
->f_path
.dentry
;
1219 struct kernfs_node
*parent
= dentry
->d_fsdata
;
1220 struct kernfs_node
*pos
= file
->private_data
;
1221 const void *ns
= NULL
;
1223 if (!dir_emit_dots(file
, ctx
))
1225 mutex_lock(&kernfs_mutex
);
1227 if (kernfs_ns_enabled(parent
))
1228 ns
= kernfs_info(dentry
->d_sb
)->ns
;
1230 for (pos
= kernfs_dir_pos(ns
, parent
, ctx
->pos
, pos
);
1232 pos
= kernfs_dir_next_pos(ns
, parent
, ctx
->pos
, pos
)) {
1233 const char *name
= pos
->name
;
1234 unsigned int type
= dt_type(pos
);
1235 int len
= strlen(name
);
1236 ino_t ino
= pos
->ino
;
1238 ctx
->pos
= pos
->hash
;
1239 file
->private_data
= pos
;
1242 mutex_unlock(&kernfs_mutex
);
1243 if (!dir_emit(ctx
, name
, len
, ino
, type
))
1245 mutex_lock(&kernfs_mutex
);
1247 mutex_unlock(&kernfs_mutex
);
1248 file
->private_data
= NULL
;
1253 static loff_t
kernfs_dir_fop_llseek(struct file
*file
, loff_t offset
,
1256 struct inode
*inode
= file_inode(file
);
1259 mutex_lock(&inode
->i_mutex
);
1260 ret
= generic_file_llseek(file
, offset
, whence
);
1261 mutex_unlock(&inode
->i_mutex
);
1266 const struct file_operations kernfs_dir_fops
= {
1267 .read
= generic_read_dir
,
1268 .iterate
= kernfs_fop_readdir
,
1269 .release
= kernfs_dir_fop_release
,
1270 .llseek
= kernfs_dir_fop_llseek
,