]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - fs/kernfs/dir.c
kernfs: remove unnecessary NULL check in __kernfs_remove()
[mirror_ubuntu-bionic-kernel.git] / fs / kernfs / dir.c
1 /*
2 * fs/kernfs/dir.c - kernfs directory implementation
3 *
4 * Copyright (c) 2001-3 Patrick Mochel
5 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
7 *
8 * This file is released under the GPLv2.
9 */
10
11 #include <linux/sched.h>
12 #include <linux/fs.h>
13 #include <linux/namei.h>
14 #include <linux/idr.h>
15 #include <linux/slab.h>
16 #include <linux/security.h>
17 #include <linux/hash.h>
18
19 #include "kernfs-internal.h"
20
/* protects hierarchy topology, node flags and the sibling rbtrees */
21 DEFINE_MUTEX(kernfs_mutex);
22
/* map an rb_node embedded in a kernfs_node back to its kernfs_node */
23 #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
24
25 static bool kernfs_lockdep(struct kernfs_node *kn)
26 {
27 #ifdef CONFIG_DEBUG_LOCK_ALLOC
28 return kn->flags & KERNFS_LOCKDEP;
29 #else
30 return false;
31 #endif
32 }
33
34 /**
35 * kernfs_name_hash
36 * @name: Null terminated string to hash
37 * @ns: Namespace tag to hash
38 *
39 * Returns 31 bit hash of ns + name (so it fits in an off_t )
40 */
41 static unsigned int kernfs_name_hash(const char *name, const void *ns)
42 {
43 unsigned long hash = init_name_hash();
44 unsigned int len = strlen(name);
45 while (len--)
46 hash = partial_name_hash(*name++, hash);
47 hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31));
48 hash &= 0x7fffffffU;
49 /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
50 if (hash < 1)
51 hash += 2;
52 if (hash >= INT_MAX)
53 hash = INT_MAX - 1;
54 return hash;
55 }
56
57 static int kernfs_name_compare(unsigned int hash, const char *name,
58 const void *ns, const struct kernfs_node *kn)
59 {
60 if (hash != kn->hash)
61 return hash - kn->hash;
62 if (ns != kn->ns)
63 return ns - kn->ns;
64 return strcmp(name, kn->name);
65 }
66
/* rbtree ordering callback: compare @left's (hash, ns, name) key to @right */
67 static int kernfs_sd_compare(const struct kernfs_node *left,
68 const struct kernfs_node *right)
69 {
70 return kernfs_name_compare(left->hash, left->name, left->ns, right);
71 }
72
73 /**
74 * kernfs_link_sibling - link kernfs_node into sibling rbtree
75 * @kn: kernfs_node of interest
76 *
77 * Link @kn into its sibling rbtree which starts from
78 * @kn->parent->dir.children.
79 *
80 * Locking:
81 * mutex_lock(kernfs_mutex)
82 *
83 * RETURNS:
84 * 0 on susccess -EEXIST on failure.
85 */
86 static int kernfs_link_sibling(struct kernfs_node *kn)
87 {
88 struct rb_node **node = &kn->parent->dir.children.rb_node;
89 struct rb_node *parent = NULL;
90
91 if (kernfs_type(kn) == KERNFS_DIR)
92 kn->parent->dir.subdirs++;
93
94 while (*node) {
95 struct kernfs_node *pos;
96 int result;
97
98 pos = rb_to_kn(*node);
99 parent = *node;
100 result = kernfs_sd_compare(kn, pos);
101 if (result < 0)
102 node = &pos->rb.rb_left;
103 else if (result > 0)
104 node = &pos->rb.rb_right;
105 else
106 return -EEXIST;
107 }
108 /* add new node and rebalance the tree */
109 rb_link_node(&kn->rb, parent, node);
110 rb_insert_color(&kn->rb, &kn->parent->dir.children);
111 return 0;
112 }
113
115 /**
116 * kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree
117 * @kn: kernfs_node of interest
118 *
119 * Unlink @kn from its sibling rbtree which starts from
120 * kn->parent->dir.children.
121 *
122 * Locking:
123 * mutex_lock(kernfs_mutex)
 *
 * RETURNS:
 * %true if this call actually unlinked @kn, %false if it was already
 * unlinked.  Succeeds at most once per node; removers use this to pick
 * a single cleanup owner.
124 */
125 static bool kernfs_unlink_sibling(struct kernfs_node *kn)
126 {
127 if (RB_EMPTY_NODE(&kn->rb))
128 return false;
129
130 if (kernfs_type(kn) == KERNFS_DIR)
131 kn->parent->dir.subdirs--;
132
133 rb_erase(&kn->rb, &kn->parent->dir.children);
/* mark unlinked so later RB_EMPTY_NODE() checks see it */
134 RB_CLEAR_NODE(&kn->rb);
135 return true;
136 }
136
137 /**
138 * kernfs_get_active - get an active reference to kernfs_node
139 * @kn: kernfs_node to get an active reference to
140 *
141 * Get an active reference of @kn. This function is noop if @kn
142 * is NULL.
143 *
144 * RETURNS:
145 * Pointer to @kn on success, NULL on failure.
146 */
147 struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
148 {
149 if (unlikely(!kn))
150 return NULL;
151
152 if (kernfs_lockdep(kn))
153 rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_);
154
155 /*
156 * Try to obtain an active ref. If @kn is deactivated, we block
157 * till either it's reactivated or killed.
158 */
159 do {
160 if (atomic_inc_unless_negative(&kn->active))
161 return kn;
162
163 wait_event(kernfs_root(kn)->deactivate_waitq,
164 atomic_read(&kn->active) >= 0 ||
165 RB_EMPTY_NODE(&kn->rb));
166 } while (!RB_EMPTY_NODE(&kn->rb));
167
/* @kn was removed while we waited; undo the lockdep acquisition */
168 if (kernfs_lockdep(kn))
169 rwsem_release(&kn->dep_map, 1, _RET_IP_);
170 return NULL;
171 }
172
173 /**
174 * kernfs_put_active - put an active reference to kernfs_node
175 * @kn: kernfs_node to put an active reference to
176 *
177 * Put an active reference to @kn. This function is noop if @kn
178 * is NULL.
179 */
180 void kernfs_put_active(struct kernfs_node *kn)
181 {
182 struct kernfs_root *root = kernfs_root(kn);
183 int v;
184
185 if (unlikely(!kn))
186 return;
187
188 if (kernfs_lockdep(kn))
189 rwsem_release(&kn->dep_map, 1, _RET_IP_);
190 v = atomic_dec_return(&kn->active);
191 if (likely(v != KN_DEACTIVATED_BIAS))
192 return;
193
194 wake_up_all(&root->deactivate_waitq);
195 }
196
197 /**
198 * kernfs_drain - drain kernfs_node
199 * @kn: kernfs_node to drain
200 *
201 * Drain existing usages of @kn. Multiple removers may invoke this function
202 * concurrently on @kn and all will return after draining is complete.
203 * Returns %true if drain is performed and kernfs_mutex was temporarily
204 * released. %false if @kn was already drained and no operation was
205 * necessary.
206 *
207 * The caller is responsible for ensuring @kn stays pinned while this
208 * function is in progress even if it gets removed by someone else.
209 */
210 static bool kernfs_drain(struct kernfs_node *kn)
211 __releases(&kernfs_mutex) __acquires(&kernfs_mutex)
212 {
213 struct kernfs_root *root = kernfs_root(kn);
214
215 lockdep_assert_held(&kernfs_mutex);
216 WARN_ON_ONCE(atomic_read(&kn->active) >= 0);
217
218 /*
219 * We want to go through the active ref lockdep annotation at least
220 * once for all node removals, but the lockdep annotation can't be
221 * nested inside kernfs_mutex and deactivation can't make forward
222 * progress if we keep dropping the mutex. Use KERNFS_JUST_DEACTIVATED
223 * to force the slow path once for each deactivation if lockdep is
224 * enabled.
225 */
226 if ((!kernfs_lockdep(kn) || !(kn->flags & KERNFS_JUST_DEACTIVATED)) &&
227 atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
228 return false;
229
230 kn->flags &= ~KERNFS_JUST_DEACTIVATED;
/* drop the mutex so in-flight users can finish and put their refs */
231 mutex_unlock(&kernfs_mutex);
232
233 if (kernfs_lockdep(kn)) {
234 rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
235 if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS)
236 lock_contended(&kn->dep_map, _RET_IP_);
237 }
238
/* block until every active reference has been put */
239 wait_event(root->deactivate_waitq,
240 atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);
241
242 if (kernfs_lockdep(kn)) {
243 lock_acquired(&kn->dep_map, _RET_IP_);
244 rwsem_release(&kn->dep_map, 1, _RET_IP_);
245 }
246
247 mutex_lock(&kernfs_mutex);
248 return true;
249 }
250
251 /**
252 * kernfs_get - get a reference count on a kernfs_node
253 * @kn: the target kernfs_node
254 */
255 void kernfs_get(struct kernfs_node *kn)
256 {
257 if (kn) {
258 WARN_ON(!atomic_read(&kn->count));
259 atomic_inc(&kn->count);
260 }
261 }
262 EXPORT_SYMBOL_GPL(kernfs_get);
263
264 /**
265 * kernfs_put - put a reference count on a kernfs_node
266 * @kn: the target kernfs_node
267 *
268 * Put a reference count of @kn and destroy it if it reached zero.
 * Destruction walks up the ancestor chain, releasing each parent ref
 * taken at link time; releasing the root node also frees its kernfs_root.
269 */
270 void kernfs_put(struct kernfs_node *kn)
271 {
272 struct kernfs_node *parent;
273 struct kernfs_root *root;
274
275 if (!kn || !atomic_dec_and_test(&kn->count))
276 return;
277 root = kernfs_root(kn);
278 repeat:
279 /*
280 * Moving/renaming is always done while holding reference.
281 * kn->parent won't change beneath us.
282 */
283 parent = kn->parent;
284
285 WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS,
286 "kernfs_put: %s/%s: released with incorrect active_ref %d\n",
287 parent ? parent->name : "", kn->name, atomic_read(&kn->active));
288
289 if (kernfs_type(kn) == KERNFS_LINK)
290 kernfs_put(kn->symlink.target_kn);
291 if (!(kn->flags & KERNFS_STATIC_NAME))
292 kfree(kn->name);
293 if (kn->iattr) {
294 if (kn->iattr->ia_secdata)
295 security_release_secctx(kn->iattr->ia_secdata,
296 kn->iattr->ia_secdata_len);
297 simple_xattrs_free(&kn->iattr->xattrs);
298 }
299 kfree(kn->iattr);
300 ida_simple_remove(&root->ino_ida, kn->ino);
301 kmem_cache_free(kernfs_node_cache, kn);
302
/* drop the ref this node held on its parent; may cascade upward */
303 kn = parent;
304 if (kn) {
305 if (atomic_dec_and_test(&kn->count))
306 goto repeat;
307 } else {
308 /* just released the root kn, free @root too */
309 ida_destroy(&root->ino_ida);
310 kfree(root);
311 }
312 }
313 EXPORT_SYMBOL_GPL(kernfs_put);
314
/*
 * d_revalidate: decide whether a cached dentry still matches the kernfs
 * node it was instantiated from.  Returns 1 if still valid, 0 to force
 * a fresh lookup, -ECHILD to bail out of RCU-walk mode.
 */
315 static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
316 {
317 struct kernfs_node *kn;
318
/* we take kernfs_mutex below and may sleep; punt in RCU walk */
319 if (flags & LOOKUP_RCU)
320 return -ECHILD;
321
322 /* Always perform fresh lookup for negatives */
323 if (!dentry->d_inode)
324 goto out_bad_unlocked;
325
326 kn = dentry->d_fsdata;
327 mutex_lock(&kernfs_mutex);
328
329 /* Force fresh lookup if removed */
330 if (kn->parent && RB_EMPTY_NODE(&kn->rb))
331 goto out_bad;
332
333 /* The kernfs node has been moved? */
334 if (dentry->d_parent->d_fsdata != kn->parent)
335 goto out_bad;
336
337 /* The kernfs node has been renamed */
338 if (strcmp(dentry->d_name.name, kn->name) != 0)
339 goto out_bad;
340
341 /* The kernfs node has been moved to a different namespace */
342 if (kn->parent && kernfs_ns_enabled(kn->parent) &&
343 kernfs_info(dentry->d_sb)->ns != kn->ns)
344 goto out_bad;
345
346 mutex_unlock(&kernfs_mutex);
347 out_valid:
348 return 1;
349 out_bad:
350 mutex_unlock(&kernfs_mutex);
351 out_bad_unlocked:
352 /*
353 * @dentry doesn't match the underlying kernfs node, drop the
354 * dentry and force lookup. If we have submounts we must allow the
355 * vfs caches to lie about the state of the filesystem to prevent
356 * leaks and other nasty things, so use check_submounts_and_drop()
357 * instead of d_drop().
358 */
359 if (check_submounts_and_drop(dentry) != 0)
360 goto out_valid;
361
362 return 0;
363 }
364
365 static void kernfs_dop_release(struct dentry *dentry)
366 {
367 kernfs_put(dentry->d_fsdata);
368 }
369
/* dentry_operations shared by every kernfs-based filesystem */
370 const struct dentry_operations kernfs_dops = {
371 .d_revalidate = kernfs_dop_revalidate,
372 .d_release = kernfs_dop_release,
373 };
374
/**
 * kernfs_new_node - allocate and initialize a new kernfs_node
 * @root: kernfs root the node will belong to (supplies the inode ida)
 * @name: name of the node; duplicated unless KERNFS_STATIC_NAME is set
 * @mode: file mode for the node
 * @flags: KERNFS_* type and behavior flags
 *
 * The new node starts with one reference, deactivated (active count at
 * KN_DEACTIVATED_BIAS, deact_depth 1) and unlinked; kernfs_add_one()
 * performs activation and linking.  Returns the node or %NULL on
 * allocation failure.
 */
375 struct kernfs_node *kernfs_new_node(struct kernfs_root *root, const char *name,
376 umode_t mode, unsigned flags)
377 {
378 char *dup_name = NULL;
379 struct kernfs_node *kn;
380 int ret;
381
382 if (!(flags & KERNFS_STATIC_NAME)) {
383 name = dup_name = kstrdup(name, GFP_KERNEL);
384 if (!name)
385 return NULL;
386 }
387
388 kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL);
389 if (!kn)
390 goto err_out1;
391
392 ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL);
393 if (ret < 0)
394 goto err_out2;
395 kn->ino = ret;
396
397 atomic_set(&kn->count, 1);
398 atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
399 kn->deact_depth = 1;
400 RB_CLEAR_NODE(&kn->rb);
401
402 kn->name = name;
403 kn->mode = mode;
404 kn->flags = flags;
405
406 return kn;
407
408 err_out2:
409 kmem_cache_free(kernfs_node_cache, kn);
410 err_out1:
411 kfree(dup_name);
412 return NULL;
413 }
414
415 /**
416 * kernfs_add_one - add kernfs_node to parent without warning
417 * @kn: kernfs_node to be added
418 * @parent: the parent kernfs_node to add @kn to
419 *
420 * Get @parent and set @kn->parent to it and increment nlink of the
421 * parent inode if @kn is a directory and link into the children list
422 * of the parent.
423 *
424 * RETURNS:
425 * 0 on success, -EEXIST if entry with the given name already
426 * exists.
427 */
428 int kernfs_add_one(struct kernfs_node *kn, struct kernfs_node *parent)
429 {
430 struct kernfs_iattrs *ps_iattr;
431 bool has_ns;
432 int ret;
433
/* pin @parent active so it can't be removed while we link under it */
434 if (!kernfs_get_active(parent))
435 return -ENOENT;
436
437 mutex_lock(&kernfs_mutex);
438
439 ret = -EINVAL;
440 has_ns = kernfs_ns_enabled(parent);
441 if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
442 has_ns ? "required" : "invalid", parent->name, kn->name))
443 goto out_unlock;
444
445 if (kernfs_type(parent) != KERNFS_DIR)
446 goto out_unlock;
447
448 kn->hash = kernfs_name_hash(kn->name, kn->ns);
449 kn->parent = parent;
/* ref dropped by kernfs_put() when @kn itself is destroyed */
450 kernfs_get(parent);
451
452 ret = kernfs_link_sibling(kn);
453 if (ret)
454 goto out_unlock;
455
456 /* Update timestamps on the parent */
457 ps_iattr = parent->iattr;
458 if (ps_iattr) {
459 struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
460 ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
461 }
462
463 /* Mark the entry added into directory tree */
464 atomic_sub(KN_DEACTIVATED_BIAS, &kn->active);
465 kn->deact_depth--;
466 ret = 0;
467 out_unlock:
468 mutex_unlock(&kernfs_mutex);
469 kernfs_put_active(parent);
470 return ret;
471 }
472
473 /**
474 * kernfs_find_ns - find kernfs_node with the given name
475 * @parent: kernfs_node to search under
476 * @name: name to look for
477 * @ns: the namespace tag to use
478 *
479 * Look for kernfs_node with name @name under @parent. Returns pointer to
480 * the found kernfs_node on success, %NULL on failure.
481 */
482 static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
483 const unsigned char *name,
484 const void *ns)
485 {
486 struct rb_node *node = parent->dir.children.rb_node;
487 bool has_ns = kernfs_ns_enabled(parent);
488 unsigned int hash;
489
490 lockdep_assert_held(&kernfs_mutex);
491
492 if (has_ns != (bool)ns) {
493 WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
494 has_ns ? "required" : "invalid", parent->name, name);
495 return NULL;
496 }
497
498 hash = kernfs_name_hash(name, ns);
499 while (node) {
500 struct kernfs_node *kn;
501 int result;
502
503 kn = rb_to_kn(node);
504 result = kernfs_name_compare(hash, name, ns, kn);
505 if (result < 0)
506 node = node->rb_left;
507 else if (result > 0)
508 node = node->rb_right;
509 else
510 return kn;
511 }
512 return NULL;
513 }
514
515 /**
516 * kernfs_find_and_get_ns - find and get kernfs_node with the given name
517 * @parent: kernfs_node to search under
518 * @name: name to look for
519 * @ns: the namespace tag to use
520 *
521 * Look for kernfs_node with name @name under @parent and get a reference
522 * if found. This function may sleep and returns pointer to the found
523 * kernfs_node on success, %NULL on failure.
524 */
525 struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
526 const char *name, const void *ns)
527 {
528 struct kernfs_node *kn;
529
530 mutex_lock(&kernfs_mutex);
531 kn = kernfs_find_ns(parent, name, ns);
532 kernfs_get(kn);
533 mutex_unlock(&kernfs_mutex);
534
535 return kn;
536 }
537 EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);
538
539 /**
540 * kernfs_create_root - create a new kernfs hierarchy
541 * @kdops: optional directory syscall operations for the hierarchy
542 * @priv: opaque data associated with the new directory
543 *
544 * Returns the root of the new hierarchy on success, ERR_PTR() value on
545 * failure.
546 */
547 struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv)
548 {
549 struct kernfs_root *root;
550 struct kernfs_node *kn;
551
552 root = kzalloc(sizeof(*root), GFP_KERNEL);
553 if (!root)
554 return ERR_PTR(-ENOMEM);
555
556 ida_init(&root->ino_ida);
557
558 kn = kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO, KERNFS_DIR);
559 if (!kn) {
560 ida_destroy(&root->ino_ida);
561 kfree(root);
562 return ERR_PTR(-ENOMEM);
563 }
564
/* new nodes are born deactivated; activate the root immediately */
565 atomic_sub(KN_DEACTIVATED_BIAS, &kn->active);
566 kn->deact_depth--;
567 kn->priv = priv;
568 kn->dir.root = root;
569
570 root->dir_ops = kdops;
571 root->kn = kn;
572 init_waitqueue_head(&root->deactivate_waitq);
573
574 return root;
575 }
576
577 /**
578 * kernfs_destroy_root - destroy a kernfs hierarchy
579 * @root: root of the hierarchy to destroy
580 *
581 * Destroy the hierarchy anchored at @root by removing all existing
582 * directories and destroying @root.
 *
 * Dropping the root node's last reference inside kernfs_put() also
 * frees the kernfs_root structure itself.
583 */
584 void kernfs_destroy_root(struct kernfs_root *root)
585 {
586 kernfs_remove(root->kn); /* will also free @root */
587 }
588
589 /**
590 * kernfs_create_dir_ns - create a directory
591 * @parent: parent in which to create a new directory
592 * @name: name of the new directory
593 * @mode: mode of the new directory
594 * @priv: opaque data associated with the new directory
595 * @ns: optional namespace tag of the directory
596 *
597 * Returns the created node on success, ERR_PTR() value on failure.
598 */
599 struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
600 const char *name, umode_t mode,
601 void *priv, const void *ns)
602 {
603 struct kernfs_node *kn;
604 int rc;
605
606 /* allocate */
607 kn = kernfs_new_node(kernfs_root(parent), name, mode | S_IFDIR,
608 KERNFS_DIR);
609 if (!kn)
610 return ERR_PTR(-ENOMEM);
611
612 kn->dir.root = parent->dir.root;
613 kn->ns = ns;
614 kn->priv = priv;
615
616 /* link in */
617 rc = kernfs_add_one(kn, parent);
618 if (!rc)
619 return kn;
620
/* linking failed; drop the initial ref, destroying the node */
621 kernfs_put(kn);
622 return ERR_PTR(rc);
623 }
624
/*
 * .lookup: resolve @dentry under directory @dir by searching the kernfs
 * children rbtree, then attach an inode.  Returns %NULL when no entry
 * exists (the VFS treats that as a negative dentry).
 */
625 static struct dentry *kernfs_iop_lookup(struct inode *dir,
626 struct dentry *dentry,
627 unsigned int flags)
628 {
629 struct dentry *ret;
630 struct kernfs_node *parent = dentry->d_parent->d_fsdata;
631 struct kernfs_node *kn;
632 struct inode *inode;
633 const void *ns = NULL;
634
635 mutex_lock(&kernfs_mutex);
636
637 if (kernfs_ns_enabled(parent))
638 ns = kernfs_info(dir->i_sb)->ns;
639
640 kn = kernfs_find_ns(parent, dentry->d_name.name, ns);
641
642 /* no such entry */
643 if (!kn) {
644 ret = NULL;
645 goto out_unlock;
646 }
/* dentry pins the node; ref dropped in kernfs_dop_release() */
647 kernfs_get(kn);
648 dentry->d_fsdata = kn;
649
650 /* attach dentry and inode */
651 inode = kernfs_get_inode(dir->i_sb, kn);
652 if (!inode) {
653 ret = ERR_PTR(-ENOMEM);
654 goto out_unlock;
655 }
656
657 /* instantiate and hash dentry */
658 ret = d_materialise_unique(dentry, inode);
659 out_unlock:
660 mutex_unlock(&kernfs_mutex);
661 return ret;
662 }
663
664 static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry,
665 umode_t mode)
666 {
667 struct kernfs_node *parent = dir->i_private;
668 struct kernfs_dir_ops *kdops = kernfs_root(parent)->dir_ops;
669
670 if (!kdops || !kdops->mkdir)
671 return -EPERM;
672
673 return kdops->mkdir(parent, dentry->d_name.name, mode);
674 }
675
676 static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
677 {
678 struct kernfs_node *kn = dentry->d_fsdata;
679 struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops;
680
681 if (!kdops || !kdops->rmdir)
682 return -EPERM;
683
684 return kdops->rmdir(kn);
685 }
686
687 static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry,
688 struct inode *new_dir, struct dentry *new_dentry)
689 {
690 struct kernfs_node *kn = old_dentry->d_fsdata;
691 struct kernfs_node *new_parent = new_dir->i_private;
692 struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops;
693
694 if (!kdops || !kdops->rename)
695 return -EPERM;
696
697 return kdops->rename(kn, new_parent, new_dentry->d_name.name);
698 }
699
/* inode_operations for kernfs directories */
700 const struct inode_operations kernfs_dir_iops = {
701 .lookup = kernfs_iop_lookup,
702 .permission = kernfs_iop_permission,
703 .setattr = kernfs_iop_setattr,
704 .getattr = kernfs_iop_getattr,
705 .setxattr = kernfs_iop_setxattr,
706 .removexattr = kernfs_iop_removexattr,
707 .getxattr = kernfs_iop_getxattr,
708 .listxattr = kernfs_iop_listxattr,
709
/* directory-modifying ops are forwarded to the hierarchy's dir_ops */
710 .mkdir = kernfs_iop_mkdir,
711 .rmdir = kernfs_iop_rmdir,
712 .rename = kernfs_iop_rename,
713 };
714
715 static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos)
716 {
717 struct kernfs_node *last;
718
719 while (true) {
720 struct rb_node *rbn;
721
722 last = pos;
723
724 if (kernfs_type(pos) != KERNFS_DIR)
725 break;
726
727 rbn = rb_first(&pos->dir.children);
728 if (!rbn)
729 break;
730
731 pos = rb_to_kn(rbn);
732 }
733
734 return last;
735 }
736
737 /**
738 * kernfs_next_descendant_post - find the next descendant for post-order walk
739 * @pos: the current position (%NULL to initiate traversal)
740 * @root: kernfs_node whose descendants to walk
741 *
742 * Find the next descendant to visit for post-order traversal of @root's
743 * descendants. @root is included in the iteration and the last node to be
744 * visited.
745 */
746 static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
747 struct kernfs_node *root)
748 {
749 struct rb_node *rbn;
750
751 lockdep_assert_held(&kernfs_mutex);
752
753 /* if first iteration, visit leftmost descendant which may be root */
754 if (!pos)
755 return kernfs_leftmost_descendant(root);
756
757 /* if we visited @root, we're done */
758 if (pos == root)
759 return NULL;
760
761 /* if there's an unvisited sibling, visit its leftmost descendant */
762 rbn = rb_next(&pos->rb);
763 if (rbn)
764 return kernfs_leftmost_descendant(rb_to_kn(rbn));
765
766 /* no sibling left, visit parent */
767 return pos->parent;
768 }
769
/*
 * Deactivate every node in @kn's subtree and drain in-flight active
 * references.  Nesting is tracked via ->deact_depth so only the first
 * deactivation applies the bias.  Caller must hold kernfs_mutex; it may
 * be temporarily released while draining.
 */
770 static void __kernfs_deactivate(struct kernfs_node *kn)
771 {
772 struct kernfs_node *pos;
773
774 lockdep_assert_held(&kernfs_mutex);
775
776 /* prevent any new usage under @kn by deactivating all nodes */
777 pos = NULL;
778 while ((pos = kernfs_next_descendant_post(pos, kn))) {
779 if (!pos->deact_depth++) {
780 WARN_ON_ONCE(atomic_read(&pos->active) < 0);
781 atomic_add(KN_DEACTIVATED_BIAS, &pos->active);
782 pos->flags |= KERNFS_JUST_DEACTIVATED;
783 }
784 }
785
786 /*
787 * Drain the subtree. If kernfs_drain() blocked to drain, which is
788 * indicated by %true return, it temporarily released kernfs_mutex
789 * and the rbtree might have been modified inbetween breaking our
790 * future walk. Restart the walk after each %true return.
791 */
792 pos = NULL;
793 while ((pos = kernfs_next_descendant_post(pos, kn))) {
794 bool drained;
795
/* pin @pos because kernfs_drain() may drop kernfs_mutex */
796 kernfs_get(pos);
797 drained = kernfs_drain(pos);
798 kernfs_put(pos);
799 if (drained)
800 pos = NULL;
801 }
802 }
803
/*
 * Undo one level of __kernfs_deactivate() over @kn's subtree; the bias
 * is removed only when a node's ->deact_depth drops back to zero.
 * Caller must hold kernfs_mutex.
 */
804 static void __kernfs_reactivate(struct kernfs_node *kn)
805 {
806 struct kernfs_node *pos;
807
808 lockdep_assert_held(&kernfs_mutex);
809
810 pos = NULL;
811 while ((pos = kernfs_next_descendant_post(pos, kn))) {
812 if (!--pos->deact_depth) {
813 WARN_ON_ONCE(atomic_read(&pos->active) >= 0);
814 atomic_sub(KN_DEACTIVATED_BIAS, &pos->active);
815 }
816 WARN_ON_ONCE(pos->deact_depth < 0);
817 }
818
819 /* some nodes reactivated, kick get_active waiters */
820 wake_up_all(&kernfs_root(kn)->deactivate_waitq);
821 }
822
823 static void __kernfs_deactivate_self(struct kernfs_node *kn)
824 {
825 /*
826 * Take ourself out of the active ref dependency chain and
827 * deactivate. If we're called without an active ref, lockdep will
828 * complain.
829 */
830 kernfs_put_active(kn);
831 __kernfs_deactivate(kn);
832 }
833
834 static void __kernfs_reactivate_self(struct kernfs_node *kn)
835 {
836 __kernfs_reactivate(kn);
837 /*
838 * Restore the active ref dropped by __kernfs_deactivate_self() so
839 * that it's balanced on return. put_active() will soon be called
840 * on @kn, so this can't break anything regardless of @kn's state.
841 */
842 atomic_inc(&kn->active);
843 if (kernfs_lockdep(kn))
844 rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_);
845 }
846
847 /**
848 * kernfs_deactivate - deactivate subtree of a node
849 * @kn: kernfs_node to deactivate subtree of
850 *
851 * Deactivate the subtree of @kn. On return, there's no active operation
852 * going on under @kn and creation or renaming of a node under @kn is
853 * blocked until @kn is reactivated or removed. This function can be
854 * called multiple times and nests properly. Each invocation should be
855 * paired with kernfs_reactivate().
856 *
857 * For a kernfs user which uses simple locking, the subsystem lock would
858 * nest inside active reference. This becomes problematic if the user
859 * tries to remove nodes while holding the subsystem lock as it would create
860 * a reverse locking dependency from the subsystem lock to active ref.
861 * This function can be used to break such reverse dependency. The user
862 * can call this function outside the subsystem lock and then proceed to
863 * invoke kernfs_remove() while holding the subsystem lock without
864 * introducing such reverse dependency.
865 */
866 void kernfs_deactivate(struct kernfs_node *kn)
867 {
868 mutex_lock(&kernfs_mutex);
869 __kernfs_deactivate(kn);
870 mutex_unlock(&kernfs_mutex);
871 }
872
873 /**
874 * kernfs_reactivate - reactivate subtree of a node
875 * @kn: kernfs_node to reactivate subtree of
876 *
877 * Undo kernfs_deactivate().
 * Each call balances exactly one earlier kernfs_deactivate() on @kn.
878 */
879 void kernfs_reactivate(struct kernfs_node *kn)
880 {
881 mutex_lock(&kernfs_mutex);
882 __kernfs_reactivate(kn);
883 mutex_unlock(&kernfs_mutex);
884 }
885
886 /**
887 * kernfs_deactivate_self - deactivate subtree of a node from its own method
888 * @kn: the self kernfs_node to deactivate subtree of
889 *
890 * The caller must be running off of a kernfs operation which is invoked
891 * with an active reference - e.g. one of kernfs_ops. Once this function
892 * is called, @kn may be removed by someone else while the enclosing method
893 * is in progress. Other than that, this function is equivalent to
894 * kernfs_deactivate() and should be paired with kernfs_reactivate_self().
895 */
896 void kernfs_deactivate_self(struct kernfs_node *kn)
897 {
898 mutex_lock(&kernfs_mutex);
/* drops the caller's active ref before deactivating - see helper */
899 __kernfs_deactivate_self(kn);
900 mutex_unlock(&kernfs_mutex);
901 }
902
903 /**
904 * kernfs_reactivate_self - reactivate subtree of a node from its own method
905 * @kn: the self kernfs_node to reactivate subtree of
906 *
907 * Undo kernfs_deactivate_self().
 * The caller's active reference, dropped by kernfs_deactivate_self(),
 * is restored before returning.
908 */
909 void kernfs_reactivate_self(struct kernfs_node *kn)
910 {
911 mutex_lock(&kernfs_mutex);
912 __kernfs_reactivate_self(kn);
913 mutex_unlock(&kernfs_mutex);
914 }
915
/*
 * Deactivate, drain and unlink @kn's entire subtree in post-order.
 * Caller must hold kernfs_mutex; it is dropped temporarily to unmap
 * mmapped files.
 */
916 static void __kernfs_remove(struct kernfs_node *kn)
917 {
918 struct kernfs_root *root = kernfs_root(kn);
919 struct kernfs_node *pos;
920
921 lockdep_assert_held(&kernfs_mutex);
922
923 pr_debug("kernfs %s: removing\n", kn->name);
924
925 __kernfs_deactivate(kn);
926
927 /* unlink the subtree node-by-node */
928 do {
929 pos = kernfs_leftmost_descendant(kn);
930
931 /*
932 * We're gonna release kernfs_mutex to unmap bin files,
933 * Make sure @pos doesn't go away inbetween.
934 */
935 kernfs_get(pos);
936
937 /*
938 * This must come before unlinking; otherwise, when
939 * there are multiple removers, some may finish before
940 * unmapping is complete.
941 */
942 if (pos->flags & KERNFS_HAS_MMAP) {
943 mutex_unlock(&kernfs_mutex);
944 kernfs_unmap_file(pos);
945 mutex_lock(&kernfs_mutex);
946 }
947
948 /*
949 * kernfs_unlink_sibling() succeeds once per node. Use it
950 * to decide who's responsible for cleanups.
951 */
952 if (!pos->parent || kernfs_unlink_sibling(pos)) {
953 struct kernfs_iattrs *ps_iattr =
954 pos->parent ? pos->parent->iattr : NULL;
955
956 /* update timestamps on the parent */
957 if (ps_iattr) {
958 ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME;
959 ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME;
960 }
961
/* drop the base ref now that the node is gone from the tree */
962 kernfs_put(pos);
963 }
964
/* drop the temporary pin taken above */
965 kernfs_put(pos);
966 } while (pos != kn);
967
968 /* some nodes killed, kick get_active waiters */
969 wake_up_all(&root->deactivate_waitq);
970 }
971
972 /**
973 * kernfs_remove - remove a kernfs_node recursively
974 * @kn: the kernfs_node to remove
975 *
976 * Remove @kn along with all its subdirectories and files.
 * May sleep: grabs kernfs_mutex and drains active references.
977 */
978 void kernfs_remove(struct kernfs_node *kn)
979 {
980 mutex_lock(&kernfs_mutex);
981 __kernfs_remove(kn);
982 mutex_unlock(&kernfs_mutex);
983 }
984
985 /**
986 * kernfs_remove_self - remove a kernfs_node from its own method
987 * @kn: the self kernfs_node to remove
988 *
989 * The caller must be running off of a kernfs operation which is invoked
990 * with an active reference - e.g. one of kernfs_ops. This can be used to
991 * implement a file operation which deletes itself.
992 *
993 * For example, the "delete" file for a sysfs device directory can be
994 * implemented by invoking kernfs_remove_self() on the "delete" file
995 * itself. This function breaks the circular dependency of trying to
996 * deactivate self while holding an active ref itself. It isn't necessary
997 * to modify the usual removal path to use kernfs_remove_self(). The
998 * "delete" implementation can simply invoke kernfs_remove_self() on self
999 * before proceeding with the usual removal path. kernfs will ignore later
1000 * kernfs_remove() on self.
1001 *
1002 * kernfs_remove_self() can be called multiple times concurrently on the
1003 * same kernfs_node. Only the first one actually performs removal and
1004 * returns %true. All others will wait until the kernfs operation which
1005 * won self-removal finishes and return %false. Note that the losers wait
1006 * for the completion of not only the winning kernfs_remove_self() but also
1007 * the whole kernfs_ops which won the arbitration. This can be used to
1008 * guarantee, for example, all concurrent writes to a "delete" file to
1009 * finish only after the whole operation is complete.
1010 */
1011 bool kernfs_remove_self(struct kernfs_node *kn)
1012 {
1013 bool ret;
1014
1015 mutex_lock(&kernfs_mutex);
1016 __kernfs_deactivate_self(kn);
1017
1018 /*
1019 * SUICIDAL is used to arbitrate among competing invocations. Only
1020 * the first one will actually perform removal. When the removal
1021 * is complete, SUICIDED is set and the active ref is restored
1022 * while holding kernfs_mutex. The ones which lost arbitration
1023 * waits for SUICIDED && drained which can happen only after the
1024 * enclosing kernfs operation which executed the winning instance
1025 * of kernfs_remove_self() finished.
1026 */
1027 if (!(kn->flags & KERNFS_SUICIDAL)) {
1028 kn->flags |= KERNFS_SUICIDAL;
1029 __kernfs_remove(kn);
1030 kn->flags |= KERNFS_SUICIDED;
1031 ret = true;
1032 } else {
1033 wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq;
1034 DEFINE_WAIT(wait);
1035
1036 while (true) {
1037 prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE);
1038
1039 if ((kn->flags & KERNFS_SUICIDED) &&
1040 atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
1041 break;
1042
/* drop the mutex so the winning remover can make progress */
1043 mutex_unlock(&kernfs_mutex);
1044 schedule();
1045 mutex_lock(&kernfs_mutex);
1046 }
1047 finish_wait(waitq, &wait);
1048 WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb));
1049 ret = false;
1050 }
1051
/* restore the caller's active ref so it's balanced on return */
1052 __kernfs_reactivate_self(kn);
1053 mutex_unlock(&kernfs_mutex);
1054 return ret;
1055 }
1056
1057 /**
1058 * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it
1059 * @parent: parent of the target
1060 * @name: name of the kernfs_node to remove
1061 * @ns: namespace tag of the kernfs_node to remove
1062 *
1063 * Look for the kernfs_node with @name and @ns under @parent and remove it.
1064 * Returns 0 on success, -ENOENT if such entry doesn't exist.
1065 */
1066 int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
1067 const void *ns)
1068 {
1069 struct kernfs_node *kn;
1070
1071 if (!parent) {
1072 WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n",
1073 name);
1074 return -ENOENT;
1075 }
1076
1077 mutex_lock(&kernfs_mutex);
1078
1079 kn = kernfs_find_ns(parent, name, ns);
1080 if (kn)
1081 __kernfs_remove(kn);
1082
1083 mutex_unlock(&kernfs_mutex);
1084
1085 if (kn)
1086 return 0;
1087 else
1088 return -ENOENT;
1089 }
1090
1091 /**
1092 * kernfs_rename_ns - move and rename a kernfs_node
1093 * @kn: target node
1094 * @new_parent: new parent to put @kn under
1095 * @new_name: new name
1096 * @new_ns: new namespace tag
 *
 * RETURNS:
 * 0 on success, -ENOENT if @kn or @new_parent couldn't be pinned active,
 * -EEXIST if @new_name already exists under @new_parent, -ENOMEM if the
 * new name couldn't be duplicated.
1097 */
1098 int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
1099 const char *new_name, const void *new_ns)
1100 {
1101 int error;
1102
1103 error = -ENOENT;
1104 if (!kernfs_get_active(new_parent))
1105 goto out;
1106 if (!kernfs_get_active(kn))
1107 goto out_put_new_parent;
1108
1109 mutex_lock(&kernfs_mutex);
1110
1111 error = 0;
1112 if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
1113 (strcmp(kn->name, new_name) == 0))
1114 goto out_unlock; /* nothing to rename */
1115
1116 error = -EEXIST;
1117 if (kernfs_find_ns(new_parent, new_name, new_ns))
1118 goto out_unlock;
1119
1120 /* rename kernfs_node */
1121 if (strcmp(kn->name, new_name) != 0) {
1122 error = -ENOMEM;
1123 new_name = kstrdup(new_name, GFP_KERNEL);
1124 if (!new_name)
1125 goto out_unlock;
1126
/* static names are never freed; just drop the flag and switch over */
1127 if (kn->flags & KERNFS_STATIC_NAME)
1128 kn->flags &= ~KERNFS_STATIC_NAME;
1129 else
1130 kfree(kn->name);
1131
1132 kn->name = new_name;
1133 }
1134
1135 /*
1136 * Move to the appropriate place in the appropriate directories rbtree.
1137 */
1138 kernfs_unlink_sibling(kn);
1139 kernfs_get(new_parent);
1140 kernfs_put(kn->parent);
1141 kn->ns = new_ns;
1142 kn->hash = kernfs_name_hash(kn->name, kn->ns);
1143 kn->parent = new_parent;
1144 kernfs_link_sibling(kn);
1145
1146 error = 0;
1147 out_unlock:
1148 mutex_unlock(&kernfs_mutex);
1149 kernfs_put_active(kn);
1150 out_put_new_parent:
1151 kernfs_put_active(new_parent);
1152 out:
1153 return error;
1154 }
1155
1156 /* Relationship between s_mode and the DT_xxx types */
1157 static inline unsigned char dt_type(struct kernfs_node *kn)
1158 {
1159 return (kn->mode >> 12) & 15;
1160 }
1161
1162 static int kernfs_dir_fop_release(struct inode *inode, struct file *filp)
1163 {
1164 kernfs_put(filp->private_data);
1165 return 0;
1166 }
1167
1168 static struct kernfs_node *kernfs_dir_pos(const void *ns,
1169 struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos)
1170 {
1171 if (pos) {
1172 int valid = pos->parent == parent && hash == pos->hash;
1173 kernfs_put(pos);
1174 if (!valid)
1175 pos = NULL;
1176 }
1177 if (!pos && (hash > 1) && (hash < INT_MAX)) {
1178 struct rb_node *node = parent->dir.children.rb_node;
1179 while (node) {
1180 pos = rb_to_kn(node);
1181
1182 if (hash < pos->hash)
1183 node = node->rb_left;
1184 else if (hash > pos->hash)
1185 node = node->rb_right;
1186 else
1187 break;
1188 }
1189 }
1190 /* Skip over entries in the wrong namespace */
1191 while (pos && pos->ns != ns) {
1192 struct rb_node *node = rb_next(&pos->rb);
1193 if (!node)
1194 pos = NULL;
1195 else
1196 pos = rb_to_kn(node);
1197 }
1198 return pos;
1199 }
1200
1201 static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
1202 struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos)
1203 {
1204 pos = kernfs_dir_pos(ns, parent, ino, pos);
1205 if (pos)
1206 do {
1207 struct rb_node *node = rb_next(&pos->rb);
1208 if (!node)
1209 pos = NULL;
1210 else
1211 pos = rb_to_kn(node);
1212 } while (pos && pos->ns != ns);
1213 return pos;
1214 }
1215
/* Emit directory entries for @file, resuming from the cursor stashed in
 * ->private_data and the hash stored in ctx->pos by a previous call. */
static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
{
	struct dentry *dentry = file->f_path.dentry;
	struct kernfs_node *parent = dentry->d_fsdata;
	struct kernfs_node *pos = file->private_data;
	const void *ns = NULL;

	if (!dir_emit_dots(file, ctx))
		return 0;
	mutex_lock(&kernfs_mutex);

	/* only show entries tagged with the superblock's namespace, if any */
	if (kernfs_ns_enabled(parent))
		ns = kernfs_info(dentry->d_sb)->ns;

	for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos);
	     pos;
	     pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) {
		const char *name = pos->name;
		unsigned int type = dt_type(pos);
		int len = strlen(name);
		ino_t ino = pos->ino;

		/*
		 * Use the name hash as the directory offset and pin @pos
		 * into ->private_data so the walk can be revalidated and
		 * resumed after the mutex is dropped (or on a later
		 * syscall).  On early return the pinned ref is dropped by
		 * the next kernfs_dir_pos() call or by
		 * kernfs_dir_fop_release().
		 */
		ctx->pos = pos->hash;
		file->private_data = pos;
		kernfs_get(pos);

		/* don't hold kernfs_mutex across the copy-out to userspace */
		mutex_unlock(&kernfs_mutex);
		if (!dir_emit(ctx, name, len, ino, type))
			return 0;
		mutex_lock(&kernfs_mutex);
	}
	mutex_unlock(&kernfs_mutex);
	file->private_data = NULL;
	/* INT_MAX is never a valid hash - see kernfs_name_hash() - so it
	 * serves as the EOF offset */
	ctx->pos = INT_MAX;
	return 0;
}
1252
1253 static loff_t kernfs_dir_fop_llseek(struct file *file, loff_t offset,
1254 int whence)
1255 {
1256 struct inode *inode = file_inode(file);
1257 loff_t ret;
1258
1259 mutex_lock(&inode->i_mutex);
1260 ret = generic_file_llseek(file, offset, whence);
1261 mutex_unlock(&inode->i_mutex);
1262
1263 return ret;
1264 }
1265
/*
 * File operations for kernfs directories.  readdir keeps its cursor (a
 * pinned kernfs_node) in file->private_data; release drops that reference.
 */
const struct file_operations kernfs_dir_fops = {
	.read		= generic_read_dir,
	.iterate	= kernfs_fop_readdir,
	.release	= kernfs_dir_fop_release,
	.llseek		= kernfs_dir_fop_llseek,
};