fs/kernfs/dir.c

   1 /*
   2  * fs/kernfs/dir.c - kernfs directory implementation
   3  *
   4  * Copyright (c) 2001-3 Patrick Mochel
   5  * Copyright (c) 2007 SUSE Linux Products GmbH
   6  * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
   7  *
   8  * This file is released under the GPLv2.
   9  */
  10
  11 #include <linux/sched.h>
  12 #include <linux/fs.h>
  13 #include <linux/namei.h>
  14 #include <linux/idr.h>
  15 #include <linux/slab.h>
  16 #include <linux/security.h>
  17 #include <linux/hash.h>
  18
  19 #include "kernfs-internal.h"
  20
/*
 * kernfs_mutex is the lock the tree-manipulation helpers in this file
 * assert (via lockdep_assert_held()) before touching node state.
 */
DEFINE_MUTEX(kernfs_mutex);
static DEFINE_SPINLOCK(kernfs_rename_lock);     /* kn->parent and ->name */
/* scratch buffer used by the pr_cont_kernfs_*() helpers */
static char kernfs_pr_cont_buf[PATH_MAX];       /* protected by rename_lock */

/* map an rb_node embedded as kernfs_node->rb back to its kernfs_node */
#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
  26
  27 static bool kernfs_active(struct kernfs_node *kn)
  28 {
  29         lockdep_assert_held(&kernfs_mutex);
  30         return atomic_read(&kn->active) >= 0;
  31 }
  32
  33 static bool kernfs_lockdep(struct kernfs_node *kn)
  34 {
  35 #ifdef CONFIG_DEBUG_LOCK_ALLOC
  36         return kn->flags & KERNFS_LOCKDEP;
  37 #else
  38         return false;
  39 #endif
  40 }
  41
  42 static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen)
  43 {
  44         return strlcpy(buf, kn->parent ? kn->name : "/", buflen);
  45 }
  46
  47 static char * __must_check kernfs_path_locked(struct kernfs_node *kn, char *buf,
  48                                               size_t buflen)
  49 {
  50         char *p = buf + buflen;
  51         int len;
  52
  53         *--p = '\0';
  54
  55         do {
  56                 len = strlen(kn->name);
  57                 if (p - buf < len + 1) {
  58                         buf[0] = '\0';
  59                         p = NULL;
  60                         break;
  61                 }
  62                 p -= len;
  63                 memcpy(p, kn->name, len);
  64                 *--p = '/';
  65                 kn = kn->parent;
  66         } while (kn && kn->parent);
  67
  68         return p;
  69 }
  70
  71 /**
  72  * kernfs_name - obtain the name of a given node
  73  * @kn: kernfs_node of interest
  74  * @buf: buffer to copy @kn's name into
  75  * @buflen: size of @buf
  76  *
  77  * Copies the name of @kn into @buf of @buflen bytes.  The behavior is
  78  * similar to strlcpy().  It returns the length of @kn's name and if @buf
  79  * isn't long enough, it's filled upto @buflen-1 and nul terminated.
  80  *
  81  * This function can be called from any context.
  82  */
  83 int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
  84 {
  85         unsigned long flags;
  86         int ret;
  87
  88         spin_lock_irqsave(&kernfs_rename_lock, flags);
  89         ret = kernfs_name_locked(kn, buf, buflen);
  90         spin_unlock_irqrestore(&kernfs_rename_lock, flags);
  91         return ret;
  92 }
  93
  94 /**
  95  * kernfs_path - build full path of a given node
  96  * @kn: kernfs_node of interest
  97  * @buf: buffer to copy @kn's name into
  98  * @buflen: size of @buf
  99  *
 100  * Builds and returns the full path of @kn in @buf of @buflen bytes.  The
 101  * path is built from the end of @buf so the returned pointer usually
 102  * doesn't match @buf.  If @buf isn't long enough, @buf is nul terminated
 103  * and %NULL is returned.
 104  */
 105 char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen)
 106 {
 107         unsigned long flags;
 108         char *p;
 109
 110         spin_lock_irqsave(&kernfs_rename_lock, flags);
 111         p = kernfs_path_locked(kn, buf, buflen);
 112         spin_unlock_irqrestore(&kernfs_rename_lock, flags);
 113         return p;
 114 }
 115 EXPORT_SYMBOL_GPL(kernfs_path);
 116
 117 /**
 118  * pr_cont_kernfs_name - pr_cont name of a kernfs_node
 119  * @kn: kernfs_node of interest
 120  *
 121  * This function can be called from any context.
 122  */
 123 void pr_cont_kernfs_name(struct kernfs_node *kn)
 124 {
 125         unsigned long flags;
 126
 127         spin_lock_irqsave(&kernfs_rename_lock, flags);
 128
 129         kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf));
 130         pr_cont("%s", kernfs_pr_cont_buf);
 131
 132         spin_unlock_irqrestore(&kernfs_rename_lock, flags);
 133 }
 134
 135 /**
 136  * pr_cont_kernfs_path - pr_cont path of a kernfs_node
 137  * @kn: kernfs_node of interest
 138  *
 139  * This function can be called from any context.
 140  */
 141 void pr_cont_kernfs_path(struct kernfs_node *kn)
 142 {
 143         unsigned long flags;
 144         char *p;
 145
 146         spin_lock_irqsave(&kernfs_rename_lock, flags);
 147
 148         p = kernfs_path_locked(kn, kernfs_pr_cont_buf,
 149                                sizeof(kernfs_pr_cont_buf));
 150         if (p)
 151                 pr_cont("%s", p);
 152         else
 153                 pr_cont("<name too long>");
 154
 155         spin_unlock_irqrestore(&kernfs_rename_lock, flags);
 156 }
 157
 158 /**
 159  * kernfs_get_parent - determine the parent node and pin it
 160  * @kn: kernfs_node of interest
 161  *
 162  * Determines @kn's parent, pins and returns it.  This function can be
 163  * called from any context.
 164  */
 165 struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn)
 166 {
 167         struct kernfs_node *parent;
 168         unsigned long flags;
 169
 170         spin_lock_irqsave(&kernfs_rename_lock, flags);
 171         parent = kn->parent;
 172         kernfs_get(parent);
 173         spin_unlock_irqrestore(&kernfs_rename_lock, flags);
 174
 175         return parent;
 176 }
 177
 178 /**
 179  *      kernfs_name_hash
 180  *      @name: Null terminated string to hash
 181  *      @ns:   Namespace tag to hash
 182  *
 183  *      Returns 31 bit hash of ns + name (so it fits in an off_t )
 184  */
 185 static unsigned int kernfs_name_hash(const char *name, const void *ns)
 186 {
 187         unsigned long hash = init_name_hash();
 188         unsigned int len = strlen(name);
 189         while (len--)
 190                 hash = partial_name_hash(*name++, hash);
 191         hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31));
 192         hash &= 0x7fffffffU;
 193         /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
 194         if (hash < 2)
 195                 hash += 2;
 196         if (hash >= INT_MAX)
 197                 hash = INT_MAX - 1;
 198         return hash;
 199 }
 200
 201 static int kernfs_name_compare(unsigned int hash, const char *name,
 202                                const void *ns, const struct kernfs_node *kn)
 203 {
 204         if (hash != kn->hash)
 205                 return hash - kn->hash;
 206         if (ns != kn->ns)
 207                 return ns - kn->ns;
 208         return strcmp(name, kn->name);
 209 }
 210
 211 static int kernfs_sd_compare(const struct kernfs_node *left,
 212                              const struct kernfs_node *right)
 213 {
 214         return kernfs_name_compare(left->hash, left->name, left->ns, right);
 215 }
 216
 217 /**
 218  *      kernfs_link_sibling - link kernfs_node into sibling rbtree
 219  *      @kn: kernfs_node of interest
 220  *
 221  *      Link @kn into its sibling rbtree which starts from
 222  *      @kn->parent->dir.children.
 223  *
 224  *      Locking:
 225  *      mutex_lock(kernfs_mutex)
 226  *
 227  *      RETURNS:
 228  *      0 on susccess -EEXIST on failure.
 229  */
 230 static int kernfs_link_sibling(struct kernfs_node *kn)
 231 {
 232         struct rb_node **node = &kn->parent->dir.children.rb_node;
 233         struct rb_node *parent = NULL;
 234
 235         if (kernfs_type(kn) == KERNFS_DIR)
 236                 kn->parent->dir.subdirs++;
 237
 238         while (*node) {
 239                 struct kernfs_node *pos;
 240                 int result;
 241
 242                 pos = rb_to_kn(*node);
 243                 parent = *node;
 244                 result = kernfs_sd_compare(kn, pos);
 245                 if (result < 0)
 246                         node = &pos->rb.rb_left;
 247                 else if (result > 0)
 248                         node = &pos->rb.rb_right;
 249                 else
 250                         return -EEXIST;
 251         }
 252         /* add new node and rebalance the tree */
 253         rb_link_node(&kn->rb, parent, node);
 254         rb_insert_color(&kn->rb, &kn->parent->dir.children);
 255         return 0;
 256 }
 257
 258 /**
 259  *      kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree
 260  *      @kn: kernfs_node of interest
 261  *
 262  *      Try to unlink @kn from its sibling rbtree which starts from
 263  *      kn->parent->dir.children.  Returns %true if @kn was actually
 264  *      removed, %false if @kn wasn't on the rbtree.
 265  *
 266  *      Locking:
 267  *      mutex_lock(kernfs_mutex)
 268  */
 269 static bool kernfs_unlink_sibling(struct kernfs_node *kn)
 270 {
 271         if (RB_EMPTY_NODE(&kn->rb))
 272                 return false;
 273
 274         if (kernfs_type(kn) == KERNFS_DIR)
 275                 kn->parent->dir.subdirs--;
 276
 277         rb_erase(&kn->rb, &kn->parent->dir.children);
 278         RB_CLEAR_NODE(&kn->rb);
 279         return true;
 280 }
 281
 282 /**
 283  *      kernfs_get_active - get an active reference to kernfs_node
 284  *      @kn: kernfs_node to get an active reference to
 285  *
 286  *      Get an active reference of @kn.  This function is noop if @kn
 287  *      is NULL.
 288  *
 289  *      RETURNS:
 290  *      Pointer to @kn on success, NULL on failure.
 291  */
 292 struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
 293 {
 294         if (unlikely(!kn))
 295                 return NULL;
 296
 297         if (!atomic_inc_unless_negative(&kn->active))
 298                 return NULL;
 299
 300         if (kernfs_lockdep(kn))
 301                 rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_);
 302         return kn;
 303 }
 304
 305 /**
 306  *      kernfs_put_active - put an active reference to kernfs_node
 307  *      @kn: kernfs_node to put an active reference to
 308  *
 309  *      Put an active reference to @kn.  This function is noop if @kn
 310  *      is NULL.
 311  */
 312 void kernfs_put_active(struct kernfs_node *kn)
 313 {
 314         struct kernfs_root *root = kernfs_root(kn);
 315         int v;
 316
 317         if (unlikely(!kn))
 318                 return;
 319
 320         if (kernfs_lockdep(kn))
 321                 rwsem_release(&kn->dep_map, 1, _RET_IP_);
 322         v = atomic_dec_return(&kn->active);
 323         if (likely(v != KN_DEACTIVATED_BIAS))
 324                 return;
 325
 326         wake_up_all(&root->deactivate_waitq);
 327 }
 328
/**
 * kernfs_drain - drain kernfs_node
 * @kn: kernfs_node to drain
 *
 * Drain existing usages and nuke all existing mmaps of @kn.  Multiple
 * removers may invoke this function concurrently on @kn and all will
 * return after draining is complete.
 *
 * Must be called with kernfs_mutex held and is expected to be called on a
 * node that is no longer active (a still-active @kn triggers a one-time
 * warning).  kernfs_mutex is dropped for the duration of the wait and
 * re-acquired before returning.
 */
static void kernfs_drain(struct kernfs_node *kn)
        __releases(&kernfs_mutex) __acquires(&kernfs_mutex)
{
        struct kernfs_root *root = kernfs_root(kn);

        lockdep_assert_held(&kernfs_mutex);
        WARN_ON_ONCE(kernfs_active(kn));

        /* the wait below is done without kernfs_mutex */
        mutex_unlock(&kernfs_mutex);

        /*
         * Lockdep annotation only: model the drain as acquiring @kn's
         * dep_map, and report contention if ->active has not yet settled
         * at KN_DEACTIVATED_BIAS.
         */
        if (kernfs_lockdep(kn)) {
                rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
                if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS)
                        lock_contended(&kn->dep_map, _RET_IP_);
        }

        /*
         * Every caller waits here until ->active is exactly
         * KN_DEACTIVATED_BIAS; kernfs_put_active() wakes this waitqueue
         * when its decrement reaches that value.
         */
        wait_event(root->deactivate_waitq,
                   atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);

        /* close the lockdep annotation opened above */
        if (kernfs_lockdep(kn)) {
                lock_acquired(&kn->dep_map, _RET_IP_);
                rwsem_release(&kn->dep_map, 1, _RET_IP_);
        }

        /*
         * NOTE(review): defined outside this file; presumably this is the
         * "nuke all existing mmaps" step from the header - confirm.
         */
        kernfs_unmap_bin_file(kn);

        mutex_lock(&kernfs_mutex);
}
 366
 367 /**
 368  * kernfs_get - get a reference count on a kernfs_node
 369  * @kn: the target kernfs_node
 370  */
 371 void kernfs_get(struct kernfs_node *kn)
 372 {
 373         if (kn) {
 374                 WARN_ON(!atomic_read(&kn->count));
 375                 atomic_inc(&kn->count);
 376         }
 377 }
 378 EXPORT_SYMBOL_GPL(kernfs_get);
 379
 380 /**
 381  * kernfs_put - put a reference count on a kernfs_node
 382  * @kn: the target kernfs_node
 383  *
 384  * Put a reference count of @kn and destroy it if it reached zero.
 385  */
 386 void kernfs_put(struct kernfs_node *kn)
 387 {
 388         struct kernfs_node *parent;
 389         struct kernfs_root *root;
 390
 391         if (!kn || !atomic_dec_and_test(&kn->count))
 392                 return;
 393         root = kernfs_root(kn);
 394  repeat:
 395         /*
 396          * Moving/renaming is always done while holding reference.
 397          * kn->parent won't change beneath us.
 398          */
 399         parent = kn->parent;
 400
 401         WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS,
 402                   "kernfs_put: %s/%s: released with incorrect active_ref %d\n",
 403                   parent ? parent->name : "", kn->name, atomic_read(&kn->active));
 404
 405         if (kernfs_type(kn) == KERNFS_LINK)
 406                 kernfs_put(kn->symlink.target_kn);
 407         if (!(kn->flags & KERNFS_STATIC_NAME))
 408                 kfree(kn->name);
 409         if (kn->iattr) {
 410                 if (kn->iattr->ia_secdata)
 411                         security_release_secctx(kn->iattr->ia_secdata,
 412                                                 kn->iattr->ia_secdata_len);
 413                 simple_xattrs_free(&kn->iattr->xattrs);
 414         }
 415         kfree(kn->iattr);
 416         ida_simple_remove(&root->ino_ida, kn->ino);
 417         kmem_cache_free(kernfs_node_cache, kn);
 418
 419         kn = parent;
 420         if (kn) {
 421                 if (atomic_dec_and_test(&kn->count))
 422                         goto repeat;
 423         } else {
 424                 /* just released the root kn, free @root too */
 425                 ida_destroy(&root->ino_ida);
 426                 kfree(root);
 427         }
 428 }
 429 EXPORT_SYMBOL_GPL(kernfs_put);
 430
 431 static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
 432 {
 433         struct kernfs_node *kn;
 434
 435         if (flags & LOOKUP_RCU)
 436                 return -ECHILD;
 437
 438         /* Always perform fresh lookup for negatives */
 439         if (!dentry->d_inode)
 440                 goto out_bad_unlocked;
 441
 442         kn = dentry->d_fsdata;
 443         mutex_lock(&kernfs_mutex);
 444
 445         /* The kernfs node has been deactivated */
 446         if (!kernfs_active(kn))
 447                 goto out_bad;
 448
 449         /* The kernfs node has been moved? */
 450         if (dentry->d_parent->d_fsdata != kn->parent)
 451                 goto out_bad;
 452
 453         /* The kernfs node has been renamed */
 454         if (strcmp(dentry->d_name.name, kn->name) != 0)
 455                 goto out_bad;
 456
 457         /* The kernfs node has been moved to a different namespace */
 458         if (kn->parent && kernfs_ns_enabled(kn->parent) &&
 459             kernfs_info(dentry->d_sb)->ns != kn->ns)
 460                 goto out_bad;
 461
 462         mutex_unlock(&kernfs_mutex);
 463 out_valid:
 464         return 1;
 465 out_bad:
 466         mutex_unlock(&kernfs_mutex);
 467 out_bad_unlocked:
 468         /*
 469          * @dentry doesn't match the underlying kernfs node, drop the
 470          * dentry and force lookup.  If we have submounts we must allow the
 471          * vfs caches to lie about the state of the filesystem to prevent
 472          * leaks and other nasty things, so use check_submounts_and_drop()
 473          * instead of d_drop().
 474          */
 475         if (check_submounts_and_drop(dentry) != 0)
 476                 goto out_valid;
 477
 478         return 0;
 479 }
 480
 481 static void kernfs_dop_release(struct dentry *dentry)
 482 {
 483         kernfs_put(dentry->d_fsdata);
 484 }
 485
/*
 * Dentry operations for kernfs dentries: validate a cached dentry against
 * its kernfs_node, and drop the node reference kept in ->d_fsdata when the
 * dentry is released.
 */
const struct dentry_operations kernfs_dops = {
        .d_revalidate   = kernfs_dop_revalidate,
        .d_release      = kernfs_dop_release,
};
 490
 491 /**
 492  * kernfs_node_from_dentry - determine kernfs_node associated with a dentry
 493  * @dentry: the dentry in question
 494  *
 495  * Return the kernfs_node associated with @dentry.  If @dentry is not a
 496  * kernfs one, %NULL is returned.
 497  *
 498  * While the returned kernfs_node will stay accessible as long as @dentry
 499  * is accessible, the returned node can be in any state and the caller is
 500  * fully responsible for determining what's accessible.
 501  */
 502 struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry)
 503 {
 504         if (dentry->d_sb->s_op == &kernfs_sops)
 505                 return dentry->d_fsdata;
 506         return NULL;
 507 }
 508
 509 static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
 510                                              const char *name, umode_t mode,
 511                                              unsigned flags)
 512 {
 513         char *dup_name = NULL;
 514         struct kernfs_node *kn;
 515         int ret;
 516
 517         if (!(flags & KERNFS_STATIC_NAME)) {
 518                 name = dup_name = kstrdup(name, GFP_KERNEL);
 519                 if (!name)
 520                         return NULL;
 521         }
 522
 523         kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL);
 524         if (!kn)
 525                 goto err_out1;
 526
 527         ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL);
 528         if (ret < 0)
 529                 goto err_out2;
 530         kn->ino = ret;
 531
 532         atomic_set(&kn->count, 1);
 533         atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
 534         RB_CLEAR_NODE(&kn->rb);
 535
 536         kn->name = name;
 537         kn->mode = mode;
 538         kn->flags = flags;
 539
 540         return kn;
 541
 542  err_out2:
 543         kmem_cache_free(kernfs_node_cache, kn);
 544  err_out1:
 545         kfree(dup_name);
 546         return NULL;
 547 }
 548
 549 struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
 550                                     const char *name, umode_t mode,
 551                                     unsigned flags)
 552 {
 553         struct kernfs_node *kn;
 554
 555         kn = __kernfs_new_node(kernfs_root(parent), name, mode, flags);
 556         if (kn) {
 557                 kernfs_get(parent);
 558                 kn->parent = parent;
 559         }
 560         return kn;
 561 }
 562
 563 /**
 564  *      kernfs_add_one - add kernfs_node to parent without warning
 565  *      @kn: kernfs_node to be added
 566  *
 567  *      The caller must already have initialized @kn->parent.  This
 568  *      function increments nlink of the parent's inode if @kn is a
 569  *      directory and link into the children list of the parent.
 570  *
 571  *      RETURNS:
 572  *      0 on success, -EEXIST if entry with the given name already
 573  *      exists.
 574  */
 575 int kernfs_add_one(struct kernfs_node *kn)
 576 {
 577         struct kernfs_node *parent = kn->parent;
 578         struct kernfs_iattrs *ps_iattr;
 579         bool has_ns;
 580         int ret;
 581
 582         mutex_lock(&kernfs_mutex);
 583
 584         ret = -EINVAL;
 585         has_ns = kernfs_ns_enabled(parent);
 586         if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
 587                  has_ns ? "required" : "invalid", parent->name, kn->name))
 588                 goto out_unlock;
 589
 590         if (kernfs_type(parent) != KERNFS_DIR)
 591                 goto out_unlock;
 592
 593         ret = -ENOENT;
 594         if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent))
 595                 goto out_unlock;
 596
 597         kn->hash = kernfs_name_hash(kn->name, kn->ns);
 598
 599         ret = kernfs_link_sibling(kn);
 600         if (ret)
 601                 goto out_unlock;
 602
 603         /* Update timestamps on the parent */
 604         ps_iattr = parent->iattr;
 605         if (ps_iattr) {
 606                 struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
 607                 ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
 608         }
 609
 610         mutex_unlock(&kernfs_mutex);
 611
 612         /*
 613          * Activate the new node unless CREATE_DEACTIVATED is requested.
 614          * If not activated here, the kernfs user is responsible for
 615          * activating the node with kernfs_activate().  A node which hasn't
 616          * been activated is not visible to userland and its removal won't
 617          * trigger deactivation.
 618          */
 619         if (!(kernfs_root(kn)->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
 620                 kernfs_activate(kn);
 621         return 0;
 622
 623 out_unlock:
 624         mutex_unlock(&kernfs_mutex);
 625         return ret;
 626 }
 627
 628 /**
 629  * kernfs_find_ns - find kernfs_node with the given name
 630  * @parent: kernfs_node to search under
 631  * @name: name to look for
 632  * @ns: the namespace tag to use
 633  *
 634  * Look for kernfs_node with name @name under @parent.  Returns pointer to
 635  * the found kernfs_node on success, %NULL on failure.
 636  */
 637 static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
 638                                           const unsigned char *name,
 639                                           const void *ns)
 640 {
 641         struct rb_node *node = parent->dir.children.rb_node;
 642         bool has_ns = kernfs_ns_enabled(parent);
 643         unsigned int hash;
 644
 645         lockdep_assert_held(&kernfs_mutex);
 646
 647         if (has_ns != (bool)ns) {
 648                 WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
 649                      has_ns ? "required" : "invalid", parent->name, name);
 650                 return NULL;
 651         }
 652
 653         hash = kernfs_name_hash(name, ns);
 654         while (node) {
 655                 struct kernfs_node *kn;
 656                 int result;
 657
 658                 kn = rb_to_kn(node);
 659                 result = kernfs_name_compare(hash, name, ns, kn);
 660                 if (result < 0)
 661                         node = node->rb_left;
 662                 else if (result > 0)
 663                         node = node->rb_right;
 664                 else
 665                         return kn;
 666         }
 667         return NULL;
 668 }
 669
 670 /**
 671  * kernfs_find_and_get_ns - find and get kernfs_node with the given name
 672  * @parent: kernfs_node to search under
 673  * @name: name to look for
 674  * @ns: the namespace tag to use
 675  *
 676  * Look for kernfs_node with name @name under @parent and get a reference
 677  * if found.  This function may sleep and returns pointer to the found
 678  * kernfs_node on success, %NULL on failure.
 679  */
 680 struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
 681                                            const char *name, const void *ns)
 682 {
 683         struct kernfs_node *kn;
 684
 685         mutex_lock(&kernfs_mutex);
 686         kn = kernfs_find_ns(parent, name, ns);
 687         kernfs_get(kn);
 688         mutex_unlock(&kernfs_mutex);
 689
 690         return kn;
 691 }
 692 EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);
 693
 694 /**
 695  * kernfs_create_root - create a new kernfs hierarchy
 696  * @scops: optional syscall operations for the hierarchy
 697  * @flags: KERNFS_ROOT_* flags
 698  * @priv: opaque data associated with the new directory
 699  *
 700  * Returns the root of the new hierarchy on success, ERR_PTR() value on
 701  * failure.
 702  */
 703 struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
 704                                        unsigned int flags, void *priv)
 705 {
 706         struct kernfs_root *root;
 707         struct kernfs_node *kn;
 708
 709         root = kzalloc(sizeof(*root), GFP_KERNEL);
 710         if (!root)
 711                 return ERR_PTR(-ENOMEM);
 712
 713         ida_init(&root->ino_ida);
 714
 715         kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO,
 716                                KERNFS_DIR);
 717         if (!kn) {
 718                 ida_destroy(&root->ino_ida);
 719                 kfree(root);
 720                 return ERR_PTR(-ENOMEM);
 721         }
 722
 723         kn->priv = priv;
 724         kn->dir.root = root;
 725
 726         root->syscall_ops = scops;
 727         root->flags = flags;
 728         root->kn = kn;
 729         init_waitqueue_head(&root->deactivate_waitq);
 730
 731         if (!(root->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
 732                 kernfs_activate(kn);
 733
 734         return root;
 735 }
 736
 737 /**
 738  * kernfs_destroy_root - destroy a kernfs hierarchy
 739  * @root: root of the hierarchy to destroy
 740  *
 741  * Destroy the hierarchy anchored at @root by removing all existing
 742  * directories and destroying @root.
 743  */
 744 void kernfs_destroy_root(struct kernfs_root *root)
 745 {
 746         kernfs_remove(root->kn);        /* will also free @root */
 747 }
 748
 749 /**
 750  * kernfs_create_dir_ns - create a directory
 751  * @parent: parent in which to create a new directory
 752  * @name: name of the new directory
 753  * @mode: mode of the new directory
 754  * @priv: opaque data associated with the new directory
 755  * @ns: optional namespace tag of the directory
 756  *
 757  * Returns the created node on success, ERR_PTR() value on failure.
 758  */
 759 struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
 760                                          const char *name, umode_t mode,
 761                                          void *priv, const void *ns)
 762 {
 763         struct kernfs_node *kn;
 764         int rc;
 765
 766         /* allocate */
 767         kn = kernfs_new_node(parent, name, mode | S_IFDIR, KERNFS_DIR);
 768         if (!kn)
 769                 return ERR_PTR(-ENOMEM);
 770
 771         kn->dir.root = parent->dir.root;
 772         kn->ns = ns;
 773         kn->priv = priv;
 774
 775         /* link in */
 776         rc = kernfs_add_one(kn);
 777         if (!rc)
 778                 return kn;
 779
 780         kernfs_put(kn);
 781         return ERR_PTR(rc);
 782 }
 783
 784 static struct dentry *kernfs_iop_lookup(struct inode *dir,
 785                                         struct dentry *dentry,
 786                                         unsigned int flags)
 787 {
 788         struct dentry *ret;
 789         struct kernfs_node *parent = dentry->d_parent->d_fsdata;
 790         struct kernfs_node *kn;
 791         struct inode *inode;
 792         const void *ns = NULL;
 793
 794         mutex_lock(&kernfs_mutex);
 795
 796         if (kernfs_ns_enabled(parent))
 797                 ns = kernfs_info(dir->i_sb)->ns;
 798
 799         kn = kernfs_find_ns(parent, dentry->d_name.name, ns);
 800
 801         /* no such entry */
 802         if (!kn || !kernfs_active(kn)) {
 803                 ret = NULL;
 804                 goto out_unlock;
 805         }
 806         kernfs_get(kn);
 807         dentry->d_fsdata = kn;
 808
 809         /* attach dentry and inode */
 810         inode = kernfs_get_inode(dir->i_sb, kn);
 811         if (!inode) {
 812                 ret = ERR_PTR(-ENOMEM);
 813                 goto out_unlock;
 814         }
 815
 816         /* instantiate and hash dentry */
 817         ret = d_materialise_unique(dentry, inode);
 818  out_unlock:
 819         mutex_unlock(&kernfs_mutex);
 820         return ret;
 821 }
 822
 823 static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry,
 824                             umode_t mode)
 825 {
 826         struct kernfs_node *parent = dir->i_private;
 827         struct kernfs_syscall_ops *scops = kernfs_root(parent)->syscall_ops;
 828         int ret;
 829
 830         if (!scops || !scops->mkdir)
 831                 return -EPERM;
 832
 833         if (!kernfs_get_active(parent))
 834                 return -ENODEV;
 835
 836         ret = scops->mkdir(parent, dentry->d_name.name, mode);
 837
 838         kernfs_put_active(parent);
 839         return ret;
 840 }
 841
 842 static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
 843 {
 844         struct kernfs_node *kn  = dentry->d_fsdata;
 845         struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
 846         int ret;
 847
 848         if (!scops || !scops->rmdir)
 849                 return -EPERM;
 850
 851         if (!kernfs_get_active(kn))
 852                 return -ENODEV;
 853
 854         ret = scops->rmdir(kn);
 855
 856         kernfs_put_active(kn);
 857         return ret;
 858 }
 859
 860 static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry,
 861                              struct inode *new_dir, struct dentry *new_dentry)
 862 {
 863         struct kernfs_node *kn  = old_dentry->d_fsdata;
 864         struct kernfs_node *new_parent = new_dir->i_private;
 865         struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
 866         int ret;
 867
 868         if (!scops || !scops->rename)
 869                 return -EPERM;
 870
 871         if (!kernfs_get_active(kn))
 872                 return -ENODEV;
 873
 874         if (!kernfs_get_active(new_parent)) {
 875                 kernfs_put_active(kn);
 876                 return -ENODEV;
 877         }
 878
 879         ret = scops->rename(kn, new_parent, new_dentry->d_name.name);
 880
 881         kernfs_put_active(new_parent);
 882         kernfs_put_active(kn);
 883         return ret;
 884 }
 885
/*
 * Inode operations for kernfs directories.  Lookup, mkdir, rmdir and
 * rename are implemented in this file; the permission and attribute
 * handlers are not defined in this part of the file.
 */
const struct inode_operations kernfs_dir_iops = {
        .lookup         = kernfs_iop_lookup,
        .permission     = kernfs_iop_permission,
        .setattr        = kernfs_iop_setattr,
        .getattr        = kernfs_iop_getattr,
        .setxattr       = kernfs_iop_setxattr,
        .removexattr    = kernfs_iop_removexattr,
        .getxattr       = kernfs_iop_getxattr,
        .listxattr      = kernfs_iop_listxattr,

        /* these forward to the hierarchy's kernfs_syscall_ops */
        .mkdir          = kernfs_iop_mkdir,
        .rmdir          = kernfs_iop_rmdir,
        .rename         = kernfs_iop_rename,
};
 900
 901 static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos)
 902 {
 903         struct kernfs_node *last;
 904
 905         while (true) {
 906                 struct rb_node *rbn;
 907
 908                 last = pos;
 909
 910                 if (kernfs_type(pos) != KERNFS_DIR)
 911                         break;
 912
 913                 rbn = rb_first(&pos->dir.children);
 914                 if (!rbn)
 915                         break;
 916
 917                 pos = rb_to_kn(rbn);
 918         }
 919
 920         return last;
 921 }
 922
 923 /**
 924  * kernfs_next_descendant_post - find the next descendant for post-order walk
 925  * @pos: the current position (%NULL to initiate traversal)
 926  * @root: kernfs_node whose descendants to walk
 927  *
 928  * Find the next descendant to visit for post-order traversal of @root's
 929  * descendants.  @root is included in the iteration and the last node to be
 930  * visited.
 931  */
 932 static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
 933                                                        struct kernfs_node *root)
 934 {
 935         struct rb_node *rbn;
 936
 937         lockdep_assert_held(&kernfs_mutex);
 938
 939         /* if first iteration, visit leftmost descendant which may be root */
 940         if (!pos)
 941                 return kernfs_leftmost_descendant(root);
 942
 943         /* if we visited @root, we're done */
 944         if (pos == root)
 945                 return NULL;
 946
 947         /* if there's an unvisited sibling, visit its leftmost descendant */
 948         rbn = rb_next(&pos->rb);
 949         if (rbn)
 950                 return kernfs_leftmost_descendant(rb_to_kn(rbn));
 951
 952         /* no sibling left, visit parent */
 953         return pos->parent;
 954 }
 955
 956 /**
 957  * kernfs_activate - activate a node which started deactivated
 958  * @kn: kernfs_node whose subtree is to be activated
 959  *
 960  * If the root has KERNFS_ROOT_CREATE_DEACTIVATED set, a newly created node
 961  * needs to be explicitly activated.  A node which hasn't been activated
 962  * isn't visible to userland and deactivation is skipped during its
 963  * removal.  This is useful to construct atomic init sequences where
 964  * creation of multiple nodes should either succeed or fail atomically.
 965  *
 966  * The caller is responsible for ensuring that this function is not called
 967  * after kernfs_remove*() is invoked on @kn.
 968  */
 969 void kernfs_activate(struct kernfs_node *kn)
 970 {
 971         struct kernfs_node *pos;
 972
 973         mutex_lock(&kernfs_mutex);
 974
 975         pos = NULL;
 976         while ((pos = kernfs_next_descendant_post(pos, kn))) {
 977                 if (!pos || (pos->flags & KERNFS_ACTIVATED))
 978                         continue;
 979
 980                 WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb));
 981                 WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS);
 982
 983                 atomic_sub(KN_DEACTIVATED_BIAS, &pos->active);
 984                 pos->flags |= KERNFS_ACTIVATED;
 985         }
 986
 987         mutex_unlock(&kernfs_mutex);
 988 }
 989
 990 static void __kernfs_remove(struct kernfs_node *kn)
 991 {
 992         struct kernfs_node *pos;
 993
 994         lockdep_assert_held(&kernfs_mutex);
 995
 996         /*
 997          * Short-circuit if non-root @kn has already finished removal.
 998          * This is for kernfs_remove_self() which plays with active ref
 999          * after removal.
1000          */
1001         if (!kn || (kn->parent && RB_EMPTY_NODE(&kn->rb)))
1002                 return;
1003
1004         pr_debug("kernfs %s: removing\n", kn->name);
1005
1006         /* prevent any new usage under @kn by deactivating all nodes */
1007         pos = NULL;
1008         while ((pos = kernfs_next_descendant_post(pos, kn)))
1009                 if (kernfs_active(pos))
1010                         atomic_add(KN_DEACTIVATED_BIAS, &pos->active);
1011
1012         /* deactivate and unlink the subtree node-by-node */
1013         do {
1014                 pos = kernfs_leftmost_descendant(kn);
1015
1016                 /*
1017                  * kernfs_drain() drops kernfs_mutex temporarily and @pos's
1018                  * base ref could have been put by someone else by the time
1019                  * the function returns.  Make sure it doesn't go away
1020                  * underneath us.
1021                  */
1022                 kernfs_get(pos);
1023
1024                 /*
1025                  * Drain iff @kn was activated.  This avoids draining and
1026                  * its lockdep annotations for nodes which have never been
1027                  * activated and allows embedding kernfs_remove() in create
1028                  * error paths without worrying about draining.
1029                  */
1030                 if (kn->flags & KERNFS_ACTIVATED)
1031                         kernfs_drain(pos);
1032                 else
1033                         WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
1034
1035                 /*
1036                  * kernfs_unlink_sibling() succeeds once per node.  Use it
1037                  * to decide who's responsible for cleanups.
1038                  */
1039                 if (!pos->parent || kernfs_unlink_sibling(pos)) {
1040                         struct kernfs_iattrs *ps_iattr =
1041                                 pos->parent ? pos->parent->iattr : NULL;
1042
1043                         /* update timestamps on the parent */
1044                         if (ps_iattr) {
1045                                 ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME;
1046                                 ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME;
1047                         }
1048
1049                         kernfs_put(pos);
1050                 }
1051
1052                 kernfs_put(pos);
1053         } while (pos != kn);
1054 }
1055
1056 /**
1057  * kernfs_remove - remove a kernfs_node recursively
1058  * @kn: the kernfs_node to remove
1059  *
1060  * Remove @kn along with all its subdirectories and files.
1061  */
1062 void kernfs_remove(struct kernfs_node *kn)
1063 {
1064         mutex_lock(&kernfs_mutex);
1065         __kernfs_remove(kn);
1066         mutex_unlock(&kernfs_mutex);
1067 }
1068
/**
 * kernfs_break_active_protection - break out of active protection
 * @kn: the self kernfs_node
 *
 * The caller must be running off of a kernfs operation which is invoked
 * with an active reference - e.g. one of kernfs_ops.  Every call must be
 * paired with a later call to kernfs_unbreak_active_protection().
 *
 * The active reference the caller holds on @kn is released.  From that
 * point on @kn may be removed at any time and it is entirely up to the
 * caller to make sure that whatever it dereferences stays accessible.
 */
void kernfs_break_active_protection(struct kernfs_node *kn)
{
	/*
	 * Take ourselves out of the active ref dependency chain.  If we're
	 * called without an active ref, lockdep will complain.
	 */
	kernfs_put_active(kn);
}
1091
1092 /**
1093  * kernfs_unbreak_active_protection - undo kernfs_break_active_protection()
1094  * @kn: the self kernfs_node
1095  *
1096  * If kernfs_break_active_protection() was called, this function must be
1097  * invoked before finishing the kernfs operation.  Note that while this
1098  * function restores the active reference, it doesn't and can't actually
1099  * restore the active protection - @kn may already or be in the process of
1100  * being removed.  Once kernfs_break_active_protection() is invoked, that
1101  * protection is irreversibly gone for the kernfs operation instance.
1102  *
1103  * While this function may be called at any point after
1104  * kernfs_break_active_protection() is invoked, its most useful location
1105  * would be right before the enclosing kernfs operation returns.
1106  */
1107 void kernfs_unbreak_active_protection(struct kernfs_node *kn)
1108 {
1109         /*
1110          * @kn->active could be in any state; however, the increment we do
1111          * here will be undone as soon as the enclosing kernfs operation
1112          * finishes and this temporary bump can't break anything.  If @kn
1113          * is alive, nothing changes.  If @kn is being deactivated, the
1114          * soon-to-follow put will either finish deactivation or restore
1115          * deactivated state.  If @kn is already removed, the temporary
1116          * bump is guaranteed to be gone before @kn is released.
1117          */
1118         atomic_inc(&kn->active);
1119         if (kernfs_lockdep(kn))
1120                 rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_);
1121 }
1122
1123 /**
1124  * kernfs_remove_self - remove a kernfs_node from its own method
1125  * @kn: the self kernfs_node to remove
1126  *
1127  * The caller must be running off of a kernfs operation which is invoked
1128  * with an active reference - e.g. one of kernfs_ops.  This can be used to
1129  * implement a file operation which deletes itself.
1130  *
1131  * For example, the "delete" file for a sysfs device directory can be
1132  * implemented by invoking kernfs_remove_self() on the "delete" file
1133  * itself.  This function breaks the circular dependency of trying to
1134  * deactivate self while holding an active ref itself.  It isn't necessary
1135  * to modify the usual removal path to use kernfs_remove_self().  The
1136  * "delete" implementation can simply invoke kernfs_remove_self() on self
1137  * before proceeding with the usual removal path.  kernfs will ignore later
1138  * kernfs_remove() on self.
1139  *
1140  * kernfs_remove_self() can be called multiple times concurrently on the
1141  * same kernfs_node.  Only the first one actually performs removal and
1142  * returns %true.  All others will wait until the kernfs operation which
1143  * won self-removal finishes and return %false.  Note that the losers wait
1144  * for the completion of not only the winning kernfs_remove_self() but also
1145  * the whole kernfs_ops which won the arbitration.  This can be used to
1146  * guarantee, for example, all concurrent writes to a "delete" file to
1147  * finish only after the whole operation is complete.
1148  */
1149 bool kernfs_remove_self(struct kernfs_node *kn)
1150 {
1151         bool ret;
1152
1153         mutex_lock(&kernfs_mutex);
1154         kernfs_break_active_protection(kn);
1155
1156         /*
1157          * SUICIDAL is used to arbitrate among competing invocations.  Only
1158          * the first one will actually perform removal.  When the removal
1159          * is complete, SUICIDED is set and the active ref is restored
1160          * while holding kernfs_mutex.  The ones which lost arbitration
1161          * waits for SUICDED && drained which can happen only after the
1162          * enclosing kernfs operation which executed the winning instance
1163          * of kernfs_remove_self() finished.
1164          */
1165         if (!(kn->flags & KERNFS_SUICIDAL)) {
1166                 kn->flags |= KERNFS_SUICIDAL;
1167                 __kernfs_remove(kn);
1168                 kn->flags |= KERNFS_SUICIDED;
1169                 ret = true;
1170         } else {
1171                 wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq;
1172                 DEFINE_WAIT(wait);
1173
1174                 while (true) {
1175                         prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE);
1176
1177                         if ((kn->flags & KERNFS_SUICIDED) &&
1178                             atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
1179                                 break;
1180
1181                         mutex_unlock(&kernfs_mutex);
1182                         schedule();
1183                         mutex_lock(&kernfs_mutex);
1184                 }
1185                 finish_wait(waitq, &wait);
1186                 WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb));
1187                 ret = false;
1188         }
1189
1190         /*
1191          * This must be done while holding kernfs_mutex; otherwise, waiting
1192          * for SUICIDED && deactivated could finish prematurely.
1193          */
1194         kernfs_unbreak_active_protection(kn);
1195
1196         mutex_unlock(&kernfs_mutex);
1197         return ret;
1198 }
1199
1200 /**
1201  * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it
1202  * @parent: parent of the target
1203  * @name: name of the kernfs_node to remove
1204  * @ns: namespace tag of the kernfs_node to remove
1205  *
1206  * Look for the kernfs_node with @name and @ns under @parent and remove it.
1207  * Returns 0 on success, -ENOENT if such entry doesn't exist.
1208  */
1209 int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
1210                              const void *ns)
1211 {
1212         struct kernfs_node *kn;
1213
1214         if (!parent) {
1215                 WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n",
1216                         name);
1217                 return -ENOENT;
1218         }
1219
1220         mutex_lock(&kernfs_mutex);
1221
1222         kn = kernfs_find_ns(parent, name, ns);
1223         if (kn)
1224                 __kernfs_remove(kn);
1225
1226         mutex_unlock(&kernfs_mutex);
1227
1228         if (kn)
1229                 return 0;
1230         else
1231                 return -ENOENT;
1232 }
1233
1234 /**
1235  * kernfs_rename_ns - move and rename a kernfs_node
1236  * @kn: target node
1237  * @new_parent: new parent to put @sd under
1238  * @new_name: new name
1239  * @new_ns: new namespace tag
1240  */
1241 int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
1242                      const char *new_name, const void *new_ns)
1243 {
1244         struct kernfs_node *old_parent;
1245         const char *old_name = NULL;
1246         int error;
1247
1248         /* can't move or rename root */
1249         if (!kn->parent)
1250                 return -EINVAL;
1251
1252         mutex_lock(&kernfs_mutex);
1253
1254         error = -ENOENT;
1255         if (!kernfs_active(kn) || !kernfs_active(new_parent))
1256                 goto out;
1257
1258         error = 0;
1259         if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
1260             (strcmp(kn->name, new_name) == 0))
1261                 goto out;       /* nothing to rename */
1262
1263         error = -EEXIST;
1264         if (kernfs_find_ns(new_parent, new_name, new_ns))
1265                 goto out;
1266
1267         /* rename kernfs_node */
1268         if (strcmp(kn->name, new_name) != 0) {
1269                 error = -ENOMEM;
1270                 new_name = kstrdup(new_name, GFP_KERNEL);
1271                 if (!new_name)
1272                         goto out;
1273         } else {
1274                 new_name = NULL;
1275         }
1276
1277         /*
1278          * Move to the appropriate place in the appropriate directories rbtree.
1279          */
1280         kernfs_unlink_sibling(kn);
1281         kernfs_get(new_parent);
1282
1283         /* rename_lock protects ->parent and ->name accessors */
1284         spin_lock_irq(&kernfs_rename_lock);
1285
1286         old_parent = kn->parent;
1287         kn->parent = new_parent;
1288
1289         kn->ns = new_ns;
1290         if (new_name) {
1291                 if (!(kn->flags & KERNFS_STATIC_NAME))
1292                         old_name = kn->name;
1293                 kn->flags &= ~KERNFS_STATIC_NAME;
1294                 kn->name = new_name;
1295         }
1296
1297         spin_unlock_irq(&kernfs_rename_lock);
1298
1299         kn->hash = kernfs_name_hash(kn->name, kn->ns);
1300         kernfs_link_sibling(kn);
1301
1302         kernfs_put(old_parent);
1303         kfree(old_name);
1304
1305         error = 0;
1306  out:
1307         mutex_unlock(&kernfs_mutex);
1308         return error;
1309 }
1310
1311 /* Relationship between s_mode and the DT_xxx types */
1312 static inline unsigned char dt_type(struct kernfs_node *kn)
1313 {
1314         return (kn->mode >> 12) & 15;
1315 }
1316
1317 static int kernfs_dir_fop_release(struct inode *inode, struct file *filp)
1318 {
1319         kernfs_put(filp->private_data);
1320         return 0;
1321 }
1322
1323 static struct kernfs_node *kernfs_dir_pos(const void *ns,
1324         struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos)
1325 {
1326         if (pos) {
1327                 int valid = kernfs_active(pos) &&
1328                         pos->parent == parent && hash == pos->hash;
1329                 kernfs_put(pos);
1330                 if (!valid)
1331                         pos = NULL;
1332         }
1333         if (!pos && (hash > 1) && (hash < INT_MAX)) {
1334                 struct rb_node *node = parent->dir.children.rb_node;
1335                 while (node) {
1336                         pos = rb_to_kn(node);
1337
1338                         if (hash < pos->hash)
1339                                 node = node->rb_left;
1340                         else if (hash > pos->hash)
1341                                 node = node->rb_right;
1342                         else
1343                                 break;
1344                 }
1345         }
1346         /* Skip over entries which are dying/dead or in the wrong namespace */
1347         while (pos && (!kernfs_active(pos) || pos->ns != ns)) {
1348                 struct rb_node *node = rb_next(&pos->rb);
1349                 if (!node)
1350                         pos = NULL;
1351                 else
1352                         pos = rb_to_kn(node);
1353         }
1354         return pos;
1355 }
1356
1357 static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
1358         struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos)
1359 {
1360         pos = kernfs_dir_pos(ns, parent, ino, pos);
1361         if (pos) {
1362                 do {
1363                         struct rb_node *node = rb_next(&pos->rb);
1364                         if (!node)
1365                                 pos = NULL;
1366                         else
1367                                 pos = rb_to_kn(node);
1368                 } while (pos && (!kernfs_active(pos) || pos->ns != ns));
1369         }
1370         return pos;
1371 }
1372
1373 static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
1374 {
1375         struct dentry *dentry = file->f_path.dentry;
1376         struct kernfs_node *parent = dentry->d_fsdata;
1377         struct kernfs_node *pos = file->private_data;
1378         const void *ns = NULL;
1379
1380         if (!dir_emit_dots(file, ctx))
1381                 return 0;
1382         mutex_lock(&kernfs_mutex);
1383
1384         if (kernfs_ns_enabled(parent))
1385                 ns = kernfs_info(dentry->d_sb)->ns;
1386
1387         for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos);
1388              pos;
1389              pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) {
1390                 const char *name = pos->name;
1391                 unsigned int type = dt_type(pos);
1392                 int len = strlen(name);
1393                 ino_t ino = pos->ino;
1394
1395                 ctx->pos = pos->hash;
1396                 file->private_data = pos;
1397                 kernfs_get(pos);
1398
1399                 mutex_unlock(&kernfs_mutex);
1400                 if (!dir_emit(ctx, name, len, ino, type))
1401                         return 0;
1402                 mutex_lock(&kernfs_mutex);
1403         }
1404         mutex_unlock(&kernfs_mutex);
1405         file->private_data = NULL;
1406         ctx->pos = INT_MAX;
1407         return 0;
1408 }
1409
1410 static loff_t kernfs_dir_fop_llseek(struct file *file, loff_t offset,
1411                                     int whence)
1412 {
1413         struct inode *inode = file_inode(file);
1414         loff_t ret;
1415
1416         mutex_lock(&inode->i_mutex);
1417         ret = generic_file_llseek(file, offset, whence);
1418         mutex_unlock(&inode->i_mutex);
1419
1420         return ret;
1421 }
1422
1423 const struct file_operations kernfs_dir_fops = {
1424         .read           = generic_read_dir,
1425         .iterate        = kernfs_fop_readdir,
1426         .release        = kernfs_dir_fop_release,
1427         .llseek         = kernfs_dir_fop_llseek,
1428 };