Merge branch 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszer...

author Linus Torvalds <torvalds@linux-foundation.org>

Wed, 13 Sep 2017 16:11:44 +0000 (09:11 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Wed, 13 Sep 2017 16:11:44 +0000 (09:11 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 13 Sep 2017 16:11:44 +0000 (09:11 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 13 Sep 2017 16:11:44 +0000 (09:11 -0700)
diff --combined Documentation/filesystems/vfs.txt

index 405a3df759b33938dc9b6cc4e819673538f973a1,7f20c1bdfb67c35e130fef79cc9ac1d1467b9ecc..5fd325df59e2233df60cbf6da6c92230aa0c26ef
--- 1/Documentation/filesystems/vfs.txt
--- 2/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@@ -829,7 -829,9 +829,7 @@@ struct address_space_operations 
     swap_activate: Called when swapon is used on a file to allocate
         space if necessary and pin the block lookup information in
         memory. A return value of zero indicates success,
- -      in which case this file can be used to back swapspace. The
- -      swapspace operations will be proxied to this address space's
- -      ->swap_{out,in} methods.
+ +      in which case this file can be used to back swapspace.
   
     swap_deactivate: Called during swapoff on files where swap_activate
         was successful.
@@@ -988,7 -990,7 +988,7 @@@ struct dentry_operations 
         struct vfsmount *(*d_automount)(struct path *);
         int (*d_manage)(const struct path *, bool);
         struct dentry *(*d_real)(struct dentry *, const struct inode *,
-                                unsigned int);
+                                unsigned int, unsigned int);
   };
   
     d_revalidate: called when the VFS needs to revalidate a dentry. This
diff --combined fs/inode.c

index 210054157a4998b901c366e5d27a211b39f23315,eed15033e36bc369fe1b0df45bcbcb097e1bd647..d1e35b53bb23b80db7077500f63eeec9bce6bb28
--- 1/fs/inode.c
--- 2/fs/inode.c
+++ b/fs/inode.c
@@@ -353,7 -353,7 +353,7 @@@ void address_space_init_once(struct add
         init_rwsem(&mapping->i_mmap_rwsem);
         INIT_LIST_HEAD(&mapping->private_list);
         spin_lock_init(&mapping->private_lock);
- -      mapping->i_mmap = RB_ROOT;
+ +      mapping->i_mmap = RB_ROOT_CACHED;
   }
   EXPORT_SYMBOL(address_space_init_once);
   
@@@ -637,7 -637,6 +637,7 @@@ again
   
         dispose_list(&dispose);
   }
+ +EXPORT_SYMBOL_GPL(evict_inodes);
   
   /**
    * invalidate_inodes  - attempt to free all inodes on a superblock
@@@ -1570,11 -1569,24 +1570,24 @@@ EXPORT_SYMBOL(bmap)
   static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode,
                                bool rcu)
   {
-       if (!rcu) {
-               struct inode *realinode = d_real_inode(dentry);
+       struct dentry *upperdentry;
   
-               if (unlikely(inode != realinode) &&
-                   (!timespec_equal(&inode->i_mtime, &realinode->i_mtime) ||
+       /*
+        * Nothing to do if in rcu or if non-overlayfs
+        */
+       if (rcu || likely(!(dentry->d_flags & DCACHE_OP_REAL)))
+               return;
+ 
+       upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER);
+ 
+       /*
+        * If file is on lower then we can't update atime, so no worries about
+        * stale mtime/ctime.
+        */
+       if (upperdentry) {
+               struct inode *realinode = d_inode(upperdentry);
+ 
+               if ((!timespec_equal(&inode->i_mtime, &realinode->i_mtime) ||
                      !timespec_equal(&inode->i_ctime, &realinode->i_ctime))) {
                         inode->i_mtime = realinode->i_mtime;
                         inode->i_ctime = realinode->i_ctime;
diff --combined fs/internal.h

index fedfe94d84ba56a45ee9ab24948824ecc999b388,60cdbcd2887b731c9e4624426290cc8e1a9399e6..48cee21b4f146f5da86b3e485a1f9c155c33a5fe
--- 1/fs/internal.h
--- 2/fs/internal.h
+++ b/fs/internal.h
@@@ -71,8 -71,10 +71,10 @@@ extern void __init mnt_init(void)
   
   extern int __mnt_want_write(struct vfsmount *);
   extern int __mnt_want_write_file(struct file *);
+ extern int mnt_want_write_file_path(struct file *);
   extern void __mnt_drop_write(struct vfsmount *);
   extern void __mnt_drop_write_file(struct file *);
+ extern void mnt_drop_write_file_path(struct file *);
   
   /*
    * fs_struct.c
@@@ -132,6 -134,7 +134,6 @@@ static inline bool atime_needs_update_r
   extern void inode_io_list_del(struct inode *inode);
   
   extern long get_nr_dirty_inodes(void);
- -extern void evict_inodes(struct super_block *);
   extern int invalidate_inodes(struct super_block *, bool);
   
   /*
diff --combined fs/overlayfs/readdir.c

index f0fd3adb169398ec8483876dece90fe2bc435d78,3e27e455a502bb88ac7ee55216fe538e74e9bd15..62e9b22a2077ac2ac9acb382dd5617a34e9e7636
--- 1/fs/overlayfs/readdir.c
--- 2/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@@ -15,11 -15,13 +15,13 @@@
   #include <linux/rbtree.h>
   #include <linux/security.h>
   #include <linux/cred.h>
+ #include <linux/ratelimit.h>
   #include "overlayfs.h"
   
   struct ovl_cache_entry {
         unsigned int len;
         unsigned int type;
+       u64 real_ino;
         u64 ino;
         struct list_head l_node;
         struct rb_node node;
@@@ -32,18 -34,20 +34,20 @@@ struct ovl_dir_cache 
         long refcount;
         u64 version;
         struct list_head entries;
+       struct rb_root root;
   };
   
   struct ovl_readdir_data {
         struct dir_context ctx;
         struct dentry *dentry;
         bool is_lowest;
-       struct rb_root root;
+       struct rb_root *root;
         struct list_head *list;
         struct list_head middle;
         struct ovl_cache_entry *first_maybe_whiteout;
         int count;
         int err;
+       bool is_upper;
         bool d_type_supported;
   };
   
@@@ -58,7 -62,33 +62,33 @@@ struct ovl_dir_file 
   
   static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
   {
-       return container_of(n, struct ovl_cache_entry, node);
+       return rb_entry(n, struct ovl_cache_entry, node);
+ }
+ 
+ static bool ovl_cache_entry_find_link(const char *name, int len,
+                                     struct rb_node ***link,
+                                     struct rb_node **parent)
+ {
+       bool found = false;
+       struct rb_node **newp = *link;
+ 
+       while (!found && *newp) {
+               int cmp;
+               struct ovl_cache_entry *tmp;
+ 
+               *parent = *newp;
+               tmp = ovl_cache_entry_from_node(*newp);
+               cmp = strncmp(name, tmp->name, len);
+               if (cmp > 0)
+                       newp = &tmp->node.rb_right;
+               else if (cmp < 0 || len < tmp->len)
+                       newp = &tmp->node.rb_left;
+               else
+                       found = true;
+       }
+       *link = newp;
+ 
+       return found;
   }
   
   static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
@@@ -82,6 -112,32 +112,32 @@@
         return NULL;
   }
   
+ static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
+                          struct ovl_cache_entry *p)
+ {
+       /* Don't care if not doing ovl_iterate() */
+       if (!rdd->dentry)
+               return false;
+ 
+       /* Always recalc d_ino for parent */
+       if (strcmp(p->name, "..") == 0)
+               return true;
+ 
+       /* If this is lower, then native d_ino will do */
+       if (!rdd->is_upper)
+               return false;
+ 
+       /*
+        * Recalc d_ino for '.' and for all entries if dir is impure (contains
+        * copied up entries)
+        */
+       if ((p->name[0] == '.' && p->len == 1) ||
+           ovl_test_flag(OVL_IMPURE, d_inode(rdd->dentry)))
+               return true;
+ 
+       return false;
+ }
+ 
   static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
                                                    const char *name, int len,
                                                    u64 ino, unsigned int d_type)
@@@ -97,7 -153,11 +153,11 @@@
         p->name[len] = '\0';
         p->len = len;
         p->type = d_type;
+       p->real_ino = ino;
         p->ino = ino;
+       /* Defer setting d_ino for upper entry to ovl_iterate() */
+       if (ovl_calc_d_ino(rdd, p))
+               p->ino = 0;
         p->is_whiteout = false;
   
         if (d_type == DT_CHR) {
@@@ -111,32 -171,22 +171,22 @@@ static int ovl_cache_entry_add_rb(struc
                                   const char *name, int len, u64 ino,
                                   unsigned int d_type)
   {
-       struct rb_node **newp = &rdd->root.rb_node;
+       struct rb_node **newp = &rdd->root->rb_node;
         struct rb_node *parent = NULL;
         struct ovl_cache_entry *p;
   
-       while (*newp) {
-               int cmp;
-               struct ovl_cache_entry *tmp;
- 
-               parent = *newp;
-               tmp = ovl_cache_entry_from_node(*newp);
-               cmp = strncmp(name, tmp->name, len);
-               if (cmp > 0)
-                       newp = &tmp->node.rb_right;
-               else if (cmp < 0 || len < tmp->len)
-                       newp = &tmp->node.rb_left;
-               else
-                       return 0;
-       }
+       if (ovl_cache_entry_find_link(name, len, &newp, &parent))
+               return 0;
   
         p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
-       if (p == NULL)
+       if (p == NULL) {
+               rdd->err = -ENOMEM;
                 return -ENOMEM;
+       }
   
         list_add_tail(&p->l_node, rdd->list);
         rb_link_node(&p->node, parent, newp);
-       rb_insert_color(&p->node, &rdd->root);
+       rb_insert_color(&p->node, rdd->root);
   
         return 0;
   }
@@@ -147,7 -197,7 +197,7 @@@ static int ovl_fill_lowest(struct ovl_r
   {
         struct ovl_cache_entry *p;
   
-       p = ovl_cache_entry_find(&rdd->root, name, namelen);
+       p = ovl_cache_entry_find(rdd->root, name, namelen);
         if (p) {
                 list_move_tail(&p->l_node, &rdd->middle);
         } else {
@@@ -172,6 -222,16 +222,16 @@@ void ovl_cache_free(struct list_head *l
         INIT_LIST_HEAD(list);
   }
   
+ void ovl_dir_cache_free(struct inode *inode)
+ {
+       struct ovl_dir_cache *cache = ovl_dir_cache(inode);
+ 
+       if (cache) {
+               ovl_cache_free(&cache->entries);
+               kfree(cache);
+       }
+ }
+ 
   static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
   {
         struct ovl_dir_cache *cache = od->cache;
@@@ -179,8 -239,8 +239,8 @@@
         WARN_ON(cache->refcount <= 0);
         cache->refcount--;
         if (!cache->refcount) {
-               if (ovl_dir_cache(dentry) == cache)
-                       ovl_set_dir_cache(dentry, NULL);
+               if (ovl_dir_cache(d_inode(dentry)) == cache)
+                       ovl_set_dir_cache(d_inode(dentry), NULL);
   
                 ovl_cache_free(&cache->entries);
                 kfree(cache);
@@@ -273,7 -333,8 +333,8 @@@ static void ovl_dir_reset(struct file *
                 od->is_real = false;
   }
   
- static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list)
+ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
+       struct rb_root *root)
   {
         int err;
         struct path realpath;
@@@ -281,13 -342,14 +342,14 @@@
                 .ctx.actor = ovl_fill_merge,
                 .dentry = dentry,
                 .list = list,
-               .root = RB_ROOT,
+               .root = root,
                 .is_lowest = false,
         };
         int idx, next;
   
         for (idx = 0; idx != -1; idx = next) {
                 next = ovl_path_next(idx, dentry, &realpath);
+               rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
   
                 if (next != -1) {
                         err = ovl_dir_read(&realpath, &rdd);
@@@ -326,12 -388,13 +388,13 @@@ static struct ovl_dir_cache *ovl_cache_
         int res;
         struct ovl_dir_cache *cache;
   
-       cache = ovl_dir_cache(dentry);
+       cache = ovl_dir_cache(d_inode(dentry));
         if (cache && ovl_dentry_version_get(dentry) == cache->version) {
+               WARN_ON(!cache->refcount);
                 cache->refcount++;
                 return cache;
         }
-       ovl_set_dir_cache(dentry, NULL);
+       ovl_set_dir_cache(d_inode(dentry), NULL);
   
         cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
         if (!cache)
@@@ -339,8 -402,9 +402,9 @@@
   
         cache->refcount = 1;
         INIT_LIST_HEAD(&cache->entries);
+       cache->root = RB_ROOT;
   
-       res = ovl_dir_read_merged(dentry, &cache->entries);
+       res = ovl_dir_read_merged(dentry, &cache->entries, &cache->root);
         if (res) {
                 ovl_cache_free(&cache->entries);
                 kfree(cache);
@@@ -348,22 -412,266 +412,266 @@@
         }
   
         cache->version = ovl_dentry_version_get(dentry);
-       ovl_set_dir_cache(dentry, cache);
+       ovl_set_dir_cache(d_inode(dentry), cache);
   
         return cache;
   }
   
+ /*
+  * Set d_ino for upper entries. Non-upper entries should always report
+  * the uppermost real inode ino and should not call this function.
+  *
+  * When not all layers are on the same fs, report real ino also for upper.
+  *
+  * When all layers are on the same fs, and upper has a reference to
+  * copy up origin, call vfs_getattr() on the overlay entry to make
+  * sure that d_ino will be consistent with st_ino from stat(2).
+  */
+ static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p)
+ 
+ {
+       struct dentry *dir = path->dentry;
+       struct dentry *this = NULL;
+       enum ovl_path_type type;
+       u64 ino = p->real_ino;
+       int err = 0;
+ 
+       if (!ovl_same_sb(dir->d_sb))
+               goto out;
+ 
+       if (p->name[0] == '.') {
+               if (p->len == 1) {
+                       this = dget(dir);
+                       goto get;
+               }
+               if (p->len == 2 && p->name[1] == '.') {
+                       /* we shall not be moved */
+                       this = dget(dir->d_parent);
+                       goto get;
+               }
+       }
+       this = lookup_one_len(p->name, dir, p->len);
+       if (IS_ERR_OR_NULL(this) || !this->d_inode) {
+               if (IS_ERR(this)) {
+                       err = PTR_ERR(this);
+                       this = NULL;
+                       goto fail;
+               }
+               goto out;
+       }
+ 
+ get:
+       type = ovl_path_type(this);
+       if (OVL_TYPE_ORIGIN(type)) {
+               struct kstat stat;
+               struct path statpath = *path;
+ 
+               statpath.dentry = this;
+               err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
+               if (err)
+                       goto fail;
+ 
+               WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev);
+               ino = stat.ino;
+       }
+ 
+ out:
+       p->ino = ino;
+       dput(this);
+       return err;
+ 
+ fail:
+       pr_warn_ratelimited("overlay: failed to look up (%s) for ino (%i)\n",
+                           p->name, err);
+       goto out;
+ }
+ 
+ static int ovl_fill_plain(struct dir_context *ctx, const char *name,
+                         int namelen, loff_t offset, u64 ino,
+                         unsigned int d_type)
+ {
+       struct ovl_cache_entry *p;
+       struct ovl_readdir_data *rdd =
+               container_of(ctx, struct ovl_readdir_data, ctx);
+ 
+       rdd->count++;
+       p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
+       if (p == NULL) {
+               rdd->err = -ENOMEM;
+               return -ENOMEM;
+       }
+       list_add_tail(&p->l_node, rdd->list);
+ 
+       return 0;
+ }
+ 
+ static int ovl_dir_read_impure(struct path *path,  struct list_head *list,
+                              struct rb_root *root)
+ {
+       int err;
+       struct path realpath;
+       struct ovl_cache_entry *p, *n;
+       struct ovl_readdir_data rdd = {
+               .ctx.actor = ovl_fill_plain,
+               .list = list,
+               .root = root,
+       };
+ 
+       INIT_LIST_HEAD(list);
+       *root = RB_ROOT;
+       ovl_path_upper(path->dentry, &realpath);
+ 
+       err = ovl_dir_read(&realpath, &rdd);
+       if (err)
+               return err;
+ 
+       list_for_each_entry_safe(p, n, list, l_node) {
+               if (strcmp(p->name, ".") != 0 &&
+                   strcmp(p->name, "..") != 0) {
+                       err = ovl_cache_update_ino(path, p);
+                       if (err)
+                               return err;
+               }
+               if (p->ino == p->real_ino) {
+                       list_del(&p->l_node);
+                       kfree(p);
+               } else {
+                       struct rb_node **newp = &root->rb_node;
+                       struct rb_node *parent = NULL;
+ 
+                       if (WARN_ON(ovl_cache_entry_find_link(p->name, p->len,
+                                                             &newp, &parent)))
+                               return -EIO;
+ 
+                       rb_link_node(&p->node, parent, newp);
+                       rb_insert_color(&p->node, root);
+               }
+       }
+       return 0;
+ }
+ 
+ static struct ovl_dir_cache *ovl_cache_get_impure(struct path *path)
+ {
+       int res;
+       struct dentry *dentry = path->dentry;
+       struct ovl_dir_cache *cache;
+ 
+       cache = ovl_dir_cache(d_inode(dentry));
+       if (cache && ovl_dentry_version_get(dentry) == cache->version)
+               return cache;
+ 
+       /* Impure cache is not refcounted, free it here */
+       ovl_dir_cache_free(d_inode(dentry));
+       ovl_set_dir_cache(d_inode(dentry), NULL);
+ 
+       cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
+       if (!cache)
+               return ERR_PTR(-ENOMEM);
+ 
+       res = ovl_dir_read_impure(path, &cache->entries, &cache->root);
+       if (res) {
+               ovl_cache_free(&cache->entries);
+               kfree(cache);
+               return ERR_PTR(res);
+       }
+       if (list_empty(&cache->entries)) {
+               /* Good opportunity to get rid of an unnecessary "impure" flag */
+               ovl_do_removexattr(ovl_dentry_upper(dentry), OVL_XATTR_IMPURE);
+               ovl_clear_flag(OVL_IMPURE, d_inode(dentry));
+               kfree(cache);
+               return NULL;
+       }
+ 
+       cache->version = ovl_dentry_version_get(dentry);
+       ovl_set_dir_cache(d_inode(dentry), cache);
+ 
+       return cache;
+ }
+ 
+ struct ovl_readdir_translate {
+       struct dir_context *orig_ctx;
+       struct ovl_dir_cache *cache;
+       struct dir_context ctx;
+       u64 parent_ino;
+ };
+ 
+ static int ovl_fill_real(struct dir_context *ctx, const char *name,
+                          int namelen, loff_t offset, u64 ino,
+                          unsigned int d_type)
+ {
+       struct ovl_readdir_translate *rdt =
+               container_of(ctx, struct ovl_readdir_translate, ctx);
+       struct dir_context *orig_ctx = rdt->orig_ctx;
+ 
+       if (rdt->parent_ino && strcmp(name, "..") == 0)
+               ino = rdt->parent_ino;
+       else if (rdt->cache) {
+               struct ovl_cache_entry *p;
+ 
+               p = ovl_cache_entry_find(&rdt->cache->root, name, namelen);
+               if (p)
+                       ino = p->ino;
+       }
+ 
+       return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
+ }
+ 
+ static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
+ {
+       int err;
+       struct ovl_dir_file *od = file->private_data;
+       struct dentry *dir = file->f_path.dentry;
+       struct ovl_readdir_translate rdt = {
+               .ctx.actor = ovl_fill_real,
+               .orig_ctx = ctx,
+       };
+ 
+       if (OVL_TYPE_MERGE(ovl_path_type(dir->d_parent))) {
+               struct kstat stat;
+               struct path statpath = file->f_path;
+ 
+               statpath.dentry = dir->d_parent;
+               err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
+               if (err)
+                       return err;
+ 
+               WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev);
+               rdt.parent_ino = stat.ino;
+       }
+ 
+       if (ovl_test_flag(OVL_IMPURE, d_inode(dir))) {
+               rdt.cache = ovl_cache_get_impure(&file->f_path);
+               if (IS_ERR(rdt.cache))
+                       return PTR_ERR(rdt.cache);
+       }
+ 
+       return iterate_dir(od->realfile, &rdt.ctx);
+ }
+ 
+ 
   static int ovl_iterate(struct file *file, struct dir_context *ctx)
   {
         struct ovl_dir_file *od = file->private_data;
         struct dentry *dentry = file->f_path.dentry;
         struct ovl_cache_entry *p;
+       int err;
   
         if (!ctx->pos)
                 ovl_dir_reset(file);
   
-       if (od->is_real)
+       if (od->is_real) {
+               /*
+                * If parent is merge, then need to adjust d_ino for '..', if
+                * dir is impure then need to adjust d_ino for copied up
+                * entries.
+                */
+               if (ovl_same_sb(dentry->d_sb) &&
+                   (ovl_test_flag(OVL_IMPURE, d_inode(dentry)) ||
+                    OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent)))) {
+                       return ovl_iterate_real(file, ctx);
+               }
                 return iterate_dir(od->realfile, ctx);
+       }
   
         if (!od->cache) {
                 struct ovl_dir_cache *cache;
@@@ -378,9 -686,15 +686,15 @@@
   
         while (od->cursor != &od->cache->entries) {
                 p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
-               if (!p->is_whiteout)
+               if (!p->is_whiteout) {
+                       if (!p->ino) {
+                               err = ovl_cache_update_ino(&file->f_path, p);
+                               if (err)
+                                       return err;
+                       }
                         if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
                                 break;
+               }
                 od->cursor = p->l_node.next;
                 ctx->pos++;
         }
@@@ -446,14 -760,14 +760,14 @@@ static int ovl_dir_fsync(struct file *f
   
                         ovl_path_upper(dentry, &upperpath);
                         realfile = ovl_path_open(&upperpath, O_RDONLY);
- -                      smp_mb__before_spinlock();
+ +
                         inode_lock(inode);
                         if (!od->upperfile) {
                                 if (IS_ERR(realfile)) {
                                         inode_unlock(inode);
                                         return PTR_ERR(realfile);
                                 }
- -                              od->upperfile = realfile;
+ +                              smp_store_release(&od->upperfile, realfile);
                         } else {
                                 /* somebody has beaten us to it */
                                 if (!IS_ERR(realfile))
@@@ -522,8 -836,9 +836,9 @@@ int ovl_check_empty_dir(struct dentry *
   {
         int err;
         struct ovl_cache_entry *p;
+       struct rb_root root = RB_ROOT;
   
-       err = ovl_dir_read_merged(dentry, list);
+       err = ovl_dir_read_merged(dentry, list, &root);
         if (err)
                 return err;
   
@@@ -612,12 -927,13 +927,13 @@@ static void ovl_workdir_cleanup_recurse
         int err;
         struct inode *dir = path->dentry->d_inode;
         LIST_HEAD(list);
+       struct rb_root root = RB_ROOT;
         struct ovl_cache_entry *p;
         struct ovl_readdir_data rdd = {
                 .ctx.actor = ovl_fill_merge,
                 .dentry = NULL,
                 .list = &list,
-               .root = RB_ROOT,
+               .root = &root,
                 .is_lowest = false,
         };
   
@@@ -675,12 -991,13 +991,13 @@@ int ovl_indexdir_cleanup(struct dentry 
         struct inode *dir = dentry->d_inode;
         struct path path = { .mnt = mnt, .dentry = dentry };
         LIST_HEAD(list);
+       struct rb_root root = RB_ROOT;
         struct ovl_cache_entry *p;
         struct ovl_readdir_data rdd = {
                 .ctx.actor = ovl_fill_merge,
                 .dentry = NULL,
                 .list = &list,
-               .root = RB_ROOT,
+               .root = &root,
                 .is_lowest = false,
         };
   
diff --combined fs/xattr.c

index 7b03df6b8be26fbc7483f139063b99e1a0d2b451,d7c2cf7817bbe000a28171ec2935efcc5ca66c9a..4424f7fecf14549b65c62d0cac4b8b692718f426
--- 1/fs/xattr.c
--- 2/fs/xattr.c
+++ b/fs/xattr.c
@@@ -23,6 -23,7 +23,7 @@@
   #include <linux/posix_acl_xattr.h>
   
   #include <linux/uaccess.h>
+ #include "internal.h"
   
   static const char *
   strcmp_prefix(const char *a, const char *a_prefix)
@@@ -441,12 -442,6 +442,12 @@@ setxattr(struct dentry *d, const char _
                 if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
                     (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
                         posix_acl_fix_xattr_from_user(kvalue, size);
+ +              else if (strcmp(kname, XATTR_NAME_CAPS) == 0) {
+ +                      error = cap_convert_nscap(d, &kvalue, size);
+ +                      if (error < 0)
+ +                              goto out;
+ +                      size = error;
+ +              }
         }
   
         error = vfs_setxattr(d, kname, kvalue, size, flags);
@@@ -502,10 -497,10 +503,10 @@@ SYSCALL_DEFINE5(fsetxattr, int, fd, con
         if (!f.file)
                 return error;
         audit_file(f.file);
-       error = mnt_want_write_file(f.file);
+       error = mnt_want_write_file_path(f.file);
         if (!error) {
                 error = setxattr(f.file->f_path.dentry, name, value, size, flags);
-               mnt_drop_write_file(f.file);
+               mnt_drop_write_file_path(f.file);
         }
         fdput(f);
         return error;
@@@ -734,10 -729,10 +735,10 @@@ SYSCALL_DEFINE2(fremovexattr, int, fd, 
         if (!f.file)
                 return error;
         audit_file(f.file);
-       error = mnt_want_write_file(f.file);
+       error = mnt_want_write_file_path(f.file);
         if (!error) {
                 error = removexattr(f.file->f_path.dentry, name);
-               mnt_drop_write_file(f.file);
+               mnt_drop_write_file_path(f.file);
         }
         fdput(f);
         return error;
diff --combined include/linux/fs.h

index 2d0e6748e46e1f6707b0de2b127cf5ee59553c3c,ee1db83c39cb714d962ffbf8a6fe28b1473897da..33d8e45cd874b7b25d9fc570b26c6df4dfea83e2
--- 1/include/linux/fs.h
--- 2/include/linux/fs.h
+++ b/include/linux/fs.h
@@@ -72,8 -72,6 +72,8 @@@ extern int leases_enable, lease_break_t
   extern int sysctl_protected_symlinks;
   extern int sysctl_protected_hardlinks;
   
+ +typedef __kernel_rwf_t rwf_t;
+ +
   struct buffer_head;
   typedef int (get_block_t)(struct inode *inode, sector_t iblock,
                         struct buffer_head *bh_result, int create);
@@@ -392,7 -390,7 +392,7 @@@ struct address_space 
         struct radix_tree_root  page_tree;      /* radix tree of all pages */
         spinlock_t              tree_lock;      /* and lock protecting it */
         atomic_t                i_mmap_writable;/* count VM_SHARED mappings */
- -      struct rb_root          i_mmap;         /* tree of private and shared mappings */
+ +      struct rb_root_cached   i_mmap;         /* tree of private and shared mappings */
         struct rw_semaphore     i_mmap_rwsem;   /* protect tree, count, list */
         /* Protected by tree_lock together with the radix tree */
         unsigned long           nrpages;        /* number of total pages */
@@@ -429,7 -427,6 +429,7 @@@ struct block_device 
   #endif
         struct block_device *   bd_contains;
         unsigned                bd_block_size;
+ +      u8                      bd_partno;
         struct hd_struct *      bd_part;
         /* number of times partitions within this device have been opened. */
         unsigned                bd_part_count;
@@@ -487,7 -484,7 +487,7 @@@ static inline void i_mmap_unlock_read(s
    */
   static inline int mapping_mapped(struct address_space *mapping)
   {
- -      return  !RB_EMPTY_ROOT(&mapping->i_mmap);
+ +      return  !RB_EMPTY_ROOT(&mapping->i_mmap.rb_root);
   }
   
   /*
@@@ -910,9 -907,9 +910,9 @@@ static inline struct file *get_file(str
   /* Page cache limit. The filesystems should put that into their s_maxbytes 
      limits, otherwise bad things can happen in VM. */ 
   #if BITS_PER_LONG==32
- -#define MAX_LFS_FILESIZE      (((loff_t)PAGE_SIZE << (BITS_PER_LONG-1))-1)
+ +#define MAX_LFS_FILESIZE      ((loff_t)ULONG_MAX << PAGE_SHIFT)
   #elif BITS_PER_LONG==64
- -#define MAX_LFS_FILESIZE      ((loff_t)0x7fffffffffffffffLL)
+ +#define MAX_LFS_FILESIZE      ((loff_t)LLONG_MAX)
   #endif
   
   #define FL_POSIX      1
@@@ -1003,6 -1000,7 +1003,6 @@@ struct file_lock 
         unsigned char fl_type;
         unsigned int fl_pid;
         int fl_link_cpu;                /* what cpu's list is this on? */
- -      struct pid *fl_nspid;
         wait_queue_head_t fl_wait;
         struct file *fl_file;
         loff_t fl_start;
@@@ -1235,7 -1233,7 +1235,7 @@@ static inline struct inode *file_inode(
   
   static inline struct dentry *file_dentry(const struct file *file)
   {
-       return d_real(file->f_path.dentry, file_inode(file), 0);
+       return d_real(file->f_path.dentry, file_inode(file), 0, 0);
   }
   
   static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
@@@ -1270,6 -1268,8 +1270,6 @@@ extern void f_delown(struct file *filp)
   extern pid_t f_getown(struct file *filp);
   extern int send_sigurg(struct fown_struct *fown);
   
- -struct mm_struct;
- -
   /*
    *    Umount options
    */
@@@ -1758,9 -1758,9 +1758,9 @@@ extern ssize_t __vfs_write(struct file 
   extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
   extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
   extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
- -              unsigned long, loff_t *, int);
+ +              unsigned long, loff_t *, rwf_t);
   extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
- -              unsigned long, loff_t *, int);
+ +              unsigned long, loff_t *, rwf_t);
   extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
                                    loff_t, size_t, unsigned int);
   extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
@@@ -2471,13 -2471,9 +2471,13 @@@ static inline void bd_unlink_disk_holde
   #endif
   
   /* fs/char_dev.c */
- -#define CHRDEV_MAJOR_HASH_SIZE        255
+ +#define CHRDEV_MAJOR_MAX 512
   /* Marks the bottom of the first segment of free char majors */
   #define CHRDEV_MAJOR_DYN_END 234
+ +/* Marks the top and bottom of the second segment of free char majors */
+ +#define CHRDEV_MAJOR_DYN_EXT_START 511
+ +#define CHRDEV_MAJOR_DYN_EXT_END 384
+ +
   extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *);
   extern int register_chrdev_region(dev_t, unsigned, const char *);
   extern int __register_chrdev(unsigned int major, unsigned int baseminor,
@@@ -2504,14 -2500,14 +2504,14 @@@ static inline void unregister_chrdev(un
   #define BDEVT_SIZE    10      /* Largest string for MAJ:MIN for blkdev */
   
   #ifdef CONFIG_BLOCK
- -#define BLKDEV_MAJOR_HASH_SIZE        255
+ +#define BLKDEV_MAJOR_MAX      512
   extern const char *__bdevname(dev_t, char *buffer);
   extern const char *bdevname(struct block_device *bdev, char *buffer);
   extern struct block_device *lookup_bdev(const char *);
   extern void blkdev_show(struct seq_file *,off_t);
   
   #else
- -#define BLKDEV_MAJOR_HASH_SIZE        0
+ +#define BLKDEV_MAJOR_MAX      0
   #endif
   
   extern void init_special_inode(struct inode *, umode_t, dev_t);
@@@ -2543,19 -2539,12 +2543,19 @@@ extern int invalidate_inode_pages2_rang
   extern int write_inode_now(struct inode *, int);
   extern int filemap_fdatawrite(struct address_space *);
   extern int filemap_flush(struct address_space *);
- -extern int filemap_fdatawait(struct address_space *);
   extern int filemap_fdatawait_keep_errors(struct address_space *mapping);
   extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
                                    loff_t lend);
+ +
+ +static inline int filemap_fdatawait(struct address_space *mapping)
+ +{
+ +      return filemap_fdatawait_range(mapping, 0, LLONG_MAX);
+ +}
+ +
   extern bool filemap_range_has_page(struct address_space *, loff_t lstart,
                                   loff_t lend);
+ +extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart,
+ +                                              loff_t lend);
   extern int filemap_write_and_wait(struct address_space *mapping);
   extern int filemap_write_and_wait_range(struct address_space *mapping,
                                         loff_t lstart, loff_t lend);
@@@ -2564,19 -2553,12 +2564,19 @@@ extern int __filemap_fdatawrite_range(s
   extern int filemap_fdatawrite_range(struct address_space *mapping,
                                 loff_t start, loff_t end);
   extern int filemap_check_errors(struct address_space *mapping);
- -
   extern void __filemap_set_wb_err(struct address_space *mapping, int err);
+ +
+ +extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart,
+ +                                              loff_t lend);
   extern int __must_check file_check_and_advance_wb_err(struct file *file);
   extern int __must_check file_write_and_wait_range(struct file *file,
                                                 loff_t start, loff_t end);
   
+ +static inline int file_write_and_wait(struct file *file)
+ +{
+ +      return file_write_and_wait_range(file, 0, LLONG_MAX);
+ +}
+ +
   /**
    * filemap_set_wb_err - set a writeback error on an address_space
    * @mapping: mapping in which to set writeback error
@@@ -2590,6 -2572,8 +2590,6 @@@
    * When a writeback error occurs, most filesystems will want to call
    * filemap_set_wb_err to record the error in the mapping so that it will be
    * automatically reported whenever fsync is called on the file.
- - *
- - * FIXME: mention FS_* flag here?
    */
   static inline void filemap_set_wb_err(struct address_space *mapping, int err)
   {
@@@ -2847,7 -2831,6 +2847,7 @@@ static inline void lockdep_annotate_ino
   #endif
   extern void unlock_new_inode(struct inode *);
   extern unsigned int get_next_ino(void);
+ +extern void evict_inodes(struct super_block *sb);
   
   extern void __iget(struct inode * inode);
   extern void iget_failed(struct inode *);
@@@ -2891,9 -2874,9 +2891,9 @@@ extern ssize_t generic_file_direct_writ
   extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
   
   ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
- -              int flags);
+ +              rwf_t flags);
   ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
- -              int flags);
+ +              rwf_t flags);
   
   /* fs/block_dev.c */
   extern ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to);
@@@ -3015,10 -2998,6 +3015,10 @@@ void __inode_add_bytes(struct inode *in
   void inode_add_bytes(struct inode *inode, loff_t bytes);
   void __inode_sub_bytes(struct inode *inode, loff_t bytes);
   void inode_sub_bytes(struct inode *inode, loff_t bytes);
+ +static inline loff_t __inode_get_bytes(struct inode *inode)
+ +{
+ +      return (((loff_t)inode->i_blocks) << 9) + inode->i_bytes;
+ +}
   loff_t inode_get_bytes(struct inode *inode);
   void inode_set_bytes(struct inode *inode, loff_t bytes);
   const char *simple_get_link(struct dentry *, struct inode *,
@@@ -3043,7 -3022,8 +3043,7 @@@ static inline int vfs_lstat(const char 
   static inline int vfs_fstatat(int dfd, const char __user *filename,
                               struct kstat *stat, int flags)
   {
- -      return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT,
- -                       stat, STATX_BASIC_STATS);
+ +      return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS);
   }
   static inline int vfs_fstat(int fd, struct kstat *stat)
   {
@@@ -3163,7 -3143,7 +3163,7 @@@ static inline int iocb_flags(struct fil
         return res;
   }
   
- -static inline int kiocb_set_rw_flags(struct kiocb *ki, int flags)
+ +static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
   {
         if (unlikely(flags & ~RWF_SUPPORTED))
                 return -EOPNOTSUPP;
author	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 13 Sep 2017 16:11:44 +0000 (09:11 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 13 Sep 2017 16:11:44 +0000 (09:11 -0700)
		1	2
Documentation/filesystems/vfs.txt	patch \|	diff1 \|	diff2 \|	blob \| history
fs/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/internal.h	patch \|	diff1 \|	diff2 \|	blob \| history
fs/overlayfs/readdir.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xattr.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/fs.h	patch \|	diff1 \|	diff2 \|	blob \| history