git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/commitdiff
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso...
author: Linus Torvalds <torvalds@linux-foundation.org>
Mon, 23 Feb 2015 02:05:13 +0000 (18:05 -0800)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Mon, 23 Feb 2015 02:05:13 +0000 (18:05 -0800)
Pull ext4 fixes from Ted Ts'o:
 "Ext4 bug fixes.

  We also reserved code points for encryption and read-only images (for
  which the implementation is mostly just the reserved code point for a
  read-only feature :-)"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: fix indirect punch hole corruption
  ext4: ignore journal checksum on remount; don't fail
  ext4: remove duplicate remount check for JOURNAL_CHECKSUM change
  ext4: fix mmap data corruption in nodelalloc mode when blocksize < pagesize
  ext4: support read-only images
  ext4: change to use setup_timer() instead of init_timer()
  ext4: reserve codepoints used by the ext4 encryption feature
  jbd2: complain about descriptor block checksum errors

1  2 
fs/ext4/ext4.h
fs/ext4/indirect.c
fs/ext4/inode.c
fs/ext4/super.c

diff --combined fs/ext4/ext4.h
index 982d934fd9ac98338377d3b1621b3d577531b6e6,7fec2efd8635e83d2c4196c3fe2f970846cd2804..f63c3d5805c4c156ad3ed412cbecf85e700cf9d2
@@@ -364,7 -364,8 +364,8 @@@ struct flex_groups 
  #define EXT4_DIRTY_FL                 0x00000100
  #define EXT4_COMPRBLK_FL              0x00000200 /* One or more compressed clusters */
  #define EXT4_NOCOMPR_FL                       0x00000400 /* Don't compress */
- #define EXT4_ECOMPR_FL                        0x00000800 /* Compression error */
+       /* nb: was previously EXT2_ECOMPR_FL */
+ #define EXT4_ENCRYPT_FL                       0x00000800 /* encrypted file */
  /* End compression flags --- maybe not all used */
  #define EXT4_INDEX_FL                 0x00001000 /* hash-indexed directory */
  #define EXT4_IMAGIC_FL                        0x00002000 /* AFS directory */
@@@ -421,7 -422,7 +422,7 @@@ enum 
        EXT4_INODE_DIRTY        = 8,
        EXT4_INODE_COMPRBLK     = 9,    /* One or more compressed clusters */
        EXT4_INODE_NOCOMPR      = 10,   /* Don't compress */
-       EXT4_INODE_ECOMPR       = 11,   /* Compression error */
+       EXT4_INODE_ENCRYPT      = 11,   /* Encrypted file */
  /* End compression flags --- maybe not all used */
        EXT4_INODE_INDEX        = 12,   /* hash-indexed directory */
        EXT4_INODE_IMAGIC       = 13,   /* AFS directory */
@@@ -466,7 -467,7 +467,7 @@@ static inline void ext4_check_flag_valu
        CHECK_FLAG_VALUE(DIRTY);
        CHECK_FLAG_VALUE(COMPRBLK);
        CHECK_FLAG_VALUE(NOCOMPR);
-       CHECK_FLAG_VALUE(ECOMPR);
+       CHECK_FLAG_VALUE(ENCRYPT);
        CHECK_FLAG_VALUE(INDEX);
        CHECK_FLAG_VALUE(IMAGIC);
        CHECK_FLAG_VALUE(JOURNAL_DATA);
@@@ -965,11 -966,6 +966,11 @@@ struct ext4_inode_info 
  #define EXT4_MOUNT_ERRORS_MASK                0x00070
  #define EXT4_MOUNT_MINIX_DF           0x00080 /* Mimics the Minix statfs */
  #define EXT4_MOUNT_NOLOAD             0x00100 /* Don't use existing journal*/
 +#ifdef CONFIG_FS_DAX
 +#define EXT4_MOUNT_DAX                        0x00200 /* Direct Access */
 +#else
 +#define EXT4_MOUNT_DAX                        0
 +#endif
  #define EXT4_MOUNT_DATA_FLAGS         0x00C00 /* Mode for data writes: */
  #define EXT4_MOUNT_JOURNAL_DATA               0x00400 /* Write data to journal */
  #define EXT4_MOUNT_ORDERED_DATA               0x00800 /* Flush data before commit */
@@@ -1048,6 -1044,12 +1049,12 @@@ extern void ext4_set_bits(void *bm, in
  /* Metadata checksum algorithm codes */
  #define EXT4_CRC32C_CHKSUM            1
  
+ /* Encryption algorithms */
+ #define EXT4_ENCRYPTION_MODE_INVALID          0
+ #define EXT4_ENCRYPTION_MODE_AES_256_XTS      1
+ #define EXT4_ENCRYPTION_MODE_AES_256_GCM      2
+ #define EXT4_ENCRYPTION_MODE_AES_256_CBC      3
  /*
   * Structure of the super block
   */
@@@ -1161,7 -1163,8 +1168,8 @@@ struct ext4_super_block 
        __le32  s_grp_quota_inum;       /* inode for tracking group quota */
        __le32  s_overhead_clusters;    /* overhead blocks/clusters in fs */
        __le32  s_backup_bgs[2];        /* groups with sparse_super2 SBs */
-       __le32  s_reserved[106];        /* Padding to the end of the block */
+       __u8    s_encrypt_algos[4];     /* Encryption algorithms in use  */
+       __le32  s_reserved[105];        /* Padding to the end of the block */
        __le32  s_checksum;             /* crc32c(superblock) */
  };
  
@@@ -1527,6 -1530,7 +1535,7 @@@ static inline void ext4_clear_state_fla
   * GDT_CSUM bits are mutually exclusive.
   */
  #define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM  0x0400
+ #define EXT4_FEATURE_RO_COMPAT_READONLY               0x1000
  
  #define EXT4_FEATURE_INCOMPAT_COMPRESSION     0x0001
  #define EXT4_FEATURE_INCOMPAT_FILETYPE                0x0002
  #define EXT4_FEATURE_INCOMPAT_BG_USE_META_CSUM        0x2000 /* use crc32c for bg */
  #define EXT4_FEATURE_INCOMPAT_LARGEDIR                0x4000 /* >2GB or 3-lvl htree */
  #define EXT4_FEATURE_INCOMPAT_INLINE_DATA     0x8000 /* data in inode */
+ #define EXT4_FEATURE_INCOMPAT_ENCRYPT         0x10000
  
  #define EXT2_FEATURE_COMPAT_SUPP      EXT4_FEATURE_COMPAT_EXT_ATTR
  #define EXT2_FEATURE_INCOMPAT_SUPP    (EXT4_FEATURE_INCOMPAT_FILETYPE| \
@@@ -2583,7 -2588,6 +2593,7 @@@ extern const struct file_operations ext
  /* file.c */
  extern const struct inode_operations ext4_file_inode_operations;
  extern const struct file_operations ext4_file_operations;
 +extern const struct file_operations ext4_dax_file_operations;
  extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
  
  /* inline.c */
diff --combined fs/ext4/indirect.c
index 6b9878a24182b06125cb496ef973ff0d9b739106,5e7af1c69577249441e9de8a662b432447822dfb..45fe924f82bce2ff76e3e74b45ec1833729433ea
@@@ -689,22 -689,14 +689,22 @@@ retry
                        inode_dio_done(inode);
                        goto locked;
                }
 -              ret = __blockdev_direct_IO(rw, iocb, inode,
 -                               inode->i_sb->s_bdev, iter, offset,
 -                               ext4_get_block, NULL, NULL, 0);
 +              if (IS_DAX(inode))
 +                      ret = dax_do_io(rw, iocb, inode, iter, offset,
 +                                      ext4_get_block, NULL, 0);
 +              else
 +                      ret = __blockdev_direct_IO(rw, iocb, inode,
 +                                      inode->i_sb->s_bdev, iter, offset,
 +                                      ext4_get_block, NULL, NULL, 0);
                inode_dio_done(inode);
        } else {
  locked:
 -              ret = blockdev_direct_IO(rw, iocb, inode, iter,
 -                               offset, ext4_get_block);
 +              if (IS_DAX(inode))
 +                      ret = dax_do_io(rw, iocb, inode, iter, offset,
 +                                      ext4_get_block, NULL, DIO_LOCKING);
 +              else
 +                      ret = blockdev_direct_IO(rw, iocb, inode, iter,
 +                                      offset, ext4_get_block);
  
                if (unlikely((rw & WRITE) && ret < 0)) {
                        loff_t isize = i_size_read(inode);
@@@ -1401,10 -1393,7 +1401,7 @@@ end_range
                                 * to free. Everything was covered by the start
                                 * of the range.
                                 */
-                               return 0;
-                       } else {
-                               /* Shared branch grows from an indirect block */
-                               partial2--;
+                               goto do_indirects;
                        }
                } else {
                        /*
        /* Punch happened within the same level (n == n2) */
        partial = ext4_find_shared(inode, n, offsets, chain, &nr);
        partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
-       /*
-        * ext4_find_shared returns Indirect structure which
-        * points to the last element which should not be
-        * removed by truncate. But this is end of the range
-        * in punch_hole so we need to point to the next element
-        */
-       partial2->p++;
-       while ((partial > chain) || (partial2 > chain2)) {
-               /* We're at the same block, so we're almost finished */
-               if ((partial->bh && partial2->bh) &&
-                   (partial->bh->b_blocknr == partial2->bh->b_blocknr)) {
-                       if ((partial > chain) && (partial2 > chain2)) {
+       /* Free top, but only if partial2 isn't its subtree. */
+       if (nr) {
+               int level = min(partial - chain, partial2 - chain2);
+               int i;
+               int subtree = 1;
+               for (i = 0; i <= level; i++) {
+                       if (offsets[i] != offsets2[i]) {
+                               subtree = 0;
+                               break;
+                       }
+               }
+               if (!subtree) {
+                       if (partial == chain) {
+                               /* Shared branch grows from the inode */
+                               ext4_free_branches(handle, inode, NULL,
+                                                  &nr, &nr+1,
+                                                  (chain+n-1) - partial);
+                               *partial->p = 0;
+                       } else {
+                               /* Shared branch grows from an indirect block */
+                               BUFFER_TRACE(partial->bh, "get_write_access");
                                ext4_free_branches(handle, inode, partial->bh,
-                                                  partial->p + 1,
-                                                  partial2->p,
+                                                  partial->p,
+                                                  partial->p+1,
                                                   (chain+n-1) - partial);
-                               BUFFER_TRACE(partial->bh, "call brelse");
-                               brelse(partial->bh);
-                               BUFFER_TRACE(partial2->bh, "call brelse");
-                               brelse(partial2->bh);
                        }
-                       return 0;
                }
+       }
+       if (!nr2) {
                /*
-                * Clear the ends of indirect blocks on the shared branch
-                * at the start of the range
+                * ext4_find_shared returns Indirect structure which
+                * points to the last element which should not be
+                * removed by truncate. But this is end of the range
+                * in punch_hole so we need to point to the next element
                 */
-               if (partial > chain) {
+               partial2->p++;
+       }
+       while (partial > chain || partial2 > chain2) {
+               int depth = (chain+n-1) - partial;
+               int depth2 = (chain2+n2-1) - partial2;
+               if (partial > chain && partial2 > chain2 &&
+                   partial->bh->b_blocknr == partial2->bh->b_blocknr) {
+                       /*
+                        * We've converged on the same block. Clear the range,
+                        * then we're done.
+                        */
                        ext4_free_branches(handle, inode, partial->bh,
-                                  partial->p + 1,
-                                  (__le32 *)partial->bh->b_data+addr_per_block,
-                                  (chain+n-1) - partial);
+                                          partial->p + 1,
+                                          partial2->p,
+                                          (chain+n-1) - partial);
                        BUFFER_TRACE(partial->bh, "call brelse");
                        brelse(partial->bh);
-                       partial--;
+                       BUFFER_TRACE(partial2->bh, "call brelse");
+                       brelse(partial2->bh);
+                       return 0;
                }
                /*
-                * Clear the ends of indirect blocks on the shared branch
-                * at the end of the range
+                * The start and end partial branches may not be at the same
+                * level even though the punch happened within one level. So, we
+                * give them a chance to arrive at the same level, then walk
+                * them in step with each other until we converge on the same
+                * block.
                 */
-               if (partial2 > chain2) {
+               if (partial > chain && depth <= depth2) {
+                       ext4_free_branches(handle, inode, partial->bh,
+                                          partial->p + 1,
+                                          (__le32 *)partial->bh->b_data+addr_per_block,
+                                          (chain+n-1) - partial);
+                       BUFFER_TRACE(partial->bh, "call brelse");
+                       brelse(partial->bh);
+                       partial--;
+               }
+               if (partial2 > chain2 && depth2 <= depth) {
                        ext4_free_branches(handle, inode, partial2->bh,
                                           (__le32 *)partial2->bh->b_data,
                                           partial2->p,
-                                          (chain2+n-1) - partial2);
+                                          (chain2+n2-1) - partial2);
                        BUFFER_TRACE(partial2->bh, "call brelse");
                        brelse(partial2->bh);
                        partial2--;
                }
        }
+       return 0;
  
  do_indirects:
        /* Kill the remaining (whole) subtrees */
diff --combined fs/ext4/inode.c
index 85404f15e53a28860ce5a7be08220106b76fc06f,4df6d01b762eb52e9e181b95328e3d726ef77c42..5cb9a212b86f3efd69ca604df07dc20b901dabb1
@@@ -657,18 -657,6 +657,18 @@@ has_zeroout
        return retval;
  }
  
 +static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
 +{
 +      struct inode *inode = bh->b_assoc_map->host;
 +      /* XXX: breaks on 32-bit > 16GB. Is that even supported? */
 +      loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
 +      int err;
 +      if (!uptodate)
 +              return;
 +      WARN_ON(!buffer_unwritten(bh));
 +      err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
 +}
 +
  /* Maximum number of blocks we map for direct IO at once. */
  #define DIO_MAX_BLOCKS 4096
  
@@@ -706,11 -694,6 +706,11 @@@ static int _ext4_get_block(struct inod
  
                map_bh(bh, inode->i_sb, map.m_pblk);
                bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
 +              if (IS_DAX(inode) && buffer_unwritten(bh) && !io_end) {
 +                      bh->b_assoc_map = inode->i_mapping;
 +                      bh->b_private = (void *)(unsigned long)iblock;
 +                      bh->b_end_io = ext4_end_io_unwritten;
 +              }
                if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
                        set_buffer_defer_completion(bh);
                bh->b_size = inode->i_sb->s_blocksize * map.m_len;
@@@ -1024,6 -1007,7 +1024,7 @@@ static int ext4_write_end(struct file *
  {
        handle_t *handle = ext4_journal_current_handle();
        struct inode *inode = mapping->host;
+       loff_t old_size = inode->i_size;
        int ret = 0, ret2;
        int i_size_changed = 0;
  
        unlock_page(page);
        page_cache_release(page);
  
+       if (old_size < pos)
+               pagecache_isize_extended(inode, old_size, pos);
        /*
         * Don't mark the inode dirty under page lock. First, it unnecessarily
         * makes the holding time of page lock longer. Second, it forces lock
@@@ -1095,6 -1081,7 +1098,7 @@@ static int ext4_journalled_write_end(st
  {
        handle_t *handle = ext4_journal_current_handle();
        struct inode *inode = mapping->host;
+       loff_t old_size = inode->i_size;
        int ret = 0, ret2;
        int partial = 0;
        unsigned from, to;
        unlock_page(page);
        page_cache_release(page);
  
+       if (old_size < pos)
+               pagecache_isize_extended(inode, old_size, pos);
        if (size_changed) {
                ret2 = ext4_mark_inode_dirty(handle, inode);
                if (!ret)
@@@ -3027,14 -3017,13 +3034,14 @@@ static ssize_t ext4_ext_direct_IO(int r
                get_block_func = ext4_get_block_write;
                dio_flags = DIO_LOCKING;
        }
 -      ret = __blockdev_direct_IO(rw, iocb, inode,
 -                                 inode->i_sb->s_bdev, iter,
 -                                 offset,
 -                                 get_block_func,
 -                                 ext4_end_io_dio,
 -                                 NULL,
 -                                 dio_flags);
 +      if (IS_DAX(inode))
 +              ret = dax_do_io(rw, iocb, inode, iter, offset, get_block_func,
 +                              ext4_end_io_dio, dio_flags);
 +      else
 +              ret = __blockdev_direct_IO(rw, iocb, inode,
 +                                         inode->i_sb->s_bdev, iter, offset,
 +                                         get_block_func,
 +                                         ext4_end_io_dio, NULL, dio_flags);
  
        /*
         * Put our reference to io_end. This can free the io_end structure e.g.
@@@ -3198,12 -3187,19 +3205,12 @@@ void ext4_set_aops(struct inode *inode
                inode->i_mapping->a_ops = &ext4_aops;
  }
  
 -/*
 - * ext4_block_zero_page_range() zeros out a mapping of length 'length'
 - * starting from file offset 'from'.  The range to be zero'd must
 - * be contained with in one block.  If the specified range exceeds
 - * the end of the block it will be shortened to end of the block
 - * that cooresponds to 'from'
 - */
 -static int ext4_block_zero_page_range(handle_t *handle,
 +static int __ext4_block_zero_page_range(handle_t *handle,
                struct address_space *mapping, loff_t from, loff_t length)
  {
        ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
        unsigned offset = from & (PAGE_CACHE_SIZE-1);
 -      unsigned blocksize, max, pos;
 +      unsigned blocksize, pos;
        ext4_lblk_t iblock;
        struct inode *inode = mapping->host;
        struct buffer_head *bh;
                return -ENOMEM;
  
        blocksize = inode->i_sb->s_blocksize;
 -      max = blocksize - (offset & (blocksize - 1));
 -
 -      /*
 -       * correct length if it does not fall between
 -       * 'from' and the end of the block
 -       */
 -      if (length > max || length < 0)
 -              length = max;
  
        iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
  
@@@ -3280,33 -3284,6 +3287,33 @@@ unlock
        return err;
  }
  
 +/*
 + * ext4_block_zero_page_range() zeros out a mapping of length 'length'
 + * starting from file offset 'from'.  The range to be zero'd must
 + * be contained within one block.  If the specified range exceeds
 + * the end of the block it will be shortened to end of the block
 + * that corresponds to 'from'
 + */
 +static int ext4_block_zero_page_range(handle_t *handle,
 +              struct address_space *mapping, loff_t from, loff_t length)
 +{
 +      struct inode *inode = mapping->host;
 +      unsigned offset = from & (PAGE_CACHE_SIZE-1);
 +      unsigned blocksize = inode->i_sb->s_blocksize;
 +      unsigned max = blocksize - (offset & (blocksize - 1));
 +
 +      /*
 +       * correct length if it does not fall between
 +       * 'from' and the end of the block
 +       */
 +      if (length > max || length < 0)
 +              length = max;
 +
 +      if (IS_DAX(inode))
 +              return dax_zero_page_range(inode, from, length, ext4_get_block);
 +      return __ext4_block_zero_page_range(handle, mapping, from, length);
 +}
 +
  /*
   * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
   * up to the end of the block which corresponds to `from'.
@@@ -3828,10 -3805,8 +3835,10 @@@ void ext4_set_inode_flags(struct inode 
                new_fl |= S_NOATIME;
        if (flags & EXT4_DIRSYNC_FL)
                new_fl |= S_DIRSYNC;
 +      if (test_opt(inode->i_sb, DAX))
 +              new_fl |= S_DAX;
        inode_set_flags(inode, new_fl,
 -                      S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
 +                      S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
  }
  
  /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
@@@ -4084,10 -4059,7 +4091,10 @@@ struct inode *ext4_iget(struct super_bl
  
        if (S_ISREG(inode->i_mode)) {
                inode->i_op = &ext4_file_inode_operations;
 -              inode->i_fop = &ext4_file_operations;
 +              if (test_opt(inode->i_sb, DAX))
 +                      inode->i_fop = &ext4_dax_file_operations;
 +              else
 +                      inode->i_fop = &ext4_file_operations;
                ext4_set_aops(inode);
        } else if (S_ISDIR(inode->i_mode)) {
                inode->i_op = &ext4_dir_inode_operations;
@@@ -4174,65 -4146,6 +4181,65 @@@ static int ext4_inode_blocks_set(handle
        return 0;
  }
  
 +struct other_inode {
 +      unsigned long           orig_ino;
 +      struct ext4_inode       *raw_inode;
 +};
 +
 +static int other_inode_match(struct inode * inode, unsigned long ino,
 +                           void *data)
 +{
 +      struct other_inode *oi = (struct other_inode *) data;
 +
 +      if ((inode->i_ino != ino) ||
 +          (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
 +                             I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
 +          ((inode->i_state & I_DIRTY_TIME) == 0))
 +              return 0;
 +      spin_lock(&inode->i_lock);
 +      if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
 +                              I_DIRTY_SYNC | I_DIRTY_DATASYNC)) == 0) &&
 +          (inode->i_state & I_DIRTY_TIME)) {
 +              struct ext4_inode_info  *ei = EXT4_I(inode);
 +
 +              inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
 +              spin_unlock(&inode->i_lock);
 +
 +              spin_lock(&ei->i_raw_lock);
 +              EXT4_INODE_SET_XTIME(i_ctime, inode, oi->raw_inode);
 +              EXT4_INODE_SET_XTIME(i_mtime, inode, oi->raw_inode);
 +              EXT4_INODE_SET_XTIME(i_atime, inode, oi->raw_inode);
 +              ext4_inode_csum_set(inode, oi->raw_inode, ei);
 +              spin_unlock(&ei->i_raw_lock);
 +              trace_ext4_other_inode_update_time(inode, oi->orig_ino);
 +              return -1;
 +      }
 +      spin_unlock(&inode->i_lock);
 +      return -1;
 +}
 +
 +/*
 + * Opportunistically update the other time fields for other inodes in
 + * the same inode table block.
 + */
 +static void ext4_update_other_inodes_time(struct super_block *sb,
 +                                        unsigned long orig_ino, char *buf)
 +{
 +      struct other_inode oi;
 +      unsigned long ino;
 +      int i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
 +      int inode_size = EXT4_INODE_SIZE(sb);
 +
 +      oi.orig_ino = orig_ino;
 +      ino = orig_ino & ~(inodes_per_block - 1);
 +      for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
 +              if (ino == orig_ino)
 +                      continue;
 +              oi.raw_inode = (struct ext4_inode *) buf;
 +              (void) find_inode_nowait(sb, ino, other_inode_match, &oi);
 +      }
 +}
 +
  /*
   * Post the struct inode info into an on-disk inode location in the
   * buffer-cache.  This gobbles the caller's reference to the
@@@ -4342,11 -4255,10 +4349,11 @@@ static int ext4_do_update_inode(handle_
                                cpu_to_le16(ei->i_extra_isize);
                }
        }
 -
        ext4_inode_csum_set(inode, raw_inode, ei);
 -
        spin_unlock(&ei->i_raw_lock);
 +      if (inode->i_sb->s_flags & MS_LAZYTIME)
 +              ext4_update_other_inodes_time(inode->i_sb, inode->i_ino,
 +                                            bh->b_data);
  
        BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
        rc = ext4_handle_dirty_metadata(handle, NULL, bh);
@@@ -4629,7 -4541,7 +4636,7 @@@ int ext4_setattr(struct dentry *dentry
                 * Truncate pagecache after we've waited for commit
                 * in data=journal mode to make pages freeable.
                 */
 -                      truncate_pagecache(inode, inode->i_size);
 +              truncate_pagecache(inode, inode->i_size);
        }
        /*
         * We want to call ext4_truncate() even if attr->ia_size ==
@@@ -4935,17 -4847,11 +4942,17 @@@ int ext4_mark_inode_dirty(handle_t *han
   * If the inode is marked synchronous, we don't honour that here - doing
   * so would cause a commit on atime updates, which we don't bother doing.
   * We handle synchronous inodes at the highest possible level.
 + *
 + * If only the I_DIRTY_TIME flag is set, we can skip everything.  If
 + * I_DIRTY_TIME and I_DIRTY_SYNC are set, the only inode fields we need
 + * to copy into the on-disk inode structure are the timestamp fields.
   */
  void ext4_dirty_inode(struct inode *inode, int flags)
  {
        handle_t *handle;
  
 +      if (flags == I_DIRTY_TIME)
 +              return;
        handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
        if (IS_ERR(handle))
                goto out;
diff --combined fs/ext4/super.c
index 1adac6868e6fd0e97f91fa871ed45288fffc5cb6,bff3427784ca4aafe6c02f14797d8c1065970567..e061e66c82800f700b7642e4c82fa2cc836be05f
@@@ -334,7 -334,7 +334,7 @@@ static void save_error_info(struct supe
  static int block_device_ejected(struct super_block *sb)
  {
        struct inode *bd_inode = sb->s_bdev->bd_inode;
 -      struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info;
 +      struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
  
        return bdi->dev == NULL;
  }
@@@ -1046,7 -1046,10 +1046,7 @@@ static int ext4_mark_dquot_dirty(struc
  static int ext4_write_info(struct super_block *sb, int type);
  static int ext4_quota_on(struct super_block *sb, int type, int format_id,
                         struct path *path);
 -static int ext4_quota_on_sysfile(struct super_block *sb, int type,
 -                               int format_id);
  static int ext4_quota_off(struct super_block *sb, int type);
 -static int ext4_quota_off_sysfile(struct super_block *sb, int type);
  static int ext4_quota_on_mount(struct super_block *sb, int type);
  static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
                               size_t len, loff_t off);
@@@ -1081,6 -1084,16 +1081,6 @@@ static const struct quotactl_ops ext4_q
        .get_dqblk      = dquot_get_dqblk,
        .set_dqblk      = dquot_set_dqblk
  };
 -
 -static const struct quotactl_ops ext4_qctl_sysfile_operations = {
 -      .quota_on_meta  = ext4_quota_on_sysfile,
 -      .quota_off      = ext4_quota_off_sysfile,
 -      .quota_sync     = dquot_quota_sync,
 -      .get_info       = dquot_get_dqinfo,
 -      .set_info       = dquot_set_dqinfo,
 -      .get_dqblk      = dquot_get_dqblk,
 -      .set_dqblk      = dquot_set_dqblk
 -};
  #endif
  
  static const struct super_operations ext4_sops = {
@@@ -1124,9 -1137,8 +1124,9 @@@ enum 
        Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
        Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
 -      Opt_usrquota, Opt_grpquota, Opt_i_version,
 +      Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax,
        Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
 +      Opt_lazytime, Opt_nolazytime,
        Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
        Opt_inode_readahead_blks, Opt_journal_ioprio,
        Opt_dioread_nolock, Opt_dioread_lock,
@@@ -1188,11 -1200,8 +1188,11 @@@ static const match_table_t tokens = 
        {Opt_barrier, "barrier"},
        {Opt_nobarrier, "nobarrier"},
        {Opt_i_version, "i_version"},
 +      {Opt_dax, "dax"},
        {Opt_stripe, "stripe=%u"},
        {Opt_delalloc, "delalloc"},
 +      {Opt_lazytime, "lazytime"},
 +      {Opt_nolazytime, "nolazytime"},
        {Opt_nodelalloc, "nodelalloc"},
        {Opt_removed, "mblk_io_submit"},
        {Opt_removed, "nomblk_io_submit"},
@@@ -1375,7 -1384,6 +1375,7 @@@ static const struct mount_opts 
        {Opt_min_batch_time, 0, MOPT_GTE0},
        {Opt_inode_readahead_blks, 0, MOPT_GTE0},
        {Opt_init_itable, 0, MOPT_GTE0},
 +      {Opt_dax, EXT4_MOUNT_DAX, MOPT_SET},
        {Opt_stripe, 0, MOPT_GTE0},
        {Opt_resuid, 0, MOPT_GTE0},
        {Opt_resgid, 0, MOPT_GTE0},
@@@ -1451,12 -1459,6 +1451,12 @@@ static int handle_mount_opt(struct supe
        case Opt_i_version:
                sb->s_flags |= MS_I_VERSION;
                return 1;
 +      case Opt_lazytime:
 +              sb->s_flags |= MS_LAZYTIME;
 +              return 1;
 +      case Opt_nolazytime:
 +              sb->s_flags &= ~MS_LAZYTIME;
 +              return 1;
        }
  
        for (m = ext4_mount_opts; m->token != Opt_err; m++)
                        return -1;
                }
                sbi->s_jquota_fmt = m->mount_opt;
 +#endif
 +#ifndef CONFIG_FS_DAX
 +      } else if (token == Opt_dax) {
 +              ext4_msg(sb, KERN_INFO, "dax option not supported");
 +              return -1;
  #endif
        } else {
                if (!args->from)
@@@ -2779,6 -2776,12 +2779,12 @@@ static int ext4_feature_set_ok(struct s
        if (readonly)
                return 1;
  
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_READONLY)) {
+               ext4_msg(sb, KERN_INFO, "filesystem is read-only");
+               sb->s_flags |= MS_RDONLY;
+               return 1;
+       }
        /* Check that feature set is OK for a read-write mount */
        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) {
                ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
@@@ -3605,11 -3608,6 +3611,11 @@@ static int ext4_fill_super(struct super
                                 "both data=journal and dioread_nolock");
                        goto failed_mount;
                }
 +              if (test_opt(sb, DAX)) {
 +                      ext4_msg(sb, KERN_ERR, "can't mount with "
 +                               "both data=journal and dax");
 +                      goto failed_mount;
 +              }
                if (test_opt(sb, DELALLOC))
                        clear_opt(sb, DELALLOC);
        }
                goto failed_mount;
        }
  
 +      if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
 +              if (blocksize != PAGE_SIZE) {
 +                      ext4_msg(sb, KERN_ERR,
 +                                      "error: unsupported blocksize for dax");
 +                      goto failed_mount;
 +              }
 +              if (!sb->s_bdev->bd_disk->fops->direct_access) {
 +                      ext4_msg(sb, KERN_ERR,
 +                                      "error: device does not support dax");
 +                      goto failed_mount;
 +              }
 +      }
 +
        if (sb->s_blocksize != blocksize) {
                /* Validate the filesystem blocksize */
                if (!sb_set_blocksize(sb, blocksize)) {
        get_random_bytes(&sbi->s_next_generation, sizeof(u32));
        spin_lock_init(&sbi->s_next_gen_lock);
  
-       init_timer(&sbi->s_err_report);
-       sbi->s_err_report.function = print_daily_error_info;
-       sbi->s_err_report.data = (unsigned long) sb;
+       setup_timer(&sbi->s_err_report, print_daily_error_info,
+               (unsigned long) sb);
  
        /* Register extent status tree shrinker */
        if (ext4_es_register_shrinker(sbi))
  #ifdef CONFIG_QUOTA
        sb->dq_op = &ext4_quota_operations;
        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
 -              sb->s_qcop = &ext4_qctl_sysfile_operations;
 +              sb->s_qcop = &dquot_quotactl_sysfile_ops;
        else
                sb->s_qcop = &ext4_qctl_operations;
        sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
@@@ -4866,9 -4850,6 +4871,6 @@@ static int ext4_remount(struct super_bl
        if (sbi->s_journal && sbi->s_journal->j_task->io_context)
                journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
  
-       /*
-        * Allow the "check" option to be passed as a remount option.
-        */
        if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
                err = -EINVAL;
                goto restore_opts;
        if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
            test_opt(sb, JOURNAL_CHECKSUM)) {
                ext4_msg(sb, KERN_ERR, "changing journal_checksum "
-                        "during remount not supported");
-               err = -EINVAL;
-               goto restore_opts;
-       }
-       if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
-           test_opt(sb, JOURNAL_CHECKSUM)) {
-               ext4_msg(sb, KERN_ERR, "changing journal_checksum "
-                        "during remount not supported");
-               err = -EINVAL;
-               goto restore_opts;
+                        "during remount not supported; ignoring");
+               sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM;
        }
  
        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
                        err = -EINVAL;
                        goto restore_opts;
                }
 +              if (test_opt(sb, DAX)) {
 +                      ext4_msg(sb, KERN_ERR, "can't mount with "
 +                               "both data=journal and dax");
 +                      err = -EINVAL;
 +                      goto restore_opts;
 +              }
 +      }
 +
 +      if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
 +              ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
 +                      "dax flag with busy inodes while remounting");
 +              sbi->s_mount_opt ^= EXT4_MOUNT_DAX;
        }
  
        if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
                                ext4_mark_recovery_complete(sb, es);
                } else {
                        /* Make sure we can mount this feature set readwrite */
-                       if (!ext4_feature_set_ok(sb, 0)) {
+                       if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                                       EXT4_FEATURE_RO_COMPAT_READONLY) ||
+                           !ext4_feature_set_ok(sb, 0)) {
                                err = -EROFS;
                                goto restore_opts;
                        }
        }
  #endif
  
 +      *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME);
        ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
        kfree(orig_data);
        return 0;
@@@ -5322,6 -5283,21 +5317,6 @@@ static int ext4_enable_quotas(struct su
        return 0;
  }
  
 -/*
 - * quota_on function that is used when QUOTA feature is set.
 - */
 -static int ext4_quota_on_sysfile(struct super_block *sb, int type,
 -                               int format_id)
 -{
 -      if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
 -              return -EINVAL;
 -
 -      /*
 -       * USAGE was enabled at mount time. Only need to enable LIMITS now.
 -       */
 -      return ext4_quota_enable(sb, type, format_id, DQUOT_LIMITS_ENABLED);
 -}
 -
  static int ext4_quota_off(struct super_block *sb, int type)
  {
        struct inode *inode = sb_dqopt(sb)->files[type];
@@@ -5348,6 -5324,18 +5343,6 @@@ out
        return dquot_quota_off(sb, type);
  }
  
 -/*
 - * quota_off function that is used when QUOTA feature is set.
 - */
 -static int ext4_quota_off_sysfile(struct super_block *sb, int type)
 -{
 -      if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
 -              return -EINVAL;
 -
 -      /* Disable only the limits. */
 -      return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED);
 -}
 -
  /* Read data from quotafile - avoid pagecache and such because we cannot afford
   * acquiring the locks... As quota files are never truncated and quota code
   * itself serializes the operations (and no one else should touch the files)