git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/commitdiff
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso...
author: Linus Torvalds <torvalds@linux-foundation.org>
Mon, 23 Feb 2015 02:05:13 +0000 (18:05 -0800)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Mon, 23 Feb 2015 02:05:13 +0000 (18:05 -0800)
Pull ext4 fixes from Ted Ts'o:
 "Ext4 bug fixes.

  We also reserved code points for encryption and read-only images (for
  which the implementation is mostly just the reserved code point for a
  read-only feature :-)"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: fix indirect punch hole corruption
  ext4: ignore journal checksum on remount; don't fail
  ext4: remove duplicate remount check for JOURNAL_CHECKSUM change
  ext4: fix mmap data corruption in nodelalloc mode when blocksize < pagesize
  ext4: support read-only images
  ext4: change to use setup_timer() instead of init_timer()
  ext4: reserve codepoints used by the ext4 encryption feature
  jbd2: complain about descriptor block checksum errors

1  2 
fs/ext4/ext4.h
fs/ext4/indirect.c
fs/ext4/inode.c
fs/ext4/super.c

diff --combined fs/ext4/ext4.h
index 982d934fd9ac98338377d3b1621b3d577531b6e6,7fec2efd8635e83d2c4196c3fe2f970846cd2804..f63c3d5805c4c156ad3ed412cbecf85e700cf9d2
@@@ -364,7 -364,8 +364,8 @@@ struct flex_groups 
  #define EXT4_DIRTY_FL                 0x00000100
  #define EXT4_COMPRBLK_FL              0x00000200 /* One or more compressed clusters */
  #define EXT4_NOCOMPR_FL                       0x00000400 /* Don't compress */
- #define EXT4_ECOMPR_FL                        0x00000800 /* Compression error */
+       /* nb: was previously EXT2_ECOMPR_FL */
+ #define EXT4_ENCRYPT_FL                       0x00000800 /* encrypted file */
  /* End compression flags --- maybe not all used */
  #define EXT4_INDEX_FL                 0x00001000 /* hash-indexed directory */
  #define EXT4_IMAGIC_FL                        0x00002000 /* AFS directory */
@@@ -421,7 -422,7 +422,7 @@@ enum 
        EXT4_INODE_DIRTY        = 8,
        EXT4_INODE_COMPRBLK     = 9,    /* One or more compressed clusters */
        EXT4_INODE_NOCOMPR      = 10,   /* Don't compress */
-       EXT4_INODE_ECOMPR       = 11,   /* Compression error */
+       EXT4_INODE_ENCRYPT      = 11,   /* Encrypted file */
  /* End compression flags --- maybe not all used */
        EXT4_INODE_INDEX        = 12,   /* hash-indexed directory */
        EXT4_INODE_IMAGIC       = 13,   /* AFS directory */
@@@ -466,7 -467,7 +467,7 @@@ static inline void ext4_check_flag_valu
        CHECK_FLAG_VALUE(DIRTY);
        CHECK_FLAG_VALUE(COMPRBLK);
        CHECK_FLAG_VALUE(NOCOMPR);
-       CHECK_FLAG_VALUE(ECOMPR);
+       CHECK_FLAG_VALUE(ENCRYPT);
        CHECK_FLAG_VALUE(INDEX);
        CHECK_FLAG_VALUE(IMAGIC);
        CHECK_FLAG_VALUE(JOURNAL_DATA);
@@@ -965,11 -966,6 +966,11 @@@ struct ext4_inode_info 
  #define EXT4_MOUNT_ERRORS_MASK                0x00070
  #define EXT4_MOUNT_MINIX_DF           0x00080 /* Mimics the Minix statfs */
  #define EXT4_MOUNT_NOLOAD             0x00100 /* Don't use existing journal*/
 +#ifdef CONFIG_FS_DAX
 +#define EXT4_MOUNT_DAX                        0x00200 /* Direct Access */
 +#else
 +#define EXT4_MOUNT_DAX                        0
 +#endif
  #define EXT4_MOUNT_DATA_FLAGS         0x00C00 /* Mode for data writes: */
  #define EXT4_MOUNT_JOURNAL_DATA               0x00400 /* Write data to journal */
  #define EXT4_MOUNT_ORDERED_DATA               0x00800 /* Flush data before commit */
@@@ -1048,6 -1044,12 +1049,12 @@@ extern void ext4_set_bits(void *bm, in
  /* Metadata checksum algorithm codes */
  #define EXT4_CRC32C_CHKSUM            1
  
+ /* Encryption algorithms */
+ #define EXT4_ENCRYPTION_MODE_INVALID          0
+ #define EXT4_ENCRYPTION_MODE_AES_256_XTS      1
+ #define EXT4_ENCRYPTION_MODE_AES_256_GCM      2
+ #define EXT4_ENCRYPTION_MODE_AES_256_CBC      3
  /*
   * Structure of the super block
   */
@@@ -1161,7 -1163,8 +1168,8 @@@ struct ext4_super_block 
        __le32  s_grp_quota_inum;       /* inode for tracking group quota */
        __le32  s_overhead_clusters;    /* overhead blocks/clusters in fs */
        __le32  s_backup_bgs[2];        /* groups with sparse_super2 SBs */
-       __le32  s_reserved[106];        /* Padding to the end of the block */
+       __u8    s_encrypt_algos[4];     /* Encryption algorithms in use  */
+       __le32  s_reserved[105];        /* Padding to the end of the block */
        __le32  s_checksum;             /* crc32c(superblock) */
  };
  
@@@ -1527,6 -1530,7 +1535,7 @@@ static inline void ext4_clear_state_fla
   * GDT_CSUM bits are mutually exclusive.
   */
  #define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM  0x0400
+ #define EXT4_FEATURE_RO_COMPAT_READONLY               0x1000
  
  #define EXT4_FEATURE_INCOMPAT_COMPRESSION     0x0001
  #define EXT4_FEATURE_INCOMPAT_FILETYPE                0x0002
  #define EXT4_FEATURE_INCOMPAT_BG_USE_META_CSUM        0x2000 /* use crc32c for bg */
  #define EXT4_FEATURE_INCOMPAT_LARGEDIR                0x4000 /* >2GB or 3-lvl htree */
  #define EXT4_FEATURE_INCOMPAT_INLINE_DATA     0x8000 /* data in inode */
+ #define EXT4_FEATURE_INCOMPAT_ENCRYPT         0x10000
  
  #define EXT2_FEATURE_COMPAT_SUPP      EXT4_FEATURE_COMPAT_EXT_ATTR
  #define EXT2_FEATURE_INCOMPAT_SUPP    (EXT4_FEATURE_INCOMPAT_FILETYPE| \
@@@ -2583,7 -2588,6 +2593,7 @@@ extern const struct file_operations ext
  /* file.c */
  extern const struct inode_operations ext4_file_inode_operations;
  extern const struct file_operations ext4_file_operations;
 +extern const struct file_operations ext4_dax_file_operations;
  extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
  
  /* inline.c */
diff --combined fs/ext4/indirect.c
index 6b9878a24182b06125cb496ef973ff0d9b739106,5e7af1c69577249441e9de8a662b432447822dfb..45fe924f82bce2ff76e3e74b45ec1833729433ea
@@@ -689,22 -689,14 +689,22 @@@ retry
                        inode_dio_done(inode);
                        goto locked;
                }
 -              ret = __blockdev_direct_IO(rw, iocb, inode,
 -                               inode->i_sb->s_bdev, iter, offset,
 -                               ext4_get_block, NULL, NULL, 0);
 +              if (IS_DAX(inode))
 +                      ret = dax_do_io(rw, iocb, inode, iter, offset,
 +                                      ext4_get_block, NULL, 0);
 +              else
 +                      ret = __blockdev_direct_IO(rw, iocb, inode,
 +                                      inode->i_sb->s_bdev, iter, offset,
 +                                      ext4_get_block, NULL, NULL, 0);
                inode_dio_done(inode);
        } else {
  locked:
 -              ret = blockdev_direct_IO(rw, iocb, inode, iter,
 -                               offset, ext4_get_block);
 +              if (IS_DAX(inode))
 +                      ret = dax_do_io(rw, iocb, inode, iter, offset,
 +                                      ext4_get_block, NULL, DIO_LOCKING);
 +              else
 +                      ret = blockdev_direct_IO(rw, iocb, inode, iter,
 +                                      offset, ext4_get_block);
  
                if (unlikely((rw & WRITE) && ret < 0)) {
                        loff_t isize = i_size_read(inode);
@@@ -1401,10 -1393,7 +1401,7 @@@ end_range
                                 * to free. Everything was covered by the start
                                 * of the range.
                                 */
-                               return 0;
-                       } else {
-                               /* Shared branch grows from an indirect block */
-                               partial2--;
+                               goto do_indirects;
                        }
                } else {
                        /*
        /* Punch happened within the same level (n == n2) */
        partial = ext4_find_shared(inode, n, offsets, chain, &nr);
        partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
-       /*
-        * ext4_find_shared returns Indirect structure which
-        * points to the last element which should not be
-        * removed by truncate. But this is end of the range
-        * in punch_hole so we need to point to the next element
-        */
-       partial2->p++;
-       while ((partial > chain) || (partial2 > chain2)) {
-               /* We're at the same block, so we're almost finished */
-               if ((partial->bh && partial2->bh) &&
-                   (partial->bh->b_blocknr == partial2->bh->b_blocknr)) {
-                       if ((partial > chain) && (partial2 > chain2)) {
+       /* Free top, but only if partial2 isn't its subtree. */
+       if (nr) {
+               int level = min(partial - chain, partial2 - chain2);
+               int i;
+               int subtree = 1;
+               for (i = 0; i <= level; i++) {
+                       if (offsets[i] != offsets2[i]) {
+                               subtree = 0;
+                               break;
+                       }
+               }
+               if (!subtree) {
+                       if (partial == chain) {
+                               /* Shared branch grows from the inode */
+                               ext4_free_branches(handle, inode, NULL,
+                                                  &nr, &nr+1,
+                                                  (chain+n-1) - partial);
+                               *partial->p = 0;
+                       } else {
+                               /* Shared branch grows from an indirect block */
+                               BUFFER_TRACE(partial->bh, "get_write_access");
                                ext4_free_branches(handle, inode, partial->bh,
-                                                  partial->p + 1,
-                                                  partial2->p,
+                                                  partial->p,
+                                                  partial->p+1,
                                                   (chain+n-1) - partial);
-                               BUFFER_TRACE(partial->bh, "call brelse");
-                               brelse(partial->bh);
-                               BUFFER_TRACE(partial2->bh, "call brelse");
-                               brelse(partial2->bh);
                        }
-                       return 0;
                }
+       }
+       if (!nr2) {
                /*
-                * Clear the ends of indirect blocks on the shared branch
-                * at the start of the range
+                * ext4_find_shared returns Indirect structure which
+                * points to the last element which should not be
+                * removed by truncate. But this is end of the range
+                * in punch_hole so we need to point to the next element
                 */
-               if (partial > chain) {
+               partial2->p++;
+       }
+       while (partial > chain || partial2 > chain2) {
+               int depth = (chain+n-1) - partial;
+               int depth2 = (chain2+n2-1) - partial2;
+               if (partial > chain && partial2 > chain2 &&
+                   partial->bh->b_blocknr == partial2->bh->b_blocknr) {
+                       /*
+                        * We've converged on the same block. Clear the range,
+                        * then we're done.
+                        */
                        ext4_free_branches(handle, inode, partial->bh,
-                                  partial->p + 1,
-                                  (__le32 *)partial->bh->b_data+addr_per_block,
-                                  (chain+n-1) - partial);
+                                          partial->p + 1,
+                                          partial2->p,
+                                          (chain+n-1) - partial);
                        BUFFER_TRACE(partial->bh, "call brelse");
                        brelse(partial->bh);
-                       partial--;
+                       BUFFER_TRACE(partial2->bh, "call brelse");
+                       brelse(partial2->bh);
+                       return 0;
                }
                /*
-                * Clear the ends of indirect blocks on the shared branch
-                * at the end of the range
+                * The start and end partial branches may not be at the same
+                * level even though the punch happened within one level. So, we
+                * give them a chance to arrive at the same level, then walk
+                * them in step with each other until we converge on the same
+                * block.
                 */
-               if (partial2 > chain2) {
+               if (partial > chain && depth <= depth2) {
+                       ext4_free_branches(handle, inode, partial->bh,
+                                          partial->p + 1,
+                                          (__le32 *)partial->bh->b_data+addr_per_block,
+                                          (chain+n-1) - partial);
+                       BUFFER_TRACE(partial->bh, "call brelse");
+                       brelse(partial->bh);
+                       partial--;
+               }
+               if (partial2 > chain2 && depth2 <= depth) {
                        ext4_free_branches(handle, inode, partial2->bh,
                                           (__le32 *)partial2->bh->b_data,
                                           partial2->p,
-                                          (chain2+n-1) - partial2);
+                                          (chain2+n2-1) - partial2);
                        BUFFER_TRACE(partial2->bh, "call brelse");
                        brelse(partial2->bh);
                        partial2--;
                }
        }
+       return 0;
  
  do_indirects:
        /* Kill the remaining (whole) subtrees */
diff --combined fs/ext4/inode.c
index 85404f15e53a28860ce5a7be08220106b76fc06f,4df6d01b762eb52e9e181b95328e3d726ef77c42..5cb9a212b86f3efd69ca604df07dc20b901dabb1
@@@ -657,18 -657,6 +657,18 @@@ has_zeroout
        return retval;
  }
  
 +static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
 +{
 +      struct inode *inode = bh->b_assoc_map->host;
 +      /* XXX: breaks on 32-bit > 16GB. Is that even supported? */
 +      loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
 +      int err;
 +      if (!uptodate)
 +              return;
 +      WARN_ON(!buffer_unwritten(bh));
 +      err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
 +}
 +
  /* Maximum number of blocks we map for direct IO at once. */
  #define DIO_MAX_BLOCKS 4096
  
@@@ -706,11 -694,6 +706,11 @@@ static int _ext4_get_block(struct inod
  
                map_bh(bh, inode->i_sb, map.m_pblk);
                bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
 +              if (IS_DAX(inode) && buffer_unwritten(bh) && !io_end) {
 +                      bh->b_assoc_map = inode->i_mapping;
 +                      bh->b_private = (void *)(unsigned long)iblock;
 +                      bh->b_end_io = ext4_end_io_unwritten;
 +              }
                if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
                        set_buffer_defer_completion(bh);
                bh->b_size = inode->i_sb->s_blocksize * map.m_len;
@@@ -1024,6 -1007,7 +1024,7 @@@ static int ext4_write_end(struct file *
  {
        handle_t *handle = ext4_journal_current_handle();
        struct inode *inode = mapping->host;
+       loff_t old_size = inode->i_size;
        int ret = 0, ret2;
        int i_size_changed = 0;
  
        unlock_page(page);
        page_cache_release(page);
  
+       if (old_size < pos)
+               pagecache_isize_extended(inode, old_size, pos);
        /*
         * Don't mark the inode dirty under page lock. First, it unnecessarily
         * makes the holding time of page lock longer. Second, it forces lock
@@@ -1095,6 -1081,7 +1098,7 @@@ static int ext4_journalled_write_end(st
  {
        handle_t *handle = ext4_journal_current_handle();
        struct inode *inode = mapping->host;
+       loff_t old_size = inode->i_size;
        int ret = 0, ret2;
        int partial = 0;
        unsigned from, to;
        unlock_page(page);
        page_cache_release(page);
  
+       if (old_size < pos)
+               pagecache_isize_extended(inode, old_size, pos);
        if (size_changed) {
                ret2 = ext4_mark_inode_dirty(handle, inode);
                if (!ret)
@@@ -3027,14 -3017,13 +3034,14 @@@ static ssize_t ext4_ext_direct_IO(int r
                get_block_func = ext4_get_block_write;
                dio_flags = DIO_LOCKING;
        }
 -      ret = __blockdev_direct_IO(rw, iocb, inode,
 -                                 inode->i_sb->s_bdev, iter,
 -                                 offset,
 -                                 get_block_func,
 -                                 ext4_end_io_dio,
 -                                 NULL,
 -                                 dio_flags);
 +      if (IS_DAX(inode))
 +              ret = dax_do_io(rw, iocb, inode, iter, offset, get_block_func,
 +                              ext4_end_io_dio, dio_flags);
 +      else
 +              ret = __blockdev_direct_IO(rw, iocb, inode,
 +                                         inode->i_sb->s_bdev, iter, offset,
 +                                         get_block_func,
 +                                         ext4_end_io_dio, NULL, dio_flags);
  
        /*
         * Put our reference to io_end. This can free the io_end structure e.g.
@@@ -3198,12 -3187,19 +3205,12 @@@ void ext4_set_aops(struct inode *inode
                inode->i_mapping->a_ops = &ext4_aops;
  }
  
 -/*
 - * ext4_block_zero_page_range() zeros out a mapping of length 'length'
 - * starting from file offset 'from'.  The range to be zero'd must
 - * be contained with in one block.  If the specified range exceeds
 - * the end of the block it will be shortened to end of the block
 - * that cooresponds to 'from'
 - */
 -static int ext4_block_zero_page_range(handle_t *handle,
 +static int __ext4_block_zero_page_range(handle_t *handle,
                struct address_space *mapping, loff_t from, loff_t length)
  {
        ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
        unsigned offset = from & (PAGE_CACHE_SIZE-1);
 -      unsigned blocksize, max, pos;
 +      unsigned blocksize, pos;
        ext4_lblk_t iblock;
        struct inode *inode = mapping->host;
        struct buffer_head *bh;
                return -ENOMEM;
  
        blocksize = inode->i_sb->s_blocksize;
 -      max = blocksize - (offset & (blocksize - 1));
 -
 -      /*
 -       * correct length if it does not fall between
 -       * 'from' and the end of the block
 -       */
 -      if (length > max || length < 0)
 -              length = max;
  
        iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
  
@@@ -3280,33 -3284,6 +3287,33 @@@ unlock
        return err;
  }
  
 +/*
 + * ext4_block_zero_page_range() zeros out a mapping of length 'length'
 + * starting from file offset 'from'.  The range to be zero'd must
 + * be contained within one block.  If the specified range exceeds
 + * the end of the block it will be shortened to end of the block
 + * that corresponds to 'from'
 + */
 +static int ext4_block_zero_page_range(handle_t *handle,
 +              struct address_space *mapping, loff_t from, loff_t length)
 +{
 +      struct inode *inode = mapping->host;
 +      unsigned offset = from & (PAGE_CACHE_SIZE-1);
 +      unsigned blocksize = inode->i_sb->s_blocksize;
 +      unsigned max = blocksize - (offset & (blocksize - 1));
 +
 +      /*
 +       * correct length if it does not fall between
 +       * 'from' and the end of the block
 +       */
 +      if (length > max || length < 0)
 +              length = max;
 +
 +      if (IS_DAX(inode))
 +              return dax_zero_page_range(inode, from, length, ext4_get_block);
 +      return __ext4_block_zero_page_range(handle, mapping, from, length);
 +}
 +
  /*
   * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
   * up to the end of the block which corresponds to `from'.
@@@ -3828,10 -3805,8 +3835,10 @@@ void ext4_set_inode_flags(struct inode 
                new_fl |= S_NOATIME;
        if (flags & EXT4_DIRSYNC_FL)
                new_fl |= S_DIRSYNC;
 +      if (test_opt(inode->i_sb, DAX))
 +              new_fl |= S_DAX;
        inode_set_flags(inode, new_fl,
 -                      S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
 +                      S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
  }
  
  /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
@@@ -4084,10 -4059,7 +4091,10 @@@ struct inode *ext4_iget(struct super_bl
  
        if (S_ISREG(inode->i_mode)) {
                inode->i_op = &ext4_file_inode_operations;
 -              inode->i_fop = &ext4_file_operations;
 +              if (test_opt(inode->i_sb, DAX))
 +                      inode->i_fop = &ext4_dax_file_operations;
 +              else
 +                      inode->i_fop = &ext4_file_operations;
                ext4_set_aops(inode);
        } else if (S_ISDIR(inode->i_mode)) {
                inode->i_op = &ext4_dir_inode_operations;
@@@ -4174,65 -4146,6 +4181,65 @@@ static int ext4_inode_blocks_set(handle
        return 0;
  }
  
 +struct other_inode {
 +      unsigned long           orig_ino;
 +      struct ext4_inode       *raw_inode;
 +};
 +
 +static int other_inode_match(struct inode * inode, unsigned long ino,
 +                           void *data)
 +{
 +      struct other_inode *oi = (struct other_inode *) data;
 +
 +      if ((inode->i_ino != ino) ||
 +          (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
 +                             I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
 +          ((inode->i_state & I_DIRTY_TIME) == 0))
 +              return 0;
 +      spin_lock(&inode->i_lock);
 +      if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
 +                              I_DIRTY_SYNC | I_DIRTY_DATASYNC)) == 0) &&
 +          (inode->i_state & I_DIRTY_TIME)) {
 +              struct ext4_inode_info  *ei = EXT4_I(inode);
 +
 +              inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
 +              spin_unlock(&inode->i_lock);
 +
 +              spin_lock(&ei->i_raw_lock);
 +              EXT4_INODE_SET_XTIME(i_ctime, inode, oi->raw_inode);
 +              EXT4_INODE_SET_XTIME(i_mtime, inode, oi->raw_inode);
 +              EXT4_INODE_SET_XTIME(i_atime, inode, oi->raw_inode);
 +              ext4_inode_csum_set(inode, oi->raw_inode, ei);
 +              spin_unlock(&ei->i_raw_lock);
 +              trace_ext4_other_inode_update_time(inode, oi->orig_ino);
 +              return -1;
 +      }
 +      spin_unlock(&inode->i_lock);
 +      return -1;
 +}
 +
 +/*
 + * Opportunistically update the other time fields for other inodes in
 + * the same inode table block.
 + */
 +static void ext4_update_other_inodes_time(struct super_block *sb,
 +                                        unsigned long orig_ino, char *buf)
 +{
 +      struct other_inode oi;
 +      unsigned long ino;
 +      int i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
 +      int inode_size = EXT4_INODE_SIZE(sb);
 +
 +      oi.orig_ino = orig_ino;
 +      ino = orig_ino & ~(inodes_per_block - 1);
 +      for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
 +              if (ino == orig_ino)
 +                      continue;
 +              oi.raw_inode = (struct ext4_inode *) buf;
 +              (void) find_inode_nowait(sb, ino, other_inode_match, &oi);
 +      }
 +}
 +
  /*
   * Post the struct inode info into an on-disk inode location in the
   * buffer-cache.  This gobbles the caller's reference to the
@@@ -4342,11 -4255,10 +4349,11 @@@ static int ext4_do_update_inode(handle_
                                cpu_to_le16(ei->i_extra_isize);
                }
        }
 -
        ext4_inode_csum_set(inode, raw_inode, ei);
 -
        spin_unlock(&ei->i_raw_lock);
 +      if (inode->i_sb->s_flags & MS_LAZYTIME)
 +              ext4_update_other_inodes_time(inode->i_sb, inode->i_ino,
 +                                            bh->b_data);
  
        BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
        rc = ext4_handle_dirty_metadata(handle, NULL, bh);
@@@ -4629,7 -4541,7 +4636,7 @@@ int ext4_setattr(struct dentry *dentry
                 * Truncate pagecache after we've waited for commit
                 * in data=journal mode to make pages freeable.
                 */
 -                      truncate_pagecache(inode, inode->i_size);
 +              truncate_pagecache(inode, inode->i_size);
        }
        /*
         * We want to call ext4_truncate() even if attr->ia_size ==
@@@ -4935,17 -4847,11 +4942,17 @@@ int ext4_mark_inode_dirty(handle_t *han
   * If the inode is marked synchronous, we don't honour that here - doing
   * so would cause a commit on atime updates, which we don't bother doing.
   * We handle synchronous inodes at the highest possible level.
 + *
 + * If only the I_DIRTY_TIME flag is set, we can skip everything.  If
 + * I_DIRTY_TIME and I_DIRTY_SYNC are set, the only inode fields we need
 + * to copy into the on-disk inode structure are the timestamp fields.
   */
  void ext4_dirty_inode(struct inode *inode, int flags)
  {
        handle_t *handle;
  
 +      if (flags == I_DIRTY_TIME)
 +              return;
        handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
        if (IS_ERR(handle))
                goto out;
diff --combined fs/ext4/super.c
index 1adac6868e6fd0e97f91fa871ed45288fffc5cb6,bff3427784ca4aafe6c02f14797d8c1065970567..e061e66c82800f700b7642e4c82fa2cc836be05f
@@@ -334,7 -334,7 +334,7 @@@ static void save_error_info(struct supe
  static int block_device_ejected(struct super_block *sb)
  {
        struct inode *bd_inode = sb->s_bdev->bd_inode;
 -      struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info;
 +      struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
  
        return bdi->dev == NULL;
  }
@@@ -1046,7 -1046,10 +1046,7 @@@ static int ext4_mark_dquot_dirty(struc
  static int ext4_write_info(struct super_block *sb, int type);
  static int ext4_quota_on(struct super_block *sb, int type, int format_id,
                         struct path *path);
 -static int ext4_quota_on_sysfile(struct super_block *sb, int type,
 -                               int format_id);
  static int ext4_quota_off(struct super_block *sb, int type);
 -static int ext4_quota_off_sysfile(struct super_block *sb, int type);
  static int ext4_quota_on_mount(struct super_block *sb, int type);
  static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
                               size_t len, loff_t off);
@@@ -1081,6 -1084,16 +1081,6 @@@ static const struct quotactl_ops ext4_q
        .get_dqblk      = dquot_get_dqblk,
        .set_dqblk      = dquot_set_dqblk
  };
 -
 -static const struct quotactl_ops ext4_qctl_sysfile_operations = {
 -      .quota_on_meta  = ext4_quota_on_sysfile,
 -      .quota_off      = ext4_quota_off_sysfile,
 -      .quota_sync     = dquot_quota_sync,
 -      .get_info       = dquot_get_dqinfo,
 -      .set_info       = dquot_set_dqinfo,
 -      .get_dqblk      = dquot_get_dqblk,
 -      .set_dqblk      = dquot_set_dqblk
 -};
  #endif
  
  static const struct super_operations ext4_sops = {
@@@ -1124,9 -1137,8 +1124,9 @@@ enum 
        Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
        Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
 -      Opt_usrquota, Opt_grpquota, Opt_i_version,
 +      Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax,
        Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
 +      Opt_lazytime, Opt_nolazytime,
        Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
        Opt_inode_readahead_blks, Opt_journal_ioprio,
        Opt_dioread_nolock, Opt_dioread_lock,
@@@ -1188,11 -1200,8 +1188,11 @@@ static const match_table_t tokens = 
        {Opt_barrier, "barrier"},
        {Opt_nobarrier, "nobarrier"},
        {Opt_i_version, "i_version"},
 +      {Opt_dax, "dax"},
        {Opt_stripe, "stripe=%u"},
        {Opt_delalloc, "delalloc"},
 +      {Opt_lazytime, "lazytime"},
 +      {Opt_nolazytime, "nolazytime"},
        {Opt_nodelalloc, "nodelalloc"},
        {Opt_removed, "mblk_io_submit"},
        {Opt_removed, "nomblk_io_submit"},
@@@ -1375,7 -1384,6 +1375,7 @@@ static const struct mount_opts 
        {Opt_min_batch_time, 0, MOPT_GTE0},
        {Opt_inode_readahead_blks, 0, MOPT_GTE0},
        {Opt_init_itable, 0, MOPT_GTE0},
 +      {Opt_dax, EXT4_MOUNT_DAX, MOPT_SET},
        {Opt_stripe, 0, MOPT_GTE0},
        {Opt_resuid, 0, MOPT_GTE0},
        {Opt_resgid, 0, MOPT_GTE0},
@@@ -1451,12 -1459,6 +1451,12 @@@ static int handle_mount_opt(struct supe
        case Opt_i_version:
                sb->s_flags |= MS_I_VERSION;
                return 1;
 +      case Opt_lazytime:
 +              sb->s_flags |= MS_LAZYTIME;
 +              return 1;
 +      case Opt_nolazytime:
 +              sb->s_flags &= ~MS_LAZYTIME;
 +              return 1;
        }
  
        for (m = ext4_mount_opts; m->token != Opt_err; m++)
                        return -1;
                }
                sbi->s_jquota_fmt = m->mount_opt;
 +#endif
 +#ifndef CONFIG_FS_DAX
 +      } else if (token == Opt_dax) {
 +              ext4_msg(sb, KERN_INFO, "dax option not supported");
 +              return -1;
  #endif
        } else {
                if (!args->from)
@@@ -2779,6 -2776,12 +2779,12 @@@ static int ext4_feature_set_ok(struct s
        if (readonly)
                return 1;
  
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_READONLY)) {
+               ext4_msg(sb, KERN_INFO, "filesystem is read-only");
+               sb->s_flags |= MS_RDONLY;
+               return 1;
+       }
        /* Check that feature set is OK for a read-write mount */
        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) {
                ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
@@@ -3605,11 -3608,6 +3611,11 @@@ static int ext4_fill_super(struct super
                                 "both data=journal and dioread_nolock");
                        goto failed_mount;
                }
 +              if (test_opt(sb, DAX)) {
 +                      ext4_msg(sb, KERN_ERR, "can't mount with "
 +                               "both data=journal and dax");
 +                      goto failed_mount;
 +              }
                if (test_opt(sb, DELALLOC))
                        clear_opt(sb, DELALLOC);
        }
                goto failed_mount;
        }
  
 +      if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
 +              if (blocksize != PAGE_SIZE) {
 +                      ext4_msg(sb, KERN_ERR,
 +                                      "error: unsupported blocksize for dax");
 +                      goto failed_mount;
 +              }
 +              if (!sb->s_bdev->bd_disk->fops->direct_access) {
 +                      ext4_msg(sb, KERN_ERR,
 +                                      "error: device does not support dax");
 +                      goto failed_mount;
 +              }
 +      }
 +
        if (sb->s_blocksize != blocksize) {
                /* Validate the filesystem blocksize */
                if (!sb_set_blocksize(sb, blocksize)) {
        get_random_bytes(&sbi->s_next_generation, sizeof(u32));
        spin_lock_init(&sbi->s_next_gen_lock);
  
-       init_timer(&sbi->s_err_report);
-       sbi->s_err_report.function = print_daily_error_info;
-       sbi->s_err_report.data = (unsigned long) sb;
+       setup_timer(&sbi->s_err_report, print_daily_error_info,
+               (unsigned long) sb);
  
        /* Register extent status tree shrinker */
        if (ext4_es_register_shrinker(sbi))
  #ifdef CONFIG_QUOTA
        sb->dq_op = &ext4_quota_operations;
        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
 -              sb->s_qcop = &ext4_qctl_sysfile_operations;
 +              sb->s_qcop = &dquot_quotactl_sysfile_ops;
        else
                sb->s_qcop = &ext4_qctl_operations;
        sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
@@@ -4866,9 -4850,6 +4871,6 @@@ static int ext4_remount(struct super_bl
        if (sbi->s_journal && sbi->s_journal->j_task->io_context)
                journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
  
-       /*
-        * Allow the "check" option to be passed as a remount option.
-        */
        if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
                err = -EINVAL;
                goto restore_opts;
        if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
            test_opt(sb, JOURNAL_CHECKSUM)) {
                ext4_msg(sb, KERN_ERR, "changing journal_checksum "
-                        "during remount not supported");
-               err = -EINVAL;
-               goto restore_opts;
-       }
-       if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
-           test_opt(sb, JOURNAL_CHECKSUM)) {
-               ext4_msg(sb, KERN_ERR, "changing journal_checksum "
-                        "during remount not supported");
-               err = -EINVAL;
-               goto restore_opts;
+                        "during remount not supported; ignoring");
+               sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM;
        }
  
        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
                        err = -EINVAL;
                        goto restore_opts;
                }
 +              if (test_opt(sb, DAX)) {
 +                      ext4_msg(sb, KERN_ERR, "can't mount with "
 +                               "both data=journal and dax");
 +                      err = -EINVAL;
 +                      goto restore_opts;
 +              }
 +      }
 +
 +      if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
 +              ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
 +                      "dax flag with busy inodes while remounting");
 +              sbi->s_mount_opt ^= EXT4_MOUNT_DAX;
        }
  
        if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
                                ext4_mark_recovery_complete(sb, es);
                } else {
                        /* Make sure we can mount this feature set readwrite */
-                       if (!ext4_feature_set_ok(sb, 0)) {
+                       if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                                       EXT4_FEATURE_RO_COMPAT_READONLY) ||
+                           !ext4_feature_set_ok(sb, 0)) {
                                err = -EROFS;
                                goto restore_opts;
                        }
        }
  #endif
  
 +      *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME);
        ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
        kfree(orig_data);
        return 0;
@@@ -5322,6 -5283,21 +5317,6 @@@ static int ext4_enable_quotas(struct su
        return 0;
  }
  
 -/*
 - * quota_on function that is used when QUOTA feature is set.
 - */
 -static int ext4_quota_on_sysfile(struct super_block *sb, int type,
 -                               int format_id)
 -{
 -      if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
 -              return -EINVAL;
 -
 -      /*
 -       * USAGE was enabled at mount time. Only need to enable LIMITS now.
 -       */
 -      return ext4_quota_enable(sb, type, format_id, DQUOT_LIMITS_ENABLED);
 -}
 -
  static int ext4_quota_off(struct super_block *sb, int type)
  {
        struct inode *inode = sb_dqopt(sb)->files[type];
@@@ -5348,6 -5324,18 +5343,6 @@@ out
        return dquot_quota_off(sb, type);
  }
  
 -/*
 - * quota_off function that is used when QUOTA feature is set.
 - */
 -static int ext4_quota_off_sysfile(struct super_block *sb, int type)
 -{
 -      if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
 -              return -EINVAL;
 -
 -      /* Disable only the limits. */
 -      return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED);
 -}
 -
  /* Read data from quotafile - avoid pagecache and such because we cannot afford
   * acquiring the locks... As quota files are never truncated and quota code
   * itself serializes the operations (and no one else should touch the files)