diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 86ecc9b49e1582361551891cf252316e0e80773d..bbb9eb6811b2e07f05652be729a9142fbcc169d0 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
 
 static const struct vm_operations_struct xfs_file_vm_ops;
 
-/*
- * Locking primitives for read and write IO paths to ensure we consistently use
- * and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
- */
-static inline void
-xfs_rw_ilock(
-       struct xfs_inode        *ip,
-       int                     type)
-{
-       if (type & XFS_IOLOCK_EXCL)
-               inode_lock(VFS_I(ip));
-       xfs_ilock(ip, type);
-}
-
-static inline void
-xfs_rw_iunlock(
-       struct xfs_inode        *ip,
-       int                     type)
-{
-       xfs_iunlock(ip, type);
-       if (type & XFS_IOLOCK_EXCL)
-               inode_unlock(VFS_I(ip));
-}
-
-static inline void
-xfs_rw_ilock_demote(
-       struct xfs_inode        *ip,
-       int                     type)
-{
-       xfs_ilock_demote(ip, type);
-       if (type & XFS_IOLOCK_EXCL)
-               inode_unlock(VFS_I(ip));
-}
-
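The three wrappers removed above existed only to order the VFS i_mutex against XFS's own ip->i_iolock. This series makes them redundant: once the IOLOCK is implemented on the VFS inode's i_rwsem, xfs_ilock() and its siblings take that rwsem directly for the IOLOCK flags, so callers get both locks' semantics from a single call. A minimal sketch of the IOLOCK handling inside xfs_ilock() after the conversion (approximate; XFS_IOLOCK_DEP() is the lockdep-nesting helper from the same series):

	/* inside xfs_ilock(), roughly: the IOLOCK is now i_rwsem itself */
	if (lock_flags & XFS_IOLOCK_EXCL)
		down_write_nested(&VFS_I(ip)->i_rwsem,
				  XFS_IOLOCK_DEP(lock_flags));
	else if (lock_flags & XFS_IOLOCK_SHARED)
		down_read_nested(&VFS_I(ip)->i_rwsem,
				  XFS_IOLOCK_DEP(lock_flags));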
 /*
  * Clear the specified ranges to zero through either the pagecache or DAX.
  * Holes and unwritten extents will be left as-is as they already are zeroed.
@@ -183,19 +149,16 @@ xfs_file_fsync(
 
        xfs_iflags_clear(ip, XFS_ITRUNCATED);
 
-       if (mp->m_flags & XFS_MOUNT_BARRIER) {
-               /*
-                * If we have an RT and/or log subvolume we need to make sure
-                * to flush the write cache the device used for file data
-                * first.  This is to ensure newly written file data make
-                * it to disk before logging the new inode size in case of
-                * an extending write.
-                */
-               if (XFS_IS_REALTIME_INODE(ip))
-                       xfs_blkdev_issue_flush(mp->m_rtdev_targp);
-               else if (mp->m_logdev_targp != mp->m_ddev_targp)
-                       xfs_blkdev_issue_flush(mp->m_ddev_targp);
-       }
+       /*
+        * If we have an RT and/or log subvolume we need to make sure to flush
+        * the write cache of the device used for file data first.  This is to
+        * ensure newly written file data makes it to disk before logging the
+        * new inode size in case of an extending write.
+        */
+       if (XFS_IS_REALTIME_INODE(ip))
+               xfs_blkdev_issue_flush(mp->m_rtdev_targp);
+       else if (mp->m_logdev_targp != mp->m_ddev_targp)
+               xfs_blkdev_issue_flush(mp->m_ddev_targp);
 
        /*
         * All metadata updates are logged, which means that we just have to
@@ -230,10 +193,8 @@ xfs_file_fsync(
         * an already allocated file and thus do not have any metadata to
         * commit.
         */
-       if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
-           mp->m_logdev_targp == mp->m_ddev_targp &&
-           !XFS_IS_REALTIME_INODE(ip) &&
-           !log_flushed)
+       if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) &&
+           mp->m_logdev_targp == mp->m_ddev_targp)
                xfs_blkdev_issue_flush(mp->m_ddev_targp);
 
        return error;
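Both flush sites above go through xfs_blkdev_issue_flush() to empty the device's volatile write cache. For reference, its definition in fs/xfs/xfs_super.c of this era is a thin wrapper (a sketch, assuming the three-argument blkdev_issue_flush() of contemporary kernels):

	void
	xfs_blkdev_issue_flush(
		xfs_buftarg_t		*buftarg)
	{
		blkdev_issue_flush(buftarg->bt_bdev, GFP_NOFS, NULL);
	}

Note the behavioural change in these two hunks: with the XFS_MOUNT_BARRIER checks dropped, the cache flushes are now issued unconditionally rather than only on barrier-enabled mounts.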
@@ -244,62 +205,21 @@ xfs_file_dio_aio_read(
        struct kiocb            *iocb,
        struct iov_iter         *to)
 {
-       struct address_space    *mapping = iocb->ki_filp->f_mapping;
-       struct inode            *inode = mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       loff_t                  isize = i_size_read(inode);
+       struct xfs_inode        *ip = XFS_I(file_inode(iocb->ki_filp));
        size_t                  count = iov_iter_count(to);
-       loff_t                  end = iocb->ki_pos + count - 1;
-       struct iov_iter         data;
-       struct xfs_buftarg      *target;
-       ssize_t                 ret = 0;
+       ssize_t                 ret;
 
        trace_xfs_file_direct_read(ip, count, iocb->ki_pos);
 
        if (!count)
                return 0; /* skip atime */
 
-       if (XFS_IS_REALTIME_INODE(ip))
-               target = ip->i_mount->m_rtdev_targp;
-       else
-               target = ip->i_mount->m_ddev_targp;
-
-       /* DIO must be aligned to device logical sector size */
-       if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
-               if (iocb->ki_pos == isize)
-                       return 0;
-               return -EINVAL;
-       }
-
        file_accessed(iocb->ki_filp);
 
-       xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-       if (mapping->nrpages) {
-               ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
-               if (ret)
-                       goto out_unlock;
-
-               /*
-                * Invalidate whole pages. This can return an error if we fail
-                * to invalidate a page, but this should never happen on XFS.
-                * Warn if it does fail.
-                */
-               ret = invalidate_inode_pages2_range(mapping,
-                               iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
-               WARN_ON_ONCE(ret);
-               ret = 0;
-       }
+       xfs_ilock(ip, XFS_IOLOCK_SHARED);
+       ret = iomap_dio_rw(iocb, to, &xfs_iomap_ops, NULL);
+       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
-       data = *to;
-       ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
-                       xfs_get_blocks_direct, NULL, NULL, 0);
-       if (ret >= 0) {
-               iocb->ki_pos += ret;
-               iov_iter_advance(to, ret);
-       }
-
-out_unlock:
-       xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
        return ret;
 }
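All of the removed plumbing (the sector-alignment check, the pagecache flush-and-invalidate, and the __blockdev_direct_IO() call) moves into the common iomap direct IO code fronted by iomap_dio_rw(). Its interface, sketched from the contemporary fs/iomap.c prototype:

	typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret,
			unsigned flags);

	ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
			struct iomap_ops *ops, iomap_dio_end_io_t end_io);

Reads pass a NULL end_io callback, as here; the write path further down passes xfs_dio_write_end_io().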
 
@@ -317,9 +237,9 @@ xfs_file_dax_read(
        if (!count)
                return 0; /* skip atime */
 
-       xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-       ret = iomap_dax_rw(iocb, to, &xfs_iomap_ops);
-       xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+       xfs_ilock(ip, XFS_IOLOCK_SHARED);
+       ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
+       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
        file_accessed(iocb->ki_filp);
        return ret;
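The iomap_dax_rw() to dax_iomap_rw() change is a rename that came with the helper's move into fs/dax.c. Its prototype (as best recalled from this kernel's linux/dax.h) keeps the three-argument form, with no end_io callback since DAX IO completes synchronously:

	ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
			struct iomap_ops *ops);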
@@ -335,9 +255,9 @@ xfs_file_buffered_aio_read(
 
        trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos);
 
-       xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+       xfs_ilock(ip, XFS_IOLOCK_SHARED);
        ret = generic_file_read_iter(iocb, to);
-       xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
        return ret;
 }
@@ -418,15 +338,18 @@ restart:
        if (error <= 0)
                return error;
 
-       error = xfs_break_layouts(inode, iolock, true);
+       error = xfs_break_layouts(inode, iolock);
        if (error)
                return error;
 
-       /* For changing security info in file_remove_privs() we need i_mutex */
+       /*
+        * For changing security info in file_remove_privs() we need i_rwsem
+        * exclusively.
+        */
        if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) {
-               xfs_rw_iunlock(ip, *iolock);
+               xfs_iunlock(ip, *iolock);
                *iolock = XFS_IOLOCK_EXCL;
-               xfs_rw_ilock(ip, *iolock);
+               xfs_ilock(ip, *iolock);
                goto restart;
        }
        /*
@@ -451,9 +374,9 @@ restart:
                spin_unlock(&ip->i_flags_lock);
                if (!drained_dio) {
                        if (*iolock == XFS_IOLOCK_SHARED) {
-                               xfs_rw_iunlock(ip, *iolock);
+                               xfs_iunlock(ip, *iolock);
                                *iolock = XFS_IOLOCK_EXCL;
-                               xfs_rw_ilock(ip, *iolock);
+                               xfs_ilock(ip, *iolock);
                                iov_iter_reexpand(from, count);
                        }
                        /*
@@ -496,6 +419,58 @@ restart:
        return 0;
 }
 
+static int
+xfs_dio_write_end_io(
+       struct kiocb            *iocb,
+       ssize_t                 size,
+       unsigned                flags)
+{
+       struct inode            *inode = file_inode(iocb->ki_filp);
+       struct xfs_inode        *ip = XFS_I(inode);
+       loff_t                  offset = iocb->ki_pos;
+       bool                    update_size = false;
+       int                     error = 0;
+
+       trace_xfs_end_io_direct_write(ip, offset, size);
+
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+               return -EIO;
+
+       if (size <= 0)
+               return size;
+
+       /*
+        * We need to update the in-core inode size here so that we don't end up
+        * with the on-disk inode size being outside the in-core inode size. We
+        * have no other method of updating EOF for AIO, so always do it here
+        * if necessary.
+        *
+        * We need to serialise the EOF test/set against other IO completions,
+        * which can race to update the EOF here. Failing to serialise can
+        * result in EOF moving backwards and Bad Things Happen when that
+        * occurs.
+        */
+       spin_lock(&ip->i_flags_lock);
+       if (offset + size > i_size_read(inode)) {
+               i_size_write(inode, offset + size);
+               update_size = true;
+       }
+       spin_unlock(&ip->i_flags_lock);
+
+       if (flags & IOMAP_DIO_COW) {
+               error = xfs_reflink_end_cow(ip, offset, size);
+               if (error)
+                       return error;
+       }
+
+       if (flags & IOMAP_DIO_UNWRITTEN)
+               error = xfs_iomap_write_unwritten(ip, offset, size);
+       else if (update_size)
+               error = xfs_setfilesize(ip, offset, size);
+
+       return error;
+}
+
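The flags argument handed to this ->end_io callback describes the extents the completed direct write covered, which is what lets XFS order the completion work correctly: COW remapping first, then unwritten extent conversion, then the plain in-core size update. The bits as defined in include/linux/iomap.h around this series (values sketched from memory, treat as approximate):

	#define IOMAP_DIO_UNWRITTEN	(1 << 0)	/* covers unwritten extent(s) */
	#define IOMAP_DIO_COW		(1 << 1)	/* covers COW extent(s) */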
 /*
  * xfs_file_dio_aio_write - handle direct IO writes
  *
@@ -535,9 +510,7 @@ xfs_file_dio_aio_write(
        int                     unaligned_io = 0;
        int                     iolock;
        size_t                  count = iov_iter_count(from);
-       loff_t                  end;
-       struct iov_iter         data;
-       struct xfs_buftarg      *target = XFS_IS_REALTIME_INODE(ip) ?
+       struct xfs_buftarg      *target = XFS_IS_REALTIME_INODE(ip) ?
                                        mp->m_rtdev_targp : mp->m_ddev_targp;
 
        /* DIO must be aligned to device logical sector size */
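bt_logical_sectormask is the device's logical sector size minus one, so OR-ing the file position and the transfer length tests both for alignment with a single mask operation. A worked example, assuming a hypothetical device with 512-byte logical sectors:

	/* 512-byte logical sectors: bt_logical_sectormask == 511 (0x1ff) */
	(4096 | 512) & 511;	/* == 0: position and length both aligned */
	(4100 | 512) & 511;	/* != 0: misaligned position, write gets -EINVAL */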
@@ -559,29 +532,12 @@ xfs_file_dio_aio_write(
                iolock = XFS_IOLOCK_SHARED;
        }
 
-       xfs_rw_ilock(ip, iolock);
+       xfs_ilock(ip, iolock);
 
        ret = xfs_file_aio_write_checks(iocb, from, &iolock);
        if (ret)
                goto out;
        count = iov_iter_count(from);
-       end = iocb->ki_pos + count - 1;
-
-       if (mapping->nrpages) {
-               ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
-               if (ret)
-                       goto out;
-
-               /*
-                * Invalidate whole pages. This can return an error if we fail
-                * to invalidate a page, but this should never happen on XFS.
-                * Warn if it does fail.
-                */
-               ret = invalidate_inode_pages2_range(mapping,
-                               iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
-               WARN_ON_ONCE(ret);
-               ret = 0;
-       }
 
        /*
         * If we are doing unaligned IO, wait for all other IO to drain,
@@ -591,7 +547,7 @@ xfs_file_dio_aio_write(
        if (unaligned_io)
                inode_dio_wait(inode);
        else if (iolock == XFS_IOLOCK_EXCL) {
-               xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+               xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
                iolock = XFS_IOLOCK_SHARED;
        }
 
@@ -604,24 +560,9 @@ xfs_file_dio_aio_write(
                        goto out;
        }
 
-       data = *from;
-       ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
-                       xfs_get_blocks_direct, xfs_end_io_direct_write,
-                       NULL, DIO_ASYNC_EXTEND);
-
-       /* see generic_file_direct_write() for why this is necessary */
-       if (mapping->nrpages) {
-               invalidate_inode_pages2_range(mapping,
-                                             iocb->ki_pos >> PAGE_SHIFT,
-                                             end >> PAGE_SHIFT);
-       }
-
-       if (ret > 0) {
-               iocb->ki_pos += ret;
-               iov_iter_advance(from, ret);
-       }
+       ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
 out:
-       xfs_rw_iunlock(ip, iolock);
+       xfs_iunlock(ip, iolock);
 
        /*
         * No fallback to buffered IO on errors for XFS, direct IO will either
@@ -643,7 +584,7 @@ xfs_file_dax_write(
        size_t                  count;
        loff_t                  pos;
 
-       xfs_rw_ilock(ip, iolock);
+       xfs_ilock(ip, iolock);
        ret = xfs_file_aio_write_checks(iocb, from, &iolock);
        if (ret)
                goto out;
@@ -652,15 +593,13 @@ xfs_file_dax_write(
        count = iov_iter_count(from);
 
        trace_xfs_file_dax_write(ip, count, pos);
-
-       ret = iomap_dax_rw(iocb, from, &xfs_iomap_ops);
+       ret = dax_iomap_rw(iocb, from, &xfs_iomap_ops);
        if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
                i_size_write(inode, iocb->ki_pos);
                error = xfs_setfilesize(ip, pos, ret);
        }
-
 out:
-       xfs_rw_iunlock(ip, iolock);
+       xfs_iunlock(ip, iolock);
        return error ? error : ret;
 }
 
@@ -677,7 +616,7 @@ xfs_file_buffered_aio_write(
        int                     enospc = 0;
        int                     iolock = XFS_IOLOCK_EXCL;
 
-       xfs_rw_ilock(ip, iolock);
+       xfs_ilock(ip, iolock);
 
        ret = xfs_file_aio_write_checks(iocb, from, &iolock);
        if (ret)
@@ -721,7 +660,7 @@ write_retry:
 
        current->backing_dev_info = NULL;
 out:
-       xfs_rw_iunlock(ip, iolock);
+       xfs_iunlock(ip, iolock);
        return ret;
 }
 
@@ -797,7 +736,7 @@ xfs_file_fallocate(
                return -EOPNOTSUPP;
 
        xfs_ilock(ip, iolock);
-       error = xfs_break_layouts(inode, &iolock, false);
+       error = xfs_break_layouts(inode, &iolock);
        if (error)
                goto out_unlock;
 
@@ -921,7 +860,6 @@ xfs_file_clone_range(
                                     len, false);
 }
 
-#define XFS_MAX_DEDUPE_LEN     (16 * 1024 * 1024)
 STATIC ssize_t
 xfs_file_dedupe_range(
        struct file     *src_file,
@@ -932,14 +870,6 @@ xfs_file_dedupe_range(
 {
        int             error;
 
-       /*
-        * Limit the total length we will dedupe for each operation.
-        * This is intended to bound the total time spent in this
-        * ioctl to something sane.
-        */
-       if (len > XFS_MAX_DEDUPE_LEN)
-               len = XFS_MAX_DEDUPE_LEN;
-
        error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
                                     len, true);
        if (error)
@@ -1456,7 +1386,7 @@ xfs_filemap_page_mkwrite(
        xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 
        if (IS_DAX(inode)) {
-               ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
+               ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
        } else {
                ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
                ret = block_page_mkwrite_return(ret);
@@ -1483,15 +1413,9 @@ xfs_filemap_fault(
                return xfs_filemap_page_mkwrite(vma, vmf);
 
        xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
-       if (IS_DAX(inode)) {
-               /*
-                * we do not want to trigger unwritten extent conversion on read
-                * faults - that is unnecessary overhead and would also require
-                * changes to xfs_get_blocks_direct() to map unwritten extent
-                * ioend for conversion on read-only mappings.
-                */
-               ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
-       } else
+       if (IS_DAX(inode))
+               ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
+       else
                ret = filemap_fault(vma, vmf);
        xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 
@@ -1527,7 +1451,7 @@ xfs_filemap_pmd_fault(
        }
 
        xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
-       ret = dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_dax_fault);
+       ret = dax_iomap_pmd_fault(vma, addr, pmd, flags, &xfs_iomap_ops);
        xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 
        if (flags & FAULT_FLAG_WRITE)
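The PTE and PMD fault paths now land on the same iomap-based DAX entry points as the read/write paths, replacing the xfs_get_blocks_dax_fault plumbing. Their prototypes as this kernel declares them (sketched; a later refactor folds these into a single vm_fault-based call):

	int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
			struct iomap_ops *ops);

	int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
			pmd_t *pmd, unsigned int flags, struct iomap_ops *ops);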