diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 86ecc9b49e1582361551891cf252316e0e80773d..bbb9eb6811b2e07f05652be729a9142fbcc169d0 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
 
 static const struct vm_operations_struct xfs_file_vm_ops;
 
-/*
- * Locking primitives for read and write IO paths to ensure we consistently use
- * and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
- */
-static inline void
-xfs_rw_ilock(
-       struct xfs_inode        *ip,
-       int                     type)
-{
-       if (type & XFS_IOLOCK_EXCL)
-               inode_lock(VFS_I(ip));
-       xfs_ilock(ip, type);
-}
-
-static inline void
-xfs_rw_iunlock(
-       struct xfs_inode        *ip,
-       int                     type)
-{
-       xfs_iunlock(ip, type);
-       if (type & XFS_IOLOCK_EXCL)
-               inode_unlock(VFS_I(ip));
-}
-
-static inline void
-xfs_rw_ilock_demote(
-       struct xfs_inode        *ip,
-       int                     type)
-{
-       xfs_ilock_demote(ip, type);
-       if (type & XFS_IOLOCK_EXCL)
-               inode_unlock(VFS_I(ip));
-}
-
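The three wrappers removed above existed only to order the VFS i_mutex against XFS's own ip->i_iolock. This series makes them redundant: once the IOLOCK is implemented on the VFS inode's i_rwsem, xfs_ilock() and its siblings take that rwsem directly for the IOLOCK flags, so callers get both locks' semantics from a single call. A minimal sketch of the IOLOCK handling inside xfs_ilock() after the conversion (approximate; XFS_IOLOCK_DEP() is the lockdep-nesting helper from the same series):

	/* inside xfs_ilock(), roughly: the IOLOCK is now i_rwsem itself */
	if (lock_flags & XFS_IOLOCK_EXCL)
		down_write_nested(&VFS_I(ip)->i_rwsem,
				  XFS_IOLOCK_DEP(lock_flags));
	else if (lock_flags & XFS_IOLOCK_SHARED)
		down_read_nested(&VFS_I(ip)->i_rwsem,
				  XFS_IOLOCK_DEP(lock_flags));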
 /*
  * Clear the specified ranges to zero through either the pagecache or DAX.
  * Holes and unwritten extents will be left as-is as they already are zeroed.
@@ -183,19 +149,16 @@ xfs_file_fsync(
 
        xfs_iflags_clear(ip, XFS_ITRUNCATED);
 
-       if (mp->m_flags & XFS_MOUNT_BARRIER) {
-               /*
-                * If we have an RT and/or log subvolume we need to make sure
-                * to flush the write cache the device used for file data
-                * first.  This is to ensure newly written file data make
-                * it to disk before logging the new inode size in case of
-                * an extending write.
-                */
-               if (XFS_IS_REALTIME_INODE(ip))
-                       xfs_blkdev_issue_flush(mp->m_rtdev_targp);
-               else if (mp->m_logdev_targp != mp->m_ddev_targp)
-                       xfs_blkdev_issue_flush(mp->m_ddev_targp);
-       }
+       /*
+        * If we have an RT and/or log subvolume we need to make sure to flush
+        * the write cache of the device used for file data first.  This is to
+        * ensure newly written file data makes it to disk before logging the
+        * new inode size in case of an extending write.
+        */
+       if (XFS_IS_REALTIME_INODE(ip))
+               xfs_blkdev_issue_flush(mp->m_rtdev_targp);
+       else if (mp->m_logdev_targp != mp->m_ddev_targp)
+               xfs_blkdev_issue_flush(mp->m_ddev_targp);
 
        /*
         * All metadata updates are logged, which means that we just have to
@@ -230,10 +193,8 @@ xfs_file_fsync(
         * an already allocated file and thus do not have any metadata to
         * commit.
         */
-       if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
-           mp->m_logdev_targp == mp->m_ddev_targp &&
-           !XFS_IS_REALTIME_INODE(ip) &&
-           !log_flushed)
+       if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) &&
+           mp->m_logdev_targp == mp->m_ddev_targp)
                xfs_blkdev_issue_flush(mp->m_ddev_targp);
 
        return error;
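Both flush sites above go through xfs_blkdev_issue_flush() to empty the device's volatile write cache. For reference, its definition in fs/xfs/xfs_super.c of this era is a thin wrapper (a sketch, assuming the three-argument blkdev_issue_flush() of contemporary kernels):

	void
	xfs_blkdev_issue_flush(
		xfs_buftarg_t		*buftarg)
	{
		blkdev_issue_flush(buftarg->bt_bdev, GFP_NOFS, NULL);
	}

Note the behavioural change in these two hunks: with the XFS_MOUNT_BARRIER checks dropped, the cache flushes are now issued unconditionally rather than only on barrier-enabled mounts.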
@@ -244,62 +205,21 @@ xfs_file_dio_aio_read(
        struct kiocb            *iocb,
        struct iov_iter         *to)
 {
-       struct address_space    *mapping = iocb->ki_filp->f_mapping;
-       struct inode            *inode = mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       loff_t                  isize = i_size_read(inode);
+       struct xfs_inode        *ip = XFS_I(file_inode(iocb->ki_filp));
        size_t                  count = iov_iter_count(to);
-       loff_t                  end = iocb->ki_pos + count - 1;
-       struct iov_iter         data;
-       struct xfs_buftarg      *target;
-       ssize_t                 ret = 0;
+       ssize_t                 ret;
 
        trace_xfs_file_direct_read(ip, count, iocb->ki_pos);
 
        if (!count)
                return 0; /* skip atime */
 
-       if (XFS_IS_REALTIME_INODE(ip))
-               target = ip->i_mount->m_rtdev_targp;
-       else
-               target = ip->i_mount->m_ddev_targp;
-
-       /* DIO must be aligned to device logical sector size */
-       if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
-               if (iocb->ki_pos == isize)
-                       return 0;
-               return -EINVAL;
-       }
-
        file_accessed(iocb->ki_filp);
 
-       xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-       if (mapping->nrpages) {
-               ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
-               if (ret)
-                       goto out_unlock;
-
-               /*
-                * Invalidate whole pages. This can return an error if we fail
-                * to invalidate a page, but this should never happen on XFS.
-                * Warn if it does fail.
-                */
-               ret = invalidate_inode_pages2_range(mapping,
-                               iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
-               WARN_ON_ONCE(ret);
-               ret = 0;
-       }
+       xfs_ilock(ip, XFS_IOLOCK_SHARED);
+       ret = iomap_dio_rw(iocb, to, &xfs_iomap_ops, NULL);
+       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
-       data = *to;
-       ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
-                       xfs_get_blocks_direct, NULL, NULL, 0);
-       if (ret >= 0) {
-               iocb->ki_pos += ret;
-               iov_iter_advance(to, ret);
-       }
-
-out_unlock:
-       xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
        return ret;
 }
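All of the removed plumbing (the sector-alignment check, the pagecache flush-and-invalidate, and the __blockdev_direct_IO() call) moves into the common iomap direct IO code fronted by iomap_dio_rw(). Its interface, sketched from the contemporary fs/iomap.c prototype:

	typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret,
			unsigned flags);

	ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
			struct iomap_ops *ops, iomap_dio_end_io_t end_io);

Reads pass a NULL end_io callback, as here; the write path further down passes xfs_dio_write_end_io().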
 
@@ -317,9 +237,9 @@ xfs_file_dax_read(
        if (!count)
                return 0; /* skip atime */
 
-       xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-       ret = iomap_dax_rw(iocb, to, &xfs_iomap_ops);
-       xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+       xfs_ilock(ip, XFS_IOLOCK_SHARED);
+       ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
+       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
        file_accessed(iocb->ki_filp);
        return ret;
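The iomap_dax_rw() to dax_iomap_rw() change is a rename that came with the helper's move into fs/dax.c. Its prototype (as best recalled from this kernel's linux/dax.h) keeps the three-argument form, with no end_io callback since DAX IO completes synchronously:

	ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
			struct iomap_ops *ops);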
@@ -335,9 +255,9 @@ xfs_file_buffered_aio_read(
 
        trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos);
 
-       xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+       xfs_ilock(ip, XFS_IOLOCK_SHARED);
        ret = generic_file_read_iter(iocb, to);
-       xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
        return ret;
 }
@@ -418,15 +338,18 @@ restart:
        if (error <= 0)
                return error;
 
-       error = xfs_break_layouts(inode, iolock, true);
+       error = xfs_break_layouts(inode, iolock);
        if (error)
                return error;
 
-       /* For changing security info in file_remove_privs() we need i_mutex */
+       /*
+        * For changing security info in file_remove_privs() we need i_rwsem
+        * exclusively.
+        */
        if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) {
-               xfs_rw_iunlock(ip, *iolock);
+               xfs_iunlock(ip, *iolock);
                *iolock = XFS_IOLOCK_EXCL;
-               xfs_rw_ilock(ip, *iolock);
+               xfs_ilock(ip, *iolock);
                goto restart;
        }
        /*
@@ -451,9 +374,9 @@ restart:
                spin_unlock(&ip->i_flags_lock);
                if (!drained_dio) {
                        if (*iolock == XFS_IOLOCK_SHARED) {
-                               xfs_rw_iunlock(ip, *iolock);
+                               xfs_iunlock(ip, *iolock);
                                *iolock = XFS_IOLOCK_EXCL;
-                               xfs_rw_ilock(ip, *iolock);
+                               xfs_ilock(ip, *iolock);
                                iov_iter_reexpand(from, count);
                        }
                        /*
@@ -496,6 +419,58 @@ restart:
        return 0;
 }
 
+static int
+xfs_dio_write_end_io(
+       struct kiocb            *iocb,
+       ssize_t                 size,
+       unsigned                flags)
+{
+       struct inode            *inode = file_inode(iocb->ki_filp);
+       struct xfs_inode        *ip = XFS_I(inode);
+       loff_t                  offset = iocb->ki_pos;
+       bool                    update_size = false;
+       int                     error = 0;
+
+       trace_xfs_end_io_direct_write(ip, offset, size);
+
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+               return -EIO;
+
+       if (size <= 0)
+               return size;
+
+       /*
+        * We need to update the in-core inode size here so that we don't end up
+        * with the on-disk inode size being outside the in-core inode size. We
+        * have no other method of updating EOF for AIO, so always do it here
+        * if necessary.
+        *
+        * We need to serialise the EOF test/set against other IO completions,
+        * which can race to update the EOF here. Failing to serialise can
+        * result in EOF moving backwards and Bad Things Happen when that
+        * occurs.
+        */
+       spin_lock(&ip->i_flags_lock);
+       if (offset + size > i_size_read(inode)) {
+               i_size_write(inode, offset + size);
+               update_size = true;
+       }
+       spin_unlock(&ip->i_flags_lock);
+
+       if (flags & IOMAP_DIO_COW) {
+               error = xfs_reflink_end_cow(ip, offset, size);
+               if (error)
+                       return error;
+       }
+
+       if (flags & IOMAP_DIO_UNWRITTEN)
+               error = xfs_iomap_write_unwritten(ip, offset, size);
+       else if (update_size)
+               error = xfs_setfilesize(ip, offset, size);
+
+       return error;
+}
+
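The flags argument handed to this ->end_io callback describes the extents the completed direct write covered, which is what lets XFS order the completion work correctly: COW remapping first, then unwritten extent conversion, then the plain in-core size update. The bits as defined in include/linux/iomap.h around this series (values sketched from memory, treat as approximate):

	#define IOMAP_DIO_UNWRITTEN	(1 << 0)	/* covers unwritten extent(s) */
	#define IOMAP_DIO_COW		(1 << 1)	/* covers COW extent(s) */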
 /*
  * xfs_file_dio_aio_write - handle direct IO writes
  *
@@ -535,9 +510,7 @@ xfs_file_dio_aio_write(
        int                     unaligned_io = 0;
        int                     iolock;
        size_t                  count = iov_iter_count(from);
-       loff_t                  end;
-       struct iov_iter         data;
-       struct xfs_buftarg      *target = XFS_IS_REALTIME_INODE(ip) ?
+       struct xfs_buftarg      *target = XFS_IS_REALTIME_INODE(ip) ?
                                        mp->m_rtdev_targp : mp->m_ddev_targp;
 
        /* DIO must be aligned to device logical sector size */
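bt_logical_sectormask is the device's logical sector size minus one, so OR-ing the file position and the transfer length tests both for alignment with a single mask operation. A worked example, assuming a hypothetical device with 512-byte logical sectors:

	/* 512-byte logical sectors: bt_logical_sectormask == 511 (0x1ff) */
	(4096 | 512) & 511;	/* == 0: position and length both aligned */
	(4100 | 512) & 511;	/* != 0: misaligned position, write gets -EINVAL */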
@@ -559,29 +532,12 @@ xfs_file_dio_aio_write(
                iolock = XFS_IOLOCK_SHARED;
        }
 
-       xfs_rw_ilock(ip, iolock);
+       xfs_ilock(ip, iolock);
 
        ret = xfs_file_aio_write_checks(iocb, from, &iolock);
        if (ret)
                goto out;
        count = iov_iter_count(from);
-       end = iocb->ki_pos + count - 1;
-
-       if (mapping->nrpages) {
-               ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
-               if (ret)
-                       goto out;
-
-               /*
-                * Invalidate whole pages. This can return an error if we fail
-                * to invalidate a page, but this should never happen on XFS.
-                * Warn if it does fail.
-                */
-               ret = invalidate_inode_pages2_range(mapping,
-                               iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
-               WARN_ON_ONCE(ret);
-               ret = 0;
-       }
 
        /*
         * If we are doing unaligned IO, wait for all other IO to drain,
@@ -591,7 +547,7 @@ xfs_file_dio_aio_write(
        if (unaligned_io)
                inode_dio_wait(inode);
        else if (iolock == XFS_IOLOCK_EXCL) {
-               xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+               xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
                iolock = XFS_IOLOCK_SHARED;
        }
 
@@ -604,24 +560,9 @@ xfs_file_dio_aio_write(
                        goto out;
        }
 
-       data = *from;
-       ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
-                       xfs_get_blocks_direct, xfs_end_io_direct_write,
-                       NULL, DIO_ASYNC_EXTEND);
-
-       /* see generic_file_direct_write() for why this is necessary */
-       if (mapping->nrpages) {
-               invalidate_inode_pages2_range(mapping,
-                                             iocb->ki_pos >> PAGE_SHIFT,
-                                             end >> PAGE_SHIFT);
-       }
-
-       if (ret > 0) {
-               iocb->ki_pos += ret;
-               iov_iter_advance(from, ret);
-       }
+       ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
 out:
-       xfs_rw_iunlock(ip, iolock);
+       xfs_iunlock(ip, iolock);
 
        /*
         * No fallback to buffered IO on errors for XFS, direct IO will either
@@ -643,7 +584,7 @@ xfs_file_dax_write(
        size_t                  count;
        loff_t                  pos;
 
-       xfs_rw_ilock(ip, iolock);
+       xfs_ilock(ip, iolock);
        ret = xfs_file_aio_write_checks(iocb, from, &iolock);
        if (ret)
                goto out;
@@ -652,15 +593,13 @@ xfs_file_dax_write(
        count = iov_iter_count(from);
 
        trace_xfs_file_dax_write(ip, count, pos);
-
-       ret = iomap_dax_rw(iocb, from, &xfs_iomap_ops);
+       ret = dax_iomap_rw(iocb, from, &xfs_iomap_ops);
        if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
                i_size_write(inode, iocb->ki_pos);
                error = xfs_setfilesize(ip, pos, ret);
        }
-
 out:
-       xfs_rw_iunlock(ip, iolock);
+       xfs_iunlock(ip, iolock);
        return error ? error : ret;
 }
 
@@ -677,7 +616,7 @@ xfs_file_buffered_aio_write(
        int                     enospc = 0;
        int                     iolock = XFS_IOLOCK_EXCL;
 
-       xfs_rw_ilock(ip, iolock);
+       xfs_ilock(ip, iolock);
 
        ret = xfs_file_aio_write_checks(iocb, from, &iolock);
        if (ret)
@@ -721,7 +660,7 @@ write_retry:
 
        current->backing_dev_info = NULL;
 out:
-       xfs_rw_iunlock(ip, iolock);
+       xfs_iunlock(ip, iolock);
        return ret;
 }
 
@@ -797,7 +736,7 @@ xfs_file_fallocate(
                return -EOPNOTSUPP;
 
        xfs_ilock(ip, iolock);
-       error = xfs_break_layouts(inode, &iolock, false);
+       error = xfs_break_layouts(inode, &iolock);
        if (error)
                goto out_unlock;
 
@@ -921,7 +860,6 @@ xfs_file_clone_range(
                                     len, false);
 }
 
-#define XFS_MAX_DEDUPE_LEN     (16 * 1024 * 1024)
 STATIC ssize_t
 xfs_file_dedupe_range(
        struct file     *src_file,
@@ -932,14 +870,6 @@ xfs_file_dedupe_range(
 {
        int             error;
 
-       /*
-        * Limit the total length we will dedupe for each operation.
-        * This is intended to bound the total time spent in this
-        * ioctl to something sane.
-        */
-       if (len > XFS_MAX_DEDUPE_LEN)
-               len = XFS_MAX_DEDUPE_LEN;
-
        error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
                                     len, true);
        if (error)
@@ -1456,7 +1386,7 @@ xfs_filemap_page_mkwrite(
        xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 
        if (IS_DAX(inode)) {
-               ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
+               ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
        } else {
                ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
                ret = block_page_mkwrite_return(ret);
@@ -1483,15 +1413,9 @@ xfs_filemap_fault(
                return xfs_filemap_page_mkwrite(vma, vmf);
 
        xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
-       if (IS_DAX(inode)) {
-               /*
-                * we do not want to trigger unwritten extent conversion on read
-                * faults - that is unnecessary overhead and would also require
-                * changes to xfs_get_blocks_direct() to map unwritten extent
-                * ioend for conversion on read-only mappings.
-                */
-               ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
-       } else
+       if (IS_DAX(inode))
+               ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
+       else
                ret = filemap_fault(vma, vmf);
        xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 
@@ -1527,7 +1451,7 @@ xfs_filemap_pmd_fault(
        }
 
        xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
-       ret = dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_dax_fault);
+       ret = dax_iomap_pmd_fault(vma, addr, pmd, flags, &xfs_iomap_ops);
        xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 
        if (flags & FAULT_FLAG_WRITE)
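The PTE and PMD fault paths now land on the same iomap-based DAX entry points as the read/write paths, replacing the xfs_get_blocks_dax_fault plumbing. Their prototypes as this kernel declares them (sketched; a later refactor folds these into a single vm_fault-based call):

	int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
			struct iomap_ops *ops);

	int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
			pmd_t *pmd, unsigned int flags, struct iomap_ops *ops);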