block: add a bi_error field to struct bio

[mirror_ubuntu-zesty-kernel.git] / fs / btrfs / inode.c
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

index 855935f6671ae59b1b025c3916d553bc689191ef..6b8becfe2057c6b071aa5ec061558770a361797c 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1845,8 +1845,10 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
         int ret;
  
         ret = btrfs_map_bio(root, rw, bio, mirror_num, 1);
-       if (ret)
-               bio_endio(bio, ret);
+       if (ret) {
+               bio->bi_error = ret;
+               bio_endio(bio);
+       }
         return ret;
  }
  
@@ -1906,8 +1908,10 @@ mapit:
         ret = btrfs_map_bio(root, rw, bio, mirror_num, 0);
  
  out:
-       if (ret < 0)
-               bio_endio(bio, ret);
+       if (ret < 0) {
+               bio->bi_error = ret;
+               bio_endio(bio);
+       }
         return ret;
  }
  
@@ -4989,8 +4993,9 @@ static void evict_inode_truncate_pages(struct inode *inode)
         /*
          * Keep looping until we have no more ranges in the io tree.
          * We can have ongoing bios started by readpages (called from readahead)
-        * that didn't get their end io callbacks called yet or they are still
-        * in progress ((extent_io.c:end_bio_extent_readpage()). This means some
+        * that have their endio callback (extent_io.c:end_bio_extent_readpage)
+        * still in progress (unlocked the pages in the bio but did not yet
+        * unlock the ranges in the io tree). Therefore this means some
          * ranges can still be locked and eviction started because before
          * submitting those bios, which are executed by a separate task (work
          * queue kthread), inode references (inode->i_count) were not taken
@@ -7546,6 +7551,7 @@ unlock:
  
                 current->journal_info = outstanding_extents;
                 btrfs_free_reserved_data_space(inode, len);
+               set_bit(BTRFS_INODE_DIO_READY, &BTRFS_I(inode)->runtime_flags);
         }
  
         /*
@@ -7687,13 +7693,13 @@ struct btrfs_retry_complete {
         int uptodate;
  };
  
-static void btrfs_retry_endio_nocsum(struct bio *bio, int err)
+static void btrfs_retry_endio_nocsum(struct bio *bio)
  {
         struct btrfs_retry_complete *done = bio->bi_private;
         struct bio_vec *bvec;
         int i;
  
-       if (err)
+       if (bio->bi_error)
                 goto end;
  
         done->uptodate = 1;
@@ -7742,7 +7748,7 @@ try_again:
         return 0;
  }
  
-static void btrfs_retry_endio(struct bio *bio, int err)
+static void btrfs_retry_endio(struct bio *bio)
  {
         struct btrfs_retry_complete *done = bio->bi_private;
         struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
@@ -7751,7 +7757,7 @@ static void btrfs_retry_endio(struct bio *bio, int err)
         int ret;
         int i;
  
-       if (err)
+       if (bio->bi_error)
                 goto end;
  
         uptodate = 1;
@@ -7834,12 +7840,13 @@ static int btrfs_subio_endio_read(struct inode *inode,
         }
  }
  
-static void btrfs_endio_direct_read(struct bio *bio, int err)
+static void btrfs_endio_direct_read(struct bio *bio)
  {
         struct btrfs_dio_private *dip = bio->bi_private;
         struct inode *inode = dip->inode;
         struct bio *dio_bio;
         struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
+       int err = bio->bi_error;
  
         if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
                 err = btrfs_subio_endio_read(inode, io_bio, err);
@@ -7850,17 +7857,14 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
  
         kfree(dip);
  
-       /* If we had a csum failure make sure to clear the uptodate flag */
-       if (err)
-               clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
-       dio_end_io(dio_bio, err);
+       dio_end_io(dio_bio, bio->bi_error);
  
         if (io_bio->end_io)
                 io_bio->end_io(io_bio, err);
         bio_put(bio);
  }
  
-static void btrfs_endio_direct_write(struct bio *bio, int err)
+static void btrfs_endio_direct_write(struct bio *bio)
  {
         struct btrfs_dio_private *dip = bio->bi_private;
         struct inode *inode = dip->inode;
@@ -7871,12 +7875,11 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
         struct bio *dio_bio;
         int ret;
  
-       if (err)
-               goto out_done;
  again:
         ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
                                                    &ordered_offset,
-                                                  ordered_bytes, !err);
+                                                  ordered_bytes,
+                                                  !bio->bi_error);
         if (!ret)
                 goto out_test;
  
@@ -7895,15 +7898,11 @@ out_test:
                 ordered = NULL;
                 goto again;
         }
-out_done:
         dio_bio = dip->dio_bio;
  
         kfree(dip);
  
-       /* If we had an error make sure to clear the uptodate flag */
-       if (err)
-               clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
-       dio_end_io(dio_bio, err);
+       dio_end_io(dio_bio, bio->bi_error);
         bio_put(bio);
  }
  
@@ -7918,9 +7917,10 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
         return 0;
  }
  
-static void btrfs_end_dio_bio(struct bio *bio, int err)
+static void btrfs_end_dio_bio(struct bio *bio)
  {
         struct btrfs_dio_private *dip = bio->bi_private;
+       int err = bio->bi_error;
  
         if (err)
                 btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
@@ -7949,8 +7949,8 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
         if (dip->errors) {
                 bio_io_error(dip->orig_bio);
         } else {
-               set_bit(BIO_UPTODATE, &dip->dio_bio->bi_flags);
-               bio_endio(dip->orig_bio, 0);
+               dip->dio_bio->bi_error = 0;
+               bio_endio(dip->orig_bio);
         }
  out:
         bio_put(bio);
@@ -8163,9 +8163,8 @@ out_err:
  static void btrfs_submit_direct(int rw, struct bio *dio_bio,
                                 struct inode *inode, loff_t file_offset)
  {
-       struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct btrfs_dio_private *dip;
-       struct bio *io_bio;
+       struct btrfs_dio_private *dip = NULL;
+       struct bio *io_bio = NULL;
         struct btrfs_io_bio *btrfs_bio;
         int skip_sum;
         int write = rw & REQ_WRITE;
@@ -8182,7 +8181,7 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
         dip = kzalloc(sizeof(*dip), GFP_NOFS);
         if (!dip) {
                 ret = -ENOMEM;
-               goto free_io_bio;
+               goto free_ordered;
         }
  
         dip->private = dio_bio->bi_private;
@@ -8210,25 +8209,56 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
  
         if (btrfs_bio->end_io)
                 btrfs_bio->end_io(btrfs_bio, ret);
-free_io_bio:
-       bio_put(io_bio);
  
  free_ordered:
         /*
-        * If this is a write, we need to clean up the reserved space and kill
-        * the ordered extent.
+        * If we arrived here it means we either failed to submit the dip,
+        * failed to clone the dio_bio or failed to allocate the dip.
+        * If we cloned the dio_bio and allocated the dip, we can just
+        * call bio_endio against our io_bio so that we get proper resource
+        * cleanup if we fail to submit the dip, otherwise, we must do the
+        * same as btrfs_endio_direct_[write|read] because we can't call these
+        * callbacks - they require an allocated dip and a clone of dio_bio.
          */
-       if (write) {
-               struct btrfs_ordered_extent *ordered;
-               ordered = btrfs_lookup_ordered_extent(inode, file_offset);
-               if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
-                   !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
-                       btrfs_free_reserved_extent(root, ordered->start,
-                                                  ordered->disk_len, 1);
-               btrfs_put_ordered_extent(ordered);
-               btrfs_put_ordered_extent(ordered);
+       if (io_bio && dip) {
+               io_bio->bi_error = -EIO;
+               bio_endio(io_bio);
+               /*
+                * The end io callbacks free our dip, do the final put on io_bio
+                * and all the cleanup and final put for dio_bio (through
+                * dio_end_io()).
+                */
+               dip = NULL;
+               io_bio = NULL;
+       } else {
+               if (write) {
+                       struct btrfs_ordered_extent *ordered;
+
+                       ordered = btrfs_lookup_ordered_extent(inode,
+                                                             file_offset);
+                       set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
+                       /*
+                        * Decrements our ref on the ordered extent and removes
+                        * the ordered extent from the inode's ordered tree,
+                        * doing all the proper resource cleanup such as for the
+                        * reserved space and waking up any waiters for this
+                        * ordered extent (through btrfs_remove_ordered_extent).
+                        */
+                       btrfs_finish_ordered_io(ordered);
+               } else {
+                       unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
+                             file_offset + dio_bio->bi_iter.bi_size - 1);
+               }
+               dio_bio->bi_error = -EIO;
+               /*
+                * Releases and cleans up our dio_bio, no need to bio_put()
+                * nor bio_endio()/bio_io_error() against dio_bio.
+                */
+               dio_end_io(dio_bio, ret);
         }
-       bio_endio(dio_bio, ret);
+       if (io_bio)
+               bio_put(io_bio);
+       kfree(dip);
  }
  
  static ssize_t check_direct_IO(struct btrfs_root *root, struct kiocb *iocb,
@@ -8330,9 +8360,18 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
                                    btrfs_submit_direct, flags);
         if (iov_iter_rw(iter) == WRITE) {
                 current->journal_info = NULL;
-               if (ret < 0 && ret != -EIOCBQUEUED)
-                       btrfs_delalloc_release_space(inode, count);
-               else if (ret >= 0 && (size_t)ret < count)
+               if (ret < 0 && ret != -EIOCBQUEUED) {
+                       /*
+                        * If the error comes from the submitting stage,
+                        * btrfs_get_blocks_direct() has freed data space,
+                        * and metadata space will be handled by
+                        * finish_ordered_fn, don't do that again to make
+                        * sure bytes_may_use is correct.
+                        */
+                       if (!test_and_clear_bit(BTRFS_INODE_DIO_READY,
+                                    &BTRFS_I(inode)->runtime_flags))
+                               btrfs_delalloc_release_space(inode, count);
+               } else if (ret >= 0 && (size_t)ret < count)
                         btrfs_delalloc_release_space(inode,
                                                      count - (size_t)ret);
         }