md: raid1: kill warning on powerpc_pseries

[mirror_ubuntu-bionic-kernel.git] / drivers / md / raid1.c
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c

index 7453d94eeed700c8ac30da1b8d7857b4788fdbd5..7d6723558fd8fc8eaa7805d7209bcd5961ef2653 100644 (file)
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -37,14 +37,18 @@
  #include <linux/module.h>
  #include <linux/seq_file.h>
  #include <linux/ratelimit.h>
+#include <linux/sched/signal.h>
+
  #include <trace/events/block.h>
+
  #include "md.h"
  #include "raid1.h"
  #include "bitmap.h"
  
  #define UNSUPPORTED_MDDEV_FLAGS                \
         ((1L << MD_HAS_JOURNAL) |       \
-        (1L << MD_JOURNAL_CLEAN))
+        (1L << MD_JOURNAL_CLEAN) |     \
+        (1L << MD_HAS_PPL))
  
  /*
   * Number of guaranteed r1bios in case of extreme VM load:
@@ -77,6 +81,24 @@ static void lower_barrier(struct r1conf *conf, sector_t sector_nr);
  #define raid1_log(md, fmt, args...)                            \
         do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0)
  
+/*
+ * 'struct resync_pages' stores actual pages used for doing the resync
+ *  IO, and it is per-bio, so make .bi_private point to it.
+ */
+static inline struct resync_pages *get_resync_pages(struct bio *bio)
+{
+       return bio->bi_private;
+}
+
+/*
+ * for resync bio, r1bio pointer can be retrieved from the per-bio
+ * 'struct resync_pages'.
+ */
+static inline struct r1bio *get_resync_r1bio(struct bio *bio)
+{
+       return get_resync_pages(bio)->raid_bio;
+}
+
  static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
  {
         struct pool_info *pi = data;
@@ -91,10 +113,8 @@ static void r1bio_pool_free(void *r1_bio, void *data)
         kfree(r1_bio);
  }
  
-#define RESYNC_BLOCK_SIZE (64*1024)
  #define RESYNC_DEPTH 32
  #define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9)
-#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
  #define RESYNC_WINDOW (RESYNC_BLOCK_SIZE * RESYNC_DEPTH)
  #define RESYNC_WINDOW_SECTORS (RESYNC_WINDOW >> 9)
  #define CLUSTER_RESYNC_WINDOW (16 * RESYNC_WINDOW)
@@ -106,12 +126,18 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
         struct r1bio *r1_bio;
         struct bio *bio;
         int need_pages;
-       int i, j;
+       int j;
+       struct resync_pages *rps;
  
         r1_bio = r1bio_pool_alloc(gfp_flags, pi);
         if (!r1_bio)
                 return NULL;
  
+       rps = kmalloc(sizeof(struct resync_pages) * pi->raid_disks,
+                     gfp_flags);
+       if (!rps)
+               goto out_free_r1bio;
+
         /*
          * Allocate bios : 1 for reading, n-1 for writing
          */
@@ -131,19 +157,22 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
                 need_pages = pi->raid_disks;
         else
                 need_pages = 1;
-       for (j = 0; j < need_pages; j++) {
+       for (j = 0; j < pi->raid_disks; j++) {
+               struct resync_pages *rp = &rps[j];
+
                 bio = r1_bio->bios[j];
-               bio->bi_vcnt = RESYNC_PAGES;
  
-               if (bio_alloc_pages(bio, gfp_flags))
-                       goto out_free_pages;
-       }
-       /* If not user-requests, copy the page pointers to all bios */
-       if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) {
-               for (i=0; i<RESYNC_PAGES ; i++)
-                       for (j=1; j<pi->raid_disks; j++)
-                               r1_bio->bios[j]->bi_io_vec[i].bv_page =
-                                       r1_bio->bios[0]->bi_io_vec[i].bv_page;
+               if (j < need_pages) {
+                       if (resync_alloc_pages(rp, gfp_flags))
+                               goto out_free_pages;
+               } else {
+                       memcpy(rp, &rps[0], sizeof(*rp));
+                       resync_get_all_pages(rp);
+               }
+
+               rp->idx = 0;
+               rp->raid_bio = r1_bio;
+               bio->bi_private = rp;
         }
  
         r1_bio->master_bio = NULL;
@@ -152,11 +181,14 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
  
  out_free_pages:
         while (--j >= 0)
-               bio_free_pages(r1_bio->bios[j]);
+               resync_free_pages(&rps[j]);
  
  out_free_bio:
         while (++j < pi->raid_disks)
                 bio_put(r1_bio->bios[j]);
+       kfree(rps);
+
+out_free_r1bio:
         r1bio_pool_free(r1_bio, data);
         return NULL;
  }
@@ -164,18 +196,18 @@ out_free_bio:
  static void r1buf_pool_free(void *__r1_bio, void *data)
  {
         struct pool_info *pi = data;
-       int i,j;
+       int i;
         struct r1bio *r1bio = __r1_bio;
+       struct resync_pages *rp = NULL;
  
-       for (i = 0; i < RESYNC_PAGES; i++)
-               for (j = pi->raid_disks; j-- ;) {
-                       if (j == 0 ||
-                           r1bio->bios[j]->bi_io_vec[i].bv_page !=
-                           r1bio->bios[0]->bi_io_vec[i].bv_page)
-                               safe_put_page(r1bio->bios[j]->bi_io_vec[i].bv_page);
-               }
-       for (i=0 ; i < pi->raid_disks; i++)
+       for (i = pi->raid_disks; i--; ) {
+               rp = get_resync_pages(r1bio->bios[i]);
+               resync_free_pages(rp);
                 bio_put(r1bio->bios[i]);
+       }
+
+       /* resync pages array stored in the 1st bio's .bi_private */
+       kfree(rp);
  
         r1bio_pool_free(r1bio, data);
  }
@@ -242,35 +274,17 @@ static void reschedule_retry(struct r1bio *r1_bio)
  static void call_bio_endio(struct r1bio *r1_bio)
  {
         struct bio *bio = r1_bio->master_bio;
-       int done;
         struct r1conf *conf = r1_bio->mddev->private;
-       sector_t bi_sector = bio->bi_iter.bi_sector;
-
-       if (bio->bi_phys_segments) {
-               unsigned long flags;
-               spin_lock_irqsave(&conf->device_lock, flags);
-               bio->bi_phys_segments--;
-               done = (bio->bi_phys_segments == 0);
-               spin_unlock_irqrestore(&conf->device_lock, flags);
-               /*
-                * make_request() might be waiting for
-                * bi_phys_segments to decrease
-                */
-               wake_up(&conf->wait_barrier);
-       } else
-               done = 1;
  
         if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
                 bio->bi_error = -EIO;
  
-       if (done) {
-               bio_endio(bio);
-               /*
-                * Wake up any possible resync thread that waits for the device
-                * to go idle.
-                */
-               allow_barrier(conf, bi_sector);
-       }
+       bio_endio(bio);
+       /*
+        * Wake up any possible resync thread that waits for the device
+        * to go idle.
+        */
+       allow_barrier(conf, r1_bio->sector);
  }
  
  static void raid_end_bio_io(struct r1bio *r1_bio)
@@ -374,12 +388,9 @@ static void close_write(struct r1bio *r1_bio)
  {
         /* it really is the end of this request */
         if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
-               /* free extra copy of the data pages */
-               int i = r1_bio->behind_page_count;
-               while (i--)
-                       safe_put_page(r1_bio->behind_bvecs[i].bv_page);
-               kfree(r1_bio->behind_bvecs);
-               r1_bio->behind_bvecs = NULL;
+               bio_free_pages(r1_bio->behind_master_bio);
+               bio_put(r1_bio->behind_master_bio);
+               r1_bio->behind_master_bio = NULL;
         }
         /* clear the bitmap if all writes complete successfully */
         bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
@@ -481,6 +492,10 @@ static void raid1_end_write_request(struct bio *bio)
         }
  
         if (behind) {
+               /* we release behind master bio when all writes are done */
+               if (r1_bio->behind_master_bio == bio)
+                       to_put = NULL;
+
                 if (test_bit(WriteMostly, &rdev->flags))
                         atomic_dec(&r1_bio->behind_remaining);
  
@@ -973,6 +988,16 @@ static void wait_read_barrier(struct r1conf *conf, sector_t sector_nr)
         spin_unlock_irq(&conf->resync_lock);
  }
  
+static void inc_pending(struct r1conf *conf, sector_t bi_sector)
+{
+       /* The current request requires multiple r1_bio, so
+        * we need to increment the pending count, and the corresponding
+        * window count.
+        */
+       int idx = sector_to_idx(bi_sector);
+       atomic_inc(&conf->nr_pending[idx]);
+}
+
  static void wait_barrier(struct r1conf *conf, sector_t sector_nr)
  {
         int idx = sector_to_idx(sector_nr);
@@ -1024,7 +1049,7 @@ static int get_unqueued_pending(struct r1conf *conf)
  static void freeze_array(struct r1conf *conf, int extra)
  {
         /* Stop sync I/O and normal I/O and wait for everything to
-        * go quite.
+        * go quiet.
          * This is called in two situations:
          * 1) management command handlers (reshape, remove disk, quiesce).
          * 2) one normal I/O request failed.
@@ -1065,39 +1090,50 @@ static void unfreeze_array(struct r1conf *conf)
         wake_up(&conf->wait_barrier);
  }
  
-/* duplicate the data pages for behind I/O
- */
-static void alloc_behind_pages(struct bio *bio, struct r1bio *r1_bio)
+static struct bio *alloc_behind_master_bio(struct r1bio *r1_bio,
+                                          struct bio *bio,
+                                          int offset, int size)
  {
-       int i;
-       struct bio_vec *bvec;
-       struct bio_vec *bvecs = kzalloc(bio->bi_vcnt * sizeof(struct bio_vec),
-                                       GFP_NOIO);
-       if (unlikely(!bvecs))
-               return;
+       unsigned vcnt = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       int i = 0;
+       struct bio *behind_bio = NULL;
+
+       behind_bio = bio_alloc_mddev(GFP_NOIO, vcnt, r1_bio->mddev);
+       if (!behind_bio)
+               goto fail;
+
+       /* discard op, we don't support writezero/writesame yet */
+       if (!bio_has_data(bio))
+               goto skip_copy;
+
+       while (i < vcnt && size) {
+               struct page *page;
+               int len = min_t(int, PAGE_SIZE, size);
  
-       bio_for_each_segment_all(bvec, bio, i) {
-               bvecs[i] = *bvec;
-               bvecs[i].bv_page = alloc_page(GFP_NOIO);
-               if (unlikely(!bvecs[i].bv_page))
-                       goto do_sync_io;
-               memcpy(kmap(bvecs[i].bv_page) + bvec->bv_offset,
-                      kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len);
-               kunmap(bvecs[i].bv_page);
-               kunmap(bvec->bv_page);
-       }
-       r1_bio->behind_bvecs = bvecs;
-       r1_bio->behind_page_count = bio->bi_vcnt;
+               page = alloc_page(GFP_NOIO);
+               if (unlikely(!page))
+                       goto free_pages;
+
+               bio_add_page(behind_bio, page, len, 0);
+
+               size -= len;
+               i++;
+       }
+
+       bio_copy_data_partial(behind_bio, bio, offset,
+                             behind_bio->bi_iter.bi_size);
+skip_copy:
+       r1_bio->behind_master_bio = behind_bio;;
         set_bit(R1BIO_BehindIO, &r1_bio->state);
-       return;
  
-do_sync_io:
-       for (i = 0; i < bio->bi_vcnt; i++)
-               if (bvecs[i].bv_page)
-                       put_page(bvecs[i].bv_page);
-       kfree(bvecs);
+       return behind_bio;
+
+free_pages:
         pr_debug("%dB behind alloc failed, doing sync I/O\n",
                  bio->bi_iter.bi_size);
+       bio_free_pages(behind_bio);
+fail:
+       return behind_bio;
  }
  
  struct raid1_plug_cb {
@@ -1187,17 +1223,6 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio)
  
         r1_bio = alloc_r1bio(mddev, bio, 0);
  
-       /*
-        * We might need to issue multiple reads to different
-        * devices if there are bad blocks around, so we keep
-        * track of the number of reads in bio->bi_phys_segments.
-        * If this is 0, there is only one r1_bio and no locking
-        * will be needed when requests complete.  If it is
-        * non-zero, then it is the number of not-completed requests.
-        */
-       bio->bi_phys_segments = 0;
-       bio_clear_flag(bio, BIO_SEG_VALID);
-
         /*
          * make_request() can abort the operation when read-ahead is being
          * used and no empty request is available.
@@ -1253,12 +1278,7 @@ read_again:
                 sectors_handled = (r1_bio->sector + max_sectors
                                    - bio->bi_iter.bi_sector);
                 r1_bio->sectors = max_sectors;
-               spin_lock_irq(&conf->device_lock);
-               if (bio->bi_phys_segments == 0)
-                       bio->bi_phys_segments = 2;
-               else
-                       bio->bi_phys_segments++;
-               spin_unlock_irq(&conf->device_lock);
+               bio_inc_remaining(bio);
  
                 /*
                  * Cannot call generic_make_request directly as that will be
@@ -1286,6 +1306,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio)
         int first_clone;
         int sectors_handled;
         int max_sectors;
+       sector_t offset;
  
         /*
          * Register the new request and wait if the reconstruction
@@ -1325,16 +1346,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio)
  
         r1_bio = alloc_r1bio(mddev, bio, 0);
  
-       /* We might need to issue multiple writes to different
-        * devices if there are bad blocks around, so we keep
-        * track of the number of writes in bio->bi_phys_segments.
-        * If this is 0, there is only one r1_bio and no locking
-        * will be needed when requests complete.  If it is
-        * non-zero, then it is the number of not-completed requests.
-        */
-       bio->bi_phys_segments = 0;
-       bio_clear_flag(bio, BIO_SEG_VALID);
-
         if (conf->pending_count >= max_queued_requests) {
                 md_wakeup_thread(mddev->thread);
                 raid1_log(mddev, "wait queued");
@@ -1432,31 +1443,22 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio)
                 goto retry_write;
         }
  
-       if (max_sectors < r1_bio->sectors) {
-               /* We are splitting this write into multiple parts, so
-                * we need to prepare for allocating another r1_bio.
-                */
+       if (max_sectors < r1_bio->sectors)
                 r1_bio->sectors = max_sectors;
-               spin_lock_irq(&conf->device_lock);
-               if (bio->bi_phys_segments == 0)
-                       bio->bi_phys_segments = 2;
-               else
-                       bio->bi_phys_segments++;
-               spin_unlock_irq(&conf->device_lock);
-       }
+
         sectors_handled = r1_bio->sector + max_sectors - bio->bi_iter.bi_sector;
  
         atomic_set(&r1_bio->remaining, 1);
         atomic_set(&r1_bio->behind_remaining, 0);
  
         first_clone = 1;
+
+       offset = r1_bio->sector - bio->bi_iter.bi_sector;
         for (i = 0; i < disks; i++) {
                 struct bio *mbio = NULL;
-               sector_t offset;
                 if (!r1_bio->bios[i])
                         continue;
  
-               offset = r1_bio->sector - bio->bi_iter.bi_sector;
  
                 if (first_clone) {
                         /* do behind I/O ?
@@ -1467,11 +1469,9 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio)
                             (atomic_read(&bitmap->behind_writes)
                              < mddev->bitmap_info.max_write_behind) &&
                             !waitqueue_active(&bitmap->behind_wait)) {
-                               mbio = bio_clone_bioset_partial(bio, GFP_NOIO,
-                                                               mddev->bio_set,
-                                                               offset << 9,
-                                                               max_sectors << 9);
-                               alloc_behind_pages(mbio, r1_bio);
+                               mbio = alloc_behind_master_bio(r1_bio, bio,
+                                                              offset << 9,
+                                                              max_sectors << 9);
                         }
  
                         bitmap_startwrite(bitmap, r1_bio->sector,
@@ -1482,26 +1482,17 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio)
                 }
  
                 if (!mbio) {
-                       if (r1_bio->behind_bvecs)
-                               mbio = bio_clone_bioset_partial(bio, GFP_NOIO,
-                                                               mddev->bio_set,
-                                                               offset << 9,
-                                                               max_sectors << 9);
+                       if (r1_bio->behind_master_bio)
+                               mbio = bio_clone_fast(r1_bio->behind_master_bio,
+                                                     GFP_NOIO,
+                                                     mddev->bio_set);
                         else {
                                 mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
                                 bio_trim(mbio, offset, max_sectors);
                         }
                 }
  
-               if (r1_bio->behind_bvecs) {
-                       struct bio_vec *bvec;
-                       int j;
-
-                       /*
-                        * We trimmed the bio, so _all is legit
-                        */
-                       bio_for_each_segment_all(bvec, mbio, j)
-                               bvec->bv_page = r1_bio->behind_bvecs[j].bv_page;
+               if (r1_bio->behind_master_bio) {
                         if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
                                 atomic_inc(&r1_bio->behind_remaining);
                 }
@@ -1549,10 +1540,12 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio)
          * as it could result in the bio being freed.
          */
         if (sectors_handled < bio_sectors(bio)) {
+               /* We need another r1_bio, which must be counted */
+               sector_t sect = bio->bi_iter.bi_sector + sectors_handled;
+
+               inc_pending(conf, sect);
+               bio_inc_remaining(bio);
                 r1_bio_write_done(r1_bio);
-               /* We need another r1_bio.  It has already been counted
-                * in bio->bi_phys_segments
-                */
                 r1_bio = alloc_r1bio(mddev, bio, sectors_handled);
                 goto retry_write;
         }
@@ -1584,9 +1577,30 @@ static void raid1_make_request(struct mddev *mddev, struct bio *bio)
                         split = bio;
                 }
  
-               if (bio_data_dir(split) == READ)
+               if (bio_data_dir(split) == READ) {
                         raid1_read_request(mddev, split);
-               else
+
+                       /*
+                        * If a bio is split, the first part of bio will
+                        * pass barrier but the bio is queued in
+                        * current->bio_list (see generic_make_request). If
+                        * there is a raise_barrier() called here, the second
+                        * part of bio can't pass barrier. But since the first
+                        * part bio isn't dispatched to underlying disks yet,
+                        * the barrier is never released, hence raise_barrier
+                        * will always wait. We have a deadlock.
+                        * Note, this only happens in read path. For write
+                        * path, the first part of bio is dispatched in a
+                        * schedule() call (because of blk plug) or offloaded
+                        * to raid10d.
+                        * Quitting from the function immediately can change
+                        * the bio order queued in bio_list and avoid the deadlock.
+                        */
+                       if (split != bio) {
+                               generic_make_request(bio);
+                               break;
+                       }
+               } else
                         raid1_write_request(mddev, split);
         } while (split != bio);
  }
@@ -1863,7 +1877,7 @@ abort:
  
  static void end_sync_read(struct bio *bio)
  {
-       struct r1bio *r1_bio = bio->bi_private;
+       struct r1bio *r1_bio = get_resync_r1bio(bio);
  
         update_head_pos(r1_bio->read_disk, r1_bio);
  
@@ -1882,7 +1896,7 @@ static void end_sync_read(struct bio *bio)
  static void end_sync_write(struct bio *bio)
  {
         int uptodate = !bio->bi_error;
-       struct r1bio *r1_bio = bio->bi_private;
+       struct r1bio *r1_bio = get_resync_r1bio(bio);
         struct mddev *mddev = r1_bio->mddev;
         struct r1conf *conf = mddev->private;
         sector_t first_bad;
@@ -1961,6 +1975,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
         struct mddev *mddev = r1_bio->mddev;
         struct r1conf *conf = mddev->private;
         struct bio *bio = r1_bio->bios[r1_bio->read_disk];
+       struct page **pages = get_resync_pages(bio)->pages;
         sector_t sect = r1_bio->sector;
         int sectors = r1_bio->sectors;
         int idx = 0;
@@ -1994,7 +2009,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
                                  */
                                 rdev = conf->mirrors[d].rdev;
                                 if (sync_page_io(rdev, sect, s<<9,
-                                                bio->bi_io_vec[idx].bv_page,
+                                                pages[idx],
                                                  REQ_OP_READ, 0, false)) {
                                         success = 1;
                                         break;
@@ -2049,7 +2064,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
                                 continue;
                         rdev = conf->mirrors[d].rdev;
                         if (r1_sync_page_io(rdev, sect, s,
-                                           bio->bi_io_vec[idx].bv_page,
+                                           pages[idx],
                                             WRITE) == 0) {
                                 r1_bio->bios[d]->bi_end_io = NULL;
                                 rdev_dec_pending(rdev, mddev);
@@ -2064,7 +2079,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
                                 continue;
                         rdev = conf->mirrors[d].rdev;
                         if (r1_sync_page_io(rdev, sect, s,
-                                           bio->bi_io_vec[idx].bv_page,
+                                           pages[idx],
                                             READ) != 0)
                                 atomic_add(s, &rdev->corrected_errors);
                 }
@@ -2098,7 +2113,9 @@ static void process_checks(struct r1bio *r1_bio)
                 int j;
                 int size;
                 int error;
+               struct bio_vec *bi;
                 struct bio *b = r1_bio->bios[i];
+               struct resync_pages *rp = get_resync_pages(b);
                 if (b->bi_end_io != end_sync_read)
                         continue;
                 /* fixup the bio for reuse, but preserve errno */
@@ -2111,12 +2128,11 @@ static void process_checks(struct r1bio *r1_bio)
                         conf->mirrors[i].rdev->data_offset;
                 b->bi_bdev = conf->mirrors[i].rdev->bdev;
                 b->bi_end_io = end_sync_read;
-               b->bi_private = r1_bio;
+               rp->raid_bio = r1_bio;
+               b->bi_private = rp;
  
                 size = b->bi_iter.bi_size;
-               for (j = 0; j < vcnt ; j++) {
-                       struct bio_vec *bi;
-                       bi = &b->bi_io_vec[j];
+               bio_for_each_segment_all(bi, b, j) {
                         bi->bv_offset = 0;
                         if (size > PAGE_SIZE)
                                 bi->bv_len = PAGE_SIZE;
@@ -2138,20 +2154,24 @@ static void process_checks(struct r1bio *r1_bio)
                 struct bio *pbio = r1_bio->bios[primary];
                 struct bio *sbio = r1_bio->bios[i];
                 int error = sbio->bi_error;
+               struct page **ppages = get_resync_pages(pbio)->pages;
+               struct page **spages = get_resync_pages(sbio)->pages;
+               struct bio_vec *bi;
+               int page_len[RESYNC_PAGES] = { 0 };
  
                 if (sbio->bi_end_io != end_sync_read)
                         continue;
                 /* Now we can 'fixup' the error value */
                 sbio->bi_error = 0;
  
+               bio_for_each_segment_all(bi, sbio, j)
+                       page_len[j] = bi->bv_len;
+
                 if (!error) {
                         for (j = vcnt; j-- ; ) {
-                               struct page *p, *s;
-                               p = pbio->bi_io_vec[j].bv_page;
-                               s = sbio->bi_io_vec[j].bv_page;
-                               if (memcmp(page_address(p),
-                                          page_address(s),
-                                          sbio->bi_io_vec[j].bv_len))
+                               if (memcmp(page_address(ppages[j]),
+                                          page_address(spages[j]),
+                                          page_len[j]))
                                         break;
                         }
                 } else
@@ -2367,18 +2387,11 @@ static int narrow_write_error(struct r1bio *r1_bio, int i)
                 /* Write at 'sector' for 'sectors'*/
  
                 if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
-                       unsigned vcnt = r1_bio->behind_page_count;
-                       struct bio_vec *vec = r1_bio->behind_bvecs;
-
-                       while (!vec->bv_page) {
-                               vec++;
-                               vcnt--;
-                       }
-
-                       wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev);
-                       memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec));
-
-                       wbio->bi_vcnt = vcnt;
+                       wbio = bio_clone_fast(r1_bio->behind_master_bio,
+                                             GFP_NOIO,
+                                             mddev->bio_set);
+                       /* We really need a _all clone */
+                       wbio->bi_iter = (struct bvec_iter){ 0 };
                 } else {
                         wbio = bio_clone_fast(r1_bio->master_bio, GFP_NOIO,
                                               mddev->bio_set);
@@ -2550,12 +2563,7 @@ read_more:
                         int sectors_handled = (r1_bio->sector + max_sectors
                                                - mbio->bi_iter.bi_sector);
                         r1_bio->sectors = max_sectors;
-                       spin_lock_irq(&conf->device_lock);
-                       if (mbio->bi_phys_segments == 0)
-                               mbio->bi_phys_segments = 2;
-                       else
-                               mbio->bi_phys_segments++;
-                       spin_unlock_irq(&conf->device_lock);
+                       bio_inc_remaining(mbio);
                         trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
                                               bio, bio_dev, bio_sector);
                         generic_make_request(bio);
@@ -2563,6 +2571,7 @@ read_more:
  
                         r1_bio = alloc_r1bio(mddev, mbio, sectors_handled);
                         set_bit(R1BIO_ReadError, &r1_bio->state);
+                       inc_pending(conf, r1_bio->sector);
  
                         goto read_more;
                 } else {
@@ -2769,7 +2778,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
         for (i = 0; i < conf->raid_disks * 2; i++) {
                 struct md_rdev *rdev;
                 bio = r1_bio->bios[i];
-               bio_reset(bio);
  
                 rdev = rcu_dereference(conf->mirrors[i].rdev);
                 if (rdev == NULL ||
@@ -2825,7 +2833,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
                         atomic_inc(&rdev->nr_pending);
                         bio->bi_iter.bi_sector = sector_nr + rdev->data_offset;
                         bio->bi_bdev = rdev->bdev;
-                       bio->bi_private = r1_bio;
                         if (test_bit(FailFast, &rdev->flags))
                                 bio->bi_opf |= MD_FAILFAST;
                 }
@@ -2911,31 +2918,25 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
                 }
  
                 for (i = 0 ; i < conf->raid_disks * 2; i++) {
+                       struct resync_pages *rp;
+
                         bio = r1_bio->bios[i];
+                       rp = get_resync_pages(bio);
                         if (bio->bi_end_io) {
-                               page = bio->bi_io_vec[bio->bi_vcnt].bv_page;
-                               if (bio_add_page(bio, page, len, 0) == 0) {
-                                       /* stop here */
-                                       bio->bi_io_vec[bio->bi_vcnt].bv_page = page;
-                                       while (i > 0) {
-                                               i--;
-                                               bio = r1_bio->bios[i];
-                                               if (bio->bi_end_io==NULL)
-                                                       continue;
-                                               /* remove last page from this bio */
-                                               bio->bi_vcnt--;
-                                               bio->bi_iter.bi_size -= len;
-                                               bio_clear_flag(bio, BIO_SEG_VALID);
-                                       }
-                                       goto bio_full;
-                               }
+                               page = resync_fetch_page(rp, rp->idx++);
+
+                               /*
+                                * won't fail because the vec table is big
+                                * enough to hold all these pages
+                                */
+                               bio_add_page(bio, page, len, 0);
                         }
                 }
                 nr_sectors += len>>9;
                 sector_nr += len>>9;
                 sync_blocks -= (len>>9);
-       } while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES);
- bio_full:
+       } while (get_resync_pages(r1_bio->bios[disk]->bi_private)->idx < RESYNC_PAGES);
+
         r1_bio->sectors = nr_sectors;
  
         if (mddev_is_clustered(mddev) &&
@@ -3243,8 +3244,6 @@ static int raid1_resize(struct mddev *mddev, sector_t sectors)
                         return ret;
         }
         md_set_array_sectors(mddev, newsize);
-       set_capacity(mddev->gendisk, mddev->array_sectors);
-       revalidate_disk(mddev->gendisk);
         if (sectors > mddev->dev_sectors &&
             mddev->recovery_cp > mddev->dev_sectors) {
                 mddev->recovery_cp = mddev->dev_sectors;