MD: change the parameter of md thread

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 04348d76bb30fa8831964ea980ec2df912a45f92..81c02d63440b647e9fc6145206f0d53b6efc90d9 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -99,34 +99,40 @@ static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector)
  * We maintain a biased count of active stripes in the bottom 16 bits of
  * bi_phys_segments, and a count of processed stripes in the upper 16 bits
  */
-static inline int raid5_bi_phys_segments(struct bio *bio)
+static inline int raid5_bi_processed_stripes(struct bio *bio)
 {
-       return bio->bi_phys_segments & 0xffff;
+       atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
+       return (atomic_read(segments) >> 16) & 0xffff;
 }
 
-static inline int raid5_bi_hw_segments(struct bio *bio)
+static inline int raid5_dec_bi_active_stripes(struct bio *bio)
 {
-       return (bio->bi_phys_segments >> 16) & 0xffff;
+       atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
+       return atomic_sub_return(1, segments) & 0xffff;
 }
 
-static inline int raid5_dec_bi_phys_segments(struct bio *bio)
+static inline void raid5_inc_bi_active_stripes(struct bio *bio)
 {
-       --bio->bi_phys_segments;
-       return raid5_bi_phys_segments(bio);
+       atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
+       atomic_inc(segments);
 }
 
-static inline int raid5_dec_bi_hw_segments(struct bio *bio)
+static inline void raid5_set_bi_processed_stripes(struct bio *bio,
+       unsigned int cnt)
 {
-       unsigned short val = raid5_bi_hw_segments(bio);
+       atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
+       int old, new;
 
-       --val;
-       bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio);
-       return val;
+       do {
+               old = atomic_read(segments);
+               new = (old & 0xffff) | (cnt << 16);
+       } while (atomic_cmpxchg(segments, old, new) != old);
 }
 
-static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt)
+static inline void raid5_set_bi_stripes(struct bio *bio, unsigned int cnt)
 {
-       bio->bi_phys_segments = raid5_bi_phys_segments(bio) | (cnt << 16);
+       atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
+       atomic_set(segments, cnt);
 }
 
 /* Find first data disk in a raid6 stripe */
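
The helper rewrite above turns bi_phys_segments into a lock-free word: a biased count of active stripes lives in the low 16 bits and a processed-stripe cursor in the high 16 bits, both updated with atomics instead of under device_lock. A minimal userspace sketch of the same arithmetic (C11 atomics standing in for the kernel's atomic_t; all names here are illustrative):

/* packed_count.c: low 16 bits = active refs, high 16 bits = progress */
#include <stdatomic.h>
#include <stdio.h>

static atomic_uint segments;        /* stands in for bio->bi_phys_segments */

static unsigned processed(void)  { return (atomic_load(&segments) >> 16) & 0xffff; }
static void inc_active(void)     { atomic_fetch_add(&segments, 1); }
static unsigned dec_active(void) { return (atomic_fetch_sub(&segments, 1) - 1) & 0xffff; }

static void set_processed(unsigned cnt)
{
	unsigned old, new;

	do {                        /* cmpxchg loop: rewrite only the high half */
		old = atomic_load(&segments);
		new = (old & 0xffff) | (cnt << 16);
	} while (!atomic_compare_exchange_weak(&segments, &old, new));
}

int main(void)
{
	atomic_store(&segments, 1); /* biased count, as in remove_bio_from_retry() */
	inc_active();               /* a stripe takes a reference */
	set_processed(5);           /* the low half is untouched */
	printf("processed=%u, active left=%u\n", processed(), dec_active());
	printf("bias dropped, active left=%u\n", dec_active());
	return 0;
}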
@@ -190,49 +196,56 @@ static int stripe_operations_active(struct stripe_head *sh)
               test_bit(STRIPE_COMPUTE_RUN, &sh->state);
 }
 
-static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
+static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
 {
-       if (atomic_dec_and_test(&sh->count)) {
-               BUG_ON(!list_empty(&sh->lru));
-               BUG_ON(atomic_read(&conf->active_stripes)==0);
-               if (test_bit(STRIPE_HANDLE, &sh->state)) {
-                       if (test_bit(STRIPE_DELAYED, &sh->state) &&
-                           !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
-                               list_add_tail(&sh->lru, &conf->delayed_list);
-                       else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
-                                  sh->bm_seq - conf->seq_write > 0)
-                               list_add_tail(&sh->lru, &conf->bitmap_list);
-                       else {
-                               clear_bit(STRIPE_DELAYED, &sh->state);
-                               clear_bit(STRIPE_BIT_DELAY, &sh->state);
-                               list_add_tail(&sh->lru, &conf->handle_list);
-                       }
-                       md_wakeup_thread(conf->mddev->thread);
-               } else {
-                       BUG_ON(stripe_operations_active(sh));
-                       if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
-                               if (atomic_dec_return(&conf->preread_active_stripes)
-                                   < IO_THRESHOLD)
-                                       md_wakeup_thread(conf->mddev->thread);
-                       atomic_dec(&conf->active_stripes);
-                       if (!test_bit(STRIPE_EXPANDING, &sh->state)) {
-                               list_add_tail(&sh->lru, &conf->inactive_list);
-                               wake_up(&conf->wait_for_stripe);
-                               if (conf->retry_read_aligned)
-                                       md_wakeup_thread(conf->mddev->thread);
-                       }
+       BUG_ON(!list_empty(&sh->lru));
+       BUG_ON(atomic_read(&conf->active_stripes)==0);
+       if (test_bit(STRIPE_HANDLE, &sh->state)) {
+               if (test_bit(STRIPE_DELAYED, &sh->state) &&
+                   !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+                       list_add_tail(&sh->lru, &conf->delayed_list);
+               else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
+                          sh->bm_seq - conf->seq_write > 0)
+                       list_add_tail(&sh->lru, &conf->bitmap_list);
+               else {
+                       clear_bit(STRIPE_DELAYED, &sh->state);
+                       clear_bit(STRIPE_BIT_DELAY, &sh->state);
+                       list_add_tail(&sh->lru, &conf->handle_list);
+               }
+               md_wakeup_thread(conf->mddev->thread);
+       } else {
+               BUG_ON(stripe_operations_active(sh));
+               if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+                       if (atomic_dec_return(&conf->preread_active_stripes)
+                           < IO_THRESHOLD)
+                               md_wakeup_thread(conf->mddev->thread);
+               atomic_dec(&conf->active_stripes);
+               if (!test_bit(STRIPE_EXPANDING, &sh->state)) {
+                       list_add_tail(&sh->lru, &conf->inactive_list);
+                       wake_up(&conf->wait_for_stripe);
+                       if (conf->retry_read_aligned)
+                               md_wakeup_thread(conf->mddev->thread);
                }
        }
 }
 
+static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
+{
+       if (atomic_dec_and_test(&sh->count))
+               do_release_stripe(conf, sh);
+}
+
 static void release_stripe(struct stripe_head *sh)
 {
        struct r5conf *conf = sh->raid_conf;
        unsigned long flags;
 
-       spin_lock_irqsave(&conf->device_lock, flags);
-       __release_stripe(conf, sh);
-       spin_unlock_irqrestore(&conf->device_lock, flags);
+       local_irq_save(flags);
+       if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) {
+               do_release_stripe(conf, sh);
+               spin_unlock(&conf->device_lock);
+       }
+       local_irq_restore(flags);
 }
 
 static inline void remove_hash(struct stripe_head *sh)
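
release_stripe() now takes device_lock only when the reference count actually reaches zero: atomic_dec_and_lock() decrements locklessly on the fast path and returns with the lock held only for the final put. A hedged userspace sketch of that idiom (a pthread mutex stands in for device_lock; the object and teardown are hypothetical):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

struct obj {
	atomic_int      count;
	pthread_mutex_t lock;       /* protects the shared lists */
};

static bool dec_and_lock(atomic_int *cnt, pthread_mutex_t *lock)
{
	int old = atomic_load(cnt);

	/* Fast path: decrement without the lock while count stays above 1. */
	while (old > 1)
		if (atomic_compare_exchange_weak(cnt, &old, old - 1))
			return false;            /* not the last reference */

	/* Might hit zero: take the lock, then do the real decrement. */
	pthread_mutex_lock(lock);
	if (atomic_fetch_sub(cnt, 1) == 1)
		return true;                     /* locked, count is now zero */
	pthread_mutex_unlock(lock);
	return false;
}

static void put_obj(struct obj *o)
{
	if (dec_and_lock(&o->count, &o->lock)) {
		/* count hit zero: move o to the free list, wake waiters, etc. */
		pthread_mutex_unlock(&o->lock);
	}
}

int main(void)
{
	struct obj o = { 2, PTHREAD_MUTEX_INITIALIZER };

	put_obj(&o);    /* fast path: no lock taken */
	put_obj(&o);    /* final put: lock taken, teardown runs */
	return 0;
}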
@@ -471,7 +484,8 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
                } else {
                        if (atomic_read(&sh->count)) {
                                BUG_ON(!list_empty(&sh->lru)
-                                   && !test_bit(STRIPE_EXPANDING, &sh->state));
+                                   && !test_bit(STRIPE_EXPANDING, &sh->state)
+                                   && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state));
                        } else {
                                if (!test_bit(STRIPE_HANDLE, &sh->state))
                                        atomic_inc(&conf->active_stripes);
@@ -640,6 +654,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                        else
                                bi->bi_sector = (sh->sector
                                                 + rdev->data_offset);
+                       if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
+                               bi->bi_rw |= REQ_FLUSH;
+
                        bi->bi_flags = 1 << BIO_UPTODATE;
                        bi->bi_idx = 0;
                        bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
@@ -749,14 +766,12 @@ static void ops_complete_biofill(void *stripe_head_ref)
 {
        struct stripe_head *sh = stripe_head_ref;
        struct bio *return_bi = NULL;
-       struct r5conf *conf = sh->raid_conf;
        int i;
 
        pr_debug("%s: stripe %llu\n", __func__,
                (unsigned long long)sh->sector);
 
        /* clear completed biofills */
-       spin_lock_irq(&conf->device_lock);
        for (i = sh->disks; i--; ) {
                struct r5dev *dev = &sh->dev[i];
 
@@ -774,7 +789,7 @@ static void ops_complete_biofill(void *stripe_head_ref)
                        while (rbi && rbi->bi_sector <
                                dev->sector + STRIPE_SECTORS) {
                                rbi2 = r5_next_bio(rbi, dev->sector);
-                               if (!raid5_dec_bi_phys_segments(rbi)) {
+                               if (!raid5_dec_bi_active_stripes(rbi)) {
                                        rbi->bi_next = return_bi;
                                        return_bi = rbi;
                                }
@@ -782,7 +797,6 @@ static void ops_complete_biofill(void *stripe_head_ref)
                        }
                }
        }
-       spin_unlock_irq(&conf->device_lock);
        clear_bit(STRIPE_BIOFILL_RUN, &sh->state);
 
        return_io(return_bi);
@@ -794,7 +808,6 @@ static void ops_complete_biofill(void *stripe_head_ref)
 static void ops_run_biofill(struct stripe_head *sh)
 {
        struct dma_async_tx_descriptor *tx = NULL;
-       struct r5conf *conf = sh->raid_conf;
        struct async_submit_ctl submit;
        int i;
 
@@ -805,10 +818,10 @@ static void ops_run_biofill(struct stripe_head *sh)
                struct r5dev *dev = &sh->dev[i];
                if (test_bit(R5_Wantfill, &dev->flags)) {
                        struct bio *rbi;
-                       spin_lock_irq(&conf->device_lock);
+                       spin_lock_irq(&sh->stripe_lock);
                        dev->read = rbi = dev->toread;
                        dev->toread = NULL;
-                       spin_unlock_irq(&conf->device_lock);
+                       spin_unlock_irq(&sh->stripe_lock);
                        while (rbi && rbi->bi_sector <
                                dev->sector + STRIPE_SECTORS) {
                                tx = async_copy_data(0, rbi, dev->page,
@@ -1144,12 +1157,12 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
                if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) {
                        struct bio *wbi;
 
-                       spin_lock_irq(&sh->raid_conf->device_lock);
+                       spin_lock_irq(&sh->stripe_lock);
                        chosen = dev->towrite;
                        dev->towrite = NULL;
                        BUG_ON(dev->written);
                        wbi = dev->written = chosen;
-                       spin_unlock_irq(&sh->raid_conf->device_lock);
+                       spin_unlock_irq(&sh->stripe_lock);
 
                        while (wbi && wbi->bi_sector <
                                dev->sector + STRIPE_SECTORS) {
@@ -1454,6 +1467,8 @@ static int grow_one_stripe(struct r5conf *conf)
        init_waitqueue_head(&sh->ops.wait_for_ops);
        #endif
 
+       spin_lock_init(&sh->stripe_lock);
+
        if (grow_buffers(sh)) {
                shrink_buffers(sh);
                kmem_cache_free(conf->slab_cache, sh);
@@ -1739,7 +1754,9 @@ static void raid5_end_read_request(struct bio * bi, int error)
                        atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
                        clear_bit(R5_ReadError, &sh->dev[i].flags);
                        clear_bit(R5_ReWrite, &sh->dev[i].flags);
-               }
+               } else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
+                       clear_bit(R5_ReadNoMerge, &sh->dev[i].flags);
+
                if (atomic_read(&rdev->read_errors))
                        atomic_set(&rdev->read_errors, 0);
        } else {
@@ -1784,7 +1801,11 @@ static void raid5_end_read_request(struct bio * bi, int error)
                else
                        retry = 1;
                if (retry)
-                       set_bit(R5_ReadError, &sh->dev[i].flags);
+                       if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) {
+                               set_bit(R5_ReadError, &sh->dev[i].flags);
+                               clear_bit(R5_ReadNoMerge, &sh->dev[i].flags);
+                       } else
+                               set_bit(R5_ReadNoMerge, &sh->dev[i].flags);
                else {
                        clear_bit(R5_ReadError, &sh->dev[i].flags);
                        clear_bit(R5_ReWrite, &sh->dev[i].flags);
@@ -2340,11 +2361,18 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
                (unsigned long long)bi->bi_sector,
                (unsigned long long)sh->sector);
 
-
-       spin_lock_irq(&conf->device_lock);
+       /*
+        * If several bios share a stripe, the bio bi_phys_segments acts as a
+        * reference count to avoid races. The reference count should already
+        * have been increased before this function is called (for example,
+        * in make_request()), so other bios sharing this stripe will not
+        * free the stripe. If a stripe is used by only one bio, the stripe
+        * lock protects it.
+        */
+       spin_lock_irq(&sh->stripe_lock);
        if (forwrite) {
                bip = &sh->dev[dd_idx].towrite;
-               if (*bip == NULL && sh->dev[dd_idx].written == NULL)
+               if (*bip == NULL)
                        firstwrite = 1;
        } else
                bip = &sh->dev[dd_idx].toread;
@@ -2360,7 +2388,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
        if (*bip)
                bi->bi_next = *bip;
        *bip = bi;
-       bi->bi_phys_segments++;
+       raid5_inc_bi_active_stripes(bi);
 
        if (forwrite) {
                /* check if page is covered */
@@ -2375,7 +2403,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
                if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS)
                        set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
        }
-       spin_unlock_irq(&conf->device_lock);
+       spin_unlock_irq(&sh->stripe_lock);
 
        pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
                (unsigned long long)(*bip)->bi_sector,
@@ -2391,7 +2419,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
 
  overlap:
        set_bit(R5_Overlap, &sh->dev[dd_idx].flags);
-       spin_unlock_irq(&conf->device_lock);
+       spin_unlock_irq(&sh->stripe_lock);
        return 0;
 }
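
With add_stripe_bio() running under the per-stripe lock, nothing global pins a bio shared by several stripes; the packed active-stripe count is the only thing keeping it alive. The submitter holds a bias of 1, each stripe attachment adds a reference before the stripe can see the bio, and whoever drops the count to zero completes it. A runnable miniature of that lifecycle (userspace C; the request type and counts are illustrative):

#include <stdatomic.h>
#include <stdio.h>

struct request { atomic_int active; };

static void put_ref(struct request *r)
{
	if (atomic_fetch_sub(&r->active, 1) == 1)
		printf("last reference dropped: complete the request\n");
}

int main(void)
{
	struct request r = { 1 };                /* bias: submitter's reference */
	int i, nstripes = 3;

	for (i = 0; i < nstripes; i++)
		atomic_fetch_add(&r.active, 1);  /* taken before each attach */
	for (i = 0; i < nstripes; i++)
		put_ref(&r);                     /* stripes finish in any order */
	put_ref(&r);                             /* drop the bias: completes now */
	return 0;
}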
 
@@ -2441,10 +2469,11 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                                rdev_dec_pending(rdev, conf->mddev);
                        }
                }
-               spin_lock_irq(&conf->device_lock);
+               spin_lock_irq(&sh->stripe_lock);
                /* fail all writes first */
                bi = sh->dev[i].towrite;
                sh->dev[i].towrite = NULL;
+               spin_unlock_irq(&sh->stripe_lock);
                if (bi) {
                        s->to_write--;
                        bitmap_end = 1;
@@ -2457,13 +2486,17 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                        sh->dev[i].sector + STRIPE_SECTORS) {
                        struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
                        clear_bit(BIO_UPTODATE, &bi->bi_flags);
-                       if (!raid5_dec_bi_phys_segments(bi)) {
+                       if (!raid5_dec_bi_active_stripes(bi)) {
                                md_write_end(conf->mddev);
                                bi->bi_next = *return_bi;
                                *return_bi = bi;
                        }
                        bi = nextbi;
                }
+               if (bitmap_end)
+                       bitmap_endwrite(conf->mddev->bitmap, sh->sector,
+                               STRIPE_SECTORS, 0, 0);
+               bitmap_end = 0;
                /* and fail all 'written' */
                bi = sh->dev[i].written;
                sh->dev[i].written = NULL;
@@ -2472,7 +2505,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                       sh->dev[i].sector + STRIPE_SECTORS) {
                        struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
                        clear_bit(BIO_UPTODATE, &bi->bi_flags);
-                       if (!raid5_dec_bi_phys_segments(bi)) {
+                       if (!raid5_dec_bi_active_stripes(bi)) {
                                md_write_end(conf->mddev);
                                bi->bi_next = *return_bi;
                                *return_bi = bi;
@@ -2496,14 +2529,13 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                                struct bio *nextbi =
                                        r5_next_bio(bi, sh->dev[i].sector);
                                clear_bit(BIO_UPTODATE, &bi->bi_flags);
-                               if (!raid5_dec_bi_phys_segments(bi)) {
+                               if (!raid5_dec_bi_active_stripes(bi)) {
                                        bi->bi_next = *return_bi;
                                        *return_bi = bi;
                                }
                                bi = nextbi;
                        }
                }
-               spin_unlock_irq(&conf->device_lock);
                if (bitmap_end)
                        bitmap_endwrite(conf->mddev->bitmap, sh->sector,
                                        STRIPE_SECTORS, 0, 0);
@@ -2707,30 +2739,23 @@ static void handle_stripe_clean_event(struct r5conf *conf,
                                test_bit(R5_UPTODATE, &dev->flags)) {
                                /* We can return any write requests */
                                struct bio *wbi, *wbi2;
-                               int bitmap_end = 0;
                                pr_debug("Return write for disc %d\n", i);
-                               spin_lock_irq(&conf->device_lock);
                                wbi = dev->written;
                                dev->written = NULL;
                                while (wbi && wbi->bi_sector <
                                        dev->sector + STRIPE_SECTORS) {
                                        wbi2 = r5_next_bio(wbi, dev->sector);
-                                       if (!raid5_dec_bi_phys_segments(wbi)) {
+                                       if (!raid5_dec_bi_active_stripes(wbi)) {
                                                md_write_end(conf->mddev);
                                                wbi->bi_next = *return_bi;
                                                *return_bi = wbi;
                                        }
                                        wbi = wbi2;
                                }
-                               if (dev->towrite == NULL)
-                                       bitmap_end = 1;
-                               spin_unlock_irq(&conf->device_lock);
-                               if (bitmap_end)
-                                       bitmap_endwrite(conf->mddev->bitmap,
-                                                       sh->sector,
-                                                       STRIPE_SECTORS,
+                               bitmap_endwrite(conf->mddev->bitmap, sh->sector,
+                                               STRIPE_SECTORS,
                                         !test_bit(STRIPE_DEGRADED, &sh->state),
-                                                       0);
+                                               0);
                        }
                }
 
@@ -3182,7 +3207,6 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
 
        /* Now to look around and see what can be done */
        rcu_read_lock();
-       spin_lock_irq(&conf->device_lock);
        for (i=disks; i--; ) {
                struct md_rdev *rdev;
                sector_t first_bad;
@@ -3328,7 +3352,6 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
                                do_recovery = 1;
                }
        }
-       spin_unlock_irq(&conf->device_lock);
        if (test_bit(STRIPE_SYNCING, &sh->state)) {
                /* If there is a failed device being replaced,
                 *     we must be recovering.
@@ -3791,7 +3814,7 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf)
                 * this sets the active stripe count to 1 and the processed
                 * stripe count to zero (upper 16 bits)
                 */
-               bi->bi_phys_segments = 1; /* biased count of active stripes */
+               raid5_set_bi_stripes(bi, 1); /* biased count of active stripes */
        }
 
        return bi;
@@ -3988,6 +4011,62 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf)
        return sh;
 }
 
+struct raid5_plug_cb {
+       struct blk_plug_cb      cb;
+       struct list_head        list;
+};
+
+static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
+{
+       struct raid5_plug_cb *cb = container_of(
+               blk_cb, struct raid5_plug_cb, cb);
+       struct stripe_head *sh;
+       struct mddev *mddev = cb->cb.data;
+       struct r5conf *conf = mddev->private;
+
+       if (cb->list.next && !list_empty(&cb->list)) {
+               spin_lock_irq(&conf->device_lock);
+               while (!list_empty(&cb->list)) {
+                       sh = list_first_entry(&cb->list, struct stripe_head, lru);
+                       list_del_init(&sh->lru);
+                       /*
+                        * avoid the race where release_stripe_plug() sees
+                        * STRIPE_ON_UNPLUG_LIST clear but the stripe is
+                        * still on our list
+                        */
+                       smp_mb__before_clear_bit();
+                       clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state);
+                       __release_stripe(conf, sh);
+               }
+               spin_unlock_irq(&conf->device_lock);
+       }
+       kfree(cb);
+}
+
+static void release_stripe_plug(struct mddev *mddev,
+                               struct stripe_head *sh)
+{
+       struct blk_plug_cb *blk_cb = blk_check_plugged(
+               raid5_unplug, mddev,
+               sizeof(struct raid5_plug_cb));
+       struct raid5_plug_cb *cb;
+
+       if (!blk_cb) {
+               release_stripe(sh);
+               return;
+       }
+
+       cb = container_of(blk_cb, struct raid5_plug_cb, cb);
+
+       if (cb->list.next == NULL)
+               INIT_LIST_HEAD(&cb->list);
+
+       if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state))
+               list_add_tail(&sh->lru, &cb->list);
+       else
+               release_stripe(sh);
+}
+
 static void make_request(struct mddev *mddev, struct bio * bi)
 {
        struct r5conf *conf = mddev->private;
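
release_stripe_plug() builds on blk_check_plugged(), which looks up (or allocates, zeroed) a per-task callback on the current blk_plug and returns NULL when no plug is active; the zeroed allocation is why cb->list.next == NULL identifies first use above. A hedged kernel-style sketch of the same idiom for a hypothetical driver (the foo_* names are illustrative, not a real API):

/* hypothetical driver types and helpers, for illustration only */
struct foo_dev;
struct foo_req { struct list_head list; };
static void foo_dispatch(struct foo_dev *dev, struct foo_req *rq);
static void foo_dispatch_list(void *dev, struct list_head *batch);

struct foo_plug_cb {
	struct blk_plug_cb cb;          /* recovered with container_of() */
	struct list_head   pending;     /* driver-private batch */
};

static void foo_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
{
	struct foo_plug_cb *cb = container_of(blk_cb, struct foo_plug_cb, cb);

	foo_dispatch_list(blk_cb->data, &cb->pending);  /* one flush per unplug */
	kfree(cb);                      /* the callback owns its allocation */
}

static void foo_queue(struct foo_dev *dev, struct foo_req *rq)
{
	struct blk_plug_cb *blk_cb =
		blk_check_plugged(foo_unplug, dev, sizeof(struct foo_plug_cb));
	struct foo_plug_cb *cb;

	if (!blk_cb) {                  /* no plug active: submit directly */
		foo_dispatch(dev, rq);
		return;
	}
	cb = container_of(blk_cb, struct foo_plug_cb, cb);
	if (cb->pending.next == NULL)   /* freshly kzalloc()ed: first use */
		INIT_LIST_HEAD(&cb->pending);
	list_add_tail(&rq->list, &cb->pending);
}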
@@ -4113,11 +4192,10 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                        finish_wait(&conf->wait_for_overlap, &w);
                        set_bit(STRIPE_HANDLE, &sh->state);
                        clear_bit(STRIPE_DELAYED, &sh->state);
-                       if ((bi->bi_rw & REQ_SYNC) &&
+                       if ((bi->bi_rw & REQ_NOIDLE) &&
                            !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
                                atomic_inc(&conf->preread_active_stripes);
-                       mddev_check_plugged(mddev);
-                       release_stripe(sh);
+                       release_stripe_plug(mddev, sh);
                } else {
                        /* cannot get stripe for read-ahead, just give-up */
                        clear_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -4126,9 +4204,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                }
        }
 
-       spin_lock_irq(&conf->device_lock);
-       remaining = raid5_dec_bi_phys_segments(bi);
-       spin_unlock_irq(&conf->device_lock);
+       remaining = raid5_dec_bi_active_stripes(bi);
        if (remaining == 0) {
 
                if ( rw == WRITE )
@@ -4484,7 +4560,7 @@ static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
                     sector += STRIPE_SECTORS,
                     scnt++) {
 
-               if (scnt < raid5_bi_hw_segments(raid_bio))
+               if (scnt < raid5_bi_processed_stripes(raid_bio))
                        /* already done this stripe */
                        continue;
 
@@ -4492,25 +4568,24 @@ static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
 
                if (!sh) {
                        /* failed to get a stripe - must wait */
-                       raid5_set_bi_hw_segments(raid_bio, scnt);
+                       raid5_set_bi_processed_stripes(raid_bio, scnt);
                        conf->retry_read_aligned = raid_bio;
                        return handled;
                }
 
                if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) {
                        release_stripe(sh);
-                       raid5_set_bi_hw_segments(raid_bio, scnt);
+                       raid5_set_bi_processed_stripes(raid_bio, scnt);
                        conf->retry_read_aligned = raid_bio;
                        return handled;
                }
 
+               set_bit(R5_ReadNoMerge, &sh->dev[dd_idx].flags);
                handle_stripe(sh);
                release_stripe(sh);
                handled++;
        }
-       spin_lock_irq(&conf->device_lock);
-       remaining = raid5_dec_bi_phys_segments(raid_bio);
-       spin_unlock_irq(&conf->device_lock);
+       remaining = raid5_dec_bi_active_stripes(raid_bio);
        if (remaining == 0)
                bio_endio(raid_bio, 0);
        if (atomic_dec_and_test(&conf->active_aligned_reads))
@@ -4518,6 +4593,30 @@ static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
        return handled;
 }
 
+#define MAX_STRIPE_BATCH 8
+static int handle_active_stripes(struct r5conf *conf)
+{
+       struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
+       int i, batch_size = 0;
+
+       while (batch_size < MAX_STRIPE_BATCH &&
+                       (sh = __get_priority_stripe(conf)) != NULL)
+               batch[batch_size++] = sh;
+
+       if (batch_size == 0)
+               return batch_size;
+       spin_unlock_irq(&conf->device_lock);
+
+       for (i = 0; i < batch_size; i++)
+               handle_stripe(batch[i]);
+
+       cond_resched();
+
+       spin_lock_irq(&conf->device_lock);
+       for (i = 0; i < batch_size; i++)
+               __release_stripe(conf, batch[i]);
+       return batch_size;
+}
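
handle_active_stripes() is called with device_lock held and returns with it held: it dequeues up to MAX_STRIPE_BATCH stripes under the lock, drops the lock for the expensive handle_stripe() calls, then re-acquires it once to release the whole batch, amortizing two lock round-trips over up to eight stripes. The locking contract in miniature (runnable userspace sketch; the queue and work are hypothetical):

#include <pthread.h>
#include <stdio.h>

#define BATCH 8

struct queue {
	pthread_mutex_t lock;
	int             items;      /* stand-in for the priority lists */
};

static void work_fn(int item) { printf("handling %d\n", item); }

/* Called with q->lock held; returns with q->lock held. */
static int drain_batch(struct queue *q)
{
	int batch[BATCH];
	int i, n = 0;

	while (n < BATCH && q->items > 0)   /* dequeue under the lock */
		batch[n++] = q->items--;
	if (n == 0)
		return 0;

	pthread_mutex_unlock(&q->lock);     /* heavy work runs unlocked */
	for (i = 0; i < n; i++)
		work_fn(batch[i]);
	pthread_mutex_lock(&q->lock);       /* one re-acquire releases the batch */
	return n;
}

int main(void)
{
	struct queue q = { PTHREAD_MUTEX_INITIALIZER, 20 };

	pthread_mutex_lock(&q.lock);
	while (drain_batch(&q) > 0)         /* same shape as the raid5d loop */
		;
	pthread_mutex_unlock(&q.lock);
	return 0;
}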
 
 /*
  * This is our raid5 kernel thread.
@@ -4526,9 +4625,9 @@ static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
  * During the scan, completed stripes are saved for us by the interrupt
  * handler, so that they will not have to wait for our next wakeup.
  */
-static void raid5d(struct mddev *mddev)
+static void raid5d(struct md_thread *thread)
 {
-       struct stripe_head *sh;
+       struct mddev *mddev = thread->mddev;
        struct r5conf *conf = mddev->private;
        int handled;
        struct blk_plug plug;
@@ -4542,8 +4641,8 @@ static void raid5d(struct mddev *mddev)
        spin_lock_irq(&conf->device_lock);
        while (1) {
                struct bio *bio;
+               int batch_size;
 
-               if (atomic_read(&mddev->plug_cnt) == 0 &&
-                   !list_empty(&conf->bitmap_list)) {
+               if (!list_empty(&conf->bitmap_list)) {
                        /* Now is a good time to flush some bitmap updates */
                        conf->seq_flush++;
@@ -4553,8 +4653,7 @@ static void raid5d(struct mddev *mddev)
                        conf->seq_write = conf->seq_flush;
                        activate_bit_delay(conf);
                }
-               if (atomic_read(&mddev->plug_cnt) == 0)
-                       raid5_activate_delayed(conf);
+               raid5_activate_delayed(conf);
 
                while ((bio = remove_bio_from_retry(conf))) {
                        int ok;
@@ -4566,21 +4665,16 @@ static void raid5d(struct mddev *mddev)
                        handled++;
                }
 
-               sh = __get_priority_stripe(conf);
-
-               if (!sh)
+               batch_size = handle_active_stripes(conf);
+               if (!batch_size)
                        break;
-               spin_unlock_irq(&conf->device_lock);
-               
-               handled++;
-               handle_stripe(sh);
-               release_stripe(sh);
-               cond_resched();
+               handled += batch_size;
 
-               if (mddev->flags & ~(1<<MD_CHANGE_PENDING))
+               if (mddev->flags & ~(1<<MD_CHANGE_PENDING)) {
+                       spin_unlock_irq(&conf->device_lock);
                        md_check_recovery(mddev);
-
-               spin_lock_irq(&conf->device_lock);
+                       spin_lock_irq(&conf->device_lock);
+               }
        }
        pr_debug("%d stripes handled\n", handled);