Merge tag 'md/4.8-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
author     Linus Torvalds <torvalds@linux-foundation.org>
           Tue, 30 Aug 2016 18:24:04 +0000 (11:24 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Tue, 30 Aug 2016 18:24:04 +0000 (11:24 -0700)
Pull MD fixes from Shaohua Li:
 "This includes several bug fixes:

   - Alexey Obitotskiy fixed a hang for faulty raid5 array with external
     management

   - Song Liu fixed two raid5 journal related bugs

   - Tomasz Majchrzak fixed a bad block recording issue and an
     accounting issue for raid10

   - ZhengYuan Liu fixed an accounting issue for raid5

   - I fixed a potential race condition and memory leak with DIF/DIX
     enabled

   - other trivial fixes"

* tag 'md/4.8-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  raid5: avoid unnecessary bio data set
  raid5: fix memory leak of bio integrity data
  raid10: record correct address of bad block
  md-cluster: fix error return code in join()
  r5cache: set MD_JOURNAL_CLEAN correctly
  md: don't print the same repeated messages about delayed sync operation
  md: remove obsolete ret in md_start_sync
  md: do not count journal as spare in GET_ARRAY_INFO
  md: Prevent IO hold during accessing to faulty raid5 array
  MD: hold mddev lock to change bitmap location
  raid5: fix incorrectly counter of conf->empty_inactive_list_nr
  raid10: increment write counter after bio is split

drivers/md/md.c
drivers/md/raid10.c
drivers/md/raid5.c

diff --combined drivers/md/md.c
index d646f6e444f0d1921cf6d1df8e276f6acec76736,4f6cf3b849e3d6b8bf611046485cc7c9b34190f5..67642bacd597ae7c97517e7dab47e05115c3c87a
@@@ -285,7 -285,7 +285,7 @@@ static blk_qc_t md_make_request(struct 
         */
        sectors = bio_sectors(bio);
        /* bio could be mergeable after passing to underlayer */
 -      bio->bi_rw &= ~REQ_NOMERGE;
 +      bio->bi_opf &= ~REQ_NOMERGE;
        mddev->pers->make_request(mddev, bio);
  
        cpu = part_stat_lock();
@@@ -414,7 -414,7 +414,7 @@@ static void md_submit_flush_data(struc
                /* an empty barrier - all done */
                bio_endio(bio);
        else {
 -              bio->bi_rw &= ~REQ_PREFLUSH;
 +              bio->bi_opf &= ~REQ_PREFLUSH;
                mddev->pers->make_request(mddev, bio);
        }
  
@@@ -1604,11 -1604,8 +1604,8 @@@ static int super_1_validate(struct mdde
                        mddev->new_chunk_sectors = mddev->chunk_sectors;
                }
  
-               if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL) {
+               if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
                        set_bit(MD_HAS_JOURNAL, &mddev->flags);
-                       if (mddev->recovery_cp == MaxSector)
-                               set_bit(MD_JOURNAL_CLEAN, &mddev->flags);
-               }
        } else if (mddev->pers == NULL) {
                /* Insist of good event counter while assembling, except for
                 * spares (which don't need an event count) */
@@@ -5851,6 -5848,9 +5848,9 @@@ static int get_array_info(struct mddev 
                        working++;
                        if (test_bit(In_sync, &rdev->flags))
                                insync++;
+                       else if (test_bit(Journal, &rdev->flags))
+                               /* TODO: add journal count to md_u.h */
+                               ;
                        else
                                spare++;
                }
@@@ -7862,6 -7862,7 +7862,7 @@@ void md_do_sync(struct md_thread *threa
         */
  
        do {
+               int mddev2_minor = -1;
                mddev->curr_resync = 2;
  
        try_again:
                                prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
                                if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
                                    mddev2->curr_resync >= mddev->curr_resync) {
-                                       printk(KERN_INFO "md: delaying %s of %s"
-                                              " until %s has finished (they"
-                                              " share one or more physical units)\n",
-                                              desc, mdname(mddev), mdname(mddev2));
+                                       if (mddev2_minor != mddev2->md_minor) {
+                                               mddev2_minor = mddev2->md_minor;
+                                               printk(KERN_INFO "md: delaying %s of %s"
+                                                      " until %s has finished (they"
+                                                      " share one or more physical units)\n",
+                                                      desc, mdname(mddev),
+                                                      mdname(mddev2));
+                                       }
                                        mddev_put(mddev2);
                                        if (signal_pending(current))
                                                flush_signals(current);
@@@ -8275,16 -8280,13 +8280,13 @@@ no_add
  static void md_start_sync(struct work_struct *ws)
  {
        struct mddev *mddev = container_of(ws, struct mddev, del_work);
-       int ret = 0;
  
        mddev->sync_thread = md_register_thread(md_do_sync,
                                                mddev,
                                                "resync");
        if (!mddev->sync_thread) {
-               if (!(mddev_is_clustered(mddev) && ret == -EAGAIN))
-                       printk(KERN_ERR "%s: could not start resync"
-                              " thread...\n",
-                              mdname(mddev));
+               printk(KERN_ERR "%s: could not start resync thread...\n",
+                      mdname(mddev));
                /* leave the spares where they are, it shouldn't hurt */
                clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
                clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
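
[Editorial note] The md_do_sync() hunk above stops the kernel from logging the same "delaying ... until ... has finished" line on every pass of its retry loop: it remembers the minor number of the array it last reported and only prints again when that changes. The following is a minimal user-space C sketch of that de-duplication pattern, not kernel code; struct fake_mddev, delay_sync() and the values in main() are invented for illustration, while md_minor and the -1 sentinel mirror the fields in the patch.

#include <stdio.h>

struct fake_mddev { int md_minor; const char *name; };

static void delay_sync(struct fake_mddev *mddev, struct fake_mddev *mddev2,
                       int *last_minor)
{
        /* Print at most once per conflicting array, not once per retry. */
        if (*last_minor != mddev2->md_minor) {
                *last_minor = mddev2->md_minor;
                printf("md: delaying resync of %s until %s has finished\n",
                       mddev->name, mddev2->name);
        }
}

int main(void)
{
        struct fake_mddev a = { 0, "md0" }, b = { 1, "md1" };
        int last_minor = -1;   /* matches mddev2_minor = -1 in the patch */
        int i;

        /* The wait loop retries many times; only the first pass logs. */
        for (i = 0; i < 5; i++)
                delay_sync(&a, &b, &last_minor);
        return 0;
}
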
diff --combined drivers/md/raid10.c
index 0e4efcd1079550faba99495bdd6db51badbfea0f,4589866257d58b09bb698e341074e868bd52fdd3..be1a9fca3b2d2ade369359d109d1a53ddf30d077
@@@ -1054,8 -1054,8 +1054,8 @@@ static void __make_request(struct mdde
        int i;
        const int op = bio_op(bio);
        const int rw = bio_data_dir(bio);
 -      const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
 -      const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
 +      const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
 +      const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
        unsigned long flags;
        struct md_rdev *blocked_rdev;
        struct blk_plug_cb *cb;
        int max_sectors;
        int sectors;
  
+       md_write_start(mddev, bio);
        /*
         * Register the new request and wait if the reconstruction
         * thread has put up a bar for new requests.
@@@ -1440,13 -1442,11 +1442,11 @@@ static void raid10_make_request(struct 
  
        struct bio *split;
  
 -      if (unlikely(bio->bi_rw & REQ_PREFLUSH)) {
 +      if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
                md_flush_request(mddev, bio);
                return;
        }
  
-       md_write_start(mddev, bio);
        do {
  
                /*
@@@ -2465,20 -2465,21 +2465,21 @@@ static int narrow_write_error(struct r1
  
        while (sect_to_write) {
                struct bio *wbio;
+               sector_t wsector;
                if (sectors > sect_to_write)
                        sectors = sect_to_write;
                /* Write at 'sector' for 'sectors' */
                wbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
                bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors);
-               wbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
-                                  choose_data_offset(r10_bio, rdev) +
-                                  (sector - r10_bio->sector));
+               wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector);
+               wbio->bi_iter.bi_sector = wsector +
+                                  choose_data_offset(r10_bio, rdev);
                wbio->bi_bdev = rdev->bdev;
                bio_set_op_attrs(wbio, REQ_OP_WRITE, 0);
  
                if (submit_bio_wait(wbio) < 0)
                        /* Failure! */
-                       ok = rdev_set_badblocks(rdev, sector,
+                       ok = rdev_set_badblocks(rdev, wsector,
                                                sectors, 0)
                                && ok;
  
@@@ -2533,7 -2534,7 +2534,7 @@@ read_more
                return;
        }
  
 -      do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC);
 +      do_sync = (r10_bio->master_bio->bi_opf & REQ_SYNC);
        slot = r10_bio->read_slot;
        printk_ratelimited(
                KERN_ERR
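
[Editorial note] The narrow_write_error() hunk above changes which address gets recorded when a retried write still fails: the write bio is positioned at devs[i].addr plus the offset into the failed range plus the per-device data offset, but the old code logged the bad block at the array-relative sector. A stand-alone sketch of the address arithmetic follows, with invented example values; dev_addr, data_offset and record_badblock() are hypothetical stand-ins for r10_bio->devs[i].addr, choose_data_offset() and rdev_set_badblocks().

#include <stdio.h>

typedef unsigned long long sector_t;

static void record_badblock(const char *dev, sector_t s, int n)
{
        printf("bad block on %s: sector %llu, %d sectors\n", dev, s, n);
}

int main(void)
{
        sector_t bio_sector  = 1000;  /* array-relative start of failed range */
        sector_t r10_sector  = 960;   /* array-relative start of this r10_bio */
        sector_t dev_addr    = 4000;  /* devs[i].addr: device-relative base */
        sector_t data_offset = 2048;  /* per-device data offset */

        /* Device-relative sector, before the data offset is applied. */
        sector_t wsector = dev_addr + (bio_sector - r10_sector);

        /* The write itself is issued at wsector + data_offset ... */
        printf("write issued at device sector %llu\n", wsector + data_offset);

        /* ... but on failure the patch records wsector, where the old
         * code wrongly passed the array-relative bio_sector. */
        record_badblock("sdb", wsector, 8);
        return 0;
}
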
diff --combined drivers/md/raid5.c
index 8912407a4dd0edb251a36e3befe3ff8182598bdf,62febe8d491958b37a5a357021a72f363b84604e..da583bb43c84e5faaa0de0adfb2853b173221847
@@@ -659,6 -659,7 +659,7 @@@ raid5_get_active_stripe(struct r5conf *
  {
        struct stripe_head *sh;
        int hash = stripe_hash_locks_hash(sector);
+       int inc_empty_inactive_list_flag;
  
        pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector);
  
                                        atomic_inc(&conf->active_stripes);
                                BUG_ON(list_empty(&sh->lru) &&
                                       !test_bit(STRIPE_EXPANDING, &sh->state));
+                               inc_empty_inactive_list_flag = 0;
+                               if (!list_empty(conf->inactive_list + hash))
+                                       inc_empty_inactive_list_flag = 1;
                                list_del_init(&sh->lru);
+                               if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
+                                       atomic_inc(&conf->empty_inactive_list_nr);
                                if (sh->group) {
                                        sh->group->stripes_cnt--;
                                        sh->group = NULL;
@@@ -762,6 -768,7 +768,7 @@@ static void stripe_add_to_batch_list(st
        sector_t head_sector, tmp_sec;
        int hash;
        int dd_idx;
+       int inc_empty_inactive_list_flag;
  
        /* Don't cross chunks, so stripe pd_idx/qd_idx is the same */
        tmp_sec = sh->sector;
                                atomic_inc(&conf->active_stripes);
                        BUG_ON(list_empty(&head->lru) &&
                               !test_bit(STRIPE_EXPANDING, &head->state));
+                       inc_empty_inactive_list_flag = 0;
+                       if (!list_empty(conf->inactive_list + hash))
+                               inc_empty_inactive_list_flag = 1;
                        list_del_init(&head->lru);
+                       if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
+                               atomic_inc(&conf->empty_inactive_list_nr);
                        if (head->group) {
                                head->group->stripes_cnt--;
                                head->group = NULL;
        dd_idx = 0;
        while (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx)
                dd_idx++;
 -      if (head->dev[dd_idx].towrite->bi_rw != sh->dev[dd_idx].towrite->bi_rw ||
 +      if (head->dev[dd_idx].towrite->bi_opf != sh->dev[dd_idx].towrite->bi_opf ||
            bio_op(head->dev[dd_idx].towrite) != bio_op(sh->dev[dd_idx].towrite))
                goto unlock_out;
  
@@@ -993,7 -1005,6 +1005,6 @@@ again
  
                        set_bit(STRIPE_IO_STARTED, &sh->state);
  
-                       bio_reset(bi);
                        bi->bi_bdev = rdev->bdev;
                        bio_set_op_attrs(bi, op, op_flags);
                        bi->bi_end_io = op_is_write(op)
  
                        pr_debug("%s: for %llu schedule op %d on disc %d\n",
                                __func__, (unsigned long long)sh->sector,
 -                              bi->bi_rw, i);
 +                              bi->bi_opf, i);
                        atomic_inc(&sh->count);
                        if (sh != head_sh)
                                atomic_inc(&head_sh->count);
                                bi->bi_iter.bi_sector = (sh->sector
                                                 + rdev->data_offset);
                        if (test_bit(R5_ReadNoMerge, &head_sh->dev[i].flags))
 -                              bi->bi_rw |= REQ_NOMERGE;
 +                              bi->bi_opf |= REQ_NOMERGE;
  
                        if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
                                WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
  
                        set_bit(STRIPE_IO_STARTED, &sh->state);
  
-                       bio_reset(rbi);
                        rbi->bi_bdev = rrdev->bdev;
                        bio_set_op_attrs(rbi, op, op_flags);
                        BUG_ON(!op_is_write(op));
                        pr_debug("%s: for %llu schedule op %d on "
                                 "replacement disc %d\n",
                                __func__, (unsigned long long)sh->sector,
 -                              rbi->bi_rw, i);
 +                              rbi->bi_opf, i);
                        atomic_inc(&sh->count);
                        if (sh != head_sh)
                                atomic_inc(&head_sh->count);
                        if (op_is_write(op))
                                set_bit(STRIPE_DEGRADED, &sh->state);
                        pr_debug("skip op %d on disc %d for sector %llu\n",
 -                              bi->bi_rw, i, (unsigned long long)sh->sector);
 +                              bi->bi_opf, i, (unsigned long long)sh->sector);
                        clear_bit(R5_LOCKED, &sh->dev[i].flags);
                        set_bit(STRIPE_HANDLE, &sh->state);
                }
@@@ -1619,9 -1629,9 +1629,9 @@@ again
  
                        while (wbi && wbi->bi_iter.bi_sector <
                                dev->sector + STRIPE_SECTORS) {
 -                              if (wbi->bi_rw & REQ_FUA)
 +                              if (wbi->bi_opf & REQ_FUA)
                                        set_bit(R5_WantFUA, &dev->flags);
 -                              if (wbi->bi_rw & REQ_SYNC)
 +                              if (wbi->bi_opf & REQ_SYNC)
                                        set_bit(R5_SyncIO, &dev->flags);
                                if (bio_op(wbi) == REQ_OP_DISCARD)
                                        set_bit(R5_Discard, &dev->flags);
@@@ -1978,9 -1988,11 +1988,11 @@@ static void raid_run_ops(struct stripe_
        put_cpu();
  }
  
- static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp)
+ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
+       int disks)
  {
        struct stripe_head *sh;
+       int i;
  
        sh = kmem_cache_zalloc(sc, gfp);
        if (sh) {
                INIT_LIST_HEAD(&sh->batch_list);
                INIT_LIST_HEAD(&sh->lru);
                atomic_set(&sh->count, 1);
+               for (i = 0; i < disks; i++) {
+                       struct r5dev *dev = &sh->dev[i];
+                       bio_init(&dev->req);
+                       dev->req.bi_io_vec = &dev->vec;
+                       dev->req.bi_max_vecs = 1;
+                       bio_init(&dev->rreq);
+                       dev->rreq.bi_io_vec = &dev->rvec;
+                       dev->rreq.bi_max_vecs = 1;
+               }
        }
        return sh;
  }
@@@ -1996,7 -2019,7 +2019,7 @@@ static int grow_one_stripe(struct r5con
  {
        struct stripe_head *sh;
  
-       sh = alloc_stripe(conf->slab_cache, gfp);
+       sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size);
        if (!sh)
                return 0;
  
@@@ -2167,7 -2190,7 +2190,7 @@@ static int resize_stripes(struct r5con
        mutex_lock(&conf->cache_size_mutex);
  
        for (i = conf->max_nr_stripes; i; i--) {
-               nsh = alloc_stripe(sc, GFP_KERNEL);
+               nsh = alloc_stripe(sc, GFP_KERNEL, newsize);
                if (!nsh)
                        break;
  
@@@ -2299,6 -2322,7 +2322,7 @@@ static void raid5_end_read_request(stru
                (unsigned long long)sh->sector, i, atomic_read(&sh->count),
                bi->bi_error);
        if (i == disks) {
+               bio_reset(bi);
                BUG();
                return;
        }
        clear_bit(R5_LOCKED, &sh->dev[i].flags);
        set_bit(STRIPE_HANDLE, &sh->state);
        raid5_release_stripe(sh);
+       bio_reset(bi);
  }
  
  static void raid5_end_write_request(struct bio *bi)
                (unsigned long long)sh->sector, i, atomic_read(&sh->count),
                bi->bi_error);
        if (i == disks) {
+               bio_reset(bi);
                BUG();
                return;
        }
  
        if (sh->batch_head && sh != sh->batch_head)
                raid5_release_stripe(sh->batch_head);
+       bio_reset(bi);
  }
  
  static void raid5_build_block(struct stripe_head *sh, int i, int previous)
  {
        struct r5dev *dev = &sh->dev[i];
  
-       bio_init(&dev->req);
-       dev->req.bi_io_vec = &dev->vec;
-       dev->req.bi_max_vecs = 1;
-       dev->req.bi_private = sh;
-       bio_init(&dev->rreq);
-       dev->rreq.bi_io_vec = &dev->rvec;
-       dev->rreq.bi_max_vecs = 1;
-       dev->rreq.bi_private = sh;
        dev->flags = 0;
        dev->sector = raid5_compute_blocknr(sh, i, previous);
  }
@@@ -4628,7 -4645,9 +4645,9 @@@ finish
        }
  
        if (!bio_list_empty(&s.return_bi)) {
-               if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags)) {
+               if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags) &&
+                               (s.failed <= conf->max_degraded ||
+                                       conf->mddev->external == 0)) {
                        spin_lock_irq(&conf->device_lock);
                        bio_list_merge(&conf->return_bi, &s.return_bi);
                        spin_unlock_irq(&conf->device_lock);
@@@ -5154,7 -5173,7 +5173,7 @@@ static void raid5_make_request(struct m
        DEFINE_WAIT(w);
        bool do_prepare;
  
 -      if (unlikely(bi->bi_rw & REQ_PREFLUSH)) {
 +      if (unlikely(bi->bi_opf & REQ_PREFLUSH)) {
                int ret = r5l_handle_flush_request(conf->log, bi);
  
                if (ret == 0)
                        (unsigned long long)logical_sector);
  
                sh = raid5_get_active_stripe(conf, new_sector, previous,
 -                                     (bi->bi_rw & REQ_RAHEAD), 0);
 +                                     (bi->bi_opf & REQ_RAHEAD), 0);
                if (sh) {
                        if (unlikely(previous)) {
                                /* expansion might have moved on while waiting for a
                        set_bit(STRIPE_HANDLE, &sh->state);
                        clear_bit(STRIPE_DELAYED, &sh->state);
                        if ((!sh->batch_head || sh == sh->batch_head) &&
 -                          (bi->bi_rw & REQ_SYNC) &&
 +                          (bi->bi_opf & REQ_SYNC) &&
                            !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
                                atomic_inc(&conf->preread_active_stripes);
                        release_stripe_plug(mddev, sh);
@@@ -6826,11 -6845,14 +6845,14 @@@ static int raid5_run(struct mddev *mdde
        if (IS_ERR(conf))
                return PTR_ERR(conf);
  
-       if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !journal_dev) {
-               printk(KERN_ERR "md/raid:%s: journal disk is missing, force array readonly\n",
-                      mdname(mddev));
-               mddev->ro = 1;
-               set_disk_ro(mddev->gendisk, 1);
+       if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
+               if (!journal_dev) {
+                       pr_err("md/raid:%s: journal disk is missing, force array readonly\n",
+                              mdname(mddev));
+                       mddev->ro = 1;
+                       set_disk_ro(mddev->gendisk, 1);
+               } else if (mddev->recovery_cp == MaxSector)
+                       set_bit(MD_JOURNAL_CLEAN, &mddev->flags);
        }
  
        conf->min_offset_diff = min_offset_diff;
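
[Editorial note] Both raid5.c hunks that introduce inc_empty_inactive_list_flag implement the same idea: conf->empty_inactive_list_nr must count transitions from non-empty to empty, so the code samples the list before list_del_init() and re-checks it afterwards, incrementing only when this deletion emptied the list. Below is a self-contained user-space C sketch of that pattern; the tiny list implementation, remove_node() and empty_lists are invented for illustration.

#include <stdio.h>

struct node { struct node *prev, *next; };

static void list_init(struct node *head) { head->prev = head->next = head; }
static int  list_empty(const struct node *head) { return head->next == head; }

static void list_add(struct node *head, struct node *n)
{
        n->next = head->next; n->prev = head;
        head->next->prev = n; head->next = n;
}

static void list_del(struct node *n)
{
        n->prev->next = n->next; n->next->prev = n->prev;
        n->prev = n->next = n;
}

static int empty_lists;   /* plays the role of conf->empty_inactive_list_nr */

static void remove_node(struct node *head, struct node *n)
{
        /* Mirror of the patched code: remember whether the list had
         * entries, delete, then count the non-empty -> empty transition. */
        int was_nonempty = !list_empty(head);

        list_del(n);
        if (list_empty(head) && was_nonempty)
                empty_lists++;
}

int main(void)
{
        struct node head, a, b;

        list_init(&head);
        list_add(&head, &a);
        list_add(&head, &b);

        remove_node(&head, &a);   /* list still holds b: no increment */
        remove_node(&head, &b);   /* list becomes empty: increment */
        printf("empty_lists = %d (expected 1)\n", empty_lists);
        return 0;
}
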