dm raid1: abstract get_valid_mirror function

[mirror_ubuntu-zesty-kernel.git] / drivers / md / dm-raid1.c
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c

index cc9dc79b0784ad4fb0970e1db4defd0126b63e5a..d1a7f1a4789ce508b5b4e02bb07055d136cf3ce5 100644 (file)
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -35,6 +35,7 @@ static DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped);
   *---------------------------------------------------------------*/
  enum dm_raid1_error {
         DM_RAID1_WRITE_ERROR,
+       DM_RAID1_FLUSH_ERROR,   /* passed to fail_mirror() by mirror_flush(); status char 'F' */
         DM_RAID1_SYNC_ERROR,
         DM_RAID1_READ_ERROR
  };
@@ -57,6 +58,7 @@ struct mirror_set {
         struct bio_list reads;
         struct bio_list writes;
         struct bio_list failures;
+       struct bio_list holds;  /* bios are waiting until suspend */
  
         struct dm_region_hash *rh;
         struct dm_kcopyd_client *kcopyd_client;
@@ -179,6 +181,17 @@ static void set_default_mirror(struct mirror *m)
         atomic_set(&ms->default_mirror, m - m0);
  }
  
+static struct mirror *get_valid_mirror(struct mirror_set *ms)
+{
+       unsigned int idx;
+       /* Walk the mirror array in order; first leg with a zero error_count wins. */
+       for (idx = 0; idx < ms->nr_mirrors; idx++)
+               if (atomic_read(&ms->mirror[idx].error_count) == 0)
+                       return ms->mirror + idx;
+       /* Every mirror has a non-zero error count. */
+       return NULL;
+}
+
  /* fail_mirror
   * @m: mirror device to fail
   * @error_type: one of the enum's, DM_RAID1_*_ERROR
@@ -224,19 +237,50 @@ static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type)
                 goto out;
         }
  
-       for (new = ms->mirror; new < ms->mirror + ms->nr_mirrors; new++)
-               if (!atomic_read(&new->error_count)) {
-                       set_default_mirror(new);
-                       break;
-               }
-
-       if (unlikely(new == ms->mirror + ms->nr_mirrors))
+       new = get_valid_mirror(ms);
+       if (new)
+               set_default_mirror(new);
+       else
                 DMWARN("All sides of mirror have failed.");
  
  out:
         schedule_work(&ms->trigger_event);
  }
  
+static int mirror_flush(struct dm_target *ti)
+{
+       struct mirror_set *ms = ti->private;
+       unsigned long error_bits;
+       /* Send a zero-length WRITE_BARRIER to every mirror; returns 0 or -EIO. */
+       unsigned int i;
+       struct dm_io_region io[ms->nr_mirrors];
+       struct mirror *m;
+       struct dm_io_request io_req = {
+               .bi_rw = WRITE_BARRIER,
+               .mem.type = DM_IO_KMEM,
+               .mem.ptr.addr = NULL,   /* no payload; name the member that goes with DM_IO_KMEM */
+               .client = ms->io_client,
+       };
+       /* One empty region (sector 0, count 0) per mirror device. */
+       for (i = 0, m = ms->mirror; i < ms->nr_mirrors; i++, m++) {
+               io[i].bdev = m->dev->bdev;
+               io[i].sector = 0;
+               io[i].count = 0;
+       }
+       /* Start with all bits set; dm_io() is expected to store the per-region failure mask here. */
+       error_bits = -1;
+       dm_io(&io_req, ms->nr_mirrors, io, &error_bits);
+       if (unlikely(error_bits != 0)) {
+               for (i = 0; i < ms->nr_mirrors; i++)
+                       if (test_bit(i, &error_bits))   /* bit i corresponds to ms->mirror[i] */
+                               fail_mirror(ms->mirror + i,
+                                           DM_RAID1_FLUSH_ERROR);
+               return -EIO;
+       }
+
+       return 0;
+}
+
  /*-----------------------------------------------------------------
   * Recovery.
   *
@@ -396,6 +440,8 @@ static int mirror_available(struct mirror_set *ms, struct bio *bio)
   */
  static sector_t map_sector(struct mirror *m, struct bio *bio)
  {
+       if (unlikely(!bio->bi_size))    /* zero-size bio: report sector 0 instead of remapping */
+               return 0;
         return m->offset + (bio->bi_sector - m->ms->ti->begin);
  }
  
@@ -413,6 +459,27 @@ static void map_region(struct dm_io_region *io, struct mirror *m,
         io->count = bio->bi_size >> 9;
  }
  
+static void hold_bio(struct mirror_set *ms, struct bio *bio)
+{
+       /*
+        * Defer @bio until suspend; if already suspended, complete it now.
+        */
+       if (atomic_read(&ms->suspend)) {
+               if (dm_noflush_suspending(ms->ti))
+                       bio_endio(bio, DM_ENDIO_REQUEUE);       /* noflush suspend: ask for a requeue */
+               else
+                       bio_endio(bio, -EIO);                   /* otherwise fail the bio */
+               return;
+       }
+
+       /*
+        * Not suspended: park the bio on ms->holds until presuspend drains it.
+        */
+       spin_lock_irq(&ms->lock);
+       bio_list_add(&ms->holds, bio);
+       spin_unlock_irq(&ms->lock);
+}
+
  /*-----------------------------------------------------------------
   * Reads
   *---------------------------------------------------------------*/
@@ -562,7 +629,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio)
         struct dm_io_region io[ms->nr_mirrors], *dest = io;
         struct mirror *m;
         struct dm_io_request io_req = {
-               .bi_rw = WRITE,
+               .bi_rw = WRITE | (bio->bi_rw & WRITE_BARRIER),
                 .mem.type = DM_IO_BVEC,
                 .mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx,
                 .notify.fn = write_callback,
@@ -603,6 +670,11 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
         bio_list_init(&requeue);
  
         while ((bio = bio_list_pop(writes))) {
+               if (unlikely(bio_empty_barrier(bio))) {
+                       bio_list_add(&sync, bio);
+                       continue;
+               }
+
                 region = dm_rh_bio_to_region(ms->rh, bio);
  
                 if (log->type->is_remote_recovering &&
@@ -681,20 +753,12 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures)
  {
         struct bio *bio;
  
-       if (!failures->head)
+       if (likely(!failures->head))
                 return;
  
-       if (!ms->log_failure) {
-               while ((bio = bio_list_pop(failures))) {
-                       ms->in_sync = 0;
-                       dm_rh_mark_nosync(ms->rh, bio, bio->bi_size, 0);
-               }
-               return;
-       }
-
         /*
          * If the log has failed, unattempted writes are being
-        * put on the failures list.  We can't issue those writes
+        * put on the holds list.  We can't issue those writes
          * until a log has been marked, so we must store them.
          *
          * If a 'noflush' suspend is in progress, we can requeue
@@ -709,23 +773,15 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures)
          * for us to treat them the same and requeue them
          * as well.
          */
-       if (dm_noflush_suspending(ms->ti)) {
-               while ((bio = bio_list_pop(failures)))
-                       bio_endio(bio, DM_ENDIO_REQUEUE);
-               return;
-       }
  
-       if (atomic_read(&ms->suspend)) {
-               while ((bio = bio_list_pop(failures)))
-                       bio_endio(bio, -EIO);
-               return;
+       while ((bio = bio_list_pop(failures))) {
+               if (ms->log_failure)
+                       hold_bio(ms, bio);
+               else {
+                       ms->in_sync = 0;
+                       dm_rh_mark_nosync(ms->rh, bio, bio->bi_size, 0);
+               }
         }
-
-       spin_lock_irq(&ms->lock);
-       bio_list_merge(&ms->failures, failures);
-       spin_unlock_irq(&ms->lock);
-
-       delayed_wake(ms);
  }
  
  static void trigger_event(struct work_struct *work)
@@ -889,7 +945,8 @@ static struct dm_dirty_log *create_dirty_log(struct dm_target *ti,
                 return NULL;
         }
  
-       dl = dm_dirty_log_create(argv[0], ti, param_count, argv + 2);
+       dl = dm_dirty_log_create(argv[0], ti, mirror_flush, param_count,
+                                argv + 2);
         if (!dl) {
                 ti->error = "Error creating mirror dirty log";
                 return NULL;
@@ -995,6 +1052,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
  
         ti->private = ms;
         ti->split_io = dm_rh_get_region_size(ms->rh);
+       ti->num_flush_requests = 1;
  
         ms->kmirrord_wq = create_singlethread_workqueue("kmirrord");
         if (!ms->kmirrord_wq) {
@@ -1122,7 +1180,8 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
          * We need to dec pending if this was a write.
          */
         if (rw == WRITE) {
-               dm_rh_dec(ms->rh, map_context->ll);
+               if (likely(!bio_empty_barrier(bio)))
+                       dm_rh_dec(ms->rh, map_context->ll);
                 return error;
         }
  
@@ -1180,6 +1239,9 @@ static void mirror_presuspend(struct dm_target *ti)
         struct mirror_set *ms = (struct mirror_set *) ti->private;
         struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
  
+       struct bio_list holds;
+       struct bio *bio;
+
         atomic_set(&ms->suspend, 1);
  
         /*
@@ -1202,6 +1264,22 @@ static void mirror_presuspend(struct dm_target *ti)
          * we know that all of our I/O has been pushed.
          */
         flush_workqueue(ms->kmirrord_wq);
+
+       /*
+        * Now that ms->suspend is set and the workqueue flushed, no more
+        * entries can be added to the ms->holds list, so process it.
+        *
+        * Bios can still arrive concurrently with or after this
+        * presuspend function, but they cannot join the hold list
+        * because ms->suspend is set.
+        */
+       spin_lock_irq(&ms->lock);
+       holds = ms->holds;
+       bio_list_init(&ms->holds);
+       spin_unlock_irq(&ms->lock);
+
+       while ((bio = bio_list_pop(&holds)))
+               hold_bio(ms, bio);
  }
  
  static void mirror_postsuspend(struct dm_target *ti)
@@ -1244,7 +1322,8 @@ static char device_status_char(struct mirror *m)
         if (!atomic_read(&(m->error_count)))
                 return 'A';
  
-       return (test_bit(DM_RAID1_WRITE_ERROR, &(m->error_type))) ? 'D' :
+       return (test_bit(DM_RAID1_FLUSH_ERROR, &(m->error_type))) ? 'F' :
+               (test_bit(DM_RAID1_WRITE_ERROR, &(m->error_type))) ? 'D' :
                 (test_bit(DM_RAID1_SYNC_ERROR, &(m->error_type))) ? 'S' :
                 (test_bit(DM_RAID1_READ_ERROR, &(m->error_type))) ? 'R' : 'U';
  }