drbd: Track the numbers of sectors in flight
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 24487d4fb20297e6676a91e60c9525f26092e315..f3052d871d312a4ab4ff08751f87703c461190db 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -187,15 +187,6 @@ static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int
        return NULL;
 }
 
-/* kick lower level device, if we have more than (arbitrary number)
- * reference counts on it, which typically are locally submitted io
- * requests.  don't use unacked_cnt, so we speed up proto A and B, too. */
-static void maybe_kick_lo(struct drbd_conf *mdev)
-{
-       if (atomic_read(&mdev->local_cnt) >= mdev->net_conf->unplug_watermark)
-               drbd_kick_lo(mdev);
-}
-
 static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
 {
        struct drbd_epoch_entry *e;
@@ -219,7 +210,6 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
        LIST_HEAD(reclaimed);
        struct drbd_epoch_entry *e, *t;
 
-       maybe_kick_lo(mdev);
        spin_lock_irq(&mdev->req_lock);
        reclaim_net_ee(mdev, &reclaimed);
        spin_unlock_irq(&mdev->req_lock);
@@ -287,7 +277,7 @@ static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
        atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
        int i;
 
-       if (drbd_pp_vacant > (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE)*minor_count)
+       if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count)
                i = page_chain_free(page);
        else {
                struct page *tmp;
@@ -436,8 +426,7 @@ void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
        while (!list_empty(head)) {
                prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
                spin_unlock_irq(&mdev->req_lock);
-               drbd_kick_lo(mdev);
-               schedule();
+               io_schedule();
                finish_wait(&mdev->ee_wait, &wait);
                spin_lock_irq(&mdev->req_lock);
        }
@@ -1111,8 +1100,6 @@ next_bio:
        /* > e->sector, unless this is the first bio */
        bio->bi_sector = sector;
        bio->bi_bdev = mdev->ldev->backing_bdev;
-       /* we special case some flags in the multi-bio case, see below
-        * (REQ_UNPLUG) */
        bio->bi_rw = rw;
        bio->bi_private = e;
        bio->bi_end_io = drbd_endio_sec;
@@ -1141,13 +1128,8 @@ next_bio:
                bios = bios->bi_next;
                bio->bi_next = NULL;
 
-               /* strip off REQ_UNPLUG unless it is the last bio */
-               if (bios)
-                       bio->bi_rw &= ~REQ_UNPLUG;
-
                drbd_generic_make_request(mdev, fault_type, bio);
        } while (bios);
-       maybe_kick_lo(mdev);
        return 0;
 
 fail:
@@ -1167,9 +1149,6 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsign
 
        inc_unacked(mdev);
 
-       if (mdev->net_conf->wire_protocol != DRBD_PROT_C)
-               drbd_kick_lo(mdev);
-
        mdev->current_epoch->barrier_nr = p->barrier;
        rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);
 
@@ -1261,7 +1240,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __
        data_size -= dgs;
 
        ERR_IF(data_size &  0x1ff) return NULL;
-       ERR_IF(data_size >  DRBD_MAX_SEGMENT_SIZE) return NULL;
+       ERR_IF(data_size >  DRBD_MAX_BIO_SIZE) return NULL;
 
        /* even though we trust out peer,
         * we sometimes have to double check. */
@@ -1302,7 +1281,8 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __
        if (dgs) {
                drbd_csum_ee(mdev, mdev->integrity_r_tfm, e, dig_vv);
                if (memcmp(dig_in, dig_vv, dgs)) {
-                       dev_err(DEV, "Digest integrity check FAILED.\n");
+                       dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
+                               (unsigned long long)sector, data_size);
                        drbd_bcast_ee(mdev, "digest failed",
                                        dgs, dig_in, dig_vv, e);
                        drbd_free_ee(mdev, e);
@@ -1632,16 +1612,15 @@ static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq)
        return ret;
 }
 
-static unsigned long write_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
+/* see also bio_flags_to_wire()
+ * DRBD_REQ_*, because we need to semantically map the flags to data packet
+ * flags and back. We may replicate to other kernel versions. */
+static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
 {
-       if (mdev->agreed_pro_version >= 95)
-               return  (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
-                       (dpf & DP_UNPLUG ? REQ_UNPLUG : 0) |
-                       (dpf & DP_FUA ? REQ_FUA : 0) |
-                       (dpf & DP_FLUSH ? REQ_FUA : 0) |
-                       (dpf & DP_DISCARD ? REQ_DISCARD : 0);
-       else
-               return dpf & DP_RW_SYNC ? (REQ_SYNC | REQ_UNPLUG) : 0;
+       return  (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
+               (dpf & DP_FUA ? REQ_FUA : 0) |
+               (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
+               (dpf & DP_DISCARD ? REQ_DISCARD : 0);
 }
 
 /* mirrored write */
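The new comment points at bio_flags_to_wire() as the sender-side counterpart of wire_flags_to_bio(). A minimal sketch of that inverse mapping, built only from the DP_* and REQ_* names visible in this hunk (the in-tree bio_flags_to_wire() may differ in detail):

/* Illustrative sketch: translate local bio flags into wire DP_* flags before
 * sending a data packet; peers running an older protocol only understand
 * DP_RW_SYNC. */
static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw)
{
	if (mdev->agreed_pro_version >= 95)
		return  (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
			(bi_rw & REQ_FUA ? DP_FUA : 0) |
			(bi_rw & REQ_FLUSH ? DP_FLUSH : 0) |
			(bi_rw & REQ_DISCARD ? DP_DISCARD : 0);
	else
		return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
}

With both directions in place, a FLUSH/FUA write submitted on the primary keeps its semantics on the peer, and the receiver no longer needs its own protocol-version special case.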
@@ -1681,18 +1660,18 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
 
        e->w.cb = e_end_block;
 
+       dp_flags = be32_to_cpu(p->dp_flags);
+       rw |= wire_flags_to_bio(mdev, dp_flags);
+
+       if (dp_flags & DP_MAY_SET_IN_SYNC)
+               e->flags |= EE_MAY_SET_IN_SYNC;
+
        spin_lock(&mdev->epoch_lock);
        e->epoch = mdev->current_epoch;
        atomic_inc(&e->epoch->epoch_size);
        atomic_inc(&e->epoch->active);
        spin_unlock(&mdev->epoch_lock);
 
-       dp_flags = be32_to_cpu(p->dp_flags);
-       rw |= write_flags_to_bio(mdev, dp_flags);
-
-       if (dp_flags & DP_MAY_SET_IN_SYNC)
-               e->flags |= EE_MAY_SET_IN_SYNC;
-
        /* I'm the receiver, I do hold a net_cnt reference. */
        if (!mdev->net_conf->two_primaries) {
                spin_lock_irq(&mdev->req_lock);
@@ -1905,8 +1884,12 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev)
 
                /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
                 * approx. */
-               i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-2) % DRBD_SYNC_MARKS;
-               rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
+               i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
+
+               if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
+                       rs_left = mdev->ov_left;
+               else
+                       rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
 
                dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
                if (!dt)
@@ -1934,7 +1917,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un
        sector = be64_to_cpu(p->sector);
        size   = be32_to_cpu(p->blksize);
 
-       if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
+       if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
                dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
                                (unsigned long long)sector, size);
                return FALSE;
@@ -1992,6 +1975,8 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un
        case P_RS_DATA_REQUEST:
                e->w.cb = w_e_end_rsdata_req;
                fault_type = DRBD_FAULT_RS_RD;
+               /* used in the sector offset progress display */
+               mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
                break;
 
        case P_OV_REPLY:
@@ -2013,7 +1998,11 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un
                if (cmd == P_CSUM_RS_REQUEST) {
                        D_ASSERT(mdev->agreed_pro_version >= 89);
                        e->w.cb = w_e_end_csum_rs_req;
+                       /* used in the sector offset progress display */
+                       mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
                } else if (cmd == P_OV_REPLY) {
+                       /* track progress, we may need to throttle */
+                       atomic_add(size >> 9, &mdev->rs_sect_in);
                        e->w.cb = w_e_end_ov_reply;
                        dec_rs_pending(mdev);
                        /* drbd_rs_begin_io done when we sent this request,
@@ -2025,9 +2014,16 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un
        case P_OV_REQUEST:
                if (mdev->ov_start_sector == ~(sector_t)0 &&
                    mdev->agreed_pro_version >= 90) {
+                       unsigned long now = jiffies;
+                       int i;
                        mdev->ov_start_sector = sector;
                        mdev->ov_position = sector;
-                       mdev->ov_left = mdev->rs_total - BM_SECT_TO_BIT(sector);
+                       mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
+                       mdev->rs_total = mdev->ov_left;
+                       for (i = 0; i < DRBD_SYNC_MARKS; i++) {
+                               mdev->rs_mark_left[i] = mdev->ov_left;
+                               mdev->rs_mark_time[i] = now;
+                       }
                        dev_info(DEV, "Online Verify start sector: %llu\n",
                                        (unsigned long long)sector);
                }
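Seeding every DRBD_SYNC_MARKS slot with (ov_left, now) matters because drbd_rs_should_slow_down() (see the hunk above) derives its recent-rate estimate from one of these marks. A hedged sketch of that kind of estimate, using only fields touched in this patch (the exact in-tree arithmetic lives elsewhere and may differ):

/* Illustrative only: approximate resync/verify throughput in KiB/s from the
 * mark ring.  rs_left is what is still to go, in bitmap bits; one bit covers
 * 4 KiB of device data (drbd's bitmap granularity). */
static unsigned long approx_rate_kb(struct drbd_conf *mdev, unsigned long rs_left)
{
	int i = (mdev->rs_last_mark + DRBD_SYNC_MARKS - 1) % DRBD_SYNC_MARKS;
	long dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
	unsigned long db = mdev->rs_mark_left[i] - rs_left;	/* bits done since mark i */

	if (dt <= 0)
		dt = 1;		/* mark taken less than a second ago */
	return db * 4 / dt;	/* 4 KiB per bitmap bit */
}

Without the seeding loop, the first rate samples of an online verify would be computed against stale marks left over from a previous resync.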
@@ -2901,7 +2897,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
 {
        struct p_sizes *p = &mdev->data.rbuf.sizes;
        enum determine_dev_size dd = unchanged;
-       unsigned int max_seg_s;
+       unsigned int max_bio_size;
        sector_t p_size, p_usize, my_usize;
        int ldsc = 0; /* local disk size changed */
        enum dds_flags ddsf;
@@ -2974,14 +2970,14 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
                }
 
                if (mdev->agreed_pro_version < 94)
-                       max_seg_s = be32_to_cpu(p->max_segment_size);
+                       max_bio_size = be32_to_cpu(p->max_bio_size);
                else if (mdev->agreed_pro_version == 94)
-                       max_seg_s = DRBD_MAX_SIZE_H80_PACKET;
+                       max_bio_size = DRBD_MAX_SIZE_H80_PACKET;
                else /* drbd 8.3.8 onwards */
-                       max_seg_s = DRBD_MAX_SEGMENT_SIZE;
+                       max_bio_size = DRBD_MAX_BIO_SIZE;
 
-               if (max_seg_s != queue_max_segment_size(mdev->rq_queue))
-                       drbd_setup_queue_param(mdev, max_seg_s);
+               if (max_bio_size != queue_max_hw_sectors(mdev->rq_queue) << 9)
+                       drbd_setup_queue_param(mdev, max_bio_size);
 
                drbd_setup_order_type(mdev, be16_to_cpu(p->queue_order_type));
                put_ldev(mdev);
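A unit note on the new comparison: queue_max_hw_sectors() reports its limit in 512-byte sectors, while max_bio_size is in bytes, hence the << 9. A trivial helper to make the conversion explicit (illustrative only, not drbd code):

static unsigned int hw_sectors_to_bytes(unsigned int max_hw_sectors)
{
	/* 512 bytes per sector; e.g. 256 sectors -> 256 << 9 == 128 KiB */
	return max_hw_sectors << 9;
}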
@@ -3556,9 +3552,6 @@ static int receive_skip(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
 
 static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
 {
-       if (mdev->state.disk >= D_INCONSISTENT)
-               drbd_kick_lo(mdev);
-
        /* Make sure we've acked all the TCP data associated
         * with the data requests being unplugged */
        drbd_tcp_quickack(mdev->data.socket);
@@ -4358,7 +4351,13 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h)
        drbd_rs_complete_io(mdev, sector);
        dec_rs_pending(mdev);
 
-       if (--mdev->ov_left == 0) {
+       --mdev->ov_left;
+
+       /* let's advance progress step marks only for every other megabyte */
+       if ((mdev->ov_left & 0x200) == 0x200)
+               drbd_advance_rs_marks(mdev, mdev->ov_left);
+
+       if (mdev->ov_left == 0) {
                w = kmalloc(sizeof(*w), GFP_NOIO);
                if (w) {
                        w->cb = w_ov_finished;
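The new call into drbd_advance_rs_marks() looks frequent: since ov_left counts 4 KiB bitmap bits, the 0x200 mask lets runs of 512 consecutive values (about 2 MiB of verified data) through at a time. That is harmless as long as advancing a mark is itself rate-limited by DRBD_SYNC_MARK_STEP. A hedged sketch of what such a helper could look like under that assumption (the real drbd_advance_rs_marks() lives in drbd_main.c and may differ):

/* Hypothetical sketch: record a new (still_to_go, jiffies) mark at most once
 * per DRBD_SYNC_MARK_STEP, rotating through the DRBD_SYNC_MARKS ring that
 * drbd_rs_should_slow_down() and the progress display read from. */
void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go)
{
	unsigned long now = jiffies;
	unsigned long last = mdev->rs_mark_time[mdev->rs_last_mark];
	int next = (mdev->rs_last_mark + 1) % DRBD_SYNC_MARKS;

	if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP) &&
	    mdev->rs_mark_left[mdev->rs_last_mark] != still_to_go) {
		mdev->rs_mark_time[next] = now;
		mdev->rs_mark_left[next] = still_to_go;
		mdev->rs_last_mark = next;
	}
}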