[mirror_ubuntu-artful-kernel.git] / drivers / block / drbd / drbd_worker.c
b411b363
PR
1/*
2 drbd_worker.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24 */
25
b411b363 26#include <linux/module.h>
b411b363
PR
27#include <linux/drbd.h>
28#include <linux/sched.h>
b411b363
PR
29#include <linux/wait.h>
30#include <linux/mm.h>
31#include <linux/memcontrol.h>
32#include <linux/mm_inline.h>
33#include <linux/slab.h>
34#include <linux/random.h>
b411b363
PR
35#include <linux/string.h>
36#include <linux/scatterlist.h>
37
38#include "drbd_int.h"
39#include "drbd_req.h"
b411b363 40
00d56944 41static int w_make_ov_request(struct drbd_work *w, int cancel);
b411b363
PR
42
43
c5a91619
AG
44/* endio handlers:
45 * drbd_md_io_complete (defined here)
fcefa62e
AG
46 * drbd_request_endio (defined here)
47 * drbd_peer_request_endio (defined here)
c5a91619
AG
48 * bm_async_io_complete (defined in drbd_bitmap.c)
49 *
b411b363
PR
50 * For all these callbacks, note the following:
51 * The callbacks will be called in irq context by the IDE drivers,
52 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
53 * Try to get the locking right :)
54 *
55 */
56
57
58/* About the global_state_lock
59 Each state transition on a device holds a read lock. In case we have
60 to evaluate the sync after dependencies, we grab a write lock, because
61 we need stable states on all devices for that. */
62rwlock_t global_state_lock;
63
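/* Editor's note (not in the original source): the read side of this lock
 * is taken for each per-device state transition, while the write side is
 * taken by resume_next_sg(), suspend_other_sg() and the resync-after
 * helpers further down in this file, which walk all devices and therefore
 * need the complete set of device states to hold still. */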
64/* used for synchronous meta data and bitmap IO
65 * submitted by drbd_md_sync_page_io()
66 */
67void drbd_md_io_complete(struct bio *bio, int error)
68{
69 struct drbd_md_io *md_io;
70
71 md_io = (struct drbd_md_io *)bio->bi_private;
72 md_io->error = error;
73
b411b363
PR
74 complete(&md_io->event);
75}
76
77/* reads on behalf of the partner,
78 * "submitted" by the receiver
79 */
db830c46 80void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
b411b363
PR
81{
82 unsigned long flags = 0;
a21e9298 83 struct drbd_conf *mdev = peer_req->w.mdev;
b411b363 84
87eeee41 85 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
db830c46
AG
86 mdev->read_cnt += peer_req->i.size >> 9;
87 list_del(&peer_req->w.list);
b411b363
PR
88 if (list_empty(&mdev->read_ee))
89 wake_up(&mdev->ee_wait);
db830c46 90 if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
81e84650 91 __drbd_chk_io_error(mdev, false);
87eeee41 92 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
b411b363 93
db830c46 94 drbd_queue_work(&mdev->tconn->data.work, &peer_req->w);
b411b363 95 put_ldev(mdev);
b411b363
PR
96}
97
98/* writes on behalf of the partner, or resync writes,
45bb912b 99 * "submitted" by the receiver, final stage. */
db830c46 100static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
b411b363
PR
101{
102 unsigned long flags = 0;
a21e9298 103 struct drbd_conf *mdev = peer_req->w.mdev;
181286ad 104 struct drbd_interval i;
b411b363 105 int do_wake;
579b57ed 106 u64 block_id;
b411b363 107 int do_al_complete_io;
b411b363 108
db830c46 109 /* after we moved peer_req to done_ee,
b411b363
PR
110 * we may no longer access it,
111 * it may be freed/reused already!
112 * (as soon as we release the req_lock) */
181286ad 113 i = peer_req->i;
db830c46
AG
114 do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
115 block_id = peer_req->block_id;
b411b363 116
87eeee41 117 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
db830c46
AG
118 mdev->writ_cnt += peer_req->i.size >> 9;
119 list_del(&peer_req->w.list); /* has been on active_ee or sync_ee */
120 list_add_tail(&peer_req->w.list, &mdev->done_ee);
b411b363 121
bb3bfe96 122 /*
5e472264 123 * Do not remove from the write_requests tree here: we did not send the
bb3bfe96
AG
124 * Ack yet and did not wake possibly waiting conflicting requests.
125 * Removed from the tree from "drbd_process_done_ee" within the
126 * appropriate w.cb (e_end_block/e_end_resync_block) or from
127 * _drbd_clear_done_ee.
128 */
b411b363 129
579b57ed 130 do_wake = list_empty(block_id == ID_SYNCER ? &mdev->sync_ee : &mdev->active_ee);
b411b363 131
db830c46 132 if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
81e84650 133 __drbd_chk_io_error(mdev, false);
87eeee41 134 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
b411b363 135
579b57ed 136 if (block_id == ID_SYNCER)
181286ad 137 drbd_rs_complete_io(mdev, i.sector);
b411b363
PR
138
139 if (do_wake)
140 wake_up(&mdev->ee_wait);
141
142 if (do_al_complete_io)
181286ad 143 drbd_al_complete_io(mdev, &i);
b411b363 144
0625ac19 145 wake_asender(mdev->tconn);
b411b363 146 put_ldev(mdev);
45bb912b 147}
b411b363 148
45bb912b
LE
149/* writes on behalf of the partner, or resync writes,
150 * "submitted" by the receiver.
151 */
fcefa62e 152void drbd_peer_request_endio(struct bio *bio, int error)
45bb912b 153{
db830c46 154 struct drbd_peer_request *peer_req = bio->bi_private;
a21e9298 155 struct drbd_conf *mdev = peer_req->w.mdev;
45bb912b
LE
156 int uptodate = bio_flagged(bio, BIO_UPTODATE);
157 int is_write = bio_data_dir(bio) == WRITE;
158
07194272 159 if (error && __ratelimit(&drbd_ratelimit_state))
45bb912b
LE
160 dev_warn(DEV, "%s: error=%d s=%llus\n",
161 is_write ? "write" : "read", error,
db830c46 162 (unsigned long long)peer_req->i.sector);
45bb912b 163 if (!error && !uptodate) {
07194272
LE
164 if (__ratelimit(&drbd_ratelimit_state))
165 dev_warn(DEV, "%s: setting error to -EIO s=%llus\n",
166 is_write ? "write" : "read",
db830c46 167 (unsigned long long)peer_req->i.sector);
45bb912b
LE
168 /* strange behavior of some lower level drivers...
169 * fail the request by clearing the uptodate flag,
170 * but do not return any error?! */
171 error = -EIO;
172 }
173
174 if (error)
db830c46 175 set_bit(__EE_WAS_ERROR, &peer_req->flags);
45bb912b
LE
176
177 bio_put(bio); /* no need for the bio anymore */
db830c46 178 if (atomic_dec_and_test(&peer_req->pending_bios)) {
45bb912b 179 if (is_write)
db830c46 180 drbd_endio_write_sec_final(peer_req);
45bb912b 181 else
db830c46 182 drbd_endio_read_sec_final(peer_req);
45bb912b 183 }
b411b363
PR
184}
185
186/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
187 */
fcefa62e 188void drbd_request_endio(struct bio *bio, int error)
b411b363 189{
a115413d 190 unsigned long flags;
b411b363 191 struct drbd_request *req = bio->bi_private;
a21e9298 192 struct drbd_conf *mdev = req->w.mdev;
a115413d 193 struct bio_and_error m;
b411b363
PR
194 enum drbd_req_event what;
195 int uptodate = bio_flagged(bio, BIO_UPTODATE);
196
b411b363
PR
197 if (!error && !uptodate) {
198 dev_warn(DEV, "p %s: setting error to -EIO\n",
199 bio_data_dir(bio) == WRITE ? "write" : "read");
200 /* strange behavior of some lower level drivers...
201 * fail the request by clearing the uptodate flag,
202 * but do not return any error?! */
203 error = -EIO;
204 }
205
b411b363
PR
206 /* to avoid recursion in __req_mod */
207 if (unlikely(error)) {
208 what = (bio_data_dir(bio) == WRITE)
8554df1c 209 ? WRITE_COMPLETED_WITH_ERROR
5c3c7e64 210 : (bio_rw(bio) == READ)
8554df1c
AG
211 ? READ_COMPLETED_WITH_ERROR
212 : READ_AHEAD_COMPLETED_WITH_ERROR;
b411b363 213 } else
8554df1c 214 what = COMPLETED_OK;
b411b363
PR
215
216 bio_put(req->private_bio);
217 req->private_bio = ERR_PTR(error);
218
a115413d 219 /* not req_mod(), we need irqsave here! */
87eeee41 220 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
a115413d 221 __req_mod(req, what, &m);
87eeee41 222 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
a115413d
LE
223
224 if (m.bio)
225 complete_master_bio(mdev, &m);
b411b363
PR
226}
227
99920dc5 228int w_read_retry_remote(struct drbd_work *w, int cancel)
b411b363
PR
229{
230 struct drbd_request *req = container_of(w, struct drbd_request, w);
00d56944 231 struct drbd_conf *mdev = w->mdev;
b411b363
PR
232
233 /* We should not detach for read io-error,
234 * but try to WRITE the P_DATA_REPLY to the failed location,
235 * to give the disk the chance to relocate that block */
236
87eeee41 237 spin_lock_irq(&mdev->tconn->req_lock);
d255e5ff 238 if (cancel || mdev->state.pdsk != D_UP_TO_DATE) {
8554df1c 239 _req_mod(req, READ_RETRY_REMOTE_CANCELED);
87eeee41 240 spin_unlock_irq(&mdev->tconn->req_lock);
99920dc5 241 return 0;
b411b363 242 }
87eeee41 243 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 244
00d56944 245 return w_send_read_req(w, 0);
b411b363
PR
246}
247
f6ffca9f 248void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm,
db830c46 249 struct drbd_peer_request *peer_req, void *digest)
45bb912b
LE
250{
251 struct hash_desc desc;
252 struct scatterlist sg;
db830c46 253 struct page *page = peer_req->pages;
45bb912b
LE
254 struct page *tmp;
255 unsigned len;
256
257 desc.tfm = tfm;
258 desc.flags = 0;
259
260 sg_init_table(&sg, 1);
261 crypto_hash_init(&desc);
262
263 while ((tmp = page_chain_next(page))) {
264 /* all but the last page will be fully used */
265 sg_set_page(&sg, page, PAGE_SIZE, 0);
266 crypto_hash_update(&desc, &sg, sg.length);
267 page = tmp;
268 }
269 /* and now the last, possibly only partially used page */
db830c46 270 len = peer_req->i.size & (PAGE_SIZE - 1);
45bb912b
LE
271 sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
272 crypto_hash_update(&desc, &sg, sg.length);
273 crypto_hash_final(&desc, digest);
274}
275
276void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest)
b411b363
PR
277{
278 struct hash_desc desc;
279 struct scatterlist sg;
280 struct bio_vec *bvec;
281 int i;
282
283 desc.tfm = tfm;
284 desc.flags = 0;
285
286 sg_init_table(&sg, 1);
287 crypto_hash_init(&desc);
288
289 __bio_for_each_segment(bvec, bio, i, 0) {
290 sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset);
291 crypto_hash_update(&desc, &sg, sg.length);
292 }
293 crypto_hash_final(&desc, digest);
294}
295
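/* Illustrative usage sketch (editor's addition, not part of the original
 * source): callers size the digest buffer from the transform and then
 * compare digests, e.g.
 *
 *	digest_size = crypto_hash_digestsize(tfm);
 *	digest = kmalloc(digest_size, GFP_NOIO);
 *	if (digest) {
 *		drbd_csum_bio(mdev, tfm, bio, digest);
 *		eq = !memcmp(digest, peer_digest, digest_size);
 *		kfree(digest);
 *	}
 *
 * This mirrors the pattern used by w_e_send_csum() and w_e_end_ov_reply()
 * below; "peer_digest" is only a placeholder name here. */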
9676c760 296/* MAYBE merge common code with w_e_end_ov_req */
99920dc5 297static int w_e_send_csum(struct drbd_work *w, int cancel)
b411b363 298{
00d56944
PR
299 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
300 struct drbd_conf *mdev = w->mdev;
b411b363
PR
301 int digest_size;
302 void *digest;
99920dc5 303 int err = 0;
b411b363 304
53ea4331
LE
305 if (unlikely(cancel))
306 goto out;
b411b363 307
9676c760 308 if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
53ea4331 309 goto out;
b411b363 310
f399002e 311 digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
53ea4331
LE
312 digest = kmalloc(digest_size, GFP_NOIO);
313 if (digest) {
db830c46
AG
314 sector_t sector = peer_req->i.sector;
315 unsigned int size = peer_req->i.size;
f399002e 316 drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
9676c760 317 /* Free peer_req and pages before send.
53ea4331
LE
318 * In case we block on congestion, we could otherwise run into
319 * some distributed deadlock, if the other side blocks on
320 * congestion as well, because our receiver blocks in
c37c8ecf 321 * drbd_alloc_pages due to pp_in_use > max_buffers. */
3967deb1 322 drbd_free_peer_req(mdev, peer_req);
db830c46 323 peer_req = NULL;
53ea4331 324 inc_rs_pending(mdev);
99920dc5 325 err = drbd_send_drequest_csum(mdev, sector, size,
db1b0b72
AG
326 digest, digest_size,
327 P_CSUM_RS_REQUEST);
53ea4331
LE
328 kfree(digest);
329 } else {
330 dev_err(DEV, "kmalloc() of digest failed.\n");
99920dc5 331 err = -ENOMEM;
53ea4331 332 }
b411b363 333
53ea4331 334out:
db830c46 335 if (peer_req)
3967deb1 336 drbd_free_peer_req(mdev, peer_req);
b411b363 337
99920dc5 338 if (unlikely(err))
b411b363 339 dev_err(DEV, "drbd_send_drequest(..., csum) failed\n");
99920dc5 340 return err;
b411b363
PR
341}
342
343#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
344
345static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
346{
db830c46 347 struct drbd_peer_request *peer_req;
b411b363
PR
348
349 if (!get_ldev(mdev))
80a40e43 350 return -EIO;
b411b363 351
e3555d85 352 if (drbd_rs_should_slow_down(mdev, sector))
0f0601f4
LE
353 goto defer;
354
b411b363
PR
355 /* GFP_TRY, because if there is no memory available right now, this may
356 * be rescheduled for later. It is "only" background resync, after all. */
0db55363
AG
357 peer_req = drbd_alloc_peer_req(mdev, ID_SYNCER /* unused */, sector,
358 size, GFP_TRY);
db830c46 359 if (!peer_req)
80a40e43 360 goto defer;
b411b363 361
db830c46 362 peer_req->w.cb = w_e_send_csum;
87eeee41 363 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 364 list_add(&peer_req->w.list, &mdev->read_ee);
87eeee41 365 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 366
0f0601f4 367 atomic_add(size >> 9, &mdev->rs_sect_ev);
fbe29dec 368 if (drbd_submit_peer_request(mdev, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
80a40e43 369 return 0;
b411b363 370
10f6d992
LE
371 /* If it failed because of ENOMEM, retry should help. If it failed
372 * because bio_add_page failed (probably broken lower level driver),
373 * retry may or may not help.
374 * If it does not, you may need to force disconnect. */
87eeee41 375 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 376 list_del(&peer_req->w.list);
87eeee41 377 spin_unlock_irq(&mdev->tconn->req_lock);
22cc37a9 378
3967deb1 379 drbd_free_peer_req(mdev, peer_req);
80a40e43 380defer:
45bb912b 381 put_ldev(mdev);
80a40e43 382 return -EAGAIN;
b411b363
PR
383}
384
99920dc5 385int w_resync_timer(struct drbd_work *w, int cancel)
b411b363 386{
00d56944 387 struct drbd_conf *mdev = w->mdev;
63106d3c
PR
388 switch (mdev->state.conn) {
389 case C_VERIFY_S:
00d56944 390 w_make_ov_request(w, cancel);
63106d3c
PR
391 break;
392 case C_SYNC_TARGET:
00d56944 393 w_make_resync_request(w, cancel);
63106d3c 394 break;
b411b363
PR
395 }
396
99920dc5 397 return 0;
794abb75
PR
398}
399
400void resync_timer_fn(unsigned long data)
401{
402 struct drbd_conf *mdev = (struct drbd_conf *) data;
403
404 if (list_empty(&mdev->resync_work.list))
e42325a5 405 drbd_queue_work(&mdev->tconn->data.work, &mdev->resync_work);
b411b363
PR
406}
407
778f271d
PR
408static void fifo_set(struct fifo_buffer *fb, int value)
409{
410 int i;
411
412 for (i = 0; i < fb->size; i++)
f10f2623 413 fb->values[i] = value;
778f271d
PR
414}
415
416static int fifo_push(struct fifo_buffer *fb, int value)
417{
418 int ov;
419
420 ov = fb->values[fb->head_index];
421 fb->values[fb->head_index++] = value;
422
423 if (fb->head_index >= fb->size)
424 fb->head_index = 0;
425
426 return ov;
427}
428
429static void fifo_add_val(struct fifo_buffer *fb, int value)
430{
431 int i;
432
433 for (i = 0; i < fb->size; i++)
434 fb->values[i] += value;
435}
436
9958c857
PR
437struct fifo_buffer *fifo_alloc(int fifo_size)
438{
439 struct fifo_buffer *fb;
440
441 fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_KERNEL);
442 if (!fb)
443 return NULL;
444
445 fb->head_index = 0;
446 fb->size = fifo_size;
447 fb->total = 0;
448
449 return fb;
450}
451
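/* Editor's note (illustrative, not in the original source): rs_plan_s is
 * used as a fixed-size ring of planned per-step corrections.  fifo_push(fb, v)
 * stores v in the slot at head_index and returns the value that was planned
 * there fb->size steps earlier; fifo_add_val() spreads a correction evenly
 * over all slots.  With size == 3, contents {5, 2, 7} and head_index == 0,
 * fifo_push(fb, 0) returns 5 and leaves {0, 2, 7} with head_index == 1
 * (made-up numbers, for illustration only). */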
9d77a5fe 452static int drbd_rs_controller(struct drbd_conf *mdev)
778f271d 453{
daeda1cc 454 struct disk_conf *dc;
778f271d
PR
455 unsigned int sect_in; /* Number of sectors that came in since the last turn */
456 unsigned int want; /* The number of sectors we want in the proxy */
457 int req_sect; /* Number of sectors to request in this turn */
458 int correction; /* Number of sectors more we need in the proxy*/
459 int cps; /* correction per invocation of drbd_rs_controller() */
460 int steps; /* Number of time steps to plan ahead */
461 int curr_corr;
462 int max_sect;
463
464 sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */
465 mdev->rs_in_flight -= sect_in;
466
467 spin_lock(&mdev->peer_seq_lock); /* get an atomic view on mdev->rs_plan_s */
daeda1cc
PR
468 rcu_read_lock();
469 dc = rcu_dereference(mdev->ldev->disk_conf);
778f271d 470
9958c857 471 steps = mdev->rs_plan_s->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
778f271d
PR
472
473 if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */
daeda1cc 474 want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
778f271d 475 } else { /* normal path */
daeda1cc
PR
476 want = dc->c_fill_target ? dc->c_fill_target :
477 sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
778f271d
PR
478 }
479
9958c857 480 correction = want - mdev->rs_in_flight - mdev->rs_plan_s->total;
778f271d
PR
481
482 /* Plan ahead */
483 cps = correction / steps;
9958c857
PR
484 fifo_add_val(mdev->rs_plan_s, cps);
485 mdev->rs_plan_s->total += cps * steps;
778f271d
PR
486
487 /* What we do in this step */
9958c857
PR
488 curr_corr = fifo_push(mdev->rs_plan_s, 0);
489 mdev->rs_plan_s->total -= curr_corr;
778f271d
PR
490
491 req_sect = sect_in + curr_corr;
492 if (req_sect < 0)
493 req_sect = 0;
494
daeda1cc 495 max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
778f271d
PR
496 if (req_sect > max_sect)
497 req_sect = max_sect;
498
499 /*
500 dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
501 sect_in, mdev->rs_in_flight, want, correction,
502 steps, cps, mdev->rs_planed, curr_corr, req_sect);
503 */
daeda1cc
PR
504 rcu_read_unlock();
505 spin_unlock(&mdev->peer_seq_lock);
778f271d
PR
506
507 return req_sect;
508}
509
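/* Worked example (editor's addition, illustrative numbers only): with
 * steps == 10, want == 2000, rs_in_flight == 1200 and rs_plan_s->total == 300,
 * the correction is 500 sectors, so cps == 50 is added to every plan slot and
 * total becomes 800.  fifo_push() then pops the correction that was planned
 * for this step as curr_corr, and req_sect = sect_in + curr_corr, clamped to
 * [0, dc->c_max_rate * 2 * SLEEP_TIME / HZ].  The net effect is a proportional
 * controller that spreads each correction over the c_plan_ahead steps of the
 * plan instead of applying it all at once. */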
9d77a5fe 510static int drbd_rs_number_requests(struct drbd_conf *mdev)
e65f440d
LE
511{
512 int number;
9958c857 513 if (mdev->rs_plan_s->size) { /* rcu_dereference(mdev->ldev->disk_conf)->c_plan_ahead */
e65f440d
LE
514 number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
515 mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
516 } else {
daeda1cc
PR
517 rcu_read_lock();
518 mdev->c_sync_rate = rcu_dereference(mdev->ldev->disk_conf)->resync_rate;
519 rcu_read_unlock();
e65f440d
LE
520 number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
521 }
522
e65f440d
LE
523 /* ignore the amount of pending requests, the resync controller should
524 * throttle down to incoming reply rate soon enough anyways. */
525 return number;
526}
527
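/* Unit sketch (editor's addition): "number" counts BM_BLOCK_SIZE (4 KiB)
 * resync requests per SLEEP_TIME tick.  drbd_rs_controller() returns sectors,
 * so >> (BM_BLOCK_SHIFT - 9) divides by the 8 sectors per 4 KiB block, and
 * c_sync_rate is back-computed in KiB/s.  In the fixed-rate branch, assuming
 * SLEEP_TIME of HZ/10, a resync_rate of 40000 KiB/s yields
 * 40000 / 10 / 4 == 1000 requests per 100 ms tick. */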
99920dc5 528int w_make_resync_request(struct drbd_work *w, int cancel)
b411b363 529{
00d56944 530 struct drbd_conf *mdev = w->mdev;
b411b363
PR
531 unsigned long bit;
532 sector_t sector;
533 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
1816a2b4 534 int max_bio_size;
e65f440d 535 int number, rollback_i, size;
b411b363 536 int align, queued, sndbuf;
0f0601f4 537 int i = 0;
b411b363
PR
538
539 if (unlikely(cancel))
99920dc5 540 return 0;
b411b363 541
af85e8e8
LE
542 if (mdev->rs_total == 0) {
543 /* empty resync? */
544 drbd_resync_finished(mdev);
99920dc5 545 return 0;
af85e8e8
LE
546 }
547
b411b363
PR
548 if (!get_ldev(mdev)) {
549 /* Since we only need to access mdev->rsync, a
550 get_ldev_if_state(mdev, D_FAILED) would be sufficient, but
551 continuing resync with a broken disk makes no sense at
552 all */
553 dev_err(DEV, "Disk broke down during resync!\n");
99920dc5 554 return 0;
b411b363
PR
555 }
556
0cfdd247 557 max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9;
e65f440d
LE
558 number = drbd_rs_number_requests(mdev);
559 if (number == 0)
0f0601f4 560 goto requeue;
b411b363 561
b411b363
PR
562 for (i = 0; i < number; i++) {
563 /* Stop generating RS requests, when half of the send buffer is filled */
e42325a5
PR
564 mutex_lock(&mdev->tconn->data.mutex);
565 if (mdev->tconn->data.socket) {
566 queued = mdev->tconn->data.socket->sk->sk_wmem_queued;
567 sndbuf = mdev->tconn->data.socket->sk->sk_sndbuf;
b411b363
PR
568 } else {
569 queued = 1;
570 sndbuf = 0;
571 }
e42325a5 572 mutex_unlock(&mdev->tconn->data.mutex);
b411b363
PR
573 if (queued > sndbuf / 2)
574 goto requeue;
575
576next_sector:
577 size = BM_BLOCK_SIZE;
578 bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo);
579
4b0715f0 580 if (bit == DRBD_END_OF_BITMAP) {
b411b363 581 mdev->bm_resync_fo = drbd_bm_bits(mdev);
b411b363 582 put_ldev(mdev);
99920dc5 583 return 0;
b411b363
PR
584 }
585
586 sector = BM_BIT_TO_SECT(bit);
587
e3555d85
PR
588 if (drbd_rs_should_slow_down(mdev, sector) ||
589 drbd_try_rs_begin_io(mdev, sector)) {
b411b363
PR
590 mdev->bm_resync_fo = bit;
591 goto requeue;
592 }
593 mdev->bm_resync_fo = bit + 1;
594
595 if (unlikely(drbd_bm_test_bit(mdev, bit) == 0)) {
596 drbd_rs_complete_io(mdev, sector);
597 goto next_sector;
598 }
599
1816a2b4 600#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
b411b363
PR
601 /* try to find some adjacent bits.
602 * we stop if we have already the maximum req size.
603 *
604 * Additionally always align bigger requests, in order to
605 * be prepared for all stripe sizes of software RAIDs.
b411b363
PR
606 */
607 align = 1;
d207450c 608 rollback_i = i;
b411b363 609 for (;;) {
1816a2b4 610 if (size + BM_BLOCK_SIZE > max_bio_size)
b411b363
PR
611 break;
612
613 /* Be always aligned */
614 if (sector & ((1<<(align+3))-1))
615 break;
616
617 /* do not cross extent boundaries */
618 if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
619 break;
620 /* now, is it actually dirty, after all?
621 * caution, drbd_bm_test_bit is tri-state for some
622 * obscure reason; ( b == 0 ) would get the out-of-band
623 * only accidentally right because of the "oddly sized"
624 * adjustment below */
625 if (drbd_bm_test_bit(mdev, bit+1) != 1)
626 break;
627 bit++;
628 size += BM_BLOCK_SIZE;
629 if ((BM_BLOCK_SIZE << align) <= size)
630 align++;
631 i++;
632 }
633 /* if we merged some,
634 * reset the offset to start the next drbd_bm_find_next from */
635 if (size > BM_BLOCK_SIZE)
636 mdev->bm_resync_fo = bit + 1;
637#endif
638
639 /* adjust very last sectors, in case we are oddly sized */
640 if (sector + (size>>9) > capacity)
641 size = (capacity-sector)<<9;
f399002e 642 if (mdev->tconn->agreed_pro_version >= 89 && mdev->tconn->csums_tfm) {
b411b363 643 switch (read_for_csum(mdev, sector, size)) {
80a40e43 644 case -EIO: /* Disk failure */
b411b363 645 put_ldev(mdev);
99920dc5 646 return -EIO;
80a40e43 647 case -EAGAIN: /* allocation failed, or ldev busy */
b411b363
PR
648 drbd_rs_complete_io(mdev, sector);
649 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
d207450c 650 i = rollback_i;
b411b363 651 goto requeue;
80a40e43
LE
652 case 0:
653 /* everything ok */
654 break;
655 default:
656 BUG();
b411b363
PR
657 }
658 } else {
99920dc5
AG
659 int err;
660
b411b363 661 inc_rs_pending(mdev);
99920dc5
AG
662 err = drbd_send_drequest(mdev, P_RS_DATA_REQUEST,
663 sector, size, ID_SYNCER);
664 if (err) {
b411b363
PR
665 dev_err(DEV, "drbd_send_drequest() failed, aborting...\n");
666 dec_rs_pending(mdev);
667 put_ldev(mdev);
99920dc5 668 return err;
b411b363
PR
669 }
670 }
671 }
672
673 if (mdev->bm_resync_fo >= drbd_bm_bits(mdev)) {
674 /* last syncer _request_ was sent,
675 * but the P_RS_DATA_REPLY not yet received. sync will end (and
676 * next sync group will resume), as soon as we receive the last
677 * resync data block, and the last bit is cleared.
678 * until then resync "work" is "inactive" ...
679 */
b411b363 680 put_ldev(mdev);
99920dc5 681 return 0;
b411b363
PR
682 }
683
684 requeue:
778f271d 685 mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
b411b363
PR
686 mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
687 put_ldev(mdev);
99920dc5 688 return 0;
b411b363
PR
689}
690
00d56944 691static int w_make_ov_request(struct drbd_work *w, int cancel)
b411b363 692{
00d56944 693 struct drbd_conf *mdev = w->mdev;
b411b363
PR
694 int number, i, size;
695 sector_t sector;
696 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
697
698 if (unlikely(cancel))
699 return 1;
700
2649f080 701 number = drbd_rs_number_requests(mdev);
b411b363
PR
702
703 sector = mdev->ov_position;
704 for (i = 0; i < number; i++) {
705 if (sector >= capacity) {
b411b363
PR
706 return 1;
707 }
708
709 size = BM_BLOCK_SIZE;
710
e3555d85
PR
711 if (drbd_rs_should_slow_down(mdev, sector) ||
712 drbd_try_rs_begin_io(mdev, sector)) {
b411b363
PR
713 mdev->ov_position = sector;
714 goto requeue;
715 }
716
717 if (sector + (size>>9) > capacity)
718 size = (capacity-sector)<<9;
719
720 inc_rs_pending(mdev);
5b9f499c 721 if (drbd_send_ov_request(mdev, sector, size)) {
b411b363
PR
722 dec_rs_pending(mdev);
723 return 0;
724 }
725 sector += BM_SECT_PER_BIT;
726 }
727 mdev->ov_position = sector;
728
729 requeue:
2649f080 730 mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
b411b363
PR
731 mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
732 return 1;
733}
734
99920dc5 735int w_ov_finished(struct drbd_work *w, int cancel)
b411b363 736{
00d56944 737 struct drbd_conf *mdev = w->mdev;
b411b363 738 kfree(w);
8f7bed77 739 ov_out_of_sync_print(mdev);
b411b363
PR
740 drbd_resync_finished(mdev);
741
99920dc5 742 return 0;
b411b363
PR
743}
744
99920dc5 745static int w_resync_finished(struct drbd_work *w, int cancel)
b411b363 746{
00d56944 747 struct drbd_conf *mdev = w->mdev;
b411b363
PR
748 kfree(w);
749
750 drbd_resync_finished(mdev);
751
99920dc5 752 return 0;
b411b363
PR
753}
754
af85e8e8
LE
755static void ping_peer(struct drbd_conf *mdev)
756{
2a67d8b9
PR
757 struct drbd_tconn *tconn = mdev->tconn;
758
759 clear_bit(GOT_PING_ACK, &tconn->flags);
760 request_ping(tconn);
761 wait_event(tconn->ping_wait,
762 test_bit(GOT_PING_ACK, &tconn->flags) || mdev->state.conn < C_CONNECTED);
af85e8e8
LE
763}
764
b411b363
PR
765int drbd_resync_finished(struct drbd_conf *mdev)
766{
767 unsigned long db, dt, dbdt;
768 unsigned long n_oos;
769 union drbd_state os, ns;
770 struct drbd_work *w;
771 char *khelper_cmd = NULL;
26525618 772 int verify_done = 0;
b411b363
PR
773
774 /* Remove all elements from the resync LRU. Since future actions
775 * might set bits in the (main) bitmap, then the entries in the
776 * resync LRU would be wrong. */
777 if (drbd_rs_del_all(mdev)) {
778 /* In case this is not possible now, most probably because
779 * there are P_RS_DATA_REPLY Packets lingering on the worker's
780 * queue (or even the read operations for those packets
781 * are not finished by now). Retry in 100ms. */
782
20ee6390 783 schedule_timeout_interruptible(HZ / 10);
b411b363
PR
784 w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
785 if (w) {
786 w->cb = w_resync_finished;
e42325a5 787 drbd_queue_work(&mdev->tconn->data.work, w);
b411b363
PR
788 return 1;
789 }
790 dev_err(DEV, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n");
791 }
792
793 dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
794 if (dt <= 0)
795 dt = 1;
796 db = mdev->rs_total;
797 dbdt = Bit2KB(db/dt);
798 mdev->rs_paused /= HZ;
799
800 if (!get_ldev(mdev))
801 goto out;
802
af85e8e8
LE
803 ping_peer(mdev);
804
87eeee41 805 spin_lock_irq(&mdev->tconn->req_lock);
78bae59b 806 os = drbd_read_state(mdev);
b411b363 807
26525618
LE
808 verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);
809
b411b363
PR
810 /* This protects us against multiple calls (that can happen in the presence
811 of application IO), and against connectivity loss just before we arrive here. */
812 if (os.conn <= C_CONNECTED)
813 goto out_unlock;
814
815 ns = os;
816 ns.conn = C_CONNECTED;
817
818 dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
26525618 819 verify_done ? "Online verify " : "Resync",
b411b363
PR
820 dt + mdev->rs_paused, mdev->rs_paused, dbdt);
821
822 n_oos = drbd_bm_total_weight(mdev);
823
824 if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
825 if (n_oos) {
826 dev_alert(DEV, "Online verify found %lu %dk block out of sync!\n",
827 n_oos, Bit2KB(1));
828 khelper_cmd = "out-of-sync";
829 }
830 } else {
831 D_ASSERT((n_oos - mdev->rs_failed) == 0);
832
833 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
834 khelper_cmd = "after-resync-target";
835
f399002e 836 if (mdev->tconn->csums_tfm && mdev->rs_total) {
b411b363
PR
837 const unsigned long s = mdev->rs_same_csum;
838 const unsigned long t = mdev->rs_total;
839 const int ratio =
840 (t == 0) ? 0 :
841 (t < 100000) ? ((s*100)/t) : (s/(t/100));
24c4830c 842 dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; "
b411b363
PR
843 "transferred %luK total %luK\n",
844 ratio,
845 Bit2KB(mdev->rs_same_csum),
846 Bit2KB(mdev->rs_total - mdev->rs_same_csum),
847 Bit2KB(mdev->rs_total));
848 }
849 }
850
851 if (mdev->rs_failed) {
852 dev_info(DEV, " %lu failed blocks\n", mdev->rs_failed);
853
854 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
855 ns.disk = D_INCONSISTENT;
856 ns.pdsk = D_UP_TO_DATE;
857 } else {
858 ns.disk = D_UP_TO_DATE;
859 ns.pdsk = D_INCONSISTENT;
860 }
861 } else {
862 ns.disk = D_UP_TO_DATE;
863 ns.pdsk = D_UP_TO_DATE;
864
865 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
866 if (mdev->p_uuid) {
867 int i;
868 for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
869 _drbd_uuid_set(mdev, i, mdev->p_uuid[i]);
870 drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_CURRENT]);
871 _drbd_uuid_set(mdev, UI_CURRENT, mdev->p_uuid[UI_CURRENT]);
872 } else {
873 dev_err(DEV, "mdev->p_uuid is NULL! BUG\n");
874 }
875 }
876
62b0da3a
LE
877 if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
878 /* for verify runs, we don't update uuids here,
879 * so there would be nothing to report. */
880 drbd_uuid_set_bm(mdev, 0UL);
881 drbd_print_uuids(mdev, "updated UUIDs");
882 if (mdev->p_uuid) {
883 /* Now the two UUID sets are equal, update what we
884 * know of the peer. */
885 int i;
886 for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
887 mdev->p_uuid[i] = mdev->ldev->md.uuid[i];
888 }
b411b363
PR
889 }
890 }
891
892 _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
893out_unlock:
87eeee41 894 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363
PR
895 put_ldev(mdev);
896out:
897 mdev->rs_total = 0;
898 mdev->rs_failed = 0;
899 mdev->rs_paused = 0;
26525618
LE
900 if (verify_done)
901 mdev->ov_start_sector = 0;
b411b363 902
13d42685
LE
903 drbd_md_sync(mdev);
904
b411b363
PR
905 if (khelper_cmd)
906 drbd_khelper(mdev, khelper_cmd);
907
908 return 1;
909}
910
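/* Arithmetic note on drbd_resync_finished() above (editor's addition):
 * db is the resync size in bitmap bits (one bit per BM_BLOCK_SIZE == 4 KiB),
 * dt is the elapsed time in seconds, so Bit2KB(db/dt) yields K/sec; e.g.
 * 1000000 bits resynced in 100 seconds is reported as 40000 K/sec.  The two
 * branches of the checksum ratio, (s*100)/t vs. s/(t/100), presumably keep
 * the intermediate product s*100 from overflowing for very large resyncs. */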
911/* helper */
db830c46 912static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_peer_request *peer_req)
b411b363 913{
045417f7 914 if (drbd_peer_req_has_active_page(peer_req)) {
b411b363 915 /* This might happen if sendpage() has not finished */
db830c46 916 int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
435f0740
LE
917 atomic_add(i, &mdev->pp_in_use_by_net);
918 atomic_sub(i, &mdev->pp_in_use);
87eeee41 919 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 920 list_add_tail(&peer_req->w.list, &mdev->net_ee);
87eeee41 921 spin_unlock_irq(&mdev->tconn->req_lock);
435f0740 922 wake_up(&drbd_pp_wait);
b411b363 923 } else
3967deb1 924 drbd_free_peer_req(mdev, peer_req);
b411b363
PR
925}
926
927/**
928 * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
929 * @mdev: DRBD device.
930 * @w: work object.
931 * @cancel: The connection will be closed anyways
932 */
99920dc5 933int w_e_end_data_req(struct drbd_work *w, int cancel)
b411b363 934{
db830c46 935 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
00d56944 936 struct drbd_conf *mdev = w->mdev;
99920dc5 937 int err;
b411b363
PR
938
939 if (unlikely(cancel)) {
3967deb1 940 drbd_free_peer_req(mdev, peer_req);
b411b363 941 dec_unacked(mdev);
99920dc5 942 return 0;
b411b363
PR
943 }
944
db830c46 945 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
99920dc5 946 err = drbd_send_block(mdev, P_DATA_REPLY, peer_req);
b411b363
PR
947 } else {
948 if (__ratelimit(&drbd_ratelimit_state))
949 dev_err(DEV, "Sending NegDReply. sector=%llus.\n",
db830c46 950 (unsigned long long)peer_req->i.sector);
b411b363 951
99920dc5 952 err = drbd_send_ack(mdev, P_NEG_DREPLY, peer_req);
b411b363
PR
953 }
954
955 dec_unacked(mdev);
956
db830c46 957 move_to_net_ee_or_free(mdev, peer_req);
b411b363 958
99920dc5 959 if (unlikely(err))
b411b363 960 dev_err(DEV, "drbd_send_block() failed\n");
99920dc5 961 return err;
b411b363
PR
962}
963
964/**
965 * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUESTRS
966 * @mdev: DRBD device.
967 * @w: work object.
968 * @cancel: The connection will be closed anyways
969 */
99920dc5 970int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
b411b363 971{
db830c46 972 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
00d56944 973 struct drbd_conf *mdev = w->mdev;
99920dc5 974 int err;
b411b363
PR
975
976 if (unlikely(cancel)) {
3967deb1 977 drbd_free_peer_req(mdev, peer_req);
b411b363 978 dec_unacked(mdev);
99920dc5 979 return 0;
b411b363
PR
980 }
981
982 if (get_ldev_if_state(mdev, D_FAILED)) {
db830c46 983 drbd_rs_complete_io(mdev, peer_req->i.sector);
b411b363
PR
984 put_ldev(mdev);
985 }
986
d612d309 987 if (mdev->state.conn == C_AHEAD) {
99920dc5 988 err = drbd_send_ack(mdev, P_RS_CANCEL, peer_req);
db830c46 989 } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
b411b363
PR
990 if (likely(mdev->state.pdsk >= D_INCONSISTENT)) {
991 inc_rs_pending(mdev);
99920dc5 992 err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req);
b411b363
PR
993 } else {
994 if (__ratelimit(&drbd_ratelimit_state))
995 dev_err(DEV, "Not sending RSDataReply, "
996 "partner DISKLESS!\n");
99920dc5 997 err = 0;
b411b363
PR
998 }
999 } else {
1000 if (__ratelimit(&drbd_ratelimit_state))
1001 dev_err(DEV, "Sending NegRSDReply. sector %llus.\n",
db830c46 1002 (unsigned long long)peer_req->i.sector);
b411b363 1003
99920dc5 1004 err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req);
b411b363
PR
1005
1006 /* update resync data with failure */
db830c46 1007 drbd_rs_failed_io(mdev, peer_req->i.sector, peer_req->i.size);
b411b363
PR
1008 }
1009
1010 dec_unacked(mdev);
1011
db830c46 1012 move_to_net_ee_or_free(mdev, peer_req);
b411b363 1013
99920dc5 1014 if (unlikely(err))
b411b363 1015 dev_err(DEV, "drbd_send_block() failed\n");
99920dc5 1016 return err;
b411b363
PR
1017}
1018
99920dc5 1019int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
b411b363 1020{
db830c46 1021 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
00d56944 1022 struct drbd_conf *mdev = w->mdev;
b411b363
PR
1023 struct digest_info *di;
1024 int digest_size;
1025 void *digest = NULL;
99920dc5 1026 int err, eq = 0;
b411b363
PR
1027
1028 if (unlikely(cancel)) {
3967deb1 1029 drbd_free_peer_req(mdev, peer_req);
b411b363 1030 dec_unacked(mdev);
99920dc5 1031 return 0;
b411b363
PR
1032 }
1033
1d53f09e 1034 if (get_ldev(mdev)) {
db830c46 1035 drbd_rs_complete_io(mdev, peer_req->i.sector);
1d53f09e
LE
1036 put_ldev(mdev);
1037 }
b411b363 1038
db830c46 1039 di = peer_req->digest;
b411b363 1040
db830c46 1041 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
b411b363
PR
1042 /* quick hack to try to avoid a race against reconfiguration.
1043 * a real fix would be much more involved,
1044 * introducing more locking mechanisms */
f399002e
LE
1045 if (mdev->tconn->csums_tfm) {
1046 digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
b411b363
PR
1047 D_ASSERT(digest_size == di->digest_size);
1048 digest = kmalloc(digest_size, GFP_NOIO);
1049 }
1050 if (digest) {
f399002e 1051 drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
b411b363
PR
1052 eq = !memcmp(digest, di->digest, digest_size);
1053 kfree(digest);
1054 }
1055
1056 if (eq) {
db830c46 1057 drbd_set_in_sync(mdev, peer_req->i.sector, peer_req->i.size);
676396d5 1058 /* rs_same_csums unit is BM_BLOCK_SIZE */
db830c46 1059 mdev->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
99920dc5 1060 err = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, peer_req);
b411b363
PR
1061 } else {
1062 inc_rs_pending(mdev);
db830c46
AG
1063 peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1064 peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
204bba99 1065 kfree(di);
99920dc5 1066 err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req);
b411b363
PR
1067 }
1068 } else {
99920dc5 1069 err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req);
b411b363
PR
1070 if (__ratelimit(&drbd_ratelimit_state))
1071 dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
1072 }
1073
1074 dec_unacked(mdev);
db830c46 1075 move_to_net_ee_or_free(mdev, peer_req);
b411b363 1076
99920dc5 1077 if (unlikely(err))
b411b363 1078 dev_err(DEV, "drbd_send_block/ack() failed\n");
99920dc5 1079 return err;
b411b363
PR
1080}
1081
99920dc5 1082int w_e_end_ov_req(struct drbd_work *w, int cancel)
b411b363 1083{
db830c46 1084 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
00d56944 1085 struct drbd_conf *mdev = w->mdev;
db830c46
AG
1086 sector_t sector = peer_req->i.sector;
1087 unsigned int size = peer_req->i.size;
b411b363
PR
1088 int digest_size;
1089 void *digest;
99920dc5 1090 int err = 0;
b411b363
PR
1091
1092 if (unlikely(cancel))
1093 goto out;
1094
f399002e 1095 digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
b411b363 1096 digest = kmalloc(digest_size, GFP_NOIO);
8f21420e 1097 if (!digest) {
99920dc5 1098 err = 1; /* terminate the connection in case the allocation failed */
8f21420e 1099 goto out;
b411b363
PR
1100 }
1101
db830c46 1102 if (likely(!(peer_req->flags & EE_WAS_ERROR)))
f399002e 1103 drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);
8f21420e
PR
1104 else
1105 memset(digest, 0, digest_size);
1106
53ea4331
LE
1107 /* Free e and pages before send.
1108 * In case we block on congestion, we could otherwise run into
1109 * some distributed deadlock, if the other side blocks on
1110 * congestion as well, because our receiver blocks in
c37c8ecf 1111 * drbd_alloc_pages due to pp_in_use > max_buffers. */
3967deb1 1112 drbd_free_peer_req(mdev, peer_req);
db830c46 1113 peer_req = NULL;
8f21420e 1114 inc_rs_pending(mdev);
99920dc5
AG
1115 err = drbd_send_drequest_csum(mdev, sector, size, digest, digest_size, P_OV_REPLY);
1116 if (err)
8f21420e
PR
1117 dec_rs_pending(mdev);
1118 kfree(digest);
1119
b411b363 1120out:
db830c46 1121 if (peer_req)
3967deb1 1122 drbd_free_peer_req(mdev, peer_req);
b411b363 1123 dec_unacked(mdev);
99920dc5 1124 return err;
b411b363
PR
1125}
1126
8f7bed77 1127void drbd_ov_out_of_sync_found(struct drbd_conf *mdev, sector_t sector, int size)
b411b363
PR
1128{
1129 if (mdev->ov_last_oos_start + mdev->ov_last_oos_size == sector) {
1130 mdev->ov_last_oos_size += size>>9;
1131 } else {
1132 mdev->ov_last_oos_start = sector;
1133 mdev->ov_last_oos_size = size>>9;
1134 }
1135 drbd_set_out_of_sync(mdev, sector, size);
b411b363
PR
1136}
1137
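/* Editor's note (illustrative, made-up numbers): consecutive out-of-sync
 * blocks are merged into a single ov_last_oos_* range.  If the last range
 * started at sector 1000 with ov_last_oos_size == 8 sectors and the next bad
 * block begins at sector 1008, only the size grows; a bad block at sector
 * 2048 instead starts a new range.  ov_out_of_sync_print() later reports
 * whatever range is pending. */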
99920dc5 1138int w_e_end_ov_reply(struct drbd_work *w, int cancel)
b411b363 1139{
db830c46 1140 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
00d56944 1141 struct drbd_conf *mdev = w->mdev;
b411b363 1142 struct digest_info *di;
b411b363 1143 void *digest;
db830c46
AG
1144 sector_t sector = peer_req->i.sector;
1145 unsigned int size = peer_req->i.size;
53ea4331 1146 int digest_size;
99920dc5 1147 int err, eq = 0;
b411b363
PR
1148
1149 if (unlikely(cancel)) {
3967deb1 1150 drbd_free_peer_req(mdev, peer_req);
b411b363 1151 dec_unacked(mdev);
99920dc5 1152 return 0;
b411b363
PR
1153 }
1154
1155 /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1156 * the resync lru has been cleaned up already */
1d53f09e 1157 if (get_ldev(mdev)) {
db830c46 1158 drbd_rs_complete_io(mdev, peer_req->i.sector);
1d53f09e
LE
1159 put_ldev(mdev);
1160 }
b411b363 1161
db830c46 1162 di = peer_req->digest;
b411b363 1163
db830c46 1164 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
f399002e 1165 digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
b411b363
PR
1166 digest = kmalloc(digest_size, GFP_NOIO);
1167 if (digest) {
f399002e 1168 drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);
b411b363
PR
1169
1170 D_ASSERT(digest_size == di->digest_size);
1171 eq = !memcmp(digest, di->digest, digest_size);
1172 kfree(digest);
1173 }
b411b363
PR
1174 }
1175
9676c760
LE
1176 /* Free peer_req and pages before send.
1177 * In case we block on congestion, we could otherwise run into
1178 * some distributed deadlock, if the other side blocks on
1179 * congestion as well, because our receiver blocks in
c37c8ecf 1180 * drbd_alloc_pages due to pp_in_use > max_buffers. */
3967deb1 1181 drbd_free_peer_req(mdev, peer_req);
b411b363 1182 if (!eq)
8f7bed77 1183 drbd_ov_out_of_sync_found(mdev, sector, size);
b411b363 1184 else
8f7bed77 1185 ov_out_of_sync_print(mdev);
b411b363 1186
99920dc5 1187 err = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size,
fa79abd8 1188 eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
b411b363 1189
53ea4331 1190 dec_unacked(mdev);
b411b363 1191
ea5442af
LE
1192 --mdev->ov_left;
1193
1194 /* let's advance progress step marks only for every other megabyte */
1195 if ((mdev->ov_left & 0x200) == 0x200)
1196 drbd_advance_rs_marks(mdev, mdev->ov_left);
1197
1198 if (mdev->ov_left == 0) {
8f7bed77 1199 ov_out_of_sync_print(mdev);
b411b363
PR
1200 drbd_resync_finished(mdev);
1201 }
1202
99920dc5 1203 return err;
b411b363
PR
1204}
1205
99920dc5 1206int w_prev_work_done(struct drbd_work *w, int cancel)
b411b363
PR
1207{
1208 struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w);
00d56944 1209
b411b363 1210 complete(&b->done);
99920dc5 1211 return 0;
b411b363
PR
1212}
1213
99920dc5 1214int w_send_barrier(struct drbd_work *w, int cancel)
b411b363 1215{
9f5bdc33 1216 struct drbd_socket *sock;
b411b363 1217 struct drbd_tl_epoch *b = container_of(w, struct drbd_tl_epoch, w);
00d56944 1218 struct drbd_conf *mdev = w->mdev;
9f5bdc33 1219 struct p_barrier *p;
b411b363
PR
1220
1221 /* really avoid racing with tl_clear. w.cb may have been referenced
1222 * just before it was reassigned and re-queued, so double check that.
1223 * actually, this race was harmless, since we only try to send the
1224 * barrier packet here, and otherwise do nothing with the object.
1225 * but compare with the head of w_clear_epoch */
87eeee41 1226 spin_lock_irq(&mdev->tconn->req_lock);
b411b363
PR
1227 if (w->cb != w_send_barrier || mdev->state.conn < C_CONNECTED)
1228 cancel = 1;
87eeee41 1229 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 1230 if (cancel)
b411b363 1231 return 0;
99920dc5 1232
9f5bdc33
AG
1233 sock = &mdev->tconn->data;
1234 p = drbd_prepare_command(mdev, sock);
1235 if (!p)
1236 return -EIO;
b411b363
PR
1237 p->barrier = b->br_number;
1238 /* inc_ap_pending was done where this was queued.
1239 * dec_ap_pending will be done in got_BarrierAck
1240 * or (on connection loss) in w_clear_epoch. */
9f5bdc33 1241 return drbd_send_command(mdev, sock, P_BARRIER, sizeof(*p), NULL, 0);
b411b363
PR
1242}
1243
99920dc5 1244int w_send_write_hint(struct drbd_work *w, int cancel)
b411b363 1245{
00d56944 1246 struct drbd_conf *mdev = w->mdev;
9f5bdc33
AG
1247 struct drbd_socket *sock;
1248
b411b363 1249 if (cancel)
99920dc5 1250 return 0;
9f5bdc33
AG
1251 sock = &mdev->tconn->data;
1252 if (!drbd_prepare_command(mdev, sock))
1253 return -EIO;
e658983a 1254 return drbd_send_command(mdev, sock, P_UNPLUG_REMOTE, 0, NULL, 0);
b411b363
PR
1255}
1256
8f7bed77 1257int w_send_out_of_sync(struct drbd_work *w, int cancel)
73a01a18
PR
1258{
1259 struct drbd_request *req = container_of(w, struct drbd_request, w);
00d56944 1260 struct drbd_conf *mdev = w->mdev;
99920dc5 1261 int err;
73a01a18
PR
1262
1263 if (unlikely(cancel)) {
8554df1c 1264 req_mod(req, SEND_CANCELED);
99920dc5 1265 return 0;
73a01a18
PR
1266 }
1267
8f7bed77 1268 err = drbd_send_out_of_sync(mdev, req);
8554df1c 1269 req_mod(req, OOS_HANDED_TO_NETWORK);
73a01a18 1270
99920dc5 1271 return err;
73a01a18
PR
1272}
1273
b411b363
PR
1274/**
1275 * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
1276 * @mdev: DRBD device.
1277 * @w: work object.
1278 * @cancel: The connection will be closed anyways
1279 */
99920dc5 1280int w_send_dblock(struct drbd_work *w, int cancel)
b411b363
PR
1281{
1282 struct drbd_request *req = container_of(w, struct drbd_request, w);
00d56944 1283 struct drbd_conf *mdev = w->mdev;
99920dc5 1284 int err;
b411b363
PR
1285
1286 if (unlikely(cancel)) {
8554df1c 1287 req_mod(req, SEND_CANCELED);
99920dc5 1288 return 0;
b411b363
PR
1289 }
1290
99920dc5
AG
1291 err = drbd_send_dblock(mdev, req);
1292 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
b411b363 1293
99920dc5 1294 return err;
b411b363
PR
1295}
1296
1297/**
1298 * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
1299 * @mdev: DRBD device.
1300 * @w: work object.
1301 * @cancel: The connection will be closed anyways
1302 */
99920dc5 1303int w_send_read_req(struct drbd_work *w, int cancel)
b411b363
PR
1304{
1305 struct drbd_request *req = container_of(w, struct drbd_request, w);
00d56944 1306 struct drbd_conf *mdev = w->mdev;
99920dc5 1307 int err;
b411b363
PR
1308
1309 if (unlikely(cancel)) {
8554df1c 1310 req_mod(req, SEND_CANCELED);
99920dc5 1311 return 0;
b411b363
PR
1312 }
1313
99920dc5 1314 err = drbd_send_drequest(mdev, P_DATA_REQUEST, req->i.sector, req->i.size,
6c1005e7 1315 (unsigned long)req);
b411b363 1316
99920dc5 1317 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
b411b363 1318
99920dc5 1319 return err;
b411b363
PR
1320}
1321
99920dc5 1322int w_restart_disk_io(struct drbd_work *w, int cancel)
265be2d0
PR
1323{
1324 struct drbd_request *req = container_of(w, struct drbd_request, w);
00d56944 1325 struct drbd_conf *mdev = w->mdev;
265be2d0 1326
0778286a 1327 if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
181286ad 1328 drbd_al_begin_io(mdev, &req->i);
265be2d0
PR
1329 /* Calling drbd_al_begin_io() out of the worker might deadlock
1330 theoretically. Practically it can not deadlock, since this is
1331 only used when unfreezing IOs. All the extents of the requests
1332 that made it into the TL are already active */
1333
1334 drbd_req_make_private_bio(req, req->master_bio);
1335 req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
1336 generic_make_request(req->private_bio);
1337
99920dc5 1338 return 0;
265be2d0
PR
1339}
1340
b411b363
PR
1341static int _drbd_may_sync_now(struct drbd_conf *mdev)
1342{
1343 struct drbd_conf *odev = mdev;
daeda1cc 1344 int ra;
b411b363
PR
1345
1346 while (1) {
438c8374
PR
1347 if (!odev->ldev)
1348 return 1;
daeda1cc
PR
1349 rcu_read_lock();
1350 ra = rcu_dereference(odev->ldev->disk_conf)->resync_after;
1351 rcu_read_unlock();
1352 if (ra == -1)
b411b363 1353 return 1;
daeda1cc 1354 odev = minor_to_mdev(ra);
841ce241
AG
1355 if (!expect(odev))
1356 return 1;
b411b363
PR
1357 if ((odev->state.conn >= C_SYNC_SOURCE &&
1358 odev->state.conn <= C_PAUSED_SYNC_T) ||
1359 odev->state.aftr_isp || odev->state.peer_isp ||
1360 odev->state.user_isp)
1361 return 0;
1362 }
1363}
1364
1365/**
1366 * _drbd_pause_after() - Pause resync on all devices that may not resync now
1367 * @mdev: DRBD device.
1368 *
1369 * Called from process context only (admin command and after_state_ch).
1370 */
1371static int _drbd_pause_after(struct drbd_conf *mdev)
1372{
1373 struct drbd_conf *odev;
1374 int i, rv = 0;
1375
695d08fa 1376 rcu_read_lock();
81a5d60e 1377 idr_for_each_entry(&minors, odev, i) {
b411b363
PR
1378 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1379 continue;
1380 if (!_drbd_may_sync_now(odev))
1381 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
1382 != SS_NOTHING_TO_DO);
1383 }
695d08fa 1384 rcu_read_unlock();
b411b363
PR
1385
1386 return rv;
1387}
1388
1389/**
1390 * _drbd_resume_next() - Resume resync on all devices that may resync now
1391 * @mdev: DRBD device.
1392 *
1393 * Called from process context only (admin command and worker).
1394 */
1395static int _drbd_resume_next(struct drbd_conf *mdev)
1396{
1397 struct drbd_conf *odev;
1398 int i, rv = 0;
1399
695d08fa 1400 rcu_read_lock();
81a5d60e 1401 idr_for_each_entry(&minors, odev, i) {
b411b363
PR
1402 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1403 continue;
1404 if (odev->state.aftr_isp) {
1405 if (_drbd_may_sync_now(odev))
1406 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
1407 CS_HARD, NULL)
1408 != SS_NOTHING_TO_DO) ;
1409 }
1410 }
695d08fa 1411 rcu_read_unlock();
b411b363
PR
1412 return rv;
1413}
1414
1415void resume_next_sg(struct drbd_conf *mdev)
1416{
1417 write_lock_irq(&global_state_lock);
1418 _drbd_resume_next(mdev);
1419 write_unlock_irq(&global_state_lock);
1420}
1421
1422void suspend_other_sg(struct drbd_conf *mdev)
1423{
1424 write_lock_irq(&global_state_lock);
1425 _drbd_pause_after(mdev);
1426 write_unlock_irq(&global_state_lock);
1427}
1428
dc97b708
PR
1429/* caller must hold global_state_lock */
1430enum drbd_ret_code drbd_sync_after_valid(struct drbd_conf *mdev, int o_minor)
b411b363
PR
1431{
1432 struct drbd_conf *odev;
daeda1cc 1433 int ra;
b411b363
PR
1434
1435 if (o_minor == -1)
1436 return NO_ERROR;
1437 if (o_minor < -1 || minor_to_mdev(o_minor) == NULL)
1438 return ERR_SYNC_AFTER;
1439
1440 /* check for loops */
1441 odev = minor_to_mdev(o_minor);
1442 while (1) {
1443 if (odev == mdev)
1444 return ERR_SYNC_AFTER_CYCLE;
1445
daeda1cc
PR
1446 rcu_read_lock();
1447 ra = rcu_dereference(odev->ldev->disk_conf)->resync_after;
1448 rcu_read_unlock();
b411b363 1449 /* dependency chain ends here, no cycles. */
daeda1cc 1450 if (ra == -1)
b411b363
PR
1451 return NO_ERROR;
1452
1453 /* follow the dependency chain */
daeda1cc 1454 odev = minor_to_mdev(ra);
b411b363
PR
1455 }
1456}
1457
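/* Example (editor's addition, made-up minor numbers): when minor 2 is being
 * configured with resync-after = 1 and minor 1 has resync-after = -1, the
 * loop above walks 1 -> -1 and returns NO_ERROR.  If minor 1 were instead
 * set to resync-after = 2, the walk would arrive back at the device being
 * configured and return ERR_SYNC_AFTER_CYCLE. */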
dc97b708
PR
1458/* caller must hold global_state_lock */
1459void drbd_sync_after_changed(struct drbd_conf *mdev)
b411b363
PR
1460{
1461 int changes;
b411b363 1462
dc97b708
PR
1463 do {
1464 changes = _drbd_pause_after(mdev);
1465 changes |= _drbd_resume_next(mdev);
1466 } while (changes);
b411b363
PR
1467}
1468
9bd28d3c
LE
1469void drbd_rs_controller_reset(struct drbd_conf *mdev)
1470{
1471 atomic_set(&mdev->rs_sect_in, 0);
1472 atomic_set(&mdev->rs_sect_ev, 0);
1473 mdev->rs_in_flight = 0;
9958c857 1474 mdev->rs_plan_s->total = 0;
9bd28d3c 1475 spin_lock(&mdev->peer_seq_lock);
9958c857 1476 fifo_set(mdev->rs_plan_s, 0);
9bd28d3c
LE
1477 spin_unlock(&mdev->peer_seq_lock);
1478}
1479
1f04af33
PR
1480void start_resync_timer_fn(unsigned long data)
1481{
1482 struct drbd_conf *mdev = (struct drbd_conf *) data;
1483
1484 drbd_queue_work(&mdev->tconn->data.work, &mdev->start_resync_work);
1485}
1486
99920dc5 1487int w_start_resync(struct drbd_work *w, int cancel)
1f04af33 1488{
00d56944
PR
1489 struct drbd_conf *mdev = w->mdev;
1490
1f04af33
PR
1491 if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) {
1492 dev_warn(DEV, "w_start_resync later...\n");
1493 mdev->start_resync_timer.expires = jiffies + HZ/10;
1494 add_timer(&mdev->start_resync_timer);
99920dc5 1495 return 0;
1f04af33
PR
1496 }
1497
1498 drbd_start_resync(mdev, C_SYNC_SOURCE);
1499 clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags);
99920dc5 1500 return 0;
1f04af33
PR
1501}
1502
b411b363
PR
1503/**
1504 * drbd_start_resync() - Start the resync process
1505 * @mdev: DRBD device.
1506 * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET
1507 *
1508 * This function might bring you directly into one of the
1509 * C_PAUSED_SYNC_* states.
1510 */
1511void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
1512{
1513 union drbd_state ns;
1514 int r;
1515
c4752ef1 1516 if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn < C_AHEAD) {
b411b363
PR
1517 dev_err(DEV, "Resync already running!\n");
1518 return;
1519 }
1520
59817f4f
PR
1521 if (mdev->state.conn < C_AHEAD) {
1522 /* In case a previous resync run was aborted by an IO error/detach on the peer. */
1523 drbd_rs_cancel_all(mdev);
1524 /* This should be done when we abort the resync. We definitely do not
1525 want to have this for connections going back and forth between
1526 Ahead/Behind and SyncSource/SyncTarget */
1527 }
b411b363 1528
e64a3294
PR
1529 if (!test_bit(B_RS_H_DONE, &mdev->flags)) {
1530 if (side == C_SYNC_TARGET) {
1531 /* Since application IO was locked out during C_WF_BITMAP_T and
1532 C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
1533 we check whether we might make the data inconsistent. */
1534 r = drbd_khelper(mdev, "before-resync-target");
1535 r = (r >> 8) & 0xff;
1536 if (r > 0) {
1537 dev_info(DEV, "before-resync-target handler returned %d, "
09b9e797 1538 "dropping connection.\n", r);
38fa9988 1539 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
09b9e797
PR
1540 return;
1541 }
e64a3294
PR
1542 } else /* C_SYNC_SOURCE */ {
1543 r = drbd_khelper(mdev, "before-resync-source");
1544 r = (r >> 8) & 0xff;
1545 if (r > 0) {
1546 if (r == 3) {
1547 dev_info(DEV, "before-resync-source handler returned %d, "
1548 "ignoring. Old userland tools?", r);
1549 } else {
1550 dev_info(DEV, "before-resync-source handler returned %d, "
1551 "dropping connection.\n", r);
38fa9988 1552 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
e64a3294
PR
1553 return;
1554 }
1555 }
09b9e797 1556 }
b411b363
PR
1557 }
1558
e64a3294 1559 if (current == mdev->tconn->worker.task) {
dad20554 1560 /* The worker should not sleep waiting for state_mutex,
e64a3294 1561 that can take long */
8410da8f 1562 if (!mutex_trylock(mdev->state_mutex)) {
e64a3294
PR
1563 set_bit(B_RS_H_DONE, &mdev->flags);
1564 mdev->start_resync_timer.expires = jiffies + HZ/5;
1565 add_timer(&mdev->start_resync_timer);
1566 return;
1567 }
1568 } else {
8410da8f 1569 mutex_lock(mdev->state_mutex);
e64a3294
PR
1570 }
1571 clear_bit(B_RS_H_DONE, &mdev->flags);
b411b363
PR
1572
1573 if (!get_ldev_if_state(mdev, D_NEGOTIATING)) {
8410da8f 1574 mutex_unlock(mdev->state_mutex);
b411b363
PR
1575 return;
1576 }
1577
b411b363 1578 write_lock_irq(&global_state_lock);
78bae59b 1579 ns = drbd_read_state(mdev);
b411b363
PR
1580
1581 ns.aftr_isp = !_drbd_may_sync_now(mdev);
1582
1583 ns.conn = side;
1584
1585 if (side == C_SYNC_TARGET)
1586 ns.disk = D_INCONSISTENT;
1587 else /* side == C_SYNC_SOURCE */
1588 ns.pdsk = D_INCONSISTENT;
1589
1590 r = __drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
78bae59b 1591 ns = drbd_read_state(mdev);
b411b363
PR
1592
1593 if (ns.conn < C_CONNECTED)
1594 r = SS_UNKNOWN_ERROR;
1595
1596 if (r == SS_SUCCESS) {
1d7734a0
LE
1597 unsigned long tw = drbd_bm_total_weight(mdev);
1598 unsigned long now = jiffies;
1599 int i;
1600
b411b363
PR
1601 mdev->rs_failed = 0;
1602 mdev->rs_paused = 0;
b411b363 1603 mdev->rs_same_csum = 0;
0f0601f4
LE
1604 mdev->rs_last_events = 0;
1605 mdev->rs_last_sect_ev = 0;
1d7734a0
LE
1606 mdev->rs_total = tw;
1607 mdev->rs_start = now;
1608 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
1609 mdev->rs_mark_left[i] = tw;
1610 mdev->rs_mark_time[i] = now;
1611 }
b411b363
PR
1612 _drbd_pause_after(mdev);
1613 }
1614 write_unlock_irq(&global_state_lock);
5a22db89 1615
b411b363
PR
1616 if (r == SS_SUCCESS) {
1617 dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
1618 drbd_conn_str(ns.conn),
1619 (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
1620 (unsigned long) mdev->rs_total);
6c922ed5
LE
1621 if (side == C_SYNC_TARGET)
1622 mdev->bm_resync_fo = 0;
1623
1624 /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
1625 * with w_send_oos, or the sync target will get confused as to
1626 * how many bits to resync. We cannot do that always, because for an
1627 * empty resync and protocol < 95, we need to do it here, as we call
1628 * drbd_resync_finished from here in that case.
1629 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
1630 * and from after_state_ch otherwise. */
31890f4a 1631 if (side == C_SYNC_SOURCE && mdev->tconn->agreed_pro_version < 96)
6c922ed5 1632 drbd_gen_and_send_sync_uuid(mdev);
b411b363 1633
31890f4a 1634 if (mdev->tconn->agreed_pro_version < 95 && mdev->rs_total == 0) {
af85e8e8
LE
1635 /* This still has a race (about when exactly the peers
1636 * detect connection loss) that can lead to a full sync
1637 * on next handshake. In 8.3.9 we fixed this with explicit
1638 * resync-finished notifications, but the fix
1639 * introduces a protocol change. Sleeping for some
1640 * time longer than the ping interval + timeout on the
1641 * SyncSource, to give the SyncTarget the chance to
1642 * detect connection loss, then waiting for a ping
1643 * response (implicit in drbd_resync_finished) reduces
1644 * the race considerably, but does not solve it. */
44ed167d
PR
1645 if (side == C_SYNC_SOURCE) {
1646 struct net_conf *nc;
1647 int timeo;
1648
1649 rcu_read_lock();
1650 nc = rcu_dereference(mdev->tconn->net_conf);
1651 timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
1652 rcu_read_unlock();
1653 schedule_timeout_interruptible(timeo);
1654 }
b411b363 1655 drbd_resync_finished(mdev);
b411b363
PR
1656 }
1657
9bd28d3c 1658 drbd_rs_controller_reset(mdev);
b411b363
PR
1659 /* ns.conn may already be != mdev->state.conn,
1660 * we may have been paused in between, or become paused until
1661 * the timer triggers.
1662 * No matter, that is handled in resync_timer_fn() */
1663 if (ns.conn == C_SYNC_TARGET)
1664 mod_timer(&mdev->resync_timer, jiffies);
1665
1666 drbd_md_sync(mdev);
1667 }
5a22db89 1668 put_ldev(mdev);
8410da8f 1669 mutex_unlock(mdev->state_mutex);
b411b363
PR
1670}
1671
1672int drbd_worker(struct drbd_thread *thi)
1673{
392c8801 1674 struct drbd_tconn *tconn = thi->tconn;
b411b363 1675 struct drbd_work *w = NULL;
0e29d163 1676 struct drbd_conf *mdev;
44ed167d 1677 struct net_conf *nc;
b411b363 1678 LIST_HEAD(work_list);
f399002e 1679 int vnr, intr = 0;
44ed167d 1680 int cork;
b411b363 1681
e77a0a5c 1682 while (get_t_state(thi) == RUNNING) {
80822284 1683 drbd_thread_current_set_cpu(thi);
b411b363 1684
19393e10
PR
1685 if (down_trylock(&tconn->data.work.s)) {
1686 mutex_lock(&tconn->data.mutex);
44ed167d
PR
1687
1688 rcu_read_lock();
1689 nc = rcu_dereference(tconn->net_conf);
1690 cork = nc ? !nc->no_cork : 0;
1691 rcu_read_unlock();
1692
1693 if (tconn->data.socket && cork)
19393e10
PR
1694 drbd_tcp_uncork(tconn->data.socket);
1695 mutex_unlock(&tconn->data.mutex);
b411b363 1696
19393e10 1697 intr = down_interruptible(&tconn->data.work.s);
b411b363 1698
19393e10 1699 mutex_lock(&tconn->data.mutex);
44ed167d 1700 if (tconn->data.socket && cork)
19393e10
PR
1701 drbd_tcp_cork(tconn->data.socket);
1702 mutex_unlock(&tconn->data.mutex);
b411b363
PR
1703 }
1704
1705 if (intr) {
b411b363 1706 flush_signals(current);
19393e10
PR
1707 if (get_t_state(thi) == RUNNING) {
1708 conn_warn(tconn, "Worker got an unexpected signal\n");
b411b363 1709 continue;
19393e10 1710 }
b411b363
PR
1711 break;
1712 }
1713
e77a0a5c 1714 if (get_t_state(thi) != RUNNING)
b411b363
PR
1715 break;
1716 /* With this break, we have done a down() but not consumed
1717 the entry from the list. The cleanup code takes care of
1718 this... */
1719
1720 w = NULL;
19393e10
PR
1721 spin_lock_irq(&tconn->data.work.q_lock);
1722 if (list_empty(&tconn->data.work.q)) {
b411b363
PR
1723 /* something terribly wrong in our logic.
1724 * we were able to down() the semaphore,
1725 * but the list is empty... doh.
1726 *
1727 * what is the best thing to do now?
1728 * try again from scratch, restarting the receiver,
1729 * asender, whatnot? could break even more ugly,
1730 * e.g. when we are primary, but no good local data.
1731 *
1732 * I'll try to get away just starting over this loop.
1733 */
19393e10
PR
1734 conn_warn(tconn, "Work list unexpectedly empty\n");
1735 spin_unlock_irq(&tconn->data.work.q_lock);
b411b363
PR
1736 continue;
1737 }
19393e10 1738 w = list_entry(tconn->data.work.q.next, struct drbd_work, list);
b411b363 1739 list_del_init(&w->list);
19393e10 1740 spin_unlock_irq(&tconn->data.work.q_lock);
b411b363 1741
99920dc5 1742 if (w->cb(w, tconn->cstate < C_WF_REPORT_PARAMS)) {
b411b363 1743 /* dev_warn(DEV, "worker: a callback failed! \n"); */
bbeb641c
PR
1744 if (tconn->cstate >= C_WF_REPORT_PARAMS)
1745 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
b411b363
PR
1746 }
1747 }
b411b363 1748
19393e10
PR
1749 spin_lock_irq(&tconn->data.work.q_lock);
1750 while (!list_empty(&tconn->data.work.q)) {
1751 list_splice_init(&tconn->data.work.q, &work_list);
1752 spin_unlock_irq(&tconn->data.work.q_lock);
b411b363
PR
1753
1754 while (!list_empty(&work_list)) {
1755 w = list_entry(work_list.next, struct drbd_work, list);
1756 list_del_init(&w->list);
00d56944 1757 w->cb(w, 1);
b411b363
PR
1758 }
1759
19393e10 1760 spin_lock_irq(&tconn->data.work.q_lock);
b411b363 1761 }
19393e10 1762 sema_init(&tconn->data.work.s, 0);
b411b363
PR
1763 /* DANGEROUS race: if someone did queue his work within the spinlock,
1764 * but up() ed outside the spinlock, we could get an up() on the
1765 * semaphore without corresponding list entry.
1766 * So don't do that.
1767 */
19393e10 1768 spin_unlock_irq(&tconn->data.work.q_lock);
b411b363 1769
d3fcb490 1770 down_read(&drbd_cfg_rwsem);
f399002e 1771 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
0e29d163 1772 D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE);
0e29d163
PR
1773 drbd_mdev_cleanup(mdev);
1774 }
d3fcb490 1775 up_read(&drbd_cfg_rwsem);
b411b363
PR
1776
1777 return 0;
1778}