]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - drivers/block/drbd/drbd_worker.c
drbd: Do not mod_timer() with a past time
[mirror_ubuntu-bionic-kernel.git] / drivers / block / drbd / drbd_worker.c
CommitLineData
b411b363
PR
1/*
2 drbd_worker.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24 */
25
b411b363 26#include <linux/module.h>
b411b363
PR
27#include <linux/drbd.h>
28#include <linux/sched.h>
b411b363
PR
29#include <linux/wait.h>
30#include <linux/mm.h>
31#include <linux/memcontrol.h>
32#include <linux/mm_inline.h>
33#include <linux/slab.h>
34#include <linux/random.h>
b411b363
PR
35#include <linux/string.h>
36#include <linux/scatterlist.h>
37
38#include "drbd_int.h"
39#include "drbd_req.h"
b411b363 40
00d56944 41static int w_make_ov_request(struct drbd_work *w, int cancel);
b411b363
PR
42
43
c5a91619
AG
44/* endio handlers:
45 * drbd_md_io_complete (defined here)
fcefa62e
AG
46 * drbd_request_endio (defined here)
47 * drbd_peer_request_endio (defined here)
c5a91619
AG
48 * bm_async_io_complete (defined in drbd_bitmap.c)
49 *
b411b363
PR
50 * For all these callbacks, note the following:
51 * The callbacks will be called in irq context by the IDE drivers,
52 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
53 * Try to get the locking right :)
54 *
55 */
56
57
58/* About the global_state_lock
59 Each state transition on an device holds a read lock. In case we have
95f8efd0 60 to evaluate the resync after dependencies, we grab a write lock, because
b411b363
PR
61 we need stable states on all devices for that. */
62rwlock_t global_state_lock;
63
/* used for synchronous meta data and bitmap IO
 * submitted by drbd_md_sync_page_io()
 *
 * bio completion callback: records the result, signals the waiter,
 * and drops the references taken for the meta-data IO.
 */
void drbd_md_io_complete(struct bio *bio, int error)
{
	struct drbd_md_io *md_io;
	struct drbd_conf *mdev;

	md_io = (struct drbd_md_io *)bio->bi_private;
	/* md_io is embedded in struct drbd_conf; recover the device from it */
	mdev = container_of(md_io, struct drbd_conf, md_io);

	md_io->error = error;

	/* set done before waking, so the waiter sees a consistent result */
	md_io->done = 1;
	wake_up(&mdev->misc_wait);
	bio_put(bio);
	drbd_md_put_buffer(mdev);
	put_ldev(mdev);
}
83
/* reads on behalf of the partner,
 * "submitted" by the receiver
 *
 * Final completion stage of a peer read request: account the read,
 * remove it from read_ee, then hand the work item to the worker.
 * Called from bio completion context, hence the irqsave locking.
 */
void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_conf *mdev = peer_req->w.mdev;

	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
	mdev->read_cnt += peer_req->i.size >> 9; /* i.size is bytes; count sectors */
	list_del(&peer_req->w.list);
	if (list_empty(&mdev->read_ee))
		wake_up(&mdev->ee_wait);
	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
		__drbd_chk_io_error(mdev, false);
	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);

	drbd_queue_work(&mdev->tconn->data.work, &peer_req->w);
	put_ldev(mdev);
}
104
/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver, final stage.
 *
 * Moves the finished request onto done_ee and wakes the asender so the
 * ack can be sent.  Note the careful copy-out of the fields we still
 * need before releasing req_lock (see comment below).
 */
static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_conf *mdev = peer_req->w.mdev;
	struct drbd_interval i;
	int do_wake;
	u64 block_id;
	int do_al_complete_io;

	/* after we moved peer_req to done_ee,
	 * we may no longer access it,
	 * it may be freed/reused already!
	 * (as soon as we release the req_lock) */
	i = peer_req->i;
	do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
	block_id = peer_req->block_id;

	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
	mdev->writ_cnt += peer_req->i.size >> 9;
	list_del(&peer_req->w.list); /* has been on active_ee or sync_ee */
	list_add_tail(&peer_req->w.list, &mdev->done_ee);

	/*
	 * Do not remove from the write_requests tree here: we did not send the
	 * Ack yet and did not wake possibly waiting conflicting requests.
	 * Removed from the tree from "drbd_process_done_ee" within the
	 * appropriate w.cb (e_end_block/e_end_resync_block) or from
	 * _drbd_clear_done_ee.
	 */

	do_wake = list_empty(block_id == ID_SYNCER ? &mdev->sync_ee : &mdev->active_ee);

	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
		__drbd_chk_io_error(mdev, false);
	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);

	/* resync writes also need their resync-LRU entry completed */
	if (block_id == ID_SYNCER)
		drbd_rs_complete_io(mdev, i.sector);

	if (do_wake)
		wake_up(&mdev->ee_wait);

	if (do_al_complete_io)
		drbd_al_complete_io(mdev, &i);

	wake_asender(mdev->tconn);
	put_ldev(mdev);
}
b411b363 155
45bb912b
LE
/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver.
 *
 * Per-bio completion; a peer request may span several bios, only the
 * last completion triggers the final stage handler.
 */
void drbd_peer_request_endio(struct bio *bio, int error)
{
	struct drbd_peer_request *peer_req = bio->bi_private;
	struct drbd_conf *mdev = peer_req->w.mdev;
	int uptodate = bio_flagged(bio, BIO_UPTODATE);
	int is_write = bio_data_dir(bio) == WRITE;

	if (error && __ratelimit(&drbd_ratelimit_state))
		dev_warn(DEV, "%s: error=%d s=%llus\n",
				is_write ? "write" : "read", error,
				(unsigned long long)peer_req->i.sector);
	if (!error && !uptodate) {
		if (__ratelimit(&drbd_ratelimit_state))
			dev_warn(DEV, "%s: setting error to -EIO s=%llus\n",
					is_write ? "write" : "read",
					(unsigned long long)peer_req->i.sector);
		/* strange behavior of some lower level drivers...
		 * fail the request by clearing the uptodate flag,
		 * but do not return any error?! */
		error = -EIO;
	}

	if (error)
		set_bit(__EE_WAS_ERROR, &peer_req->flags);

	bio_put(bio); /* no need for the bio anymore */
	/* only the completion of the last pending bio finishes the request */
	if (atomic_dec_and_test(&peer_req->pending_bios)) {
		if (is_write)
			drbd_endio_write_sec_final(peer_req);
		else
			drbd_endio_read_sec_final(peer_req);
	}
}
192
/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
 *
 * Maps the bio result onto a drbd_req_event and feeds it to the request
 * state machine; completes the master bio outside the lock if needed.
 */
void drbd_request_endio(struct bio *bio, int error)
{
	unsigned long flags;
	struct drbd_request *req = bio->bi_private;
	struct drbd_conf *mdev = req->w.mdev;
	struct bio_and_error m;
	enum drbd_req_event what;
	int uptodate = bio_flagged(bio, BIO_UPTODATE);

	if (!error && !uptodate) {
		dev_warn(DEV, "p %s: setting error to -EIO\n",
			 bio_data_dir(bio) == WRITE ? "write" : "read");
		/* strange behavior of some lower level drivers...
		 * fail the request by clearing the uptodate flag,
		 * but do not return any error?! */
		error = -EIO;
	}

	/* to avoid recursion in __req_mod */
	if (unlikely(error)) {
		what = (bio_data_dir(bio) == WRITE)
			? WRITE_COMPLETED_WITH_ERROR
			: (bio_rw(bio) == READ)
			  ? READ_COMPLETED_WITH_ERROR
			  : READ_AHEAD_COMPLETED_WITH_ERROR;
	} else
		what = COMPLETED_OK;

	/* drop our reference to the private bio; callers see the error code */
	bio_put(req->private_bio);
	req->private_bio = ERR_PTR(error);

	/* not req_mod(), we need irqsave here! */
	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
	__req_mod(req, what, &m);
	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);

	if (m.bio)
		complete_master_bio(mdev, &m);
}
234
/* Worker callback: after a local read error, retry the read from the peer
 * (send the read request over the network) unless canceled or the peer's
 * disk is no longer up to date. */
int w_read_retry_remote(struct drbd_work *w, int cancel)
{
	struct drbd_request *req = container_of(w, struct drbd_request, w);
	struct drbd_conf *mdev = w->mdev;

	/* We should not detach for read io-error,
	 * but try to WRITE the P_DATA_REPLY to the failed location,
	 * to give the disk the chance to relocate that block */

	spin_lock_irq(&mdev->tconn->req_lock);
	if (cancel || mdev->state.pdsk != D_UP_TO_DATE) {
		_req_mod(req, READ_RETRY_REMOTE_CANCELED);
		spin_unlock_irq(&mdev->tconn->req_lock);
		return 0;
	}
	spin_unlock_irq(&mdev->tconn->req_lock);

	return w_send_read_req(w, 0);
}
254
/* Compute a digest over the data pages of a peer request.
 * Walks the page chain: all but the last page are fully used;
 * the last page may be partially used (i.size modulo PAGE_SIZE). */
void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm,
		  struct drbd_peer_request *peer_req, void *digest)
{
	struct hash_desc desc;
	struct scatterlist sg;
	struct page *page = peer_req->pages;
	struct page *tmp;
	unsigned len;

	desc.tfm = tfm;
	desc.flags = 0;

	sg_init_table(&sg, 1);
	crypto_hash_init(&desc);

	while ((tmp = page_chain_next(page))) {
		/* all but the last page will be fully used */
		sg_set_page(&sg, page, PAGE_SIZE, 0);
		crypto_hash_update(&desc, &sg, sg.length);
		page = tmp;
	}
	/* and now the last, possibly only partially used page;
	 * len == 0 means the last page is fully used, hence the ?: */
	len = peer_req->i.size & (PAGE_SIZE - 1);
	sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
	crypto_hash_update(&desc, &sg, sg.length);
	crypto_hash_final(&desc, digest);
}
282
/* Compute a digest over all segments of a bio. */
void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest)
{
	struct hash_desc desc;
	struct scatterlist sg;
	struct bio_vec *bvec;
	int i;

	desc.tfm = tfm;
	desc.flags = 0;

	sg_init_table(&sg, 1);
	crypto_hash_init(&desc);

	/* feed every bio segment (page fragment) into the hash */
	__bio_for_each_segment(bvec, bio, i, 0) {
		sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset);
		crypto_hash_update(&desc, &sg, sg.length);
	}
	crypto_hash_final(&desc, digest);
}
302
/* MAYBE merge common code with w_e_end_ov_req */
/* Worker callback: hash the data of a resync read and send the checksum
 * to the peer as a P_CSUM_RS_REQUEST.  Note the deliberate ordering:
 * the peer request (and its pages) is freed BEFORE sending. */
static int w_e_send_csum(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_conf *mdev = w->mdev;
	int digest_size;
	void *digest;
	int err = 0;

	if (unlikely(cancel))
		goto out;

	if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
		goto out;

	digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
	digest = kmalloc(digest_size, GFP_NOIO);
	if (digest) {
		/* copy out sector/size before freeing peer_req */
		sector_t sector = peer_req->i.sector;
		unsigned int size = peer_req->i.size;
		drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
		/* Free peer_req and pages before send.
		 * In case we block on congestion, we could otherwise run into
		 * some distributed deadlock, if the other side blocks on
		 * congestion as well, because our receiver blocks in
		 * drbd_alloc_pages due to pp_in_use > max_buffers. */
		drbd_free_peer_req(mdev, peer_req);
		peer_req = NULL;
		inc_rs_pending(mdev);
		err = drbd_send_drequest_csum(mdev, sector, size,
					      digest, digest_size,
					      P_CSUM_RS_REQUEST);
		kfree(digest);
	} else {
		dev_err(DEV, "kmalloc() of digest failed.\n");
		err = -ENOMEM;
	}

out:
	if (peer_req)
		drbd_free_peer_req(mdev, peer_req);

	if (unlikely(err))
		dev_err(DEV, "drbd_send_drequest(..., csum) failed\n");
	return err;
}
349
350#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
351
/* Allocate and submit a local read for checksum-based resync.
 * Returns 0 on success, -EIO if the local disk is gone,
 * -EAGAIN if the request should be retried later (throttled,
 * allocation failure, or submit failure). */
static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
{
	struct drbd_peer_request *peer_req;

	if (!get_ldev(mdev))
		return -EIO;

	if (drbd_rs_should_slow_down(mdev, sector))
		goto defer;

	/* GFP_TRY, because if there is no memory available right now, this may
	 * be rescheduled for later. It is "only" background resync, after all. */
	peer_req = drbd_alloc_peer_req(mdev, ID_SYNCER /* unused */, sector,
				       size, GFP_TRY);
	if (!peer_req)
		goto defer;

	/* once the read completes, w_e_send_csum will hash and send it */
	peer_req->w.cb = w_e_send_csum;
	spin_lock_irq(&mdev->tconn->req_lock);
	list_add(&peer_req->w.list, &mdev->read_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	atomic_add(size >> 9, &mdev->rs_sect_ev);
	if (drbd_submit_peer_request(mdev, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
		return 0;

	/* If it failed because of ENOMEM, retry should help. If it failed
	 * because bio_add_page failed (probably broken lower level driver),
	 * retry may or may not help.
	 * If it does not, you may need to force disconnect. */
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	drbd_free_peer_req(mdev, peer_req);
defer:
	put_ldev(mdev);
	return -EAGAIN;
}
391
99920dc5 392int w_resync_timer(struct drbd_work *w, int cancel)
b411b363 393{
00d56944 394 struct drbd_conf *mdev = w->mdev;
63106d3c
PR
395 switch (mdev->state.conn) {
396 case C_VERIFY_S:
00d56944 397 w_make_ov_request(w, cancel);
63106d3c
PR
398 break;
399 case C_SYNC_TARGET:
00d56944 400 w_make_resync_request(w, cancel);
63106d3c 401 break;
b411b363
PR
402 }
403
99920dc5 404 return 0;
794abb75
PR
405}
406
407void resync_timer_fn(unsigned long data)
408{
409 struct drbd_conf *mdev = (struct drbd_conf *) data;
410
411 if (list_empty(&mdev->resync_work.list))
e42325a5 412 drbd_queue_work(&mdev->tconn->data.work, &mdev->resync_work);
b411b363
PR
413}
414
778f271d
PR
415static void fifo_set(struct fifo_buffer *fb, int value)
416{
417 int i;
418
419 for (i = 0; i < fb->size; i++)
f10f2623 420 fb->values[i] = value;
778f271d
PR
421}
422
423static int fifo_push(struct fifo_buffer *fb, int value)
424{
425 int ov;
426
427 ov = fb->values[fb->head_index];
428 fb->values[fb->head_index++] = value;
429
430 if (fb->head_index >= fb->size)
431 fb->head_index = 0;
432
433 return ov;
434}
435
436static void fifo_add_val(struct fifo_buffer *fb, int value)
437{
438 int i;
439
440 for (i = 0; i < fb->size; i++)
441 fb->values[i] += value;
442}
443
9958c857
PR
444struct fifo_buffer *fifo_alloc(int fifo_size)
445{
446 struct fifo_buffer *fb;
447
448 fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_KERNEL);
449 if (!fb)
450 return NULL;
451
452 fb->head_index = 0;
453 fb->size = fifo_size;
454 fb->total = 0;
455
456 return fb;
457}
458
/* Dynamic resync speed controller: decide how many sectors to request
 * in this turn, based on how many came in since the last turn and the
 * fill/delay targets from the disk configuration.
 * NOTE(review): dereferences RCU-protected pointers; the only visible
 * caller (drbd_rs_number_requests) holds rcu_read_lock() around it. */
static int drbd_rs_controller(struct drbd_conf *mdev)
{
	struct disk_conf *dc;
	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
	unsigned int want;     /* The number of sectors we want in the proxy */
	int req_sect; /* Number of sectors to request in this turn */
	int correction; /* Number of sectors more we need in the proxy*/
	int cps; /* correction per invocation of drbd_rs_controller() */
	int steps; /* Number of time steps to plan ahead */
	int curr_corr;
	int max_sect;
	struct fifo_buffer *plan;

	sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */
	mdev->rs_in_flight -= sect_in;

	dc = rcu_dereference(mdev->ldev->disk_conf);
	plan = rcu_dereference(mdev->rs_plan_s);

	steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */

	if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */
		want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
	} else { /* normal path */
		want = dc->c_fill_target ? dc->c_fill_target :
			sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
	}

	correction = want - mdev->rs_in_flight - plan->total;

	/* Plan ahead: spread the correction over all planned steps */
	cps = correction / steps;
	fifo_add_val(plan, cps);
	plan->total += cps * steps;

	/* What we do in this step */
	curr_corr = fifo_push(plan, 0);
	plan->total -= curr_corr;

	req_sect = sect_in + curr_corr;
	if (req_sect < 0)
		req_sect = 0;

	/* clamp to the configured maximum rate */
	max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
	if (req_sect > max_sect)
		req_sect = max_sect;

	/*
	dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
		 sect_in, mdev->rs_in_flight, want, correction,
		 steps, cps, mdev->rs_planed, curr_corr, req_sect);
	*/

	return req_sect;
}
514
/* Compute how many resync requests to generate in this turn:
 * either via the dynamic controller (if a plan fifo is configured)
 * or from the static resync_rate.  Also updates c_sync_rate. */
static int drbd_rs_number_requests(struct drbd_conf *mdev)
{
	int number;

	rcu_read_lock();
	if (rcu_dereference(mdev->rs_plan_s)->size) {
		/* dynamic controller path; convert sectors to BM blocks */
		number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
		mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
	} else {
		/* fixed rate from the configuration */
		mdev->c_sync_rate = rcu_dereference(mdev->ldev->disk_conf)->resync_rate;
		number = SLEEP_TIME * mdev->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
	}
	rcu_read_unlock();

	/* ignore the amount of pending requests, the resync controller should
	 * throttle down to incoming reply rate soon enough anyways. */
	return number;
}
533
/* Worker callback: generate up to "number" resync data requests from the
 * out-of-sync bitmap, merging adjacent dirty bits into larger, aligned
 * requests where possible.  Re-arms the resync timer when throttled or
 * when the batch is done. */
int w_make_resync_request(struct drbd_work *w, int cancel)
{
	struct drbd_conf *mdev = w->mdev;
	unsigned long bit;
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
	int max_bio_size;
	int number, rollback_i, size;
	int align, queued, sndbuf;
	int i = 0;

	if (unlikely(cancel))
		return 0;

	if (mdev->rs_total == 0) {
		/* empty resync? */
		drbd_resync_finished(mdev);
		return 0;
	}

	if (!get_ldev(mdev)) {
		/* Since we only need to access mdev->rsync a
		   get_ldev_if_state(mdev,D_FAILED) would be sufficient, but
		   to continue resync with a broken disk makes no sense at
		   all */
		dev_err(DEV, "Disk broke down during resync!\n");
		return 0;
	}

	max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9;
	number = drbd_rs_number_requests(mdev);
	if (number == 0)
		goto requeue;

	for (i = 0; i < number; i++) {
		/* Stop generating RS requests, when half of the send buffer is filled */
		mutex_lock(&mdev->tconn->data.mutex);
		if (mdev->tconn->data.socket) {
			queued = mdev->tconn->data.socket->sk->sk_wmem_queued;
			sndbuf = mdev->tconn->data.socket->sk->sk_sndbuf;
		} else {
			queued = 1;
			sndbuf = 0;
		}
		mutex_unlock(&mdev->tconn->data.mutex);
		if (queued > sndbuf / 2)
			goto requeue;

next_sector:
		size = BM_BLOCK_SIZE;
		bit  = drbd_bm_find_next(mdev, mdev->bm_resync_fo);

		if (bit == DRBD_END_OF_BITMAP) {
			/* no more dirty bits; wait for the last replies */
			mdev->bm_resync_fo = drbd_bm_bits(mdev);
			put_ldev(mdev);
			return 0;
		}

		sector = BM_BIT_TO_SECT(bit);

		if (drbd_rs_should_slow_down(mdev, sector) ||
		    drbd_try_rs_begin_io(mdev, sector)) {
			/* throttled or busy: remember position and retry later */
			mdev->bm_resync_fo = bit;
			goto requeue;
		}
		mdev->bm_resync_fo = bit + 1;

		if (unlikely(drbd_bm_test_bit(mdev, bit) == 0)) {
			/* bit got cleared in the meantime */
			drbd_rs_complete_io(mdev, sector);
			goto next_sector;
		}

#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
		/* try to find some adjacent bits.
		 * we stop if we have already the maximum req size.
		 *
		 * Additionally always align bigger requests, in order to
		 * be prepared for all stripe sizes of software RAIDs.
		 */
		align = 1;
		rollback_i = i;
		for (;;) {
			if (size + BM_BLOCK_SIZE > max_bio_size)
				break;

			/* Be always aligned */
			if (sector & ((1<<(align+3))-1))
				break;

			/* do not cross extent boundaries */
			if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
				break;
			/* now, is it actually dirty, after all?
			 * caution, drbd_bm_test_bit is tri-state for some
			 * obscure reason; ( b == 0 ) would get the out-of-band
			 * only accidentally right because of the "oddly sized"
			 * adjustment below */
			if (drbd_bm_test_bit(mdev, bit+1) != 1)
				break;
			bit++;
			size += BM_BLOCK_SIZE;
			if ((BM_BLOCK_SIZE << align) <= size)
				align++;
			i++;
		}
		/* if we merged some,
		 * reset the offset to start the next drbd_bm_find_next from */
		if (size > BM_BLOCK_SIZE)
			mdev->bm_resync_fo = bit + 1;
#endif

		/* adjust very last sectors, in case we are oddly sized */
		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;
		if (mdev->tconn->agreed_pro_version >= 89 && mdev->tconn->csums_tfm) {
			/* checksum-based resync: read locally first */
			switch (read_for_csum(mdev, sector, size)) {
			case -EIO: /* Disk failure */
				put_ldev(mdev);
				return -EIO;
			case -EAGAIN: /* allocation failed, or ldev busy */
				drbd_rs_complete_io(mdev, sector);
				mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
				i = rollback_i;
				goto requeue;
			case 0:
				/* everything ok */
				break;
			default:
				BUG();
			}
		} else {
			int err;

			inc_rs_pending(mdev);
			err = drbd_send_drequest(mdev, P_RS_DATA_REQUEST,
						 sector, size, ID_SYNCER);
			if (err) {
				dev_err(DEV, "drbd_send_drequest() failed, aborting...\n");
				dec_rs_pending(mdev);
				put_ldev(mdev);
				return err;
			}
		}
	}

	if (mdev->bm_resync_fo >= drbd_bm_bits(mdev)) {
		/* last syncer _request_ was sent,
		 * but the P_RS_DATA_REPLY not yet received.  sync will end (and
		 * next sync group will resume), as soon as we receive the last
		 * resync data block, and the last bit is cleared.
		 * until then resync "work" is "inactive" ...
		 */
		put_ldev(mdev);
		return 0;
	}

 requeue:
	mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
	put_ldev(mdev);
	return 0;
}
696
/* Worker callback: generate up to "number" online-verify requests,
 * starting at mdev->ov_position; re-arms the resync timer afterwards. */
static int w_make_ov_request(struct drbd_work *w, int cancel)
{
	struct drbd_conf *mdev = w->mdev;
	int number, i, size;
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);

	if (unlikely(cancel))
		return 1;

	number = drbd_rs_number_requests(mdev);

	sector = mdev->ov_position;
	for (i = 0; i < number; i++) {
		if (sector >= capacity) {
			return 1;
		}

		size = BM_BLOCK_SIZE;

		if (drbd_rs_should_slow_down(mdev, sector) ||
		    drbd_try_rs_begin_io(mdev, sector)) {
			/* throttled or busy: remember position and retry later */
			mdev->ov_position = sector;
			goto requeue;
		}

		/* adjust very last sectors, in case we are oddly sized */
		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;

		inc_rs_pending(mdev);
		if (drbd_send_ov_request(mdev, sector, size)) {
			dec_rs_pending(mdev);
			return 0;
		}
		sector += BM_SECT_PER_BIT;
	}
	mdev->ov_position = sector;

 requeue:
	mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
	return 1;
}
740
/* Worker callback at the end of an online verify run: print the
 * out-of-sync summary and finish the resync.  Frees its own work object. */
int w_ov_finished(struct drbd_work *w, int cancel)
{
	struct drbd_conf *mdev = w->mdev;
	kfree(w);
	ov_out_of_sync_print(mdev);
	drbd_resync_finished(mdev);

	return 0;
}
750
/* Worker callback used to retry drbd_resync_finished() from worker
 * context (queued when drbd_rs_del_all() could not complete).
 * Frees its own work object. */
static int w_resync_finished(struct drbd_work *w, int cancel)
{
	struct drbd_conf *mdev = w->mdev;
	kfree(w);

	drbd_resync_finished(mdev);

	return 0;
}
760
af85e8e8
LE
/* Send a ping to the peer and block until the ping ack arrives or the
 * connection drops below C_CONNECTED. */
static void ping_peer(struct drbd_conf *mdev)
{
	struct drbd_tconn *tconn = mdev->tconn;

	clear_bit(GOT_PING_ACK, &tconn->flags);
	request_ping(tconn);
	wait_event(tconn->ping_wait,
		   test_bit(GOT_PING_ACK, &tconn->flags) || mdev->state.conn < C_CONNECTED);
}
770
b411b363
PR
/* Finish a resync or online-verify run: report statistics, update the
 * disk/peer-disk states and UUIDs under req_lock, and possibly invoke a
 * user-space helper.  May re-queue itself (via w_resync_finished) if the
 * resync LRU cannot be emptied yet. */
int drbd_resync_finished(struct drbd_conf *mdev)
{
	unsigned long db, dt, dbdt;
	unsigned long n_oos;
	union drbd_state os, ns;
	struct drbd_work *w;
	char *khelper_cmd = NULL;
	int verify_done = 0;

	/* Remove all elements from the resync LRU. Since future actions
	 * might set bits in the (main) bitmap, then the entries in the
	 * resync LRU would be wrong. */
	if (drbd_rs_del_all(mdev)) {
		/* In case this is not possible now, most probably because
		 * there are P_RS_DATA_REPLY Packets lingering on the worker's
		 * queue (or even the read operations for those packets
		 * is not finished by now). Retry in 100ms. */

		schedule_timeout_interruptible(HZ / 10);
		w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
		if (w) {
			w->cb = w_resync_finished;
			drbd_queue_work(&mdev->tconn->data.work, w);
			return 1;
		}
		dev_err(DEV, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n");
	}

	/* compute throughput: total blocks / elapsed (non-paused) seconds */
	dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
	if (dt <= 0)
		dt = 1;
	db = mdev->rs_total;
	dbdt = Bit2KB(db/dt);
	mdev->rs_paused /= HZ;

	if (!get_ldev(mdev))
		goto out;

	ping_peer(mdev);

	spin_lock_irq(&mdev->tconn->req_lock);
	os = drbd_read_state(mdev);

	verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);

	/* This protects us against multiple calls (that can happen in the presence
	   of application IO), and against connectivity loss just before we arrive here. */
	if (os.conn <= C_CONNECTED)
		goto out_unlock;

	ns = os;
	ns.conn = C_CONNECTED;

	dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
	     verify_done ? "Online verify " : "Resync",
	     dt + mdev->rs_paused, mdev->rs_paused, dbdt);

	n_oos = drbd_bm_total_weight(mdev);

	if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
		if (n_oos) {
			dev_alert(DEV, "Online verify found %lu %dk block out of sync!\n",
			      n_oos, Bit2KB(1));
			khelper_cmd = "out-of-sync";
		}
	} else {
		D_ASSERT((n_oos - mdev->rs_failed) == 0);

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
			khelper_cmd = "after-resync-target";

		if (mdev->tconn->csums_tfm && mdev->rs_total) {
			const unsigned long s = mdev->rs_same_csum;
			const unsigned long t = mdev->rs_total;
			const int ratio =
				(t == 0)     ? 0 :
			(t < 100000) ? ((s*100)/t) : (s/(t/100));
			dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; "
			     "transferred %luK total %luK\n",
			     ratio,
			     Bit2KB(mdev->rs_same_csum),
			     Bit2KB(mdev->rs_total - mdev->rs_same_csum),
			     Bit2KB(mdev->rs_total));
		}
	}

	if (mdev->rs_failed) {
		dev_info(DEV, "            %lu failed blocks\n", mdev->rs_failed);

		/* on failure, the side that was the target stays inconsistent */
		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
			ns.disk = D_INCONSISTENT;
			ns.pdsk = D_UP_TO_DATE;
		} else {
			ns.disk = D_UP_TO_DATE;
			ns.pdsk = D_INCONSISTENT;
		}
	} else {
		ns.disk = D_UP_TO_DATE;
		ns.pdsk = D_UP_TO_DATE;

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
			if (mdev->p_uuid) {
				int i;
				for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
					_drbd_uuid_set(mdev, i, mdev->p_uuid[i]);
				drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_CURRENT]);
				_drbd_uuid_set(mdev, UI_CURRENT, mdev->p_uuid[UI_CURRENT]);
			} else {
				dev_err(DEV, "mdev->p_uuid is NULL! BUG\n");
			}
		}

		if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
			/* for verify runs, we don't update uuids here,
			 * so there would be nothing to report. */
			drbd_uuid_set_bm(mdev, 0UL);
			drbd_print_uuids(mdev, "updated UUIDs");
			if (mdev->p_uuid) {
				/* Now the two UUID sets are equal, update what we
				 * know of the peer. */
				int i;
				for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
					mdev->p_uuid[i] = mdev->ldev->md.uuid[i];
			}
		}
	}

	_drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
out_unlock:
	spin_unlock_irq(&mdev->tconn->req_lock);
	put_ldev(mdev);
out:
	mdev->rs_total  = 0;
	mdev->rs_failed = 0;
	mdev->rs_paused = 0;
	if (verify_done)
		mdev->ov_start_sector = 0;

	drbd_md_sync(mdev);

	if (khelper_cmd)
		drbd_khelper(mdev, khelper_cmd);

	return 1;
}
916
/* helper: after sending, either park the peer request on net_ee (if its
 * pages are still referenced by the network layer, e.g. sendpage() not
 * finished) or free it right away. */
static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_peer_request *peer_req)
{
	if (drbd_peer_req_has_active_page(peer_req)) {
		/* This might happen if sendpage() has not finished */
		int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
		/* re-account the pages from pp_in_use to pp_in_use_by_net */
		atomic_add(i, &mdev->pp_in_use_by_net);
		atomic_sub(i, &mdev->pp_in_use);
		spin_lock_irq(&mdev->tconn->req_lock);
		list_add_tail(&peer_req->w.list, &mdev->net_ee);
		spin_unlock_irq(&mdev->tconn->req_lock);
		wake_up(&drbd_pp_wait);
	} else
		drbd_free_peer_req(mdev, peer_req);
}
932
/**
 * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
 * @mdev:	DRBD device.
 * @w:		work object.
 * @cancel:	The connection will be closed anyways
 *
 * Sends the read data (or a NegDReply on error) and releases the
 * peer request.  Returns 0 on success or the send error code.
 */
int w_e_end_data_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_conf *mdev = w->mdev;
	int err;

	if (unlikely(cancel)) {
		drbd_free_peer_req(mdev, peer_req);
		dec_unacked(mdev);
		return 0;
	}

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		err = drbd_send_block(mdev, P_DATA_REPLY, peer_req);
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			dev_err(DEV, "Sending NegDReply. sector=%llus.\n",
			    (unsigned long long)peer_req->i.sector);

		err = drbd_send_ack(mdev, P_NEG_DREPLY, peer_req);
	}

	dec_unacked(mdev);

	move_to_net_ee_or_free(mdev, peer_req);

	if (unlikely(err))
		dev_err(DEV, "drbd_send_block() failed\n");
	return err;
}
969
/**
 * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUESTRS
 * @mdev:	DRBD device.
 * @w:		work object.
 * @cancel:	The connection will be closed anyways
 */
int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_conf *mdev = w->mdev;
	int err;

	if (unlikely(cancel)) {
		drbd_free_peer_req(mdev, peer_req);
		dec_unacked(mdev);
		return 0;
	}

	/* mark the resync extent idle again, even on a failing/detaching disk */
	if (get_ldev_if_state(mdev, D_FAILED)) {
		drbd_rs_complete_io(mdev, peer_req->i.sector);
		put_ldev(mdev);
	}

	if (mdev->state.conn == C_AHEAD) {
		/* we went Ahead meanwhile: cancel this resync request */
		err = drbd_send_ack(mdev, P_RS_CANCEL, peer_req);
	} else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		if (likely(mdev->state.pdsk >= D_INCONSISTENT)) {
			/* inc_rs_pending() is balanced by got_BlockAck for the reply */
			inc_rs_pending(mdev);
			err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req);
		} else {
			if (__ratelimit(&drbd_ratelimit_state))
				dev_err(DEV, "Not sending RSDataReply, "
				    "partner DISKLESS!\n");
			err = 0;
		}
	} else {
		/* local read error: negative ack and account the failed range */
		if (__ratelimit(&drbd_ratelimit_state))
			dev_err(DEV, "Sending NegRSDReply. sector %llus.\n",
			    (unsigned long long)peer_req->i.sector);

		err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req);

		/* update resync data with failure */
		drbd_rs_failed_io(mdev, peer_req->i.sector, peer_req->i.size);
	}

	dec_unacked(mdev);

	move_to_net_ee_or_free(mdev, peer_req);

	if (unlikely(err))
		dev_err(DEV, "drbd_send_block() failed\n");
	return err;
}
1024
/* Worker callback for checksum based resync: compare the peer's digest with a
 * locally computed one; answer P_RS_IS_IN_SYNC on a match, or ship the full
 * block (P_RS_DATA_REPLY) on a mismatch.
 * @cancel: the connection will be closed anyways. */
int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_conf *mdev = w->mdev;
	struct digest_info *di;
	int digest_size;
	void *digest = NULL;
	int err, eq = 0;

	if (unlikely(cancel)) {
		drbd_free_peer_req(mdev, peer_req);
		dec_unacked(mdev);
		return 0;
	}

	if (get_ldev(mdev)) {
		drbd_rs_complete_io(mdev, peer_req->i.sector);
		put_ldev(mdev);
	}

	di = peer_req->digest;

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		/* quick hack to try to avoid a race against reconfiguration.
		 * a real fix would be much more involved,
		 * introducing more locking mechanisms */
		if (mdev->tconn->csums_tfm) {
			digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
			D_ASSERT(digest_size == di->digest_size);
			digest = kmalloc(digest_size, GFP_NOIO);
		}
		/* if the allocation failed, eq stays 0 and we fall back to
		 * sending the full block, as if the digests differed */
		if (digest) {
			drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
			eq = !memcmp(digest, di->digest, digest_size);
			kfree(digest);
		}

		if (eq) {
			drbd_set_in_sync(mdev, peer_req->i.sector, peer_req->i.size);
			/* rs_same_csums unit is BM_BLOCK_SIZE */
			mdev->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
			err = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, peer_req);
		} else {
			inc_rs_pending(mdev);
			peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
			peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
			kfree(di);
			err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req);
		}
	} else {
		err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req);
		if (__ratelimit(&drbd_ratelimit_state))
			dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
	}

	dec_unacked(mdev);
	move_to_net_ee_or_free(mdev, peer_req);

	if (unlikely(err))
		dev_err(DEV, "drbd_send_block/ack() failed\n");
	return err;
}
1087
/* Worker callback on the verify source: compute the digest of the just-read
 * block and send it to the peer as P_OV_REPLY.
 * @cancel: the connection will be closed anyways. */
int w_e_end_ov_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_conf *mdev = w->mdev;
	sector_t sector = peer_req->i.sector;
	unsigned int size = peer_req->i.size;
	int digest_size;
	void *digest;
	int err = 0;

	if (unlikely(cancel))
		goto out;

	digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
	digest = kmalloc(digest_size, GFP_NOIO);
	if (!digest) {
		err = 1;	/* terminate the connection in case the allocation failed */
		goto out;
	}

	/* on a local read error, send an all-zero digest instead */
	if (likely(!(peer_req->flags & EE_WAS_ERROR)))
		drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);
	else
		memset(digest, 0, digest_size);

	/* Free e and pages before send.
	 * In case we block on congestion, we could otherwise run into
	 * some distributed deadlock, if the other side blocks on
	 * congestion as well, because our receiver blocks in
	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
	drbd_free_peer_req(mdev, peer_req);
	peer_req = NULL;	/* so the out: path does not free it twice */
	inc_rs_pending(mdev);
	err = drbd_send_drequest_csum(mdev, sector, size, digest, digest_size, P_OV_REPLY);
	if (err)
		dec_rs_pending(mdev);
	kfree(digest);

out:
	if (peer_req)
		drbd_free_peer_req(mdev, peer_req);
	dec_unacked(mdev);
	return err;
}
1132
8f7bed77 1133void drbd_ov_out_of_sync_found(struct drbd_conf *mdev, sector_t sector, int size)
b411b363
PR
1134{
1135 if (mdev->ov_last_oos_start + mdev->ov_last_oos_size == sector) {
1136 mdev->ov_last_oos_size += size>>9;
1137 } else {
1138 mdev->ov_last_oos_start = sector;
1139 mdev->ov_last_oos_size = size>>9;
1140 }
1141 drbd_set_out_of_sync(mdev, sector, size);
b411b363
PR
1142}
1143
/* Worker callback on the verify target: compare the peer's digest (P_OV_REPLY)
 * against a locally computed one and send the verdict as P_OV_RESULT.
 * @cancel: the connection will be closed anyways. */
int w_e_end_ov_reply(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_conf *mdev = w->mdev;
	struct digest_info *di;
	void *digest;
	sector_t sector = peer_req->i.sector;
	unsigned int size = peer_req->i.size;
	int digest_size;
	int err, eq = 0;

	if (unlikely(cancel)) {
		drbd_free_peer_req(mdev, peer_req);
		dec_unacked(mdev);
		return 0;
	}

	/* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
	 * the resync lru has been cleaned up already */
	if (get_ldev(mdev)) {
		drbd_rs_complete_io(mdev, peer_req->i.sector);
		put_ldev(mdev);
	}

	di = peer_req->digest;

	/* on a local read error eq stays 0: report the block out of sync */
	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
		digest = kmalloc(digest_size, GFP_NOIO);
		if (digest) {
			drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);

			D_ASSERT(digest_size == di->digest_size);
			eq = !memcmp(digest, di->digest, digest_size);
			kfree(digest);
		}
	}

	/* Free peer_req and pages before send.
	 * In case we block on congestion, we could otherwise run into
	 * some distributed deadlock, if the other side blocks on
	 * congestion as well, because our receiver blocks in
	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
	drbd_free_peer_req(mdev, peer_req);
	if (!eq)
		drbd_ov_out_of_sync_found(mdev, sector, size);
	else
		/* an in-sync block ends the current out-of-sync run: log it */
		ov_out_of_sync_print(mdev);

	err = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size,
			       eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);

	dec_unacked(mdev);

	--mdev->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	if ((mdev->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(mdev, mdev->ov_left);

	if (mdev->ov_left == 0) {
		/* flush any pending out-of-sync run, then finish the verify */
		ov_out_of_sync_print(mdev);
		drbd_resync_finished(mdev);
	}

	return err;
}
1211
99920dc5 1212int w_prev_work_done(struct drbd_work *w, int cancel)
b411b363
PR
1213{
1214 struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w);
00d56944 1215
b411b363 1216 complete(&b->done);
99920dc5 1217 return 0;
b411b363
PR
1218}
1219
/* Worker callback: send a P_BARRIER packet for the epoch embedding this work
 * object on the data socket.  Returns 0 on success/cancel, -EIO otherwise. */
int w_send_barrier(struct drbd_work *w, int cancel)
{
	struct drbd_socket *sock;
	struct drbd_tl_epoch *b = container_of(w, struct drbd_tl_epoch, w);
	struct drbd_conf *mdev = w->mdev;
	struct p_barrier *p;

	/* really avoid racing with tl_clear.  w.cb may have been referenced
	 * just before it was reassigned and re-queued, so double check that.
	 * actually, this race was harmless, since we only try to send the
	 * barrier packet here, and otherwise do nothing with the object.
	 * but compare with the head of w_clear_epoch */
	spin_lock_irq(&mdev->tconn->req_lock);
	if (w->cb != w_send_barrier || mdev->state.conn < C_CONNECTED)
		cancel = 1;
	spin_unlock_irq(&mdev->tconn->req_lock);
	if (cancel)
		return 0;

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p)
		return -EIO;
	p->barrier = b->br_number;
	/* inc_ap_pending was done where this was queued.
	 * dec_ap_pending will be done in got_BarrierAck
	 * or (on connection loss) in w_clear_epoch. */
	return drbd_send_command(mdev, sock, P_BARRIER, sizeof(*p), NULL, 0);
}
1249
99920dc5 1250int w_send_write_hint(struct drbd_work *w, int cancel)
b411b363 1251{
00d56944 1252 struct drbd_conf *mdev = w->mdev;
9f5bdc33
AG
1253 struct drbd_socket *sock;
1254
b411b363 1255 if (cancel)
99920dc5 1256 return 0;
9f5bdc33
AG
1257 sock = &mdev->tconn->data;
1258 if (!drbd_prepare_command(mdev, sock))
1259 return -EIO;
e658983a 1260 return drbd_send_command(mdev, sock, P_UNPLUG_REMOTE, 0, NULL, 0);
b411b363
PR
1261}
1262
8f7bed77 1263int w_send_out_of_sync(struct drbd_work *w, int cancel)
73a01a18
PR
1264{
1265 struct drbd_request *req = container_of(w, struct drbd_request, w);
00d56944 1266 struct drbd_conf *mdev = w->mdev;
99920dc5 1267 int err;
73a01a18
PR
1268
1269 if (unlikely(cancel)) {
8554df1c 1270 req_mod(req, SEND_CANCELED);
99920dc5 1271 return 0;
73a01a18
PR
1272 }
1273
8f7bed77 1274 err = drbd_send_out_of_sync(mdev, req);
8554df1c 1275 req_mod(req, OOS_HANDED_TO_NETWORK);
73a01a18 1276
99920dc5 1277 return err;
73a01a18
PR
1278}
1279
b411b363
PR
1280/**
1281 * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
1282 * @mdev: DRBD device.
1283 * @w: work object.
1284 * @cancel: The connection will be closed anyways
1285 */
99920dc5 1286int w_send_dblock(struct drbd_work *w, int cancel)
b411b363
PR
1287{
1288 struct drbd_request *req = container_of(w, struct drbd_request, w);
00d56944 1289 struct drbd_conf *mdev = w->mdev;
99920dc5 1290 int err;
b411b363
PR
1291
1292 if (unlikely(cancel)) {
8554df1c 1293 req_mod(req, SEND_CANCELED);
99920dc5 1294 return 0;
b411b363
PR
1295 }
1296
99920dc5
AG
1297 err = drbd_send_dblock(mdev, req);
1298 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
b411b363 1299
99920dc5 1300 return err;
b411b363
PR
1301}
1302
1303/**
1304 * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
1305 * @mdev: DRBD device.
1306 * @w: work object.
1307 * @cancel: The connection will be closed anyways
1308 */
99920dc5 1309int w_send_read_req(struct drbd_work *w, int cancel)
b411b363
PR
1310{
1311 struct drbd_request *req = container_of(w, struct drbd_request, w);
00d56944 1312 struct drbd_conf *mdev = w->mdev;
99920dc5 1313 int err;
b411b363
PR
1314
1315 if (unlikely(cancel)) {
8554df1c 1316 req_mod(req, SEND_CANCELED);
99920dc5 1317 return 0;
b411b363
PR
1318 }
1319
99920dc5 1320 err = drbd_send_drequest(mdev, P_DATA_REQUEST, req->i.sector, req->i.size,
6c1005e7 1321 (unsigned long)req);
b411b363 1322
99920dc5 1323 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
b411b363 1324
99920dc5 1325 return err;
b411b363
PR
1326}
1327
/* Worker callback: re-issue the local disk I/O of a request, used when
 * unfreezing suspended I/O.  Always returns 0. */
int w_restart_disk_io(struct drbd_work *w, int cancel)
{
	struct drbd_request *req = container_of(w, struct drbd_request, w);
	struct drbd_conf *mdev = w->mdev;

	/* re-activate the AL extent for writes that were already in the AL */
	if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
		drbd_al_begin_io(mdev, &req->i);
	/* Calling drbd_al_begin_io() out of the worker might deadlocks
	   theoretically. Practically it can not deadlock, since this is
	   only used when unfreezing IOs. All the extents of the requests
	   that made it into the TL are already active */

	drbd_req_make_private_bio(req, req->master_bio);
	req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
	generic_make_request(req->private_bio);

	return 0;
}
1346
b411b363
PR
/* Walk the resync-after dependency chain starting at @mdev and return 1 if
 * this device may resync now, 0 if some device it depends on is itself
 * resyncing or paused.  Caller context: see resume_next_sg/suspend_other_sg. */
static int _drbd_may_sync_now(struct drbd_conf *mdev)
{
	struct drbd_conf *odev = mdev;
	int resync_after;

	while (1) {
		/* diskless: no disk_conf to consult, chain ends, may sync */
		if (!odev->ldev)
			return 1;
		rcu_read_lock();
		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
		rcu_read_unlock();
		/* -1 means "no dependency": end of chain */
		if (resync_after == -1)
			return 1;
		odev = minor_to_mdev(resync_after);
		if (!expect(odev))
			return 1;
		/* dependency busy resyncing or paused: we must wait */
		if ((odev->state.conn >= C_SYNC_SOURCE &&
		     odev->state.conn <= C_PAUSED_SYNC_T) ||
		    odev->state.aftr_isp || odev->state.peer_isp ||
		    odev->state.user_isp)
			return 0;
	}
}
1370
1371/**
1372 * _drbd_pause_after() - Pause resync on all devices that may not resync now
1373 * @mdev: DRBD device.
1374 *
1375 * Called from process context only (admin command and after_state_ch).
1376 */
1377static int _drbd_pause_after(struct drbd_conf *mdev)
1378{
1379 struct drbd_conf *odev;
1380 int i, rv = 0;
1381
695d08fa 1382 rcu_read_lock();
81a5d60e 1383 idr_for_each_entry(&minors, odev, i) {
b411b363
PR
1384 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1385 continue;
1386 if (!_drbd_may_sync_now(odev))
1387 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
1388 != SS_NOTHING_TO_DO);
1389 }
695d08fa 1390 rcu_read_unlock();
b411b363
PR
1391
1392 return rv;
1393}
1394
1395/**
1396 * _drbd_resume_next() - Resume resync on all devices that may resync now
1397 * @mdev: DRBD device.
1398 *
1399 * Called from process context only (admin command and worker).
1400 */
1401static int _drbd_resume_next(struct drbd_conf *mdev)
1402{
1403 struct drbd_conf *odev;
1404 int i, rv = 0;
1405
695d08fa 1406 rcu_read_lock();
81a5d60e 1407 idr_for_each_entry(&minors, odev, i) {
b411b363
PR
1408 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1409 continue;
1410 if (odev->state.aftr_isp) {
1411 if (_drbd_may_sync_now(odev))
1412 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
1413 CS_HARD, NULL)
1414 != SS_NOTHING_TO_DO) ;
1415 }
1416 }
695d08fa 1417 rcu_read_unlock();
b411b363
PR
1418 return rv;
1419}
1420
/* Resume resync on every device whose resync-after dependency allows it now. */
void resume_next_sg(struct drbd_conf *mdev)
{
	write_lock_irq(&global_state_lock);
	_drbd_resume_next(mdev);
	write_unlock_irq(&global_state_lock);
}
1427
/* Pause resync on every device that may not resync now due to dependencies. */
void suspend_other_sg(struct drbd_conf *mdev)
{
	write_lock_irq(&global_state_lock);
	_drbd_pause_after(mdev);
	write_unlock_irq(&global_state_lock);
}
1434
dc97b708 1435/* caller must hold global_state_lock */
95f8efd0 1436enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor)
b411b363
PR
1437{
1438 struct drbd_conf *odev;
95f8efd0 1439 int resync_after;
b411b363
PR
1440
1441 if (o_minor == -1)
1442 return NO_ERROR;
1443 if (o_minor < -1 || minor_to_mdev(o_minor) == NULL)
95f8efd0 1444 return ERR_RESYNC_AFTER;
b411b363
PR
1445
1446 /* check for loops */
1447 odev = minor_to_mdev(o_minor);
1448 while (1) {
1449 if (odev == mdev)
95f8efd0 1450 return ERR_RESYNC_AFTER_CYCLE;
b411b363 1451
daeda1cc 1452 rcu_read_lock();
95f8efd0 1453 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
daeda1cc 1454 rcu_read_unlock();
b411b363 1455 /* dependency chain ends here, no cycles. */
95f8efd0 1456 if (resync_after == -1)
b411b363
PR
1457 return NO_ERROR;
1458
1459 /* follow the dependency chain */
95f8efd0 1460 odev = minor_to_mdev(resync_after);
b411b363
PR
1461 }
1462}
1463
/* Re-evaluate all resync pause/resume decisions after a resync-after
 * dependency changed.  Pausing one device can allow another to resume and
 * vice versa, so iterate until no state changes any more.
 * caller must hold global_state_lock */
void drbd_resync_after_changed(struct drbd_conf *mdev)
{
	int changed;

	do {
		changed  = _drbd_pause_after(mdev);
		changed |= _drbd_resume_next(mdev);
	} while (changed);
}
1474
/* Reset the resync flow-control state (sector counters and the dynamic
 * resync speed plan) before a resync starts. */
void drbd_rs_controller_reset(struct drbd_conf *mdev)
{
	struct fifo_buffer *plan;

	atomic_set(&mdev->rs_sect_in, 0);
	atomic_set(&mdev->rs_sect_ev, 0);
	mdev->rs_in_flight = 0;

	/* Updating the RCU protected object in place is necessary since
	   this function gets called from atomic context.
	   It is valid since all other updates also lead to an completely
	   empty fifo */
	rcu_read_lock();
	plan = rcu_dereference(mdev->rs_plan_s);
	plan->total = 0;
	fifo_set(plan, 0);
	rcu_read_unlock();
}
1493
/* Timer callback: defer starting the resync to the worker by queueing
 * start_resync_work (runs w_start_resync). */
void start_resync_timer_fn(unsigned long data)
{
	struct drbd_conf *mdev = (struct drbd_conf *) data;

	drbd_queue_work(&mdev->tconn->data.work, &mdev->start_resync_work);
}
1500
/* Worker callback: become sync source once all outstanding acks and resync
 * replies have drained; otherwise re-arm the timer and retry shortly. */
int w_start_resync(struct drbd_work *w, int cancel)
{
	struct drbd_conf *mdev = w->mdev;

	if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) {
		/* still busy: retry in 100ms via start_resync_timer_fn */
		dev_warn(DEV, "w_start_resync later...\n");
		mdev->start_resync_timer.expires = jiffies + HZ/10;
		add_timer(&mdev->start_resync_timer);
		return 0;
	}

	drbd_start_resync(mdev, C_SYNC_SOURCE);
	clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags);
	return 0;
}
1516
b411b363
PR
/**
 * drbd_start_resync() - Start the resync process
 * @mdev:	DRBD device.
 * @side:	Either C_SYNC_SOURCE or C_SYNC_TARGET
 *
 * This function might bring you directly into one of the
 * C_PAUSED_SYNC_* states.
 */
void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
{
	union drbd_state ns;
	int r;

	if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn < C_AHEAD) {
		dev_err(DEV, "Resync already running!\n");
		return;
	}

	if (mdev->state.conn < C_AHEAD) {
		/* In case a previous resync run was aborted by an IO error/detach on the peer. */
		drbd_rs_cancel_all(mdev);
		/* This should be done when we abort the resync. We definitely do not
		   want to have this for connections going back and forth between
		   Ahead/Behind and SyncSource/SyncTarget */
	}

	/* run the before-resync-* userland handlers, unless already done */
	if (!test_bit(B_RS_H_DONE, &mdev->flags)) {
		if (side == C_SYNC_TARGET) {
			/* Since application IO was locked out during C_WF_BITMAP_T and
			   C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
			   we check that we might make the data inconsistent. */
			r = drbd_khelper(mdev, "before-resync-target");
			r = (r >> 8) & 0xff;	/* exit code of the helper */
			if (r > 0) {
				dev_info(DEV, "before-resync-target handler returned %d, "
					 "dropping connection.\n", r);
				conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
				return;
			}
		} else /* C_SYNC_SOURCE */ {
			r = drbd_khelper(mdev, "before-resync-source");
			r = (r >> 8) & 0xff;
			if (r > 0) {
				if (r == 3) {
					dev_info(DEV, "before-resync-source handler returned %d, "
						 "ignoring. Old userland tools?", r);
				} else {
					dev_info(DEV, "before-resync-source handler returned %d, "
						 "dropping connection.\n", r);
					conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
					return;
				}
			}
		}
	}

	if (current == mdev->tconn->worker.task) {
		/* The worker should not sleep waiting for state_mutex,
		   that can take long */
		if (!mutex_trylock(mdev->state_mutex)) {
			/* retry from the timer; remember the handlers ran already */
			set_bit(B_RS_H_DONE, &mdev->flags);
			mdev->start_resync_timer.expires = jiffies + HZ/5;
			add_timer(&mdev->start_resync_timer);
			return;
		}
	} else {
		mutex_lock(mdev->state_mutex);
	}
	clear_bit(B_RS_H_DONE, &mdev->flags);

	if (!get_ldev_if_state(mdev, D_NEGOTIATING)) {
		mutex_unlock(mdev->state_mutex);
		return;
	}

	write_lock_irq(&global_state_lock);
	ns = drbd_read_state(mdev);

	/* start paused if a resync-after dependency is busy */
	ns.aftr_isp = !_drbd_may_sync_now(mdev);

	ns.conn = side;

	if (side == C_SYNC_TARGET)
		ns.disk = D_INCONSISTENT;
	else /* side == C_SYNC_SOURCE */
		ns.pdsk = D_INCONSISTENT;

	r = __drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
	ns = drbd_read_state(mdev);

	if (ns.conn < C_CONNECTED)
		r = SS_UNKNOWN_ERROR;

	if (r == SS_SUCCESS) {
		unsigned long tw = drbd_bm_total_weight(mdev);
		unsigned long now = jiffies;
		int i;

		/* reset all resync statistics and progress marks */
		mdev->rs_failed    = 0;
		mdev->rs_paused    = 0;
		mdev->rs_same_csum = 0;
		mdev->rs_last_events = 0;
		mdev->rs_last_sect_ev = 0;
		mdev->rs_total     = tw;
		mdev->rs_start     = now;
		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
			mdev->rs_mark_left[i] = tw;
			mdev->rs_mark_time[i] = now;
		}
		_drbd_pause_after(mdev);
	}
	write_unlock_irq(&global_state_lock);

	if (r == SS_SUCCESS) {
		dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
		     drbd_conn_str(ns.conn),
		     (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
		     (unsigned long) mdev->rs_total);
		if (side == C_SYNC_TARGET)
			mdev->bm_resync_fo = 0;

		/* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
		 * with w_send_oos, or the sync target will get confused as to
		 * how much bits to resync.  We cannot do that always, because for an
		 * empty resync and protocol < 95, we need to do it here, as we call
		 * drbd_resync_finished from here in that case.
		 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
		 * and from after_state_ch otherwise. */
		if (side == C_SYNC_SOURCE && mdev->tconn->agreed_pro_version < 96)
			drbd_gen_and_send_sync_uuid(mdev);

		if (mdev->tconn->agreed_pro_version < 95 && mdev->rs_total == 0) {
			/* This still has a race (about when exactly the peers
			 * detect connection loss) that can lead to a full sync
			 * on next handshake. In 8.3.9 we fixed this with explicit
			 * resync-finished notifications, but the fix
			 * introduces a protocol change.  Sleeping for some
			 * time longer than the ping interval + timeout on the
			 * SyncSource, to give the SyncTarget the chance to
			 * detect connection loss, then waiting for a ping
			 * response (implicit in drbd_resync_finished) reduces
			 * the race considerably, but does not solve it. */
			if (side == C_SYNC_SOURCE) {
				struct net_conf *nc;
				int timeo;

				rcu_read_lock();
				nc = rcu_dereference(mdev->tconn->net_conf);
				timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
				rcu_read_unlock();
				schedule_timeout_interruptible(timeo);
			}
			drbd_resync_finished(mdev);
		}

		drbd_rs_controller_reset(mdev);
		/* ns.conn may already be != mdev->state.conn,
		 * we may have been paused in between, or become paused until
		 * the timer triggers.
		 * No matter, that is handled in resync_timer_fn() */
		if (ns.conn == C_SYNC_TARGET)
			mod_timer(&mdev->resync_timer, jiffies);

		drbd_md_sync(mdev);
	}
	put_ldev(mdev);
	mutex_unlock(mdev->state_mutex);
}
1685
/* Main loop of the per-connection worker thread: dequeue work items from
 * tconn->data.work (a semaphore-counted list) and run their callbacks.
 * Handles TCP corking around idle periods, drains remaining work with
 * cancel=1 on shutdown, and finally cleans up all volumes. */
int drbd_worker(struct drbd_thread *thi)
{
	struct drbd_tconn *tconn = thi->tconn;
	struct drbd_work *w = NULL;
	struct drbd_conf *mdev;
	struct net_conf *nc;
	LIST_HEAD(work_list);
	int vnr, intr = 0;
	int cork;

	while (get_t_state(thi) == RUNNING) {
		drbd_thread_current_set_cpu(thi);

		/* fast path: if work is immediately available, skip the
		 * uncork/sleep/cork dance below */
		if (down_trylock(&tconn->data.work.s)) {
			mutex_lock(&tconn->data.mutex);

			rcu_read_lock();
			nc = rcu_dereference(tconn->net_conf);
			cork = nc ? nc->tcp_cork : 0;
			rcu_read_unlock();

			/* about to go idle: let queued TCP data drain */
			if (tconn->data.socket && cork)
				drbd_tcp_uncork(tconn->data.socket);
			mutex_unlock(&tconn->data.mutex);

			intr = down_interruptible(&tconn->data.work.s);

			mutex_lock(&tconn->data.mutex);
			if (tconn->data.socket  && cork)
				drbd_tcp_cork(tconn->data.socket);
			mutex_unlock(&tconn->data.mutex);
		}

		if (intr) {
			/* interrupted by a signal; only expected when stopping */
			flush_signals(current);
			if (get_t_state(thi) == RUNNING) {
				conn_warn(tconn, "Worker got an unexpected signal\n");
				continue;
			}
			break;
		}

		if (get_t_state(thi) != RUNNING)
			break;
		/* With this break, we have done a down() but not consumed
		   the entry from the list. The cleanup code takes care of
		   this...   */

		w = NULL;
		spin_lock_irq(&tconn->data.work.q_lock);
		if (list_empty(&tconn->data.work.q)) {
			/* something terribly wrong in our logic.
			 * we were able to down() the semaphore,
			 * but the list is empty... doh.
			 *
			 * what is the best thing to do now?
			 * try again from scratch, restarting the receiver,
			 * asender, whatnot? could break even more ugly,
			 * e.g. when we are primary, but no good local data.
			 *
			 * I'll try to get away just starting over this loop.
			 */
			conn_warn(tconn, "Work list unexpectedly empty\n");
			spin_unlock_irq(&tconn->data.work.q_lock);
			continue;
		}
		w = list_entry(tconn->data.work.q.next, struct drbd_work, list);
		list_del_init(&w->list);
		spin_unlock_irq(&tconn->data.work.q_lock);

		/* run the callback; pass cancel=1 while not fully connected */
		if (w->cb(w, tconn->cstate < C_WF_REPORT_PARAMS)) {
			/* dev_warn(DEV, "worker: a callback failed! \n"); */
			if (tconn->cstate >= C_WF_REPORT_PARAMS)
				conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
		}
	}

	/* shutdown: drain everything still queued, with cancel=1 */
	spin_lock_irq(&tconn->data.work.q_lock);
	while (!list_empty(&tconn->data.work.q)) {
		list_splice_init(&tconn->data.work.q, &work_list);
		spin_unlock_irq(&tconn->data.work.q_lock);

		while (!list_empty(&work_list)) {
			w = list_entry(work_list.next, struct drbd_work, list);
			list_del_init(&w->list);
			w->cb(w, 1);
		}

		spin_lock_irq(&tconn->data.work.q_lock);
	}
	/* re-zero the semaphore count to match the (now empty) list */
	sema_init(&tconn->data.work.s, 0);
	/* DANGEROUS race: if someone did queue his work within the spinlock,
	 * but up() ed outside the spinlock, we could get an up() on the
	 * semaphore without corresponding list entry.
	 * So don't do that.
	 */
	spin_unlock_irq(&tconn->data.work.q_lock);

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
		D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE);
		/* hold a reference across the RCU unlock while cleaning up */
		kref_get(&mdev->kref);
		rcu_read_unlock();
		drbd_mdev_cleanup(mdev);
		kref_put(&mdev->kref, &drbd_minor_destroy);
		rcu_read_lock();
	}
	rcu_read_unlock();

	return 0;
}