]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - drivers/block/drbd/drbd_worker.c
drbd: Rename struct drbd_conf -> struct drbd_device
[mirror_ubuntu-artful-kernel.git] / drivers / block / drbd / drbd_worker.c
CommitLineData
b411b363
PR
1/*
2 drbd_worker.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24 */
25
b411b363 26#include <linux/module.h>
b411b363
PR
27#include <linux/drbd.h>
28#include <linux/sched.h>
b411b363
PR
29#include <linux/wait.h>
30#include <linux/mm.h>
31#include <linux/memcontrol.h>
32#include <linux/mm_inline.h>
33#include <linux/slab.h>
34#include <linux/random.h>
b411b363
PR
35#include <linux/string.h>
36#include <linux/scatterlist.h>
37
38#include "drbd_int.h"
a3603a6e 39#include "drbd_protocol.h"
b411b363 40#include "drbd_req.h"
b411b363 41
00d56944 42static int w_make_ov_request(struct drbd_work *w, int cancel);
b411b363
PR
43
44
c5a91619
AG
45/* endio handlers:
46 * drbd_md_io_complete (defined here)
fcefa62e
AG
47 * drbd_request_endio (defined here)
48 * drbd_peer_request_endio (defined here)
c5a91619
AG
49 * bm_async_io_complete (defined in drbd_bitmap.c)
50 *
b411b363
PR
51 * For all these callbacks, note the following:
52 * The callbacks will be called in irq context by the IDE drivers,
53 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
54 * Try to get the locking right :)
55 *
56 */
57
58
59/* About the global_state_lock
60 Each state transition on an device holds a read lock. In case we have
95f8efd0 61 to evaluate the resync after dependencies, we grab a write lock, because
b411b363
PR
62 we need stable states on all devices for that. */
63rwlock_t global_state_lock;
64
65/* used for synchronous meta data and bitmap IO
66 * submitted by drbd_md_sync_page_io()
67 */
68void drbd_md_io_complete(struct bio *bio, int error)
69{
70 struct drbd_md_io *md_io;
54761697 71 struct drbd_device *mdev;
b411b363
PR
72
73 md_io = (struct drbd_md_io *)bio->bi_private;
54761697 74 mdev = container_of(md_io, struct drbd_device, md_io);
cdfda633 75
b411b363
PR
76 md_io->error = error;
77
0cfac5dd
PR
78 /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
79 * to timeout on the lower level device, and eventually detach from it.
80 * If this io completion runs after that timeout expired, this
81 * drbd_md_put_buffer() may allow us to finally try and re-attach.
82 * During normal operation, this only puts that extra reference
83 * down to 1 again.
84 * Make sure we first drop the reference, and only then signal
85 * completion, or we may (in drbd_al_read_log()) cycle so fast into the
86 * next drbd_md_sync_page_io(), that we trigger the
87 * ASSERT(atomic_read(&mdev->md_io_in_use) == 1) there.
88 */
89 drbd_md_put_buffer(mdev);
cdfda633
PR
90 md_io->done = 1;
91 wake_up(&mdev->misc_wait);
92 bio_put(bio);
c04ccaa6
LE
93 if (mdev->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
94 put_ldev(mdev);
b411b363
PR
95}
96
97/* reads on behalf of the partner,
98 * "submitted" by the receiver
99 */
a186e478 100static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
b411b363
PR
101{
102 unsigned long flags = 0;
54761697 103 struct drbd_device *mdev = peer_req->w.mdev;
b411b363 104
87eeee41 105 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
db830c46
AG
106 mdev->read_cnt += peer_req->i.size >> 9;
107 list_del(&peer_req->w.list);
b411b363
PR
108 if (list_empty(&mdev->read_ee))
109 wake_up(&mdev->ee_wait);
db830c46 110 if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
edc9f5eb 111 __drbd_chk_io_error(mdev, DRBD_READ_ERROR);
87eeee41 112 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
b411b363 113
d5b27b01 114 drbd_queue_work(&mdev->tconn->sender_work, &peer_req->w);
b411b363 115 put_ldev(mdev);
b411b363
PR
116}
117
118/* writes on behalf of the partner, or resync writes,
45bb912b 119 * "submitted" by the receiver, final stage. */
db830c46 120static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
b411b363
PR
121{
122 unsigned long flags = 0;
54761697 123 struct drbd_device *mdev = peer_req->w.mdev;
181286ad 124 struct drbd_interval i;
b411b363 125 int do_wake;
579b57ed 126 u64 block_id;
b411b363 127 int do_al_complete_io;
b411b363 128
db830c46 129 /* after we moved peer_req to done_ee,
b411b363
PR
130 * we may no longer access it,
131 * it may be freed/reused already!
132 * (as soon as we release the req_lock) */
181286ad 133 i = peer_req->i;
db830c46
AG
134 do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
135 block_id = peer_req->block_id;
b411b363 136
87eeee41 137 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
db830c46 138 mdev->writ_cnt += peer_req->i.size >> 9;
a506c13a 139 list_move_tail(&peer_req->w.list, &mdev->done_ee);
b411b363 140
bb3bfe96 141 /*
5e472264 142 * Do not remove from the write_requests tree here: we did not send the
bb3bfe96
AG
143 * Ack yet and did not wake possibly waiting conflicting requests.
144 * Removed from the tree from "drbd_process_done_ee" within the
145 * appropriate w.cb (e_end_block/e_end_resync_block) or from
146 * _drbd_clear_done_ee.
147 */
b411b363 148
579b57ed 149 do_wake = list_empty(block_id == ID_SYNCER ? &mdev->sync_ee : &mdev->active_ee);
b411b363 150
db830c46 151 if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
edc9f5eb 152 __drbd_chk_io_error(mdev, DRBD_WRITE_ERROR);
87eeee41 153 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
b411b363 154
579b57ed 155 if (block_id == ID_SYNCER)
181286ad 156 drbd_rs_complete_io(mdev, i.sector);
b411b363
PR
157
158 if (do_wake)
159 wake_up(&mdev->ee_wait);
160
161 if (do_al_complete_io)
181286ad 162 drbd_al_complete_io(mdev, &i);
b411b363 163
0625ac19 164 wake_asender(mdev->tconn);
b411b363 165 put_ldev(mdev);
45bb912b 166}
b411b363 167
45bb912b
LE
168/* writes on behalf of the partner, or resync writes,
169 * "submitted" by the receiver.
170 */
fcefa62e 171void drbd_peer_request_endio(struct bio *bio, int error)
45bb912b 172{
db830c46 173 struct drbd_peer_request *peer_req = bio->bi_private;
54761697 174 struct drbd_device *mdev = peer_req->w.mdev;
45bb912b
LE
175 int uptodate = bio_flagged(bio, BIO_UPTODATE);
176 int is_write = bio_data_dir(bio) == WRITE;
177
07194272 178 if (error && __ratelimit(&drbd_ratelimit_state))
45bb912b
LE
179 dev_warn(DEV, "%s: error=%d s=%llus\n",
180 is_write ? "write" : "read", error,
db830c46 181 (unsigned long long)peer_req->i.sector);
45bb912b 182 if (!error && !uptodate) {
07194272
LE
183 if (__ratelimit(&drbd_ratelimit_state))
184 dev_warn(DEV, "%s: setting error to -EIO s=%llus\n",
185 is_write ? "write" : "read",
db830c46 186 (unsigned long long)peer_req->i.sector);
45bb912b
LE
187 /* strange behavior of some lower level drivers...
188 * fail the request by clearing the uptodate flag,
189 * but do not return any error?! */
190 error = -EIO;
191 }
192
193 if (error)
db830c46 194 set_bit(__EE_WAS_ERROR, &peer_req->flags);
45bb912b
LE
195
196 bio_put(bio); /* no need for the bio anymore */
db830c46 197 if (atomic_dec_and_test(&peer_req->pending_bios)) {
45bb912b 198 if (is_write)
db830c46 199 drbd_endio_write_sec_final(peer_req);
45bb912b 200 else
db830c46 201 drbd_endio_read_sec_final(peer_req);
45bb912b 202 }
b411b363
PR
203}
204
205/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
206 */
fcefa62e 207void drbd_request_endio(struct bio *bio, int error)
b411b363 208{
a115413d 209 unsigned long flags;
b411b363 210 struct drbd_request *req = bio->bi_private;
54761697 211 struct drbd_device *mdev = req->w.mdev;
a115413d 212 struct bio_and_error m;
b411b363
PR
213 enum drbd_req_event what;
214 int uptodate = bio_flagged(bio, BIO_UPTODATE);
215
b411b363
PR
216 if (!error && !uptodate) {
217 dev_warn(DEV, "p %s: setting error to -EIO\n",
218 bio_data_dir(bio) == WRITE ? "write" : "read");
219 /* strange behavior of some lower level drivers...
220 * fail the request by clearing the uptodate flag,
221 * but do not return any error?! */
222 error = -EIO;
223 }
224
1b6dd252
PR
225
226 /* If this request was aborted locally before,
227 * but now was completed "successfully",
228 * chances are that this caused arbitrary data corruption.
229 *
230 * "aborting" requests, or force-detaching the disk, is intended for
231 * completely blocked/hung local backing devices which do no longer
232 * complete requests at all, not even do error completions. In this
233 * situation, usually a hard-reset and failover is the only way out.
234 *
235 * By "aborting", basically faking a local error-completion,
236 * we allow for a more graceful swichover by cleanly migrating services.
237 * Still the affected node has to be rebooted "soon".
238 *
239 * By completing these requests, we allow the upper layers to re-use
240 * the associated data pages.
241 *
242 * If later the local backing device "recovers", and now DMAs some data
243 * from disk into the original request pages, in the best case it will
244 * just put random data into unused pages; but typically it will corrupt
245 * meanwhile completely unrelated data, causing all sorts of damage.
246 *
247 * Which means delayed successful completion,
248 * especially for READ requests,
249 * is a reason to panic().
250 *
251 * We assume that a delayed *error* completion is OK,
252 * though we still will complain noisily about it.
253 */
254 if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
255 if (__ratelimit(&drbd_ratelimit_state))
256 dev_emerg(DEV, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
257
258 if (!error)
259 panic("possible random memory corruption caused by delayed completion of aborted local request\n");
260 }
261
b411b363
PR
262 /* to avoid recursion in __req_mod */
263 if (unlikely(error)) {
264 what = (bio_data_dir(bio) == WRITE)
8554df1c 265 ? WRITE_COMPLETED_WITH_ERROR
5c3c7e64 266 : (bio_rw(bio) == READ)
8554df1c
AG
267 ? READ_COMPLETED_WITH_ERROR
268 : READ_AHEAD_COMPLETED_WITH_ERROR;
b411b363 269 } else
8554df1c 270 what = COMPLETED_OK;
b411b363
PR
271
272 bio_put(req->private_bio);
273 req->private_bio = ERR_PTR(error);
274
a115413d 275 /* not req_mod(), we need irqsave here! */
87eeee41 276 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
a115413d 277 __req_mod(req, what, &m);
87eeee41 278 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
2415308e 279 put_ldev(mdev);
a115413d
LE
280
281 if (m.bio)
282 complete_master_bio(mdev, &m);
b411b363
PR
283}
284
54761697 285void drbd_csum_ee(struct drbd_device *mdev, struct crypto_hash *tfm,
db830c46 286 struct drbd_peer_request *peer_req, void *digest)
45bb912b
LE
287{
288 struct hash_desc desc;
289 struct scatterlist sg;
db830c46 290 struct page *page = peer_req->pages;
45bb912b
LE
291 struct page *tmp;
292 unsigned len;
293
294 desc.tfm = tfm;
295 desc.flags = 0;
296
297 sg_init_table(&sg, 1);
298 crypto_hash_init(&desc);
299
300 while ((tmp = page_chain_next(page))) {
301 /* all but the last page will be fully used */
302 sg_set_page(&sg, page, PAGE_SIZE, 0);
303 crypto_hash_update(&desc, &sg, sg.length);
304 page = tmp;
305 }
306 /* and now the last, possibly only partially used page */
db830c46 307 len = peer_req->i.size & (PAGE_SIZE - 1);
45bb912b
LE
308 sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
309 crypto_hash_update(&desc, &sg, sg.length);
310 crypto_hash_final(&desc, digest);
311}
312
54761697 313void drbd_csum_bio(struct drbd_device *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest)
b411b363
PR
314{
315 struct hash_desc desc;
316 struct scatterlist sg;
7988613b
KO
317 struct bio_vec bvec;
318 struct bvec_iter iter;
b411b363
PR
319
320 desc.tfm = tfm;
321 desc.flags = 0;
322
323 sg_init_table(&sg, 1);
324 crypto_hash_init(&desc);
325
7988613b
KO
326 bio_for_each_segment(bvec, bio, iter) {
327 sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
b411b363
PR
328 crypto_hash_update(&desc, &sg, sg.length);
329 }
330 crypto_hash_final(&desc, digest);
331}
332
9676c760 333/* MAYBE merge common code with w_e_end_ov_req */
99920dc5 334static int w_e_send_csum(struct drbd_work *w, int cancel)
b411b363 335{
00d56944 336 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
54761697 337 struct drbd_device *mdev = w->mdev;
b411b363
PR
338 int digest_size;
339 void *digest;
99920dc5 340 int err = 0;
b411b363 341
53ea4331
LE
342 if (unlikely(cancel))
343 goto out;
b411b363 344
9676c760 345 if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
53ea4331 346 goto out;
b411b363 347
f399002e 348 digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
53ea4331
LE
349 digest = kmalloc(digest_size, GFP_NOIO);
350 if (digest) {
db830c46
AG
351 sector_t sector = peer_req->i.sector;
352 unsigned int size = peer_req->i.size;
f399002e 353 drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
9676c760 354 /* Free peer_req and pages before send.
53ea4331
LE
355 * In case we block on congestion, we could otherwise run into
356 * some distributed deadlock, if the other side blocks on
357 * congestion as well, because our receiver blocks in
c37c8ecf 358 * drbd_alloc_pages due to pp_in_use > max_buffers. */
3967deb1 359 drbd_free_peer_req(mdev, peer_req);
db830c46 360 peer_req = NULL;
53ea4331 361 inc_rs_pending(mdev);
99920dc5 362 err = drbd_send_drequest_csum(mdev, sector, size,
db1b0b72
AG
363 digest, digest_size,
364 P_CSUM_RS_REQUEST);
53ea4331
LE
365 kfree(digest);
366 } else {
367 dev_err(DEV, "kmalloc() of digest failed.\n");
99920dc5 368 err = -ENOMEM;
53ea4331 369 }
b411b363 370
53ea4331 371out:
db830c46 372 if (peer_req)
3967deb1 373 drbd_free_peer_req(mdev, peer_req);
b411b363 374
99920dc5 375 if (unlikely(err))
b411b363 376 dev_err(DEV, "drbd_send_drequest(..., csum) failed\n");
99920dc5 377 return err;
b411b363
PR
378}
379
380#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
381
54761697 382static int read_for_csum(struct drbd_device *mdev, sector_t sector, int size)
b411b363 383{
db830c46 384 struct drbd_peer_request *peer_req;
b411b363
PR
385
386 if (!get_ldev(mdev))
80a40e43 387 return -EIO;
b411b363 388
e3555d85 389 if (drbd_rs_should_slow_down(mdev, sector))
0f0601f4
LE
390 goto defer;
391
b411b363
PR
392 /* GFP_TRY, because if there is no memory available right now, this may
393 * be rescheduled for later. It is "only" background resync, after all. */
0db55363
AG
394 peer_req = drbd_alloc_peer_req(mdev, ID_SYNCER /* unused */, sector,
395 size, GFP_TRY);
db830c46 396 if (!peer_req)
80a40e43 397 goto defer;
b411b363 398
db830c46 399 peer_req->w.cb = w_e_send_csum;
87eeee41 400 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 401 list_add(&peer_req->w.list, &mdev->read_ee);
87eeee41 402 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 403
0f0601f4 404 atomic_add(size >> 9, &mdev->rs_sect_ev);
fbe29dec 405 if (drbd_submit_peer_request(mdev, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
80a40e43 406 return 0;
b411b363 407
10f6d992
LE
408 /* If it failed because of ENOMEM, retry should help. If it failed
409 * because bio_add_page failed (probably broken lower level driver),
410 * retry may or may not help.
411 * If it does not, you may need to force disconnect. */
87eeee41 412 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 413 list_del(&peer_req->w.list);
87eeee41 414 spin_unlock_irq(&mdev->tconn->req_lock);
22cc37a9 415
3967deb1 416 drbd_free_peer_req(mdev, peer_req);
80a40e43 417defer:
45bb912b 418 put_ldev(mdev);
80a40e43 419 return -EAGAIN;
b411b363
PR
420}
421
99920dc5 422int w_resync_timer(struct drbd_work *w, int cancel)
b411b363 423{
54761697 424 struct drbd_device *mdev = w->mdev;
63106d3c
PR
425 switch (mdev->state.conn) {
426 case C_VERIFY_S:
00d56944 427 w_make_ov_request(w, cancel);
63106d3c
PR
428 break;
429 case C_SYNC_TARGET:
00d56944 430 w_make_resync_request(w, cancel);
63106d3c 431 break;
b411b363
PR
432 }
433
99920dc5 434 return 0;
794abb75
PR
435}
436
437void resync_timer_fn(unsigned long data)
438{
54761697 439 struct drbd_device *mdev = (struct drbd_device *) data;
794abb75
PR
440
441 if (list_empty(&mdev->resync_work.list))
d5b27b01 442 drbd_queue_work(&mdev->tconn->sender_work, &mdev->resync_work);
b411b363
PR
443}
444
778f271d
PR
445static void fifo_set(struct fifo_buffer *fb, int value)
446{
447 int i;
448
449 for (i = 0; i < fb->size; i++)
f10f2623 450 fb->values[i] = value;
778f271d
PR
451}
452
453static int fifo_push(struct fifo_buffer *fb, int value)
454{
455 int ov;
456
457 ov = fb->values[fb->head_index];
458 fb->values[fb->head_index++] = value;
459
460 if (fb->head_index >= fb->size)
461 fb->head_index = 0;
462
463 return ov;
464}
465
466static void fifo_add_val(struct fifo_buffer *fb, int value)
467{
468 int i;
469
470 for (i = 0; i < fb->size; i++)
471 fb->values[i] += value;
472}
473
9958c857
PR
474struct fifo_buffer *fifo_alloc(int fifo_size)
475{
476 struct fifo_buffer *fb;
477
8747d30a 478 fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO);
9958c857
PR
479 if (!fb)
480 return NULL;
481
482 fb->head_index = 0;
483 fb->size = fifo_size;
484 fb->total = 0;
485
486 return fb;
487}
488
54761697 489static int drbd_rs_controller(struct drbd_device *mdev)
778f271d 490{
daeda1cc 491 struct disk_conf *dc;
778f271d
PR
492 unsigned int sect_in; /* Number of sectors that came in since the last turn */
493 unsigned int want; /* The number of sectors we want in the proxy */
494 int req_sect; /* Number of sectors to request in this turn */
495 int correction; /* Number of sectors more we need in the proxy*/
496 int cps; /* correction per invocation of drbd_rs_controller() */
497 int steps; /* Number of time steps to plan ahead */
498 int curr_corr;
499 int max_sect;
813472ce 500 struct fifo_buffer *plan;
778f271d
PR
501
502 sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */
503 mdev->rs_in_flight -= sect_in;
504
daeda1cc 505 dc = rcu_dereference(mdev->ldev->disk_conf);
813472ce 506 plan = rcu_dereference(mdev->rs_plan_s);
778f271d 507
813472ce 508 steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
778f271d
PR
509
510 if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */
daeda1cc 511 want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
778f271d 512 } else { /* normal path */
daeda1cc
PR
513 want = dc->c_fill_target ? dc->c_fill_target :
514 sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
778f271d
PR
515 }
516
813472ce 517 correction = want - mdev->rs_in_flight - plan->total;
778f271d
PR
518
519 /* Plan ahead */
520 cps = correction / steps;
813472ce
PR
521 fifo_add_val(plan, cps);
522 plan->total += cps * steps;
778f271d
PR
523
524 /* What we do in this step */
813472ce
PR
525 curr_corr = fifo_push(plan, 0);
526 plan->total -= curr_corr;
778f271d
PR
527
528 req_sect = sect_in + curr_corr;
529 if (req_sect < 0)
530 req_sect = 0;
531
daeda1cc 532 max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
778f271d
PR
533 if (req_sect > max_sect)
534 req_sect = max_sect;
535
536 /*
537 dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
538 sect_in, mdev->rs_in_flight, want, correction,
539 steps, cps, mdev->rs_planed, curr_corr, req_sect);
540 */
541
542 return req_sect;
543}
544
54761697 545static int drbd_rs_number_requests(struct drbd_device *mdev)
e65f440d
LE
546{
547 int number;
813472ce
PR
548
549 rcu_read_lock();
550 if (rcu_dereference(mdev->rs_plan_s)->size) {
e65f440d
LE
551 number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
552 mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
553 } else {
daeda1cc 554 mdev->c_sync_rate = rcu_dereference(mdev->ldev->disk_conf)->resync_rate;
e65f440d
LE
555 number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
556 }
813472ce 557 rcu_read_unlock();
e65f440d 558
e65f440d
LE
559 /* ignore the amount of pending requests, the resync controller should
560 * throttle down to incoming reply rate soon enough anyways. */
561 return number;
562}
563
99920dc5 564int w_make_resync_request(struct drbd_work *w, int cancel)
b411b363 565{
54761697 566 struct drbd_device *mdev = w->mdev;
b411b363
PR
567 unsigned long bit;
568 sector_t sector;
569 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
1816a2b4 570 int max_bio_size;
e65f440d 571 int number, rollback_i, size;
b411b363 572 int align, queued, sndbuf;
0f0601f4 573 int i = 0;
b411b363
PR
574
575 if (unlikely(cancel))
99920dc5 576 return 0;
b411b363 577
af85e8e8
LE
578 if (mdev->rs_total == 0) {
579 /* empty resync? */
580 drbd_resync_finished(mdev);
99920dc5 581 return 0;
af85e8e8
LE
582 }
583
b411b363
PR
584 if (!get_ldev(mdev)) {
585 /* Since we only need to access mdev->rsync a
586 get_ldev_if_state(mdev,D_FAILED) would be sufficient, but
587 to continue resync with a broken disk makes no sense at
588 all */
589 dev_err(DEV, "Disk broke down during resync!\n");
99920dc5 590 return 0;
b411b363
PR
591 }
592
0cfdd247 593 max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9;
e65f440d
LE
594 number = drbd_rs_number_requests(mdev);
595 if (number == 0)
0f0601f4 596 goto requeue;
b411b363 597
b411b363
PR
598 for (i = 0; i < number; i++) {
599 /* Stop generating RS requests, when half of the send buffer is filled */
e42325a5
PR
600 mutex_lock(&mdev->tconn->data.mutex);
601 if (mdev->tconn->data.socket) {
602 queued = mdev->tconn->data.socket->sk->sk_wmem_queued;
603 sndbuf = mdev->tconn->data.socket->sk->sk_sndbuf;
b411b363
PR
604 } else {
605 queued = 1;
606 sndbuf = 0;
607 }
e42325a5 608 mutex_unlock(&mdev->tconn->data.mutex);
b411b363
PR
609 if (queued > sndbuf / 2)
610 goto requeue;
611
612next_sector:
613 size = BM_BLOCK_SIZE;
614 bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo);
615
4b0715f0 616 if (bit == DRBD_END_OF_BITMAP) {
b411b363 617 mdev->bm_resync_fo = drbd_bm_bits(mdev);
b411b363 618 put_ldev(mdev);
99920dc5 619 return 0;
b411b363
PR
620 }
621
622 sector = BM_BIT_TO_SECT(bit);
623
e3555d85
PR
624 if (drbd_rs_should_slow_down(mdev, sector) ||
625 drbd_try_rs_begin_io(mdev, sector)) {
b411b363
PR
626 mdev->bm_resync_fo = bit;
627 goto requeue;
628 }
629 mdev->bm_resync_fo = bit + 1;
630
631 if (unlikely(drbd_bm_test_bit(mdev, bit) == 0)) {
632 drbd_rs_complete_io(mdev, sector);
633 goto next_sector;
634 }
635
1816a2b4 636#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
b411b363
PR
637 /* try to find some adjacent bits.
638 * we stop if we have already the maximum req size.
639 *
640 * Additionally always align bigger requests, in order to
641 * be prepared for all stripe sizes of software RAIDs.
b411b363
PR
642 */
643 align = 1;
d207450c 644 rollback_i = i;
b411b363 645 for (;;) {
1816a2b4 646 if (size + BM_BLOCK_SIZE > max_bio_size)
b411b363
PR
647 break;
648
649 /* Be always aligned */
650 if (sector & ((1<<(align+3))-1))
651 break;
652
653 /* do not cross extent boundaries */
654 if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
655 break;
656 /* now, is it actually dirty, after all?
657 * caution, drbd_bm_test_bit is tri-state for some
658 * obscure reason; ( b == 0 ) would get the out-of-band
659 * only accidentally right because of the "oddly sized"
660 * adjustment below */
661 if (drbd_bm_test_bit(mdev, bit+1) != 1)
662 break;
663 bit++;
664 size += BM_BLOCK_SIZE;
665 if ((BM_BLOCK_SIZE << align) <= size)
666 align++;
667 i++;
668 }
669 /* if we merged some,
670 * reset the offset to start the next drbd_bm_find_next from */
671 if (size > BM_BLOCK_SIZE)
672 mdev->bm_resync_fo = bit + 1;
673#endif
674
675 /* adjust very last sectors, in case we are oddly sized */
676 if (sector + (size>>9) > capacity)
677 size = (capacity-sector)<<9;
f399002e 678 if (mdev->tconn->agreed_pro_version >= 89 && mdev->tconn->csums_tfm) {
b411b363 679 switch (read_for_csum(mdev, sector, size)) {
80a40e43 680 case -EIO: /* Disk failure */
b411b363 681 put_ldev(mdev);
99920dc5 682 return -EIO;
80a40e43 683 case -EAGAIN: /* allocation failed, or ldev busy */
b411b363
PR
684 drbd_rs_complete_io(mdev, sector);
685 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
d207450c 686 i = rollback_i;
b411b363 687 goto requeue;
80a40e43
LE
688 case 0:
689 /* everything ok */
690 break;
691 default:
692 BUG();
b411b363
PR
693 }
694 } else {
99920dc5
AG
695 int err;
696
b411b363 697 inc_rs_pending(mdev);
99920dc5
AG
698 err = drbd_send_drequest(mdev, P_RS_DATA_REQUEST,
699 sector, size, ID_SYNCER);
700 if (err) {
b411b363
PR
701 dev_err(DEV, "drbd_send_drequest() failed, aborting...\n");
702 dec_rs_pending(mdev);
703 put_ldev(mdev);
99920dc5 704 return err;
b411b363
PR
705 }
706 }
707 }
708
709 if (mdev->bm_resync_fo >= drbd_bm_bits(mdev)) {
710 /* last syncer _request_ was sent,
711 * but the P_RS_DATA_REPLY not yet received. sync will end (and
712 * next sync group will resume), as soon as we receive the last
713 * resync data block, and the last bit is cleared.
714 * until then resync "work" is "inactive" ...
715 */
b411b363 716 put_ldev(mdev);
99920dc5 717 return 0;
b411b363
PR
718 }
719
720 requeue:
778f271d 721 mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
b411b363
PR
722 mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
723 put_ldev(mdev);
99920dc5 724 return 0;
b411b363
PR
725}
726
00d56944 727static int w_make_ov_request(struct drbd_work *w, int cancel)
b411b363 728{
54761697 729 struct drbd_device *mdev = w->mdev;
b411b363
PR
730 int number, i, size;
731 sector_t sector;
732 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
58ffa580 733 bool stop_sector_reached = false;
b411b363
PR
734
735 if (unlikely(cancel))
736 return 1;
737
2649f080 738 number = drbd_rs_number_requests(mdev);
b411b363
PR
739
740 sector = mdev->ov_position;
741 for (i = 0; i < number; i++) {
58ffa580 742 if (sector >= capacity)
b411b363 743 return 1;
58ffa580
LE
744
745 /* We check for "finished" only in the reply path:
746 * w_e_end_ov_reply().
747 * We need to send at least one request out. */
748 stop_sector_reached = i > 0
749 && verify_can_do_stop_sector(mdev)
750 && sector >= mdev->ov_stop_sector;
751 if (stop_sector_reached)
752 break;
b411b363
PR
753
754 size = BM_BLOCK_SIZE;
755
e3555d85
PR
756 if (drbd_rs_should_slow_down(mdev, sector) ||
757 drbd_try_rs_begin_io(mdev, sector)) {
b411b363
PR
758 mdev->ov_position = sector;
759 goto requeue;
760 }
761
762 if (sector + (size>>9) > capacity)
763 size = (capacity-sector)<<9;
764
765 inc_rs_pending(mdev);
5b9f499c 766 if (drbd_send_ov_request(mdev, sector, size)) {
b411b363
PR
767 dec_rs_pending(mdev);
768 return 0;
769 }
770 sector += BM_SECT_PER_BIT;
771 }
772 mdev->ov_position = sector;
773
774 requeue:
2649f080 775 mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
58ffa580
LE
776 if (i == 0 || !stop_sector_reached)
777 mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
b411b363
PR
778 return 1;
779}
780
99920dc5 781int w_ov_finished(struct drbd_work *w, int cancel)
b411b363 782{
54761697 783 struct drbd_device *mdev = w->mdev;
b411b363 784 kfree(w);
8f7bed77 785 ov_out_of_sync_print(mdev);
b411b363
PR
786 drbd_resync_finished(mdev);
787
99920dc5 788 return 0;
b411b363
PR
789}
790
99920dc5 791static int w_resync_finished(struct drbd_work *w, int cancel)
b411b363 792{
54761697 793 struct drbd_device *mdev = w->mdev;
b411b363
PR
794 kfree(w);
795
796 drbd_resync_finished(mdev);
797
99920dc5 798 return 0;
b411b363
PR
799}
800
54761697 801static void ping_peer(struct drbd_device *mdev)
af85e8e8 802{
2a67d8b9
PR
803 struct drbd_tconn *tconn = mdev->tconn;
804
805 clear_bit(GOT_PING_ACK, &tconn->flags);
806 request_ping(tconn);
807 wait_event(tconn->ping_wait,
808 test_bit(GOT_PING_ACK, &tconn->flags) || mdev->state.conn < C_CONNECTED);
af85e8e8
LE
809}
810
54761697 811int drbd_resync_finished(struct drbd_device *mdev)
b411b363
PR
812{
813 unsigned long db, dt, dbdt;
814 unsigned long n_oos;
815 union drbd_state os, ns;
816 struct drbd_work *w;
817 char *khelper_cmd = NULL;
26525618 818 int verify_done = 0;
b411b363
PR
819
820 /* Remove all elements from the resync LRU. Since future actions
821 * might set bits in the (main) bitmap, then the entries in the
822 * resync LRU would be wrong. */
823 if (drbd_rs_del_all(mdev)) {
824 /* In case this is not possible now, most probably because
825 * there are P_RS_DATA_REPLY Packets lingering on the worker's
826 * queue (or even the read operations for those packets
827 * is not finished by now). Retry in 100ms. */
828
20ee6390 829 schedule_timeout_interruptible(HZ / 10);
b411b363
PR
830 w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
831 if (w) {
832 w->cb = w_resync_finished;
9b743da9 833 w->mdev = mdev;
d5b27b01 834 drbd_queue_work(&mdev->tconn->sender_work, w);
b411b363
PR
835 return 1;
836 }
837 dev_err(DEV, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n");
838 }
839
840 dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
841 if (dt <= 0)
842 dt = 1;
58ffa580 843
b411b363 844 db = mdev->rs_total;
58ffa580
LE
845 /* adjust for verify start and stop sectors, respective reached position */
846 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
847 db -= mdev->ov_left;
848
b411b363
PR
849 dbdt = Bit2KB(db/dt);
850 mdev->rs_paused /= HZ;
851
852 if (!get_ldev(mdev))
853 goto out;
854
af85e8e8
LE
855 ping_peer(mdev);
856
87eeee41 857 spin_lock_irq(&mdev->tconn->req_lock);
78bae59b 858 os = drbd_read_state(mdev);
b411b363 859
26525618
LE
860 verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);
861
b411b363
PR
862 /* This protects us against multiple calls (that can happen in the presence
863 of application IO), and against connectivity loss just before we arrive here. */
864 if (os.conn <= C_CONNECTED)
865 goto out_unlock;
866
867 ns = os;
868 ns.conn = C_CONNECTED;
869
870 dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
58ffa580 871 verify_done ? "Online verify" : "Resync",
b411b363
PR
872 dt + mdev->rs_paused, mdev->rs_paused, dbdt);
873
874 n_oos = drbd_bm_total_weight(mdev);
875
876 if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
877 if (n_oos) {
878 dev_alert(DEV, "Online verify found %lu %dk block out of sync!\n",
879 n_oos, Bit2KB(1));
880 khelper_cmd = "out-of-sync";
881 }
882 } else {
883 D_ASSERT((n_oos - mdev->rs_failed) == 0);
884
885 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
886 khelper_cmd = "after-resync-target";
887
f399002e 888 if (mdev->tconn->csums_tfm && mdev->rs_total) {
b411b363
PR
889 const unsigned long s = mdev->rs_same_csum;
890 const unsigned long t = mdev->rs_total;
891 const int ratio =
892 (t == 0) ? 0 :
893 (t < 100000) ? ((s*100)/t) : (s/(t/100));
24c4830c 894 dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; "
b411b363
PR
895 "transferred %luK total %luK\n",
896 ratio,
897 Bit2KB(mdev->rs_same_csum),
898 Bit2KB(mdev->rs_total - mdev->rs_same_csum),
899 Bit2KB(mdev->rs_total));
900 }
901 }
902
903 if (mdev->rs_failed) {
904 dev_info(DEV, " %lu failed blocks\n", mdev->rs_failed);
905
906 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
907 ns.disk = D_INCONSISTENT;
908 ns.pdsk = D_UP_TO_DATE;
909 } else {
910 ns.disk = D_UP_TO_DATE;
911 ns.pdsk = D_INCONSISTENT;
912 }
913 } else {
914 ns.disk = D_UP_TO_DATE;
915 ns.pdsk = D_UP_TO_DATE;
916
917 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
918 if (mdev->p_uuid) {
919 int i;
920 for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
921 _drbd_uuid_set(mdev, i, mdev->p_uuid[i]);
922 drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_CURRENT]);
923 _drbd_uuid_set(mdev, UI_CURRENT, mdev->p_uuid[UI_CURRENT]);
924 } else {
925 dev_err(DEV, "mdev->p_uuid is NULL! BUG\n");
926 }
927 }
928
62b0da3a
LE
929 if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
930 /* for verify runs, we don't update uuids here,
931 * so there would be nothing to report. */
932 drbd_uuid_set_bm(mdev, 0UL);
933 drbd_print_uuids(mdev, "updated UUIDs");
934 if (mdev->p_uuid) {
935 /* Now the two UUID sets are equal, update what we
936 * know of the peer. */
937 int i;
938 for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
939 mdev->p_uuid[i] = mdev->ldev->md.uuid[i];
940 }
b411b363
PR
941 }
942 }
943
944 _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
945out_unlock:
87eeee41 946 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363
PR
947 put_ldev(mdev);
948out:
949 mdev->rs_total = 0;
950 mdev->rs_failed = 0;
951 mdev->rs_paused = 0;
58ffa580
LE
952
953 /* reset start sector, if we reached end of device */
954 if (verify_done && mdev->ov_left == 0)
26525618 955 mdev->ov_start_sector = 0;
b411b363 956
13d42685
LE
957 drbd_md_sync(mdev);
958
b411b363
PR
959 if (khelper_cmd)
960 drbd_khelper(mdev, khelper_cmd);
961
962 return 1;
963}
964
965/* helper */
54761697 966static void move_to_net_ee_or_free(struct drbd_device *mdev, struct drbd_peer_request *peer_req)
b411b363 967{
045417f7 968 if (drbd_peer_req_has_active_page(peer_req)) {
b411b363 969 /* This might happen if sendpage() has not finished */
db830c46 970 int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
435f0740
LE
971 atomic_add(i, &mdev->pp_in_use_by_net);
972 atomic_sub(i, &mdev->pp_in_use);
87eeee41 973 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 974 list_add_tail(&peer_req->w.list, &mdev->net_ee);
87eeee41 975 spin_unlock_irq(&mdev->tconn->req_lock);
435f0740 976 wake_up(&drbd_pp_wait);
b411b363 977 } else
3967deb1 978 drbd_free_peer_req(mdev, peer_req);
b411b363
PR
979}
980
981/**
982 * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
983 * @mdev: DRBD device.
984 * @w: work object.
985 * @cancel: The connection will be closed anyways
986 */
99920dc5 987int w_e_end_data_req(struct drbd_work *w, int cancel)
b411b363 988{
db830c46 989 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
54761697 990 struct drbd_device *mdev = w->mdev;
99920dc5 991 int err;
b411b363
PR
992
993 if (unlikely(cancel)) {
3967deb1 994 drbd_free_peer_req(mdev, peer_req);
b411b363 995 dec_unacked(mdev);
99920dc5 996 return 0;
b411b363
PR
997 }
998
db830c46 999 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
99920dc5 1000 err = drbd_send_block(mdev, P_DATA_REPLY, peer_req);
b411b363
PR
1001 } else {
1002 if (__ratelimit(&drbd_ratelimit_state))
1003 dev_err(DEV, "Sending NegDReply. sector=%llus.\n",
db830c46 1004 (unsigned long long)peer_req->i.sector);
b411b363 1005
99920dc5 1006 err = drbd_send_ack(mdev, P_NEG_DREPLY, peer_req);
b411b363
PR
1007 }
1008
1009 dec_unacked(mdev);
1010
db830c46 1011 move_to_net_ee_or_free(mdev, peer_req);
b411b363 1012
99920dc5 1013 if (unlikely(err))
b411b363 1014 dev_err(DEV, "drbd_send_block() failed\n");
99920dc5 1015 return err;
b411b363
PR
1016}
1017
1018/**
a209b4ae 1019 * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
b411b363
PR
1020 * @mdev: DRBD device.
1021 * @w: work object.
1022 * @cancel: The connection will be closed anyways
1023 */
99920dc5 1024int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
b411b363 1025{
db830c46 1026 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
54761697 1027 struct drbd_device *mdev = w->mdev;
99920dc5 1028 int err;
b411b363
PR
1029
1030 if (unlikely(cancel)) {
3967deb1 1031 drbd_free_peer_req(mdev, peer_req);
b411b363 1032 dec_unacked(mdev);
99920dc5 1033 return 0;
b411b363
PR
1034 }
1035
1036 if (get_ldev_if_state(mdev, D_FAILED)) {
db830c46 1037 drbd_rs_complete_io(mdev, peer_req->i.sector);
b411b363
PR
1038 put_ldev(mdev);
1039 }
1040
d612d309 1041 if (mdev->state.conn == C_AHEAD) {
99920dc5 1042 err = drbd_send_ack(mdev, P_RS_CANCEL, peer_req);
db830c46 1043 } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
b411b363
PR
1044 if (likely(mdev->state.pdsk >= D_INCONSISTENT)) {
1045 inc_rs_pending(mdev);
99920dc5 1046 err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req);
b411b363
PR
1047 } else {
1048 if (__ratelimit(&drbd_ratelimit_state))
1049 dev_err(DEV, "Not sending RSDataReply, "
1050 "partner DISKLESS!\n");
99920dc5 1051 err = 0;
b411b363
PR
1052 }
1053 } else {
1054 if (__ratelimit(&drbd_ratelimit_state))
1055 dev_err(DEV, "Sending NegRSDReply. sector %llus.\n",
db830c46 1056 (unsigned long long)peer_req->i.sector);
b411b363 1057
99920dc5 1058 err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req);
b411b363
PR
1059
1060 /* update resync data with failure */
db830c46 1061 drbd_rs_failed_io(mdev, peer_req->i.sector, peer_req->i.size);
b411b363
PR
1062 }
1063
1064 dec_unacked(mdev);
1065
db830c46 1066 move_to_net_ee_or_free(mdev, peer_req);
b411b363 1067
99920dc5 1068 if (unlikely(err))
b411b363 1069 dev_err(DEV, "drbd_send_block() failed\n");
99920dc5 1070 return err;
b411b363
PR
1071}
1072
99920dc5 1073int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
b411b363 1074{
db830c46 1075 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
54761697 1076 struct drbd_device *mdev = w->mdev;
b411b363
PR
1077 struct digest_info *di;
1078 int digest_size;
1079 void *digest = NULL;
99920dc5 1080 int err, eq = 0;
b411b363
PR
1081
1082 if (unlikely(cancel)) {
3967deb1 1083 drbd_free_peer_req(mdev, peer_req);
b411b363 1084 dec_unacked(mdev);
99920dc5 1085 return 0;
b411b363
PR
1086 }
1087
1d53f09e 1088 if (get_ldev(mdev)) {
db830c46 1089 drbd_rs_complete_io(mdev, peer_req->i.sector);
1d53f09e
LE
1090 put_ldev(mdev);
1091 }
b411b363 1092
db830c46 1093 di = peer_req->digest;
b411b363 1094
db830c46 1095 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
b411b363
PR
1096 /* quick hack to try to avoid a race against reconfiguration.
1097 * a real fix would be much more involved,
1098 * introducing more locking mechanisms */
f399002e
LE
1099 if (mdev->tconn->csums_tfm) {
1100 digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
b411b363
PR
1101 D_ASSERT(digest_size == di->digest_size);
1102 digest = kmalloc(digest_size, GFP_NOIO);
1103 }
1104 if (digest) {
f399002e 1105 drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
b411b363
PR
1106 eq = !memcmp(digest, di->digest, digest_size);
1107 kfree(digest);
1108 }
1109
1110 if (eq) {
db830c46 1111 drbd_set_in_sync(mdev, peer_req->i.sector, peer_req->i.size);
676396d5 1112 /* rs_same_csums unit is BM_BLOCK_SIZE */
db830c46 1113 mdev->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
99920dc5 1114 err = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, peer_req);
b411b363
PR
1115 } else {
1116 inc_rs_pending(mdev);
db830c46
AG
1117 peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1118 peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
204bba99 1119 kfree(di);
99920dc5 1120 err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req);
b411b363
PR
1121 }
1122 } else {
99920dc5 1123 err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req);
b411b363
PR
1124 if (__ratelimit(&drbd_ratelimit_state))
1125 dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
1126 }
1127
1128 dec_unacked(mdev);
db830c46 1129 move_to_net_ee_or_free(mdev, peer_req);
b411b363 1130
99920dc5 1131 if (unlikely(err))
b411b363 1132 dev_err(DEV, "drbd_send_block/ack() failed\n");
99920dc5 1133 return err;
b411b363
PR
1134}
1135
99920dc5 1136int w_e_end_ov_req(struct drbd_work *w, int cancel)
b411b363 1137{
db830c46 1138 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
54761697 1139 struct drbd_device *mdev = w->mdev;
db830c46
AG
1140 sector_t sector = peer_req->i.sector;
1141 unsigned int size = peer_req->i.size;
b411b363
PR
1142 int digest_size;
1143 void *digest;
99920dc5 1144 int err = 0;
b411b363
PR
1145
1146 if (unlikely(cancel))
1147 goto out;
1148
f399002e 1149 digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
b411b363 1150 digest = kmalloc(digest_size, GFP_NOIO);
8f21420e 1151 if (!digest) {
99920dc5 1152 err = 1; /* terminate the connection in case the allocation failed */
8f21420e 1153 goto out;
b411b363
PR
1154 }
1155
db830c46 1156 if (likely(!(peer_req->flags & EE_WAS_ERROR)))
f399002e 1157 drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);
8f21420e
PR
1158 else
1159 memset(digest, 0, digest_size);
1160
53ea4331
LE
1161 /* Free e and pages before send.
1162 * In case we block on congestion, we could otherwise run into
1163 * some distributed deadlock, if the other side blocks on
1164 * congestion as well, because our receiver blocks in
c37c8ecf 1165 * drbd_alloc_pages due to pp_in_use > max_buffers. */
3967deb1 1166 drbd_free_peer_req(mdev, peer_req);
db830c46 1167 peer_req = NULL;
8f21420e 1168 inc_rs_pending(mdev);
99920dc5
AG
1169 err = drbd_send_drequest_csum(mdev, sector, size, digest, digest_size, P_OV_REPLY);
1170 if (err)
8f21420e
PR
1171 dec_rs_pending(mdev);
1172 kfree(digest);
1173
b411b363 1174out:
db830c46 1175 if (peer_req)
3967deb1 1176 drbd_free_peer_req(mdev, peer_req);
b411b363 1177 dec_unacked(mdev);
99920dc5 1178 return err;
b411b363
PR
1179}
1180
54761697 1181void drbd_ov_out_of_sync_found(struct drbd_device *mdev, sector_t sector, int size)
b411b363
PR
1182{
1183 if (mdev->ov_last_oos_start + mdev->ov_last_oos_size == sector) {
1184 mdev->ov_last_oos_size += size>>9;
1185 } else {
1186 mdev->ov_last_oos_start = sector;
1187 mdev->ov_last_oos_size = size>>9;
1188 }
1189 drbd_set_out_of_sync(mdev, sector, size);
b411b363
PR
1190}
1191
99920dc5 1192int w_e_end_ov_reply(struct drbd_work *w, int cancel)
b411b363 1193{
db830c46 1194 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
54761697 1195 struct drbd_device *mdev = w->mdev;
b411b363 1196 struct digest_info *di;
b411b363 1197 void *digest;
db830c46
AG
1198 sector_t sector = peer_req->i.sector;
1199 unsigned int size = peer_req->i.size;
53ea4331 1200 int digest_size;
99920dc5 1201 int err, eq = 0;
58ffa580 1202 bool stop_sector_reached = false;
b411b363
PR
1203
1204 if (unlikely(cancel)) {
3967deb1 1205 drbd_free_peer_req(mdev, peer_req);
b411b363 1206 dec_unacked(mdev);
99920dc5 1207 return 0;
b411b363
PR
1208 }
1209
1210 /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1211 * the resync lru has been cleaned up already */
1d53f09e 1212 if (get_ldev(mdev)) {
db830c46 1213 drbd_rs_complete_io(mdev, peer_req->i.sector);
1d53f09e
LE
1214 put_ldev(mdev);
1215 }
b411b363 1216
db830c46 1217 di = peer_req->digest;
b411b363 1218
db830c46 1219 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
f399002e 1220 digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
b411b363
PR
1221 digest = kmalloc(digest_size, GFP_NOIO);
1222 if (digest) {
f399002e 1223 drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);
b411b363
PR
1224
1225 D_ASSERT(digest_size == di->digest_size);
1226 eq = !memcmp(digest, di->digest, digest_size);
1227 kfree(digest);
1228 }
b411b363
PR
1229 }
1230
9676c760
LE
1231 /* Free peer_req and pages before send.
1232 * In case we block on congestion, we could otherwise run into
1233 * some distributed deadlock, if the other side blocks on
1234 * congestion as well, because our receiver blocks in
c37c8ecf 1235 * drbd_alloc_pages due to pp_in_use > max_buffers. */
3967deb1 1236 drbd_free_peer_req(mdev, peer_req);
b411b363 1237 if (!eq)
8f7bed77 1238 drbd_ov_out_of_sync_found(mdev, sector, size);
b411b363 1239 else
8f7bed77 1240 ov_out_of_sync_print(mdev);
b411b363 1241
99920dc5 1242 err = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size,
fa79abd8 1243 eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
b411b363 1244
53ea4331 1245 dec_unacked(mdev);
b411b363 1246
ea5442af
LE
1247 --mdev->ov_left;
1248
1249 /* let's advance progress step marks only for every other megabyte */
1250 if ((mdev->ov_left & 0x200) == 0x200)
1251 drbd_advance_rs_marks(mdev, mdev->ov_left);
1252
58ffa580
LE
1253 stop_sector_reached = verify_can_do_stop_sector(mdev) &&
1254 (sector + (size>>9)) >= mdev->ov_stop_sector;
1255
1256 if (mdev->ov_left == 0 || stop_sector_reached) {
8f7bed77 1257 ov_out_of_sync_print(mdev);
b411b363
PR
1258 drbd_resync_finished(mdev);
1259 }
1260
99920dc5 1261 return err;
b411b363
PR
1262}
1263
99920dc5 1264int w_prev_work_done(struct drbd_work *w, int cancel)
b411b363
PR
1265{
1266 struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w);
00d56944 1267
b411b363 1268 complete(&b->done);
99920dc5 1269 return 0;
b411b363
PR
1270}
1271
b6dd1a89
LE
1272/* FIXME
1273 * We need to track the number of pending barrier acks,
1274 * and to be able to wait for them.
1275 * See also comment in drbd_adm_attach before drbd_suspend_io.
1276 */
a186e478 1277static int drbd_send_barrier(struct drbd_tconn *tconn)
b411b363 1278{
9f5bdc33 1279 struct p_barrier *p;
b6dd1a89 1280 struct drbd_socket *sock;
b411b363 1281
b6dd1a89
LE
1282 sock = &tconn->data;
1283 p = conn_prepare_command(tconn, sock);
9f5bdc33
AG
1284 if (!p)
1285 return -EIO;
b6dd1a89
LE
1286 p->barrier = tconn->send.current_epoch_nr;
1287 p->pad = 0;
1288 tconn->send.current_epoch_writes = 0;
1289
1290 return conn_send_command(tconn, sock, P_BARRIER, sizeof(*p), NULL, 0);
b411b363
PR
1291}
1292
99920dc5 1293int w_send_write_hint(struct drbd_work *w, int cancel)
b411b363 1294{
54761697 1295 struct drbd_device *mdev = w->mdev;
9f5bdc33
AG
1296 struct drbd_socket *sock;
1297
b411b363 1298 if (cancel)
99920dc5 1299 return 0;
9f5bdc33
AG
1300 sock = &mdev->tconn->data;
1301 if (!drbd_prepare_command(mdev, sock))
1302 return -EIO;
e658983a 1303 return drbd_send_command(mdev, sock, P_UNPLUG_REMOTE, 0, NULL, 0);
b411b363
PR
1304}
1305
4eb9b3cb
LE
1306static void re_init_if_first_write(struct drbd_tconn *tconn, unsigned int epoch)
1307{
1308 if (!tconn->send.seen_any_write_yet) {
1309 tconn->send.seen_any_write_yet = true;
1310 tconn->send.current_epoch_nr = epoch;
1311 tconn->send.current_epoch_writes = 0;
1312 }
1313}
1314
1315static void maybe_send_barrier(struct drbd_tconn *tconn, unsigned int epoch)
1316{
1317 /* re-init if first write on this connection */
1318 if (!tconn->send.seen_any_write_yet)
1319 return;
1320 if (tconn->send.current_epoch_nr != epoch) {
1321 if (tconn->send.current_epoch_writes)
1322 drbd_send_barrier(tconn);
1323 tconn->send.current_epoch_nr = epoch;
1324 }
1325}
1326
8f7bed77 1327int w_send_out_of_sync(struct drbd_work *w, int cancel)
73a01a18
PR
1328{
1329 struct drbd_request *req = container_of(w, struct drbd_request, w);
54761697 1330 struct drbd_device *mdev = w->mdev;
b6dd1a89 1331 struct drbd_tconn *tconn = mdev->tconn;
99920dc5 1332 int err;
73a01a18
PR
1333
1334 if (unlikely(cancel)) {
8554df1c 1335 req_mod(req, SEND_CANCELED);
99920dc5 1336 return 0;
73a01a18
PR
1337 }
1338
b6dd1a89
LE
1339 /* this time, no tconn->send.current_epoch_writes++;
1340 * If it was sent, it was the closing barrier for the last
1341 * replicated epoch, before we went into AHEAD mode.
1342 * No more barriers will be sent, until we leave AHEAD mode again. */
4eb9b3cb 1343 maybe_send_barrier(tconn, req->epoch);
b6dd1a89 1344
8f7bed77 1345 err = drbd_send_out_of_sync(mdev, req);
8554df1c 1346 req_mod(req, OOS_HANDED_TO_NETWORK);
73a01a18 1347
99920dc5 1348 return err;
73a01a18
PR
1349}
1350
b411b363
PR
1351/**
1352 * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
1353 * @mdev: DRBD device.
1354 * @w: work object.
1355 * @cancel: The connection will be closed anyways
1356 */
99920dc5 1357int w_send_dblock(struct drbd_work *w, int cancel)
b411b363
PR
1358{
1359 struct drbd_request *req = container_of(w, struct drbd_request, w);
54761697 1360 struct drbd_device *mdev = w->mdev;
b6dd1a89 1361 struct drbd_tconn *tconn = mdev->tconn;
99920dc5 1362 int err;
b411b363
PR
1363
1364 if (unlikely(cancel)) {
8554df1c 1365 req_mod(req, SEND_CANCELED);
99920dc5 1366 return 0;
b411b363
PR
1367 }
1368
4eb9b3cb
LE
1369 re_init_if_first_write(tconn, req->epoch);
1370 maybe_send_barrier(tconn, req->epoch);
b6dd1a89
LE
1371 tconn->send.current_epoch_writes++;
1372
99920dc5
AG
1373 err = drbd_send_dblock(mdev, req);
1374 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
b411b363 1375
99920dc5 1376 return err;
b411b363
PR
1377}
1378
1379/**
1380 * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
1381 * @mdev: DRBD device.
1382 * @w: work object.
1383 * @cancel: The connection will be closed anyways
1384 */
99920dc5 1385int w_send_read_req(struct drbd_work *w, int cancel)
b411b363
PR
1386{
1387 struct drbd_request *req = container_of(w, struct drbd_request, w);
54761697 1388 struct drbd_device *mdev = w->mdev;
b6dd1a89 1389 struct drbd_tconn *tconn = mdev->tconn;
99920dc5 1390 int err;
b411b363
PR
1391
1392 if (unlikely(cancel)) {
8554df1c 1393 req_mod(req, SEND_CANCELED);
99920dc5 1394 return 0;
b411b363
PR
1395 }
1396
b6dd1a89
LE
1397 /* Even read requests may close a write epoch,
1398 * if there was any yet. */
4eb9b3cb 1399 maybe_send_barrier(tconn, req->epoch);
b6dd1a89 1400
99920dc5 1401 err = drbd_send_drequest(mdev, P_DATA_REQUEST, req->i.sector, req->i.size,
6c1005e7 1402 (unsigned long)req);
b411b363 1403
99920dc5 1404 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
b411b363 1405
99920dc5 1406 return err;
b411b363
PR
1407}
1408
99920dc5 1409int w_restart_disk_io(struct drbd_work *w, int cancel)
265be2d0
PR
1410{
1411 struct drbd_request *req = container_of(w, struct drbd_request, w);
54761697 1412 struct drbd_device *mdev = w->mdev;
265be2d0 1413
0778286a 1414 if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
56392d2f 1415 drbd_al_begin_io(mdev, &req->i, false);
265be2d0
PR
1416
1417 drbd_req_make_private_bio(req, req->master_bio);
1418 req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
1419 generic_make_request(req->private_bio);
1420
99920dc5 1421 return 0;
265be2d0
PR
1422}
1423
54761697 1424static int _drbd_may_sync_now(struct drbd_device *mdev)
b411b363 1425{
54761697 1426 struct drbd_device *odev = mdev;
95f8efd0 1427 int resync_after;
b411b363
PR
1428
1429 while (1) {
a3f8f7dc 1430 if (!odev->ldev || odev->state.disk == D_DISKLESS)
438c8374 1431 return 1;
daeda1cc 1432 rcu_read_lock();
95f8efd0 1433 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
daeda1cc 1434 rcu_read_unlock();
95f8efd0 1435 if (resync_after == -1)
b411b363 1436 return 1;
95f8efd0 1437 odev = minor_to_mdev(resync_after);
a3f8f7dc 1438 if (!odev)
841ce241 1439 return 1;
b411b363
PR
1440 if ((odev->state.conn >= C_SYNC_SOURCE &&
1441 odev->state.conn <= C_PAUSED_SYNC_T) ||
1442 odev->state.aftr_isp || odev->state.peer_isp ||
1443 odev->state.user_isp)
1444 return 0;
1445 }
1446}
1447
1448/**
1449 * _drbd_pause_after() - Pause resync on all devices that may not resync now
1450 * @mdev: DRBD device.
1451 *
1452 * Called from process context only (admin command and after_state_ch).
1453 */
54761697 1454static int _drbd_pause_after(struct drbd_device *mdev)
b411b363 1455{
54761697 1456 struct drbd_device *odev;
b411b363
PR
1457 int i, rv = 0;
1458
695d08fa 1459 rcu_read_lock();
81a5d60e 1460 idr_for_each_entry(&minors, odev, i) {
b411b363
PR
1461 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1462 continue;
1463 if (!_drbd_may_sync_now(odev))
1464 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
1465 != SS_NOTHING_TO_DO);
1466 }
695d08fa 1467 rcu_read_unlock();
b411b363
PR
1468
1469 return rv;
1470}
1471
1472/**
1473 * _drbd_resume_next() - Resume resync on all devices that may resync now
1474 * @mdev: DRBD device.
1475 *
1476 * Called from process context only (admin command and worker).
1477 */
54761697 1478static int _drbd_resume_next(struct drbd_device *mdev)
b411b363 1479{
54761697 1480 struct drbd_device *odev;
b411b363
PR
1481 int i, rv = 0;
1482
695d08fa 1483 rcu_read_lock();
81a5d60e 1484 idr_for_each_entry(&minors, odev, i) {
b411b363
PR
1485 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1486 continue;
1487 if (odev->state.aftr_isp) {
1488 if (_drbd_may_sync_now(odev))
1489 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
1490 CS_HARD, NULL)
1491 != SS_NOTHING_TO_DO) ;
1492 }
1493 }
695d08fa 1494 rcu_read_unlock();
b411b363
PR
1495 return rv;
1496}
1497
54761697 1498void resume_next_sg(struct drbd_device *mdev)
b411b363
PR
1499{
1500 write_lock_irq(&global_state_lock);
1501 _drbd_resume_next(mdev);
1502 write_unlock_irq(&global_state_lock);
1503}
1504
54761697 1505void suspend_other_sg(struct drbd_device *mdev)
b411b363
PR
1506{
1507 write_lock_irq(&global_state_lock);
1508 _drbd_pause_after(mdev);
1509 write_unlock_irq(&global_state_lock);
1510}
1511
dc97b708 1512/* caller must hold global_state_lock */
54761697 1513enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *mdev, int o_minor)
b411b363 1514{
54761697 1515 struct drbd_device *odev;
95f8efd0 1516 int resync_after;
b411b363
PR
1517
1518 if (o_minor == -1)
1519 return NO_ERROR;
a3f8f7dc 1520 if (o_minor < -1 || o_minor > MINORMASK)
95f8efd0 1521 return ERR_RESYNC_AFTER;
b411b363
PR
1522
1523 /* check for loops */
1524 odev = minor_to_mdev(o_minor);
1525 while (1) {
1526 if (odev == mdev)
95f8efd0 1527 return ERR_RESYNC_AFTER_CYCLE;
b411b363 1528
a3f8f7dc
LE
1529 /* You are free to depend on diskless, non-existing,
1530 * or not yet/no longer existing minors.
1531 * We only reject dependency loops.
1532 * We cannot follow the dependency chain beyond a detached or
1533 * missing minor.
1534 */
1535 if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS)
1536 return NO_ERROR;
1537
daeda1cc 1538 rcu_read_lock();
95f8efd0 1539 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
daeda1cc 1540 rcu_read_unlock();
b411b363 1541 /* dependency chain ends here, no cycles. */
95f8efd0 1542 if (resync_after == -1)
b411b363
PR
1543 return NO_ERROR;
1544
1545 /* follow the dependency chain */
95f8efd0 1546 odev = minor_to_mdev(resync_after);
b411b363
PR
1547 }
1548}
1549
dc97b708 1550/* caller must hold global_state_lock */
54761697 1551void drbd_resync_after_changed(struct drbd_device *mdev)
b411b363
PR
1552{
1553 int changes;
b411b363 1554
dc97b708
PR
1555 do {
1556 changes = _drbd_pause_after(mdev);
1557 changes |= _drbd_resume_next(mdev);
1558 } while (changes);
b411b363
PR
1559}
1560
54761697 1561void drbd_rs_controller_reset(struct drbd_device *mdev)
9bd28d3c 1562{
813472ce
PR
1563 struct fifo_buffer *plan;
1564
9bd28d3c
LE
1565 atomic_set(&mdev->rs_sect_in, 0);
1566 atomic_set(&mdev->rs_sect_ev, 0);
1567 mdev->rs_in_flight = 0;
813472ce
PR
1568
1569 /* Updating the RCU protected object in place is necessary since
1570 this function gets called from atomic context.
1571 It is valid since all other updates also lead to an completely
1572 empty fifo */
1573 rcu_read_lock();
1574 plan = rcu_dereference(mdev->rs_plan_s);
1575 plan->total = 0;
1576 fifo_set(plan, 0);
1577 rcu_read_unlock();
9bd28d3c
LE
1578}
1579
1f04af33
PR
1580void start_resync_timer_fn(unsigned long data)
1581{
54761697 1582 struct drbd_device *mdev = (struct drbd_device *) data;
1f04af33 1583
d5b27b01 1584 drbd_queue_work(&mdev->tconn->sender_work, &mdev->start_resync_work);
1f04af33
PR
1585}
1586
99920dc5 1587int w_start_resync(struct drbd_work *w, int cancel)
1f04af33 1588{
54761697 1589 struct drbd_device *mdev = w->mdev;
00d56944 1590
1f04af33
PR
1591 if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) {
1592 dev_warn(DEV, "w_start_resync later...\n");
1593 mdev->start_resync_timer.expires = jiffies + HZ/10;
1594 add_timer(&mdev->start_resync_timer);
99920dc5 1595 return 0;
1f04af33
PR
1596 }
1597
1598 drbd_start_resync(mdev, C_SYNC_SOURCE);
36baf611 1599 clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags);
99920dc5 1600 return 0;
1f04af33
PR
1601}
1602
b411b363
PR
1603/**
1604 * drbd_start_resync() - Start the resync process
1605 * @mdev: DRBD device.
1606 * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET
1607 *
1608 * This function might bring you directly into one of the
1609 * C_PAUSED_SYNC_* states.
1610 */
54761697 1611void drbd_start_resync(struct drbd_device *mdev, enum drbd_conns side)
b411b363
PR
1612{
1613 union drbd_state ns;
1614 int r;
1615
c4752ef1 1616 if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn < C_AHEAD) {
b411b363
PR
1617 dev_err(DEV, "Resync already running!\n");
1618 return;
1619 }
1620
e64a3294
PR
1621 if (!test_bit(B_RS_H_DONE, &mdev->flags)) {
1622 if (side == C_SYNC_TARGET) {
1623 /* Since application IO was locked out during C_WF_BITMAP_T and
1624 C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
 1625 we check whether we may make the data inconsistent. */
1626 r = drbd_khelper(mdev, "before-resync-target");
1627 r = (r >> 8) & 0xff;
1628 if (r > 0) {
1629 dev_info(DEV, "before-resync-target handler returned %d, "
09b9e797 1630 "dropping connection.\n", r);
38fa9988 1631 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
09b9e797
PR
1632 return;
1633 }
e64a3294
PR
1634 } else /* C_SYNC_SOURCE */ {
1635 r = drbd_khelper(mdev, "before-resync-source");
1636 r = (r >> 8) & 0xff;
1637 if (r > 0) {
1638 if (r == 3) {
1639 dev_info(DEV, "before-resync-source handler returned %d, "
1640 "ignoring. Old userland tools?", r);
1641 } else {
1642 dev_info(DEV, "before-resync-source handler returned %d, "
1643 "dropping connection.\n", r);
38fa9988 1644 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
e64a3294
PR
1645 return;
1646 }
1647 }
09b9e797 1648 }
b411b363
PR
1649 }
1650
e64a3294 1651 if (current == mdev->tconn->worker.task) {
dad20554 1652 /* The worker should not sleep waiting for state_mutex,
e64a3294 1654 as that can take a long time */
8410da8f 1654 if (!mutex_trylock(mdev->state_mutex)) {
e64a3294
PR
1655 set_bit(B_RS_H_DONE, &mdev->flags);
1656 mdev->start_resync_timer.expires = jiffies + HZ/5;
1657 add_timer(&mdev->start_resync_timer);
1658 return;
1659 }
1660 } else {
8410da8f 1661 mutex_lock(mdev->state_mutex);
e64a3294
PR
1662 }
1663 clear_bit(B_RS_H_DONE, &mdev->flags);
b411b363 1664
0cfac5dd 1665 write_lock_irq(&global_state_lock);
a700471b
PR
1666 /* Did some connection breakage or IO error race with us? */
1667 if (mdev->state.conn < C_CONNECTED
1668 || !get_ldev_if_state(mdev, D_NEGOTIATING)) {
0cfac5dd 1669 write_unlock_irq(&global_state_lock);
8410da8f 1670 mutex_unlock(mdev->state_mutex);
b411b363
PR
1671 return;
1672 }
1673
78bae59b 1674 ns = drbd_read_state(mdev);
b411b363
PR
1675
1676 ns.aftr_isp = !_drbd_may_sync_now(mdev);
1677
1678 ns.conn = side;
1679
1680 if (side == C_SYNC_TARGET)
1681 ns.disk = D_INCONSISTENT;
1682 else /* side == C_SYNC_SOURCE */
1683 ns.pdsk = D_INCONSISTENT;
1684
1685 r = __drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
78bae59b 1686 ns = drbd_read_state(mdev);
b411b363
PR
1687
1688 if (ns.conn < C_CONNECTED)
1689 r = SS_UNKNOWN_ERROR;
1690
1691 if (r == SS_SUCCESS) {
1d7734a0
LE
1692 unsigned long tw = drbd_bm_total_weight(mdev);
1693 unsigned long now = jiffies;
1694 int i;
1695
b411b363
PR
1696 mdev->rs_failed = 0;
1697 mdev->rs_paused = 0;
b411b363 1698 mdev->rs_same_csum = 0;
0f0601f4
LE
1699 mdev->rs_last_events = 0;
1700 mdev->rs_last_sect_ev = 0;
1d7734a0
LE
1701 mdev->rs_total = tw;
1702 mdev->rs_start = now;
1703 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
1704 mdev->rs_mark_left[i] = tw;
1705 mdev->rs_mark_time[i] = now;
1706 }
b411b363
PR
1707 _drbd_pause_after(mdev);
1708 }
1709 write_unlock_irq(&global_state_lock);
5a22db89 1710
b411b363 1711 if (r == SS_SUCCESS) {
328e0f12
PR
1712 /* reset rs_last_bcast when a resync or verify is started,
1713 * to deal with potential jiffies wrap. */
1714 mdev->rs_last_bcast = jiffies - HZ;
1715
b411b363
PR
1716 dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
1717 drbd_conn_str(ns.conn),
1718 (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
1719 (unsigned long) mdev->rs_total);
6c922ed5
LE
1720 if (side == C_SYNC_TARGET)
1721 mdev->bm_resync_fo = 0;
1722
1723 /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
1724 * with w_send_oos, or the sync target will get confused as to
 1725 * how many bits to resync. We cannot always do that, because for an
1726 * empty resync and protocol < 95, we need to do it here, as we call
1727 * drbd_resync_finished from here in that case.
1728 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
1729 * and from after_state_ch otherwise. */
31890f4a 1730 if (side == C_SYNC_SOURCE && mdev->tconn->agreed_pro_version < 96)
6c922ed5 1731 drbd_gen_and_send_sync_uuid(mdev);
b411b363 1732
31890f4a 1733 if (mdev->tconn->agreed_pro_version < 95 && mdev->rs_total == 0) {
af85e8e8
LE
1734 /* This still has a race (about when exactly the peers
1735 * detect connection loss) that can lead to a full sync
1736 * on next handshake. In 8.3.9 we fixed this with explicit
1737 * resync-finished notifications, but the fix
1738 * introduces a protocol change. Sleeping for some
1739 * time longer than the ping interval + timeout on the
1740 * SyncSource, to give the SyncTarget the chance to
1741 * detect connection loss, then waiting for a ping
1742 * response (implicit in drbd_resync_finished) reduces
1743 * the race considerably, but does not solve it. */
44ed167d
PR
1744 if (side == C_SYNC_SOURCE) {
1745 struct net_conf *nc;
1746 int timeo;
1747
1748 rcu_read_lock();
1749 nc = rcu_dereference(mdev->tconn->net_conf);
1750 timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
1751 rcu_read_unlock();
1752 schedule_timeout_interruptible(timeo);
1753 }
b411b363 1754 drbd_resync_finished(mdev);
b411b363
PR
1755 }
1756
9bd28d3c 1757 drbd_rs_controller_reset(mdev);
b411b363
PR
1758 /* ns.conn may already be != mdev->state.conn,
1759 * we may have been paused in between, or become paused until
1760 * the timer triggers.
1761 * No matter, that is handled in resync_timer_fn() */
1762 if (ns.conn == C_SYNC_TARGET)
1763 mod_timer(&mdev->resync_timer, jiffies);
1764
1765 drbd_md_sync(mdev);
1766 }
5a22db89 1767 put_ldev(mdev);
8410da8f 1768 mutex_unlock(mdev->state_mutex);
b411b363
PR
1769}
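
[Editor's note] The (r >> 8) & 0xff above extracts the handler's exit code from a wait()-style status word returned by drbd_khelper(). In userspace the same extraction is spelled WEXITSTATUS(); a small standalone sketch (the helper command is only a stand-in, not a real DRBD handler):

#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>

int main(void)
{
	/* /bin/true is only a stand-in for a before-resync-* handler */
	int status = system("/bin/true");

	if (status != -1 && WIFEXITED(status))
		printf("handler exit code: %d\n",
		       WEXITSTATUS(status));	/* same as (status >> 8) & 0xff */
	return 0;
}
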
1770
b6dd1a89
LE
1771/* If the resource already closed the current epoch, but we did not
1772 * (because we have not yet seen new requests), we should send the
1773 * corresponding barrier now. Must be checked within the same spinlock
1774 * that is used to check for new requests. */
a186e478 1775static bool need_to_send_barrier(struct drbd_tconn *connection)
b6dd1a89
LE
1776{
1777 if (!connection->send.seen_any_write_yet)
1778 return false;
1779
1780 /* Skip barriers that do not contain any writes.
1781 * This may happen during AHEAD mode. */
1782 if (!connection->send.current_epoch_writes)
1783 return false;
1784
1785 /* ->req_lock is held when requests are queued on
1786 * connection->sender_work, and put into ->transfer_log.
1787 * It is also held when ->current_tle_nr is increased.
1788 * So either there are already new requests queued,
 1789 * and corresponding barriers will be sent there.
1790 * Or nothing new is queued yet, so the difference will be 1.
1791 */
1792 if (atomic_read(&connection->current_tle_nr) !=
1793 connection->send.current_epoch_nr + 1)
1794 return false;
1795
1796 return true;
1797}
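
[Editor's note] Reading the check above: current_tle_nr is the epoch the resource is currently filling, send.current_epoch_nr the last epoch this sender worked on. A barrier is still owed only when the resource has moved exactly one epoch ahead and nothing new was queued in between. A small sketch of that bookkeeping (not DRBD code, names made up):

#include <stdbool.h>

/* Sketch only, not DRBD code. */
static bool owe_barrier(int resource_epoch, int sender_epoch,
			bool seen_any_write, int writes_in_epoch)
{
	if (!seen_any_write)
		return false;		/* never sent anything yet */
	if (writes_in_epoch == 0)
		return false;		/* empty epoch, e.g. while in Ahead mode */
	/* New requests already queued would carry their own barrier. */
	return resource_epoch == sender_epoch + 1;
}
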
1798
a186e478 1799static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
8c0785a5
LE
1800{
1801 spin_lock_irq(&queue->q_lock);
1802 list_splice_init(&queue->q, work_list);
1803 spin_unlock_irq(&queue->q_lock);
1804 return !list_empty(work_list);
1805}
1806
a186e478 1807static bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_list)
8c0785a5
LE
1808{
1809 spin_lock_irq(&queue->q_lock);
1810 if (!list_empty(&queue->q))
1811 list_move(queue->q.next, work_list);
1812 spin_unlock_irq(&queue->q_lock);
1813 return !list_empty(work_list);
1814}
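
[Editor's note] The two helpers above differ only in how much they take while holding the queue lock: dequeue_work_batch() splices the whole list, dequeue_work_item() moves just the head (needed as long as drbd_queue_work_front() can jump the queue). A userspace sketch of the same two operations on a mutex-protected singly-linked queue (not DRBD code, all names hypothetical):

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

/* Sketch only, not DRBD code. */
struct work { struct work *next; };

struct work_queue {
	pthread_mutex_t lock;
	struct work *head;
};

static bool drain_all(struct work_queue *q, struct work **out)	/* like dequeue_work_batch() */
{
	pthread_mutex_lock(&q->lock);
	*out = q->head;			/* hand over the whole list at once */
	q->head = NULL;
	pthread_mutex_unlock(&q->lock);
	return *out != NULL;
}

static bool pop_one(struct work_queue *q, struct work **out)	/* like dequeue_work_item() */
{
	pthread_mutex_lock(&q->lock);
	*out = q->head;			/* hand over only the first item */
	if (q->head) {
		q->head = q->head->next;
		(*out)->next = NULL;
	}
	pthread_mutex_unlock(&q->lock);
	return *out != NULL;
}
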
1815
a186e478 1816static void wait_for_work(struct drbd_tconn *connection, struct list_head *work_list)
b6dd1a89
LE
1817{
1818 DEFINE_WAIT(wait);
1819 struct net_conf *nc;
1820 int uncork, cork;
1821
1822 dequeue_work_item(&connection->sender_work, work_list);
1823 if (!list_empty(work_list))
1824 return;
1825
1826 /* Still nothing to do?
1827 * Maybe we still need to close the current epoch,
1828 * even if no new requests are queued yet.
1829 *
1830 * Also, poke TCP, just in case.
1831 * Then wait for new work (or signal). */
1832 rcu_read_lock();
1833 nc = rcu_dereference(connection->net_conf);
1834 uncork = nc ? nc->tcp_cork : 0;
1835 rcu_read_unlock();
1836 if (uncork) {
1837 mutex_lock(&connection->data.mutex);
1838 if (connection->data.socket)
1839 drbd_tcp_uncork(connection->data.socket);
1840 mutex_unlock(&connection->data.mutex);
1841 }
1842
1843 for (;;) {
1844 int send_barrier;
1845 prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
1846 spin_lock_irq(&connection->req_lock);
1847 spin_lock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
bc317a9e
LE
1848 /* dequeue single item only,
1849 * we still use drbd_queue_work_front() in some places */
1850 if (!list_empty(&connection->sender_work.q))
1851 list_move(connection->sender_work.q.next, work_list);
b6dd1a89
LE
1852 spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
1853 if (!list_empty(work_list) || signal_pending(current)) {
1854 spin_unlock_irq(&connection->req_lock);
1855 break;
1856 }
1857 send_barrier = need_to_send_barrier(connection);
1858 spin_unlock_irq(&connection->req_lock);
1859 if (send_barrier) {
1860 drbd_send_barrier(connection);
1861 connection->send.current_epoch_nr++;
1862 }
1863 schedule();
 1864 /* may be woken up for other things than new work, too,
 1865 * e.g. if the current epoch got closed,
 1866 * in which case we send the barrier above. */
1867 }
1868 finish_wait(&connection->sender_work.q_wait, &wait);
1869
1870 /* someone may have changed the config while we have been waiting above. */
1871 rcu_read_lock();
1872 nc = rcu_dereference(connection->net_conf);
1873 cork = nc ? nc->tcp_cork : 0;
1874 rcu_read_unlock();
1875 mutex_lock(&connection->data.mutex);
1876 if (connection->data.socket) {
1877 if (cork)
1878 drbd_tcp_cork(connection->data.socket);
1879 else if (!uncork)
1880 drbd_tcp_uncork(connection->data.socket);
1881 }
1882 mutex_unlock(&connection->data.mutex);
1883}
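
[Editor's note] The cork/uncork calls above batch small packets while the sender drains its work list and flush them again once it goes back to waiting. From userspace the corresponding knob is the TCP_CORK socket option; a minimal sketch (not DRBD code, socket setup omitted), assuming drbd_tcp_cork()/drbd_tcp_uncork() map to the kernel-side equivalent of this option:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

/* Sketch only: cork before queuing several small sends, uncork to flush. */
static int tcp_set_cork(int fd, int on)
{
	return setsockopt(fd, IPPROTO_TCP, TCP_CORK, &on, sizeof(on));
}

Typical use would be tcp_set_cork(fd, 1) before a batch of small writes and tcp_set_cork(fd, 0) afterwards.
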
1884
b411b363
PR
1885int drbd_worker(struct drbd_thread *thi)
1886{
392c8801 1887 struct drbd_tconn *tconn = thi->tconn;
b411b363 1888 struct drbd_work *w = NULL;
54761697 1889 struct drbd_device *mdev;
b411b363 1890 LIST_HEAD(work_list);
8c0785a5 1891 int vnr;
b411b363 1892
e77a0a5c 1893 while (get_t_state(thi) == RUNNING) {
80822284 1894 drbd_thread_current_set_cpu(thi);
b411b363 1895
8c0785a5
LE
1896 /* as long as we use drbd_queue_work_front(),
1897 * we may only dequeue single work items here, not batches. */
1898 if (list_empty(&work_list))
b6dd1a89 1899 wait_for_work(tconn, &work_list);
b411b363 1900
8c0785a5 1901 if (signal_pending(current)) {
b411b363 1902 flush_signals(current);
19393e10
PR
1903 if (get_t_state(thi) == RUNNING) {
1904 conn_warn(tconn, "Worker got an unexpected signal\n");
b411b363 1905 continue;
19393e10 1906 }
b411b363
PR
1907 break;
1908 }
1909
e77a0a5c 1910 if (get_t_state(thi) != RUNNING)
b411b363 1911 break;
b411b363 1912
8c0785a5
LE
1913 while (!list_empty(&work_list)) {
1914 w = list_first_entry(&work_list, struct drbd_work, list);
1915 list_del_init(&w->list);
1916 if (w->cb(w, tconn->cstate < C_WF_REPORT_PARAMS) == 0)
1917 continue;
bbeb641c
PR
1918 if (tconn->cstate >= C_WF_REPORT_PARAMS)
1919 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
b411b363
PR
1920 }
1921 }
b411b363 1922
8c0785a5 1923 do {
b411b363 1924 while (!list_empty(&work_list)) {
8c0785a5 1925 w = list_first_entry(&work_list, struct drbd_work, list);
b411b363 1926 list_del_init(&w->list);
00d56944 1927 w->cb(w, 1);
b411b363 1928 }
d5b27b01 1929 dequeue_work_batch(&tconn->sender_work, &work_list);
8c0785a5 1930 } while (!list_empty(&work_list));
b411b363 1931
c141ebda 1932 rcu_read_lock();
f399002e 1933 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
0e29d163 1934 D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE);
c141ebda
PR
1935 kref_get(&mdev->kref);
1936 rcu_read_unlock();
0e29d163 1937 drbd_mdev_cleanup(mdev);
c141ebda
PR
1938 kref_put(&mdev->kref, &drbd_minor_destroy);
1939 rcu_read_lock();
0e29d163 1940 }
c141ebda 1941 rcu_read_unlock();
b411b363
PR
1942
1943 return 0;
1944}