/*
   drbd_worker.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

*/

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/sched/signal.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>

#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"

static int make_ov_request(struct drbd_device *, int);
static int make_resync_request(struct drbd_device *, int);

/* endio handlers:
 *   drbd_md_endio (defined here)
 *   drbd_request_endio (defined here)
 *   drbd_peer_request_endio (defined here)
 *   drbd_bm_endio (defined in drbd_bitmap.c)
 *
 * For all these callbacks, note the following:
 * The callbacks will be called in irq context by the IDE drivers,
 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
 * Try to get the locking right :)
 *
 */

/* used for synchronous meta data and bitmap IO
 * submitted by drbd_md_sync_page_io()
 */
void drbd_md_endio(struct bio *bio)
{
	struct drbd_device *device;

	device = bio->bi_private;
	device->md_io.error = bio->bi_error;

	/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
	 * to timeout on the lower level device, and eventually detach from it.
	 * If this io completion runs after that timeout expired, this
	 * drbd_md_put_buffer() may allow us to finally try and re-attach.
	 * During normal operation, this only puts that extra reference
	 * down to 1 again.
	 * Make sure we first drop the reference, and only then signal
	 * completion, or we may (in drbd_al_read_log()) cycle so fast into the
	 * next drbd_md_sync_page_io(), that we trigger the
	 * ASSERT(atomic_read(&device->md_io_in_use) == 1) there.
	 */
	drbd_md_put_buffer(device);
	device->md_io.done = 1;
	wake_up(&device->misc_wait);
	bio_put(bio);
	if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
		put_ldev(device);
}

/* reads on behalf of the partner,
 * "submitted" by the receiver
 */
static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;

	spin_lock_irqsave(&device->resource->req_lock, flags);
	device->read_cnt += peer_req->i.size >> 9;
	list_del(&peer_req->w.list);
	if (list_empty(&device->read_ee))
		wake_up(&device->ee_wait);
	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
		__drbd_chk_io_error(device, DRBD_READ_ERROR);
	spin_unlock_irqrestore(&device->resource->req_lock, flags);

	drbd_queue_work(&peer_device->connection->sender_work, &peer_req->w);
	put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver, final stage.  */
void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	struct drbd_connection *connection = peer_device->connection;
	struct drbd_interval i;
	int do_wake;
	u64 block_id;
	int do_al_complete_io;

	/* after we moved peer_req to done_ee,
	 * we may no longer access it,
	 * it may be freed/reused already!
	 * (as soon as we release the req_lock) */
	i = peer_req->i;
	do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
	block_id = peer_req->block_id;
	peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;

	spin_lock_irqsave(&device->resource->req_lock, flags);
	device->writ_cnt += peer_req->i.size >> 9;
	list_move_tail(&peer_req->w.list, &device->done_ee);

	/*
	 * Do not remove from the write_requests tree here: we did not send the
	 * Ack yet and did not wake possibly waiting conflicting requests.
	 * Removed from the tree from "drbd_process_done_ee" within the
	 * appropriate dw.cb (e_end_block/e_end_resync_block) or from
	 * _drbd_clear_done_ee.
	 */

	do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);

	/* FIXME do we want to detach for failed REQ_DISCARD?
	 * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */
	if (peer_req->flags & EE_WAS_ERROR)
		__drbd_chk_io_error(device, DRBD_WRITE_ERROR);

	if (connection->cstate >= C_WF_REPORT_PARAMS) {
		kref_get(&device->kref); /* put is in drbd_send_acks_wf() */
		if (!queue_work(connection->ack_sender, &peer_device->send_acks_work))
			kref_put(&device->kref, drbd_destroy_device);
	}
	spin_unlock_irqrestore(&device->resource->req_lock, flags);

	if (block_id == ID_SYNCER)
		drbd_rs_complete_io(device, i.sector);

	if (do_wake)
		wake_up(&device->ee_wait);

	if (do_al_complete_io)
		drbd_al_complete_io(device, &i);

	put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver.
 */
void drbd_peer_request_endio(struct bio *bio)
{
	struct drbd_peer_request *peer_req = bio->bi_private;
	struct drbd_device *device = peer_req->peer_device->device;
	bool is_write = bio_data_dir(bio) == WRITE;
	bool is_discard = !!(bio_op(bio) == REQ_OP_DISCARD);

	if (bio->bi_error && __ratelimit(&drbd_ratelimit_state))
		drbd_warn(device, "%s: error=%d s=%llus\n",
				is_write ? (is_discard ? "discard" : "write")
					: "read", bio->bi_error,
				(unsigned long long)peer_req->i.sector);

	if (bio->bi_error)
		set_bit(__EE_WAS_ERROR, &peer_req->flags);

	bio_put(bio); /* no need for the bio anymore */
	if (atomic_dec_and_test(&peer_req->pending_bios)) {
		if (is_write)
			drbd_endio_write_sec_final(peer_req);
		else
			drbd_endio_read_sec_final(peer_req);
	}
}

void drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device)
{
	panic("drbd%u %s/%u potential random memory corruption caused by delayed completion of aborted local request\n",
		device->minor, device->resource->name, device->vnr);
}

/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
 */
void drbd_request_endio(struct bio *bio)
{
	unsigned long flags;
	struct drbd_request *req = bio->bi_private;
	struct drbd_device *device = req->device;
	struct bio_and_error m;
	enum drbd_req_event what;

	/* If this request was aborted locally before,
	 * but now was completed "successfully",
	 * chances are that this caused arbitrary data corruption.
	 *
	 * "aborting" requests, or force-detaching the disk, is intended for
	 * completely blocked/hung local backing devices which no longer
	 * complete requests at all, not even error completions.  In this
	 * situation, usually a hard-reset and failover is the only way out.
	 *
	 * By "aborting", basically faking a local error-completion,
	 * we allow for a more graceful switchover by cleanly migrating services.
	 * Still the affected node has to be rebooted "soon".
	 *
	 * By completing these requests, we allow the upper layers to re-use
	 * the associated data pages.
	 *
	 * If later the local backing device "recovers", and now DMAs some data
	 * from disk into the original request pages, in the best case it will
	 * just put random data into unused pages; but typically it will corrupt
	 * meanwhile completely unrelated data, causing all sorts of damage.
	 *
	 * Which means delayed successful completion,
	 * especially for READ requests,
	 * is a reason to panic().
	 *
	 * We assume that a delayed *error* completion is OK,
	 * though we still will complain noisily about it.
	 */
	if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");

		if (!bio->bi_error)
			drbd_panic_after_delayed_completion_of_aborted_request(device);
	}

	/* to avoid recursion in __req_mod */
	if (unlikely(bio->bi_error)) {
		switch (bio_op(bio)) {
		case REQ_OP_DISCARD:
			if (bio->bi_error == -EOPNOTSUPP)
				what = DISCARD_COMPLETED_NOTSUPP;
			else
				what = DISCARD_COMPLETED_WITH_ERROR;
			break;
		case REQ_OP_READ:
			if (bio->bi_opf & REQ_RAHEAD)
				what = READ_AHEAD_COMPLETED_WITH_ERROR;
			else
				what = READ_COMPLETED_WITH_ERROR;
			break;
		default:
			what = WRITE_COMPLETED_WITH_ERROR;
			break;
		}
	} else {
		what = COMPLETED_OK;
	}

	bio_put(req->private_bio);
	req->private_bio = ERR_PTR(bio->bi_error);

	/* not req_mod(), we need irqsave here! */
	spin_lock_irqsave(&device->resource->req_lock, flags);
	__req_mod(req, what, &m);
	spin_unlock_irqrestore(&device->resource->req_lock, flags);
	put_ldev(device);

	if (m.bio)
		complete_master_bio(device, &m);
}

void drbd_csum_ee(struct crypto_ahash *tfm, struct drbd_peer_request *peer_req, void *digest)
{
	AHASH_REQUEST_ON_STACK(req, tfm);
	struct scatterlist sg;
	struct page *page = peer_req->pages;
	struct page *tmp;
	unsigned len;

	ahash_request_set_tfm(req, tfm);
	ahash_request_set_callback(req, 0, NULL, NULL);

	sg_init_table(&sg, 1);
	crypto_ahash_init(req);

	while ((tmp = page_chain_next(page))) {
		/* all but the last page will be fully used */
		sg_set_page(&sg, page, PAGE_SIZE, 0);
		ahash_request_set_crypt(req, &sg, NULL, sg.length);
		crypto_ahash_update(req);
		page = tmp;
	}
	/* and now the last, possibly only partially used page */
	len = peer_req->i.size & (PAGE_SIZE - 1);
	sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
	ahash_request_set_crypt(req, &sg, digest, sg.length);
	crypto_ahash_finup(req);
	ahash_request_zero(req);
}

void drbd_csum_bio(struct crypto_ahash *tfm, struct bio *bio, void *digest)
{
	AHASH_REQUEST_ON_STACK(req, tfm);
	struct scatterlist sg;
	struct bio_vec bvec;
	struct bvec_iter iter;

	ahash_request_set_tfm(req, tfm);
	ahash_request_set_callback(req, 0, NULL, NULL);

	sg_init_table(&sg, 1);
	crypto_ahash_init(req);

	bio_for_each_segment(bvec, bio, iter) {
		sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
		ahash_request_set_crypt(req, &sg, NULL, sg.length);
		crypto_ahash_update(req);
		/* REQ_OP_WRITE_SAME has only one segment,
		 * checksum the payload only once. */
		if (bio_op(bio) == REQ_OP_WRITE_SAME)
			break;
	}
	ahash_request_set_crypt(req, NULL, digest, 0);
	crypto_ahash_final(req);
	ahash_request_zero(req);
}

/* MAYBE merge common code with w_e_end_ov_req */
static int w_e_send_csum(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	int digest_size;
	void *digest;
	int err = 0;

	if (unlikely(cancel))
		goto out;

	if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
		goto out;

	digest_size = crypto_ahash_digestsize(peer_device->connection->csums_tfm);
	digest = kmalloc(digest_size, GFP_NOIO);
	if (digest) {
		sector_t sector = peer_req->i.sector;
		unsigned int size = peer_req->i.size;
		drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
		/* Free peer_req and pages before send.
		 * In case we block on congestion, we could otherwise run into
		 * some distributed deadlock, if the other side blocks on
		 * congestion as well, because our receiver blocks in
		 * drbd_alloc_pages due to pp_in_use > max_buffers. */
		drbd_free_peer_req(device, peer_req);
		peer_req = NULL;
		inc_rs_pending(device);
		err = drbd_send_drequest_csum(peer_device, sector, size,
					      digest, digest_size,
					      P_CSUM_RS_REQUEST);
		kfree(digest);
	} else {
		drbd_err(device, "kmalloc() of digest failed.\n");
		err = -ENOMEM;
	}

out:
	if (peer_req)
		drbd_free_peer_req(device, peer_req);

	if (unlikely(err))
		drbd_err(device, "drbd_send_drequest(..., csum) failed\n");
	return err;
}

#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, int size)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	if (!get_ldev(device))
		return -EIO;

	/* GFP_TRY, because if there is no memory available right now, this may
	 * be rescheduled for later. It is "only" background resync, after all. */
	peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
				       size, size, GFP_TRY);
	if (!peer_req)
		goto defer;

	peer_req->w.cb = w_e_send_csum;
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	atomic_add(size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0,
				     DRBD_FAULT_RS_RD) == 0)
		return 0;

	/* If it failed because of ENOMEM, retry should help.  If it failed
	 * because bio_add_page failed (probably broken lower level driver),
	 * retry may or may not help.
	 * If it does not, you may need to force disconnect. */
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
defer:
	put_ldev(device);
	return -EAGAIN;
}

int w_resync_timer(struct drbd_work *w, int cancel)
{
	struct drbd_device *device =
		container_of(w, struct drbd_device, resync_work);

	switch (device->state.conn) {
	case C_VERIFY_S:
		make_ov_request(device, cancel);
		break;
	case C_SYNC_TARGET:
		make_resync_request(device, cancel);
		break;
	}

	return 0;
}

void resync_timer_fn(unsigned long data)
{
	struct drbd_device *device = (struct drbd_device *) data;

	drbd_queue_work_if_unqueued(
		&first_peer_device(device)->connection->sender_work,
		&device->resync_work);
}

static void fifo_set(struct fifo_buffer *fb, int value)
{
	int i;

	for (i = 0; i < fb->size; i++)
		fb->values[i] = value;
}

static int fifo_push(struct fifo_buffer *fb, int value)
{
	int ov;

	ov = fb->values[fb->head_index];
	fb->values[fb->head_index++] = value;

	if (fb->head_index >= fb->size)
		fb->head_index = 0;

	return ov;
}

static void fifo_add_val(struct fifo_buffer *fb, int value)
{
	int i;

	for (i = 0; i < fb->size; i++)
		fb->values[i] += value;
}

struct fifo_buffer *fifo_alloc(int fifo_size)
{
	struct fifo_buffer *fb;

	fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO);
	if (!fb)
		return NULL;

	fb->head_index = 0;
	fb->size = fifo_size;
	fb->total = 0;

	return fb;
}

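/* Illustrative sketch (editor's assumption, not part of the upstream driver):
 * how the fifo_buffer helpers above are meant to be combined by the resync
 * controller.  fifo_push() returns the oldest planned correction while
 * queueing a new one, and fifo_add_val() spreads a correction evenly over all
 * planned steps.  Kept under "#if 0" because it is only an example.
 */
#if 0
static void fifo_example(void)
{
	struct fifo_buffer *plan = fifo_alloc(3);	/* plan 3 steps ahead */
	int due;

	if (!plan)
		return;

	fifo_set(plan, 0);		/* start with no planned correction */
	fifo_add_val(plan, 4);		/* add +4 sectors to each of the 3 steps */
	plan->total += 4 * 3;		/* keep the running total consistent */

	due = fifo_push(plan, 0);	/* take the correction due now, queue 0 */
	plan->total -= due;		/* due == 4 here; 8 sectors remain planned */

	kfree(plan);
}
#endif
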
static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in)
{
	struct disk_conf *dc;
	unsigned int want;     /* The number of sectors we want in-flight */
	int req_sect;          /* Number of sectors to request in this turn */
	int correction;        /* Number of sectors more we need in-flight */
	int cps;               /* correction per invocation of drbd_rs_controller() */
	int steps;             /* Number of time steps to plan ahead */
	int curr_corr;
	int max_sect;
	struct fifo_buffer *plan;

	dc = rcu_dereference(device->ldev->disk_conf);
	plan = rcu_dereference(device->rs_plan_s);

	steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */

	if (device->rs_in_flight + sect_in == 0) { /* At start of resync */
		want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
	} else { /* normal path */
		want = dc->c_fill_target ? dc->c_fill_target :
			sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
	}

	correction = want - device->rs_in_flight - plan->total;

	/* Plan ahead */
	cps = correction / steps;
	fifo_add_val(plan, cps);
	plan->total += cps * steps;

	/* What we do in this step */
	curr_corr = fifo_push(plan, 0);
	plan->total -= curr_corr;

	req_sect = sect_in + curr_corr;
	if (req_sect < 0)
		req_sect = 0;

	max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
	if (req_sect > max_sect)
		req_sect = max_sect;

	/*
	drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
		 sect_in, device->rs_in_flight, want, correction,
		 steps, cps, device->rs_planed, curr_corr, req_sect);
	*/

	return req_sect;
}

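/* Worked example (hypothetical numbers, editor's illustration): assuming
 * SLEEP_TIME is 100ms (HZ/10), c_fill_target = 0, c_delay_target = 10 (1s)
 * and sect_in = 2000 sectors arrived during the last interval, the target is
 *
 *	want = sect_in * c_delay_target * HZ / (SLEEP_TIME * 10)
 *	     = 2000 * 10 * HZ / ((HZ/10) * 10) = 20000 sectors in flight.
 *
 * If only 15000 sectors are currently in flight and nothing is planned yet,
 * correction = 5000; with (assumed) plan->size = 10 steps, cps = 500 is added
 * to every fifo slot, and this turn requests req_sect = sect_in + curr_corr
 * sectors, capped at (c_max_rate * 2 * SLEEP_TIME) / HZ.
 */
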
static int drbd_rs_number_requests(struct drbd_device *device)
{
	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
	int number, mxb;

	sect_in = atomic_xchg(&device->rs_sect_in, 0);
	device->rs_in_flight -= sect_in;

	rcu_read_lock();
	mxb = drbd_get_max_buffers(device) / 2;
	if (rcu_dereference(device->rs_plan_s)->size) {
		number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9);
		device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
	} else {
		device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
		number = SLEEP_TIME * device->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
	}
	rcu_read_unlock();

	/* Don't have more than "max-buffers"/2 in-flight.
	 * Otherwise we may cause the remote site to stall on drbd_alloc_pages(),
	 * potentially causing a distributed deadlock on congestion during
	 * online-verify or (checksum-based) resync, if max-buffers,
	 * socket buffer sizes and resync rate settings are mis-configured. */

	/* note that "number" is in units of "BM_BLOCK_SIZE" (which is 4k),
	 * mxb (as used here, and in drbd_alloc_pages on the peer) is
	 * "number of pages" (typically also 4k),
	 * but "rs_in_flight" is in "sectors" (512 Byte). */
	if (mxb - device->rs_in_flight/8 < number)
		number = mxb - device->rs_in_flight/8;

	return number;
}

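/* Unit-conversion sketch (editor's illustration, not upstream code): the value
 * returned by drbd_rs_controller() is in 512-byte sectors; ">> (BM_BLOCK_SHIFT - 9)"
 * turns it into 4 KiB bitmap blocks, one resync request each.  E.g. 2048
 * sectors >> 3 = 256 requests, while the throttle above compares against
 * max-buffers/2 pages minus rs_in_flight/8 (sectors converted to 4 KiB units).
 */
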
static int make_resync_request(struct drbd_device *const device, int cancel)
{
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
	unsigned long bit;
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	int max_bio_size;
	int number, rollback_i, size;
	int align, requeue = 0;
	int i = 0;
	int discard_granularity = 0;

	if (unlikely(cancel))
		return 0;

	if (device->rs_total == 0) {
		/* empty resync? */
		drbd_resync_finished(device);
		return 0;
	}

	if (!get_ldev(device)) {
		/* Since we only need to access device->rsync a
		   get_ldev_if_state(device,D_FAILED) would be sufficient, but
		   to continue resync with a broken disk makes no sense at
		   all */
		drbd_err(device, "Disk broke down during resync!\n");
		return 0;
	}

	if (connection->agreed_features & DRBD_FF_THIN_RESYNC) {
		rcu_read_lock();
		discard_granularity = rcu_dereference(device->ldev->disk_conf)->rs_discard_granularity;
		rcu_read_unlock();
	}

	max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
	number = drbd_rs_number_requests(device);
	if (number <= 0)
		goto requeue;

	for (i = 0; i < number; i++) {
		/* Stop generating RS requests when half of the send buffer is filled,
		 * but notify TCP that we'd like to have more space. */
		mutex_lock(&connection->data.mutex);
		if (connection->data.socket) {
			struct sock *sk = connection->data.socket->sk;
			int queued = sk->sk_wmem_queued;
			int sndbuf = sk->sk_sndbuf;
			if (queued > sndbuf / 2) {
				requeue = 1;
				if (sk->sk_socket)
					set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
			}
		} else
			requeue = 1;
		mutex_unlock(&connection->data.mutex);
		if (requeue)
			goto requeue;

next_sector:
		size = BM_BLOCK_SIZE;
		bit  = drbd_bm_find_next(device, device->bm_resync_fo);

		if (bit == DRBD_END_OF_BITMAP) {
			device->bm_resync_fo = drbd_bm_bits(device);
			put_ldev(device);
			return 0;
		}

		sector = BM_BIT_TO_SECT(bit);

		if (drbd_try_rs_begin_io(device, sector)) {
			device->bm_resync_fo = bit;
			goto requeue;
		}
		device->bm_resync_fo = bit + 1;

		if (unlikely(drbd_bm_test_bit(device, bit) == 0)) {
			drbd_rs_complete_io(device, sector);
			goto next_sector;
		}

#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
		/* try to find some adjacent bits.
		 * we stop if we have already the maximum req size.
		 *
		 * Additionally always align bigger requests, in order to
		 * be prepared for all stripe sizes of software RAIDs.
		 */
		align = 1;
		rollback_i = i;
		while (i < number) {
			if (size + BM_BLOCK_SIZE > max_bio_size)
				break;

			/* Be always aligned */
			if (sector & ((1<<(align+3))-1))
				break;

			if (discard_granularity && size == discard_granularity)
				break;

			/* do not cross extent boundaries */
			if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
				break;
			/* now, is it actually dirty, after all?
			 * caution, drbd_bm_test_bit is tri-state for some
			 * obscure reason; ( b == 0 ) would get the out-of-band
			 * only accidentally right because of the "oddly sized"
			 * adjustment below */
			if (drbd_bm_test_bit(device, bit+1) != 1)
				break;
			bit++;
			size += BM_BLOCK_SIZE;
			if ((BM_BLOCK_SIZE << align) <= size)
				align++;
			i++;
		}
		/* if we merged some,
		 * reset the offset to start the next drbd_bm_find_next from */
		if (size > BM_BLOCK_SIZE)
			device->bm_resync_fo = bit + 1;
#endif

		/* adjust very last sectors, in case we are oddly sized */
		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;

		if (device->use_csums) {
			switch (read_for_csum(peer_device, sector, size)) {
			case -EIO: /* Disk failure */
				put_ldev(device);
				return -EIO;
			case -EAGAIN: /* allocation failed, or ldev busy */
				drbd_rs_complete_io(device, sector);
				device->bm_resync_fo = BM_SECT_TO_BIT(sector);
				i = rollback_i;
				goto requeue;
			case 0:
				/* everything ok */
				break;
			default:
				BUG();
			}
		} else {
			int err;

			inc_rs_pending(device);
			err = drbd_send_drequest(peer_device,
						 size == discard_granularity ? P_RS_THIN_REQ : P_RS_DATA_REQUEST,
						 sector, size, ID_SYNCER);
			if (err) {
				drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
				dec_rs_pending(device);
				put_ldev(device);
				return err;
			}
		}
	}

	if (device->bm_resync_fo >= drbd_bm_bits(device)) {
		/* last syncer _request_ was sent,
		 * but the P_RS_DATA_REPLY not yet received.  sync will end (and
		 * next sync group will resume), as soon as we receive the last
		 * resync data block, and the last bit is cleared.
		 * until then resync "work" is "inactive" ...
		 */
		put_ldev(device);
		return 0;
	}

 requeue:
	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
	put_ldev(device);
	return 0;
}

static int make_ov_request(struct drbd_device *device, int cancel)
{
	int number, i, size;
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	bool stop_sector_reached = false;

	if (unlikely(cancel))
		return 1;

	number = drbd_rs_number_requests(device);

	sector = device->ov_position;
	for (i = 0; i < number; i++) {
		if (sector >= capacity)
			return 1;

		/* We check for "finished" only in the reply path:
		 * w_e_end_ov_reply().
		 * We need to send at least one request out. */
		stop_sector_reached = i > 0
			&& verify_can_do_stop_sector(device)
			&& sector >= device->ov_stop_sector;
		if (stop_sector_reached)
			break;

		size = BM_BLOCK_SIZE;

		if (drbd_try_rs_begin_io(device, sector)) {
			device->ov_position = sector;
			goto requeue;
		}

		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;

		inc_rs_pending(device);
		if (drbd_send_ov_request(first_peer_device(device), sector, size)) {
			dec_rs_pending(device);
			return 0;
		}
		sector += BM_SECT_PER_BIT;
	}
	device->ov_position = sector;

 requeue:
	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	if (i == 0 || !stop_sector_reached)
		mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
	return 1;
}

int w_ov_finished(struct drbd_work *w, int cancel)
{
	struct drbd_device_work *dw =
		container_of(w, struct drbd_device_work, w);
	struct drbd_device *device = dw->device;
	kfree(dw);
	ov_out_of_sync_print(device);
	drbd_resync_finished(device);

	return 0;
}

static int w_resync_finished(struct drbd_work *w, int cancel)
{
	struct drbd_device_work *dw =
		container_of(w, struct drbd_device_work, w);
	struct drbd_device *device = dw->device;
	kfree(dw);

	drbd_resync_finished(device);

	return 0;
}

static void ping_peer(struct drbd_device *device)
{
	struct drbd_connection *connection = first_peer_device(device)->connection;

	clear_bit(GOT_PING_ACK, &connection->flags);
	request_ping(connection);
	wait_event(connection->ping_wait,
		   test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED);
}

int drbd_resync_finished(struct drbd_device *device)
{
	struct drbd_connection *connection = first_peer_device(device)->connection;
	unsigned long db, dt, dbdt;
	unsigned long n_oos;
	union drbd_state os, ns;
	struct drbd_device_work *dw;
	char *khelper_cmd = NULL;
	int verify_done = 0;

	/* Remove all elements from the resync LRU. Since future actions
	 * might set bits in the (main) bitmap, then the entries in the
	 * resync LRU would be wrong. */
	if (drbd_rs_del_all(device)) {
		/* In case this is not possible now, most probably because
		 * there are P_RS_DATA_REPLY Packets lingering on the worker's
		 * queue (or even the read operations for those packets
		 * is not finished by now). Retry in 100ms. */

		schedule_timeout_interruptible(HZ / 10);
		dw = kmalloc(sizeof(struct drbd_device_work), GFP_ATOMIC);
		if (dw) {
			dw->w.cb = w_resync_finished;
			dw->device = device;
			drbd_queue_work(&connection->sender_work, &dw->w);
			return 1;
		}
		drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(dw).\n");
	}

	dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
	if (dt <= 0)
		dt = 1;

	db = device->rs_total;
	/* adjust for verify start and stop sectors, respective reached position */
	if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
		db -= device->ov_left;

	dbdt = Bit2KB(db/dt);
	device->rs_paused /= HZ;

	if (!get_ldev(device))
		goto out;

	ping_peer(device);

	spin_lock_irq(&device->resource->req_lock);
	os = drbd_read_state(device);

	verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);

	/* This protects us against multiple calls (that can happen in the presence
	   of application IO), and against connectivity loss just before we arrive here. */
	if (os.conn <= C_CONNECTED)
		goto out_unlock;

	ns = os;
	ns.conn = C_CONNECTED;

	drbd_info(device, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
	     verify_done ? "Online verify" : "Resync",
	     dt + device->rs_paused, device->rs_paused, dbdt);

	n_oos = drbd_bm_total_weight(device);

	if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
		if (n_oos) {
			drbd_alert(device, "Online verify found %lu %dk block out of sync!\n",
			      n_oos, Bit2KB(1));
			khelper_cmd = "out-of-sync";
		}
	} else {
		D_ASSERT(device, (n_oos - device->rs_failed) == 0);

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
			khelper_cmd = "after-resync-target";

		if (device->use_csums && device->rs_total) {
			const unsigned long s = device->rs_same_csum;
			const unsigned long t = device->rs_total;
			const int ratio =
				(t == 0)     ? 0 :
				(t < 100000) ? ((s*100)/t) : (s/(t/100));
			drbd_info(device, "%u %% had equal checksums, eliminated: %luK; "
			     "transferred %luK total %luK\n",
			     ratio,
			     Bit2KB(device->rs_same_csum),
			     Bit2KB(device->rs_total - device->rs_same_csum),
			     Bit2KB(device->rs_total));
		}
	}

	if (device->rs_failed) {
		drbd_info(device, "            %lu failed blocks\n", device->rs_failed);

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
			ns.disk = D_INCONSISTENT;
			ns.pdsk = D_UP_TO_DATE;
		} else {
			ns.disk = D_UP_TO_DATE;
			ns.pdsk = D_INCONSISTENT;
		}
	} else {
		ns.disk = D_UP_TO_DATE;
		ns.pdsk = D_UP_TO_DATE;

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
			if (device->p_uuid) {
				int i;
				for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
					_drbd_uuid_set(device, i, device->p_uuid[i]);
				drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]);
				_drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]);
			} else {
				drbd_err(device, "device->p_uuid is NULL! BUG\n");
			}
		}

		if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
			/* for verify runs, we don't update uuids here,
			 * so there would be nothing to report. */
			drbd_uuid_set_bm(device, 0UL);
			drbd_print_uuids(device, "updated UUIDs");
			if (device->p_uuid) {
				/* Now the two UUID sets are equal, update what we
				 * know of the peer. */
				int i;
				for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
					device->p_uuid[i] = device->ldev->md.uuid[i];
			}
		}
	}

	_drbd_set_state(device, ns, CS_VERBOSE, NULL);
out_unlock:
	spin_unlock_irq(&device->resource->req_lock);

	/* If we have been sync source, and have an effective fencing-policy,
	 * once *all* volumes are back in sync, call "unfence". */
	if (os.conn == C_SYNC_SOURCE) {
		enum drbd_disk_state disk_state = D_MASK;
		enum drbd_disk_state pdsk_state = D_MASK;
		enum drbd_fencing_p fp = FP_DONT_CARE;

		rcu_read_lock();
		fp = rcu_dereference(device->ldev->disk_conf)->fencing;
		if (fp != FP_DONT_CARE) {
			struct drbd_peer_device *peer_device;
			int vnr;
			idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
				struct drbd_device *device = peer_device->device;
				disk_state = min_t(enum drbd_disk_state, disk_state, device->state.disk);
				pdsk_state = min_t(enum drbd_disk_state, pdsk_state, device->state.pdsk);
			}
		}
		rcu_read_unlock();
		if (disk_state == D_UP_TO_DATE && pdsk_state == D_UP_TO_DATE)
			conn_khelper(connection, "unfence-peer");
	}

	put_ldev(device);
out:
	device->rs_total  = 0;
	device->rs_failed = 0;
	device->rs_paused = 0;

	/* reset start sector, if we reached end of device */
	if (verify_done && device->ov_left == 0)
		device->ov_start_sector = 0;

	drbd_md_sync(device);

	if (khelper_cmd)
		drbd_khelper(device, khelper_cmd);

	return 1;
}

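/* Illustrative note (editor's sketch): the "ratio" computation above stays in
 * integer arithmetic and avoids overflow for large resyncs.  For t < 100000
 * blocks it scales the numerator:  s = 60000, t = 80000  ->  (s*100)/t = 75.
 * For larger t it scales the denominator instead:  s = 3000000000,
 * t = 4000000000  ->  s/(t/100) = 3000000000/40000000 = 75, without ever
 * forming s*100 (which could overflow a 32-bit unsigned long).
 */
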
/* helper */
static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req)
{
	if (drbd_peer_req_has_active_page(peer_req)) {
		/* This might happen if sendpage() has not finished */
		int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
		atomic_add(i, &device->pp_in_use_by_net);
		atomic_sub(i, &device->pp_in_use);
		spin_lock_irq(&device->resource->req_lock);
		list_add_tail(&peer_req->w.list, &device->net_ee);
		spin_unlock_irq(&device->resource->req_lock);
		wake_up(&drbd_pp_wait);
	} else
		drbd_free_peer_req(device, peer_req);
}

/**
 * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
 * @w:		work object.
 * @cancel:	The connection will be closed anyways
 */
int w_e_end_data_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	int err;

	if (unlikely(cancel)) {
		drbd_free_peer_req(device, peer_req);
		dec_unacked(device);
		return 0;
	}

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		err = drbd_send_block(peer_device, P_DATA_REPLY, peer_req);
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Sending NegDReply. sector=%llus.\n",
			    (unsigned long long)peer_req->i.sector);

		err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req);
	}

	dec_unacked(device);

	move_to_net_ee_or_free(device, peer_req);

	if (unlikely(err))
		drbd_err(device, "drbd_send_block() failed\n");
	return err;
}

static bool all_zero(struct drbd_peer_request *peer_req)
{
	struct page *page = peer_req->pages;
	unsigned int len = peer_req->i.size;

	page_chain_for_each(page) {
		unsigned int l = min_t(unsigned int, len, PAGE_SIZE);
		unsigned int i, words = l / sizeof(long);
		unsigned long *d;

		d = kmap_atomic(page);
		for (i = 0; i < words; i++) {
			if (d[i]) {
				kunmap_atomic(d);
				return false;
			}
		}
		kunmap_atomic(d);
		len -= l;
	}

	return true;
}

/**
 * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
 * @w:		work object.
 * @cancel:	The connection will be closed anyways
 */
int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	int err;

	if (unlikely(cancel)) {
		drbd_free_peer_req(device, peer_req);
		dec_unacked(device);
		return 0;
	}

	if (get_ldev_if_state(device, D_FAILED)) {
		drbd_rs_complete_io(device, peer_req->i.sector);
		put_ldev(device);
	}

	if (device->state.conn == C_AHEAD) {
		err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req);
	} else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		if (likely(device->state.pdsk >= D_INCONSISTENT)) {
			inc_rs_pending(device);
			if (peer_req->flags & EE_RS_THIN_REQ && all_zero(peer_req))
				err = drbd_send_rs_deallocated(peer_device, peer_req);
			else
				err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
		} else {
			if (__ratelimit(&drbd_ratelimit_state))
				drbd_err(device, "Not sending RSDataReply, "
				    "partner DISKLESS!\n");
			err = 0;
		}
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Sending NegRSDReply. sector %llus.\n",
			    (unsigned long long)peer_req->i.sector);

		err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);

		/* update resync data with failure */
		drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size);
	}

	dec_unacked(device);

	move_to_net_ee_or_free(device, peer_req);

	if (unlikely(err))
		drbd_err(device, "drbd_send_block() failed\n");
	return err;
}

int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	struct digest_info *di;
	int digest_size;
	void *digest = NULL;
	int err, eq = 0;

	if (unlikely(cancel)) {
		drbd_free_peer_req(device, peer_req);
		dec_unacked(device);
		return 0;
	}

	if (get_ldev(device)) {
		drbd_rs_complete_io(device, peer_req->i.sector);
		put_ldev(device);
	}

	di = peer_req->digest;

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		/* quick hack to try to avoid a race against reconfiguration.
		 * a real fix would be much more involved,
		 * introducing more locking mechanisms */
		if (peer_device->connection->csums_tfm) {
			digest_size = crypto_ahash_digestsize(peer_device->connection->csums_tfm);
			D_ASSERT(device, digest_size == di->digest_size);
			digest = kmalloc(digest_size, GFP_NOIO);
		}
		if (digest) {
			drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
			eq = !memcmp(digest, di->digest, digest_size);
			kfree(digest);
		}

		if (eq) {
			drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size);
			/* rs_same_csums unit is BM_BLOCK_SIZE */
			device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
			err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req);
		} else {
			inc_rs_pending(device);
			peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
			peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
			kfree(di);
			err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
		}
	} else {
		err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Sending NegDReply. I guess it gets messy.\n");
	}

	dec_unacked(device);
	move_to_net_ee_or_free(device, peer_req);

	if (unlikely(err))
		drbd_err(device, "drbd_send_block/ack() failed\n");
	return err;
}

int w_e_end_ov_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	unsigned int size = peer_req->i.size;
	int digest_size;
	void *digest;
	int err = 0;

	if (unlikely(cancel))
		goto out;

	digest_size = crypto_ahash_digestsize(peer_device->connection->verify_tfm);
	digest = kmalloc(digest_size, GFP_NOIO);
	if (!digest) {
		err = 1;	/* terminate the connection in case the allocation failed */
		goto out;
	}

	if (likely(!(peer_req->flags & EE_WAS_ERROR)))
		drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
	else
		memset(digest, 0, digest_size);

	/* Free e and pages before send.
	 * In case we block on congestion, we could otherwise run into
	 * some distributed deadlock, if the other side blocks on
	 * congestion as well, because our receiver blocks in
	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
	drbd_free_peer_req(device, peer_req);
	peer_req = NULL;
	inc_rs_pending(device);
	err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY);
	if (err)
		dec_rs_pending(device);
	kfree(digest);

out:
	if (peer_req)
		drbd_free_peer_req(device, peer_req);
	dec_unacked(device);
	return err;
}

void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size)
{
	if (device->ov_last_oos_start + device->ov_last_oos_size == sector) {
		device->ov_last_oos_size += size>>9;
	} else {
		device->ov_last_oos_start = sector;
		device->ov_last_oos_size = size>>9;
	}
	drbd_set_out_of_sync(device, sector, size);
}

int w_e_end_ov_reply(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	struct digest_info *di;
	void *digest;
	sector_t sector = peer_req->i.sector;
	unsigned int size = peer_req->i.size;
	int digest_size;
	int err, eq = 0;
	bool stop_sector_reached = false;

	if (unlikely(cancel)) {
		drbd_free_peer_req(device, peer_req);
		dec_unacked(device);
		return 0;
	}

	/* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
	 * the resync lru has been cleaned up already */
	if (get_ldev(device)) {
		drbd_rs_complete_io(device, peer_req->i.sector);
		put_ldev(device);
	}

	di = peer_req->digest;

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		digest_size = crypto_ahash_digestsize(peer_device->connection->verify_tfm);
		digest = kmalloc(digest_size, GFP_NOIO);
		if (digest) {
			drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);

			D_ASSERT(device, digest_size == di->digest_size);
			eq = !memcmp(digest, di->digest, digest_size);
			kfree(digest);
		}
	}

	/* Free peer_req and pages before send.
	 * In case we block on congestion, we could otherwise run into
	 * some distributed deadlock, if the other side blocks on
	 * congestion as well, because our receiver blocks in
	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
	drbd_free_peer_req(device, peer_req);
	if (!eq)
		drbd_ov_out_of_sync_found(device, sector, size);
	else
		ov_out_of_sync_print(device);

	err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size,
			       eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);

	dec_unacked(device);

	--device->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	if ((device->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(device, device->ov_left);

	stop_sector_reached = verify_can_do_stop_sector(device) &&
		(sector + (size>>9)) >= device->ov_stop_sector;

	if (device->ov_left == 0 || stop_sector_reached) {
		ov_out_of_sync_print(device);
		drbd_resync_finished(device);
	}

	return err;
}

/* FIXME
 * We need to track the number of pending barrier acks,
 * and to be able to wait for them.
 * See also comment in drbd_adm_attach before drbd_suspend_io.
 */
static int drbd_send_barrier(struct drbd_connection *connection)
{
	struct p_barrier *p;
	struct drbd_socket *sock;

	sock = &connection->data;
	p = conn_prepare_command(connection, sock);
	if (!p)
		return -EIO;
	p->barrier = connection->send.current_epoch_nr;
	p->pad = 0;
	connection->send.current_epoch_writes = 0;
	connection->send.last_sent_barrier_jif = jiffies;

	return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
}

int w_send_write_hint(struct drbd_work *w, int cancel)
{
	struct drbd_device *device =
		container_of(w, struct drbd_device, unplug_work);
	struct drbd_socket *sock;

	if (cancel)
		return 0;
	sock = &first_peer_device(device)->connection->data;
	if (!drbd_prepare_command(first_peer_device(device), sock))
		return -EIO;
	return drbd_send_command(first_peer_device(device), sock, P_UNPLUG_REMOTE, 0, NULL, 0);
}

static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch)
{
	if (!connection->send.seen_any_write_yet) {
		connection->send.seen_any_write_yet = true;
		connection->send.current_epoch_nr = epoch;
		connection->send.current_epoch_writes = 0;
		connection->send.last_sent_barrier_jif = jiffies;
	}
}

static void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch)
{
	/* re-init if first write on this connection */
	if (!connection->send.seen_any_write_yet)
		return;
	if (connection->send.current_epoch_nr != epoch) {
		if (connection->send.current_epoch_writes)
			drbd_send_barrier(connection);
		connection->send.current_epoch_nr = epoch;
	}
}

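/* Illustrative sequence (editor's assumption, not part of the driver): how the
 * two helpers above cooperate when the sender worker processes queued writes.
 * The first write of a connection only initializes the epoch bookkeeping; a
 * barrier is sent only when a request carries a newer epoch number than the
 * currently open one and that epoch actually saw writes.  Kept under "#if 0".
 */
#if 0
static void epoch_example(struct drbd_connection *connection)
{
	re_init_if_first_write(connection, 7);	/* first write: open epoch 7, no barrier */
	maybe_send_barrier(connection, 7);	/* same epoch: nothing to do */
	connection->send.current_epoch_writes++;

	maybe_send_barrier(connection, 8);	/* epoch changed, 7 had writes: send P_BARRIER for 7 */
	connection->send.current_epoch_writes++;
}
#endif
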
int w_send_out_of_sync(struct drbd_work *w, int cancel)
{
	struct drbd_request *req = container_of(w, struct drbd_request, w);
	struct drbd_device *device = req->device;
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *const connection = peer_device->connection;
	int err;

	if (unlikely(cancel)) {
		req_mod(req, SEND_CANCELED);
		return 0;
	}
	req->pre_send_jif = jiffies;

	/* this time, no connection->send.current_epoch_writes++;
	 * If it was sent, it was the closing barrier for the last
	 * replicated epoch, before we went into AHEAD mode.
	 * No more barriers will be sent, until we leave AHEAD mode again. */
	maybe_send_barrier(connection, req->epoch);

	err = drbd_send_out_of_sync(peer_device, req);
	req_mod(req, OOS_HANDED_TO_NETWORK);

	return err;
}

/**
 * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
 * @w:		work object.
 * @cancel:	The connection will be closed anyways
 */
int w_send_dblock(struct drbd_work *w, int cancel)
{
	struct drbd_request *req = container_of(w, struct drbd_request, w);
	struct drbd_device *device = req->device;
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *connection = peer_device->connection;
	int err;

	if (unlikely(cancel)) {
		req_mod(req, SEND_CANCELED);
		return 0;
	}
	req->pre_send_jif = jiffies;

	re_init_if_first_write(connection, req->epoch);
	maybe_send_barrier(connection, req->epoch);
	connection->send.current_epoch_writes++;

	err = drbd_send_dblock(peer_device, req);
	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);

	return err;
}

/**
 * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
 * @w:		work object.
 * @cancel:	The connection will be closed anyways
 */
int w_send_read_req(struct drbd_work *w, int cancel)
{
	struct drbd_request *req = container_of(w, struct drbd_request, w);
	struct drbd_device *device = req->device;
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *connection = peer_device->connection;
	int err;

	if (unlikely(cancel)) {
		req_mod(req, SEND_CANCELED);
		return 0;
	}
	req->pre_send_jif = jiffies;

	/* Even read requests may close a write epoch,
	 * if there was any yet. */
	maybe_send_barrier(connection, req->epoch);

	err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size,
				 (unsigned long)req);

	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);

	return err;
}

int w_restart_disk_io(struct drbd_work *w, int cancel)
{
	struct drbd_request *req = container_of(w, struct drbd_request, w);
	struct drbd_device *device = req->device;

	if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
		drbd_al_begin_io(device, &req->i);

	drbd_req_make_private_bio(req, req->master_bio);
	req->private_bio->bi_bdev = device->ldev->backing_bdev;
	generic_make_request(req->private_bio);

	return 0;
}

static int _drbd_may_sync_now(struct drbd_device *device)
{
	struct drbd_device *odev = device;
	int resync_after;

	while (1) {
		if (!odev->ldev || odev->state.disk == D_DISKLESS)
			return 1;
		rcu_read_lock();
		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
		rcu_read_unlock();
		if (resync_after == -1)
			return 1;
		odev = minor_to_device(resync_after);
		if (!odev)
			return 1;
		if ((odev->state.conn >= C_SYNC_SOURCE &&
		     odev->state.conn <= C_PAUSED_SYNC_T) ||
		    odev->state.aftr_isp || odev->state.peer_isp ||
		    odev->state.user_isp)
			return 0;
	}
}

1544/**
28bc3b8c 1545 * drbd_pause_after() - Pause resync on all devices that may not resync now
b30ab791 1546 * @device: DRBD device.
b411b363
PR
1547 *
1548 * Called from process context only (admin command and after_state_ch).
1549 */
28bc3b8c 1550static bool drbd_pause_after(struct drbd_device *device)
b411b363 1551{
28bc3b8c 1552 bool changed = false;
54761697 1553 struct drbd_device *odev;
28bc3b8c 1554 int i;
b411b363 1555
695d08fa 1556 rcu_read_lock();
05a10ec7 1557 idr_for_each_entry(&drbd_devices, odev, i) {
b411b363
PR
1558 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1559 continue;
28bc3b8c
AG
1560 if (!_drbd_may_sync_now(odev) &&
1561 _drbd_set_state(_NS(odev, aftr_isp, 1),
1562 CS_HARD, NULL) != SS_NOTHING_TO_DO)
1563 changed = true;
b411b363 1564 }
695d08fa 1565 rcu_read_unlock();
b411b363 1566
28bc3b8c 1567 return changed;
b411b363
PR
1568}
1569
1570/**
28bc3b8c 1571 * drbd_resume_next() - Resume resync on all devices that may resync now
b30ab791 1572 * @device: DRBD device.
b411b363
PR
1573 *
1574 * Called from process context only (admin command and worker).
1575 */
28bc3b8c 1576static bool drbd_resume_next(struct drbd_device *device)
b411b363 1577{
28bc3b8c 1578 bool changed = false;
54761697 1579 struct drbd_device *odev;
28bc3b8c 1580 int i;
b411b363 1581
695d08fa 1582 rcu_read_lock();
05a10ec7 1583 idr_for_each_entry(&drbd_devices, odev, i) {
b411b363
PR
1584 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1585 continue;
1586 if (odev->state.aftr_isp) {
28bc3b8c
AG
1587 if (_drbd_may_sync_now(odev) &&
1588 _drbd_set_state(_NS(odev, aftr_isp, 0),
1589 CS_HARD, NULL) != SS_NOTHING_TO_DO)
1590 changed = true;
b411b363
PR
1591 }
1592 }
695d08fa 1593 rcu_read_unlock();
28bc3b8c 1594 return changed;
b411b363
PR
1595}
1596
b30ab791 1597void resume_next_sg(struct drbd_device *device)
b411b363 1598{
28bc3b8c
AG
1599 lock_all_resources();
1600 drbd_resume_next(device);
1601 unlock_all_resources();
b411b363
PR
1602}
1603
b30ab791 1604void suspend_other_sg(struct drbd_device *device)
b411b363 1605{
28bc3b8c
AG
1606 lock_all_resources();
1607 drbd_pause_after(device);
1608 unlock_all_resources();
b411b363
PR
1609}
1610
28bc3b8c 1611/* caller must lock_all_resources() */
b30ab791 1612enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
b411b363 1613{
54761697 1614 struct drbd_device *odev;
95f8efd0 1615 int resync_after;
b411b363
PR
1616
1617 if (o_minor == -1)
1618 return NO_ERROR;
a3f8f7dc 1619 if (o_minor < -1 || o_minor > MINORMASK)
95f8efd0 1620 return ERR_RESYNC_AFTER;
b411b363
PR
1621
1622 /* check for loops */
b30ab791 1623 odev = minor_to_device(o_minor);
b411b363 1624 while (1) {
b30ab791 1625 if (odev == device)
95f8efd0 1626 return ERR_RESYNC_AFTER_CYCLE;
b411b363 1627
a3f8f7dc
LE
1628 /* You are free to depend on diskless, non-existing,
1629 * or not yet/no longer existing minors.
1630 * We only reject dependency loops.
1631 * We cannot follow the dependency chain beyond a detached or
1632 * missing minor.
1633 */
1634 if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS)
1635 return NO_ERROR;
1636
daeda1cc 1637 rcu_read_lock();
95f8efd0 1638 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
daeda1cc 1639 rcu_read_unlock();
b411b363 1640 /* dependency chain ends here, no cycles. */
95f8efd0 1641 if (resync_after == -1)
b411b363
PR
1642 return NO_ERROR;
1643
1644 /* follow the dependency chain */
b30ab791 1645 odev = minor_to_device(resync_after);
b411b363
PR
1646 }
1647}
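/* Illustrative sketch, not part of drbd_worker.c: the loop above simply walks
 * the resync-after chain and rejects only a configuration that would lead back
 * to the device being configured.  A minimal stand-alone model of that walk,
 * using a hypothetical after[] table in place of minor_to_device()/disk_conf
 * (out-of-range or -1 entries stand in for missing or diskless minors):
 */
static int resync_after_chain_has_cycle(const int *after, int nr_minors,
					int self, int o_minor)
{
	int cur = o_minor;

	while (cur >= 0 && cur < nr_minors) {
		if (cur == self)
			return 1;	/* dependency loop: reject */
		cur = after[cur];	/* follow the chain; -1 terminates it */
	}
	return 0;			/* chain ended without reaching self */
}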
1648
28bc3b8c 1649/* caller must lock_all_resources() */
b30ab791 1650void drbd_resync_after_changed(struct drbd_device *device)
b411b363 1651{
28bc3b8c 1652 int changed;
b411b363 1653
dc97b708 1654 do {
28bc3b8c
AG
1655 changed = drbd_pause_after(device);
1656 changed |= drbd_resume_next(device);
1657 } while (changed);
b411b363
PR
1658}
1659
b30ab791 1660void drbd_rs_controller_reset(struct drbd_device *device)
9bd28d3c 1661{
ff8bd88b 1662 struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
813472ce
PR
1663 struct fifo_buffer *plan;
1664
b30ab791
AG
1665 atomic_set(&device->rs_sect_in, 0);
1666 atomic_set(&device->rs_sect_ev, 0);
1667 device->rs_in_flight = 0;
ff8bd88b
LE
1668 device->rs_last_events =
1669 (int)part_stat_read(&disk->part0, sectors[0]) +
1670 (int)part_stat_read(&disk->part0, sectors[1]);
813472ce
PR
1671
1672 /* Updating the RCU protected object in place is necessary since
1673 this function gets called from atomic context.
 1674 It is valid since all other updates also lead to a completely
1675 empty fifo */
1676 rcu_read_lock();
b30ab791 1677 plan = rcu_dereference(device->rs_plan_s);
813472ce
PR
1678 plan->total = 0;
1679 fifo_set(plan, 0);
1680 rcu_read_unlock();
9bd28d3c
LE
1681}
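/* Expanding on the in-place-update comment in drbd_rs_controller_reset()
 * above (an editorial reading, not taken from the driver): the usual
 * copy-update RCU pattern -- allocate a new plan, rcu_assign_pointer() it,
 * free the old one after a grace period -- is not an option here, since both
 * the allocation and synchronize_rcu() may sleep while this function can be
 * called from atomic context.  Resetting the existing buffer in place is
 * acceptable because an all-zero fifo is itself a valid "empty plan" state,
 * so readers under rcu_read_lock() never observe a state they could not have
 * seen anyway. */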
1682
1f04af33
PR
1683void start_resync_timer_fn(unsigned long data)
1684{
b30ab791 1685 struct drbd_device *device = (struct drbd_device *) data;
ac0acb9e 1686 drbd_device_post_work(device, RS_START);
1f04af33
PR
1687}
1688
ac0acb9e 1689static void do_start_resync(struct drbd_device *device)
1f04af33 1690{
b30ab791 1691 if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
ac0acb9e 1692 drbd_warn(device, "postponing start_resync ...\n");
b30ab791
AG
1693 device->start_resync_timer.expires = jiffies + HZ/10;
1694 add_timer(&device->start_resync_timer);
ac0acb9e 1695 return;
1f04af33
PR
1696 }
1697
b30ab791
AG
1698 drbd_start_resync(device, C_SYNC_SOURCE);
1699 clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
1f04af33
PR
1700}
1701
aaaba345
LE
1702static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device)
1703{
1704 bool csums_after_crash_only;
1705 rcu_read_lock();
1706 csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only;
1707 rcu_read_unlock();
1708 return connection->agreed_pro_version >= 89 && /* supported? */
1709 connection->csums_tfm && /* configured? */
7e5fec31 1710 (csums_after_crash_only == false /* use for each resync? */
aaaba345
LE
1711 || test_bit(CRASHED_PRIMARY, &device->flags)); /* or only after Primary crash? */
1712}
1713
b411b363
PR
1714/**
1715 * drbd_start_resync() - Start the resync process
b30ab791 1716 * @device: DRBD device.
b411b363
PR
1717 * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET
1718 *
1719 * This function might bring you directly into one of the
1720 * C_PAUSED_SYNC_* states.
1721 */
b30ab791 1722void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
b411b363 1723{
44a4d551
LE
1724 struct drbd_peer_device *peer_device = first_peer_device(device);
1725 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
b411b363
PR
1726 union drbd_state ns;
1727 int r;
1728
b30ab791 1729 if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) {
d0180171 1730 drbd_err(device, "Resync already running!\n");
b411b363
PR
1731 return;
1732 }
1733
b30ab791 1734 if (!test_bit(B_RS_H_DONE, &device->flags)) {
e64a3294
PR
1735 if (side == C_SYNC_TARGET) {
1736 /* Since application IO was locked out during C_WF_BITMAP_T and
1737 C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
 1738 we check whether we may make the data inconsistent. */
b30ab791 1739 r = drbd_khelper(device, "before-resync-target");
e64a3294
PR
1740 r = (r >> 8) & 0xff;
1741 if (r > 0) {
d0180171 1742 drbd_info(device, "before-resync-target handler returned %d, "
09b9e797 1743 "dropping connection.\n", r);
44a4d551 1744 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
09b9e797
PR
1745 return;
1746 }
e64a3294 1747 } else /* C_SYNC_SOURCE */ {
b30ab791 1748 r = drbd_khelper(device, "before-resync-source");
e64a3294
PR
1749 r = (r >> 8) & 0xff;
1750 if (r > 0) {
1751 if (r == 3) {
d0180171 1752 drbd_info(device, "before-resync-source handler returned %d, "
e64a3294
PR
1753 "ignoring. Old userland tools?", r);
1754 } else {
d0180171 1755 drbd_info(device, "before-resync-source handler returned %d, "
e64a3294 1756 "dropping connection.\n", r);
44a4d551 1757 conn_request_state(connection,
a6b32bc3 1758 NS(conn, C_DISCONNECTING), CS_HARD);
e64a3294
PR
1759 return;
1760 }
1761 }
09b9e797 1762 }
b411b363
PR
1763 }
1764
44a4d551 1765 if (current == connection->worker.task) {
dad20554 1766 /* The worker should not sleep waiting for state_mutex,
e64a3294 1767 that can take a long time */
b30ab791
AG
1768 if (!mutex_trylock(device->state_mutex)) {
1769 set_bit(B_RS_H_DONE, &device->flags);
1770 device->start_resync_timer.expires = jiffies + HZ/5;
1771 add_timer(&device->start_resync_timer);
e64a3294
PR
1772 return;
1773 }
1774 } else {
b30ab791 1775 mutex_lock(device->state_mutex);
e64a3294 1776 }
b411b363 1777
28bc3b8c
AG
1778 lock_all_resources();
1779 clear_bit(B_RS_H_DONE, &device->flags);
a700471b 1780 /* Did some connection breakage or IO error race with us? */
b30ab791
AG
1781 if (device->state.conn < C_CONNECTED
1782 || !get_ldev_if_state(device, D_NEGOTIATING)) {
28bc3b8c
AG
1783 unlock_all_resources();
1784 goto out;
b411b363
PR
1785 }
1786
b30ab791 1787 ns = drbd_read_state(device);
b411b363 1788
b30ab791 1789 ns.aftr_isp = !_drbd_may_sync_now(device);
b411b363
PR
1790
1791 ns.conn = side;
1792
1793 if (side == C_SYNC_TARGET)
1794 ns.disk = D_INCONSISTENT;
1795 else /* side == C_SYNC_SOURCE */
1796 ns.pdsk = D_INCONSISTENT;
1797
28bc3b8c 1798 r = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
b30ab791 1799 ns = drbd_read_state(device);
b411b363
PR
1800
1801 if (ns.conn < C_CONNECTED)
1802 r = SS_UNKNOWN_ERROR;
1803
1804 if (r == SS_SUCCESS) {
b30ab791 1805 unsigned long tw = drbd_bm_total_weight(device);
1d7734a0
LE
1806 unsigned long now = jiffies;
1807 int i;
1808
b30ab791
AG
1809 device->rs_failed = 0;
1810 device->rs_paused = 0;
1811 device->rs_same_csum = 0;
b30ab791
AG
1812 device->rs_last_sect_ev = 0;
1813 device->rs_total = tw;
1814 device->rs_start = now;
1d7734a0 1815 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
b30ab791
AG
1816 device->rs_mark_left[i] = tw;
1817 device->rs_mark_time[i] = now;
1d7734a0 1818 }
28bc3b8c 1819 drbd_pause_after(device);
5ab7d2c0
LE
 1820 /* Forget potentially stale cached per-resync-extent bit-counts.
 1821 * Open-coded drbd_rs_cancel_all(device); we already have IRQs
1822 * disabled, and know the disk state is ok. */
1823 spin_lock(&device->al_lock);
1824 lc_reset(device->resync);
1825 device->resync_locked = 0;
1826 device->resync_wenr = LC_FREE;
1827 spin_unlock(&device->al_lock);
b411b363 1828 }
28bc3b8c 1829 unlock_all_resources();
5a22db89 1830
b411b363 1831 if (r == SS_SUCCESS) {
5ab7d2c0 1832 wake_up(&device->al_wait); /* for lc_reset() above */
328e0f12
PR
1833 /* reset rs_last_bcast when a resync or verify is started,
1834 * to deal with potential jiffies wrap. */
b30ab791 1835 device->rs_last_bcast = jiffies - HZ;
328e0f12 1836
d0180171 1837 drbd_info(device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
b411b363 1838 drbd_conn_str(ns.conn),
b30ab791
AG
1839 (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
1840 (unsigned long) device->rs_total);
aaaba345 1841 if (side == C_SYNC_TARGET) {
b30ab791 1842 device->bm_resync_fo = 0;
aaaba345
LE
1843 device->use_csums = use_checksum_based_resync(connection, device);
1844 } else {
7e5fec31 1845 device->use_csums = false;
aaaba345 1846 }
6c922ed5
LE
1847
1848 /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
1849 * with w_send_oos, or the sync target will get confused as to
 1850 * how many bits to resync. We cannot always do that, because for an
1851 * empty resync and protocol < 95, we need to do it here, as we call
1852 * drbd_resync_finished from here in that case.
1853 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
1854 * and from after_state_ch otherwise. */
44a4d551
LE
1855 if (side == C_SYNC_SOURCE && connection->agreed_pro_version < 96)
1856 drbd_gen_and_send_sync_uuid(peer_device);
b411b363 1857
44a4d551 1858 if (connection->agreed_pro_version < 95 && device->rs_total == 0) {
af85e8e8
LE
1859 /* This still has a race (about when exactly the peers
1860 * detect connection loss) that can lead to a full sync
1861 * on next handshake. In 8.3.9 we fixed this with explicit
1862 * resync-finished notifications, but the fix
1863 * introduces a protocol change. Sleeping for some
1864 * time longer than the ping interval + timeout on the
1865 * SyncSource, to give the SyncTarget the chance to
1866 * detect connection loss, then waiting for a ping
1867 * response (implicit in drbd_resync_finished) reduces
1868 * the race considerably, but does not solve it. */
44ed167d
PR
1869 if (side == C_SYNC_SOURCE) {
1870 struct net_conf *nc;
1871 int timeo;
1872
1873 rcu_read_lock();
44a4d551 1874 nc = rcu_dereference(connection->net_conf);
44ed167d
PR
1875 timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
1876 rcu_read_unlock();
1877 schedule_timeout_interruptible(timeo);
1878 }
b30ab791 1879 drbd_resync_finished(device);
b411b363
PR
1880 }
1881
b30ab791
AG
1882 drbd_rs_controller_reset(device);
1883 /* ns.conn may already be != device->state.conn,
b411b363
PR
1884 * we may have been paused in between, or become paused until
1885 * the timer triggers.
1886 * No matter, that is handled in resync_timer_fn() */
1887 if (ns.conn == C_SYNC_TARGET)
b30ab791 1888 mod_timer(&device->resync_timer, jiffies);
b411b363 1889
b30ab791 1890 drbd_md_sync(device);
b411b363 1891 }
b30ab791 1892 put_ldev(device);
28bc3b8c 1893out:
b30ab791 1894 mutex_unlock(device->state_mutex);
b411b363
PR
1895}
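/* Worked example for the schedule_timeout_interruptible() above (a sketch;
 * the defaults of ping-int = 10 s and ping-timeo = 5 tenths of a second are
 * an assumption, not taken from this file):
 */
static unsigned long example_pre_resync_finished_timeo(void)
{
	const int ping_int = 10;	/* seconds */
	const int ping_timeo = 5;	/* tenths of a second, i.e. 0.5 s */

	/* 10*HZ + 5*HZ/9 ~= 10.56 s worth of jiffies: a little more than
	 * ping interval plus ping timeout (10.5 s), which is what the
	 * comment above asks for. */
	return ping_int * HZ + ping_timeo * HZ / 9;
}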
1896
e334f550 1897static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done)
c7a58db4
LE
1898{
1899 struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
1900 device->rs_last_bcast = jiffies;
1901
1902 if (!get_ldev(device))
1903 return;
1904
1905 drbd_bm_write_lazy(device, 0);
5ab7d2c0 1906 if (resync_done && is_sync_state(device->state.conn))
c7a58db4 1907 drbd_resync_finished(device);
5ab7d2c0 1908
c7a58db4
LE
1909 drbd_bcast_event(device, &sib);
1910 /* update timestamp, in case it took a while to write out stuff */
1911 device->rs_last_bcast = jiffies;
1912 put_ldev(device);
1913}
1914
e334f550
LE
1915static void drbd_ldev_destroy(struct drbd_device *device)
1916{
1917 lc_destroy(device->resync);
1918 device->resync = NULL;
1919 lc_destroy(device->act_log);
1920 device->act_log = NULL;
d1b80853
AG
1921
1922 __acquire(local);
63a7c8ad 1923 drbd_backing_dev_free(device, device->ldev);
d1b80853
AG
1924 device->ldev = NULL;
1925 __release(local);
1926
e334f550
LE
1927 clear_bit(GOING_DISKLESS, &device->flags);
1928 wake_up(&device->misc_wait);
1929}
1930
1931static void go_diskless(struct drbd_device *device)
1932{
1933 D_ASSERT(device, device->state.disk == D_FAILED);
1934 /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
1935 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
1936 * the protected members anymore, though, so once put_ldev reaches zero
1937 * again, it will be safe to free them. */
1938
1939 /* Try to write changed bitmap pages, read errors may have just
1940 * set some bits outside the area covered by the activity log.
1941 *
1942 * If we have an IO error during the bitmap writeout,
1943 * we will want a full sync next time, just in case.
1944 * (Do we want a specific meta data flag for this?)
1945 *
1946 * If that does not make it to stable storage either,
1947 * we cannot do anything about that anymore.
1948 *
1949 * We still need to check if both bitmap and ldev are present, we may
1950 * end up here after a failed attach, before ldev was even assigned.
1951 */
1952 if (device->bitmap && device->ldev) {
 1953 /* An interrupted resync or similar is allowed to recount bits
1954 * while we detach.
1955 * Any modifications would not be expected anymore, though.
1956 */
1957 if (drbd_bitmap_io_from_worker(device, drbd_bm_write,
1958 "detach", BM_LOCKED_TEST_ALLOWED)) {
1959 if (test_bit(WAS_READ_ERROR, &device->flags)) {
1960 drbd_md_set_flag(device, MDF_FULL_SYNC);
1961 drbd_md_sync(device);
1962 }
1963 }
1964 }
1965
1966 drbd_force_state(device, NS(disk, D_DISKLESS));
1967}
1968
ac0acb9e
LE
1969static int do_md_sync(struct drbd_device *device)
1970{
1971 drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
1972 drbd_md_sync(device);
1973 return 0;
1974}
1975
944410e9
LE
1976/* only called from drbd_worker thread, no locking */
1977void __update_timing_details(
1978 struct drbd_thread_timing_details *tdp,
1979 unsigned int *cb_nr,
1980 void *cb,
1981 const char *fn, const unsigned int line)
1982{
1983 unsigned int i = *cb_nr % DRBD_THREAD_DETAILS_HIST;
1984 struct drbd_thread_timing_details *td = tdp + i;
1985
1986 td->start_jif = jiffies;
1987 td->cb_addr = cb;
1988 td->caller_fn = fn;
1989 td->line = line;
1990 td->cb_nr = *cb_nr;
1991
1992 i = (i+1) % DRBD_THREAD_DETAILS_HIST;
1993 td = tdp + i;
1994 memset(td, 0, sizeof(*td));
1995
1996 ++(*cb_nr);
1997}
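/* Sketch of a consumer (an assumption, not part of this file): because the
 * slot following the newest entry is zeroed above, a reader can dump the
 * history oldest-to-newest by starting at the current counter and skipping
 * empty slots, including that zeroed marker.  print_td() is a hypothetical
 * output helper.
 */
static void dump_timing_history(const struct drbd_thread_timing_details *tdp,
				unsigned int next_cb_nr)
{
	unsigned int i;

	for (i = 0; i < DRBD_THREAD_DETAILS_HIST; i++) {
		const struct drbd_thread_timing_details *td =
			tdp + (next_cb_nr + i) % DRBD_THREAD_DETAILS_HIST;

		if (!td->cb_addr)	/* empty or zeroed end-of-history slot */
			continue;
		print_td(td);		/* hypothetical: start_jif, caller_fn, line, cb_nr */
	}
}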
1998
e334f550
LE
1999static void do_device_work(struct drbd_device *device, const unsigned long todo)
2000{
b47a06d1 2001 if (test_bit(MD_SYNC, &todo))
ac0acb9e 2002 do_md_sync(device);
b47a06d1
AG
2003 if (test_bit(RS_DONE, &todo) ||
2004 test_bit(RS_PROGRESS, &todo))
2005 update_on_disk_bitmap(device, test_bit(RS_DONE, &todo));
2006 if (test_bit(GO_DISKLESS, &todo))
e334f550 2007 go_diskless(device);
b47a06d1 2008 if (test_bit(DESTROY_DISK, &todo))
e334f550 2009 drbd_ldev_destroy(device);
b47a06d1 2010 if (test_bit(RS_START, &todo))
ac0acb9e 2011 do_start_resync(device);
e334f550
LE
2012}
2013
2014#define DRBD_DEVICE_WORK_MASK \
2015 ((1UL << GO_DISKLESS) \
2016 |(1UL << DESTROY_DISK) \
ac0acb9e
LE
2017 |(1UL << MD_SYNC) \
2018 |(1UL << RS_START) \
e334f550
LE
2019 |(1UL << RS_PROGRESS) \
2020 |(1UL << RS_DONE) \
2021 )
2022
2023static unsigned long get_work_bits(unsigned long *flags)
2024{
2025 unsigned long old, new;
2026 do {
2027 old = *flags;
2028 new = old & ~DRBD_DEVICE_WORK_MASK;
2029 } while (cmpxchg(flags, old, new) != old);
2030 return old & DRBD_DEVICE_WORK_MASK;
2031}
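/* Stand-alone sketch of the same technique in C11 atomics (an illustration,
 * not kernel code): grab and clear a mask of flag bits in one compare-and-swap
 * loop, so each queued work bit is consumed exactly once even if other CPUs
 * keep setting bits concurrently.
 */
#include <stdatomic.h>

static unsigned long take_bits(_Atomic unsigned long *flags, unsigned long mask)
{
	unsigned long old = atomic_load(flags);

	/* on failure, atomic_compare_exchange_weak reloads "old" for us */
	while (!atomic_compare_exchange_weak(flags, &old, old & ~mask))
		;
	return old & mask;	/* the bits this caller now owns */
}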
2032
2033static void do_unqueued_work(struct drbd_connection *connection)
c7a58db4
LE
2034{
2035 struct drbd_peer_device *peer_device;
2036 int vnr;
2037
2038 rcu_read_lock();
2039 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2040 struct drbd_device *device = peer_device->device;
e334f550
LE
2041 unsigned long todo = get_work_bits(&device->flags);
2042 if (!todo)
c7a58db4 2043 continue;
5ab7d2c0 2044
c7a58db4
LE
2045 kref_get(&device->kref);
2046 rcu_read_unlock();
e334f550 2047 do_device_work(device, todo);
c7a58db4
LE
2048 kref_put(&device->kref, drbd_destroy_device);
2049 rcu_read_lock();
2050 }
2051 rcu_read_unlock();
2052}
2053
a186e478 2054static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
8c0785a5
LE
2055{
2056 spin_lock_irq(&queue->q_lock);
15e26f6a 2057 list_splice_tail_init(&queue->q, work_list);
8c0785a5
LE
2058 spin_unlock_irq(&queue->q_lock);
2059 return !list_empty(work_list);
2060}
2061
bde89a9e 2062static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list)
b6dd1a89
LE
2063{
2064 DEFINE_WAIT(wait);
2065 struct net_conf *nc;
2066 int uncork, cork;
2067
abde9cc6 2068 dequeue_work_batch(&connection->sender_work, work_list);
b6dd1a89
LE
2069 if (!list_empty(work_list))
2070 return;
2071
2072 /* Still nothing to do?
2073 * Maybe we still need to close the current epoch,
2074 * even if no new requests are queued yet.
2075 *
2076 * Also, poke TCP, just in case.
2077 * Then wait for new work (or signal). */
2078 rcu_read_lock();
2079 nc = rcu_dereference(connection->net_conf);
2080 uncork = nc ? nc->tcp_cork : 0;
2081 rcu_read_unlock();
2082 if (uncork) {
2083 mutex_lock(&connection->data.mutex);
2084 if (connection->data.socket)
2085 drbd_tcp_uncork(connection->data.socket);
2086 mutex_unlock(&connection->data.mutex);
2087 }
2088
2089 for (;;) {
2090 int send_barrier;
2091 prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
0500813f 2092 spin_lock_irq(&connection->resource->req_lock);
b6dd1a89 2093 spin_lock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
bc317a9e 2094 if (!list_empty(&connection->sender_work.q))
4dd726f0 2095 list_splice_tail_init(&connection->sender_work.q, work_list);
b6dd1a89
LE
2096 spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
2097 if (!list_empty(work_list) || signal_pending(current)) {
0500813f 2098 spin_unlock_irq(&connection->resource->req_lock);
b6dd1a89
LE
2099 break;
2100 }
f9c78128
LE
2101
2102 /* We found nothing new to do, no to-be-communicated request,
2103 * no other work item. We may still need to close the last
2104 * epoch. Next incoming request epoch will be connection ->
2105 * current transfer log epoch number. If that is different
2106 * from the epoch of the last request we communicated, it is
2107 * safe to send the epoch separating barrier now.
2108 */
2109 send_barrier =
2110 atomic_read(&connection->current_tle_nr) !=
2111 connection->send.current_epoch_nr;
0500813f 2112 spin_unlock_irq(&connection->resource->req_lock);
f9c78128
LE
2113
2114 if (send_barrier)
2115 maybe_send_barrier(connection,
2116 connection->send.current_epoch_nr + 1);
5ab7d2c0 2117
e334f550 2118 if (test_bit(DEVICE_WORK_PENDING, &connection->flags))
5ab7d2c0
LE
2119 break;
2120
a80ca1ae
LE
2121 /* drbd_send() may have called flush_signals() */
2122 if (get_t_state(&connection->worker) != RUNNING)
2123 break;
5ab7d2c0 2124
b6dd1a89
LE
2125 schedule();
 2126 /* may be woken up for things other than new work, too,
 2127 * e.g. if the current epoch got closed,
 2128 * in which case we send the barrier above. */
2129 }
2130 finish_wait(&connection->sender_work.q_wait, &wait);
2131
2132 /* someone may have changed the config while we have been waiting above. */
2133 rcu_read_lock();
2134 nc = rcu_dereference(connection->net_conf);
2135 cork = nc ? nc->tcp_cork : 0;
2136 rcu_read_unlock();
2137 mutex_lock(&connection->data.mutex);
2138 if (connection->data.socket) {
2139 if (cork)
2140 drbd_tcp_cork(connection->data.socket);
2141 else if (!uncork)
2142 drbd_tcp_uncork(connection->data.socket);
2143 }
2144 mutex_unlock(&connection->data.mutex);
2145}
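/* Userspace sketch of the cork/uncork idea above (an illustration, not the
 * drbd_tcp_cork()/drbd_tcp_uncork() implementation): uncork before going idle
 * so already-queued data is pushed out, cork again before producing the next
 * batch so many small sends coalesce into full segments.
 */
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

static void tcp_set_cork(int fd, int on)
{
	/* best-effort; error handling omitted in this sketch */
	setsockopt(fd, IPPROTO_TCP, TCP_CORK, &on, sizeof(on));
}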
2146
b411b363
PR
2147int drbd_worker(struct drbd_thread *thi)
2148{
bde89a9e 2149 struct drbd_connection *connection = thi->connection;
6db7e50a 2150 struct drbd_work *w = NULL;
c06ece6b 2151 struct drbd_peer_device *peer_device;
b411b363 2152 LIST_HEAD(work_list);
8c0785a5 2153 int vnr;
b411b363 2154
e77a0a5c 2155 while (get_t_state(thi) == RUNNING) {
80822284 2156 drbd_thread_current_set_cpu(thi);
b411b363 2157
944410e9
LE
2158 if (list_empty(&work_list)) {
2159 update_worker_timing_details(connection, wait_for_work);
bde89a9e 2160 wait_for_work(connection, &work_list);
944410e9 2161 }
b411b363 2162
944410e9
LE
2163 if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
2164 update_worker_timing_details(connection, do_unqueued_work);
e334f550 2165 do_unqueued_work(connection);
944410e9 2166 }
5ab7d2c0 2167
8c0785a5 2168 if (signal_pending(current)) {
b411b363 2169 flush_signals(current);
19393e10 2170 if (get_t_state(thi) == RUNNING) {
1ec861eb 2171 drbd_warn(connection, "Worker got an unexpected signal\n");
b411b363 2172 continue;
19393e10 2173 }
b411b363
PR
2174 break;
2175 }
2176
e77a0a5c 2177 if (get_t_state(thi) != RUNNING)
b411b363 2178 break;
b411b363 2179
729e8b87 2180 if (!list_empty(&work_list)) {
6db7e50a
AG
2181 w = list_first_entry(&work_list, struct drbd_work, list);
2182 list_del_init(&w->list);
944410e9 2183 update_worker_timing_details(connection, w->cb);
6db7e50a 2184 if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0)
8c0785a5 2185 continue;
bde89a9e
AG
2186 if (connection->cstate >= C_WF_REPORT_PARAMS)
2187 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
b411b363
PR
2188 }
2189 }
b411b363 2190
8c0785a5 2191 do {
944410e9
LE
2192 if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
2193 update_worker_timing_details(connection, do_unqueued_work);
e334f550 2194 do_unqueued_work(connection);
944410e9 2195 }
729e8b87 2196 if (!list_empty(&work_list)) {
6db7e50a
AG
2197 w = list_first_entry(&work_list, struct drbd_work, list);
2198 list_del_init(&w->list);
944410e9 2199 update_worker_timing_details(connection, w->cb);
6db7e50a 2200 w->cb(w, 1);
729e8b87
LE
2201 } else
2202 dequeue_work_batch(&connection->sender_work, &work_list);
e334f550 2203 } while (!list_empty(&work_list) || test_bit(DEVICE_WORK_PENDING, &connection->flags));
b411b363 2204
c141ebda 2205 rcu_read_lock();
c06ece6b
AG
2206 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2207 struct drbd_device *device = peer_device->device;
0b0ba1ef 2208 D_ASSERT(device, device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE);
b30ab791 2209 kref_get(&device->kref);
c141ebda 2210 rcu_read_unlock();
b30ab791 2211 drbd_device_cleanup(device);
05a10ec7 2212 kref_put(&device->kref, drbd_destroy_device);
c141ebda 2213 rcu_read_lock();
0e29d163 2214 }
c141ebda 2215 rcu_read_unlock();
b411b363
PR
2216
2217 return 0;
2218}