/*
   drbd_req.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/module.h>

#include <linux/slab.h>
#include <linux/drbd.h>
#include "drbd_int.h"
#include "drbd_req.h"

/* Update disk stats at start of I/O request */
static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req, struct bio *bio)
{
	const int rw = bio_data_dir(bio);
	int cpu;
	cpu = part_stat_lock();
	part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]);
	part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio));
	part_inc_in_flight(&mdev->vdisk->part0, rw);
	part_stat_unlock();
}

/* Update disk stats when completing request upwards */
static void _drbd_end_io_acct(struct drbd_conf *mdev, struct drbd_request *req)
{
	int rw = bio_data_dir(req->master_bio);
	unsigned long duration = jiffies - req->start_time;
	int cpu;
	cpu = part_stat_lock();
	part_stat_add(cpu, &mdev->vdisk->part0, ticks[rw], duration);
	part_round_stats(cpu, &mdev->vdisk->part0);
	part_dec_in_flight(&mdev->vdisk->part0, rw);
	part_stat_unlock();
}

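/* Allocate and initialize a new drbd_request for @bio_src: take an object
 * from the request mempool, set up the private bio from the master bio
 * (drbd_req_make_private_bio), and initialize the interval and list heads
 * used for conflict detection and the transfer log.
 * Returns NULL if the mempool allocation fails. */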
static struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
					 struct bio *bio_src)
{
	struct drbd_request *req;

	req = mempool_alloc(drbd_request_mempool, GFP_NOIO);
	if (!req)
		return NULL;

	drbd_req_make_private_bio(req, bio_src);
	req->rq_state = bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0;
	req->w.mdev = mdev;
	req->master_bio = bio_src;
	req->epoch = 0;

	drbd_clear_interval(&req->i);
	req->i.sector = bio_src->bi_sector;
	req->i.size = bio_src->bi_size;
	req->i.local = true;
	req->i.waiting = false;

	INIT_LIST_HEAD(&req->tl_requests);
	INIT_LIST_HEAD(&req->w.list);

	return req;
}

static void drbd_req_free(struct drbd_request *req)
{
	mempool_free(req, drbd_request_mempool);
}

/* rw is bio_data_dir(), only READ or WRITE */
static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw)
{
	const unsigned long s = req->rq_state;

	/* remove it from the transfer log.
	 * well, only if it had been there in the first
	 * place... if it had not (local only or conflicting
	 * and never sent), it should still be "empty" as
	 * initialized in drbd_req_new(), so we can list_del() it
	 * here unconditionally */
	list_del(&req->tl_requests);

	/* if it was a write, we may have to set the corresponding
	 * bit(s) out-of-sync first. If it had a local part, we need to
	 * release the reference to the activity log. */
	if (rw == WRITE) {
		/* Set out-of-sync unless both OK flags are set
		 * (local only or remote failed).
		 * Other places where we set out-of-sync:
		 * READ with local io-error */
		if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK))
			drbd_set_out_of_sync(mdev, req->i.sector, req->i.size);

		if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS))
			drbd_set_in_sync(mdev, req->i.sector, req->i.size);

		/* one might be tempted to move the drbd_al_complete_io
		 * to the local io completion callback drbd_request_endio.
		 * but, if this was a mirror write, we may only
		 * drbd_al_complete_io after this is RQ_NET_DONE,
		 * otherwise the extent could be dropped from the al
		 * before it has actually been written on the peer.
		 * if we crash before our peer knows about the request,
		 * but after the extent has been dropped from the al,
		 * we would forget to resync the corresponding extent.
		 */
		if (s & RQ_LOCAL_MASK) {
			if (get_ldev_if_state(mdev, D_FAILED)) {
				if (s & RQ_IN_ACT_LOG)
					drbd_al_complete_io(mdev, req->i.sector);
				put_ldev(mdev);
			} else if (__ratelimit(&drbd_ratelimit_state)) {
				dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu), "
				     "but my Disk seems to have failed :(\n",
				     (unsigned long long) req->i.sector);
			}
		}
	}

	drbd_req_free(req);
}

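/* Queue the work item that sends the barrier closing the current epoch,
 * unless such a barrier is already queued (CREATE_BARRIER set).
 * Called with the req_lock held. */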
static void queue_barrier(struct drbd_conf *mdev)
{
	struct drbd_tl_epoch *b;

	/* We are within the req_lock. Once we queued the barrier for sending,
	 * we set the CREATE_BARRIER bit. It is cleared as soon as a new
	 * barrier/epoch object is added. This is the only place this bit is
	 * set. It indicates that the barrier for this epoch is already queued,
	 * and no new epoch has been created yet. */
	if (test_bit(CREATE_BARRIER, &mdev->flags))
		return;

	b = mdev->tconn->newest_tle;
	b->w.cb = w_send_barrier;
	b->w.mdev = mdev;
	/* inc_ap_pending done here, so we won't
	 * get imbalanced on connection loss.
	 * dec_ap_pending will be done in got_BarrierAck
	 * or (on connection loss) in tl_clear. */
	inc_ap_pending(mdev);
	drbd_queue_work(&mdev->tconn->data.work, &b->w);
	set_bit(CREATE_BARRIER, &mdev->flags);
}

static void _about_to_complete_local_write(struct drbd_conf *mdev,
	struct drbd_request *req)
{
	const unsigned long s = req->rq_state;

	/* Before we can signal completion to the upper layers,
	 * we may need to close the current epoch.
	 * We can skip this, if this request has not even been sent, because we
	 * did not have a fully established connection yet/anymore, during
	 * bitmap exchange, or while we are C_AHEAD due to congestion policy.
	 */
	if (mdev->state.conn >= C_CONNECTED &&
	    (s & RQ_NET_SENT) != 0 &&
	    req->epoch == mdev->tconn->newest_tle->br_number)
		queue_barrier(mdev);
}

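/* Hand the master bio back to the upper layers with the collected error
 * code and drop one application-bio reference (dec_ap_bio). */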
void complete_master_bio(struct drbd_conf *mdev,
		struct bio_and_error *m)
{
	bio_endio(m->bio, m->error);
	dec_ap_bio(mdev);
}


static void drbd_remove_request_interval(struct rb_root *root,
					 struct drbd_request *req)
{
	struct drbd_conf *mdev = req->w.mdev;
	struct drbd_interval *i = &req->i;

	drbd_remove_interval(root, i);

	/* Wake up any processes waiting for this request to complete. */
	if (i->waiting)
		wake_up(&mdev->misc_wait);
}

/* Helper for __req_mod().
 * Set m->bio to the master bio, if it is fit to be completed,
 * or leave it alone (it is initialized to NULL in __req_mod),
 * if it has already been completed, or cannot be completed yet.
 * If m->bio is set, the error status to be returned is placed in m->error.
 */
void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
{
	const unsigned long s = req->rq_state;
	struct drbd_conf *mdev = req->w.mdev;
	/* only WRITES may end up here without a master bio (on barrier ack) */
	int rw = req->master_bio ? bio_data_dir(req->master_bio) : WRITE;

	/* we must not complete the master bio, while it is
	 * still being processed by _drbd_send_zc_bio (drbd_send_dblock)
	 * not yet acknowledged by the peer
	 * not yet completed by the local io subsystem
	 * these flags may get cleared in any order by
	 *	the worker,
	 *	the receiver,
	 *	the bio_endio completion callbacks.
	 */
	if (s & RQ_NET_QUEUED)
		return;
	if (s & RQ_NET_PENDING)
		return;
	if (s & RQ_LOCAL_PENDING)
		return;

	if (req->master_bio) {
		/* this is DATA_RECEIVED (remote read)
		 * or protocol C P_WRITE_ACK
		 * or protocol B P_RECV_ACK
		 * or protocol A "HANDED_OVER_TO_NETWORK" (SendAck)
		 * or canceled or failed,
		 * or killed from the transfer log due to connection loss.
		 */

		/*
		 * figure out whether to report success or failure.
		 *
		 * report success when at least one of the operations succeeded.
		 * or, to put the other way,
		 * only report failure, when both operations failed.
		 *
		 * what to do about the failures is handled elsewhere.
		 * what we need to do here is just: complete the master_bio.
		 *
		 * local completion error, if any, has been stored as ERR_PTR
		 * in private_bio within drbd_request_endio.
		 */
		int ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK);
		int error = PTR_ERR(req->private_bio);

		/* remove the request from the conflict detection
		 * respective block_id verification hash */
		if (!drbd_interval_empty(&req->i)) {
			struct rb_root *root;

			if (rw == WRITE)
				root = &mdev->write_requests;
			else
				root = &mdev->read_requests;
			drbd_remove_request_interval(root, req);
		} else
			D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0);

		/* for writes we need to do some extra housekeeping */
		if (rw == WRITE)
			_about_to_complete_local_write(mdev, req);

		/* Update disk stats */
		_drbd_end_io_acct(mdev, req);

		m->error = ok ? 0 : (error ?: -EIO);
		m->bio = req->master_bio;
		req->master_bio = NULL;
	}

	if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) {
		/* this is disconnected (local only) operation,
		 * or protocol C P_WRITE_ACK,
		 * or protocol A or B P_BARRIER_ACK,
		 * or killed from the transfer log due to connection loss. */
		_req_is_done(mdev, req, rw);
	}
	/* else: network part and not DONE yet. that is
	 * protocol A or B, barrier ack still pending... */
}

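/* As _req_may_be_done(), but only attempt to complete the master bio
 * while IO on this device is not suspended. */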
static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_error *m)
{
	struct drbd_conf *mdev = req->w.mdev;

	if (!is_susp(mdev->state))
		_req_may_be_done(req, m);
}

/* obviously this could be coded as many single functions
 * instead of one huge switch,
 * or by putting the code directly in the respective locations
 * (as it has been before).
 *
 * but having it this way
 *  enforces that it is all in this one place, where it is easier to audit,
 *  it makes it obvious that whatever "event" "happens" to a request should
 *  happen "atomically" within the req_lock,
 *  and it enforces that we have to think in a very structured manner
 *  about the "events" that may happen to a request during its life time ...
 */
int __req_mod(struct drbd_request *req, enum drbd_req_event what,
		struct bio_and_error *m)
{
	struct drbd_conf *mdev = req->w.mdev;
	int rv = 0;
	m->bio = NULL;

	switch (what) {
	default:
		dev_err(DEV, "LOGIC BUG in %s:%u\n", __FILE__ , __LINE__);
		break;

	/* does not happen...
	 * initialization done in drbd_req_new
	case CREATED:
		break;
	*/

	case TO_BE_SENT: /* via network */
		/* reached via drbd_make_request_common
		 * and from w_read_retry_remote */
		D_ASSERT(!(req->rq_state & RQ_NET_MASK));
		req->rq_state |= RQ_NET_PENDING;
		inc_ap_pending(mdev);
		break;

	case TO_BE_SUBMITTED: /* locally */
		/* reached via drbd_make_request_common */
		D_ASSERT(!(req->rq_state & RQ_LOCAL_MASK));
		req->rq_state |= RQ_LOCAL_PENDING;
		break;

	case COMPLETED_OK:
		if (bio_data_dir(req->master_bio) == WRITE)
			mdev->writ_cnt += req->i.size >> 9;
		else
			mdev->read_cnt += req->i.size >> 9;

		req->rq_state |= (RQ_LOCAL_COMPLETED|RQ_LOCAL_OK);
		req->rq_state &= ~RQ_LOCAL_PENDING;

		_req_may_be_done_not_susp(req, m);
		put_ldev(mdev);
		break;

	case WRITE_COMPLETED_WITH_ERROR:
		req->rq_state |= RQ_LOCAL_COMPLETED;
		req->rq_state &= ~RQ_LOCAL_PENDING;

		__drbd_chk_io_error(mdev, false);
		_req_may_be_done_not_susp(req, m);
		put_ldev(mdev);
		break;

	case READ_AHEAD_COMPLETED_WITH_ERROR:
		/* it is legal to fail READA */
		req->rq_state |= RQ_LOCAL_COMPLETED;
		req->rq_state &= ~RQ_LOCAL_PENDING;
		_req_may_be_done_not_susp(req, m);
		put_ldev(mdev);
		break;

	case READ_COMPLETED_WITH_ERROR:
		drbd_set_out_of_sync(mdev, req->i.sector, req->i.size);

		req->rq_state |= RQ_LOCAL_COMPLETED;
		req->rq_state &= ~RQ_LOCAL_PENDING;

		D_ASSERT(!(req->rq_state & RQ_NET_MASK));

		__drbd_chk_io_error(mdev, false);
		put_ldev(mdev);

		/* no point in retrying if there is no good remote data,
		 * or we have no connection. */
		if (mdev->state.pdsk != D_UP_TO_DATE) {
			_req_may_be_done_not_susp(req, m);
			break;
		}

		/* _req_mod(req,TO_BE_SENT); oops, recursion... */
		req->rq_state |= RQ_NET_PENDING;
		inc_ap_pending(mdev);
		/* fall through: _req_mod(req,QUEUE_FOR_NET_READ); */

	case QUEUE_FOR_NET_READ:
		/* READ or READA, and
		 * no local disk,
		 * or target area marked as invalid,
		 * or just got an io-error. */
		/* from drbd_make_request_common
		 * or from bio_endio during read io-error recovery */

		/* so we can verify the handle in the answer packet
		 * corresponding hlist_del is in _req_may_be_done() */
		drbd_insert_interval(&mdev->read_requests, &req->i);

		set_bit(UNPLUG_REMOTE, &mdev->flags);

		D_ASSERT(req->rq_state & RQ_NET_PENDING);
		req->rq_state |= RQ_NET_QUEUED;
		req->w.cb = (req->rq_state & RQ_LOCAL_MASK)
			? w_read_retry_remote
			: w_send_read_req;
		drbd_queue_work(&mdev->tconn->data.work, &req->w);
		break;

	case QUEUE_FOR_NET_WRITE:
		/* assert something? */
		/* from drbd_make_request_common only */

		/* corresponding hlist_del is in _req_may_be_done() */
		drbd_insert_interval(&mdev->write_requests, &req->i);

		/* NOTE
		 * In case the req ended up on the transfer log before being
		 * queued on the worker, it could lead to this request being
		 * missed during cleanup after connection loss.
		 * So we have to do both operations here,
		 * within the same lock that protects the transfer log.
		 *
		 * _req_add_to_epoch(req); this has to be after the
		 * _maybe_start_new_epoch(req); which happened in
		 * drbd_make_request_common, because we now may set the bit
		 * again ourselves to close the current epoch.
		 *
		 * Add req to the (now) current epoch (barrier). */

		/* otherwise we may lose an unplug, which may cause some remote
		 * io-scheduler timeout to expire, increasing maximum latency,
		 * hurting performance. */
		set_bit(UNPLUG_REMOTE, &mdev->flags);

		/* see drbd_make_request_common,
		 * just after it grabs the req_lock */
		D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0);

		req->epoch = mdev->tconn->newest_tle->br_number;

		/* increment size of current epoch */
		mdev->tconn->newest_tle->n_writes++;

		/* queue work item to send data */
		D_ASSERT(req->rq_state & RQ_NET_PENDING);
		req->rq_state |= RQ_NET_QUEUED;
		req->w.cb = w_send_dblock;
		drbd_queue_work(&mdev->tconn->data.work, &req->w);

		/* close the epoch, in case it outgrew the limit */
		if (mdev->tconn->newest_tle->n_writes >= mdev->tconn->net_conf->max_epoch_size)
			queue_barrier(mdev);

		break;

	case QUEUE_FOR_SEND_OOS:
		req->rq_state |= RQ_NET_QUEUED;
		req->w.cb = w_send_oos;
		drbd_queue_work(&mdev->tconn->data.work, &req->w);
		break;

	case OOS_HANDED_TO_NETWORK:
		/* actually the same */
	case SEND_CANCELED:
		/* treat it the same */
	case SEND_FAILED:
		/* real cleanup will be done from tl_clear. just update flags
		 * so it is no longer marked as on the worker queue */
		req->rq_state &= ~RQ_NET_QUEUED;
		/* if we did it right, tl_clear should be scheduled only after
		 * this, so this should not be necessary! */
		_req_may_be_done_not_susp(req, m);
		break;

	case HANDED_OVER_TO_NETWORK:
		/* assert something? */
		if (bio_data_dir(req->master_bio) == WRITE)
			atomic_add(req->i.size >> 9, &mdev->ap_in_flight);

		if (bio_data_dir(req->master_bio) == WRITE &&
		    mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A) {
			/* this is what is dangerous about protocol A:
			 * pretend it was successfully written on the peer. */
			if (req->rq_state & RQ_NET_PENDING) {
				dec_ap_pending(mdev);
				req->rq_state &= ~RQ_NET_PENDING;
				req->rq_state |= RQ_NET_OK;
			} /* else: neg-ack was faster... */
			/* it is still not yet RQ_NET_DONE until the
			 * corresponding epoch barrier got acked as well,
			 * so we know what to dirty on connection loss */
		}
		req->rq_state &= ~RQ_NET_QUEUED;
		req->rq_state |= RQ_NET_SENT;
		/* because _drbd_send_zc_bio could sleep, and may want to
		 * dereference the bio even after the "WRITE_ACKED_BY_PEER" and
		 * "COMPLETED_OK" events came in, once we return from
		 * _drbd_send_zc_bio (drbd_send_dblock), we have to check
		 * whether it is done already, and end it. */
		_req_may_be_done_not_susp(req, m);
		break;

	case READ_RETRY_REMOTE_CANCELED:
		req->rq_state &= ~RQ_NET_QUEUED;
		/* fall through, in case we raced with drbd_disconnect */
	case CONNECTION_LOST_WHILE_PENDING:
		/* transfer log cleanup after connection loss */
		/* assert something? */
		if (req->rq_state & RQ_NET_PENDING)
			dec_ap_pending(mdev);
		req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
		req->rq_state |= RQ_NET_DONE;
		if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE)
			atomic_sub(req->i.size >> 9, &mdev->ap_in_flight);

		/* if it is still queued, we may not complete it here.
		 * it will be canceled soon. */
		if (!(req->rq_state & RQ_NET_QUEUED))
			_req_may_be_done(req, m); /* Allowed while state.susp */
		break;

	case WRITE_ACKED_BY_PEER_AND_SIS:
		req->rq_state |= RQ_NET_SIS;
	case CONFLICT_DISCARDED_BY_PEER:
		/* for discarded conflicting writes of multiple primaries,
		 * there is no need to keep anything in the tl, potential
		 * node crashes are covered by the activity log. */
		if (what == CONFLICT_DISCARDED_BY_PEER)
			dev_alert(DEV, "Got DiscardAck packet %llus +%u!"
			      " DRBD is not a random data generator!\n",
			      (unsigned long long)req->i.sector, req->i.size);

		req->rq_state |= RQ_NET_DONE;
		/* fall through */
	case WRITE_ACKED_BY_PEER:
		/* protocol C; successfully written on peer.
		 * Nothing to do here.
		 * We want to keep the tl in place for all protocols, to cater
		 * for volatile write-back caches on lower level devices.
		 *
		 * A barrier request is expected to have forced all prior
		 * requests onto stable storage, so completion of a barrier
		 * request could set NET_DONE right here, and not wait for the
		 * P_BARRIER_ACK, but that is an unnecessary optimization. */

		/* this makes it effectively the same as for: */
	case RECV_ACKED_BY_PEER:
		/* protocol B; pretends to be successfully written on peer.
		 * see also notes above in HANDED_OVER_TO_NETWORK about
		 * protocol != C */
		req->rq_state |= RQ_NET_OK;
		D_ASSERT(req->rq_state & RQ_NET_PENDING);
		dec_ap_pending(mdev);
		atomic_sub(req->i.size >> 9, &mdev->ap_in_flight);
		req->rq_state &= ~RQ_NET_PENDING;
		_req_may_be_done_not_susp(req, m);
		break;

	case NEG_ACKED:
		/* assert something? */
		if (req->rq_state & RQ_NET_PENDING) {
			dec_ap_pending(mdev);
			atomic_sub(req->i.size >> 9, &mdev->ap_in_flight);
		}
		req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);

		req->rq_state |= RQ_NET_DONE;
		_req_may_be_done_not_susp(req, m);
		/* else: done by HANDED_OVER_TO_NETWORK */
		break;

	case FAIL_FROZEN_DISK_IO:
		if (!(req->rq_state & RQ_LOCAL_COMPLETED))
			break;

		_req_may_be_done(req, m); /* Allowed while state.susp */
		break;

	case RESTART_FROZEN_DISK_IO:
		if (!(req->rq_state & RQ_LOCAL_COMPLETED))
			break;

		req->rq_state &= ~RQ_LOCAL_COMPLETED;

		rv = MR_READ;
		if (bio_data_dir(req->master_bio) == WRITE)
			rv = MR_WRITE;

		get_ldev(mdev);
		req->w.cb = w_restart_disk_io;
		drbd_queue_work(&mdev->tconn->data.work, &req->w);
		break;

	case RESEND:
		/* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK
		   before the connection loss (B&C only); only P_BARRIER_ACK was missing.
		   Throwing them out of the TL here by pretending we got a BARRIER_ACK,
		   we ensure that the peer was not rebooted. */
		if (!(req->rq_state & RQ_NET_OK)) {
			if (req->w.cb) {
				drbd_queue_work(&mdev->tconn->data.work, &req->w);
				rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ;
			}
			break;
		}
		/* else, fall through to BARRIER_ACKED */

	case BARRIER_ACKED:
		if (!(req->rq_state & RQ_WRITE))
			break;

		if (req->rq_state & RQ_NET_PENDING) {
			/* barrier came in before all requests have been acked.
			 * this is bad, because if the connection is lost now,
			 * we won't be able to clean them up... */
			dev_err(DEV, "FIXME (BARRIER_ACKED but pending)\n");
			list_move(&req->tl_requests, &mdev->tconn->out_of_sequence_requests);
		}
		if ((req->rq_state & RQ_NET_MASK) != 0) {
			req->rq_state |= RQ_NET_DONE;
			if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A)
				atomic_sub(req->i.size>>9, &mdev->ap_in_flight);
		}
		_req_may_be_done(req, m); /* Allowed while state.susp */
		break;

	case DATA_RECEIVED:
		D_ASSERT(req->rq_state & RQ_NET_PENDING);
		dec_ap_pending(mdev);
		req->rq_state &= ~RQ_NET_PENDING;
		req->rq_state |= (RQ_NET_OK|RQ_NET_DONE);
		_req_may_be_done_not_susp(req, m);
		break;
	};

	return rv;
}

/* we may do a local read if:
 * - we are consistent (of course),
 * - or we are generally inconsistent,
 *   BUT we are still/already IN SYNC for this area.
 *   since size may be bigger than BM_BLOCK_SIZE,
 *   we may need to check several bits.
 */
static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size)
{
	unsigned long sbnr, ebnr;
	sector_t esector, nr_sectors;

	if (mdev->state.disk == D_UP_TO_DATE)
		return 1;
	if (mdev->state.disk != D_INCONSISTENT)
		return 0;
	esector = sector + (size >> 9) - 1;

	nr_sectors = drbd_get_capacity(mdev->this_bdev);
	D_ASSERT(sector < nr_sectors);
	D_ASSERT(esector < nr_sectors);

	sbnr = BM_SECT_TO_BIT(sector);
	ebnr = BM_SECT_TO_BIT(esector);

	return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr);
}

/*
 * complete_conflicting_writes  -  wait for any conflicting write requests
 *
 * The write_requests tree contains all active write requests which we
 * currently know about.  Wait for any requests to complete which conflict with
 * the new one.
 */
static int complete_conflicting_writes(struct drbd_conf *mdev,
				       sector_t sector, int size)
{
	for(;;) {
		DEFINE_WAIT(wait);
		struct drbd_interval *i;

		i = drbd_find_overlap(&mdev->write_requests, sector, size);
		if (!i)
			return 0;
		i->waiting = true;
		prepare_to_wait(&mdev->misc_wait, &wait, TASK_INTERRUPTIBLE);
		spin_unlock_irq(&mdev->tconn->req_lock);
		schedule();
		finish_wait(&mdev->misc_wait, &wait);
		spin_lock_irq(&mdev->tconn->req_lock);
		if (signal_pending(current))
			return -ERESTARTSYS;
	}
}

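/* The workhorse behind drbd_make_request(): allocate a drbd_request for
 * @bio, decide whether it is to be served by the local disk, the peer, or
 * both, hook it into the current epoch of the transfer log under the
 * req_lock, queue the network work items via _req_mod(), and finally
 * submit the private bio to the local backing device (if any).
 * Returns nonzero only in the suspended-IO case, so that the bio gets
 * retried (see the is_susp() handling below). */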
static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time)
{
	const int rw = bio_rw(bio);
	const int size = bio->bi_size;
	const sector_t sector = bio->bi_sector;
	struct drbd_tl_epoch *b = NULL;
	struct drbd_request *req;
	int local, remote, send_oos = 0;
	int err;
	int ret = 0;

	/* allocate outside of all locks; */
	req = drbd_req_new(mdev, bio);
	if (!req) {
		dec_ap_bio(mdev);
		/* only pass the error to the upper layers.
		 * if user cannot handle io errors, that's not our business. */
		dev_err(DEV, "could not kmalloc() req\n");
		bio_endio(bio, -ENOMEM);
		return 0;
	}
	req->start_time = start_time;

	local = get_ldev(mdev);
	if (!local) {
		bio_put(req->private_bio); /* or we get a bio leak */
		req->private_bio = NULL;
	}
	if (rw == WRITE) {
		remote = 1;
	} else {
		/* READ || READA */
		if (local) {
			if (!drbd_may_do_local_read(mdev, sector, size)) {
				/* we could kick the syncer to
				 * sync this extent asap, wait for
				 * it, then continue locally.
				 * Or just issue the request remotely.
				 */
				local = 0;
				bio_put(req->private_bio);
				req->private_bio = NULL;
				put_ldev(mdev);
			}
		}
		remote = !local && mdev->state.pdsk >= D_UP_TO_DATE;
	}

	/* If we have a disk, but a READA request is mapped to remote,
	 * we are R_PRIMARY, D_INCONSISTENT, SyncTarget.
	 * Just fail that READA request right here.
	 *
	 * THINK: maybe fail all READA when not local?
	 * or make this configurable...
	 * if network is slow, READA won't do any good.
	 */
	if (rw == READA && mdev->state.disk >= D_INCONSISTENT && !local) {
		err = -EWOULDBLOCK;
		goto fail_and_free_req;
	}

	/* For WRITES going to the local disk, grab a reference on the target
	 * extent. This waits for any resync activity in the corresponding
	 * resync extent to finish, and, if necessary, pulls in the target
	 * extent into the activity log, which involves further disk io because
	 * of transactional on-disk meta data updates. */
	if (rw == WRITE && local && !test_bit(AL_SUSPENDED, &mdev->flags)) {
		req->rq_state |= RQ_IN_ACT_LOG;
		drbd_al_begin_io(mdev, sector);
	}

	remote = remote && drbd_should_do_remote(mdev->state);
	send_oos = rw == WRITE && drbd_should_send_oos(mdev->state);
	D_ASSERT(!(remote && send_oos));

	if (!(local || remote) && !is_susp(mdev->state)) {
		if (__ratelimit(&drbd_ratelimit_state))
			dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
		err = -EIO;
		goto fail_free_complete;
	}

	/* For WRITE request, we have to make sure that we have an
	 * unused_spare_tle, in case we need to start a new epoch.
	 * I try to be smart and avoid to pre-allocate always "just in case",
	 * but there is a race between testing the bit and pointer outside the
	 * spinlock, and grabbing the spinlock.
	 * if we lost that race, we retry. */
	if (rw == WRITE && (remote || send_oos) &&
	    mdev->tconn->unused_spare_tle == NULL &&
	    test_bit(CREATE_BARRIER, &mdev->flags)) {
allocate_barrier:
		b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_NOIO);
		if (!b) {
			dev_err(DEV, "Failed to alloc barrier.\n");
			err = -ENOMEM;
			goto fail_free_complete;
		}
	}

	/* GOOD, everything prepared, grab the spin_lock */
	spin_lock_irq(&mdev->tconn->req_lock);

	if (rw == WRITE) {
		err = complete_conflicting_writes(mdev, sector, size);
		if (err) {
			spin_unlock_irq(&mdev->tconn->req_lock);
			goto fail_free_complete;
		}
	}

	if (is_susp(mdev->state)) {
		/* If we got suspended, use the retry mechanism of
		   generic_make_request() to restart processing of this
		   bio. In the next call to drbd_make_request
		   we sleep in inc_ap_bio() */
		ret = 1;
		spin_unlock_irq(&mdev->tconn->req_lock);
		goto fail_free_complete;
	}

	if (remote || send_oos) {
		remote = drbd_should_do_remote(mdev->state);
		send_oos = rw == WRITE && drbd_should_send_oos(mdev->state);
		D_ASSERT(!(remote && send_oos));

		if (!(remote || send_oos))
			dev_warn(DEV, "lost connection while grabbing the req_lock!\n");
		if (!(local || remote)) {
			dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
			spin_unlock_irq(&mdev->tconn->req_lock);
			err = -EIO;
			goto fail_free_complete;
		}
	}

	if (b && mdev->tconn->unused_spare_tle == NULL) {
		mdev->tconn->unused_spare_tle = b;
		b = NULL;
	}
	if (rw == WRITE && (remote || send_oos) &&
	    mdev->tconn->unused_spare_tle == NULL &&
	    test_bit(CREATE_BARRIER, &mdev->flags)) {
		/* someone closed the current epoch
		 * while we were grabbing the spinlock */
		spin_unlock_irq(&mdev->tconn->req_lock);
		goto allocate_barrier;
	}


	/* Update disk stats */
	_drbd_start_io_acct(mdev, req, bio);

	/* _maybe_start_new_epoch(mdev);
	 * If we need to generate a write barrier packet, we have to add the
	 * new epoch (barrier) object, and queue the barrier packet for sending,
	 * and queue the req's data after it _within the same lock_, otherwise
	 * we have race conditions where the reorder domains could be mixed up.
	 *
	 * Even read requests may start a new epoch and queue the corresponding
	 * barrier packet. To get the write ordering right, we only have to
	 * make sure that, if this is a write request and it triggered a
	 * barrier packet, this request is queued within the same spinlock. */
	if ((remote || send_oos) && mdev->tconn->unused_spare_tle &&
	    test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
		_tl_add_barrier(mdev, mdev->tconn->unused_spare_tle);
		mdev->tconn->unused_spare_tle = NULL;
	} else {
		D_ASSERT(!(remote && rw == WRITE &&
			   test_bit(CREATE_BARRIER, &mdev->flags)));
	}

	/* NOTE
	 * Actually, 'local' may be wrong here already, since we may have failed
	 * to write to the meta data, and may become wrong anytime because of
	 * local io-error for some other request, which would lead to us
	 * "detaching" the local disk.
	 *
	 * 'remote' may become wrong any time because the network could fail.
	 *
	 * This is a harmless race condition, though, since it is handled
	 * correctly at the appropriate places; so it just defers the failure
	 * of the respective operation.
	 */

	/* mark them early for readability.
	 * this just sets some state flags. */
	if (remote)
		_req_mod(req, TO_BE_SENT);
	if (local)
		_req_mod(req, TO_BE_SUBMITTED);

	list_add_tail(&req->tl_requests, &mdev->tconn->newest_tle->requests);

	/* NOTE remote first: to get the concurrent write detection right,
	 * we must register the request before start of local IO. */
	if (remote) {
		/* either WRITE and C_CONNECTED,
		 * or READ, and no local disk,
		 * or READ, but not in sync.
		 */
		_req_mod(req, (rw == WRITE)
				? QUEUE_FOR_NET_WRITE
				: QUEUE_FOR_NET_READ);
	}
	if (send_oos && drbd_set_out_of_sync(mdev, sector, size))
		_req_mod(req, QUEUE_FOR_SEND_OOS);

	if (remote &&
	    mdev->tconn->net_conf->on_congestion != OC_BLOCK && mdev->tconn->agreed_pro_version >= 96) {
		int congested = 0;

		if (mdev->tconn->net_conf->cong_fill &&
		    atomic_read(&mdev->ap_in_flight) >= mdev->tconn->net_conf->cong_fill) {
			dev_info(DEV, "Congestion-fill threshold reached\n");
			congested = 1;
		}

		if (mdev->act_log->used >= mdev->tconn->net_conf->cong_extents) {
			dev_info(DEV, "Congestion-extents threshold reached\n");
			congested = 1;
		}

		if (congested) {
			queue_barrier(mdev); /* last barrier, after mirrored writes */

			if (mdev->tconn->net_conf->on_congestion == OC_PULL_AHEAD)
				_drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL);
			else  /*mdev->tconn->net_conf->on_congestion == OC_DISCONNECT */
				_drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL);
		}
	}

	spin_unlock_irq(&mdev->tconn->req_lock);
	kfree(b); /* if someone else has beaten us to it... */

	if (local) {
		req->private_bio->bi_bdev = mdev->ldev->backing_bdev;

		/* State may have changed since we grabbed our reference on the
		 * mdev->ldev member. Double check, and short-circuit to endio.
		 * In case the last activity log transaction failed to get on
		 * stable storage, and this is a WRITE, we may not even submit
		 * this bio. */
		if (get_ldev(mdev)) {
			if (drbd_insert_fault(mdev, rw == WRITE ? DRBD_FAULT_DT_WR
					     : rw == READ  ? DRBD_FAULT_DT_RD
					     :               DRBD_FAULT_DT_RA))
				bio_endio(req->private_bio, -EIO);
			else
				generic_make_request(req->private_bio);
			put_ldev(mdev);
		} else
			bio_endio(req->private_bio, -EIO);
	}

	return 0;

fail_free_complete:
	if (req->rq_state & RQ_IN_ACT_LOG)
		drbd_al_complete_io(mdev, sector);
fail_and_free_req:
	if (local) {
		bio_put(req->private_bio);
		req->private_bio = NULL;
		put_ldev(mdev);
	}
	if (!ret)
		bio_endio(bio, err);

	drbd_req_free(req);
	dec_ap_bio(mdev);
	kfree(b);

	return ret;
}

/* helper function for drbd_make_request
 * if we can determine just by the mdev (state) that this request will fail,
 * return 1
 * otherwise return 0
 */
static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write)
{
	if (mdev->state.role != R_PRIMARY &&
		(!allow_oos || is_write)) {
		if (__ratelimit(&drbd_ratelimit_state)) {
			dev_err(DEV, "Process %s[%u] tried to %s; "
			    "since we are not in Primary state, "
			    "we cannot allow this\n",
			    current->comm, current->pid,
			    is_write ? "WRITE" : "READ");
		}
		return 1;
	}

	return 0;
}

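/* make_request entry point for the DRBD device.
 * Bios that stay within one hash-slot-sized chunk are handed to
 * drbd_make_request_common() directly; bios that cross a slot boundary
 * are split with bio_split() first (or rejected if they cannot be). */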
int drbd_make_request(struct request_queue *q, struct bio *bio)
{
	unsigned int s_enr, e_enr;
	struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata;
	unsigned long start_time;

	if (drbd_fail_request_early(mdev, bio_data_dir(bio) & WRITE)) {
		bio_endio(bio, -EPERM);
		return 0;
	}

	start_time = jiffies;

	/*
	 * what we "blindly" assume:
	 */
	D_ASSERT(bio->bi_size > 0);
	D_ASSERT(IS_ALIGNED(bio->bi_size, 512));
	D_ASSERT(bio->bi_idx == 0);

	/* to make some things easier, force alignment of requests within the
	 * granularity of our hash tables */
	s_enr = bio->bi_sector >> HT_SHIFT;
	e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT;

	if (likely(s_enr == e_enr)) {
		inc_ap_bio(mdev, 1);
		return drbd_make_request_common(mdev, bio, start_time);
	}

	/* can this bio be split generically?
	 * Maybe add our own split-arbitrary-bios function. */
	if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_BIO_SIZE) {
		/* rather error out here than BUG in bio_split */
		dev_err(DEV, "bio would need to, but cannot, be split: "
		    "(vcnt=%u,idx=%u,size=%u,sector=%llu)\n",
		    bio->bi_vcnt, bio->bi_idx, bio->bi_size,
		    (unsigned long long)bio->bi_sector);
		bio_endio(bio, -EINVAL);
	} else {
		/* This bio crosses some boundary, so we have to split it. */
		struct bio_pair *bp;
		/* works for the "do not cross hash slot boundaries" case
		 * e.g. sector 262269, size 4096
		 * s_enr = 262269 >> 6 = 4097
		 * e_enr = (262269+8-1) >> 6 = 4098
		 * HT_SHIFT = 6
		 * sps = 64, mask = 63
		 * first_sectors = 64 - (262269 & 63) = 3
		 */
		const sector_t sect = bio->bi_sector;
		const int sps = 1 << HT_SHIFT; /* sectors per slot */
		const int mask = sps - 1;
		const sector_t first_sectors = sps - (sect & mask);
		bp = bio_split(bio, first_sectors);

		/* we need to get a "reference count" (ap_bio_cnt)
		 * to avoid races with the disconnect/reconnect/suspend code.
		 * In case we need to split the bio here, we need to get three references
		 * atomically, otherwise we might deadlock when trying to submit the
		 * second one! */
		inc_ap_bio(mdev, 3);

		D_ASSERT(e_enr == s_enr + 1);

		while (drbd_make_request_common(mdev, &bp->bio1, start_time))
			inc_ap_bio(mdev, 1);

		while (drbd_make_request_common(mdev, &bp->bio2, start_time))
			inc_ap_bio(mdev, 1);

		dec_ap_bio(mdev);

		bio_pair_release(bp);
	}
	return 0;
}

/* This is called by bio_add_page().  With this function we reduce
 * the number of BIOs that span over multiple DRBD_MAX_BIO_SIZEs
 * units (was AL_EXTENTs).
 *
 * we do the calculation within the lower 32bit of the byte offsets,
 * since we don't care for actual offset, but only check whether it
 * would cross "activity log extent" boundaries.
 *
 * As long as the BIO is empty we have to allow at least one bvec,
 * regardless of size and offset.  so the resulting bio may still
 * cross extent boundaries.  those are dealt with (bio_split) in
 * drbd_make_request.
 */
int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec)
{
	struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata;
	unsigned int bio_offset =
		(unsigned int)bvm->bi_sector << 9; /* 32 bit */
	unsigned int bio_size = bvm->bi_size;
	int limit, backing_limit;

	limit = DRBD_MAX_BIO_SIZE
	      - ((bio_offset & (DRBD_MAX_BIO_SIZE-1)) + bio_size);
	if (limit < 0)
		limit = 0;
	if (bio_size == 0) {
		if (limit <= bvec->bv_len)
			limit = bvec->bv_len;
	} else if (limit && get_ldev(mdev)) {
		struct request_queue * const b =
			mdev->ldev->backing_bdev->bd_disk->queue;
		if (b->merge_bvec_fn) {
			backing_limit = b->merge_bvec_fn(b, bvm, bvec);
			limit = min(limit, backing_limit);
		}
		put_ldev(mdev);
	}
	return limit;
}
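
/* Recurring per-device timer: look at the oldest request on the transfer
 * log and, if it has been outstanding for longer than the effective
 * timeout (ko-count * timeout), either declare the connection C_TIMEOUT
 * (request still pending on the network) or warn that the local backing
 * device appears frozen.  Otherwise, re-arm the timer. */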
void request_timer_fn(unsigned long data)
{
	struct drbd_conf *mdev = (struct drbd_conf *) data;
	struct drbd_request *req; /* oldest request */
	struct list_head *le;
	unsigned long et = 0; /* effective timeout = ko_count * timeout */

	if (get_net_conf(mdev->tconn)) {
		et = mdev->tconn->net_conf->timeout*HZ/10 * mdev->tconn->net_conf->ko_count;
		put_net_conf(mdev->tconn);
	}
	if (!et || mdev->state.conn < C_WF_REPORT_PARAMS)
		return; /* Recurring timer stopped */

	spin_lock_irq(&mdev->tconn->req_lock);
	le = &mdev->tconn->oldest_tle->requests;
	if (list_empty(le)) {
		spin_unlock_irq(&mdev->tconn->req_lock);
		mod_timer(&mdev->request_timer, jiffies + et);
		return;
	}

	le = le->prev;
	req = list_entry(le, struct drbd_request, tl_requests);
	if (time_is_before_eq_jiffies(req->start_time + et)) {
		if (req->rq_state & RQ_NET_PENDING) {
			dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
			_drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL);
		} else {
			dev_warn(DEV, "Local backing block device frozen?\n");
			mod_timer(&mdev->request_timer, jiffies + et);
		}
	} else {
		mod_timer(&mdev->request_timer, req->start_time + et);
	}

	spin_unlock_irq(&mdev->tconn->req_lock);
}