]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - drivers/block/drbd/drbd_receiver.c
drbd: only wakeup if something changed in update_peer_seq
[mirror_ubuntu-artful-kernel.git] / drivers / block / drbd / drbd_receiver.c
CommitLineData
b411b363
PR
1/*
2 drbd_receiver.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25
b411b363
PR
26#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
b411b363
PR
31#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
b411b363
PR
39#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
b411b363
PR
44#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
b411b363
PR
47#include "drbd_req.h"
48
49#include "drbd_vli.h"
50
77351055
PR
/* Decoded on-the-wire packet header, filled in by decode_header(). */
struct packet_info {
	enum drbd_packet cmd;	/* packet command code (host byte order) */
	int size;		/* payload length in bytes (header excluded) */
	int vnr;		/* volume number; 0 for pre-v100 headers (see decode_header) */
};
56
b411b363
PR
/* Result of drbd_may_finish_epoch(): what happened to the epoch object. */
enum finish_epoch {
	FE_STILL_LIVE,	/* epoch not yet finished; object still in use */
	FE_DESTROYED,	/* epoch finished and kfree()d */
	FE_RECYCLED,	/* epoch finished and reset for reuse as current epoch */
};
62
65d11ed6 63static int drbd_do_handshake(struct drbd_tconn *tconn);
13e6037d 64static int drbd_do_auth(struct drbd_tconn *tconn);
360cc740 65static int drbd_disconnected(int vnr, void *p, void *data);
b411b363
PR
66
67static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
00d56944 68static int e_end_block(struct drbd_work *, int);
b411b363 69
b411b363
PR
70
71#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
72
45bb912b
LE
73/*
74 * some helper functions to deal with single linked page lists,
75 * page->private being our "next" pointer.
76 */
77
78/* If at least n pages are linked at head, get n pages off.
79 * Otherwise, don't modify head, and return NULL.
80 * Locking is the responsibility of the caller.
81 */
82static struct page *page_chain_del(struct page **head, int n)
83{
84 struct page *page;
85 struct page *tmp;
86
87 BUG_ON(!n);
88 BUG_ON(!head);
89
90 page = *head;
23ce4227
PR
91
92 if (!page)
93 return NULL;
94
45bb912b
LE
95 while (page) {
96 tmp = page_chain_next(page);
97 if (--n == 0)
98 break; /* found sufficient pages */
99 if (tmp == NULL)
100 /* insufficient pages, don't use any of them. */
101 return NULL;
102 page = tmp;
103 }
104
105 /* add end of list marker for the returned list */
106 set_page_private(page, 0);
107 /* actual return value, and adjustment of head */
108 page = *head;
109 *head = tmp;
110 return page;
111}
112
113/* may be used outside of locks to find the tail of a (usually short)
114 * "private" page chain, before adding it back to a global chain head
115 * with page_chain_add() under a spinlock. */
116static struct page *page_chain_tail(struct page *page, int *len)
117{
118 struct page *tmp;
119 int i = 1;
120 while ((tmp = page_chain_next(page)))
121 ++i, page = tmp;
122 if (len)
123 *len = i;
124 return page;
125}
126
127static int page_chain_free(struct page *page)
128{
129 struct page *tmp;
130 int i = 0;
131 page_chain_for_each_safe(page, tmp) {
132 put_page(page);
133 ++i;
134 }
135 return i;
136}
137
138static void page_chain_add(struct page **head,
139 struct page *chain_first, struct page *chain_last)
140{
141#if 1
142 struct page *tmp;
143 tmp = page_chain_tail(chain_first, NULL);
144 BUG_ON(tmp != chain_last);
145#endif
146
147 /* add chain to head */
148 set_page_private(chain_last, (unsigned long)*head);
149 *head = chain_first;
150}
151
152static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number)
b411b363
PR
153{
154 struct page *page = NULL;
45bb912b
LE
155 struct page *tmp = NULL;
156 int i = 0;
b411b363
PR
157
158 /* Yes, testing drbd_pp_vacant outside the lock is racy.
159 * So what. It saves a spin_lock. */
45bb912b 160 if (drbd_pp_vacant >= number) {
b411b363 161 spin_lock(&drbd_pp_lock);
45bb912b
LE
162 page = page_chain_del(&drbd_pp_pool, number);
163 if (page)
164 drbd_pp_vacant -= number;
b411b363 165 spin_unlock(&drbd_pp_lock);
45bb912b
LE
166 if (page)
167 return page;
b411b363 168 }
45bb912b 169
b411b363
PR
170 /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
171 * "criss-cross" setup, that might cause write-out on some other DRBD,
172 * which in turn might block on the other node at this very place. */
45bb912b
LE
173 for (i = 0; i < number; i++) {
174 tmp = alloc_page(GFP_TRY);
175 if (!tmp)
176 break;
177 set_page_private(tmp, (unsigned long)page);
178 page = tmp;
179 }
180
181 if (i == number)
182 return page;
183
184 /* Not enough pages immediately available this time.
185 * No need to jump around here, drbd_pp_alloc will retry this
186 * function "soon". */
187 if (page) {
188 tmp = page_chain_tail(page, NULL);
189 spin_lock(&drbd_pp_lock);
190 page_chain_add(&drbd_pp_pool, page, tmp);
191 drbd_pp_vacant += i;
192 spin_unlock(&drbd_pp_lock);
193 }
194 return NULL;
b411b363
PR
195}
196
b411b363
PR
197static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
198{
db830c46 199 struct drbd_peer_request *peer_req;
b411b363
PR
200 struct list_head *le, *tle;
201
202 /* The EEs are always appended to the end of the list. Since
203 they are sent in order over the wire, they have to finish
204 in order. As soon as we see the first not finished we can
205 stop to examine the list... */
206
207 list_for_each_safe(le, tle, &mdev->net_ee) {
db830c46
AG
208 peer_req = list_entry(le, struct drbd_peer_request, w.list);
209 if (drbd_ee_has_active_page(peer_req))
b411b363
PR
210 break;
211 list_move(le, to_be_freed);
212 }
213}
214
215static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
216{
217 LIST_HEAD(reclaimed);
db830c46 218 struct drbd_peer_request *peer_req, *t;
b411b363 219
87eeee41 220 spin_lock_irq(&mdev->tconn->req_lock);
b411b363 221 reclaim_net_ee(mdev, &reclaimed);
87eeee41 222 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 223
db830c46
AG
224 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
225 drbd_free_net_ee(mdev, peer_req);
b411b363
PR
226}
227
228/**
45bb912b 229 * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled)
b411b363 230 * @mdev: DRBD device.
45bb912b
LE
231 * @number: number of pages requested
232 * @retry: whether to retry, if not enough pages are available right now
233 *
234 * Tries to allocate number pages, first from our own page pool, then from
235 * the kernel, unless this allocation would exceed the max_buffers setting.
236 * Possibly retry until DRBD frees sufficient pages somewhere else.
b411b363 237 *
45bb912b 238 * Returns a page chain linked via page->private.
b411b363 239 */
45bb912b 240static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry)
b411b363
PR
241{
242 struct page *page = NULL;
243 DEFINE_WAIT(wait);
244
45bb912b
LE
245 /* Yes, we may run up to @number over max_buffers. If we
246 * follow it strictly, the admin will get it wrong anyways. */
89e58e75 247 if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers)
45bb912b 248 page = drbd_pp_first_pages_or_try_alloc(mdev, number);
b411b363 249
45bb912b 250 while (page == NULL) {
b411b363
PR
251 prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
252
253 drbd_kick_lo_and_reclaim_net(mdev);
254
89e58e75 255 if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) {
45bb912b 256 page = drbd_pp_first_pages_or_try_alloc(mdev, number);
b411b363
PR
257 if (page)
258 break;
259 }
260
261 if (!retry)
262 break;
263
264 if (signal_pending(current)) {
265 dev_warn(DEV, "drbd_pp_alloc interrupted!\n");
266 break;
267 }
268
269 schedule();
270 }
271 finish_wait(&drbd_pp_wait, &wait);
272
45bb912b
LE
273 if (page)
274 atomic_add(number, &mdev->pp_in_use);
b411b363
PR
275 return page;
276}
277
278/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
87eeee41 279 * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock);
45bb912b
LE
280 * Either links the page chain back to the global pool,
281 * or returns all pages to the system. */
435f0740 282static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
b411b363 283{
435f0740 284 atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
b411b363 285 int i;
435f0740 286
81a5d60e 287 if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
45bb912b
LE
288 i = page_chain_free(page);
289 else {
290 struct page *tmp;
291 tmp = page_chain_tail(page, &i);
292 spin_lock(&drbd_pp_lock);
293 page_chain_add(&drbd_pp_pool, page, tmp);
294 drbd_pp_vacant += i;
295 spin_unlock(&drbd_pp_lock);
b411b363 296 }
435f0740 297 i = atomic_sub_return(i, a);
45bb912b 298 if (i < 0)
435f0740
LE
299 dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
300 is_net ? "pp_in_use_by_net" : "pp_in_use", i);
b411b363
PR
301 wake_up(&drbd_pp_wait);
302}
303
304/*
305You need to hold the req_lock:
306 _drbd_wait_ee_list_empty()
307
308You must not have the req_lock:
309 drbd_free_ee()
310 drbd_alloc_ee()
311 drbd_init_ee()
312 drbd_release_ee()
313 drbd_ee_fix_bhs()
314 drbd_process_done_ee()
315 drbd_clear_done_ee()
316 drbd_wait_ee_list_empty()
317*/
318
f6ffca9f
AG
319struct drbd_peer_request *
320drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector,
321 unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
b411b363 322{
db830c46 323 struct drbd_peer_request *peer_req;
b411b363 324 struct page *page;
45bb912b 325 unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
b411b363 326
0cf9d27e 327 if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
b411b363
PR
328 return NULL;
329
db830c46
AG
330 peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
331 if (!peer_req) {
b411b363
PR
332 if (!(gfp_mask & __GFP_NOWARN))
333 dev_err(DEV, "alloc_ee: Allocation of an EE failed\n");
334 return NULL;
335 }
336
45bb912b
LE
337 page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
338 if (!page)
339 goto fail;
b411b363 340
db830c46
AG
341 drbd_clear_interval(&peer_req->i);
342 peer_req->i.size = data_size;
343 peer_req->i.sector = sector;
344 peer_req->i.local = false;
345 peer_req->i.waiting = false;
346
347 peer_req->epoch = NULL;
a21e9298 348 peer_req->w.mdev = mdev;
db830c46
AG
349 peer_req->pages = page;
350 atomic_set(&peer_req->pending_bios, 0);
351 peer_req->flags = 0;
9a8e7753
AG
352 /*
353 * The block_id is opaque to the receiver. It is not endianness
354 * converted, and sent back to the sender unchanged.
355 */
db830c46 356 peer_req->block_id = id;
b411b363 357
db830c46 358 return peer_req;
b411b363 359
45bb912b 360 fail:
db830c46 361 mempool_free(peer_req, drbd_ee_mempool);
b411b363
PR
362 return NULL;
363}
364
db830c46 365void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
f6ffca9f 366 int is_net)
b411b363 367{
db830c46
AG
368 if (peer_req->flags & EE_HAS_DIGEST)
369 kfree(peer_req->digest);
370 drbd_pp_free(mdev, peer_req->pages, is_net);
371 D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
372 D_ASSERT(drbd_interval_empty(&peer_req->i));
373 mempool_free(peer_req, drbd_ee_mempool);
b411b363
PR
374}
375
376int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
377{
378 LIST_HEAD(work_list);
db830c46 379 struct drbd_peer_request *peer_req, *t;
b411b363 380 int count = 0;
435f0740 381 int is_net = list == &mdev->net_ee;
b411b363 382
87eeee41 383 spin_lock_irq(&mdev->tconn->req_lock);
b411b363 384 list_splice_init(list, &work_list);
87eeee41 385 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 386
db830c46
AG
387 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
388 drbd_free_some_ee(mdev, peer_req, is_net);
b411b363
PR
389 count++;
390 }
391 return count;
392}
393
394
32862ec7 395/* See also comments in _req_mod(,BARRIER_ACKED)
b411b363
PR
396 * and receive_Barrier.
397 *
398 * Move entries from net_ee to done_ee, if ready.
399 * Grab done_ee, call all callbacks, free the entries.
400 * The callbacks typically send out ACKs.
401 */
402static int drbd_process_done_ee(struct drbd_conf *mdev)
403{
404 LIST_HEAD(work_list);
405 LIST_HEAD(reclaimed);
db830c46 406 struct drbd_peer_request *peer_req, *t;
b411b363
PR
407 int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS);
408
87eeee41 409 spin_lock_irq(&mdev->tconn->req_lock);
b411b363
PR
410 reclaim_net_ee(mdev, &reclaimed);
411 list_splice_init(&mdev->done_ee, &work_list);
87eeee41 412 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 413
db830c46
AG
414 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
415 drbd_free_net_ee(mdev, peer_req);
b411b363
PR
416
417 /* possible callbacks here:
7be8da07 418 * e_end_block, and e_end_resync_block, e_send_discard_write.
b411b363
PR
419 * all ignore the last argument.
420 */
db830c46 421 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
b411b363 422 /* list_del not necessary, next/prev members not touched */
00d56944 423 ok = peer_req->w.cb(&peer_req->w, !ok) && ok;
db830c46 424 drbd_free_ee(mdev, peer_req);
b411b363
PR
425 }
426 wake_up(&mdev->ee_wait);
427
428 return ok;
429}
430
/* Wait (uninterruptibly) until @head is empty.
 * Caller must hold mdev->tconn->req_lock; it is dropped around the sleep
 * and re-taken before returning, so the list state must be re-checked by
 * the caller if it matters beyond emptiness. */
void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&mdev->tconn->req_lock);
		io_schedule();
		finish_wait(&mdev->ee_wait, &wait);
		spin_lock_irq(&mdev->tconn->req_lock);
	}
}
445
446void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
447{
87eeee41 448 spin_lock_irq(&mdev->tconn->req_lock);
b411b363 449 _drbd_wait_ee_list_empty(mdev, head);
87eeee41 450 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363
PR
451}
452
453/* see also kernel_accept; which is only present since 2.6.18.
454 * also we want to log which part of it failed, exactly */
7653620d 455static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock)
b411b363
PR
456{
457 struct sock *sk = sock->sk;
458 int err = 0;
459
460 *what = "listen";
461 err = sock->ops->listen(sock, 5);
462 if (err < 0)
463 goto out;
464
465 *what = "sock_create_lite";
466 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
467 newsock);
468 if (err < 0)
469 goto out;
470
471 *what = "accept";
472 err = sock->ops->accept(sock, *newsock, 0);
473 if (err < 0) {
474 sock_release(*newsock);
475 *newsock = NULL;
476 goto out;
477 }
478 (*newsock)->ops = sock->ops;
479
480out:
481 return err;
482}
483
dbd9eea0 484static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
b411b363
PR
485{
486 mm_segment_t oldfs;
487 struct kvec iov = {
488 .iov_base = buf,
489 .iov_len = size,
490 };
491 struct msghdr msg = {
492 .msg_iovlen = 1,
493 .msg_iov = (struct iovec *)&iov,
494 .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
495 };
496 int rv;
497
498 oldfs = get_fs();
499 set_fs(KERNEL_DS);
500 rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
501 set_fs(oldfs);
502
503 return rv;
504}
505
de0ff338 506static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
b411b363
PR
507{
508 mm_segment_t oldfs;
509 struct kvec iov = {
510 .iov_base = buf,
511 .iov_len = size,
512 };
513 struct msghdr msg = {
514 .msg_iovlen = 1,
515 .msg_iov = (struct iovec *)&iov,
516 .msg_flags = MSG_WAITALL | MSG_NOSIGNAL
517 };
518 int rv;
519
520 oldfs = get_fs();
521 set_fs(KERNEL_DS);
522
523 for (;;) {
de0ff338 524 rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags);
b411b363
PR
525 if (rv == size)
526 break;
527
528 /* Note:
529 * ECONNRESET other side closed the connection
530 * ERESTARTSYS (on sock) we got a signal
531 */
532
533 if (rv < 0) {
534 if (rv == -ECONNRESET)
de0ff338 535 conn_info(tconn, "sock was reset by peer\n");
b411b363 536 else if (rv != -ERESTARTSYS)
de0ff338 537 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
b411b363
PR
538 break;
539 } else if (rv == 0) {
de0ff338 540 conn_info(tconn, "sock was shut down by peer\n");
b411b363
PR
541 break;
542 } else {
543 /* signal came in, or peer/link went down,
544 * after we read a partial message
545 */
546 /* D_ASSERT(signal_pending(current)); */
547 break;
548 }
549 };
550
551 set_fs(oldfs);
552
553 if (rv != size)
bbeb641c 554 conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);
b411b363
PR
555
556 return rv;
557}
558
5dbf1673
LE
559/* quoting tcp(7):
560 * On individual connections, the socket buffer size must be set prior to the
561 * listen(2) or connect(2) calls in order to have it take effect.
562 * This is our wrapper to do so.
563 */
564static void drbd_setbufsize(struct socket *sock, unsigned int snd,
565 unsigned int rcv)
566{
567 /* open coded SO_SNDBUF, SO_RCVBUF */
568 if (snd) {
569 sock->sk->sk_sndbuf = snd;
570 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
571 }
572 if (rcv) {
573 sock->sk->sk_rcvbuf = rcv;
574 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
575 }
576}
577
/* Actively connect to the peer. Returns the connected socket, or NULL.
 * On "expected" failures (timeout, peer not up yet) we stay in
 * C_WF_CONNECTION; on unexpected errors we go to C_DISCONNECTING. */
static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
{
	const char *what;
	struct socket *sock = NULL;
	struct sockaddr_in6 src_in6;
	int err;
	int disconnect_on_error = 1;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = tconn->net_conf->try_connect_int*HZ;
	drbd_setbufsize(sock, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	*  for the outgoing connections.
	*  This is needed for multihomed hosts and to be
	*  able to use lo: interfaces for drbd.
	* Make sure to use 0 as port number, so linux selects
	*  a free one dynamically.
	*/
	memcpy(&src_in6, tconn->net_conf->my_addr,
	       min_t(int, tconn->net_conf->my_addr_len, sizeof(src_in6)));
	if (((struct sockaddr *)tconn->net_conf->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	what = "bind before connect";
	err = sock->ops->bind(sock,
			      (struct sockaddr *) &src_in6,
			      tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock,
				 (struct sockaddr *)tconn->net_conf->peer_addr,
				 tconn->net_conf->peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
		/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
		/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			conn_err(tconn, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
	}
	put_net_conf(tconn);
	return sock;
}
655
7653620d 656static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
b411b363
PR
657{
658 int timeo, err;
659 struct socket *s_estab = NULL, *s_listen;
660 const char *what;
661
7653620d 662 if (!get_net_conf(tconn))
b411b363
PR
663 return NULL;
664
665 what = "sock_create_kern";
7653620d 666 err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
b411b363
PR
667 SOCK_STREAM, IPPROTO_TCP, &s_listen);
668 if (err) {
669 s_listen = NULL;
670 goto out;
671 }
672
7653620d 673 timeo = tconn->net_conf->try_connect_int * HZ;
b411b363
PR
674 timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */
675
676 s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */
677 s_listen->sk->sk_rcvtimeo = timeo;
678 s_listen->sk->sk_sndtimeo = timeo;
7653620d
PR
679 drbd_setbufsize(s_listen, tconn->net_conf->sndbuf_size,
680 tconn->net_conf->rcvbuf_size);
b411b363
PR
681
682 what = "bind before listen";
683 err = s_listen->ops->bind(s_listen,
7653620d
PR
684 (struct sockaddr *) tconn->net_conf->my_addr,
685 tconn->net_conf->my_addr_len);
b411b363
PR
686 if (err < 0)
687 goto out;
688
7653620d 689 err = drbd_accept(&what, s_listen, &s_estab);
b411b363
PR
690
691out:
692 if (s_listen)
693 sock_release(s_listen);
694 if (err < 0) {
695 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
7653620d 696 conn_err(tconn, "%s failed, err = %d\n", what, err);
bbeb641c 697 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363
PR
698 }
699 }
7653620d 700 put_net_conf(tconn);
b411b363
PR
701
702 return s_estab;
703}
704
d38e787e 705static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd_packet cmd)
b411b363 706{
d38e787e 707 struct p_header *h = &tconn->data.sbuf.header;
b411b363 708
d38e787e 709 return _conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0);
b411b363
PR
710}
711
a25b63f1 712static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock)
b411b363 713{
a25b63f1 714 struct p_header80 *h = &tconn->data.rbuf.header.h80;
b411b363
PR
715 int rr;
716
dbd9eea0 717 rr = drbd_recv_short(sock, h, sizeof(*h), 0);
b411b363 718
ca9bc12b 719 if (rr == sizeof(*h) && h->magic == cpu_to_be32(DRBD_MAGIC))
b411b363
PR
720 return be16_to_cpu(h->command);
721
722 return 0xffff;
723}
724
725/**
726 * drbd_socket_okay() - Free the socket if its connection is not okay
b411b363
PR
727 * @sock: pointer to the pointer to the socket.
728 */
dbd9eea0 729static int drbd_socket_okay(struct socket **sock)
b411b363
PR
730{
731 int rr;
732 char tb[4];
733
734 if (!*sock)
81e84650 735 return false;
b411b363 736
dbd9eea0 737 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
b411b363
PR
738
739 if (rr > 0 || rr == -EAGAIN) {
81e84650 740 return true;
b411b363
PR
741 } else {
742 sock_release(*sock);
743 *sock = NULL;
81e84650 744 return false;
b411b363
PR
745 }
746}
747
907599e0
PR
748static int drbd_connected(int vnr, void *p, void *data)
749{
750 struct drbd_conf *mdev = (struct drbd_conf *)p;
751 int ok = 1;
752
753 atomic_set(&mdev->packet_seq, 0);
754 mdev->peer_seq = 0;
755
8410da8f
PR
756 mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ?
757 &mdev->tconn->cstate_mutex :
758 &mdev->own_state_mutex;
759
907599e0
PR
760 ok &= drbd_send_sync_param(mdev, &mdev->sync_conf);
761 ok &= drbd_send_sizes(mdev, 0, 0);
762 ok &= drbd_send_uuids(mdev);
763 ok &= drbd_send_state(mdev);
764 clear_bit(USE_DEGR_WFC_T, &mdev->flags);
765 clear_bit(RESIZE_PENDING, &mdev->flags);
766
8410da8f 767
907599e0
PR
768 return !ok;
769}
770
b411b363
PR
/*
 * Establish both DRBD sockets (data + meta) with the peer, racing active
 * connect against passive accept until both links are up, then run the
 * handshake, optional authentication, and per-volume parameter exchange.
 *
 * return values:
 *	1 yes, we have a valid connection
 *	0 oops, did not work out, please try again
 *     -1 peer talks different language,
 *		no point in trying again, please go standalone.
 *     -2 We do not have a network config...
 */
static int drbd_connect(struct drbd_tconn *tconn)
{
	struct socket *s, *sock, *msock;
	int try, h, ok;

	if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	clear_bit(DISCARD_CONCURRENT, &tconn->flags);
	tconn->agreed_pro_version = 99;
	/* agreed_pro_version must be smaller than 100 so we send the old
	   header (h80) in the first packet and in the handshake packet. */

	sock  = NULL;
	msock = NULL;

	do {
		/* Active part: try to connect out; a few quick attempts. */
		for (try = 0;;) {
			/* 3 tries, this should take less than a second! */
			s = drbd_try_connect(tconn);
			if (s || ++try >= 3)
				break;
			/* give the other side time to call bind() & listen() */
			schedule_timeout_interruptible(HZ / 10);
		}

		if (s) {
			/* First established socket becomes the data socket,
			 * second the meta socket; a third is a logic error. */
			if (!sock) {
				drbd_send_fp(tconn, s, P_HAND_SHAKE_S);
				sock = s;
				s = NULL;
			} else if (!msock) {
				drbd_send_fp(tconn, s, P_HAND_SHAKE_M);
				msock = s;
				s = NULL;
			} else {
				conn_err(tconn, "Logic error in drbd_connect()\n");
				goto out_release_sockets;
			}
		}

		if (sock && msock) {
			/* let possibly crossed connections settle, then verify */
			schedule_timeout_interruptible(tconn->net_conf->ping_timeo*HZ/10);
			ok = drbd_socket_okay(&sock);
			ok = drbd_socket_okay(&msock) && ok;
			if (ok)
				break;
		}

retry:
		/* Passive part: accept a connection initiated by the peer. */
		s = drbd_wait_for_connect(tconn);
		if (s) {
			try = drbd_recv_fp(tconn, s);
			/* probe our own outgoing sockets; dead ones are freed */
			drbd_socket_okay(&sock);
			drbd_socket_okay(&msock);
			switch (try) {
			case P_HAND_SHAKE_S:
				if (sock) {
					/* both sides connected simultaneously:
					 * keep the accepted one */
					conn_warn(tconn, "initial packet S crossed\n");
					sock_release(sock);
				}
				sock = s;
				break;
			case P_HAND_SHAKE_M:
				if (msock) {
					conn_warn(tconn, "initial packet M crossed\n");
					sock_release(msock);
				}
				msock = s;
				set_bit(DISCARD_CONCURRENT, &tconn->flags);
				break;
			default:
				conn_warn(tconn, "Error receiving initial packet\n");
				sock_release(s);
				/* randomize to break symmetric retry loops */
				if (random32() & 1)
					goto retry;
			}
		}

		if (tconn->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&tconn->receiver) == EXITING)
				goto out_release_sockets;
		}

		if (sock && msock) {
			ok = drbd_socket_okay(&sock);
			ok = drbd_socket_okay(&msock) && ok;
			if (ok)
				break;
		}
	} while (1);

	msock->sk->sk_reuse = 1; /* SO_REUSEADDR */
	sock->sk->sk_reuse = 1; /* SO_REUSEADDR */

	sock->sk->sk_allocation = GFP_NOIO;
	msock->sk->sk_allocation = GFP_NOIO;

	sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_HAND_SHAKE timeout,
	 * which we set to 4x the configured ping_timeout. */
	sock->sk->sk_sndtimeo =
	sock->sk->sk_rcvtimeo = tconn->net_conf->ping_timeo*4*HZ/10;

	msock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	msock->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock);
	drbd_tcp_nodelay(msock);

	tconn->data.socket = sock;
	tconn->meta.socket = msock;
	tconn->last_received = jiffies;

	h = drbd_do_handshake(tconn);
	if (h <= 0)
		return h;

	if (tconn->cram_hmac_tfm) {
		/* drbd_request_state(mdev, NS(conn, WFAuth)); */
		switch (drbd_do_auth(tconn)) {
		case -1:
			conn_err(tconn, "Authentication of peer failed\n");
			return -1;
		case 0:
			conn_err(tconn, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	if (conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE) < SS_SUCCESS)
		return 0;

	/* handshake done: switch to the configured runtime timeouts */
	sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	drbd_thread_start(&tconn->asender);

	if (drbd_send_protocol(tconn) == -1)
		return -1;

	/* send initial state/params for every configured volume */
	return !idr_for_each(&tconn->volumes, drbd_connected, tconn);

out_release_sockets:
	if (sock)
		sock_release(sock);
	if (msock)
		sock_release(msock);
	return -1;
}
940
ce243853 941static bool decode_header(struct drbd_tconn *tconn, struct p_header *h, struct packet_info *pi)
b411b363 942{
fd340c12 943 if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) {
77351055
PR
944 pi->cmd = be16_to_cpu(h->h80.command);
945 pi->size = be16_to_cpu(h->h80.length);
eefc2f7d 946 pi->vnr = 0;
ca9bc12b 947 } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) {
77351055
PR
948 pi->cmd = be16_to_cpu(h->h95.command);
949 pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff;
950 pi->vnr = 0;
02918be2 951 } else {
ce243853 952 conn_err(tconn, "magic?? on data m: 0x%08x c: %d l: %d\n",
004352fa
LE
953 be32_to_cpu(h->h80.magic),
954 be16_to_cpu(h->h80.command),
955 be16_to_cpu(h->h80.length));
81e84650 956 return false;
b411b363 957 }
257d0af6
PR
958 return true;
959}
960
9ba7aa00 961static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
257d0af6 962{
9ba7aa00 963 struct p_header *h = &tconn->data.rbuf.header;
257d0af6
PR
964 int r;
965
9ba7aa00 966 r = drbd_recv(tconn, h, sizeof(*h));
257d0af6
PR
967 if (unlikely(r != sizeof(*h))) {
968 if (!signal_pending(current))
9ba7aa00 969 conn_warn(tconn, "short read expecting header on sock: r=%d\n", r);
257d0af6
PR
970 return false;
971 }
972
9ba7aa00
PR
973 r = decode_header(tconn, h, pi);
974 tconn->last_received = jiffies;
b411b363 975
257d0af6 976 return r;
b411b363
PR
977}
978
2451fc3b 979static void drbd_flush(struct drbd_conf *mdev)
b411b363
PR
980{
981 int rv;
982
983 if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
fbd9b09a 984 rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
dd3932ed 985 NULL);
b411b363
PR
986 if (rv) {
987 dev_err(DEV, "local disk flush failed with status %d\n", rv);
988 /* would rather check on EOPNOTSUPP, but that is not reliable.
989 * don't try again for ANY return value != 0
990 * if (rv == -EOPNOTSUPP) */
991 drbd_bump_write_ordering(mdev, WO_drain_io);
992 }
993 put_ldev(mdev);
994 }
b411b363
PR
995}
996
/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @mdev:	DRBD device.
 * @epoch:	Epoch object.
 * @ev:		Epoch event.
 *
 * An epoch is finished once its size is non-zero, no writes are active,
 * and a barrier number was received.  A finished epoch is either kfree()d
 * (FE_DESTROYED) or, if it is the current epoch, reset for reuse
 * (FE_RECYCLED).  Finishing one epoch may cascade to the next in the list.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&mdev->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		/* apply the event itself (EV_CLEANUP is a modifier flag) */
		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		/* epoch complete: non-empty, no active writes, barrier seen */
		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
			if (!(ev & EV_CLEANUP)) {
				/* drop the lock to send the barrier ack;
				 * epoch stays valid, it is still linked */
				spin_unlock(&mdev->epoch_lock);
				drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
				spin_lock(&mdev->epoch_lock);
			}
			dec_unacked(mdev);

			if (mdev->current_epoch != epoch) {
				/* not the newest epoch: unlink, free, and
				 * continue with its successor */
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				mdev->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				/* current epoch: reset in place for reuse */
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
				wake_up(&mdev->ee_wait);
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&mdev->epoch_lock);

	return rv;
}
1068
1069/**
1070 * drbd_bump_write_ordering() - Fall back to an other write ordering method
1071 * @mdev: DRBD device.
1072 * @wo: Write ordering method to try.
1073 */
1074void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
1075{
1076 enum write_ordering_e pwo;
1077 static char *write_ordering_str[] = {
1078 [WO_none] = "none",
1079 [WO_drain_io] = "drain",
1080 [WO_bdev_flush] = "flush",
b411b363
PR
1081 };
1082
1083 pwo = mdev->write_ordering;
1084 wo = min(pwo, wo);
b411b363
PR
1085 if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
1086 wo = WO_drain_io;
1087 if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
1088 wo = WO_none;
1089 mdev->write_ordering = wo;
2451fc3b 1090 if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
b411b363
PR
1091 dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
1092}
1093
/**
 * drbd_submit_peer_request()
 * @mdev:	DRBD device.
 * @peer_req:	peer request
 * @rw:	flag field, see bio->bi_rw
 * @fault_type:	fault-injection class passed to drbd_generic_make_request()
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 * single page to an empty bio (which should never happen and likely indicates
 * that the lower level IO stack is in some way broken). This has been observed
 * on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_peer_request(struct drbd_conf *mdev,
			     struct drbd_peer_request *peer_req,
			     const unsigned rw, const int fault_type)
{
	struct bio *bios = NULL;	/* singly linked chain of prepared bios */
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned ds = peer_req->i.size;	/* bytes still to be mapped into bios */
	unsigned n_bios = 0;
	unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
	int err = -ENOMEM;

	/* In most cases, we will only need one bio. But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio. */
next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
		dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
		goto fail;
	}
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_sector = sector;
	bio->bi_bdev = mdev->ldev->backing_bdev;
	bio->bi_rw = rw;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_peer_request_endio;

	/* Prepend to the chain; submission order below does not matter. */
	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, ds, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
			/* A single page must always be possible!
			 * But in case it fails anyways,
			 * we deal with it, and complain (below). */
			if (bio->bi_vcnt == 0) {
				dev_err(DEV,
					"bio_add_page failed for len=%u, "
					"bi_vcnt=0 (bi_sector=%llu)\n",
					len, (unsigned long long)bio->bi_sector);
				err = -ENOSPC;
				goto fail;
			}
			/* Current bio is full; start another one at the
			 * current sector. */
			goto next_bio;
		}
		ds -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(page == NULL);
	D_ASSERT(ds == 0);

	/* The endio handler uses this count to know when the whole
	 * peer request is done. */
	atomic_set(&peer_req->pending_bios, n_bios);
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_generic_make_request(mdev, fault_type, bio);
	} while (bios);
	return 0;

fail:
	/* Nothing was submitted yet; just drop all prepared bios. */
	while (bios) {
		bio = bios;
		bios = bios->bi_next;
		bio_put(bio);
	}
	return err;
}
1186
53840641 1187static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
db830c46 1188 struct drbd_peer_request *peer_req)
53840641 1189{
db830c46 1190 struct drbd_interval *i = &peer_req->i;
53840641
AG
1191
1192 drbd_remove_interval(&mdev->write_requests, i);
1193 drbd_clear_interval(i);
1194
6c852bec 1195 /* Wake up any processes waiting for this peer request to complete. */
53840641
AG
1196 if (i->waiting)
1197 wake_up(&mdev->misc_wait);
1198}
1199
/* Handle an incoming P_BARRIER packet: close the current write epoch and,
 * depending on the configured write ordering, either recycle it or start
 * a new one.  The barrier ack is sent (from drbd_may_finish_epoch) only
 * after all writes of the epoch have completed. */
static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd,
			   unsigned int data_size)
{
	int rv;
	struct p_barrier *p = &mdev->tconn->data.rbuf.barrier;
	struct drbd_epoch *epoch;

	/* Balanced by dec_unacked() when the epoch is finished. */
	inc_unacked(mdev);

	mdev->current_epoch->barrier_nr = p->barrier;
	rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (mdev->write_ordering) {
	case WO_none:
		if (rv == FE_RECYCLED)
			return true;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
		/* Fall through */

	case WO_bdev_flush:
	case WO_drain_io:
		/* Wait for all writes of the epoch, then flush the disk. */
		drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
		drbd_flush(mdev);

		if (atomic_read(&mdev->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		/* Allocation failed (or epoch already empty): wait until the
		 * current epoch drains completely and reuse it. */
		epoch = mdev->current_epoch;
		wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);

		D_ASSERT(atomic_read(&epoch->active) == 0);
		D_ASSERT(epoch->flags == 0);

		return true;
	default:
		dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
		return false;
	}

	/* We got here only via "break": install the freshly allocated epoch. */
	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&mdev->epoch_lock);
	if (atomic_read(&mdev->current_epoch->epoch_size)) {
		list_add(&epoch->list, &mdev->current_epoch->list);
		mdev->current_epoch = epoch;
		mdev->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&mdev->epoch_lock);

	return true;
}
1271
/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data
 *
 * Reads one data block (plus optional integrity digest) from the data
 * socket into a freshly allocated peer request.  Returns the peer request,
 * or NULL on short read, invalid size, out-of-range sector, allocation
 * failure, or digest mismatch. */
static struct drbd_peer_request *
read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
	      int data_size) __must_hold(local)
{
	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int dgs, ds, rr;
	void *dig_in = mdev->tconn->int_dig_in;
	void *dig_vv = mdev->tconn->int_dig_vv;
	unsigned long *data;

	/* Digest is only present with protocol >= 87 and integrity
	 * checking configured. */
	dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
		crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;

	if (dgs) {
		rr = drbd_recv(mdev->tconn, dig_in, dgs);
		if (rr != dgs) {
			if (!signal_pending(current))
				dev_warn(DEV,
					"short read receiving data digest: read %d expected %d\n",
					rr, dgs);
			return NULL;
		}
	}

	/* data_size from the wire includes the digest. */
	data_size -= dgs;

	if (!expect(data_size != 0))
		return NULL;
	if (!expect(IS_ALIGNED(data_size, 512)))
		return NULL;
	if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust out peer,
	 * we sometimes have to double check. */
	if (sector + (data_size>>9) > capacity) {
		dev_err(DEV, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, data_size);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO);
	if (!peer_req)
		return NULL;

	/* Fill the page chain of the peer request from the socket. */
	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		rr = drbd_recv(mdev->tconn, data, len);
		if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
			dev_err(DEV, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (rr != len) {
			drbd_free_ee(mdev, peer_req);
			if (!signal_pending(current))
				dev_warn(DEV, "short read receiving data: read %d expected %d\n",
				rr, len);
			return NULL;
		}
		ds -= rr;
	}

	if (dgs) {
		/* Verify the received data against the digest sent along. */
		drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, peer_req, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_bcast_ee(mdev, "digest failed",
					dgs, dig_in, dig_vv, peer_req);
			drbd_free_ee(mdev, peer_req);
			return NULL;
		}
	}
	mdev->recv_cnt += data_size>>9;
	return peer_req;
}
1361
1362/* drbd_drain_block() just takes a data block
1363 * out of the socket input buffer, and discards it.
1364 */
1365static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1366{
1367 struct page *page;
1368 int rr, rv = 1;
1369 void *data;
1370
c3470cde 1371 if (!data_size)
81e84650 1372 return true;
c3470cde 1373
45bb912b 1374 page = drbd_pp_alloc(mdev, 1, 1);
b411b363
PR
1375
1376 data = kmap(page);
1377 while (data_size) {
de0ff338 1378 rr = drbd_recv(mdev->tconn, data, min_t(int, data_size, PAGE_SIZE));
b411b363
PR
1379 if (rr != min_t(int, data_size, PAGE_SIZE)) {
1380 rv = 0;
0ddc5549
LE
1381 if (!signal_pending(current))
1382 dev_warn(DEV,
1383 "short read receiving data: read %d expected %d\n",
1384 rr, min_t(int, data_size, PAGE_SIZE));
b411b363
PR
1385 break;
1386 }
1387 data_size -= rr;
1388 }
1389 kunmap(page);
435f0740 1390 drbd_pp_free(mdev, page, 0);
b411b363
PR
1391 return rv;
1392}
1393
/* Receive a "diskless read" data reply directly into the pages of the
 * original request's master bio (the local disk was not used for this
 * read).  Optionally verifies an integrity digest.
 * Returns 1 on success, 0 on short read or digest mismatch. */
static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec *bvec;
	struct bio *bio;
	int dgs, rr, i, expect;
	void *dig_in = mdev->tconn->int_dig_in;
	void *dig_vv = mdev->tconn->int_dig_vv;

	/* Digest present only with protocol >= 87 and integrity configured. */
	dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
		crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;

	if (dgs) {
		rr = drbd_recv(mdev->tconn, dig_in, dgs);
		if (rr != dgs) {
			if (!signal_pending(current))
				dev_warn(DEV,
					"short read receiving data reply digest: read %d expected %d\n",
					rr, dgs);
			return 0;
		}
	}

	data_size -= dgs;

	/* optimistically update recv_cnt. if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	mdev->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(sector == bio->bi_sector);

	/* Receive straight into the bio's pages, segment by segment. */
	bio_for_each_segment(bvec, bio, i) {
		expect = min_t(int, data_size, bvec->bv_len);
		rr = drbd_recv(mdev->tconn,
			       kmap(bvec->bv_page)+bvec->bv_offset,
			       expect);
		kunmap(bvec->bv_page);
		if (rr != expect) {
			if (!signal_pending(current))
				dev_warn(DEV, "short read receiving data reply: "
					"read %d expected %d\n",
					rr, expect);
			return 0;
		}
		data_size -= rr;
	}

	if (dgs) {
		drbd_csum_bio(mdev, mdev->tconn->integrity_r_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
			return 0;
		}
	}

	D_ASSERT(data_size == 0);
	return 1;
}
1453
1454/* e_end_resync_block() is called via
1455 * drbd_process_done_ee() by asender only */
00d56944 1456static int e_end_resync_block(struct drbd_work *w, int unused)
b411b363 1457{
8050e6d0
AG
1458 struct drbd_peer_request *peer_req =
1459 container_of(w, struct drbd_peer_request, w);
00d56944 1460 struct drbd_conf *mdev = w->mdev;
db830c46 1461 sector_t sector = peer_req->i.sector;
b411b363
PR
1462 int ok;
1463
db830c46 1464 D_ASSERT(drbd_interval_empty(&peer_req->i));
b411b363 1465
db830c46
AG
1466 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1467 drbd_set_in_sync(mdev, sector, peer_req->i.size);
1468 ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
b411b363
PR
1469 } else {
1470 /* Record failure to sync */
db830c46 1471 drbd_rs_failed_io(mdev, sector, peer_req->i.size);
b411b363 1472
db830c46 1473 ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
b411b363
PR
1474 }
1475 dec_unacked(mdev);
1476
1477 return ok;
1478}
1479
/* Read one resync data block from the socket and submit it to the local
 * disk.  The ldev reference taken by the caller is released here on
 * failure, or by the endio handler on success. */
static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
{
	struct drbd_peer_request *peer_req;

	peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
	if (!peer_req)
		goto fail;

	dec_rs_pending(mdev);

	inc_unacked(mdev);
	/* corresponding dec_unacked() in e_end_resync_block()
	 * respective _drbd_clear_done_ee */

	peer_req->w.cb = e_end_resync_block;

	/* Put it on the sync list before submitting, so the completion
	 * path always finds it there. */
	spin_lock_irq(&mdev->tconn->req_lock);
	list_add(&peer_req->w.list, &mdev->sync_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	atomic_add(data_size >> 9, &mdev->rs_sect_ev);
	if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
		return true;

	/* don't care for the reason here */
	dev_err(DEV, "submit failed, triggering re-connect\n");
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	drbd_free_ee(mdev, peer_req);
fail:
	put_ldev(mdev);
	return false;
}
1515
668eebc6 1516static struct drbd_request *
bc9c5c41
AG
1517find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1518 sector_t sector, bool missing_ok, const char *func)
51624585 1519{
51624585
AG
1520 struct drbd_request *req;
1521
bc9c5c41
AG
1522 /* Request object according to our peer */
1523 req = (struct drbd_request *)(unsigned long)id;
5e472264 1524 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
668eebc6 1525 return req;
c3afd8f5
AG
1526 if (!missing_ok) {
1527 dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func,
1528 (unsigned long)id, (unsigned long long)sector);
1529 }
51624585
AG
1530 return NULL;
1531}
1532
d8763023
AG
1533static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1534 unsigned int data_size)
b411b363
PR
1535{
1536 struct drbd_request *req;
1537 sector_t sector;
b411b363 1538 int ok;
e42325a5 1539 struct p_data *p = &mdev->tconn->data.rbuf.data;
b411b363
PR
1540
1541 sector = be64_to_cpu(p->sector);
1542
87eeee41 1543 spin_lock_irq(&mdev->tconn->req_lock);
bc9c5c41 1544 req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
87eeee41 1545 spin_unlock_irq(&mdev->tconn->req_lock);
c3afd8f5 1546 if (unlikely(!req))
81e84650 1547 return false;
b411b363 1548
24c4830c 1549 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
b411b363
PR
1550 * special casing it there for the various failure cases.
1551 * still no race with drbd_fail_pending_reads */
1552 ok = recv_dless_read(mdev, req, sector, data_size);
1553
1554 if (ok)
8554df1c 1555 req_mod(req, DATA_RECEIVED);
b411b363
PR
1556 /* else: nothing. handled from drbd_disconnect...
1557 * I don't think we may complete this just yet
1558 * in case we are "on-disconnect: freeze" */
1559
1560 return ok;
1561}
1562
d8763023
AG
1563static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1564 unsigned int data_size)
b411b363
PR
1565{
1566 sector_t sector;
b411b363 1567 int ok;
e42325a5 1568 struct p_data *p = &mdev->tconn->data.rbuf.data;
b411b363
PR
1569
1570 sector = be64_to_cpu(p->sector);
1571 D_ASSERT(p->block_id == ID_SYNCER);
1572
1573 if (get_ldev(mdev)) {
1574 /* data is submitted to disk within recv_resync_read.
1575 * corresponding put_ldev done below on error,
fcefa62e 1576 * or in drbd_peer_request_endio. */
b411b363
PR
1577 ok = recv_resync_read(mdev, sector, data_size);
1578 } else {
1579 if (__ratelimit(&drbd_ratelimit_state))
1580 dev_err(DEV, "Can not write resync data to local disk.\n");
1581
1582 ok = drbd_drain_block(mdev, data_size);
1583
2b2bf214 1584 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
b411b363
PR
1585 }
1586
778f271d
PR
1587 atomic_add(data_size >> 9, &mdev->rs_sect_in);
1588
b411b363
PR
1589 return ok;
1590}
1591
7be8da07
AG
1592static int w_restart_write(struct drbd_work *w, int cancel)
1593{
1594 struct drbd_request *req = container_of(w, struct drbd_request, w);
1595 struct drbd_conf *mdev = w->mdev;
1596 struct bio *bio;
1597 unsigned long start_time;
1598 unsigned long flags;
1599
1600 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
1601 if (!expect(req->rq_state & RQ_POSTPONED)) {
1602 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
1603 return 0;
1604 }
1605 bio = req->master_bio;
1606 start_time = req->start_time;
1607 /* Postponed requests will not have their master_bio completed! */
1608 __req_mod(req, DISCARD_WRITE, NULL);
1609 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
1610
1611 while (__drbd_make_request(mdev, bio, start_time))
1612 /* retry */ ;
1613 return 1;
1614}
1615
1616static void restart_conflicting_writes(struct drbd_conf *mdev,
1617 sector_t sector, int size)
1618{
1619 struct drbd_interval *i;
1620 struct drbd_request *req;
1621
1622 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1623 if (!i->local)
1624 continue;
1625 req = container_of(i, struct drbd_request, i);
1626 if (req->rq_state & RQ_LOCAL_PENDING ||
1627 !(req->rq_state & RQ_POSTPONED))
1628 continue;
1629 if (expect(list_empty(&req->w.list))) {
1630 req->w.mdev = mdev;
1631 req->w.cb = w_restart_write;
1632 drbd_queue_work(&mdev->tconn->data.work, &req->w);
1633 }
1634 }
1635}
1636
/* e_end_block() is called via drbd_process_done_ee().
 * this means this function only runs in the asender thread
 *
 * Sends the write ack (protocol C), removes the request's interval from
 * the conflict tree, and drops the epoch reference. */
static int e_end_block(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_conf *mdev = w->mdev;
	sector_t sector = peer_req->i.sector;
	int ok = 1, pcmd;

	if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			/* During resync, a successful write may also be
			 * reported as P_RS_WRITE_ACK and marked in-sync. */
			pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
				mdev->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			ok &= drbd_send_ack(mdev, pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(mdev, sector, peer_req->i.size);
		} else {
			ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this? */
		}
		dec_unacked(mdev);
	}
	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
	if (mdev->tconn->net_conf->two_primaries) {
		spin_lock_irq(&mdev->tconn->req_lock);
		D_ASSERT(!drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(mdev, peer_req);
		if (peer_req->flags & EE_RESTART_REQUESTS)
			restart_conflicting_writes(mdev, sector, peer_req->i.size);
		spin_unlock_irq(&mdev->tconn->req_lock);
	} else
		D_ASSERT(drbd_interval_empty(&peer_req->i));

	drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return ok;
}
1680
7be8da07 1681static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
b411b363 1682{
7be8da07 1683 struct drbd_conf *mdev = w->mdev;
8050e6d0
AG
1684 struct drbd_peer_request *peer_req =
1685 container_of(w, struct drbd_peer_request, w);
206d3589 1686 int ok;
b411b363 1687
7be8da07 1688 ok = drbd_send_ack(mdev, ack, peer_req);
b411b363
PR
1689 dec_unacked(mdev);
1690
1691 return ok;
1692}
1693
/* Tell the peer that its conflicting write has been discarded. */
static int e_send_discard_write(struct drbd_work *w, int unused)
{
	return e_send_ack(w, P_DISCARD_WRITE);
}
1698
1699static int e_send_retry_write(struct drbd_work *w, int unused)
1700{
1701 struct drbd_tconn *tconn = w->mdev->tconn;
1702
1703 return e_send_ack(w, tconn->agreed_pro_version >= 100 ?
1704 P_RETRY_WRITE : P_DISCARD_WRITE);
1705}
1706
/* Wrap-around-safe "a is newer than b" comparison of sequence numbers. */
static bool seq_greater(u32 a, u32 b)
{
	/*
	 * We assume 32-bit wrap-around here.
	 * For 24-bit wrap-around, we would have to shift:
	 * a <<= 8; b <<= 8;
	 */
	return (s32)a - (s32)b > 0;
}
1716
1717static u32 seq_max(u32 a, u32 b)
1718{
1719 return seq_greater(a, b) ? a : b;
1720}
1721
7be8da07
AG
1722static bool need_peer_seq(struct drbd_conf *mdev)
1723{
1724 struct drbd_tconn *tconn = mdev->tconn;
1725
1726 /*
1727 * We only need to keep track of the last packet_seq number of our peer
1728 * if we are in dual-primary mode and we have the discard flag set; see
1729 * handle_write_conflicts().
1730 */
1731 return tconn->net_conf->two_primaries &&
1732 test_bit(DISCARD_CONCURRENT, &tconn->flags);
1733}
1734
43ae077d 1735static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
3e394da1 1736{
3c13b680 1737 unsigned int newest_peer_seq;
3e394da1 1738
7be8da07
AG
1739 if (need_peer_seq(mdev)) {
1740 spin_lock(&mdev->peer_seq_lock);
3c13b680
LE
1741 newest_peer_seq = seq_max(mdev->peer_seq, peer_seq);
1742 mdev->peer_seq = newest_peer_seq;
7be8da07 1743 spin_unlock(&mdev->peer_seq_lock);
3c13b680
LE
1744 /* wake up only if we actually changed mdev->peer_seq */
1745 if (peer_seq == newest_peer_seq)
7be8da07
AG
1746 wake_up(&mdev->seq_wait);
1747 }
3e394da1
AG
1748}
1749
/* Called from receive_Data.
 * Synchronize packets on sock with packets on msock.
 *
 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
 * packet traveling on msock, they are still processed in the order they have
 * been sent.
 *
 * Note: we don't care for Ack packets overtaking P_DATA packets.
 *
 * In case packet_seq is larger than mdev->peer_seq number, there are
 * outstanding packets on the msock. We wait for them to arrive.
 * In case we are the logically next packet, we update mdev->peer_seq
 * ourselves. Correctly handles 32bit wrap around.
 *
 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
 *
 * returns 0 if we may process the packet,
 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_seq)
{
	DEFINE_WAIT(wait);
	long timeout;
	int ret;

	/* Sequence tracking is only needed in dual-primary discard mode. */
	if (!need_peer_seq(mdev))
		return 0;

	spin_lock(&mdev->peer_seq_lock);
	for (;;) {
		/* Our packet is "logically next" (or old): record it and go. */
		if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
			mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
			ret = 0;
			break;
		}
		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
		/* Sleep until update_peer_seq() advances mdev->peer_seq;
		 * the lock is dropped around the actual schedule. */
		prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
		spin_unlock(&mdev->peer_seq_lock);
		timeout = mdev->tconn->net_conf->ping_timeo*HZ/10;
		timeout = schedule_timeout(timeout);
		spin_lock(&mdev->peer_seq_lock);
		if (!timeout) {
			ret = -ETIMEDOUT;
			dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n");
			break;
		}
	}
	spin_unlock(&mdev->peer_seq_lock);
	finish_wait(&mdev->seq_wait, &wait);
	return ret;
}
1806
688593c5
LE
1807/* see also bio_flags_to_wire()
1808 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1809 * flags and back. We may replicate to other kernel versions. */
1810static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
76d2e7ec 1811{
688593c5
LE
1812 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1813 (dpf & DP_FUA ? REQ_FUA : 0) |
1814 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1815 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
76d2e7ec
PR
1816}
1817
/* Fail (NEG_ACK) every postponed local request overlapping the given
 * range.  Called with req_lock held; the lock is dropped around
 * master-bio completion, so the overlap scan restarts each time. */
static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector,
				    unsigned int size)
{
	struct drbd_interval *i;

    repeat:
	drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
		struct drbd_request *req;
		struct bio_and_error m;

		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (!(req->rq_state & RQ_POSTPONED))
			continue;
		req->rq_state &= ~RQ_POSTPONED;
		__req_mod(req, NEG_ACKED, &m);
		/* complete_master_bio() must not run under req_lock;
		 * the tree may change meanwhile, so rescan from the top. */
		spin_unlock_irq(&mdev->tconn->req_lock);
		if (m.bio)
			complete_master_bio(mdev, &m);
		spin_lock_irq(&mdev->tconn->req_lock);
		goto repeat;
	}
}
1842
/* Detect and resolve conflicts between an incoming peer write and
 * overlapping local/remote requests.  Called with req_lock held (may be
 * temporarily released inside drbd_wait_misc()).
 * Returns 0 when the peer request may proceed, -ENOENT when it was
 * handed off for discard/retry acking, or another negative error. */
static int handle_write_conflicts(struct drbd_conf *mdev,
				  struct drbd_peer_request *peer_req)
{
	struct drbd_tconn *tconn = mdev->tconn;
	/* The node with DISCARD_CONCURRENT set is the one that decides
	 * which of two conflicting writes survives. */
	bool resolve_conflicts = test_bit(DISCARD_CONCURRENT, &tconn->flags);
	sector_t sector = peer_req->i.sector;
	const unsigned int size = peer_req->i.size;
	struct drbd_interval *i;
	bool equal;
	int err;

	/*
	 * Inserting the peer request into the write_requests tree will prevent
	 * new conflicting local requests from being added.
	 */
	drbd_insert_interval(&mdev->write_requests, &peer_req->i);

    repeat:
	drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
		if (i == &peer_req->i)
			continue;

		if (!i->local) {
			/*
			 * Our peer has sent a conflicting remote request; this
			 * should not happen in a two-node setup.  Wait for the
			 * earlier peer request to complete.
			 */
			err = drbd_wait_misc(mdev, i);
			if (err)
				goto out;
			goto repeat;
		}

		equal = i->sector == sector && i->size == size;
		if (resolve_conflicts) {
			/*
			 * If the peer request is fully contained within the
			 * overlapping request, it can be discarded; otherwise,
			 * it will be retried once all overlapping requests
			 * have completed.
			 */
			bool discard = i->sector <= sector && i->sector +
				       (i->size >> 9) >= sector + (size >> 9);

			if (!equal)
				dev_alert(DEV, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u, "
					       "assuming %s came first\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size,
					  discard ? "local" : "remote");

			/* The ack is sent from the asender via done_ee;
			 * dec_unacked() happens in e_send_ack(). */
			inc_unacked(mdev);
			peer_req->w.cb = discard ? e_send_discard_write :
						   e_send_retry_write;
			list_add_tail(&peer_req->w.list, &mdev->done_ee);
			wake_asender(mdev->tconn);

			err = -ENOENT;
			goto out;
		} else {
			struct drbd_request *req =
				container_of(i, struct drbd_request, i);

			if (!equal)
				dev_alert(DEV, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size);

			if (req->rq_state & RQ_LOCAL_PENDING ||
			    !(req->rq_state & RQ_POSTPONED)) {
				/*
				 * Wait for the node with the discard flag to
				 * decide if this request will be discarded or
				 * retried.  Requests that are discarded will
				 * disappear from the write_requests tree.
				 *
				 * In addition, wait for the conflicting
				 * request to finish locally before submitting
				 * the conflicting peer request.
				 */
				err = drbd_wait_misc(mdev, &req->i);
				if (err) {
					_conn_request_state(mdev->tconn,
							    NS(conn, C_TIMEOUT),
							    CS_HARD);
					fail_postponed_requests(mdev, sector, size);
					goto out;
				}
				goto repeat;
			}
			/*
			 * Remember to restart the conflicting requests after
			 * the new peer request has completed.
			 */
			peer_req->flags |= EE_RESTART_REQUESTS;
		}
	}
	err = 0;

    out:
	if (err)
		drbd_remove_epoch_entry_interval(mdev, peer_req);
	return err;
}
1950
b411b363 1951/* mirrored write */
/*
 * receive_Data() - handle an incoming P_DATA packet: a write mirrored
 * from the peer.  Reads the payload into a peer request, accounts it in
 * the current write epoch, resolves write conflicts when two primaries
 * are configured, queues it on active_ee and submits the block-layer
 * write.  Returns true on success, false to trigger a re-connect.
 */
d8763023
AG
1952static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd,
1953 unsigned int data_size)
b411b363
PR
1954{
1955 sector_t sector;
db830c46 1956 struct drbd_peer_request *peer_req;
e42325a5 1957 struct p_data *p = &mdev->tconn->data.rbuf.data;
7be8da07 1958 u32 peer_seq = be32_to_cpu(p->seq_num);
b411b363
PR
1959 int rw = WRITE;
1960 u32 dp_flags;
7be8da07 1961 int err;
b411b363 1962
b411b363 1963
7be8da07
AG
/* No local disk: still honor the peer's sequence number, NAK the write
 * and drain its payload so the data stream stays in sync. */
1964 if (!get_ldev(mdev)) {
1965 err = wait_for_and_update_peer_seq(mdev, peer_seq);
2b2bf214 1966 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
b411b363 1967 atomic_inc(&mdev->current_epoch->epoch_size);
7be8da07 1968 return drbd_drain_block(mdev, data_size) && err == 0;
b411b363
PR
1969 }
1970
fcefa62e
AG
1971 /*
1972 * Corresponding put_ldev done either below (on various errors), or in
1973 * drbd_peer_request_endio, if we successfully submit the data at the
1974 * end of this function.
1975 */
b411b363
PR
1976
1977 sector = be64_to_cpu(p->sector);
db830c46
AG
1978 peer_req = read_in_block(mdev, p->block_id, sector, data_size);
1979 if (!peer_req) {
b411b363 1980 put_ldev(mdev);
81e84650 1981 return false;
b411b363
PR
1982 }
1983
db830c46 1984 peer_req->w.cb = e_end_block;
b411b363 1985
688593c5
LE
1986 dp_flags = be32_to_cpu(p->dp_flags);
1987 rw |= wire_flags_to_bio(mdev, dp_flags);
1988
1989 if (dp_flags & DP_MAY_SET_IN_SYNC)
db830c46 1990 peer_req->flags |= EE_MAY_SET_IN_SYNC;
688593c5 1991
/* Account this write in the currently open epoch (write barrier group). */
b411b363 1992 spin_lock(&mdev->epoch_lock);
db830c46
AG
1993 peer_req->epoch = mdev->current_epoch;
1994 atomic_inc(&peer_req->epoch->epoch_size);
1995 atomic_inc(&peer_req->epoch->active);
b411b363
PR
1996 spin_unlock(&mdev->epoch_lock);
1997
7be8da07
AG
/* Dual-primary: order against the peer's sequence numbers and resolve
 * conflicts with concurrent local writes under req_lock.  -ENOENT means
 * the peer request was discarded; that still counts as success here. */
1998 if (mdev->tconn->net_conf->two_primaries) {
1999 err = wait_for_and_update_peer_seq(mdev, peer_seq);
2000 if (err)
b411b363 2001 goto out_interrupted;
87eeee41 2002 spin_lock_irq(&mdev->tconn->req_lock);
7be8da07
AG
2003 err = handle_write_conflicts(mdev, peer_req);
2004 if (err) {
2005 spin_unlock_irq(&mdev->tconn->req_lock);
2006 if (err == -ENOENT) {
b411b363 2007 put_ldev(mdev);
81e84650 2008 return true;
b411b363 2009 }
7be8da07 2010 goto out_interrupted;
b411b363 2011 }
7be8da07
AG
2012 } else
2013 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 2014 list_add(&peer_req->w.list, &mdev->active_ee);
87eeee41 2015 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 2016
/* Per-protocol acknowledgment: C acks after local completion (via
 * e_end_block), B acks receipt right away, A does not ack at all. */
89e58e75 2017 switch (mdev->tconn->net_conf->wire_protocol) {
b411b363
PR
2018 case DRBD_PROT_C:
2019 inc_unacked(mdev);
2020 /* corresponding dec_unacked() in e_end_block()
2021 * respective _drbd_clear_done_ee */
2022 break;
2023 case DRBD_PROT_B:
2024 /* I really don't like it that the receiver thread
2025 * sends on the msock, but anyways */
db830c46 2026 drbd_send_ack(mdev, P_RECV_ACK, peer_req);
b411b363
PR
2027 break;
2028 case DRBD_PROT_A:
2029 /* nothing to do */
2030 break;
2031 }
2032
/* Peer disk unusable: mark the range out of sync and route the write
 * through the activity log instead of ever setting it in-sync. */
6719fb03 2033 if (mdev->state.pdsk < D_INCONSISTENT) {
b411b363 2034 /* In case we have the only disk of the cluster, */
db830c46
AG
2035 drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
2036 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2037 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
2038 drbd_al_begin_io(mdev, peer_req->i.sector);
b411b363
PR
2039 }
2040
fbe29dec 2041 if (drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR) == 0)
81e84650 2042 return true;
b411b363 2043
10f6d992
LE
2044 /* don't care for the reason here */
2045 dev_err(DEV, "submit failed, triggering re-connect\n");
/* Submit failed: undo the queueing and interval-tree registration done
 * above, then fall through into the common interrupted cleanup. */
87eeee41 2046 spin_lock_irq(&mdev->tconn->req_lock);
db830c46
AG
2047 list_del(&peer_req->w.list);
2048 drbd_remove_epoch_entry_interval(mdev, peer_req);
87eeee41 2049 spin_unlock_irq(&mdev->tconn->req_lock);
db830c46
AG
2050 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
2051 drbd_al_complete_io(mdev, peer_req->i.sector);
22cc37a9 2052
b411b363 2053out_interrupted:
/* Release the epoch reference taken above, the ldev reference, and the
 * peer request itself; returning false forces a re-connect. */
db830c46 2054 drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + EV_CLEANUP);
b411b363 2055 put_ldev(mdev);
db830c46 2056 drbd_free_ee(mdev, peer_req);
81e84650 2057 return false;
b411b363
PR
2058}
2059
0f0601f4
LE
2060/* We may throttle resync, if the lower device seems to be busy,
2061 * and current sync rate is above c_min_rate.
2062 *
2063 * To decide whether or not the lower device is busy, we use a scheme similar
2064 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
2065 * (more than 64 sectors) of activity we cannot account for with our own resync
2066 * activity, it obviously is "busy".
2067 *
2068 * The current sync rate used here uses only the most recent two step marks,
2069 * to have a short time average so we can react faster.
2070 */
e3555d85 2071int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
0f0601f4
LE
2072{
2073 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
2074 unsigned long db, dt, dbdt;
e3555d85 2075 struct lc_element *tmp;
0f0601f4
LE
2076 int curr_events;
2077 int throttle = 0;
2078
2079 /* feature disabled? */
/* c_min_rate == 0 means "never throttle"; bail out cheaply. */
2080 if (mdev->sync_conf.c_min_rate == 0)
2081 return 0;
2082
e3555d85
PR
/* If application IO is already waiting on this resync extent
 * (BME_PRIORITY), never throttle it further. */
2083 spin_lock_irq(&mdev->al_lock);
2084 tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
2085 if (tmp) {
2086 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2087 if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
2088 spin_unlock_irq(&mdev->al_lock);
2089 return 0;
2090 }
2091 /* Do not slow down if app IO is already waiting for this extent */
2092 }
2093 spin_unlock_irq(&mdev->al_lock);
2094
0f0601f4
LE
/* Total sectors moved on the backing device minus our own resync
 * sector events = activity we cannot account for (i.e. application IO). */
2095 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2096 (int)part_stat_read(&disk->part0, sectors[1]) -
2097 atomic_read(&mdev->rs_sect_ev);
e3555d85 2098
0f0601f4
LE
/* Only (re)compute the short-term sync rate when we see "significant"
 * (>64 sectors) unaccounted activity since the last check. */
2099 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
2100 unsigned long rs_left;
2101 int i;
2102
2103 mdev->rs_last_events = curr_events;
2104
2105 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2106 * approx. */
2649f080
LE
2107 i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
2108
2109 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
2110 rs_left = mdev->ov_left;
2111 else
2112 rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
0f0601f4
LE
2113
/* dt in seconds (min 1 to avoid div by zero), db in bits resynced;
 * dbdt is the recent sync rate in KiB/s compared against c_min_rate. */
2114 dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
2115 if (!dt)
2116 dt++;
2117 db = mdev->rs_mark_left[i] - rs_left;
2118 dbdt = Bit2KB(db/dt);
2119
2120 if (dbdt > mdev->sync_conf.c_min_rate)
2121 throttle = 1;
2122 }
2123 return throttle;
2124}
2125
2126
d8763023
AG
/*
 * receive_DataRequest() - handle a peer read request: P_DATA_REQUEST
 * (application read), P_RS_DATA_REQUEST (resync), P_CSUM_RS_REQUEST
 * (checksum-based resync), P_OV_REQUEST / P_OV_REPLY (online verify).
 * Validates the requested range, allocates a peer request with the
 * per-type completion callback, optionally receives a trailing digest,
 * applies resync throttling, and submits the local read.
 * Returns true on success, false to trigger a re-connect.
 */
2127static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd,
2128 unsigned int digest_size)
b411b363
PR
2129{
2130 sector_t sector;
2131 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
db830c46 2132 struct drbd_peer_request *peer_req;
b411b363 2133 struct digest_info *di = NULL;
b18b37be 2134 int size, verb;
b411b363 2135 unsigned int fault_type;
e42325a5 2136 struct p_block_req *p = &mdev->tconn->data.rbuf.block_req;
b411b363
PR
2137
2138 sector = be64_to_cpu(p->sector);
2139 size = be32_to_cpu(p->blksize);
2140
/* Sanity-check the peer-supplied range: positive, 512-byte aligned,
 * within the max bio size and within the device capacity. */
c670a398 2141 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
b411b363
PR
2142 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2143 (unsigned long long)sector, size);
81e84650 2144 return false;
b411b363
PR
2145 }
2146 if (sector + (size>>9) > capacity) {
2147 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2148 (unsigned long long)sector, size);
81e84650 2149 return false;
b411b363
PR
2150 }
2151
/* No up-to-date local data: send the appropriate negative ack for the
 * request type, then drain any trailing payload to stay in sync. */
2152 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
b18b37be
PR
2153 verb = 1;
2154 switch (cmd) {
2155 case P_DATA_REQUEST:
2156 drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
2157 break;
2158 case P_RS_DATA_REQUEST:
2159 case P_CSUM_RS_REQUEST:
2160 case P_OV_REQUEST:
2161 drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
2162 break;
2163 case P_OV_REPLY:
2164 verb = 0;
2165 dec_rs_pending(mdev);
2166 drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
2167 break;
2168 default:
2169 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
2170 cmdname(cmd));
2171 }
2172 if (verb && __ratelimit(&drbd_ratelimit_state))
b411b363
PR
2173 dev_err(DEV, "Can not satisfy peer's read request, "
2174 "no local data.\n");
b18b37be 2175
a821cc4a
LE
2176 /* drain possibly payload */
2177 return drbd_drain_block(mdev, digest_size);
b411b363
PR
2178 }
2179
2180 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2181 * "criss-cross" setup, that might cause write-out on some other DRBD,
2182 * which in turn might block on the other node at this very place. */
db830c46
AG
2183 peer_req = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
2184 if (!peer_req) {
b411b363 2185 put_ldev(mdev);
81e84650 2186 return false;
b411b363
PR
2187 }
2188
/* Pick the completion callback and fault-injection type per request. */
02918be2 2189 switch (cmd) {
b411b363 2190 case P_DATA_REQUEST:
db830c46 2191 peer_req->w.cb = w_e_end_data_req;
b411b363 2192 fault_type = DRBD_FAULT_DT_RD;
80a40e43
LE
2193 /* application IO, don't drbd_rs_begin_io */
2194 goto submit;
2195
b411b363 2196 case P_RS_DATA_REQUEST:
db830c46 2197 peer_req->w.cb = w_e_end_rsdata_req;
b411b363 2198 fault_type = DRBD_FAULT_RS_RD;
5f9915bb
LE
2199 /* used in the sector offset progress display */
2200 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
b411b363
PR
2201 break;
2202
2203 case P_OV_REPLY:
2204 case P_CSUM_RS_REQUEST:
2205 fault_type = DRBD_FAULT_RS_RD;
b411b363
PR
/* Both carry a digest payload: allocate header+digest in one chunk
 * and receive the digest bytes directly behind the struct. */
2206 di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO);
2207 if (!di)
2208 goto out_free_e;
2209
2210 di->digest_size = digest_size;
2211 di->digest = (((char *)di)+sizeof(struct digest_info));
2212
db830c46
AG
2213 peer_req->digest = di;
2214 peer_req->flags |= EE_HAS_DIGEST;
c36c3ced 2215
de0ff338 2216 if (drbd_recv(mdev->tconn, di->digest, digest_size) != digest_size)
b411b363
PR
2217 goto out_free_e;
2218
02918be2 2219 if (cmd == P_CSUM_RS_REQUEST) {
31890f4a 2220 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
db830c46 2221 peer_req->w.cb = w_e_end_csum_rs_req;
5f9915bb
LE
2222 /* used in the sector offset progress display */
2223 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
02918be2 2224 } else if (cmd == P_OV_REPLY) {
2649f080
LE
2225 /* track progress, we may need to throttle */
2226 atomic_add(size >> 9, &mdev->rs_sect_in);
db830c46 2227 peer_req->w.cb = w_e_end_ov_reply;
b411b363 2228 dec_rs_pending(mdev);
0f0601f4
LE
2229 /* drbd_rs_begin_io done when we sent this request,
2230 * but accounting still needs to be done. */
2231 goto submit_for_resync;
b411b363
PR
2232 }
2233 break;
2234
2235 case P_OV_REQUEST:
/* First online-verify request (proto >= 90): initialize verify
 * position/progress bookkeeping and the sync-rate mark arrays. */
b411b363 2236 if (mdev->ov_start_sector == ~(sector_t)0 &&
31890f4a 2237 mdev->tconn->agreed_pro_version >= 90) {
de228bba
LE
2238 unsigned long now = jiffies;
2239 int i;
b411b363
PR
2240 mdev->ov_start_sector = sector;
2241 mdev->ov_position = sector;
30b743a2
LE
2242 mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
2243 mdev->rs_total = mdev->ov_left;
de228bba
LE
2244 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2245 mdev->rs_mark_left[i] = mdev->ov_left;
2246 mdev->rs_mark_time[i] = now;
2247 }
b411b363
PR
2248 dev_info(DEV, "Online Verify start sector: %llu\n",
2249 (unsigned long long)sector);
2250 }
db830c46 2251 peer_req->w.cb = w_e_end_ov_req;
b411b363 2252 fault_type = DRBD_FAULT_RS_RD;
b411b363
PR
2253 break;
2254
b411b363
PR
2255 default:
2256 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
02918be2 2257 cmdname(cmd));
b411b363 2258 fault_type = DRBD_FAULT_MAX;
80a40e43 2259 goto out_free_e;
b411b363
PR
2260 }
2261
0f0601f4
LE
2262 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2263 * wrt the receiver, but it is not as straightforward as it may seem.
2264 * Various places in the resync start and stop logic assume resync
2265 * requests are processed in order, requeuing this on the worker thread
2266 * introduces a bunch of new code for synchronization between threads.
2267 *
2268 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2269 * "forever", throttling after drbd_rs_begin_io will lock that extent
2270 * for application writes for the same time. For now, just throttle
2271 * here, where the rest of the code expects the receiver to sleep for
2272 * a while, anyways.
2273 */
2274
2275 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2276 * this defers syncer requests for some time, before letting at least
2277 * on request through. The resync controller on the receiving side
2278 * will adapt to the incoming rate accordingly.
2279 *
2280 * We cannot throttle here if remote is Primary/SyncTarget:
2281 * we would also throttle its application reads.
2282 * In that case, throttling is done on the SyncTarget only.
2283 */
e3555d85
PR
2284 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
2285 schedule_timeout_uninterruptible(HZ/10);
2286 if (drbd_rs_begin_io(mdev, sector))
80a40e43 2287 goto out_free_e;
b411b363 2288
0f0601f4
LE
2289submit_for_resync:
/* Account our own resync IO so drbd_rs_should_slow_down() can tell it
 * apart from application IO on the backing device. */
2290 atomic_add(size >> 9, &mdev->rs_sect_ev);
2291
80a40e43 2292submit:
/* Expect an ack for this request; queue it on read_ee and submit. */
b411b363 2293 inc_unacked(mdev);
87eeee41 2294 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 2295 list_add_tail(&peer_req->w.list, &mdev->read_ee);
87eeee41 2296 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 2297
fbe29dec 2298 if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0)
81e84650 2299 return true;
b411b363 2300
10f6d992
LE
2301 /* don't care for the reason here */
2302 dev_err(DEV, "submit failed, triggering re-connect\n")
2302 dev_err(DEV, "submit failed, triggering re-connect\n");
87eeee41 2303 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 2304 list_del(&peer_req->w.list);
87eeee41 2305 spin_unlock_irq(&mdev->tconn->req_lock);
22cc37a9
LE
2306 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2307
b411b363 2308out_free_e:
/* Common error exit: release the ldev reference and the peer request
 * (drbd_free_ee also frees an attached digest, if any). */
b411b363 2309 put_ldev(mdev);
db830c46 2310 drbd_free_ee(mdev, peer_req);
81e84650 2311 return false;
b411b363
PR
2312}
2313
/*
 * drbd_asb_recover_0p() - apply the after-split-brain-0-primaries policy.
 * Decides the sync direction after a split brain with zero primaries:
 * returns -1 to discard the local data (become sync target), 1 to
 * discard the remote data (become sync source), -100 for no decision.
 * "Younger/older primary" is judged from the low bit of the bitmap
 * UUIDs; change counts come from p_uuid[UI_SIZE] (peer) and comm_bm_set
 * (self).  Note the deliberate case fallthroughs below.
 */
2314static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2315{
2316 int self, peer, rv = -100;
2317 unsigned long ch_self, ch_peer;
2318
2319 self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
2320 peer = mdev->p_uuid[UI_BITMAP] & 1;
2321
2322 ch_peer = mdev->p_uuid[UI_SIZE];
2323 ch_self = mdev->comm_bm_set;
2324
89e58e75 2325 switch (mdev->tconn->net_conf->after_sb_0p) {
b411b363
PR
2326 case ASB_CONSENSUS:
2327 case ASB_DISCARD_SECONDARY:
2328 case ASB_CALL_HELPER:
2329 dev_err(DEV, "Configuration error.\n");
2330 break;
2331 case ASB_DISCONNECT:
2332 break;
2333 case ASB_DISCARD_YOUNGER_PRI:
2334 if (self == 0 && peer == 1) {
2335 rv = -1;
2336 break;
2337 }
2338 if (self == 1 && peer == 0) {
2339 rv = 1;
2340 break;
2341 }
2342 /* Else fall through to one of the other strategies... */
/* fallthrough: younger/older undecidable, try the next strategy */
2343 case ASB_DISCARD_OLDER_PRI:
2344 if (self == 0 && peer == 1) {
2345 rv = 1;
2346 break;
2347 }
2348 if (self == 1 && peer == 0) {
2349 rv = -1;
2350 break;
2351 }
2352 /* Else fall through to one of the other strategies... */
ad19bf6e 2353 dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
b411b363
PR
2354 "Using discard-least-changes instead\n");
/* fallthrough into the change-count based strategies */
2355 case ASB_DISCARD_ZERO_CHG:
2356 if (ch_peer == 0 && ch_self == 0) {
/* Both sides unchanged: break the tie via DISCARD_CONCURRENT. */
25703f83 2357 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
b411b363
PR
2358 ? -1 : 1;
2359 break;
2360 } else {
2361 if (ch_peer == 0) { rv = 1; break; }
2362 if (ch_self == 0) { rv = -1; break; }
2363 }
2364 if (mdev->tconn->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
89e58e75 2364 if (mdev->tconn->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
b411b363
PR
2365 break;
/* fallthrough (when invoked via younger/older): least changes loses */
2366 case ASB_DISCARD_LEAST_CHG:
2367 if (ch_self < ch_peer)
2368 rv = -1;
2369 else if (ch_self > ch_peer)
2370 rv = 1;
2371 else /* ( ch_self == ch_peer ) */
2372 /* Well, then use something else. */
25703f83 2373 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
b411b363
PR
2374 ? -1 : 1;
2375 break;
2376 case ASB_DISCARD_LOCAL:
2377 rv = -1;
2378 break;
2379 case ASB_DISCARD_REMOTE:
2380 rv = 1;
2381 }
2382
2383 return rv;
2384}
2385
/*
 * drbd_asb_recover_1p() - apply the after-split-brain-1-primary policy.
 * Same return convention as drbd_asb_recover_0p(): -1 discard local,
 * 1 discard remote, -100 no decision.  Policies that only make sense
 * with zero primaries are rejected as configuration errors.
 */
2386static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2387{
6184ea21 2388 int hg, rv = -100;
b411b363 2389
89e58e75 2390 switch (mdev->tconn->net_conf->after_sb_1p) {
b411b363
PR
2391 case ASB_DISCARD_YOUNGER_PRI:
2392 case ASB_DISCARD_OLDER_PRI:
2393 case ASB_DISCARD_LEAST_CHG:
2394 case ASB_DISCARD_LOCAL:
2395 case ASB_DISCARD_REMOTE:
2396 dev_err(DEV, "Configuration error.\n");
2397 break;
2398 case ASB_DISCONNECT:
2399 break;
2400 case ASB_CONSENSUS:
/* Only accept the 0p decision when it agrees with our current role:
 * the secondary may lose, the primary may win. */
2401 hg = drbd_asb_recover_0p(mdev);
2402 if (hg == -1 && mdev->state.role == R_SECONDARY)
2403 rv = hg;
2404 if (hg == 1 && mdev->state.role == R_PRIMARY)
2405 rv = hg;
2406 break;
2407 case ASB_VIOLENTLY:
2408 rv = drbd_asb_recover_0p(mdev);
2409 break;
2410 case ASB_DISCARD_SECONDARY:
2411 return mdev->state.role == R_PRIMARY ? 1 : -1;
2412 case ASB_CALL_HELPER:
/* If the 0p decision says the primary (us) loses, try to demote to
 * secondary first; only then accept losing our data. */
2413 hg = drbd_asb_recover_0p(mdev);
2414 if (hg == -1 && mdev->state.role == R_PRIMARY) {
bb437946
AG
2415 enum drbd_state_rv rv2;
2416
2417 drbd_set_role(mdev, R_SECONDARY, 0);
b411b363
PR
2418 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2419 * we might be here in C_WF_REPORT_PARAMS which is transient.
2420 * we do not need to wait for the after state change work either. */
bb437946
AG
2421 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2422 if (rv2 != SS_SUCCESS) {
b411b363
PR
2423 drbd_khelper(mdev, "pri-lost-after-sb");
2424 } else {
2425 dev_warn(DEV, "Successfully gave up primary role.\n");
2426 rv = hg;
2427 }
2428 } else
2429 rv = hg;
2430 }
2431
2432 return rv;
2433}
2434
/*
 * drbd_asb_recover_2p() - apply the after-split-brain-2-primaries policy.
 * Same return convention as drbd_asb_recover_0p(): -1 discard local,
 * 1 discard remote, -100 no decision.  With both nodes primary, only
 * disconnect, violently, or call-helper are valid policies.
 */
2435static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2436{
6184ea21 2437 int hg, rv = -100;
b411b363 2438
89e58e75 2439 switch (mdev->tconn->net_conf->after_sb_2p) {
b411b363
PR
2440 case ASB_DISCARD_YOUNGER_PRI:
2441 case ASB_DISCARD_OLDER_PRI:
2442 case ASB_DISCARD_LEAST_CHG:
2443 case ASB_DISCARD_LOCAL:
2444 case ASB_DISCARD_REMOTE:
2445 case ASB_CONSENSUS:
2446 case ASB_DISCARD_SECONDARY:
2447 dev_err(DEV, "Configuration error.\n");
2448 break;
2449 case ASB_VIOLENTLY:
2450 rv = drbd_asb_recover_0p(mdev);
2451 break;
2452 case ASB_DISCONNECT:
2453 break;
2454 case ASB_CALL_HELPER:
/* If the 0p decision says we lose, try to demote ourselves to
 * secondary first; only accept the decision on success. */
2455 hg = drbd_asb_recover_0p(mdev);
2456 if (hg == -1) {
bb437946
AG
2457 enum drbd_state_rv rv2;
2458
b411b363
PR
2459 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2460 * we might be here in C_WF_REPORT_PARAMS which is transient.
2461 * we do not need to wait for the after state change work either. */
bb437946
AG
2462 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2463 if (rv2 != SS_SUCCESS) {
b411b363
PR
2464 drbd_khelper(mdev, "pri-lost-after-sb");
2465 } else {
2466 dev_warn(DEV, "Successfully gave up primary role.\n");
2467 rv = hg;
2468 }
2469 } else
2470 rv = hg;
2471 }
2472
2473 return rv;
2474}
2475
2476static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2477 u64 bits, u64 flags)
2478{
2479 if (!uuid) {
2480 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2481 return;
2482 }
2483 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2484 text,
2485 (unsigned long long)uuid[UI_CURRENT],
2486 (unsigned long long)uuid[UI_BITMAP],
2487 (unsigned long long)uuid[UI_HISTORY_START],
2488 (unsigned long long)uuid[UI_HISTORY_END],
2489 (unsigned long long)bits,
2490 (unsigned long long)flags);
2491}
2492
2493/*
2494 100 after split brain try auto recover
2495 2 C_SYNC_SOURCE set BitMap
2496 1 C_SYNC_SOURCE use BitMap
2497 0 no Sync
2498 -1 C_SYNC_TARGET use BitMap
2499 -2 C_SYNC_TARGET set BitMap
2500 -100 after split brain, disconnect
2501-1000 unrelated data
4a23f264
PR
2502-1091 requires proto 91
2503-1096 requires proto 96
b411b363
PR
2504 */
/*
 * drbd_uuid_compare() - compare our UUID set against the peer's and
 * decide the sync direction (see the value table above).  *rule_nr is
 * set to the number of the rule that decided, for logging.  May
 * correct our own or the peer's in-memory UUIDs when it detects a
 * missed "resync finished" or a lost P_SYNC_UUID packet.
 */
2505static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
2506{
2507 u64 self, peer;
2508 int i, j;
2509
/* Low bit of each UUID is a role flag; mask it off for comparisons. */
2510 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2511 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2512
2513 *rule_nr = 10;
2514 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2515 return 0;
2516
2517 *rule_nr = 20;
2518 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2519 peer != UUID_JUST_CREATED)
2520 return -2;
2521
2522 *rule_nr = 30;
2523 if (self != UUID_JUST_CREATED &&
2524 (peer == UUID_JUST_CREATED || peer == (u64)0))
2525 return 2;
2526
/* Identical current UUIDs: either a missed resync-finished event on
 * one side, or both crashed; decide from bitmap UUIDs / crash flags. */
2527 if (self == peer) {
2528 int rct, dc; /* roles at crash time */
2529
2530 if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
2531
31890f4a 2532 if (mdev->tconn->agreed_pro_version < 91)
4a23f264 2533 return -1091;
b411b363
PR
2534
2535 if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2536 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
2537 dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
2538 drbd_uuid_set_bm(mdev, 0UL);
2539
2540 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2541 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2542 *rule_nr = 34;
2543 } else {
2544 dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
2545 *rule_nr = 36;
2546 }
2547
2548 return 1;
2549 }
2550
2551 if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
2552
31890f4a 2553 if (mdev->tconn->agreed_pro_version < 91)
4a23f264 2554 return -1091;
b411b363
PR
2555
2556 if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2557 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
2558 dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
2559
/* Rotate the peer's in-memory UUID history as if it had finished. */
2560 mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
2561 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
2562 mdev->p_uuid[UI_BITMAP] = 0UL;
2563
2564 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2565 *rule_nr = 35;
2566 } else {
2567 dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
2568 *rule_nr = 37;
2569 }
2570
2571 return -1;
2572 }
2573
2574 /* Common power [off|failure] */
2575 rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
2576 (mdev->p_uuid[UI_FLAGS] & 2);
2577 /* lowest bit is set when we were primary,
2578 * next bit (weight 2) is set when peer was primary */
2579 *rule_nr = 40;
2580
2581 switch (rct) {
2582 case 0: /* !self_pri && !peer_pri */ return 0;
2583 case 1: /* self_pri && !peer_pri */ return 1;
2584 case 2: /* !self_pri && peer_pri */ return -1;
2585 case 3: /* self_pri && peer_pri */
/* Both were primary at crash time: break the tie deterministically. */
25703f83 2586 dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
b411b363
PR
2587 return dc ? -1 : 1;
2588 }
2589 }
2590
2591 *rule_nr = 50;
2592 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2593 if (self == peer)
2594 return -1;
2595
2596 *rule_nr = 51;
2597 peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
2598 if (self == peer) {
31890f4a 2599 if (mdev->tconn->agreed_pro_version < 96 ?
4a23f264
PR
2600 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2601 (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2602 peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
b411b363
PR
2603 /* The last P_SYNC_UUID did not get though. Undo the last start of
2604 resync as sync source modifications of the peer's UUIDs. */
2605
31890f4a 2606 if (mdev->tconn->agreed_pro_version < 91)
4a23f264 2607 return -1091;
b411b363
PR
2608
2609 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
2610 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
4a23f264
PR
2611
2612 dev_info(DEV, "Did not got last syncUUID packet, corrected:\n");
2613 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2614
b411b363
PR
2615 return -1;
2616 }
2617 }
2618
2619 *rule_nr = 60;
2620 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2621 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2622 peer = mdev->p_uuid[i] & ~((u64)1);
2623 if (self == peer)
2624 return -2;
2625 }
2626
2627 *rule_nr = 70;
2628 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2629 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2630 if (self == peer)
2631 return 1;
2632
2633 *rule_nr = 71;
2634 self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
2635 if (self == peer) {
31890f4a 2636 if (mdev->tconn->agreed_pro_version < 96 ?
4a23f264
PR
2637 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2638 (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2639 self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
b411b363
PR
2640 /* The last P_SYNC_UUID did not get though. Undo the last start of
2641 resync as sync source modifications of our UUIDs. */
2642
31890f4a 2643 if (mdev->tconn->agreed_pro_version < 91)
4a23f264 2644 return -1091;
b411b363
PR
2645
2646 _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
2647 _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
2648
4a23f264 2649 dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
b411b363
PR
2650 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2651 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2652
2653 return 1;
2654 }
2655 }
2656
2657
2658 *rule_nr = 80;
d8c2a36b 2659 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
b411b363
PR
2660 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2661 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2662 if (self == peer)
2663 return 2;
2664 }
2665
2666 *rule_nr = 90;
2667 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2668 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2669 if (self == peer && self != ((u64)0))
2670 return 100;
2671
/* Histories overlap but nothing above matched: split brain. */
2672 *rule_nr = 100;
2673 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2674 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2675 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
2676 peer = mdev->p_uuid[j] & ~((u64)1);
2677 if (self == peer)
2678 return -100;
2679 }
2680 }
2681
2682 return -1000;
2683}
2684
2685/* drbd_sync_handshake() returns the new conn state on success, or
2686 CONN_MASK (-1) on failure.
2687 */
/*
 * Compare UUIDs with the peer (drbd_uuid_compare), run the configured
 * after-split-brain recovery policies if needed, and map the resulting
 * handshake value hg (see the table above drbd_uuid_compare) to a
 * connection state: C_WF_BITMAP_S (sync source), C_WF_BITMAP_T (sync
 * target), C_CONNECTED (no resync), or C_MASK on any unresolved
 * conflict or error.
 */
2688static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
2689 enum drbd_disk_state peer_disk) __must_hold(local)
2690{
2691 int hg, rule_nr;
2692 enum drbd_conns rv = C_MASK;
2693 enum drbd_disk_state mydisk;
2694
2695 mydisk = mdev->state.disk;
2696 if (mydisk == D_NEGOTIATING)
2697 mydisk = mdev->new_state_tmp.disk;
2698
2699 dev_info(DEV, "drbd_sync_handshake:\n");
2700 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
2701 drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
2702 mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2703
2704 hg = drbd_uuid_compare(mdev, &rule_nr);
2705
2706 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2707
2708 if (hg == -1000) {
2709 dev_alert(DEV, "Unrelated data, aborting!\n");
2710 return C_MASK;
2711 }
4a23f264
PR
/* -1091/-1096: the required protocol version is encoded as -hg - 1000. */
2712 if (hg < -1000) {
2713 dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
b411b363
PR
2714 return C_MASK;
2715 }
2716
/* Exactly one side is D_INCONSISTENT: disk states override the UUID
 * verdict; a split-brain or set-bitmap result forces a full sync. */
2717 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
2718 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
2719 int f = (hg == -100) || abs(hg) == 2;
2720 hg = mydisk > D_INCONSISTENT ? 1 : -1;
2721 if (f)
2722 hg = hg*2;
2723 dev_info(DEV, "Becoming sync %s due to disk states.\n",
2724 hg > 0 ? "source" : "target");
2725 }
2726
3a11a487
AG
2727 if (abs(hg) == 100)
2728 drbd_khelper(mdev, "initial-split-brain");
2729
/* Try automatic split-brain recovery, dispatched on the number of
 * currently primary nodes (0p/1p/2p policies). */
89e58e75 2730 if (hg == 100 || (hg == -100 && mdev->tconn->net_conf->always_asbp)) {
b411b363
PR
2731 int pcount = (mdev->state.role == R_PRIMARY)
2732 + (peer_role == R_PRIMARY);
2733 int forced = (hg == -100);
2734
2735 switch (pcount) {
2736 case 0:
2737 hg = drbd_asb_recover_0p(mdev);
2738 break;
2739 case 1:
2740 hg = drbd_asb_recover_1p(mdev);
2741 break;
2742 case 2:
2743 hg = drbd_asb_recover_2p(mdev);
2744 break;
2745 }
2746 if (abs(hg) < 100) {
2747 dev_warn(DEV, "Split-Brain detected, %d primaries, "
2748 "automatically solved. Sync from %s node\n",
2749 pcount, (hg < 0) ? "peer" : "this");
2750 if (forced) {
2751 dev_warn(DEV, "Doing a full sync, since"
2752 " UUIDs where ambiguous.\n");
2753 hg = hg*2;
2754 }
2755 }
2756 }
2757
/* Manual resolution via the want_lose ("discard-my-data") flags. */
2758 if (hg == -100) {
89e58e75 2759 if (mdev->tconn->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
b411b363 2760 hg = -1;
89e58e75 2761 if (!mdev->tconn->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
b411b363
PR
2762 hg = 1;
2763
2764 if (abs(hg) < 100)
2765 dev_warn(DEV, "Split-Brain detected, manually solved. "
2766 "Sync from %s node\n",
2767 (hg < 0) ? "peer" : "this");
2768 }
2769
2770 if (hg == -100) {
580b9767
LE
2771 /* FIXME this log message is not correct if we end up here
2772 * after an attempted attach on a diskless node.
2773 * We just refuse to attach -- well, we drop the "connection"
2774 * to that disk, in a way... */
3a11a487 2775 dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
b411b363
PR
2776 drbd_khelper(mdev, "split-brain");
2777 return C_MASK;
2778 }
2779
2780 if (hg > 0 && mydisk <= D_INCONSISTENT) {
2781 dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
2782 return C_MASK;
2783 }
2784
/* We are primary with good data but would become sync target: consult
 * the rr-conflict policy before giving up our data. */
2785 if (hg < 0 && /* by intention we do not use mydisk here. */
2786 mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
89e58e75 2787 switch (mdev->tconn->net_conf->rr_conflict) {
b411b363
PR
2788 case ASB_CALL_HELPER:
2789 drbd_khelper(mdev, "pri-lost");
2790 /* fall through */
2791 case ASB_DISCONNECT:
2792 dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
2793 return C_MASK;
2794 case ASB_VIOLENTLY:
2795 dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
2796 "assumption\n");
2797 }
2798 }
2799
/* Dry-run connect: log what would happen, then abort the handshake. */
89e58e75 2800 if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) {
cf14c2e9
PR
2801 if (hg == 0)
2802 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
2803 else
2804 dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
2805 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
2806 abs(hg) >= 2 ? "full" : "bit-map based");
2807 return C_MASK;
2808 }
2809
b411b363
PR
/* |hg| >= 2 means full sync: set all bits in the bitmap first. */
2810 if (abs(hg) >= 2) {
2811 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
20ceb2b2
LE
2812 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
2813 BM_LOCKED_SET_ALLOWED))
b411b363
PR
2814 return C_MASK;
2815 }
2816
2817 if (hg > 0) { /* become sync source. */
2818 rv = C_WF_BITMAP_S;
2819 } else if (hg < 0) { /* become sync target */
2820 rv = C_WF_BITMAP_T;
2821 } else {
2822 rv = C_CONNECTED;
2823 if (drbd_bm_total_weight(mdev)) {
2824 dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
2825 drbd_bm_total_weight(mdev));
2826 }
2827 }
2828
2829 return rv;
2830}
2831
2832/* returns 1 if invalid */
2833static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
2834{
2835 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
2836 if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) ||
2837 (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL))
2838 return 0;
2839
2840 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
2841 if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL ||
2842 self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL)
2843 return 1;
2844
2845 /* everything else is valid if they are equal on both sides. */
2846 if (peer == self)
2847 return 0;
2848
2849 /* everything es is invalid. */
2850 return 1;
2851}
2852
d8763023
AG
/* Handle a P_PROTOCOL packet: verify that the peer's connection settings
 * (wire protocol, after-split-brain policies, two-primaries, data-integrity
 * algorithm) are compatible with our local net_conf.
 * Returns true on success; on any mismatch forces C_DISCONNECTING and
 * returns false.  A short read of the integrity-alg string also returns
 * false (connection is assumed dead). */
static int receive_protocol(struct drbd_conf *mdev, enum drbd_packet cmd,
			    unsigned int data_size)
{
	struct p_protocol *p = &mdev->tconn->data.rbuf.protocol;
	int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
	int p_want_lose, p_two_primaries, cf;
	char p_integrity_alg[SHARED_SECRET_MAX] = "";

	/* all fields arrive in network byte order */
	p_proto		= be32_to_cpu(p->protocol);
	p_after_sb_0p	= be32_to_cpu(p->after_sb_0p);
	p_after_sb_1p	= be32_to_cpu(p->after_sb_1p);
	p_after_sb_2p	= be32_to_cpu(p->after_sb_2p);
	p_two_primaries = be32_to_cpu(p->two_primaries);
	cf		= be32_to_cpu(p->conn_flags);
	p_want_lose = cf & CF_WANT_LOSE;

	/* mirror the peer's dry-run request into our flags */
	clear_bit(CONN_DRY_RUN, &mdev->flags);

	if (cf & CF_DRY_RUN)
		set_bit(CONN_DRY_RUN, &mdev->flags);

	if (p_proto != mdev->tconn->net_conf->wire_protocol) {
		dev_err(DEV, "incompatible communication protocols\n");
		goto disconnect;
	}

	/* the three after-split-brain policies must pair up validly
	 * (see cmp_after_sb) for 0, 1 and 2 primaries */
	if (cmp_after_sb(p_after_sb_0p, mdev->tconn->net_conf->after_sb_0p)) {
		dev_err(DEV, "incompatible after-sb-0pri settings\n");
		goto disconnect;
	}

	if (cmp_after_sb(p_after_sb_1p, mdev->tconn->net_conf->after_sb_1p)) {
		dev_err(DEV, "incompatible after-sb-1pri settings\n");
		goto disconnect;
	}

	if (cmp_after_sb(p_after_sb_2p, mdev->tconn->net_conf->after_sb_2p)) {
		dev_err(DEV, "incompatible after-sb-2pri settings\n");
		goto disconnect;
	}

	/* at most one side may volunteer to lose its data */
	if (p_want_lose && mdev->tconn->net_conf->want_lose) {
		dev_err(DEV, "both sides have the 'want_lose' flag set\n");
		goto disconnect;
	}

	if (p_two_primaries != mdev->tconn->net_conf->two_primaries) {
		dev_err(DEV, "incompatible setting of the two-primaries options\n");
		goto disconnect;
	}

	/* protocol 87 added the trailing integrity-alg string */
	if (mdev->tconn->agreed_pro_version >= 87) {
		unsigned char *my_alg = mdev->tconn->net_conf->integrity_alg;

		if (drbd_recv(mdev->tconn, p_integrity_alg, data_size) != data_size)
			return false;

		/* force NUL termination before comparing */
		p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
		if (strcmp(p_integrity_alg, my_alg)) {
			dev_err(DEV, "incompatible setting of the data-integrity-alg\n");
			goto disconnect;
		}
		dev_info(DEV, "data-integrity-alg: %s\n",
			 my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
	}

	return true;

disconnect:
	drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
	return false;
}
2925
2926/* helper function
2927 * input: alg name, feature name
2928 * return: NULL (alg name was "")
2929 * ERR_PTR(error) if something goes wrong
2930 * or the crypto hash ptr, if it worked out ok. */
2931struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
2932 const char *alg, const char *name)
2933{
2934 struct crypto_hash *tfm;
2935
2936 if (!alg[0])
2937 return NULL;
2938
2939 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
2940 if (IS_ERR(tfm)) {
2941 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
2942 alg, name, PTR_ERR(tfm));
2943 return tfm;
2944 }
2945 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
2946 crypto_free_hash(tfm);
2947 dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
2948 return ERR_PTR(-EINVAL);
2949 }
2950 return tfm;
2951}
2952
d8763023
AG
/* Handle a P_SYNC_PARAM / P_SYNC_PARAM_89 / P_SYNC_PARAM_95 packet.
 * The packet layout depends on the agreed protocol version (apv):
 *   <= 87: rate only
 *   == 88: rate + verify-alg string as trailing payload
 *   89-94: rate + verify-alg + csums-alg inside the fixed header
 *   >= 95: additionally the dynamic resync controller parameters
 * Validates/allocates the verify and csums digests, then commits all new
 * settings atomically under peer_seq_lock (which also guards against
 * drbd_nl_syncer_conf()).  Returns true on success; on incompatible
 * settings forces C_DISCONNECTING and returns false. */
static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
			     unsigned int packet_size)
{
	int ok = true;
	struct p_rs_param_95 *p = &mdev->tconn->data.rbuf.rs_param_95;
	unsigned int header_size, data_size, exp_max_sz;
	struct crypto_hash *verify_tfm = NULL;
	struct crypto_hash *csums_tfm = NULL;
	const int apv = mdev->tconn->agreed_pro_version;
	int *rs_plan_s = NULL;	/* replacement fifo buffer, committed below */
	int fifo_size = 0;

	/* maximum packet size we are willing to accept for this apv */
	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
		    : apv == 88 ? sizeof(struct p_rs_param)
					+ SHARED_SECRET_MAX
		    : apv <= 94 ? sizeof(struct p_rs_param_89)
		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);

	if (packet_size > exp_max_sz) {
		dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
		    packet_size, exp_max_sz);
		return false;
	}

	/* split the packet into the fixed header part (read below) and the
	 * trailing variable part (only non-empty for apv == 88) */
	if (apv <= 88) {
		header_size = sizeof(struct p_rs_param) - sizeof(struct p_header);
		data_size   = packet_size  - header_size;
	} else if (apv <= 94) {
		header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header);
		data_size   = packet_size  - header_size;
		D_ASSERT(data_size == 0);
	} else {
		header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header);
		data_size   = packet_size  - header_size;
		D_ASSERT(data_size == 0);
	}

	/* initialize verify_alg and csums_alg */
	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);

	if (drbd_recv(mdev->tconn, &p->head.payload, header_size) != header_size)
		return false;

	mdev->sync_conf.rate	  = be32_to_cpu(p->rate);

	if (apv >= 88) {
		if (apv == 88) {
			/* apv 88 sends the verify-alg as separate payload */
			if (data_size > SHARED_SECRET_MAX) {
				dev_err(DEV, "verify-alg too long, "
				    "peer wants %u, accepting only %u byte\n",
						data_size, SHARED_SECRET_MAX);
				return false;
			}

			if (drbd_recv(mdev->tconn, p->verify_alg, data_size) != data_size)
				return false;

			/* we expect NUL terminated string */
			/* but just in case someone tries to be evil */
			D_ASSERT(p->verify_alg[data_size-1] == 0);
			p->verify_alg[data_size-1] = 0;

		} else /* apv >= 89 */ {
			/* we still expect NUL terminated strings */
			/* but just in case someone tries to be evil */
			D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
			D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
		}

		/* verify-alg changed: allocate the new transform now, but
		 * only commit it below under the lock.  A change during the
		 * initial handshake is an error. */
		if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) {
			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
				dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
				    mdev->sync_conf.verify_alg, p->verify_alg);
				goto disconnect;
			}
			verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
					p->verify_alg, "verify-alg");
			if (IS_ERR(verify_tfm)) {
				verify_tfm = NULL;
				goto disconnect;
			}
		}

		/* same treatment for the csums-alg (apv >= 89 only) */
		if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) {
			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
				dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
				    mdev->sync_conf.csums_alg, p->csums_alg);
				goto disconnect;
			}
			csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
					p->csums_alg, "csums-alg");
			if (IS_ERR(csums_tfm)) {
				csums_tfm = NULL;
				goto disconnect;
			}
		}

		/* apv >= 95: dynamic resync speed controller parameters;
		 * resize the rs plan fifo if c_plan_ahead changed */
		if (apv > 94) {
			mdev->sync_conf.rate	  = be32_to_cpu(p->rate);
			mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
			mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
			mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
			mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);

			fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
			if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
				rs_plan_s   = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
				if (!rs_plan_s) {
					dev_err(DEV, "kmalloc of fifo_buffer failed");
					goto disconnect;
				}
			}
		}

		spin_lock(&mdev->peer_seq_lock);
		/* lock against drbd_nl_syncer_conf() */
		if (verify_tfm) {
			strcpy(mdev->sync_conf.verify_alg, p->verify_alg);
			mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1;
			crypto_free_hash(mdev->verify_tfm);
			mdev->verify_tfm = verify_tfm;
			dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
		}
		if (csums_tfm) {
			strcpy(mdev->sync_conf.csums_alg, p->csums_alg);
			mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1;
			crypto_free_hash(mdev->csums_tfm);
			mdev->csums_tfm = csums_tfm;
			dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
		}
		/* swap in the new plan fifo; also shrinks to 0 (NULL) */
		if (fifo_size != mdev->rs_plan_s.size) {
			kfree(mdev->rs_plan_s.values);
			mdev->rs_plan_s.values = rs_plan_s;
			mdev->rs_plan_s.size   = fifo_size;
			mdev->rs_planed = 0;
		}
		spin_unlock(&mdev->peer_seq_lock);
	}

	return ok;
disconnect:
	/* just for completeness: actually not needed,
	 * as this is not reached if csums_tfm was ok. */
	crypto_free_hash(csums_tfm);
	/* but free the verify_tfm again, if csums_tfm did not work out */
	crypto_free_hash(verify_tfm);
	drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
	return false;
}
3104
b411b363
PR
3105/* warn if the arguments differ by more than 12.5% */
3106static void warn_if_differ_considerably(struct drbd_conf *mdev,
3107 const char *s, sector_t a, sector_t b)
3108{
3109 sector_t d;
3110 if (a == 0 || b == 0)
3111 return;
3112 d = (a > b) ? (a - b) : (b - a);
3113 if (d > (a>>3) || d > (b>>3))
3114 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3115 (unsigned long long)a, (unsigned long long)b);
3116}
3117
d8763023
AG
/* Handle a P_SIZES packet: negotiate device sizes with the peer.
 * Stores the peer's backing-device and requested user sizes, refuses to
 * shrink a device with usable data during connect, re-determines our own
 * size (or adopts the peer's if diskless), and triggers a resync or
 * notifies the peer when sizes changed.  Returns true on success, false
 * after forcing C_DISCONNECTING or on a size-determination error. */
static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd,
			 unsigned int data_size)
{
	struct p_sizes *p = &mdev->tconn->data.rbuf.sizes;
	enum determine_dev_size dd = unchanged;
	sector_t p_size, p_usize, my_usize;
	int ldsc = 0; /* local disk size changed */
	enum dds_flags ddsf;

	p_size = be64_to_cpu(p->d_size);
	p_usize = be64_to_cpu(p->u_size);

	/* both nodes diskless is not a workable configuration */
	if (p_size == 0 && mdev->state.disk == D_DISKLESS) {
		dev_err(DEV, "some backing storage is needed\n");
		drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
		return false;
	}

	/* just store the peer's disk size for now.
	 * we still need to figure out whether we accept that. */
	mdev->p_size = p_size;

	if (get_ldev(mdev)) {
		warn_if_differ_considerably(mdev, "lower level device sizes",
			   p_size, drbd_get_max_capacity(mdev->ldev));
		warn_if_differ_considerably(mdev, "user requested size",
					    p_usize, mdev->ldev->dc.disk_size);

		/* if this is the first connect, or an otherwise expected
		 * param exchange, choose the minimum */
		if (mdev->state.conn == C_WF_REPORT_PARAMS)
			p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
					     p_usize);

		/* remember the old value so we can roll back below */
		my_usize = mdev->ldev->dc.disk_size;

		if (mdev->ldev->dc.disk_size != p_usize) {
			mdev->ldev->dc.disk_size = p_usize;
			dev_info(DEV, "Peer sets u_size to %lu sectors\n",
				 (unsigned long)mdev->ldev->dc.disk_size);
		}

		/* Never shrink a device with usable data during connect.
		   But allow online shrinking if we are connected. */
		if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
		   drbd_get_capacity(mdev->this_bdev) &&
		   mdev->state.disk >= D_OUTDATED &&
		   mdev->state.conn < C_CONNECTED) {
			dev_err(DEV, "The peer's disk size is too small!\n");
			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
			mdev->ldev->dc.disk_size = my_usize;
			put_ldev(mdev);
			return false;
		}
		put_ldev(mdev);
	}

	ddsf = be16_to_cpu(p->dds_flags);
	if (get_ldev(mdev)) {
		dd = drbd_determine_dev_size(mdev, ddsf);
		put_ldev(mdev);
		if (dd == dev_size_error)
			return false;
		drbd_md_sync(mdev);
	} else {
		/* I am diskless, need to accept the peer's size. */
		drbd_set_my_capacity(mdev, p_size);
	}

	mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
	drbd_reconsider_max_bio_size(mdev);

	/* detect a lower-level device resize underneath us */
	if (get_ldev(mdev)) {
		if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
			mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
			ldsc = 1;
		}

		put_ldev(mdev);
	}

	if (mdev->state.conn > C_WF_REPORT_PARAMS) {
		if (be64_to_cpu(p->c_size) !=
		    drbd_get_capacity(mdev->this_bdev) || ldsc) {
			/* we have different sizes, probably peer
			 * needs to know my new size... */
			drbd_send_sizes(mdev, 0, ddsf);
		}
		if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
		    (dd == grew && mdev->state.conn == C_CONNECTED)) {
			if (mdev->state.pdsk >= D_INCONSISTENT &&
			    mdev->state.disk >= D_INCONSISTENT) {
				if (ddsf & DDSF_NO_RESYNC)
					dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
				else
					resync_after_online_grow(mdev);
			} else
				/* defer the resync decision until after
				 * disk state negotiation */
				set_bit(RESYNC_AFTER_NEG, &mdev->flags);
		}
	}

	return true;
}
3221
d8763023
AG
3222static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd,
3223 unsigned int data_size)
b411b363 3224{
e42325a5 3225 struct p_uuids *p = &mdev->tconn->data.rbuf.uuids;
b411b363 3226 u64 *p_uuid;
62b0da3a 3227 int i, updated_uuids = 0;
b411b363 3228
b411b363
PR
3229 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3230
3231 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3232 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3233
3234 kfree(mdev->p_uuid);
3235 mdev->p_uuid = p_uuid;
3236
3237 if (mdev->state.conn < C_CONNECTED &&
3238 mdev->state.disk < D_INCONSISTENT &&
3239 mdev->state.role == R_PRIMARY &&
3240 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3241 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3242 (unsigned long long)mdev->ed_uuid);
3243 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
81e84650 3244 return false;
b411b363
PR
3245 }
3246
3247 if (get_ldev(mdev)) {
3248 int skip_initial_sync =
3249 mdev->state.conn == C_CONNECTED &&
31890f4a 3250 mdev->tconn->agreed_pro_version >= 90 &&
b411b363
PR
3251 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3252 (p_uuid[UI_FLAGS] & 8);
3253 if (skip_initial_sync) {
3254 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3255 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
20ceb2b2
LE
3256 "clear_n_write from receive_uuids",
3257 BM_LOCKED_TEST_ALLOWED);
b411b363
PR
3258 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3259 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3260 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3261 CS_VERBOSE, NULL);
3262 drbd_md_sync(mdev);
62b0da3a 3263 updated_uuids = 1;
b411b363
PR
3264 }
3265 put_ldev(mdev);
18a50fa2
PR
3266 } else if (mdev->state.disk < D_INCONSISTENT &&
3267 mdev->state.role == R_PRIMARY) {
3268 /* I am a diskless primary, the peer just created a new current UUID
3269 for me. */
62b0da3a 3270 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
b411b363
PR
3271 }
3272
3273 /* Before we test for the disk state, we should wait until an eventually
3274 ongoing cluster wide state change is finished. That is important if
3275 we are primary and are detaching from our disk. We need to see the
3276 new disk state... */
8410da8f
PR
3277 mutex_lock(mdev->state_mutex);
3278 mutex_unlock(mdev->state_mutex);
b411b363 3279 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
62b0da3a
LE
3280 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3281
3282 if (updated_uuids)
3283 drbd_print_uuids(mdev, "receiver updated UUIDs to");
b411b363 3284
81e84650 3285 return true;
b411b363
PR
3286}
3287
3288/**
3289 * convert_state() - Converts the peer's view of the cluster state to our point of view
3290 * @ps: The state as seen by the peer.
3291 */
3292static union drbd_state convert_state(union drbd_state ps)
3293{
3294 union drbd_state ms;
3295
3296 static enum drbd_conns c_tab[] = {
3297 [C_CONNECTED] = C_CONNECTED,
3298
3299 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3300 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3301 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3302 [C_VERIFY_S] = C_VERIFY_T,
3303 [C_MASK] = C_MASK,
3304 };
3305
3306 ms.i = ps.i;
3307
3308 ms.conn = c_tab[ps.conn];
3309 ms.peer = ps.role;
3310 ms.role = ps.peer;
3311 ms.pdsk = ps.disk;
3312 ms.disk = ps.pdsk;
3313 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3314
3315 return ms;
3316}
3317
d8763023
AG
/* Handle a state-change request from the peer (P_STATE_CHG_REQ or
 * P_CONN_ST_CHG_REQ).  Converts mask/val from the peer's point of view,
 * performs the requested change (connection-wide or per-device) and sends
 * the result back.  If we hold the "discard concurrent" role and a local
 * cluster-wide state change is in flight, the request is refused with
 * SS_CONCURRENT_ST_CHG instead.  Always returns true unless the transport
 * failed earlier. */
static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd,
			     unsigned int data_size)
{
	struct p_req_state *p = &mdev->tconn->data.rbuf.req_state;
	union drbd_state mask, val;
	enum drbd_state_rv rv;

	mask.i = be32_to_cpu(p->mask);
	val.i = be32_to_cpu(p->val);

	/* concurrent state change started locally wins if we are the
	 * designated "discard concurrent" side */
	if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) &&
	    mutex_is_locked(mdev->state_mutex)) {
		drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
		return true;
	}

	/* the request is expressed from the peer's point of view */
	mask = convert_state(mask);
	val = convert_state(val);

	if (cmd == P_CONN_ST_CHG_REQ) {
		rv = conn_request_state(mdev->tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY);
		conn_send_sr_reply(mdev->tconn, rv);
	} else {
		rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
		drbd_send_sr_reply(mdev, rv);
	}

	drbd_md_sync(mdev);

	return true;
}
3349
d8763023
AG
/* Handle a P_STATE packet: merge the peer's reported state into ours.
 * Reads our state under req_lock (retrying if it changed concurrently),
 * smooths over transient disk-state flapping around resync start/finish,
 * runs the sync handshake when a resync decision is due, and finally
 * commits the merged state with _drbd_set_state().  Returns true on
 * success, false after forcing a disconnect/protocol-error state. */
static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd,
			 unsigned int data_size)
{
	struct p_state *p = &mdev->tconn->data.rbuf.state;
	union drbd_state os, ns, peer_state;
	enum drbd_disk_state real_peer_disk;
	enum chg_state_flags cs_flags;
	int rv;

	peer_state.i = be32_to_cpu(p->state);

	real_peer_disk = peer_state.disk;
	if (peer_state.disk == D_NEGOTIATING) {
		/* translate D_NEGOTIATING using the peer's UUID flags */
		real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
		dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
	}

	spin_lock_irq(&mdev->tconn->req_lock);
 retry:
	/* snapshot our state; re-checked before committing below */
	os = ns = mdev->state;
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* peer says his disk is uptodate, while we think it is inconsistent,
	 * and this happens while we think we have a sync going on. */
	if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* If we are (becoming) SyncSource, but peer is still in sync
		 * preparation, ignore its uptodate-ness to avoid flapping, it
		 * will change to inconsistent once the peer reaches active
		 * syncing states.
		 * It may have changed syncer-paused flags, however, so we
		 * cannot ignore this completely. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

		/* if peer_state changes to connected at the same time,
		 * it explicitly notifies us that it finished resync.
		 * Maybe we should finish it up, too? */
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
				drbd_resync_finished(mdev);
			return true;
		}
	}

	/* peer says his disk is inconsistent, while we think it is uptodate,
	 * and this happens while the peer still thinks we have a sync going on,
	 * but we think we are already done with the sync.
	 * We ignore this to avoid flapping pdsk.
	 * This should not happen, if the peer is a recent version of drbd. */
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(mdev, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr  = (os.conn < C_CONNECTED);
		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));
		/* if we have both been inconsistent, and the peer has been
		 * forced to be UpToDate with --overwrite-data */
		cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.conn >= C_STARTING_SYNC_S &&
			peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);

		put_ldev(mdev);
		/* C_MASK from the handshake means "no usable resolution" */
		if (ns.conn == C_MASK) {
			ns.conn = C_CONNECTED;
			if (mdev->state.disk == D_NEGOTIATING) {
				drbd_force_state(mdev, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				/* dry-run mode: silently drop the connection */
				if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags))
					return false;
				D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
				drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
				return false;
			}
		}
	}

	spin_lock_irq(&mdev->tconn->req_lock);
	/* our state changed while the lock was dropped: redo the merge */
	if (mdev->state.i != os.i)
		goto retry;
	clear_bit(CONSIDER_RESYNC, &mdev->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = mdev->new_state_tmp.disk;
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &mdev->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporal network outages! */
		spin_unlock_irq(&mdev->tconn->req_lock);
		dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
		tl_clear(mdev->tconn);
		drbd_uuid_new_current(mdev);
		clear_bit(NEW_CUR_UUID, &mdev->flags);
		drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0));
		return false;
	}
	rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
	ns = mdev->state;
	spin_unlock_irq(&mdev->tconn->req_lock);

	if (rv < SS_SUCCESS) {
		drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
		return false;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING ) {
			/* we want resync, peer has not yet decided to sync... */
			/* Nowadays only used when forcing a node into primary role and
			   setting its disk to UpToDate with that */
			drbd_send_uuids(mdev);
			drbd_send_state(mdev);
		}
	}

	mdev->tconn->net_conf->want_lose = 0;

	drbd_md_sync(mdev); /* update connected indicator, la_size, ... */

	return true;
}
3502
d8763023
AG
/* Handle a P_SYNC_UUID packet: the sync source tells us the UUID to use
 * for the upcoming resync.  Waits until we have actually reached
 * C_WF_SYNC_UUID / C_BEHIND (or the connection/disk degraded), then sets
 * the current UUID in place (no history rotation) and starts the resync
 * as target.  Always returns true. */
static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packet cmd,
			     unsigned int data_size)
{
	struct p_rs_uuid *p = &mdev->tconn->data.rbuf.rs_uuid;

	/* the packet may arrive before our own state change went through */
	wait_event(mdev->misc_wait,
		   mdev->state.conn == C_WF_SYNC_UUID ||
		   mdev->state.conn == C_BEHIND ||
		   mdev->state.conn < C_CONNECTED ||
		   mdev->state.disk < D_NEGOTIATING);

	/* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */

	/* Here the _drbd_uuid_ functions are right, current should
	   _not_ be rotated into the history */
	if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
		_drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
		_drbd_uuid_set(mdev, UI_BITMAP, 0UL);

		drbd_print_uuids(mdev, "updated sync uuid");
		drbd_start_resync(mdev, C_SYNC_TARGET);

		put_ldev(mdev);
	} else
		dev_err(DEV, "Ignoring SyncUUID packet!\n");

	return true;
}
3531
2c46407d
AG
3532/**
3533 * receive_bitmap_plain
3534 *
3535 * Return 0 when done, 1 when another iteration is needed, and a negative error
3536 * code upon failure.
3537 */
3538static int
02918be2
PR
3539receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
3540 unsigned long *buffer, struct bm_xfer_ctx *c)
b411b363
PR
3541{
3542 unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
3543 unsigned want = num_words * sizeof(long);
2c46407d 3544 int err;
b411b363 3545
02918be2
PR
3546 if (want != data_size) {
3547 dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
2c46407d 3548 return -EIO;
b411b363
PR
3549 }
3550 if (want == 0)
2c46407d 3551 return 0;
de0ff338 3552 err = drbd_recv(mdev->tconn, buffer, want);
2c46407d
AG
3553 if (err != want) {
3554 if (err >= 0)
3555 err = -EIO;
3556 return err;
3557 }
b411b363
PR
3558
3559 drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);
3560
3561 c->word_offset += num_words;
3562 c->bit_offset = c->word_offset * BITS_PER_LONG;
3563 if (c->bit_offset > c->bm_bits)
3564 c->bit_offset = c->bm_bits;
3565
2c46407d 3566 return 1;
b411b363
PR
3567}
3568
2c46407d
AG
3569/**
3570 * recv_bm_rle_bits
3571 *
3572 * Return 0 when done, 1 when another iteration is needed, and a negative error
3573 * code upon failure.
3574 */
3575static int
b411b363
PR
3576recv_bm_rle_bits(struct drbd_conf *mdev,
3577 struct p_compressed_bm *p,
c6d25cfe
PR
3578 struct bm_xfer_ctx *c,
3579 unsigned int len)
b411b363
PR
3580{
3581 struct bitstream bs;
3582 u64 look_ahead;
3583 u64 rl;
3584 u64 tmp;
3585 unsigned long s = c->bit_offset;
3586 unsigned long e;
b411b363
PR
3587 int toggle = DCBP_get_start(p);
3588 int have;
3589 int bits;
3590
3591 bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p));
3592
3593 bits = bitstream_get_bits(&bs, &look_ahead, 64);
3594 if (bits < 0)
2c46407d 3595 return -EIO;
b411b363
PR
3596
3597 for (have = bits; have > 0; s += rl, toggle = !toggle) {
3598 bits = vli_decode_bits(&rl, look_ahead);
3599 if (bits <= 0)
2c46407d 3600 return -EIO;
b411b363
PR
3601
3602 if (toggle) {
3603 e = s + rl -1;
3604 if (e >= c->bm_bits) {
3605 dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
2c46407d 3606 return -EIO;
b411b363
PR
3607 }
3608 _drbd_bm_set_bits(mdev, s, e);
3609 }
3610
3611 if (have < bits) {
3612 dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
3613 have, bits, look_ahead,
3614 (unsigned int)(bs.cur.b - p->code),
3615 (unsigned int)bs.buf_len);
2c46407d 3616 return -EIO;
b411b363
PR
3617 }
3618 look_ahead >>= bits;
3619 have -= bits;
3620
3621 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
3622 if (bits < 0)
2c46407d 3623 return -EIO;
b411b363
PR
3624 look_ahead |= tmp << have;
3625 have += bits;
3626 }
3627
3628 c->bit_offset = s;
3629 bm_xfer_ctx_bit_to_word_offset(c);
3630
2c46407d 3631 return (s != c->bm_bits);
b411b363
PR
3632}
3633
2c46407d
AG
3634/**
3635 * decode_bitmap_c
3636 *
3637 * Return 0 when done, 1 when another iteration is needed, and a negative error
3638 * code upon failure.
3639 */
3640static int
b411b363
PR
3641decode_bitmap_c(struct drbd_conf *mdev,
3642 struct p_compressed_bm *p,
c6d25cfe
PR
3643 struct bm_xfer_ctx *c,
3644 unsigned int len)
b411b363
PR
3645{
3646 if (DCBP_get_code(p) == RLE_VLI_Bits)
c6d25cfe 3647 return recv_bm_rle_bits(mdev, p, c, len);
b411b363
PR
3648
3649 /* other variants had been implemented for evaluation,
3650 * but have been dropped as this one turned out to be "best"
3651 * during all our tests. */
3652
3653 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
3654 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
2c46407d 3655 return -EIO;
b411b363
PR
3656}
3657
3658void INFO_bm_xfer_stats(struct drbd_conf *mdev,
3659 const char *direction, struct bm_xfer_ctx *c)
3660{
3661 /* what would it take to transfer it "plaintext" */
c012949a 3662 unsigned plain = sizeof(struct p_header) *
b411b363
PR
3663 ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
3664 + c->bm_words * sizeof(long);
3665 unsigned total = c->bytes[0] + c->bytes[1];
3666 unsigned r;
3667
3668 /* total can not be zero. but just in case: */
3669 if (total == 0)
3670 return;
3671
3672 /* don't report if not compressed */
3673 if (total >= plain)
3674 return;
3675
3676 /* total < plain. check for overflow, still */
3677 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
3678 : (1000 * total / plain);
3679
3680 if (r > 1000)
3681 r = 1000;
3682
3683 r = 1000 - r;
3684 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
3685 "total %u; compression: %u.%u%%\n",
3686 direction,
3687 c->bytes[1], c->packets[1],
3688 c->bytes[0], c->packets[0],
3689 total, r/10, r % 10);
3690}
3691
3692/* Since we are processing the bitfield from lower addresses to higher,
3693 it does not matter if the process it in 32 bit chunks or 64 bit
3694 chunks as long as it is little endian. (Understand it as byte stream,
3695 beginning with the lowest byte...) If we would use big endian
3696 we would need to process it from the highest address to the lowest,
3697 in order to be agnostic to the 32 vs 64 bits issue.
3698
3699 returns 0 on failure, 1 if we successfully received it. */
d8763023
AG
static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd,
			  unsigned int data_size)
{
	struct bm_xfer_ctx c;
	void *buffer;
	int err;
	int ok = false;
	struct p_header *h = &mdev->tconn->data.rbuf.header;
	struct packet_info pi;

	drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
	/* you are supposed to send additional out-of-sync information
	 * if you actually set bits during this phase */

	/* maybe we should use some per thread scratch page,
	 * and allocate that during initial device creation? */
	buffer = (unsigned long *) __get_free_page(GFP_NOIO);
	if (!buffer) {
		dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
		goto out;
	}

	c = (struct bm_xfer_ctx) {
		.bm_bits = drbd_bm_bits(mdev),
		.bm_words = drbd_bm_words(mdev),
	};

	/* Keep consuming bitmap packets (plain or RLE-compressed) until the
	 * decoder reports that the whole bitmap has been transferred. */
	for(;;) {
		if (cmd == P_BITMAP) {
			err = receive_bitmap_plain(mdev, data_size, buffer, &c);
		} else if (cmd == P_COMPRESSED_BITMAP) {
			/* MAYBE: sanity check that we speak proto >= 90,
			 * and the feature is enabled! */
			struct p_compressed_bm *p;

			if (data_size > BM_PACKET_PAYLOAD_BYTES) {
				dev_err(DEV, "ReportCBitmap packet too large\n");
				goto out;
			}
			/* use the page buff */
			p = buffer;
			memcpy(p, h, sizeof(*h));
			if (drbd_recv(mdev->tconn, p->head.payload, data_size) != data_size)
				goto out;
			/* the payload must at least carry the compressed-bitmap
			 * sub-header itself, otherwise the packet is malformed */
			if (data_size <= (sizeof(*p) - sizeof(p->head))) {
				dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
				goto out;
			}
			err = decode_bitmap_c(mdev, p, &c, data_size);
		} else {
			dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
			goto out;
		}

		/* index 1 counts plain, index 0 counts compressed packets */
		c.packets[cmd == P_BITMAP]++;
		c.bytes[cmd == P_BITMAP] += sizeof(struct p_header) + data_size;

		/* err == 0: done; err > 0: expect more packets; err < 0: error */
		if (err <= 0) {
			if (err < 0)
				goto out;
			break;
		}
		if (!drbd_recv_header(mdev->tconn, &pi))
			goto out;
		cmd = pi.cmd;
		data_size = pi.size;
	}

	INFO_bm_xfer_stats(mdev, "receive", &c);

	if (mdev->state.conn == C_WF_BITMAP_T) {
		enum drbd_state_rv rv;

		/* we are the sync target: reply with our own bitmap,
		 * then move on towards the actual resync */
		ok = !drbd_send_bitmap(mdev);
		if (!ok)
			goto out;
		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
		rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
		D_ASSERT(rv == SS_SUCCESS);
	} else if (mdev->state.conn != C_WF_BITMAP_S) {
		/* admin may have requested C_DISCONNECTING,
		 * other threads may have noticed network errors */
		dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
		    drbd_conn_str(mdev->state.conn));
	}

	ok = true;
 out:
	drbd_bm_unlock(mdev);
	/* as sync source, start the resync now that both sides hold the bitmap */
	if (ok && mdev->state.conn == C_WF_BITMAP_S)
		drbd_start_resync(mdev, C_SYNC_SOURCE);
	free_page((unsigned long) buffer);
	return ok;
}
3794
d8763023
AG
3795static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd,
3796 unsigned int data_size)
b411b363
PR
3797{
3798 /* TODO zero copy sink :) */
3799 static char sink[128];
3800 int size, want, r;
3801
02918be2
PR
3802 dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
3803 cmd, data_size);
b411b363 3804
02918be2 3805 size = data_size;
b411b363
PR
3806 while (size > 0) {
3807 want = min_t(int, size, sizeof(sink));
de0ff338 3808 r = drbd_recv(mdev->tconn, sink, want);
841ce241
AG
3809 if (!expect(r > 0))
3810 break;
b411b363
PR
3811 size -= r;
3812 }
3813 return size == 0;
3814}
3815
d8763023
AG
3816static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packet cmd,
3817 unsigned int data_size)
0ced55a3 3818{
e7f52dfb
LE
3819 /* Make sure we've acked all the TCP data associated
3820 * with the data requests being unplugged */
e42325a5 3821 drbd_tcp_quickack(mdev->tconn->data.socket);
0ced55a3 3822
81e84650 3823 return true;
0ced55a3
PR
3824}
3825
d8763023
AG
3826static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd,
3827 unsigned int data_size)
73a01a18 3828{
e42325a5 3829 struct p_block_desc *p = &mdev->tconn->data.rbuf.block_desc;
73a01a18 3830
f735e363
LE
3831 switch (mdev->state.conn) {
3832 case C_WF_SYNC_UUID:
3833 case C_WF_BITMAP_T:
3834 case C_BEHIND:
3835 break;
3836 default:
3837 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
3838 drbd_conn_str(mdev->state.conn));
3839 }
3840
73a01a18
PR
3841 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
3842
81e84650 3843 return true;
73a01a18
PR
3844}
3845
d8763023
AG
/* Handler signature for packets arriving on the data socket; to_receive
 * is the payload size still to be read from the socket. */
typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packet cmd,
				  unsigned int to_receive);

/* Dispatch-table entry describing one data-socket packet type. */
struct data_cmd {
	int expect_payload;		/* non-zero: packet may carry extra payload */
	size_t pkt_size;		/* size of the fixed (sub-)header */
	drbd_cmd_handler_f function;	/* handler; NULL marks an unsupported type */
};

/* Indexed by enum drbd_packet; consulted by drbdd() for every packet. */
static struct data_cmd drbd_cmd_handler[] = {
	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply } ,
	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier } ,
	[P_BITMAP]	    = { 1, sizeof(struct p_header), receive_bitmap } ,
	[P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } ,
	[P_UNPLUG_REMOTE]   = { 0, sizeof(struct p_header), receive_UnplugRemote },
	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_SYNC_PARAM]	    = { 1, sizeof(struct p_header), receive_SyncParam },
	[P_SYNC_PARAM89]    = { 1, sizeof(struct p_header), receive_SyncParam },
	[P_PROTOCOL]	    = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
	[P_SYNC_UUID]	    = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
	[P_OV_REQUEST]	    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_OV_REPLY]	    = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
};
3880
/* All handler functions that expect a sub-header get that sub-header in
   mdev->tconn->data.rbuf.header.head.payload.

   Usually in mdev->tconn->data.rbuf.header.head the callback can find the usual
   p_header, but they may not rely on that. Since there is also p_header95 !
 */
b411b363 3887
/* Receiver main loop: read packet headers from the data socket and
 * dispatch each packet to the handler registered in drbd_cmd_handler[].
 * Any protocol violation or receive error moves the connection to
 * C_PROTOCOL_ERROR. Runs until the receiver thread is asked to stop. */
static void drbdd(struct drbd_tconn *tconn)
{
	struct p_header *header = &tconn->data.rbuf.header;
	struct packet_info pi;
	size_t shs; /* sub header size */
	int rv;

	while (get_t_state(&tconn->receiver) == RUNNING) {
		drbd_thread_current_set_cpu(&tconn->receiver);
		if (!drbd_recv_header(tconn, &pi))
			goto err_out;

		/* reject packet types we have no handler for */
		if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) ||
		    !drbd_cmd_handler[pi.cmd].function)) {
			conn_err(tconn, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size);
			goto err_out;
		}

		/* extra payload is only allowed if the table says so */
		shs = drbd_cmd_handler[pi.cmd].pkt_size - sizeof(struct p_header);
		if (pi.size - shs > 0 && !drbd_cmd_handler[pi.cmd].expect_payload) {
			conn_err(tconn, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size);
			goto err_out;
		}

		if (shs) {
			/* pull the fixed-size sub header into the receive buffer */
			rv = drbd_recv(tconn, &header->payload, shs);
			if (unlikely(rv != shs)) {
				if (!signal_pending(current))
					conn_warn(tconn, "short read while reading sub header: rv=%d\n", rv);
				goto err_out;
			}
		}

		/* dispatch; the handler receives the remaining payload itself */
		rv = drbd_cmd_handler[pi.cmd].function(vnr_to_mdev(tconn, pi.vnr), pi.cmd, pi.size - shs);

		if (unlikely(!rv)) {
			conn_err(tconn, "error receiving %s, l: %d!\n",
			    cmdname(pi.cmd), pi.size);
			goto err_out;
		}
	}

	/* only reachable via the gotos above, never by falling through */
	if (0) {
	err_out:
		conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
	}
}
3935
0e29d163 3936void conn_flush_workqueue(struct drbd_tconn *tconn)
b411b363
PR
3937{
3938 struct drbd_wq_barrier barr;
3939
3940 barr.w.cb = w_prev_work_done;
0e29d163 3941 barr.w.tconn = tconn;
b411b363 3942 init_completion(&barr.done);
0e29d163 3943 drbd_queue_work(&tconn->data.work, &barr.w);
b411b363
PR
3944 wait_for_completion(&barr.done);
3945}
3946
/* Tear down the network connection: stop the asender thread, close the
 * sockets, run per-volume cleanup, and move the connection state to
 * C_UNCONNECTED — or all the way to C_STANDALONE when the admin asked
 * for a disconnect. No-op if already standalone. */
static void drbd_disconnect(struct drbd_tconn *tconn)
{
	enum drbd_conns oc;
	int rv = SS_UNKNOWN_ERROR;

	if (tconn->cstate == C_STANDALONE)
		return;

	/* asender does not clean up anything. it must not interfere, either */
	drbd_thread_stop(&tconn->asender);
	drbd_free_sock(tconn);

	/* per-volume cleanup for every device on this connection */
	idr_for_each(&tconn->volumes, drbd_disconnected, tconn);

	conn_info(tconn, "Connection closed\n");

	spin_lock_irq(&tconn->req_lock);
	oc = tconn->cstate;
	if (oc >= C_UNCONNECTED)
		rv = _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);

	spin_unlock_irq(&tconn->req_lock);

	if (oc == C_DISCONNECTING) {
		/* wait until nobody holds a reference on net_conf anymore,
		 * then it is safe to free the configuration */
		wait_event(tconn->net_cnt_wait, atomic_read(&tconn->net_cnt) == 0);

		crypto_free_hash(tconn->cram_hmac_tfm);
		tconn->cram_hmac_tfm = NULL;

		kfree(tconn->net_conf);
		tconn->net_conf = NULL;
		conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE);
	}
}
3981
/* Per-volume disconnect cleanup, invoked via idr_for_each() from
 * drbd_disconnect(). Waits for in-flight epoch entries, cancels resync
 * bookkeeping, flushes the work queue, clears the transfer log, and
 * verifies all epoch-entry lists are empty. Always returns 0 so the
 * idr iteration continues over the remaining volumes. */
static int drbd_disconnected(int vnr, void *p, void *data)
{
	struct drbd_conf *mdev = (struct drbd_conf *)p;
	enum drbd_fencing_p fp;
	unsigned int i;

	/* wait for current activity to cease. */
	spin_lock_irq(&mdev->tconn->req_lock);
	_drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
	_drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
	_drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* We do not have data structures that would allow us to
	 * get the rs_pending_cnt down to 0 again.
	 * * On C_SYNC_TARGET we do not have any data structures describing
	 * the pending RSDataRequest's we have sent.
	 * * On C_SYNC_SOURCE there is no data structure that tracks
	 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
	 * And no, it is not the sum of the reference counts in the
	 * resync_LRU. The resync_LRU tracks the whole operation including
	 * the disk-IO, while the rs_pending_cnt only tracks the blocks
	 * on the fly. */
	drbd_rs_cancel_all(mdev);
	mdev->rs_total = 0;
	mdev->rs_failed = 0;
	atomic_set(&mdev->rs_pending_cnt, 0);
	wake_up(&mdev->misc_wait);

	del_timer(&mdev->request_timer);

	/* make sure the timer is not rearmed, then run its function once
	 * more by hand to flush anything it would have done */
	del_timer_sync(&mdev->resync_timer);
	resync_timer_fn((unsigned long)mdev);

	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
	 * w_make_resync_request etc. which may still be on the worker queue
	 * to be "canceled" */
	drbd_flush_workqueue(mdev);

	/* This also does reclaim_net_ee(). If we do this too early, we might
	 * miss some resync ee and pages.*/
	drbd_process_done_ee(mdev);

	kfree(mdev->p_uuid);
	mdev->p_uuid = NULL;

	if (!is_susp(mdev->state))
		tl_clear(mdev->tconn);

	drbd_md_sync(mdev);

	fp = FP_DONT_CARE;
	if (get_ldev(mdev)) {
		fp = mdev->ldev->dc.fencing;
		put_ldev(mdev);
	}

	/* a Primary with a fencing policy must try to outdate the peer */
	if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
		drbd_try_outdate_peer_async(mdev);

	/* serialize with bitmap writeout triggered by the state change,
	 * if any. */
	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));

	/* tcp_close and release of sendpage pages can be deferred. I don't
	 * want to use SO_LINGER, because apparently it can be deferred for
	 * more than 20 seconds (longest time I checked).
	 *
	 * Actually we don't care for exactly when the network stack does its
	 * put_page(), but release our reference on these pages right here.
	 */
	i = drbd_release_ee(mdev, &mdev->net_ee);
	if (i)
		dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
	i = atomic_read(&mdev->pp_in_use_by_net);
	if (i)
		dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
	i = atomic_read(&mdev->pp_in_use);
	if (i)
		dev_info(DEV, "pp_in_use = %d, expected 0\n", i);

	D_ASSERT(list_empty(&mdev->read_ee));
	D_ASSERT(list_empty(&mdev->active_ee));
	D_ASSERT(list_empty(&mdev->sync_ee));
	D_ASSERT(list_empty(&mdev->done_ee));

	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
	atomic_set(&mdev->current_epoch->epoch_size, 0);
	D_ASSERT(list_empty(&mdev->current_epoch->list));

	return 0;
}
4074
4075/*
4076 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4077 * we can agree on is stored in agreed_pro_version.
4078 *
4079 * feature flags and the reserved array should be enough room for future
4080 * enhancements of the handshake protocol, and possible plugins...
4081 *
4082 * for now, they are expected to be zero, but ignored.
4083 */
8a22cccc 4084static int drbd_send_handshake(struct drbd_tconn *tconn)
b411b363 4085{
e6b3ea83 4086 /* ASSERT current == mdev->tconn->receiver ... */
8a22cccc 4087 struct p_handshake *p = &tconn->data.sbuf.handshake;
b411b363
PR
4088 int ok;
4089
8a22cccc
PR
4090 if (mutex_lock_interruptible(&tconn->data.mutex)) {
4091 conn_err(tconn, "interrupted during initial handshake\n");
b411b363
PR
4092 return 0; /* interrupted. not ok. */
4093 }
4094
8a22cccc
PR
4095 if (tconn->data.socket == NULL) {
4096 mutex_unlock(&tconn->data.mutex);
b411b363
PR
4097 return 0;
4098 }
4099
4100 memset(p, 0, sizeof(*p));
4101 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4102 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
8a22cccc
PR
4103 ok = _conn_send_cmd(tconn, 0, tconn->data.socket, P_HAND_SHAKE,
4104 &p->head, sizeof(*p), 0);
4105 mutex_unlock(&tconn->data.mutex);
b411b363
PR
4106 return ok;
4107}
4108
4109/*
4110 * return values:
4111 * 1 yes, we have a valid connection
4112 * 0 oops, did not work out, please try again
4113 * -1 peer talks different language,
4114 * no point in trying again, please go standalone.
4115 */
65d11ed6 4116static int drbd_do_handshake(struct drbd_tconn *tconn)
b411b363 4117{
65d11ed6
PR
4118 /* ASSERT current == tconn->receiver ... */
4119 struct p_handshake *p = &tconn->data.rbuf.handshake;
02918be2 4120 const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80);
77351055 4121 struct packet_info pi;
b411b363
PR
4122 int rv;
4123
65d11ed6 4124 rv = drbd_send_handshake(tconn);
b411b363
PR
4125 if (!rv)
4126 return 0;
4127
65d11ed6 4128 rv = drbd_recv_header(tconn, &pi);
b411b363
PR
4129 if (!rv)
4130 return 0;
4131
77351055 4132 if (pi.cmd != P_HAND_SHAKE) {
65d11ed6 4133 conn_err(tconn, "expected HandShake packet, received: %s (0x%04x)\n",
77351055 4134 cmdname(pi.cmd), pi.cmd);
b411b363
PR
4135 return -1;
4136 }
4137
77351055 4138 if (pi.size != expect) {
65d11ed6 4139 conn_err(tconn, "expected HandShake length: %u, received: %u\n",
77351055 4140 expect, pi.size);
b411b363
PR
4141 return -1;
4142 }
4143
65d11ed6 4144 rv = drbd_recv(tconn, &p->head.payload, expect);
b411b363
PR
4145
4146 if (rv != expect) {
0ddc5549 4147 if (!signal_pending(current))
65d11ed6 4148 conn_warn(tconn, "short read receiving handshake packet: l=%u\n", rv);
b411b363
PR
4149 return 0;
4150 }
4151
b411b363
PR
4152 p->protocol_min = be32_to_cpu(p->protocol_min);
4153 p->protocol_max = be32_to_cpu(p->protocol_max);
4154 if (p->protocol_max == 0)
4155 p->protocol_max = p->protocol_min;
4156
4157 if (PRO_VERSION_MAX < p->protocol_min ||
4158 PRO_VERSION_MIN > p->protocol_max)
4159 goto incompat;
4160
65d11ed6 4161 tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
b411b363 4162
65d11ed6
PR
4163 conn_info(tconn, "Handshake successful: "
4164 "Agreed network protocol version %d\n", tconn->agreed_pro_version);
b411b363
PR
4165
4166 return 1;
4167
4168 incompat:
65d11ed6 4169 conn_err(tconn, "incompatible DRBD dialects: "
b411b363
PR
4170 "I support %d-%d, peer supports %d-%d\n",
4171 PRO_VERSION_MIN, PRO_VERSION_MAX,
4172 p->protocol_min, p->protocol_max);
4173 return -1;
4174}
4175
4176#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/* Stub used when the kernel lacks CONFIG_CRYPTO_HMAC: challenge-response
 * authentication cannot work, so always refuse (-1 = don't retry).
 * Note: this must log via conn_err(tconn, ...) — there is no mdev in
 * scope here, so the dev_err(DEV, ...) form cannot even compile in this
 * configuration. */
static int drbd_do_auth(struct drbd_tconn *tconn)
{
	conn_err(tconn, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
	conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
4183#else
4184#define CHALLENGE_LEN 64
b10d96cb
JT
4185
4186/* Return value:
4187 1 - auth succeeded,
4188 0 - failed, try again (network error),
4189 -1 - auth failed, don't try again.
4190*/
4191
/* CRAM-HMAC challenge-response authentication against the peer, using
 * the shared secret from net_conf. rv semantics: 1 = authenticated,
 * 0 = transient (network) failure, retry; -1 = hard failure, give up. */
static int drbd_do_auth(struct drbd_tconn *tconn)
{
	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
	struct scatterlist sg;
	char *response = NULL;
	char *right_response = NULL;
	char *peers_ch = NULL;
	unsigned int key_len = strlen(tconn->net_conf->shared_secret);
	unsigned int resp_size;
	struct hash_desc desc;
	struct packet_info pi;
	int rv;

	desc.tfm = tconn->cram_hmac_tfm;
	desc.flags = 0;

	/* key the HMAC transform with the configured shared secret */
	rv = crypto_hash_setkey(tconn->cram_hmac_tfm,
				(u8 *)tconn->net_conf->shared_secret, key_len);
	if (rv) {
		conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	/* send our random challenge to the peer */
	get_random_bytes(my_challenge, CHALLENGE_LEN);

	rv = conn_send_cmd2(tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN);
	if (!rv)
		goto fail;

	/* expect the peer's challenge in return */
	rv = drbd_recv_header(tconn, &pi);
	if (!rv)
		goto fail;

	if (pi.cmd != P_AUTH_CHALLENGE) {
		conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size > CHALLENGE_LEN * 2) {
		conn_err(tconn, "expected AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (peers_ch == NULL) {
		conn_err(tconn, "kmalloc of peers_ch failed\n");
		rv = -1;
		goto fail;
	}

	rv = drbd_recv(tconn, peers_ch, pi.size);

	if (rv != pi.size) {
		if (!signal_pending(current))
			conn_warn(tconn, "short read AuthChallenge: l=%u\n", rv);
		rv = 0;
		goto fail;
	}

	/* compute our HMAC response over the peer's challenge */
	resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (response == NULL) {
		conn_err(tconn, "kmalloc of response failed\n");
		rv = -1;
		goto fail;
	}

	sg_init_table(&sg, 1);
	sg_set_buf(&sg, peers_ch, pi.size);

	rv = crypto_hash_digest(&desc, &sg, sg.length, response);
	if (rv) {
		conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	rv = conn_send_cmd2(tconn, P_AUTH_RESPONSE, response, resp_size);
	if (!rv)
		goto fail;

	/* expect the peer's HMAC over OUR challenge */
	rv = drbd_recv_header(tconn, &pi);
	if (!rv)
		goto fail;

	if (pi.cmd != P_AUTH_RESPONSE) {
		conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size != resp_size) {
		conn_err(tconn, "expected AuthResponse payload of wrong size\n");
		rv = 0;
		goto fail;
	}

	/* reuse the response buffer for the peer's answer */
	rv = drbd_recv(tconn, response , resp_size);

	if (rv != resp_size) {
		if (!signal_pending(current))
			conn_warn(tconn, "short read receiving AuthResponse: l=%u\n", rv);
		rv = 0;
		goto fail;
	}

	/* compute the expected answer ourselves and compare */
	right_response = kmalloc(resp_size, GFP_NOIO);
	if (right_response == NULL) {
		conn_err(tconn, "kmalloc of right_response failed\n");
		rv = -1;
		goto fail;
	}

	sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);

	rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
	if (rv) {
		conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		conn_info(tconn, "Peer authenticated using %d bytes of '%s' HMAC\n",
		     resp_size, tconn->net_conf->cram_hmac_alg);
	else
		rv = -1;

 fail:
	/* kfree(NULL) is a no-op, so unconditional frees are fine here */
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);

	return rv;
}
4334#endif
4335
4336int drbdd_init(struct drbd_thread *thi)
4337{
392c8801 4338 struct drbd_tconn *tconn = thi->tconn;
b411b363
PR
4339 int h;
4340
4d641dd7 4341 conn_info(tconn, "receiver (re)started\n");
b411b363
PR
4342
4343 do {
4d641dd7 4344 h = drbd_connect(tconn);
b411b363 4345 if (h == 0) {
4d641dd7 4346 drbd_disconnect(tconn);
20ee6390 4347 schedule_timeout_interruptible(HZ);
b411b363
PR
4348 }
4349 if (h == -1) {
4d641dd7 4350 conn_warn(tconn, "Discarding network configuration.\n");
bbeb641c 4351 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363
PR
4352 }
4353 } while (h == 0);
4354
4355 if (h > 0) {
4d641dd7
PR
4356 if (get_net_conf(tconn)) {
4357 drbdd(tconn);
4358 put_net_conf(tconn);
b411b363
PR
4359 }
4360 }
4361
4d641dd7 4362 drbd_disconnect(tconn);
b411b363 4363
4d641dd7 4364 conn_info(tconn, "receiver terminated\n");
b411b363
PR
4365 return 0;
4366}
4367
4368/* ********* acknowledge sender ******** */
4369
d8763023 4370static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd)
b411b363 4371{
257d0af6 4372 struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply;
fc3b10a4 4373 struct drbd_tconn *tconn = mdev->tconn;
b411b363
PR
4374
4375 int retcode = be32_to_cpu(p->retcode);
4376
fc3b10a4
PR
4377 if (cmd == P_STATE_CHG_REPLY) {
4378 if (retcode >= SS_SUCCESS) {
4379 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4380 } else {
4381 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4382 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4383 drbd_set_st_err_str(retcode), retcode);
4384 }
4385 wake_up(&mdev->state_wait);
4386 } else /* conn == P_CONN_ST_CHG_REPLY */ {
4387 if (retcode >= SS_SUCCESS) {
4388 set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags);
4389 } else {
4390 set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags);
4391 conn_err(tconn, "Requested state change failed by peer: %s (%d)\n",
4392 drbd_set_st_err_str(retcode), retcode);
4393 }
4394 wake_up(&tconn->ping_wait);
b411b363 4395 }
81e84650 4396 return true;
b411b363
PR
4397}
4398
d8763023 4399static int got_Ping(struct drbd_conf *mdev, enum drbd_packet cmd)
b411b363 4400{
2a67d8b9 4401 return drbd_send_ping_ack(mdev->tconn);
b411b363
PR
4402
4403}
4404
/* Peer acknowledged our ping: restore the normal receive timeout on
 * the meta socket and wake anyone waiting for the ack (first ack only). */
static int got_PingAck(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct drbd_tconn *tconn = mdev->tconn;
	/* restore idle timeout */
	tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
	if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags))
		wake_up(&tconn->ping_wait);

	return true;
}
4415
/* Checksum-based resync: peer confirms a block is already in sync, so
 * mark it clean locally and account it in the resync statistics.
 * Requires protocol >= 89 (checksum resync feature). */
static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);

	D_ASSERT(mdev->tconn->agreed_pro_version >= 89);

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	/* only touch the bitmap while we hold a local-disk reference */
	if (get_ldev(mdev)) {
		drbd_rs_complete_io(mdev, sector);
		drbd_set_in_sync(mdev, sector, blksize);
		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
		mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
		put_ldev(mdev);
	}
	dec_rs_pending(mdev);
	/* account the skipped block as "resynced" throughput (in sectors) */
	atomic_add(blksize >> 9, &mdev->rs_sect_in);

	return true;
}
4438
bc9c5c41
AG
4439static int
4440validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
4441 struct rb_root *root, const char *func,
4442 enum drbd_req_event what, bool missing_ok)
b411b363
PR
4443{
4444 struct drbd_request *req;
4445 struct bio_and_error m;
4446
87eeee41 4447 spin_lock_irq(&mdev->tconn->req_lock);
bc9c5c41 4448 req = find_request(mdev, root, id, sector, missing_ok, func);
b411b363 4449 if (unlikely(!req)) {
87eeee41 4450 spin_unlock_irq(&mdev->tconn->req_lock);
81e84650 4451 return false;
b411b363
PR
4452 }
4453 __req_mod(req, what, &m);
87eeee41 4454 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363
PR
4455
4456 if (m.bio)
4457 complete_master_bio(mdev, &m);
81e84650 4458 return true;
b411b363
PR
4459}
4460
/* Peer acknowledged a write (application or resync). Map the ack packet
 * type to the corresponding request event and apply it to the request,
 * or — for resync writes tagged ID_SYNCER — just update the bitmap. */
static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);
	enum drbd_req_event what;

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		/* resync write: no request object to update */
		drbd_set_in_sync(mdev, sector, blksize);
		dec_rs_pending(mdev);
		return true;
	}
	/* the D_ASSERTs document which wire protocol each ack belongs to */
	switch (cmd) {
	case P_RS_WRITE_ACK:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
		what = WRITE_ACKED_BY_PEER_AND_SIS;
		break;
	case P_WRITE_ACK:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
		what = WRITE_ACKED_BY_PEER;
		break;
	case P_RECV_ACK:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B);
		what = RECV_ACKED_BY_PEER;
		break;
	case P_DISCARD_WRITE:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
		what = DISCARD_WRITE;
		break;
	case P_RETRY_WRITE:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
		what = POSTPONE_WRITE;
		break;
	default:
		D_ASSERT(0);
		return false;
	}

	return validate_req_change_req_state(mdev, p->block_id, sector,
					     &mdev->write_requests, __func__,
					     what, false);
}
4505
/* Peer could not satisfy a write (negative ack): fail the request, or
 * for resync writes record the I/O failure. In protocols A/B the
 * request may legitimately be gone already (missing_ok); then we just
 * mark the range out of sync. */
static int got_NegAck(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
	sector_t sector = be64_to_cpu(p->sector);
	int size = be32_to_cpu(p->blksize);
	bool missing_ok = mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A ||
			  mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B;
	bool found;

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		/* failed resync write: no request object exists */
		dec_rs_pending(mdev);
		drbd_rs_failed_io(mdev, sector, size);
		return true;
	}

	found = validate_req_change_req_state(mdev, p->block_id, sector,
					      &mdev->write_requests, __func__,
					      NEG_ACKED, missing_ok);
	if (!found) {
		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
		   The master bio might already be completed, therefore the
		   request is no longer in the collision hash. */
		/* In Protocol B we might already have got a P_RECV_ACK
		   but then get a P_NEG_ACK afterwards. */
		if (!missing_ok)
			return false;
		drbd_set_out_of_sync(mdev, sector, size);
	}
	return true;
}
4538
d8763023 4539static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
b411b363 4540{
257d0af6 4541 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
b411b363
PR
4542 sector_t sector = be64_to_cpu(p->sector);
4543
4544 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
7be8da07 4545
b411b363
PR
4546 dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
4547 (unsigned long long)sector, be32_to_cpu(p->blksize));
4548
4549 return validate_req_change_req_state(mdev, p->block_id, sector,
bc9c5c41 4550 &mdev->read_requests, __func__,
8554df1c 4551 NEG_ACKED, false);
b411b363
PR
4552}
4553
/* Peer could not serve a resync read request (P_NEG_RS_DREPLY), or
 * canceled one (P_RS_CANCEL): finish the in-flight resync I/O and, for
 * the negative reply, record the range as failed. */
static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	sector_t sector;
	int size;
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	dec_rs_pending(mdev);

	if (get_ldev_if_state(mdev, D_FAILED)) {
		drbd_rs_complete_io(mdev, sector);
		switch (cmd) {
		case P_NEG_RS_DREPLY:
			drbd_rs_failed_io(mdev, sector, size);
			/* fall through: a cancel needs no failure accounting */
		case P_RS_CANCEL:
			break;
		default:
			D_ASSERT(0);
			put_ldev(mdev);
			return false;
		}
		put_ldev(mdev);
	}

	return true;
}
4584
d8763023 4585static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd)
b411b363 4586{
257d0af6 4587 struct p_barrier_ack *p = &mdev->tconn->meta.rbuf.barrier_ack;
b411b363 4588
2f5cdd0b 4589 tl_release(mdev->tconn, p->barrier, be32_to_cpu(p->set_size));
b411b363 4590
c4752ef1
PR
4591 if (mdev->state.conn == C_AHEAD &&
4592 atomic_read(&mdev->ap_in_flight) == 0 &&
370a43e7
PR
4593 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) {
4594 mdev->start_resync_timer.expires = jiffies + HZ;
4595 add_timer(&mdev->start_resync_timer);
c4752ef1
PR
4596 }
4597
81e84650 4598 return true;
b411b363
PR
4599}
4600
/* Online-verify result from the peer: record out-of-sync findings,
 * update progress, and when the last block has been verified, hand
 * the finish-up work to the worker thread. */
static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
	struct drbd_work *w;
	sector_t sector;
	int size;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
		drbd_ov_oos_found(mdev, sector, size);
	else
		ov_oos_print(mdev);

	/* without the local disk there is nothing more to account */
	if (!get_ldev(mdev))
		return true;

	drbd_rs_complete_io(mdev, sector);
	dec_rs_pending(mdev);

	--mdev->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	if ((mdev->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(mdev, mdev->ov_left);

	if (mdev->ov_left == 0) {
		/* verify finished: defer the wrap-up to the worker; on OOM
		 * fall back to finishing synchronously right here */
		w = kmalloc(sizeof(*w), GFP_NOIO);
		if (w) {
			w->cb = w_ov_finished;
			w->mdev = mdev;
			drbd_queue_work_front(&mdev->tconn->data.work, w);
		} else {
			dev_err(DEV, "kmalloc(w) failed.");
			ov_oos_print(mdev);
			drbd_resync_finished(mdev);
		}
	}
	put_ldev(mdev);
	return true;
}
4645
d8763023 4646static int got_skip(struct drbd_conf *mdev, enum drbd_packet cmd)
0ced55a3 4647{
81e84650 4648 return true;
0ced55a3
PR
4649}
4650
/*
 * idr_for_each() callback: drain the done_ee list of one volume.
 * Returns nonzero (stopping the iteration) if processing failed.
 */
static int _drbd_process_done_ee(int vnr, void *p, void *data)
{
	struct drbd_conf *mdev = p;

	return drbd_process_done_ee(mdev) ? 0 : 1;
}
4656
4657static int _check_ee_empty(int vnr, void *p, void *data)
4658{
4659 struct drbd_conf *mdev = (struct drbd_conf *)p;
4660 struct drbd_tconn *tconn = mdev->tconn;
4661 int not_empty;
4662
4663 spin_lock_irq(&tconn->req_lock);
4664 not_empty = !list_empty(&mdev->done_ee);
4665 spin_unlock_irq(&tconn->req_lock);
4666
4667 return not_empty;
4668}
4669
4670static int tconn_process_done_ee(struct drbd_tconn *tconn)
4671{
4672 int not_empty, err;
4673
4674 do {
4675 clear_bit(SIGNAL_ASENDER, &tconn->flags);
4676 flush_signals(current);
4677 err = idr_for_each(&tconn->volumes, _drbd_process_done_ee, NULL);
4678 if (err)
4679 return err;
4680 set_bit(SIGNAL_ASENDER, &tconn->flags);
4681 not_empty = idr_for_each(&tconn->volumes, _check_ee_empty, NULL);
4682 } while (not_empty);
4683
4684 return 0;
4685}
4686
/* One entry of the meta-socket dispatch table: the full expected on-wire
 * packet size (header included) and the handler, which returns false to
 * force a reconnect. */
struct asender_cmd {
	size_t pkt_size;
	int (*process)(struct drbd_conf *, enum drbd_packet);
};

/* Dispatch table, indexed by enum drbd_packet.  Designated initializers
 * leave gaps zero-filled, i.e. entries for unhandled packet types have a
 * NULL ->process. */
static struct asender_cmd asender_tbl[] = {
	[P_PING]	    = { sizeof(struct p_header), got_Ping },
	[P_PING_ACK]	    = { sizeof(struct p_header), got_PingAck },
	[P_RECV_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_WRITE_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_RS_WRITE_ACK]    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_DISCARD_WRITE]   = { sizeof(struct p_block_ack), got_BlockAck },
	[P_NEG_ACK]	    = { sizeof(struct p_block_ack), got_NegAck },
	[P_NEG_DREPLY]	    = { sizeof(struct p_block_ack), got_NegDReply },
	[P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), got_NegRSDReply},
	[P_OV_RESULT]	    = { sizeof(struct p_block_ack), got_OVResult },
	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), got_BarrierAck },
	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe93), got_skip },
	[P_RS_CANCEL]       = { sizeof(struct p_block_ack), got_NegRSDReply},
	[P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_RqSReply },
	[P_RETRY_WRITE]	    = { sizeof(struct p_block_ack), got_BlockAck },
};
4711
b411b363
PR
4712int drbd_asender(struct drbd_thread *thi)
4713{
392c8801 4714 struct drbd_tconn *tconn = thi->tconn;
32862ec7 4715 struct p_header *h = &tconn->meta.rbuf.header;
b411b363 4716 struct asender_cmd *cmd = NULL;
77351055 4717 struct packet_info pi;
257d0af6 4718 int rv;
b411b363
PR
4719 void *buf = h;
4720 int received = 0;
257d0af6 4721 int expect = sizeof(struct p_header);
f36af18c 4722 int ping_timeout_active = 0;
b411b363 4723
b411b363
PR
4724 current->policy = SCHED_RR; /* Make this a realtime task! */
4725 current->rt_priority = 2; /* more important than all other tasks */
4726
e77a0a5c 4727 while (get_t_state(thi) == RUNNING) {
80822284 4728 drbd_thread_current_set_cpu(thi);
32862ec7 4729 if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
2a67d8b9 4730 if (!drbd_send_ping(tconn)) {
32862ec7 4731 conn_err(tconn, "drbd_send_ping has failed\n");
841ce241
AG
4732 goto reconnect;
4733 }
32862ec7
PR
4734 tconn->meta.socket->sk->sk_rcvtimeo =
4735 tconn->net_conf->ping_timeo*HZ/10;
f36af18c 4736 ping_timeout_active = 1;
b411b363
PR
4737 }
4738
32862ec7
PR
4739 /* TODO: conditionally cork; it may hurt latency if we cork without
4740 much to send */
4741 if (!tconn->net_conf->no_cork)
4742 drbd_tcp_cork(tconn->meta.socket);
4743 if (tconn_process_done_ee(tconn))
4744 goto reconnect;
b411b363 4745 /* but unconditionally uncork unless disabled */
32862ec7
PR
4746 if (!tconn->net_conf->no_cork)
4747 drbd_tcp_uncork(tconn->meta.socket);
b411b363
PR
4748
4749 /* short circuit, recv_msg would return EINTR anyways. */
4750 if (signal_pending(current))
4751 continue;
4752
32862ec7
PR
4753 rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
4754 clear_bit(SIGNAL_ASENDER, &tconn->flags);
b411b363
PR
4755
4756 flush_signals(current);
4757
4758 /* Note:
4759 * -EINTR (on meta) we got a signal
4760 * -EAGAIN (on meta) rcvtimeo expired
4761 * -ECONNRESET other side closed the connection
4762 * -ERESTARTSYS (on data) we got a signal
4763 * rv < 0 other than above: unexpected error!
4764 * rv == expected: full header or command
4765 * rv < expected: "woken" by signal during receive
4766 * rv == 0 : "connection shut down by peer"
4767 */
4768 if (likely(rv > 0)) {
4769 received += rv;
4770 buf += rv;
4771 } else if (rv == 0) {
32862ec7 4772 conn_err(tconn, "meta connection shut down by peer.\n");
b411b363
PR
4773 goto reconnect;
4774 } else if (rv == -EAGAIN) {
cb6518cb
LE
4775 /* If the data socket received something meanwhile,
4776 * that is good enough: peer is still alive. */
32862ec7
PR
4777 if (time_after(tconn->last_received,
4778 jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
cb6518cb 4779 continue;
f36af18c 4780 if (ping_timeout_active) {
32862ec7 4781 conn_err(tconn, "PingAck did not arrive in time.\n");
b411b363
PR
4782 goto reconnect;
4783 }
32862ec7 4784 set_bit(SEND_PING, &tconn->flags);
b411b363
PR
4785 continue;
4786 } else if (rv == -EINTR) {
4787 continue;
4788 } else {
32862ec7 4789 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
b411b363
PR
4790 goto reconnect;
4791 }
4792
4793 if (received == expect && cmd == NULL) {
32862ec7 4794 if (!decode_header(tconn, h, &pi))
b411b363 4795 goto reconnect;
7201b972
AG
4796 cmd = &asender_tbl[pi.cmd];
4797 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd) {
32862ec7 4798 conn_err(tconn, "unknown command %d on meta (l: %d)\n",
77351055 4799 pi.cmd, pi.size);
b411b363
PR
4800 goto disconnect;
4801 }
4802 expect = cmd->pkt_size;
77351055 4803 if (pi.size != expect - sizeof(struct p_header)) {
32862ec7 4804 conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
77351055 4805 pi.cmd, pi.size);
b411b363 4806 goto reconnect;
257d0af6 4807 }
b411b363
PR
4808 }
4809 if (received == expect) {
32862ec7
PR
4810 tconn->last_received = jiffies;
4811 if (!cmd->process(vnr_to_mdev(tconn, pi.vnr), pi.cmd))
b411b363
PR
4812 goto reconnect;
4813
f36af18c
LE
4814 /* the idle_timeout (ping-int)
4815 * has been restored in got_PingAck() */
7201b972 4816 if (cmd == &asender_tbl[P_PING_ACK])
f36af18c
LE
4817 ping_timeout_active = 0;
4818
b411b363
PR
4819 buf = h;
4820 received = 0;
257d0af6 4821 expect = sizeof(struct p_header);
b411b363
PR
4822 cmd = NULL;
4823 }
4824 }
4825
4826 if (0) {
4827reconnect:
bbeb641c 4828 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
b411b363
PR
4829 }
4830 if (0) {
4831disconnect:
bbeb641c 4832 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363 4833 }
32862ec7 4834 clear_bit(SIGNAL_ASENDER, &tconn->flags);
b411b363 4835
32862ec7 4836 conn_info(tconn, "asender terminated\n");
b411b363
PR
4837
4838 return 0;
4839}