/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */


#include <linux/module.h>

#include <asm/uaccess.h>
#include <net/sock.h>

#include <linux/drbd.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/pkt_sched.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
#include "drbd_vli.h"

#define PRO_FEATURES (FF_TRIM)

struct packet_info {
        enum drbd_packet cmd;
        unsigned int size;
        unsigned int vnr;
        void *data;
};

enum finish_epoch {
        FE_STILL_LIVE,
        FE_DESTROYED,
        FE_RECYCLED,
};

static int drbd_do_features(struct drbd_connection *connection);
static int drbd_do_auth(struct drbd_connection *connection);
static int drbd_disconnected(struct drbd_peer_device *);
static void conn_wait_active_ee_empty(struct drbd_connection *connection);
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_work *, int);


#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)

/*
 * some helper functions to deal with singly linked page lists,
 * page->private being our "next" pointer.
 */

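/*
 * Illustration (editor's sketch, not from the original source): a chain of
 * three pages linked through page->private looks like
 *
 *     head -> [page A] -> [page B] -> [page C] -> 0
 *
 * where each arrow is page_chain_next(), and the terminating 0 is written
 * with set_page_private(page, 0), as done by page_chain_del() below.
 */
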
/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
        struct page *page;
        struct page *tmp;

        BUG_ON(!n);
        BUG_ON(!head);

        page = *head;

        if (!page)
                return NULL;

        while (page) {
                tmp = page_chain_next(page);
                if (--n == 0)
                        break; /* found sufficient pages */
                if (tmp == NULL)
                        /* insufficient pages, don't use any of them. */
                        return NULL;
                page = tmp;
        }

        /* add end of list marker for the returned list */
        set_page_private(page, 0);
        /* actual return value, and adjustment of head */
        page = *head;
        *head = tmp;
        return page;
}

/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
        struct page *tmp;
        int i = 1;
        while ((tmp = page_chain_next(page)))
                ++i, page = tmp;
        if (len)
                *len = i;
        return page;
}

static int page_chain_free(struct page *page)
{
        struct page *tmp;
        int i = 0;
        page_chain_for_each_safe(page, tmp) {
                put_page(page);
                ++i;
        }
        return i;
}

static void page_chain_add(struct page **head,
                struct page *chain_first, struct page *chain_last)
{
#if 1
        struct page *tmp;
        tmp = page_chain_tail(chain_first, NULL);
        BUG_ON(tmp != chain_last);
#endif

        /* add chain to head */
        set_page_private(chain_last, (unsigned long)*head);
        *head = chain_first;
}
154
b30ab791 155static struct page *__drbd_alloc_pages(struct drbd_device *device,
18c2d522 156 unsigned int number)
b411b363
PR
157{
158 struct page *page = NULL;
45bb912b 159 struct page *tmp = NULL;
18c2d522 160 unsigned int i = 0;
b411b363
PR
161
162 /* Yes, testing drbd_pp_vacant outside the lock is racy.
163 * So what. It saves a spin_lock. */
45bb912b 164 if (drbd_pp_vacant >= number) {
b411b363 165 spin_lock(&drbd_pp_lock);
45bb912b
LE
166 page = page_chain_del(&drbd_pp_pool, number);
167 if (page)
168 drbd_pp_vacant -= number;
b411b363 169 spin_unlock(&drbd_pp_lock);
45bb912b
LE
170 if (page)
171 return page;
b411b363 172 }
45bb912b 173
b411b363
PR
174 /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
175 * "criss-cross" setup, that might cause write-out on some other DRBD,
176 * which in turn might block on the other node at this very place. */
45bb912b
LE
177 for (i = 0; i < number; i++) {
178 tmp = alloc_page(GFP_TRY);
179 if (!tmp)
180 break;
181 set_page_private(tmp, (unsigned long)page);
182 page = tmp;
183 }
184
185 if (i == number)
186 return page;
187
188 /* Not enough pages immediately available this time.
c37c8ecf 189 * No need to jump around here, drbd_alloc_pages will retry this
45bb912b
LE
190 * function "soon". */
191 if (page) {
192 tmp = page_chain_tail(page, NULL);
193 spin_lock(&drbd_pp_lock);
194 page_chain_add(&drbd_pp_pool, page, tmp);
195 drbd_pp_vacant += i;
196 spin_unlock(&drbd_pp_lock);
197 }
198 return NULL;
b411b363
PR
199}

static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
                                           struct list_head *to_be_freed)
{
        struct drbd_peer_request *peer_req, *tmp;

        /* The EEs are always appended to the end of the list. Since
           they are sent in order over the wire, they have to finish
           in order. As soon as we see the first unfinished one, we
           can stop examining the list... */

        list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
                if (drbd_peer_req_has_active_page(peer_req))
                        break;
                list_move(&peer_req->w.list, to_be_freed);
        }
}

static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
{
        LIST_HEAD(reclaimed);
        struct drbd_peer_request *peer_req, *t;

        spin_lock_irq(&device->resource->req_lock);
        reclaim_finished_net_peer_reqs(device, &reclaimed);
        spin_unlock_irq(&device->resource->req_lock);

        list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
                drbd_free_net_peer_req(device, peer_req);
}

/**
 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 * @peer_device: DRBD device's peer device.
 * @number:      number of pages requested
 * @retry:       whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * If this allocation would exceed the max_buffers setting, we throttle
 * allocation (schedule_timeout) to give the system some room to breathe.
 *
 * We do not use max-buffers as hard limit, because it could lead to
 * congestion and further to a distributed deadlock during online-verify or
 * (checksum based) resync, if the max-buffers, socket buffer sizes and
 * resync-rate settings are mis-configured.
 *
 * Returns a page chain linked via page->private.
 */
struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
                              bool retry)
{
        struct drbd_device *device = peer_device->device;
        struct page *page = NULL;
        struct net_conf *nc;
        DEFINE_WAIT(wait);
        unsigned int mxb;

        rcu_read_lock();
        nc = rcu_dereference(peer_device->connection->net_conf);
        mxb = nc ? nc->max_buffers : 1000000;
        rcu_read_unlock();

        if (atomic_read(&device->pp_in_use) < mxb)
                page = __drbd_alloc_pages(device, number);

        while (page == NULL) {
                prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

                drbd_kick_lo_and_reclaim_net(device);

                if (atomic_read(&device->pp_in_use) < mxb) {
                        page = __drbd_alloc_pages(device, number);
                        if (page)
                                break;
                }

                if (!retry)
                        break;

                if (signal_pending(current)) {
                        drbd_warn(device, "drbd_alloc_pages interrupted!\n");
                        break;
                }

                if (schedule_timeout(HZ/10) == 0)
                        mxb = UINT_MAX;
        }
        finish_wait(&drbd_pp_wait, &wait);

        if (page)
                atomic_add(number, &device->pp_in_use);
        return page;
}
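
/*
 * Editor's note: the only caller in this file that passes a payload is
 * drbd_alloc_peer_req() below, which requests
 * (data_size + PAGE_SIZE - 1) >> PAGE_SHIFT pages and allows retrying only
 * when its caller permits blocking (gfp_mask & __GFP_WAIT).
 */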

/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
 * Is also used from inside another spin_lock_irq(&resource->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
{
        atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
        int i;

        if (page == NULL)
                return;

        if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
                i = page_chain_free(page);
        else {
                struct page *tmp;
                tmp = page_chain_tail(page, &i);
                spin_lock(&drbd_pp_lock);
                page_chain_add(&drbd_pp_pool, page, tmp);
                drbd_pp_vacant += i;
                spin_unlock(&drbd_pp_lock);
        }
        i = atomic_sub_return(i, a);
        if (i < 0)
                drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
                        is_net ? "pp_in_use_by_net" : "pp_in_use", i);
        wake_up(&drbd_pp_wait);
}

/*
You need to hold the req_lock:
 _drbd_wait_ee_list_empty()

You must not have the req_lock:
 drbd_free_peer_req()
 drbd_alloc_peer_req()
 drbd_free_peer_reqs()
 drbd_ee_fix_bhs()
 drbd_finish_peer_reqs()
 drbd_clear_done_ee()
 drbd_wait_ee_list_empty()
*/
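
/*
 * Illustrative calling convention (editor's sketch, mirroring
 * drbd_wait_ee_list_empty() further below):
 *
 *     spin_lock_irq(&device->resource->req_lock);
 *     _drbd_wait_ee_list_empty(device, &device->active_ee);
 *     spin_unlock_irq(&device->resource->req_lock);
 *
 * _drbd_wait_ee_list_empty() drops and re-takes the req_lock around
 * io_schedule(), so the lock is held again when it returns.
 */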

struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
                    unsigned int data_size, bool has_payload, gfp_t gfp_mask) __must_hold(local)
{
        struct drbd_device *device = peer_device->device;
        struct drbd_peer_request *peer_req;
        struct page *page = NULL;
        unsigned nr_pages = (data_size + PAGE_SIZE - 1) >> PAGE_SHIFT;

        if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
                return NULL;

        peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
        if (!peer_req) {
                if (!(gfp_mask & __GFP_NOWARN))
                        drbd_err(device, "%s: allocation failed\n", __func__);
                return NULL;
        }

        if (has_payload && data_size) {
                page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
                if (!page)
                        goto fail;
        }

        drbd_clear_interval(&peer_req->i);
        peer_req->i.size = data_size;
        peer_req->i.sector = sector;
        peer_req->i.local = false;
        peer_req->i.waiting = false;

        peer_req->epoch = NULL;
        peer_req->peer_device = peer_device;
        peer_req->pages = page;
        atomic_set(&peer_req->pending_bios, 0);
        peer_req->flags = 0;
        /*
         * The block_id is opaque to the receiver.  It is not endianness
         * converted, and sent back to the sender unchanged.
         */
        peer_req->block_id = id;

        return peer_req;

 fail:
        mempool_free(peer_req, drbd_ee_mempool);
        return NULL;
}

void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
                          int is_net)
{
        if (peer_req->flags & EE_HAS_DIGEST)
                kfree(peer_req->digest);
        drbd_free_pages(device, peer_req->pages, is_net);
        D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
        D_ASSERT(device, drbd_interval_empty(&peer_req->i));
        mempool_free(peer_req, drbd_ee_mempool);
}

int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
{
        LIST_HEAD(work_list);
        struct drbd_peer_request *peer_req, *t;
        int count = 0;
        int is_net = list == &device->net_ee;

        spin_lock_irq(&device->resource->req_lock);
        list_splice_init(list, &work_list);
        spin_unlock_irq(&device->resource->req_lock);

        list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
                __drbd_free_peer_req(device, peer_req, is_net);
                count++;
        }
        return count;
}

/*
 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
 */
static int drbd_finish_peer_reqs(struct drbd_device *device)
{
        LIST_HEAD(work_list);
        LIST_HEAD(reclaimed);
        struct drbd_peer_request *peer_req, *t;
        int err = 0;

        spin_lock_irq(&device->resource->req_lock);
        reclaim_finished_net_peer_reqs(device, &reclaimed);
        list_splice_init(&device->done_ee, &work_list);
        spin_unlock_irq(&device->resource->req_lock);

        list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
                drbd_free_net_peer_req(device, peer_req);

        /* possible callbacks here:
         * e_end_block, and e_end_resync_block, e_send_superseded.
         * all ignore the last argument.
         */
        list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
                int err2;

                /* list_del not necessary, next/prev members not touched */
                err2 = peer_req->w.cb(&peer_req->w, !!err);
                if (!err)
                        err = err2;
                drbd_free_peer_req(device, peer_req);
        }
        wake_up(&device->ee_wait);

        return err;
}

static void _drbd_wait_ee_list_empty(struct drbd_device *device,
                                     struct list_head *head)
{
        DEFINE_WAIT(wait);

        /* avoids spin_lock/unlock
         * and calling prepare_to_wait in the fast path */
        while (!list_empty(head)) {
                prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
                spin_unlock_irq(&device->resource->req_lock);
                io_schedule();
                finish_wait(&device->ee_wait, &wait);
                spin_lock_irq(&device->resource->req_lock);
        }
}

static void drbd_wait_ee_list_empty(struct drbd_device *device,
                                    struct list_head *head)
{
        spin_lock_irq(&device->resource->req_lock);
        _drbd_wait_ee_list_empty(device, head);
        spin_unlock_irq(&device->resource->req_lock);
}

static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
        struct kvec iov = {
                .iov_base = buf,
                .iov_len = size,
        };
        struct msghdr msg = {
                .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
        };
        return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
}

static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
{
        int rv;

        rv = drbd_recv_short(connection->data.socket, buf, size, 0);

        if (rv < 0) {
                if (rv == -ECONNRESET)
                        drbd_info(connection, "sock was reset by peer\n");
                else if (rv != -ERESTARTSYS)
                        drbd_err(connection, "sock_recvmsg returned %d\n", rv);
        } else if (rv == 0) {
                if (test_bit(DISCONNECT_SENT, &connection->flags)) {
                        long t;
                        rcu_read_lock();
                        t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
                        rcu_read_unlock();

                        t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);

                        if (t)
                                goto out;
                }
                drbd_info(connection, "sock was shut down by peer\n");
        }

        if (rv != size)
                conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
        return rv;
}

static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
{
        int err;

        err = drbd_recv(connection, buf, size);
        if (err != size) {
                if (err >= 0)
                        err = -EIO;
        } else
                err = 0;
        return err;
}

static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
{
        int err;

        err = drbd_recv_all(connection, buf, size);
        if (err && !signal_pending(current))
                drbd_warn(connection, "short read (expected size %d)\n", (int)size);
        return err;
}

/* quoting tcp(7):
 *   On individual connections, the socket buffer size must be set prior to the
 *   listen(2) or connect(2) calls in order to have it take effect.
 * This is our wrapper to do so.
 */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
                            unsigned int rcv)
{
        /* open coded SO_SNDBUF, SO_RCVBUF */
        if (snd) {
                sock->sk->sk_sndbuf = snd;
                sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
        }
        if (rcv) {
                sock->sk->sk_rcvbuf = rcv;
                sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
        }
}
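
/*
 * Editor's note: this is roughly the in-kernel equivalent of the user-space
 *
 *     setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &snd, sizeof(snd));
 *     setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcv, sizeof(rcv));
 *
 * except that setsockopt() doubles the requested value for bookkeeping
 * overhead, while writing sk_sndbuf/sk_rcvbuf directly does not.
 */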

static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
        const char *what;
        struct socket *sock;
        struct sockaddr_in6 src_in6;
        struct sockaddr_in6 peer_in6;
        struct net_conf *nc;
        int err, peer_addr_len, my_addr_len;
        int sndbuf_size, rcvbuf_size, connect_int;
        int disconnect_on_error = 1;

        rcu_read_lock();
        nc = rcu_dereference(connection->net_conf);
        if (!nc) {
                rcu_read_unlock();
                return NULL;
        }
        sndbuf_size = nc->sndbuf_size;
        rcvbuf_size = nc->rcvbuf_size;
        connect_int = nc->connect_int;
        rcu_read_unlock();

        my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
        memcpy(&src_in6, &connection->my_addr, my_addr_len);

        if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
                src_in6.sin6_port = 0;
        else
                ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

        peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
        memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

        what = "sock_create_kern";
        err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
                               SOCK_STREAM, IPPROTO_TCP, &sock);
        if (err < 0) {
                sock = NULL;
                goto out;
        }

        sock->sk->sk_rcvtimeo =
        sock->sk->sk_sndtimeo = connect_int * HZ;
        drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

        /* explicitly bind to the configured IP as source IP
         * for the outgoing connections.
         * This is needed for multihomed hosts and to be
         * able to use lo: interfaces for drbd.
         * Make sure to use 0 as port number, so linux selects
         * a free one dynamically.
         */
        what = "bind before connect";
        err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
        if (err < 0)
                goto out;

        /* connect may fail, peer not yet available.
         * stay C_WF_CONNECTION, don't go Disconnecting! */
        disconnect_on_error = 0;
        what = "connect";
        err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
        if (err < 0) {
                if (sock) {
                        sock_release(sock);
                        sock = NULL;
                }
                switch (-err) {
                        /* timeout, busy, signal pending */
                case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
                case EINTR: case ERESTARTSYS:
                        /* peer not (yet) available, network problem */
                case ECONNREFUSED: case ENETUNREACH:
                case EHOSTDOWN:    case EHOSTUNREACH:
                        disconnect_on_error = 0;
                        break;
                default:
                        drbd_err(connection, "%s failed, err = %d\n", what, err);
                }
                if (disconnect_on_error)
                        conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
        }

        return sock;
}

struct accept_wait_data {
        struct drbd_connection *connection;
        struct socket *s_listen;
        struct completion door_bell;
        void (*original_sk_state_change)(struct sock *sk);

};

static void drbd_incoming_connection(struct sock *sk)
{
        struct accept_wait_data *ad = sk->sk_user_data;
        void (*state_change)(struct sock *sk);

        state_change = ad->original_sk_state_change;
        if (sk->sk_state == TCP_ESTABLISHED)
                complete(&ad->door_bell);
        state_change(sk);
}

static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
{
        int err, sndbuf_size, rcvbuf_size, my_addr_len;
        struct sockaddr_in6 my_addr;
        struct socket *s_listen;
        struct net_conf *nc;
        const char *what;

        rcu_read_lock();
        nc = rcu_dereference(connection->net_conf);
        if (!nc) {
                rcu_read_unlock();
                return -EIO;
        }
        sndbuf_size = nc->sndbuf_size;
        rcvbuf_size = nc->rcvbuf_size;
        rcu_read_unlock();

        my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
        memcpy(&my_addr, &connection->my_addr, my_addr_len);

        what = "sock_create_kern";
        err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
                               SOCK_STREAM, IPPROTO_TCP, &s_listen);
        if (err) {
                s_listen = NULL;
                goto out;
        }

        s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
        drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

        what = "bind before listen";
        err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
        if (err < 0)
                goto out;

        ad->s_listen = s_listen;
        write_lock_bh(&s_listen->sk->sk_callback_lock);
        ad->original_sk_state_change = s_listen->sk->sk_state_change;
        s_listen->sk->sk_state_change = drbd_incoming_connection;
        s_listen->sk->sk_user_data = ad;
        write_unlock_bh(&s_listen->sk->sk_callback_lock);

        what = "listen";
        err = s_listen->ops->listen(s_listen, 5);
        if (err < 0)
                goto out;

        return 0;
out:
        if (s_listen)
                sock_release(s_listen);
        if (err < 0) {
                if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
                        drbd_err(connection, "%s failed, err = %d\n", what, err);
                        conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
                }
        }

        return -EIO;
}

static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
{
        write_lock_bh(&sk->sk_callback_lock);
        sk->sk_state_change = ad->original_sk_state_change;
        sk->sk_user_data = NULL;
        write_unlock_bh(&sk->sk_callback_lock);
}

static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
{
        int timeo, connect_int, err = 0;
        struct socket *s_estab = NULL;
        struct net_conf *nc;

        rcu_read_lock();
        nc = rcu_dereference(connection->net_conf);
        if (!nc) {
                rcu_read_unlock();
                return NULL;
        }
        connect_int = nc->connect_int;
        rcu_read_unlock();

        timeo = connect_int * HZ;
        /* 28.5% random jitter */
        timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;

        err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
        if (err <= 0)
                return NULL;

        err = kernel_accept(ad->s_listen, &s_estab, 0);
        if (err < 0) {
                if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
                        drbd_err(connection, "accept failed, err = %d\n", err);
                        conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
                }
        }

        if (s_estab)
                unregister_state_change(s_estab->sk, ad);

        return s_estab;
}

static int decode_header(struct drbd_connection *, void *, struct packet_info *);

static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
                             enum drbd_packet cmd)
{
        if (!conn_prepare_command(connection, sock))
                return -EIO;
        return conn_send_command(connection, sock, cmd, 0, NULL, 0);
}

static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
{
        unsigned int header_size = drbd_header_size(connection);
        struct packet_info pi;
        int err;

        err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
        if (err != header_size) {
                if (err >= 0)
                        err = -EIO;
                return err;
        }
        err = decode_header(connection, connection->data.rbuf, &pi);
        if (err)
                return err;
        return pi.cmd;
}

/**
 * drbd_socket_okay() - Free the socket if its connection is not okay
 * @sock: pointer to the pointer to the socket.
 */
static int drbd_socket_okay(struct socket **sock)
{
        int rr;
        char tb[4];

        if (!*sock)
                return false;

        rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);

        if (rr > 0 || rr == -EAGAIN) {
                return true;
        } else {
                sock_release(*sock);
                *sock = NULL;
                return false;
        }
}

/* Gets called if a connection is established, or if a new minor gets created
   in a connection */
int drbd_connected(struct drbd_peer_device *peer_device)
{
        struct drbd_device *device = peer_device->device;
        int err;

        atomic_set(&device->packet_seq, 0);
        device->peer_seq = 0;

        device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
                &peer_device->connection->cstate_mutex :
                &device->own_state_mutex;

        err = drbd_send_sync_param(peer_device);
        if (!err)
                err = drbd_send_sizes(peer_device, 0, 0);
        if (!err)
                err = drbd_send_uuids(peer_device);
        if (!err)
                err = drbd_send_current_state(peer_device);
        clear_bit(USE_DEGR_WFC_T, &device->flags);
        clear_bit(RESIZE_PENDING, &device->flags);
        atomic_set(&device->ap_in_flight, 0);
        mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
        return err;
}

/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int conn_connect(struct drbd_connection *connection)
{
        struct drbd_socket sock, msock;
        struct drbd_peer_device *peer_device;
        struct net_conf *nc;
        int vnr, timeout, h, ok;
        bool discard_my_data;
        enum drbd_state_rv rv;
        struct accept_wait_data ad = {
                .connection = connection,
                .door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
        };

        clear_bit(DISCONNECT_SENT, &connection->flags);
        if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
                return -2;

        mutex_init(&sock.mutex);
        sock.sbuf = connection->data.sbuf;
        sock.rbuf = connection->data.rbuf;
        sock.socket = NULL;
        mutex_init(&msock.mutex);
        msock.sbuf = connection->meta.sbuf;
        msock.rbuf = connection->meta.rbuf;
        msock.socket = NULL;

        /* Assume that the peer only understands protocol 80 until we know better.  */
        connection->agreed_pro_version = 80;

        if (prepare_listen_socket(connection, &ad))
                return 0;

        do {
                struct socket *s;

                s = drbd_try_connect(connection);
                if (s) {
                        if (!sock.socket) {
                                sock.socket = s;
                                send_first_packet(connection, &sock, P_INITIAL_DATA);
                        } else if (!msock.socket) {
                                clear_bit(RESOLVE_CONFLICTS, &connection->flags);
                                msock.socket = s;
                                send_first_packet(connection, &msock, P_INITIAL_META);
                        } else {
                                drbd_err(connection, "Logic error in conn_connect()\n");
                                goto out_release_sockets;
                        }
                }

                if (sock.socket && msock.socket) {
                        rcu_read_lock();
                        nc = rcu_dereference(connection->net_conf);
                        timeout = nc->ping_timeo * HZ / 10;
                        rcu_read_unlock();
                        schedule_timeout_interruptible(timeout);
                        ok = drbd_socket_okay(&sock.socket);
                        ok = drbd_socket_okay(&msock.socket) && ok;
                        if (ok)
                                break;
                }

retry:
                s = drbd_wait_for_connect(connection, &ad);
                if (s) {
                        int fp = receive_first_packet(connection, s);
                        drbd_socket_okay(&sock.socket);
                        drbd_socket_okay(&msock.socket);
                        switch (fp) {
                        case P_INITIAL_DATA:
                                if (sock.socket) {
                                        drbd_warn(connection, "initial packet S crossed\n");
                                        sock_release(sock.socket);
                                        sock.socket = s;
                                        goto randomize;
                                }
                                sock.socket = s;
                                break;
                        case P_INITIAL_META:
                                set_bit(RESOLVE_CONFLICTS, &connection->flags);
                                if (msock.socket) {
                                        drbd_warn(connection, "initial packet M crossed\n");
                                        sock_release(msock.socket);
                                        msock.socket = s;
                                        goto randomize;
                                }
                                msock.socket = s;
                                break;
                        default:
                                drbd_warn(connection, "Error receiving initial packet\n");
                                sock_release(s);
randomize:
                                if (prandom_u32() & 1)
                                        goto retry;
                        }
                }

                if (connection->cstate <= C_DISCONNECTING)
                        goto out_release_sockets;
                if (signal_pending(current)) {
                        flush_signals(current);
                        smp_rmb();
                        if (get_t_state(&connection->receiver) == EXITING)
                                goto out_release_sockets;
                }

                ok = drbd_socket_okay(&sock.socket);
                ok = drbd_socket_okay(&msock.socket) && ok;
        } while (!ok);

        if (ad.s_listen)
                sock_release(ad.s_listen);

        sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
        msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

        sock.socket->sk->sk_allocation = GFP_NOIO;
        msock.socket->sk->sk_allocation = GFP_NOIO;

        sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
        msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

        /* NOT YET ...
         * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
         * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
         * first set it to the P_CONNECTION_FEATURES timeout,
         * which we set to 4x the configured ping_timeout. */
        rcu_read_lock();
        nc = rcu_dereference(connection->net_conf);

        sock.socket->sk->sk_sndtimeo =
        sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

        msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
        timeout = nc->timeout * HZ / 10;
        discard_my_data = nc->discard_my_data;
        rcu_read_unlock();

        msock.socket->sk->sk_sndtimeo = timeout;

        /* we don't want delays.
         * we use TCP_CORK where appropriate, though */
        drbd_tcp_nodelay(sock.socket);
        drbd_tcp_nodelay(msock.socket);

        connection->data.socket = sock.socket;
        connection->meta.socket = msock.socket;
        connection->last_received = jiffies;

        h = drbd_do_features(connection);
        if (h <= 0)
                return h;

        if (connection->cram_hmac_tfm) {
                /* drbd_request_state(device, NS(conn, WFAuth)); */
                switch (drbd_do_auth(connection)) {
                case -1:
                        drbd_err(connection, "Authentication of peer failed\n");
                        return -1;
                case 0:
                        drbd_err(connection, "Authentication of peer failed, trying again.\n");
                        return 0;
                }
        }

        connection->data.socket->sk->sk_sndtimeo = timeout;
        connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

        if (drbd_send_protocol(connection) == -EOPNOTSUPP)
                return -1;

        /* Prevent a race between resync-handshake and
         * being promoted to Primary.
         *
         * Grab and release the state mutex, so we know that any current
         * drbd_set_role() is finished, and any incoming drbd_set_role
         * will see the STATE_SENT flag, and wait for it to be cleared.
         */
        idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
                mutex_lock(peer_device->device->state_mutex);

        set_bit(STATE_SENT, &connection->flags);

        idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
                mutex_unlock(peer_device->device->state_mutex);

        rcu_read_lock();
        idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
                struct drbd_device *device = peer_device->device;
                kref_get(&device->kref);
                rcu_read_unlock();

                if (discard_my_data)
                        set_bit(DISCARD_MY_DATA, &device->flags);
                else
                        clear_bit(DISCARD_MY_DATA, &device->flags);

                drbd_connected(peer_device);
                kref_put(&device->kref, drbd_destroy_device);
                rcu_read_lock();
        }
        rcu_read_unlock();

        rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
        if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
                clear_bit(STATE_SENT, &connection->flags);
                return 0;
        }

        drbd_thread_start(&connection->asender);

        mutex_lock(&connection->resource->conf_update);
        /* The discard_my_data flag is a single-shot modifier to the next
         * connection attempt, the handshake of which is now well underway.
         * No need for rcu style copying of the whole struct
         * just to clear a single value. */
        connection->net_conf->discard_my_data = 0;
        mutex_unlock(&connection->resource->conf_update);

        return h;

out_release_sockets:
        if (ad.s_listen)
                sock_release(ad.s_listen);
        if (sock.socket)
                sock_release(sock.socket);
        if (msock.socket)
                sock_release(msock.socket);
        return -1;
}
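
/*
 * Editor's summary of conn_connect(): each connection uses two TCP sockets,
 * "sock" for data (announced with P_INITIAL_DATA) and "msock" for meta data
 * and acks (announced with P_INITIAL_META).  Both peers connect and listen
 * simultaneously; crossed initial packets are resolved by the randomized
 * retry at the "randomize" label, and RESOLVE_CONFLICTS marks which side
 * arbitrates conflicts later on.
 */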

static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
{
        unsigned int header_size = drbd_header_size(connection);

        if (header_size == sizeof(struct p_header100) &&
            *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
                struct p_header100 *h = header;
                if (h->pad != 0) {
                        drbd_err(connection, "Header padding is not zero\n");
                        return -EINVAL;
                }
                pi->vnr = be16_to_cpu(h->volume);
                pi->cmd = be16_to_cpu(h->command);
                pi->size = be32_to_cpu(h->length);
        } else if (header_size == sizeof(struct p_header95) &&
                   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
                struct p_header95 *h = header;
                pi->cmd = be16_to_cpu(h->command);
                pi->size = be32_to_cpu(h->length);
                pi->vnr = 0;
        } else if (header_size == sizeof(struct p_header80) &&
                   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
                struct p_header80 *h = header;
                pi->cmd = be16_to_cpu(h->command);
                pi->size = be16_to_cpu(h->length);
                pi->vnr = 0;
        } else {
                drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
                         be32_to_cpu(*(__be32 *)header),
                         connection->agreed_pro_version);
                return -EINVAL;
        }
        pi->data = header + header_size;
        return 0;
}
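
/*
 * Header formats accepted above (editor's sketch of the fields as decoded;
 * see the struct p_header{100,95,80} definitions in drbd_protocol.h for the
 * authoritative layout):
 *
 *     protocol 100:  be32 DRBD_MAGIC_100, be16 volume, be16 command,
 *                    be32 length, pad (must be zero)
 *     "big" header:  be16 DRBD_MAGIC_BIG, be16 command, be32 length
 *     protocol 80:   be32 DRBD_MAGIC, be16 command, be16 length
 *
 * Only the protocol-100 header carries a volume number; the older formats
 * imply volume 0.
 */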

static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
{
        void *buffer = connection->data.rbuf;
        int err;

        err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
        if (err)
                return err;

        err = decode_header(connection, buffer, pi);
        connection->last_received = jiffies;

        return err;
}

static void drbd_flush(struct drbd_connection *connection)
{
        int rv;
        struct drbd_peer_device *peer_device;
        int vnr;

        if (connection->resource->write_ordering >= WO_bdev_flush) {
                rcu_read_lock();
                idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
                        struct drbd_device *device = peer_device->device;

                        if (!get_ldev(device))
                                continue;
                        kref_get(&device->kref);
                        rcu_read_unlock();

                        rv = blkdev_issue_flush(device->ldev->backing_bdev,
                                                GFP_NOIO, NULL);
                        if (rv) {
                                drbd_info(device, "local disk flush failed with status %d\n", rv);
                                /* would rather check on EOPNOTSUPP, but that is not reliable.
                                 * don't try again for ANY return value != 0
                                 * if (rv == -EOPNOTSUPP) */
                                drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io);
                        }
                        put_ldev(device);
                        kref_put(&device->kref, drbd_destroy_device);

                        rcu_read_lock();
                        if (rv)
                                break;
                }
                rcu_read_unlock();
        }
}

/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @connection: DRBD connection.
 * @epoch:      Epoch object.
 * @ev:         Epoch event.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
                                               struct drbd_epoch *epoch,
                                               enum epoch_event ev)
{
        int epoch_size;
        struct drbd_epoch *next_epoch;
        enum finish_epoch rv = FE_STILL_LIVE;

        spin_lock(&connection->epoch_lock);
        do {
                next_epoch = NULL;

                epoch_size = atomic_read(&epoch->epoch_size);

                switch (ev & ~EV_CLEANUP) {
                case EV_PUT:
                        atomic_dec(&epoch->active);
                        break;
                case EV_GOT_BARRIER_NR:
                        set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
                        break;
                case EV_BECAME_LAST:
                        /* nothing to do*/
                        break;
                }

                if (epoch_size != 0 &&
                    atomic_read(&epoch->active) == 0 &&
                    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
                        if (!(ev & EV_CLEANUP)) {
                                spin_unlock(&connection->epoch_lock);
                                drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
                                spin_lock(&connection->epoch_lock);
                        }
#if 0
                        /* FIXME: dec unacked on connection, once we have
                         * something to count pending connection packets in. */
                        if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
                                dec_unacked(epoch->connection);
#endif

                        if (connection->current_epoch != epoch) {
                                next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
                                list_del(&epoch->list);
                                ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
                                connection->epochs--;
                                kfree(epoch);

                                if (rv == FE_STILL_LIVE)
                                        rv = FE_DESTROYED;
                        } else {
                                epoch->flags = 0;
                                atomic_set(&epoch->epoch_size, 0);
                                /* atomic_set(&epoch->active, 0); is already zero */
                                if (rv == FE_STILL_LIVE)
                                        rv = FE_RECYCLED;
                        }
                }

                if (!next_epoch)
                        break;

                epoch = next_epoch;
        } while (1);

        spin_unlock(&connection->epoch_lock);

        return rv;
}
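
/*
 * Editor's summary of the epoch state machine above: an epoch may finish
 * once it is non-empty, has no active requests left, and has received its
 * barrier number (or is being cleaned up).  Finishing sends P_BARRIER_ACK
 * via drbd_send_b_ack() and either frees the epoch (FE_DESTROYED) or, if it
 * is still connection->current_epoch, resets it for reuse (FE_RECYCLED).
 */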

static enum write_ordering_e
max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
{
        struct disk_conf *dc;

        dc = rcu_dereference(bdev->disk_conf);

        if (wo == WO_bdev_flush && !dc->disk_flushes)
                wo = WO_drain_io;
        if (wo == WO_drain_io && !dc->disk_drain)
                wo = WO_none;

        return wo;
}
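
/* In other words: a backing device configured with disk-flushes=no degrades
 * WO_bdev_flush to WO_drain_io, and disk-drain=no degrades that further to
 * WO_none; drbd_bump_write_ordering() below applies this across all devices
 * of the resource. */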

/**
 * drbd_bump_write_ordering() - Fall back to another write ordering method
 * @resource:   DRBD resource.
 * @bdev:       backing device whose limits to honor; may be NULL.
 * @wo:         Write ordering method to try.
 */
void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
                              enum write_ordering_e wo)
{
        struct drbd_device *device;
        enum write_ordering_e pwo;
        int vnr;
        static char *write_ordering_str[] = {
                [WO_none] = "none",
                [WO_drain_io] = "drain",
                [WO_bdev_flush] = "flush",
        };

        pwo = resource->write_ordering;
        if (wo != WO_bdev_flush)
                wo = min(pwo, wo);
        rcu_read_lock();
        idr_for_each_entry(&resource->devices, device, vnr) {
                if (get_ldev(device)) {
                        wo = max_allowed_wo(device->ldev, wo);
                        if (device->ldev == bdev)
                                bdev = NULL;
                        put_ldev(device);
                }
        }

        if (bdev)
                wo = max_allowed_wo(bdev, wo);

        rcu_read_unlock();

        resource->write_ordering = wo;
        if (pwo != resource->write_ordering || wo == WO_bdev_flush)
                drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
}
45bb912b 1315/**
fbe29dec 1316 * drbd_submit_peer_request()
b30ab791 1317 * @device: DRBD device.
db830c46 1318 * @peer_req: peer request
45bb912b 1319 * @rw: flag field, see bio->bi_rw
10f6d992
LE
1320 *
1321 * May spread the pages to multiple bios,
1322 * depending on bio_add_page restrictions.
1323 *
1324 * Returns 0 if all bios have been submitted,
1325 * -ENOMEM if we could not allocate enough bios,
1326 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1327 * single page to an empty bio (which should never happen and likely indicates
1328 * that the lower level IO stack is in some way broken). This has been observed
1329 * on certain Xen deployments.
45bb912b
LE
1330 */
1331/* TODO allocate from our own bio_set. */
b30ab791 1332int drbd_submit_peer_request(struct drbd_device *device,
fbe29dec
AG
1333 struct drbd_peer_request *peer_req,
1334 const unsigned rw, const int fault_type)
45bb912b
LE
1335{
1336 struct bio *bios = NULL;
1337 struct bio *bio;
db830c46
AG
1338 struct page *page = peer_req->pages;
1339 sector_t sector = peer_req->i.sector;
1340 unsigned ds = peer_req->i.size;
45bb912b
LE
1341 unsigned n_bios = 0;
1342 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
10f6d992 1343 int err = -ENOMEM;
45bb912b 1344
a0fb3c47
LE
1345 if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
1346 /* wait for all pending IO completions, before we start
1347 * zeroing things out. */
1348 conn_wait_active_ee_empty(first_peer_device(device)->connection);
1349 if (blkdev_issue_zeroout(device->ldev->backing_bdev,
1350 sector, ds >> 9, GFP_NOIO))
1351 peer_req->flags |= EE_WAS_ERROR;
1352 drbd_endio_write_sec_final(peer_req);
1353 return 0;
1354 }
1355
54ed4ed8
LE
1356 /* Discards don't have any payload.
1357 * But the scsi layer still expects a bio_vec it can use internally,
1358 * see sd_setup_discard_cmnd() and blk_add_request_payload(). */
a0fb3c47 1359 if (peer_req->flags & EE_IS_TRIM)
54ed4ed8 1360 nr_pages = 1;
a0fb3c47 1361
45bb912b
LE
1362 /* In most cases, we will only need one bio. But in case the lower
1363 * level restrictions happen to be different at this offset on this
1364 * side than those of the sending peer, we may need to submit the
9476f39d
LE
1365 * request in more than one bio.
1366 *
1367 * Plain bio_alloc is good enough here, this is no DRBD internally
1368 * generated bio, but a bio allocated on behalf of the peer.
1369 */
45bb912b
LE
1370next_bio:
1371 bio = bio_alloc(GFP_NOIO, nr_pages);
1372 if (!bio) {
a0fb3c47 1373 drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
45bb912b
LE
1374 goto fail;
1375 }
db830c46 1376 /* > peer_req->i.sector, unless this is the first bio */
4f024f37 1377 bio->bi_iter.bi_sector = sector;
b30ab791 1378 bio->bi_bdev = device->ldev->backing_bdev;
45bb912b 1379 bio->bi_rw = rw;
db830c46 1380 bio->bi_private = peer_req;
fcefa62e 1381 bio->bi_end_io = drbd_peer_request_endio;
45bb912b
LE
1382
1383 bio->bi_next = bios;
1384 bios = bio;
1385 ++n_bios;
1386
a0fb3c47
LE
1387 if (rw & REQ_DISCARD) {
1388 bio->bi_iter.bi_size = ds;
1389 goto submit;
1390 }
1391
45bb912b
LE
1392 page_chain_for_each(page) {
1393 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1394 if (!bio_add_page(bio, page, len, 0)) {
10f6d992
LE
1395 /* A single page must always be possible!
1396 * But in case it fails anyways,
1397 * we deal with it, and complain (below). */
1398 if (bio->bi_vcnt == 0) {
d0180171 1399 drbd_err(device,
10f6d992
LE
1400 "bio_add_page failed for len=%u, "
1401 "bi_vcnt=0 (bi_sector=%llu)\n",
4f024f37 1402 len, (uint64_t)bio->bi_iter.bi_sector);
10f6d992
LE
1403 err = -ENOSPC;
1404 goto fail;
1405 }
45bb912b
LE
1406 goto next_bio;
1407 }
1408 ds -= len;
1409 sector += len >> 9;
1410 --nr_pages;
1411 }
0b0ba1ef 1412 D_ASSERT(device, ds == 0);
a0fb3c47
LE
1413submit:
1414 D_ASSERT(device, page == NULL);
45bb912b 1415
db830c46 1416 atomic_set(&peer_req->pending_bios, n_bios);
45bb912b
LE
1417 do {
1418 bio = bios;
1419 bios = bios->bi_next;
1420 bio->bi_next = NULL;
1421
b30ab791 1422 drbd_generic_make_request(device, fault_type, bio);
45bb912b 1423 } while (bios);
45bb912b
LE
1424 return 0;
1425
1426fail:
1427 while (bios) {
1428 bio = bios;
1429 bios = bios->bi_next;
1430 bio_put(bio);
1431 }
10f6d992 1432 return err;
45bb912b
LE
1433}
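
/*
 * Example (editor's illustration): a 32 KiB peer request with 4 KiB pages
 * starts out as a single bio with room for nr_pages = 8 pages.  Only if
 * bio_add_page() refuses a page because of lower-level restrictions does
 * the "goto next_bio" path chain a further bio via bio->bi_next; the loop
 * after the "submit" label then submits every bio in the chain.
 */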

static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
                                             struct drbd_peer_request *peer_req)
{
        struct drbd_interval *i = &peer_req->i;

        drbd_remove_interval(&device->write_requests, i);
        drbd_clear_interval(i);

        /* Wake up any processes waiting for this peer request to complete.  */
        if (i->waiting)
                wake_up(&device->misc_wait);
}

static void conn_wait_active_ee_empty(struct drbd_connection *connection)
{
        struct drbd_peer_device *peer_device;
        int vnr;

        rcu_read_lock();
        idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
                struct drbd_device *device = peer_device->device;

                kref_get(&device->kref);
                rcu_read_unlock();
                drbd_wait_ee_list_empty(device, &device->active_ee);
                kref_put(&device->kref, drbd_destroy_device);
                rcu_read_lock();
        }
        rcu_read_unlock();
}

static struct drbd_peer_device *
conn_peer_device(struct drbd_connection *connection, int volume_number)
{
        return idr_find(&connection->peer_devices, volume_number);
}

static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
{
        int rv;
        struct p_barrier *p = pi->data;
        struct drbd_epoch *epoch;

        /* FIXME these are unacked on connection,
         * not a specific (peer)device.
         */
        connection->current_epoch->barrier_nr = p->barrier;
        connection->current_epoch->connection = connection;
        rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);

        /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
         * the activity log, which means it would not be resynced in case the
         * R_PRIMARY crashes now.
         * Therefore we must send the barrier_ack after the barrier request was
         * completed. */
        switch (connection->resource->write_ordering) {
        case WO_none:
                if (rv == FE_RECYCLED)
                        return 0;

                /* receiver context, in the writeout path of the other node.
                 * avoid potential distributed deadlock */
                epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
                if (epoch)
                        break;
                else
                        drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
                /* Fall through */

        case WO_bdev_flush:
        case WO_drain_io:
                conn_wait_active_ee_empty(connection);
                drbd_flush(connection);

                if (atomic_read(&connection->current_epoch->epoch_size)) {
                        epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
                        if (epoch)
                                break;
                }

                return 0;
        default:
                drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
                         connection->resource->write_ordering);
                return -EIO;
        }

        epoch->flags = 0;
        atomic_set(&epoch->epoch_size, 0);
        atomic_set(&epoch->active, 0);

        spin_lock(&connection->epoch_lock);
        if (atomic_read(&connection->current_epoch->epoch_size)) {
                list_add(&epoch->list, &connection->current_epoch->list);
                connection->current_epoch = epoch;
                connection->epochs++;
        } else {
                /* The current_epoch got recycled while we allocated this one... */
                kfree(epoch);
        }
        spin_unlock(&connection->epoch_lock);

        return 0;
}

/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data */
static struct drbd_peer_request *
read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
              struct packet_info *pi) __must_hold(local)
{
        struct drbd_device *device = peer_device->device;
        const sector_t capacity = drbd_get_capacity(device->this_bdev);
        struct drbd_peer_request *peer_req;
        struct page *page;
        int dgs, ds, err;
        int data_size = pi->size;
        void *dig_in = peer_device->connection->int_dig_in;
        void *dig_vv = peer_device->connection->int_dig_vv;
        unsigned long *data;
        struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;

        dgs = 0;
        if (!trim && peer_device->connection->peer_integrity_tfm) {
                dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
                /*
                 * FIXME: Receive the incoming digest into the receive buffer
                 * here, together with its struct p_data?
                 */
                err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
                if (err)
                        return NULL;
                data_size -= dgs;
        }

        if (trim) {
                D_ASSERT(peer_device, data_size == 0);
                data_size = be32_to_cpu(trim->size);
        }

        if (!expect(IS_ALIGNED(data_size, 512)))
                return NULL;
        /* prepare for larger trim requests. */
        if (!trim && !expect(data_size <= DRBD_MAX_BIO_SIZE))
                return NULL;

        /* even though we trust our peer,
         * we sometimes have to double check. */
        if (sector + (data_size>>9) > capacity) {
                drbd_err(device, "request from peer beyond end of local disk: "
                        "capacity: %llus < sector: %llus + size: %u\n",
                        (unsigned long long)capacity,
                        (unsigned long long)sector, data_size);
                return NULL;
        }

        /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
         * "criss-cross" setup, that might cause write-out on some other DRBD,
         * which in turn might block on the other node at this very place. */
        peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, trim == NULL, GFP_NOIO);
        if (!peer_req)
                return NULL;

        if (trim)
                return peer_req;

        ds = data_size;
        page = peer_req->pages;
        page_chain_for_each(page) {
                unsigned len = min_t(int, ds, PAGE_SIZE);
                data = kmap(page);
                err = drbd_recv_all_warn(peer_device->connection, data, len);
                if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
                        drbd_err(device, "Fault injection: Corrupting data on receive\n");
                        data[0] = data[0] ^ (unsigned long)-1;
                }
                kunmap(page);
                if (err) {
                        drbd_free_peer_req(device, peer_req);
                        return NULL;
                }
                ds -= len;
        }

        if (dgs) {
                drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv);
                if (memcmp(dig_in, dig_vv, dgs)) {
                        drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
                                (unsigned long long)sector, data_size);
                        drbd_free_peer_req(device, peer_req);
                        return NULL;
                }
        }
        device->recv_cnt += data_size>>9;
        return peer_req;
}
1631
1632/* drbd_drain_block() just takes a data block
1633 * out of the socket input buffer, and discards it.
1634 */
69a22773 1635static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
b411b363
PR
1636{
1637 struct page *page;
a5c31904 1638 int err = 0;
b411b363
PR
1639 void *data;
1640
c3470cde 1641 if (!data_size)
fc5be839 1642 return 0;
c3470cde 1643
69a22773 1644 page = drbd_alloc_pages(peer_device, 1, 1);
b411b363
PR
1645
1646 data = kmap(page);
1647 while (data_size) {
fc5be839
AG
1648 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1649
69a22773 1650 err = drbd_recv_all_warn(peer_device->connection, data, len);
a5c31904 1651 if (err)
b411b363 1652 break;
a5c31904 1653 data_size -= len;
b411b363
PR
1654 }
1655 kunmap(page);
69a22773 1656 drbd_free_pages(peer_device->device, page, 0);
fc5be839 1657 return err;
b411b363
PR
1658}
1659
69a22773 1660static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
b411b363
PR
1661 sector_t sector, int data_size)
1662{
7988613b
KO
1663 struct bio_vec bvec;
1664 struct bvec_iter iter;
b411b363 1665 struct bio *bio;
7988613b 1666 int dgs, err, expect;
69a22773
AG
1667 void *dig_in = peer_device->connection->int_dig_in;
1668 void *dig_vv = peer_device->connection->int_dig_vv;
b411b363 1669
88104ca4 1670 dgs = 0;
69a22773
AG
1671 if (peer_device->connection->peer_integrity_tfm) {
1672 dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
1673 err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
a5c31904
AG
1674 if (err)
1675 return err;
88104ca4 1676 data_size -= dgs;
b411b363
PR
1677 }
1678
b411b363
PR
1679 /* optimistically update recv_cnt. if receiving fails below,
1680 * we disconnect anyways, and counters will be reset. */
69a22773 1681 peer_device->device->recv_cnt += data_size>>9;
b411b363
PR
1682
1683 bio = req->master_bio;
69a22773 1684 D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);
b411b363 1685
7988613b
KO
1686 bio_for_each_segment(bvec, bio, iter) {
1687 void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
1688 expect = min_t(int, data_size, bvec.bv_len);
69a22773 1689 err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
7988613b 1690 kunmap(bvec.bv_page);
a5c31904
AG
1691 if (err)
1692 return err;
1693 data_size -= expect;
b411b363
PR
1694 }
1695
1696 if (dgs) {
69a22773 1697 drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
b411b363 1698 if (memcmp(dig_in, dig_vv, dgs)) {
69a22773 1699 drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
28284cef 1700 return -EINVAL;
b411b363
PR
1701 }
1702 }
1703
69a22773 1704 D_ASSERT(peer_device->device, data_size == 0);
28284cef 1705 return 0;
b411b363
PR
1706}
1707
a990be46
AG
1708/*
1709 * e_end_resync_block() is called in asender context via
1710 * drbd_finish_peer_reqs().
1711 */
99920dc5 1712static int e_end_resync_block(struct drbd_work *w, int unused)
b411b363 1713{
8050e6d0 1714 struct drbd_peer_request *peer_req =
a8cd15ba
AG
1715 container_of(w, struct drbd_peer_request, w);
1716 struct drbd_peer_device *peer_device = peer_req->peer_device;
1717 struct drbd_device *device = peer_device->device;
db830c46 1718 sector_t sector = peer_req->i.sector;
99920dc5 1719 int err;
b411b363 1720
0b0ba1ef 1721 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
b411b363 1722
db830c46 1723 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
b30ab791 1724 drbd_set_in_sync(device, sector, peer_req->i.size);
a8cd15ba 1725 err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
b411b363
PR
1726 } else {
1727 /* Record failure to sync */
b30ab791 1728 drbd_rs_failed_io(device, sector, peer_req->i.size);
b411b363 1729
a8cd15ba 1730 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
b411b363 1731 }
b30ab791 1732 dec_unacked(device);
b411b363 1733
99920dc5 1734 return err;
b411b363
PR
1735}
1736
69a22773 1737static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
a0fb3c47 1738 struct packet_info *pi) __releases(local)
b411b363 1739{
69a22773 1740 struct drbd_device *device = peer_device->device;
db830c46 1741 struct drbd_peer_request *peer_req;
b411b363 1742
a0fb3c47 1743 peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
db830c46 1744 if (!peer_req)
45bb912b 1745 goto fail;
b411b363 1746
b30ab791 1747 dec_rs_pending(device);
b411b363 1748
b30ab791 1749 inc_unacked(device);
b411b363
PR
1750 /* corresponding dec_unacked() in e_end_resync_block()
 1751 * respectively in _drbd_clear_done_ee */
1752
a8cd15ba 1753 peer_req->w.cb = e_end_resync_block;
45bb912b 1754
0500813f 1755 spin_lock_irq(&device->resource->req_lock);
a8cd15ba 1756 list_add(&peer_req->w.list, &device->sync_ee);
0500813f 1757 spin_unlock_irq(&device->resource->req_lock);
b411b363 1758
a0fb3c47 1759 atomic_add(pi->size >> 9, &device->rs_sect_ev);
b30ab791 1760 if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
e1c1b0fc 1761 return 0;
b411b363 1762
10f6d992 1763 /* don't care for the reason here */
d0180171 1764 drbd_err(device, "submit failed, triggering re-connect\n");
0500813f 1765 spin_lock_irq(&device->resource->req_lock);
a8cd15ba 1766 list_del(&peer_req->w.list);
0500813f 1767 spin_unlock_irq(&device->resource->req_lock);
22cc37a9 1768
b30ab791 1769 drbd_free_peer_req(device, peer_req);
45bb912b 1770fail:
b30ab791 1771 put_ldev(device);
e1c1b0fc 1772 return -EIO;
b411b363
PR
1773}
1774
668eebc6 1775static struct drbd_request *
b30ab791 1776find_request(struct drbd_device *device, struct rb_root *root, u64 id,
bc9c5c41 1777 sector_t sector, bool missing_ok, const char *func)
51624585 1778{
51624585
AG
1779 struct drbd_request *req;
1780
bc9c5c41
AG
1781 /* Request object according to our peer */
1782 req = (struct drbd_request *)(unsigned long)id;
5e472264 1783 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
668eebc6 1784 return req;
c3afd8f5 1785 if (!missing_ok) {
d0180171 1786 drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
c3afd8f5
AG
1787 (unsigned long)id, (unsigned long long)sector);
1788 }
51624585 1789 return NULL;
b411b363
PR
1790}
1791
bde89a9e 1792static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 1793{
9f4fe9ad 1794 struct drbd_peer_device *peer_device;
b30ab791 1795 struct drbd_device *device;
b411b363
PR
1796 struct drbd_request *req;
1797 sector_t sector;
82bc0194 1798 int err;
e658983a 1799 struct p_data *p = pi->data;
4a76b161 1800
9f4fe9ad
AG
1801 peer_device = conn_peer_device(connection, pi->vnr);
1802 if (!peer_device)
4a76b161 1803 return -EIO;
9f4fe9ad 1804 device = peer_device->device;
b411b363
PR
1805
1806 sector = be64_to_cpu(p->sector);
1807
0500813f 1808 spin_lock_irq(&device->resource->req_lock);
b30ab791 1809 req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
0500813f 1810 spin_unlock_irq(&device->resource->req_lock);
c3afd8f5 1811 if (unlikely(!req))
82bc0194 1812 return -EIO;
b411b363 1813
24c4830c 1814 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
b411b363
PR
1815 * special casing it there for the various failure cases.
1816 * still no race with drbd_fail_pending_reads */
69a22773 1817 err = recv_dless_read(peer_device, req, sector, pi->size);
82bc0194 1818 if (!err)
8554df1c 1819 req_mod(req, DATA_RECEIVED);
b411b363
PR
1820 /* else: nothing. handled from drbd_disconnect...
1821 * I don't think we may complete this just yet
1822 * in case we are "on-disconnect: freeze" */
1823
82bc0194 1824 return err;
b411b363
PR
1825}
1826
bde89a9e 1827static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 1828{
9f4fe9ad 1829 struct drbd_peer_device *peer_device;
b30ab791 1830 struct drbd_device *device;
b411b363 1831 sector_t sector;
82bc0194 1832 int err;
e658983a 1833 struct p_data *p = pi->data;
4a76b161 1834
9f4fe9ad
AG
1835 peer_device = conn_peer_device(connection, pi->vnr);
1836 if (!peer_device)
4a76b161 1837 return -EIO;
9f4fe9ad 1838 device = peer_device->device;
b411b363
PR
1839
1840 sector = be64_to_cpu(p->sector);
0b0ba1ef 1841 D_ASSERT(device, p->block_id == ID_SYNCER);
b411b363 1842
b30ab791 1843 if (get_ldev(device)) {
b411b363
PR
1844 /* data is submitted to disk within recv_resync_read.
1845 * corresponding put_ldev done below on error,
fcefa62e 1846 * or in drbd_peer_request_endio. */
a0fb3c47 1847 err = recv_resync_read(peer_device, sector, pi);
b411b363
PR
1848 } else {
1849 if (__ratelimit(&drbd_ratelimit_state))
d0180171 1850 drbd_err(device, "Can not write resync data to local disk.\n");
b411b363 1851
69a22773 1852 err = drbd_drain_block(peer_device, pi->size);
b411b363 1853
69a22773 1854 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
b411b363
PR
1855 }
1856
b30ab791 1857 atomic_add(pi->size >> 9, &device->rs_sect_in);
778f271d 1858
82bc0194 1859 return err;
b411b363
PR
1860}
1861
b30ab791 1862static void restart_conflicting_writes(struct drbd_device *device,
7be8da07 1863 sector_t sector, int size)
b411b363 1864{
7be8da07
AG
1865 struct drbd_interval *i;
1866 struct drbd_request *req;
1867
b30ab791 1868 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
7be8da07
AG
1869 if (!i->local)
1870 continue;
1871 req = container_of(i, struct drbd_request, i);
1872 if (req->rq_state & RQ_LOCAL_PENDING ||
1873 !(req->rq_state & RQ_POSTPONED))
1874 continue;
2312f0b3
LE
1875 /* as it is RQ_POSTPONED, this will cause it to
1876 * be queued on the retry workqueue. */
d4dabbe2 1877 __req_mod(req, CONFLICT_RESOLVED, NULL);
7be8da07
AG
1878 }
1879}
b411b363 1880
a990be46
AG
1881/*
1882 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
b411b363 1883 */
99920dc5 1884static int e_end_block(struct drbd_work *w, int cancel)
b411b363 1885{
8050e6d0 1886 struct drbd_peer_request *peer_req =
a8cd15ba
AG
1887 container_of(w, struct drbd_peer_request, w);
1888 struct drbd_peer_device *peer_device = peer_req->peer_device;
1889 struct drbd_device *device = peer_device->device;
db830c46 1890 sector_t sector = peer_req->i.sector;
99920dc5 1891 int err = 0, pcmd;
b411b363 1892
303d1448 1893 if (peer_req->flags & EE_SEND_WRITE_ACK) {
db830c46 1894 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
b30ab791
AG
1895 pcmd = (device->state.conn >= C_SYNC_SOURCE &&
1896 device->state.conn <= C_PAUSED_SYNC_T &&
db830c46 1897 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
b411b363 1898 P_RS_WRITE_ACK : P_WRITE_ACK;
a8cd15ba 1899 err = drbd_send_ack(peer_device, pcmd, peer_req);
b411b363 1900 if (pcmd == P_RS_WRITE_ACK)
b30ab791 1901 drbd_set_in_sync(device, sector, peer_req->i.size);
b411b363 1902 } else {
a8cd15ba 1903 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
b411b363
PR
1904 /* we expect it to be marked out of sync anyways...
1905 * maybe assert this? */
1906 }
b30ab791 1907 dec_unacked(device);
b411b363
PR
1908 }
1909 /* we delete from the conflict detection hash _after_ we sent out the
1910 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
302bdeae 1911 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
0500813f 1912 spin_lock_irq(&device->resource->req_lock);
0b0ba1ef 1913 D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
b30ab791 1914 drbd_remove_epoch_entry_interval(device, peer_req);
7be8da07 1915 if (peer_req->flags & EE_RESTART_REQUESTS)
b30ab791 1916 restart_conflicting_writes(device, sector, peer_req->i.size);
0500813f 1917 spin_unlock_irq(&device->resource->req_lock);
bb3bfe96 1918 } else
0b0ba1ef 1919 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
b411b363 1920
a6b32bc3 1921 drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
b411b363 1922
99920dc5 1923 return err;
b411b363
PR
1924}
1925
a8cd15ba 1926static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
b411b363 1927{
8050e6d0 1928 struct drbd_peer_request *peer_req =
a8cd15ba
AG
1929 container_of(w, struct drbd_peer_request, w);
1930 struct drbd_peer_device *peer_device = peer_req->peer_device;
99920dc5 1931 int err;
b411b363 1932
a8cd15ba
AG
1933 err = drbd_send_ack(peer_device, ack, peer_req);
1934 dec_unacked(peer_device->device);
b411b363 1935
99920dc5 1936 return err;
b411b363
PR
1937}
1938
d4dabbe2 1939static int e_send_superseded(struct drbd_work *w, int unused)
7be8da07 1940{
a8cd15ba 1941 return e_send_ack(w, P_SUPERSEDED);
7be8da07
AG
1942}
1943
99920dc5 1944static int e_send_retry_write(struct drbd_work *w, int unused)
7be8da07 1945{
a8cd15ba
AG
1946 struct drbd_peer_request *peer_req =
1947 container_of(w, struct drbd_peer_request, w);
1948 struct drbd_connection *connection = peer_req->peer_device->connection;
7be8da07 1949
a8cd15ba 1950 return e_send_ack(w, connection->agreed_pro_version >= 100 ?
d4dabbe2 1951 P_RETRY_WRITE : P_SUPERSEDED);
7be8da07 1952}
b411b363 1953
3e394da1
AG
1954static bool seq_greater(u32 a, u32 b)
1955{
1956 /*
1957 * We assume 32-bit wrap-around here.
1958 * For 24-bit wrap-around, we would have to shift:
1959 * a <<= 8; b <<= 8;
1960 */
1961 return (s32)a - (s32)b > 0;
1962}
b411b363 1963
3e394da1
AG
1964static u32 seq_max(u32 a, u32 b)
1965{
1966 return seq_greater(a, b) ? a : b;
b411b363
PR
1967}
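/* A minimal user-space sketch (hypothetical, not part of this file) showing
 * why the signed subtraction in seq_greater() survives 32-bit wrap-around: */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool seq_greater_demo(uint32_t a, uint32_t b)
{
	/* same arithmetic as seq_greater() above */
	return (int32_t)a - (int32_t)b > 0;
}

int main(void)
{
	assert(seq_greater_demo(2, 1));			/* plain case */
	assert(!seq_greater_demo(1, 2));
	assert(seq_greater_demo(0, 0xffffffffu));	/* 0 is "after" the wrap */
	assert(!seq_greater_demo(0xffffffffu, 0));
	/* valid only while the two values are less than 2^31 apart,
	 * which the protocol guarantees in practice */
	return 0;
}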
1968
69a22773 1969static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
3e394da1 1970{
69a22773 1971 struct drbd_device *device = peer_device->device;
3c13b680 1972 unsigned int newest_peer_seq;
3e394da1 1973
69a22773 1974 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
b30ab791
AG
1975 spin_lock(&device->peer_seq_lock);
1976 newest_peer_seq = seq_max(device->peer_seq, peer_seq);
1977 device->peer_seq = newest_peer_seq;
1978 spin_unlock(&device->peer_seq_lock);
1979 /* wake up only if we actually changed device->peer_seq */
3c13b680 1980 if (peer_seq == newest_peer_seq)
b30ab791 1981 wake_up(&device->seq_wait);
7be8da07 1982 }
b411b363
PR
1983}
1984
d93f6302 1985static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
b6a370ba 1986{
d93f6302
LE
1987 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
1988}
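/* Unit note (illustrative, added for clarity): s1/s2 are 512-byte sector
 * numbers while l1/l2 are byte lengths, hence the >>9. For example, with
 * two 4 KiB requests:
 *
 *	overlaps(0, 4096, 8, 4096) == 0   sectors 0..7 vs 8..15, disjoint
 *	overlaps(0, 4096, 7, 4096) == 1   sector 7 lies in both ranges
 */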
b6a370ba 1989
d93f6302 1990/* maybe change sync_ee into interval trees as well? */
b30ab791 1991static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
d93f6302
LE
1992{
1993 struct drbd_peer_request *rs_req;
b6a370ba
PR
1994 bool rv = 0;
1995
0500813f 1996 spin_lock_irq(&device->resource->req_lock);
a8cd15ba 1997 list_for_each_entry(rs_req, &device->sync_ee, w.list) {
d93f6302
LE
1998 if (overlaps(peer_req->i.sector, peer_req->i.size,
1999 rs_req->i.sector, rs_req->i.size)) {
b6a370ba
PR
2000 rv = 1;
2001 break;
2002 }
2003 }
0500813f 2004 spin_unlock_irq(&device->resource->req_lock);
b6a370ba
PR
2005
2006 return rv;
2007}
2008
b411b363
PR
2009/* Called from receive_Data.
2010 * Synchronize packets on sock with packets on msock.
2011 *
2012 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
2013 * packet traveling on msock, they are still processed in the order they have
2014 * been sent.
2015 *
2016 * Note: we don't care for Ack packets overtaking P_DATA packets.
2017 *
b30ab791 2018 * In case packet_seq is larger than device->peer_seq number, there are
b411b363 2019 * outstanding packets on the msock. We wait for them to arrive.
b30ab791 2020 * In case we are the logically next packet, we update device->peer_seq
b411b363
PR
2021 * ourselves. Correctly handles 32bit wrap around.
2022 *
 2023 * Assume we have a 10 GBit connection, that is about 1<<30 bytes per second,
 2024 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
 2025 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
 2026 * 1<<11 == 2048 seconds aka ages for the 32bit wrap around...
2027 *
2028 * returns 0 if we may process the packet,
2029 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
69a22773 2030static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
b411b363 2031{
69a22773 2032 struct drbd_device *device = peer_device->device;
b411b363 2033 DEFINE_WAIT(wait);
b411b363 2034 long timeout;
b874d231 2035 int ret = 0, tp;
7be8da07 2036
69a22773 2037 if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
7be8da07
AG
2038 return 0;
2039
b30ab791 2040 spin_lock(&device->peer_seq_lock);
b411b363 2041 for (;;) {
b30ab791
AG
2042 if (!seq_greater(peer_seq - 1, device->peer_seq)) {
2043 device->peer_seq = seq_max(device->peer_seq, peer_seq);
b411b363 2044 break;
7be8da07 2045 }
b874d231 2046
b411b363
PR
2047 if (signal_pending(current)) {
2048 ret = -ERESTARTSYS;
2049 break;
2050 }
b874d231
PR
2051
2052 rcu_read_lock();
a6b32bc3 2053 tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
b874d231
PR
2054 rcu_read_unlock();
2055
2056 if (!tp)
2057 break;
2058
2059 /* Only need to wait if two_primaries is enabled */
b30ab791
AG
2060 prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
2061 spin_unlock(&device->peer_seq_lock);
44ed167d 2062 rcu_read_lock();
69a22773 2063 timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
44ed167d 2064 rcu_read_unlock();
71b1c1eb 2065 timeout = schedule_timeout(timeout);
b30ab791 2066 spin_lock(&device->peer_seq_lock);
7be8da07 2067 if (!timeout) {
b411b363 2068 ret = -ETIMEDOUT;
d0180171 2069 drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
b411b363
PR
2070 break;
2071 }
2072 }
b30ab791
AG
2073 spin_unlock(&device->peer_seq_lock);
2074 finish_wait(&device->seq_wait, &wait);
b411b363
PR
2075 return ret;
2076}
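/* Worked trace (illustrative numbers): an incoming P_DATA carries
 * peer_seq == 42. If device->peer_seq is already 41 or more, then
 * seq_greater(41, device->peer_seq) is false, so we proceed immediately
 * and bump device->peer_seq to 42. If it is still 40, some packet on the
 * msock has not been processed yet, so we sleep on seq_wait until
 * update_peer_seq() catches up, a signal arrives, or ping_timeo expires. */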
2077
688593c5
LE
2078/* see also bio_flags_to_wire()
2079 * DRBD_REQ_*, because we need to semantically map the flags to data packet
2080 * flags and back. We may replicate to other kernel versions. */
81f0ffd2 2081static unsigned long wire_flags_to_bio(u32 dpf)
76d2e7ec 2082{
688593c5
LE
2083 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2084 (dpf & DP_FUA ? REQ_FUA : 0) |
2085 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
2086 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
76d2e7ec
PR
2087}
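/* For orientation, a sketch of the sending-side counterpart (it lives in
 * drbd_main.c; the protocol-95 cutoff for the extended flags is an
 * assumption here, not quoted from this tree): */
static u32 bio_flags_to_wire_sketch(struct drbd_connection *connection,
				    unsigned long bi_rw)
{
	if (connection->agreed_pro_version >= 95)
		return  (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
			(bi_rw & REQ_FUA ? DP_FUA : 0) |
			(bi_rw & REQ_FLUSH ? DP_FLUSH : 0) |
			(bi_rw & REQ_DISCARD ? DP_DISCARD : 0);
	else
		return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
}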
2088
b30ab791 2089static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
7be8da07
AG
2090 unsigned int size)
2091{
2092 struct drbd_interval *i;
2093
2094 repeat:
b30ab791 2095 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
7be8da07
AG
2096 struct drbd_request *req;
2097 struct bio_and_error m;
2098
2099 if (!i->local)
2100 continue;
2101 req = container_of(i, struct drbd_request, i);
2102 if (!(req->rq_state & RQ_POSTPONED))
2103 continue;
2104 req->rq_state &= ~RQ_POSTPONED;
2105 __req_mod(req, NEG_ACKED, &m);
0500813f 2106 spin_unlock_irq(&device->resource->req_lock);
7be8da07 2107 if (m.bio)
b30ab791 2108 complete_master_bio(device, &m);
0500813f 2109 spin_lock_irq(&device->resource->req_lock);
7be8da07
AG
2110 goto repeat;
2111 }
2112}
2113
b30ab791 2114static int handle_write_conflicts(struct drbd_device *device,
7be8da07
AG
2115 struct drbd_peer_request *peer_req)
2116{
e33b32de 2117 struct drbd_connection *connection = peer_req->peer_device->connection;
bde89a9e 2118 bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
7be8da07
AG
2119 sector_t sector = peer_req->i.sector;
2120 const unsigned int size = peer_req->i.size;
2121 struct drbd_interval *i;
2122 bool equal;
2123 int err;
2124
2125 /*
2126 * Inserting the peer request into the write_requests tree will prevent
2127 * new conflicting local requests from being added.
2128 */
b30ab791 2129 drbd_insert_interval(&device->write_requests, &peer_req->i);
7be8da07
AG
2130
2131 repeat:
b30ab791 2132 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
7be8da07
AG
2133 if (i == &peer_req->i)
2134 continue;
2135
2136 if (!i->local) {
2137 /*
2138 * Our peer has sent a conflicting remote request; this
2139 * should not happen in a two-node setup. Wait for the
2140 * earlier peer request to complete.
2141 */
b30ab791 2142 err = drbd_wait_misc(device, i);
7be8da07
AG
2143 if (err)
2144 goto out;
2145 goto repeat;
2146 }
2147
2148 equal = i->sector == sector && i->size == size;
2149 if (resolve_conflicts) {
2150 /*
2151 * If the peer request is fully contained within the
d4dabbe2
LE
2152 * overlapping request, it can be considered overwritten
2153 * and thus superseded; otherwise, it will be retried
2154 * once all overlapping requests have completed.
7be8da07 2155 */
d4dabbe2 2156 bool superseded = i->sector <= sector && i->sector +
7be8da07
AG
2157 (i->size >> 9) >= sector + (size >> 9);
2158
2159 if (!equal)
d0180171 2160 drbd_alert(device, "Concurrent writes detected: "
7be8da07
AG
2161 "local=%llus +%u, remote=%llus +%u, "
2162 "assuming %s came first\n",
2163 (unsigned long long)i->sector, i->size,
2164 (unsigned long long)sector, size,
d4dabbe2 2165 superseded ? "local" : "remote");
7be8da07 2166
b30ab791 2167 inc_unacked(device);
a8cd15ba 2168 peer_req->w.cb = superseded ? e_send_superseded :
7be8da07 2169 e_send_retry_write;
a8cd15ba 2170 list_add_tail(&peer_req->w.list, &device->done_ee);
e33b32de 2171 wake_asender(connection);
7be8da07
AG
2172
2173 err = -ENOENT;
2174 goto out;
2175 } else {
2176 struct drbd_request *req =
2177 container_of(i, struct drbd_request, i);
2178
2179 if (!equal)
d0180171 2180 drbd_alert(device, "Concurrent writes detected: "
7be8da07
AG
2181 "local=%llus +%u, remote=%llus +%u\n",
2182 (unsigned long long)i->sector, i->size,
2183 (unsigned long long)sector, size);
2184
2185 if (req->rq_state & RQ_LOCAL_PENDING ||
2186 !(req->rq_state & RQ_POSTPONED)) {
2187 /*
2188 * Wait for the node with the discard flag to
d4dabbe2
LE
2189 * decide if this request has been superseded
2190 * or needs to be retried.
2191 * Requests that have been superseded will
7be8da07
AG
2192 * disappear from the write_requests tree.
2193 *
2194 * In addition, wait for the conflicting
2195 * request to finish locally before submitting
2196 * the conflicting peer request.
2197 */
b30ab791 2198 err = drbd_wait_misc(device, &req->i);
7be8da07 2199 if (err) {
e33b32de 2200 _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
b30ab791 2201 fail_postponed_requests(device, sector, size);
7be8da07
AG
2202 goto out;
2203 }
2204 goto repeat;
2205 }
2206 /*
2207 * Remember to restart the conflicting requests after
2208 * the new peer request has completed.
2209 */
2210 peer_req->flags |= EE_RESTART_REQUESTS;
2211 }
2212 }
2213 err = 0;
2214
2215 out:
2216 if (err)
b30ab791 2217 drbd_remove_epoch_entry_interval(device, peer_req);
7be8da07
AG
2218 return err;
2219}
2220
b411b363 2221/* mirrored write */
bde89a9e 2222static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
b411b363 2223{
9f4fe9ad 2224 struct drbd_peer_device *peer_device;
b30ab791 2225 struct drbd_device *device;
b411b363 2226 sector_t sector;
db830c46 2227 struct drbd_peer_request *peer_req;
e658983a 2228 struct p_data *p = pi->data;
7be8da07 2229 u32 peer_seq = be32_to_cpu(p->seq_num);
b411b363
PR
2230 int rw = WRITE;
2231 u32 dp_flags;
302bdeae 2232 int err, tp;
b411b363 2233
9f4fe9ad
AG
2234 peer_device = conn_peer_device(connection, pi->vnr);
2235 if (!peer_device)
4a76b161 2236 return -EIO;
9f4fe9ad 2237 device = peer_device->device;
b411b363 2238
b30ab791 2239 if (!get_ldev(device)) {
82bc0194
AG
2240 int err2;
2241
69a22773
AG
2242 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
2243 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
bde89a9e 2244 atomic_inc(&connection->current_epoch->epoch_size);
69a22773 2245 err2 = drbd_drain_block(peer_device, pi->size);
82bc0194
AG
2246 if (!err)
2247 err = err2;
2248 return err;
b411b363
PR
2249 }
2250
fcefa62e
AG
2251 /*
2252 * Corresponding put_ldev done either below (on various errors), or in
2253 * drbd_peer_request_endio, if we successfully submit the data at the
2254 * end of this function.
2255 */
b411b363
PR
2256
2257 sector = be64_to_cpu(p->sector);
a0fb3c47 2258 peer_req = read_in_block(peer_device, p->block_id, sector, pi);
db830c46 2259 if (!peer_req) {
b30ab791 2260 put_ldev(device);
82bc0194 2261 return -EIO;
b411b363
PR
2262 }
2263
a8cd15ba 2264 peer_req->w.cb = e_end_block;
b411b363 2265
688593c5 2266 dp_flags = be32_to_cpu(p->dp_flags);
81f0ffd2 2267 rw |= wire_flags_to_bio(dp_flags);
a0fb3c47
LE
2268 if (pi->cmd == P_TRIM) {
2269 struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
2270 peer_req->flags |= EE_IS_TRIM;
2271 if (!blk_queue_discard(q))
2272 peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
2273 D_ASSERT(peer_device, peer_req->i.size > 0);
2274 D_ASSERT(peer_device, rw & REQ_DISCARD);
2275 D_ASSERT(peer_device, peer_req->pages == NULL);
2276 } else if (peer_req->pages == NULL) {
0b0ba1ef
AG
2277 D_ASSERT(device, peer_req->i.size == 0);
2278 D_ASSERT(device, dp_flags & DP_FLUSH);
a73ff323 2279 }
688593c5
LE
2280
2281 if (dp_flags & DP_MAY_SET_IN_SYNC)
db830c46 2282 peer_req->flags |= EE_MAY_SET_IN_SYNC;
688593c5 2283
bde89a9e
AG
2284 spin_lock(&connection->epoch_lock);
2285 peer_req->epoch = connection->current_epoch;
db830c46
AG
2286 atomic_inc(&peer_req->epoch->epoch_size);
2287 atomic_inc(&peer_req->epoch->active);
bde89a9e 2288 spin_unlock(&connection->epoch_lock);
b411b363 2289
302bdeae 2290 rcu_read_lock();
9f4fe9ad 2291 tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
302bdeae
PR
2292 rcu_read_unlock();
2293 if (tp) {
2294 peer_req->flags |= EE_IN_INTERVAL_TREE;
69a22773 2295 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
7be8da07 2296 if (err)
b411b363 2297 goto out_interrupted;
0500813f 2298 spin_lock_irq(&device->resource->req_lock);
b30ab791 2299 err = handle_write_conflicts(device, peer_req);
7be8da07 2300 if (err) {
0500813f 2301 spin_unlock_irq(&device->resource->req_lock);
7be8da07 2302 if (err == -ENOENT) {
b30ab791 2303 put_ldev(device);
82bc0194 2304 return 0;
b411b363 2305 }
7be8da07 2306 goto out_interrupted;
b411b363 2307 }
b874d231 2308 } else {
69a22773 2309 update_peer_seq(peer_device, peer_seq);
0500813f 2310 spin_lock_irq(&device->resource->req_lock);
b874d231 2311 }
a0fb3c47
LE
2312 /* if we use the zeroout fallback code, we process synchronously
2313 * and we wait for all pending requests, respectively wait for
2314 * active_ee to become empty in drbd_submit_peer_request();
2315 * better not add ourselves here. */
2316 if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0)
2317 list_add(&peer_req->w.list, &device->active_ee);
0500813f 2318 spin_unlock_irq(&device->resource->req_lock);
b411b363 2319
b30ab791
AG
2320 if (device->state.conn == C_SYNC_TARGET)
2321 wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
b411b363 2322
9f4fe9ad 2323 if (peer_device->connection->agreed_pro_version < 100) {
44ed167d 2324 rcu_read_lock();
9f4fe9ad 2325 switch (rcu_dereference(peer_device->connection->net_conf)->wire_protocol) {
303d1448
PR
2326 case DRBD_PROT_C:
2327 dp_flags |= DP_SEND_WRITE_ACK;
2328 break;
2329 case DRBD_PROT_B:
2330 dp_flags |= DP_SEND_RECEIVE_ACK;
2331 break;
b411b363 2332 }
44ed167d 2333 rcu_read_unlock();
b411b363
PR
2334 }
2335
303d1448
PR
2336 if (dp_flags & DP_SEND_WRITE_ACK) {
2337 peer_req->flags |= EE_SEND_WRITE_ACK;
b30ab791 2338 inc_unacked(device);
b411b363
PR
2339 /* corresponding dec_unacked() in e_end_block()
 2340 * respectively in _drbd_clear_done_ee */
303d1448
PR
2341 }
2342
2343 if (dp_flags & DP_SEND_RECEIVE_ACK) {
b411b363
PR
2344 /* I really don't like it that the receiver thread
2345 * sends on the msock, but anyways */
69a22773 2346 drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
b411b363
PR
2347 }
2348
b30ab791 2349 if (device->state.pdsk < D_INCONSISTENT) {
b411b363 2350 /* In case we have the only disk of the cluster, */
b30ab791 2351 drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
db830c46
AG
2352 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2353 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
4dd726f0 2354 drbd_al_begin_io(device, &peer_req->i);
b411b363
PR
2355 }
2356
b30ab791 2357 err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
82bc0194
AG
2358 if (!err)
2359 return 0;
b411b363 2360
10f6d992 2361 /* don't care for the reason here */
d0180171 2362 drbd_err(device, "submit failed, triggering re-connect\n");
0500813f 2363 spin_lock_irq(&device->resource->req_lock);
a8cd15ba 2364 list_del(&peer_req->w.list);
b30ab791 2365 drbd_remove_epoch_entry_interval(device, peer_req);
0500813f 2366 spin_unlock_irq(&device->resource->req_lock);
db830c46 2367 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
b30ab791 2368 drbd_al_complete_io(device, &peer_req->i);
22cc37a9 2369
b411b363 2370out_interrupted:
bde89a9e 2371 drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
b30ab791
AG
2372 put_ldev(device);
2373 drbd_free_peer_req(device, peer_req);
82bc0194 2374 return err;
b411b363
PR
2375}
2376
0f0601f4
LE
2377/* We may throttle resync, if the lower device seems to be busy,
2378 * and current sync rate is above c_min_rate.
2379 *
2380 * To decide whether or not the lower device is busy, we use a scheme similar
2381 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
2382 * (more than 64 sectors) of activity we cannot account for with our own resync
2383 * activity, it obviously is "busy".
2384 *
2385 * The current sync rate used here uses only the most recent two step marks,
2386 * to have a short time average so we can react faster.
2387 */
e8299874 2388bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
0f0601f4 2389{
e3555d85 2390 struct lc_element *tmp;
e8299874 2391 bool throttle = true;
daeda1cc 2392
e8299874
LE
2393 if (!drbd_rs_c_min_rate_throttle(device))
2394 return false;
0f0601f4 2395
b30ab791
AG
2396 spin_lock_irq(&device->al_lock);
2397 tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
e3555d85
PR
2398 if (tmp) {
2399 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
e8299874
LE
2400 if (test_bit(BME_PRIORITY, &bm_ext->flags))
2401 throttle = false;
e3555d85
PR
2402 /* Do not slow down if app IO is already waiting for this extent */
2403 }
b30ab791 2404 spin_unlock_irq(&device->al_lock);
e3555d85 2405
e8299874
LE
2406 return throttle;
2407}
2408
2409bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
2410{
2411 struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
2412 unsigned long db, dt, dbdt;
2413 unsigned int c_min_rate;
2414 int curr_events;
2415
2416 rcu_read_lock();
2417 c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
2418 rcu_read_unlock();
2419
2420 /* feature disabled? */
2421 if (c_min_rate == 0)
2422 return false;
2423
0f0601f4
LE
2424 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2425 (int)part_stat_read(&disk->part0, sectors[1]) -
b30ab791 2426 atomic_read(&device->rs_sect_ev);
b30ab791 2427 if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
0f0601f4
LE
2428 unsigned long rs_left;
2429 int i;
2430
b30ab791 2431 device->rs_last_events = curr_events;
0f0601f4
LE
2432
2433 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2434 * approx. */
b30ab791 2435 i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
2649f080 2436
b30ab791
AG
2437 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2438 rs_left = device->ov_left;
2649f080 2439 else
b30ab791 2440 rs_left = drbd_bm_total_weight(device) - device->rs_failed;
0f0601f4 2441
b30ab791 2442 dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
0f0601f4
LE
2443 if (!dt)
2444 dt++;
b30ab791 2445 db = device->rs_mark_left[i] - rs_left;
0f0601f4
LE
2446 dbdt = Bit2KB(db/dt);
2447
daeda1cc 2448 if (dbdt > c_min_rate)
e8299874 2449 return true;
0f0601f4 2450 }
e8299874 2451 return false;
0f0601f4
LE
2452}
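/* Worked example (illustrative numbers, assuming the usual 4 KiB bitmap
 * granularity): suppose the chosen sync marks are dt == 3 seconds apart
 * and db == 1500 bitmap bits were cleared since then. One bit covers one
 * 4 KiB resync block, so dbdt = Bit2KB(1500 / 3) == 500 * 4 == 2000 KiB/s.
 * With c_min_rate configured at, say, 250 KiB/s, the current rate is well
 * above the floor, so drbd_rs_c_min_rate_throttle() returns true and the
 * resync may be throttled in favor of application IO. */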
2453
bde89a9e 2454static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
b411b363 2455{
9f4fe9ad 2456 struct drbd_peer_device *peer_device;
b30ab791 2457 struct drbd_device *device;
b411b363 2458 sector_t sector;
4a76b161 2459 sector_t capacity;
db830c46 2460 struct drbd_peer_request *peer_req;
b411b363 2461 struct digest_info *di = NULL;
b18b37be 2462 int size, verb;
b411b363 2463 unsigned int fault_type;
e658983a 2464 struct p_block_req *p = pi->data;
4a76b161 2465
9f4fe9ad
AG
2466 peer_device = conn_peer_device(connection, pi->vnr);
2467 if (!peer_device)
4a76b161 2468 return -EIO;
9f4fe9ad 2469 device = peer_device->device;
b30ab791 2470 capacity = drbd_get_capacity(device->this_bdev);
b411b363
PR
2471
2472 sector = be64_to_cpu(p->sector);
2473 size = be32_to_cpu(p->blksize);
2474
c670a398 2475 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
d0180171 2476 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
b411b363 2477 (unsigned long long)sector, size);
82bc0194 2478 return -EINVAL;
b411b363
PR
2479 }
2480 if (sector + (size>>9) > capacity) {
d0180171 2481 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
b411b363 2482 (unsigned long long)sector, size);
82bc0194 2483 return -EINVAL;
b411b363
PR
2484 }
2485
b30ab791 2486 if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
b18b37be 2487 verb = 1;
e2857216 2488 switch (pi->cmd) {
b18b37be 2489 case P_DATA_REQUEST:
69a22773 2490 drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
b18b37be
PR
2491 break;
2492 case P_RS_DATA_REQUEST:
2493 case P_CSUM_RS_REQUEST:
2494 case P_OV_REQUEST:
69a22773 2495 drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
b18b37be
PR
2496 break;
2497 case P_OV_REPLY:
2498 verb = 0;
b30ab791 2499 dec_rs_pending(device);
69a22773 2500 drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
b18b37be
PR
2501 break;
2502 default:
49ba9b1b 2503 BUG();
b18b37be
PR
2504 }
2505 if (verb && __ratelimit(&drbd_ratelimit_state))
d0180171 2506 drbd_err(device, "Can not satisfy peer's read request, "
b411b363 2507 "no local data.\n");
b18b37be 2508
a821cc4a 2509 /* drain possible payload */
69a22773 2510 return drbd_drain_block(peer_device, pi->size);
b411b363
PR
2511 }
2512
2513 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2514 * "criss-cross" setup, that might cause write-out on some other DRBD,
2515 * which in turn might block on the other node at this very place. */
a0fb3c47
LE
2516 peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
2517 true /* has real payload */, GFP_NOIO);
db830c46 2518 if (!peer_req) {
b30ab791 2519 put_ldev(device);
82bc0194 2520 return -ENOMEM;
b411b363
PR
2521 }
2522
e2857216 2523 switch (pi->cmd) {
b411b363 2524 case P_DATA_REQUEST:
a8cd15ba 2525 peer_req->w.cb = w_e_end_data_req;
b411b363 2526 fault_type = DRBD_FAULT_DT_RD;
80a40e43
LE
2527 /* application IO, don't drbd_rs_begin_io */
2528 goto submit;
2529
b411b363 2530 case P_RS_DATA_REQUEST:
a8cd15ba 2531 peer_req->w.cb = w_e_end_rsdata_req;
b411b363 2532 fault_type = DRBD_FAULT_RS_RD;
5f9915bb 2533 /* used in the sector offset progress display */
b30ab791 2534 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
b411b363
PR
2535 break;
2536
2537 case P_OV_REPLY:
2538 case P_CSUM_RS_REQUEST:
2539 fault_type = DRBD_FAULT_RS_RD;
e2857216 2540 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
b411b363
PR
2541 if (!di)
2542 goto out_free_e;
2543
e2857216 2544 di->digest_size = pi->size;
b411b363
PR
2545 di->digest = (((char *)di)+sizeof(struct digest_info));
2546
db830c46
AG
2547 peer_req->digest = di;
2548 peer_req->flags |= EE_HAS_DIGEST;
c36c3ced 2549
9f4fe9ad 2550 if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
b411b363
PR
2551 goto out_free_e;
2552
e2857216 2553 if (pi->cmd == P_CSUM_RS_REQUEST) {
9f4fe9ad 2554 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
a8cd15ba 2555 peer_req->w.cb = w_e_end_csum_rs_req;
5f9915bb 2556 /* used in the sector offset progress display */
b30ab791 2557 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
e2857216 2558 } else if (pi->cmd == P_OV_REPLY) {
2649f080 2559 /* track progress, we may need to throttle */
b30ab791 2560 atomic_add(size >> 9, &device->rs_sect_in);
a8cd15ba 2561 peer_req->w.cb = w_e_end_ov_reply;
b30ab791 2562 dec_rs_pending(device);
0f0601f4
LE
2563 /* drbd_rs_begin_io done when we sent this request,
2564 * but accounting still needs to be done. */
2565 goto submit_for_resync;
b411b363
PR
2566 }
2567 break;
2568
2569 case P_OV_REQUEST:
b30ab791 2570 if (device->ov_start_sector == ~(sector_t)0 &&
9f4fe9ad 2571 peer_device->connection->agreed_pro_version >= 90) {
de228bba
LE
2572 unsigned long now = jiffies;
2573 int i;
b30ab791
AG
2574 device->ov_start_sector = sector;
2575 device->ov_position = sector;
2576 device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
2577 device->rs_total = device->ov_left;
de228bba 2578 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
b30ab791
AG
2579 device->rs_mark_left[i] = device->ov_left;
2580 device->rs_mark_time[i] = now;
de228bba 2581 }
d0180171 2582 drbd_info(device, "Online Verify start sector: %llu\n",
b411b363
PR
2583 (unsigned long long)sector);
2584 }
a8cd15ba 2585 peer_req->w.cb = w_e_end_ov_req;
b411b363 2586 fault_type = DRBD_FAULT_RS_RD;
b411b363
PR
2587 break;
2588
b411b363 2589 default:
49ba9b1b 2590 BUG();
b411b363
PR
2591 }
2592
0f0601f4
LE
2593 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2594 * wrt the receiver, but it is not as straightforward as it may seem.
2595 * Various places in the resync start and stop logic assume resync
2596 * requests are processed in order, requeuing this on the worker thread
2597 * introduces a bunch of new code for synchronization between threads.
2598 *
2599 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2600 * "forever", throttling after drbd_rs_begin_io will lock that extent
2601 * for application writes for the same time. For now, just throttle
2602 * here, where the rest of the code expects the receiver to sleep for
2603 * a while, anyways.
2604 */
2605
2606 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2607 * this defers syncer requests for some time, before letting at least
 2608 * one request through. The resync controller on the receiving side
2609 * will adapt to the incoming rate accordingly.
2610 *
2611 * We cannot throttle here if remote is Primary/SyncTarget:
2612 * we would also throttle its application reads.
2613 * In that case, throttling is done on the SyncTarget only.
2614 */
b30ab791 2615 if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector))
e3555d85 2616 schedule_timeout_uninterruptible(HZ/10);
b30ab791 2617 if (drbd_rs_begin_io(device, sector))
80a40e43 2618 goto out_free_e;
b411b363 2619
0f0601f4 2620submit_for_resync:
b30ab791 2621 atomic_add(size >> 9, &device->rs_sect_ev);
0f0601f4 2622
80a40e43 2623submit:
b30ab791 2624 inc_unacked(device);
0500813f 2625 spin_lock_irq(&device->resource->req_lock);
a8cd15ba 2626 list_add_tail(&peer_req->w.list, &device->read_ee);
0500813f 2627 spin_unlock_irq(&device->resource->req_lock);
b411b363 2628
b30ab791 2629 if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
82bc0194 2630 return 0;
b411b363 2631
10f6d992 2632 /* don't care for the reason here */
d0180171 2633 drbd_err(device, "submit failed, triggering re-connect\n");
0500813f 2634 spin_lock_irq(&device->resource->req_lock);
a8cd15ba 2635 list_del(&peer_req->w.list);
0500813f 2636 spin_unlock_irq(&device->resource->req_lock);
22cc37a9
LE
2637 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2638
b411b363 2639out_free_e:
b30ab791
AG
2640 put_ldev(device);
2641 drbd_free_peer_req(device, peer_req);
82bc0194 2642 return -EIO;
b411b363
PR
2643}
2644
69a22773
AG
2645/**
2646 * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries
2647 */
2648static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
b411b363 2649{
69a22773 2650 struct drbd_device *device = peer_device->device;
b411b363
PR
2651 int self, peer, rv = -100;
2652 unsigned long ch_self, ch_peer;
44ed167d 2653 enum drbd_after_sb_p after_sb_0p;
b411b363 2654
b30ab791
AG
2655 self = device->ldev->md.uuid[UI_BITMAP] & 1;
2656 peer = device->p_uuid[UI_BITMAP] & 1;
b411b363 2657
b30ab791
AG
2658 ch_peer = device->p_uuid[UI_SIZE];
2659 ch_self = device->comm_bm_set;
b411b363 2660
44ed167d 2661 rcu_read_lock();
69a22773 2662 after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
44ed167d
PR
2663 rcu_read_unlock();
2664 switch (after_sb_0p) {
b411b363
PR
2665 case ASB_CONSENSUS:
2666 case ASB_DISCARD_SECONDARY:
2667 case ASB_CALL_HELPER:
44ed167d 2668 case ASB_VIOLENTLY:
d0180171 2669 drbd_err(device, "Configuration error.\n");
b411b363
PR
2670 break;
2671 case ASB_DISCONNECT:
2672 break;
2673 case ASB_DISCARD_YOUNGER_PRI:
2674 if (self == 0 && peer == 1) {
2675 rv = -1;
2676 break;
2677 }
2678 if (self == 1 && peer == 0) {
2679 rv = 1;
2680 break;
2681 }
2682 /* Else fall through to one of the other strategies... */
2683 case ASB_DISCARD_OLDER_PRI:
2684 if (self == 0 && peer == 1) {
2685 rv = 1;
2686 break;
2687 }
2688 if (self == 1 && peer == 0) {
2689 rv = -1;
2690 break;
2691 }
2692 /* Else fall through to one of the other strategies... */
d0180171 2693 drbd_warn(device, "Discard younger/older primary did not find a decision\n"
b411b363
PR
2694 "Using discard-least-changes instead\n");
2695 case ASB_DISCARD_ZERO_CHG:
2696 if (ch_peer == 0 && ch_self == 0) {
69a22773 2697 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
b411b363
PR
2698 ? -1 : 1;
2699 break;
2700 } else {
2701 if (ch_peer == 0) { rv = 1; break; }
2702 if (ch_self == 0) { rv = -1; break; }
2703 }
44ed167d 2704 if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
b411b363
PR
2705 break;
2706 case ASB_DISCARD_LEAST_CHG:
2707 if (ch_self < ch_peer)
2708 rv = -1;
2709 else if (ch_self > ch_peer)
2710 rv = 1;
2711 else /* ( ch_self == ch_peer ) */
2712 /* Well, then use something else. */
69a22773 2713 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
b411b363
PR
2714 ? -1 : 1;
2715 break;
2716 case ASB_DISCARD_LOCAL:
2717 rv = -1;
2718 break;
2719 case ASB_DISCARD_REMOTE:
2720 rv = 1;
2721 }
2722
2723 return rv;
2724}
2725
69a22773
AG
2726/**
2727 * drbd_asb_recover_1p - Recover after split-brain with one remaining primary
2728 */
2729static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
b411b363 2730{
69a22773 2731 struct drbd_device *device = peer_device->device;
6184ea21 2732 int hg, rv = -100;
44ed167d 2733 enum drbd_after_sb_p after_sb_1p;
b411b363 2734
44ed167d 2735 rcu_read_lock();
69a22773 2736 after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
44ed167d
PR
2737 rcu_read_unlock();
2738 switch (after_sb_1p) {
b411b363
PR
2739 case ASB_DISCARD_YOUNGER_PRI:
2740 case ASB_DISCARD_OLDER_PRI:
2741 case ASB_DISCARD_LEAST_CHG:
2742 case ASB_DISCARD_LOCAL:
2743 case ASB_DISCARD_REMOTE:
44ed167d 2744 case ASB_DISCARD_ZERO_CHG:
d0180171 2745 drbd_err(device, "Configuration error.\n");
b411b363
PR
2746 break;
2747 case ASB_DISCONNECT:
2748 break;
2749 case ASB_CONSENSUS:
69a22773 2750 hg = drbd_asb_recover_0p(peer_device);
b30ab791 2751 if (hg == -1 && device->state.role == R_SECONDARY)
b411b363 2752 rv = hg;
b30ab791 2753 if (hg == 1 && device->state.role == R_PRIMARY)
b411b363
PR
2754 rv = hg;
2755 break;
2756 case ASB_VIOLENTLY:
69a22773 2757 rv = drbd_asb_recover_0p(peer_device);
b411b363
PR
2758 break;
2759 case ASB_DISCARD_SECONDARY:
b30ab791 2760 return device->state.role == R_PRIMARY ? 1 : -1;
b411b363 2761 case ASB_CALL_HELPER:
69a22773 2762 hg = drbd_asb_recover_0p(peer_device);
b30ab791 2763 if (hg == -1 && device->state.role == R_PRIMARY) {
bb437946
AG
2764 enum drbd_state_rv rv2;
2765
b411b363
PR
2766 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2767 * we might be here in C_WF_REPORT_PARAMS which is transient.
2768 * we do not need to wait for the after state change work either. */
b30ab791 2769 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
bb437946 2770 if (rv2 != SS_SUCCESS) {
b30ab791 2771 drbd_khelper(device, "pri-lost-after-sb");
b411b363 2772 } else {
d0180171 2773 drbd_warn(device, "Successfully gave up primary role.\n");
b411b363
PR
2774 rv = hg;
2775 }
2776 } else
2777 rv = hg;
2778 }
2779
2780 return rv;
2781}
2782
69a22773
AG
2783/**
2784 * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries
2785 */
2786static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
b411b363 2787{
69a22773 2788 struct drbd_device *device = peer_device->device;
6184ea21 2789 int hg, rv = -100;
44ed167d 2790 enum drbd_after_sb_p after_sb_2p;
b411b363 2791
44ed167d 2792 rcu_read_lock();
69a22773 2793 after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
44ed167d
PR
2794 rcu_read_unlock();
2795 switch (after_sb_2p) {
b411b363
PR
2796 case ASB_DISCARD_YOUNGER_PRI:
2797 case ASB_DISCARD_OLDER_PRI:
2798 case ASB_DISCARD_LEAST_CHG:
2799 case ASB_DISCARD_LOCAL:
2800 case ASB_DISCARD_REMOTE:
2801 case ASB_CONSENSUS:
2802 case ASB_DISCARD_SECONDARY:
44ed167d 2803 case ASB_DISCARD_ZERO_CHG:
d0180171 2804 drbd_err(device, "Configuration error.\n");
b411b363
PR
2805 break;
2806 case ASB_VIOLENTLY:
69a22773 2807 rv = drbd_asb_recover_0p(peer_device);
b411b363
PR
2808 break;
2809 case ASB_DISCONNECT:
2810 break;
2811 case ASB_CALL_HELPER:
69a22773 2812 hg = drbd_asb_recover_0p(peer_device);
b411b363 2813 if (hg == -1) {
bb437946
AG
2814 enum drbd_state_rv rv2;
2815
b411b363
PR
2816 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2817 * we might be here in C_WF_REPORT_PARAMS which is transient.
2818 * we do not need to wait for the after state change work either. */
b30ab791 2819 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
bb437946 2820 if (rv2 != SS_SUCCESS) {
b30ab791 2821 drbd_khelper(device, "pri-lost-after-sb");
b411b363 2822 } else {
d0180171 2823 drbd_warn(device, "Successfully gave up primary role.\n");
b411b363
PR
2824 rv = hg;
2825 }
2826 } else
2827 rv = hg;
2828 }
2829
2830 return rv;
2831}
2832
b30ab791 2833static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
b411b363
PR
2834 u64 bits, u64 flags)
2835{
2836 if (!uuid) {
d0180171 2837 drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
b411b363
PR
2838 return;
2839 }
d0180171 2840 drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
b411b363
PR
2841 text,
2842 (unsigned long long)uuid[UI_CURRENT],
2843 (unsigned long long)uuid[UI_BITMAP],
2844 (unsigned long long)uuid[UI_HISTORY_START],
2845 (unsigned long long)uuid[UI_HISTORY_END],
2846 (unsigned long long)bits,
2847 (unsigned long long)flags);
2848}
2849
2850/*
2851 100 after split brain try auto recover
2852 2 C_SYNC_SOURCE set BitMap
2853 1 C_SYNC_SOURCE use BitMap
2854 0 no Sync
2855 -1 C_SYNC_TARGET use BitMap
2856 -2 C_SYNC_TARGET set BitMap
2857 -100 after split brain, disconnect
2858-1000 unrelated data
4a23f264
PR
2859-1091 requires proto 91
2860-1096 requires proto 96
b411b363 2861 */
44a4d551 2862static int drbd_uuid_compare(struct drbd_device *const device, int *rule_nr) __must_hold(local)
b411b363 2863{
44a4d551
LE
2864 struct drbd_peer_device *const peer_device = first_peer_device(device);
2865 struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
b411b363
PR
2866 u64 self, peer;
2867 int i, j;
2868
b30ab791
AG
2869 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2870 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
b411b363
PR
2871
2872 *rule_nr = 10;
2873 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2874 return 0;
2875
2876 *rule_nr = 20;
2877 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2878 peer != UUID_JUST_CREATED)
2879 return -2;
2880
2881 *rule_nr = 30;
2882 if (self != UUID_JUST_CREATED &&
2883 (peer == UUID_JUST_CREATED || peer == (u64)0))
2884 return 2;
2885
2886 if (self == peer) {
2887 int rct, dc; /* roles at crash time */
2888
b30ab791 2889 if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
b411b363 2890
44a4d551 2891 if (connection->agreed_pro_version < 91)
4a23f264 2892 return -1091;
b411b363 2893
b30ab791
AG
2894 if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2895 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
d0180171 2896 drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
b30ab791
AG
2897 drbd_uuid_move_history(device);
2898 device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
2899 device->ldev->md.uuid[UI_BITMAP] = 0;
b411b363 2900
b30ab791
AG
2901 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
2902 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
b411b363
PR
2903 *rule_nr = 34;
2904 } else {
d0180171 2905 drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
b411b363
PR
2906 *rule_nr = 36;
2907 }
2908
2909 return 1;
2910 }
2911
b30ab791 2912 if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
b411b363 2913
44a4d551 2914 if (connection->agreed_pro_version < 91)
4a23f264 2915 return -1091;
b411b363 2916
b30ab791
AG
2917 if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2918 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
d0180171 2919 drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
b411b363 2920
b30ab791
AG
2921 device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
2922 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
2923 device->p_uuid[UI_BITMAP] = 0UL;
b411b363 2924
b30ab791 2925 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
b411b363
PR
2926 *rule_nr = 35;
2927 } else {
d0180171 2928 drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
b411b363
PR
2929 *rule_nr = 37;
2930 }
2931
2932 return -1;
2933 }
2934
2935 /* Common power [off|failure] */
b30ab791
AG
2936 rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
2937 (device->p_uuid[UI_FLAGS] & 2);
b411b363
PR
2938 /* lowest bit is set when we were primary,
2939 * next bit (weight 2) is set when peer was primary */
2940 *rule_nr = 40;
2941
2942 switch (rct) {
2943 case 0: /* !self_pri && !peer_pri */ return 0;
2944 case 1: /* self_pri && !peer_pri */ return 1;
2945 case 2: /* !self_pri && peer_pri */ return -1;
2946 case 3: /* self_pri && peer_pri */
44a4d551 2947 dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
b411b363
PR
2948 return dc ? -1 : 1;
2949 }
2950 }
2951
2952 *rule_nr = 50;
b30ab791 2953 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
b411b363
PR
2954 if (self == peer)
2955 return -1;
2956
2957 *rule_nr = 51;
b30ab791 2958 peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
b411b363 2959 if (self == peer) {
44a4d551 2960 if (connection->agreed_pro_version < 96 ?
b30ab791
AG
2961 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2962 (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2963 peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
b411b363
PR
 2964 /* The last P_SYNC_UUID did not get through. Undo the UUID
 2965 modifications the peer made when it last started a resync as sync source. */
2966
44a4d551 2967 if (connection->agreed_pro_version < 91)
4a23f264 2968 return -1091;
b411b363 2969
b30ab791
AG
2970 device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
2971 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
4a23f264 2972
d0180171 2973 drbd_info(device, "Lost last syncUUID packet, corrected:\n");
b30ab791 2974 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
4a23f264 2975
b411b363
PR
2976 return -1;
2977 }
2978 }
2979
2980 *rule_nr = 60;
b30ab791 2981 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
b411b363 2982 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
b30ab791 2983 peer = device->p_uuid[i] & ~((u64)1);
b411b363
PR
2984 if (self == peer)
2985 return -2;
2986 }
2987
2988 *rule_nr = 70;
b30ab791
AG
2989 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2990 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
b411b363
PR
2991 if (self == peer)
2992 return 1;
2993
2994 *rule_nr = 71;
b30ab791 2995 self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
b411b363 2996 if (self == peer) {
44a4d551 2997 if (connection->agreed_pro_version < 96 ?
b30ab791
AG
2998 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2999 (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
3000 self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
b411b363
PR
 3001 /* The last P_SYNC_UUID did not get through. Undo the UUID
 3002 modifications we made when we last started a resync as sync source. */
3003
44a4d551 3004 if (connection->agreed_pro_version < 91)
4a23f264 3005 return -1091;
b411b363 3006
b30ab791
AG
3007 __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
3008 __drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
b411b363 3009
d0180171 3010 drbd_info(device, "Last syncUUID did not get through, corrected:\n");
b30ab791
AG
3011 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3012 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
b411b363
PR
3013
3014 return 1;
3015 }
3016 }
3017
3018
3019 *rule_nr = 80;
b30ab791 3020 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
b411b363 3021 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
b30ab791 3022 self = device->ldev->md.uuid[i] & ~((u64)1);
b411b363
PR
3023 if (self == peer)
3024 return 2;
3025 }
3026
3027 *rule_nr = 90;
b30ab791
AG
3028 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3029 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
b411b363
PR
3030 if (self == peer && self != ((u64)0))
3031 return 100;
3032
3033 *rule_nr = 100;
3034 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
b30ab791 3035 self = device->ldev->md.uuid[i] & ~((u64)1);
b411b363 3036 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
b30ab791 3037 peer = device->p_uuid[j] & ~((u64)1);
b411b363
PR
3038 if (self == peer)
3039 return -100;
3040 }
3041 }
3042
3043 return -1000;
3044}
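/* Example of the common-crash case (rule_nr 40, illustrative): both nodes
 * hold identical current UUIDs and neither has a bitmap UUID set, i.e. they
 * went down together. rct combines "I crashed as primary" (bit 0, weight 1)
 * with "peer was primary" (bit 1, weight 2):
 *
 *	rct == 0  neither was primary   ->  0   no resync needed
 *	rct == 1  only we were primary  ->  1   we become sync source
 *	rct == 2  only peer was primary -> -1   we become sync target
 *	rct == 3  both were primary     ->  the RESOLVE_CONFLICTS bit decides
 */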
3045
3046/* drbd_sync_handshake() returns the new conn state on success, or
3047 CONN_MASK (-1) on failure.
3048 */
69a22773
AG
3049static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
3050 enum drbd_role peer_role,
b411b363
PR
3051 enum drbd_disk_state peer_disk) __must_hold(local)
3052{
69a22773 3053 struct drbd_device *device = peer_device->device;
b411b363
PR
3054 enum drbd_conns rv = C_MASK;
3055 enum drbd_disk_state mydisk;
44ed167d 3056 struct net_conf *nc;
6dff2902 3057 int hg, rule_nr, rr_conflict, tentative;
b411b363 3058
b30ab791 3059 mydisk = device->state.disk;
b411b363 3060 if (mydisk == D_NEGOTIATING)
b30ab791 3061 mydisk = device->new_state_tmp.disk;
b411b363 3062
d0180171 3063 drbd_info(device, "drbd_sync_handshake:\n");
9f2247bb 3064
b30ab791
AG
3065 spin_lock_irq(&device->ldev->md.uuid_lock);
3066 drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
3067 drbd_uuid_dump(device, "peer", device->p_uuid,
3068 device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
b411b363 3069
b30ab791
AG
3070 hg = drbd_uuid_compare(device, &rule_nr);
3071 spin_unlock_irq(&device->ldev->md.uuid_lock);
b411b363 3072
d0180171 3073 drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
b411b363
PR
3074
3075 if (hg == -1000) {
d0180171 3076 drbd_alert(device, "Unrelated data, aborting!\n");
b411b363
PR
3077 return C_MASK;
3078 }
4a23f264 3079 if (hg < -1000) {
d0180171 3080 drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
b411b363
PR
3081 return C_MASK;
3082 }
3083
3084 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
3085 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
3086 int f = (hg == -100) || abs(hg) == 2;
3087 hg = mydisk > D_INCONSISTENT ? 1 : -1;
3088 if (f)
3089 hg = hg*2;
d0180171 3090 drbd_info(device, "Becoming sync %s due to disk states.\n",
b411b363
PR
3091 hg > 0 ? "source" : "target");
3092 }
3093
3a11a487 3094 if (abs(hg) == 100)
b30ab791 3095 drbd_khelper(device, "initial-split-brain");
3a11a487 3096
44ed167d 3097 rcu_read_lock();
69a22773 3098 nc = rcu_dereference(peer_device->connection->net_conf);
44ed167d
PR
3099
3100 if (hg == 100 || (hg == -100 && nc->always_asbp)) {
b30ab791 3101 int pcount = (device->state.role == R_PRIMARY)
b411b363
PR
3102 + (peer_role == R_PRIMARY);
3103 int forced = (hg == -100);
3104
3105 switch (pcount) {
3106 case 0:
69a22773 3107 hg = drbd_asb_recover_0p(peer_device);
b411b363
PR
3108 break;
3109 case 1:
69a22773 3110 hg = drbd_asb_recover_1p(peer_device);
b411b363
PR
3111 break;
3112 case 2:
69a22773 3113 hg = drbd_asb_recover_2p(peer_device);
b411b363
PR
3114 break;
3115 }
3116 if (abs(hg) < 100) {
d0180171 3117 drbd_warn(device, "Split-Brain detected, %d primaries, "
b411b363
PR
3118 "automatically solved. Sync from %s node\n",
3119 pcount, (hg < 0) ? "peer" : "this");
3120 if (forced) {
d0180171 3121 drbd_warn(device, "Doing a full sync, since"
b411b363
PR
3122 " UUIDs where ambiguous.\n");
3123 hg = hg*2;
3124 }
3125 }
3126 }
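
/* Sketch of the policy selection above: with no Primary (pcount == 0)
 * the after-sb-0pri handler decides, with exactly one Primary
 * after-sb-1pri, with two Primaries after-sb-2pri; abs(hg) < 100
 * afterwards means the split brain was auto-resolved, and a forced
 * (hg == -100) resolution is doubled so that a full sync is done. */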
3127
3128 if (hg == -100) {
b30ab791 3129 if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
b411b363 3130 hg = -1;
b30ab791 3131 if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
b411b363
PR
3132 hg = 1;
3133
3134 if (abs(hg) < 100)
d0180171 3135 drbd_warn(device, "Split-Brain detected, manually solved. "
b411b363
PR
3136 "Sync from %s node\n",
3137 (hg < 0) ? "peer" : "this");
3138 }
44ed167d 3139 rr_conflict = nc->rr_conflict;
6dff2902 3140 tentative = nc->tentative;
44ed167d 3141 rcu_read_unlock();
b411b363
PR
3142
3143 if (hg == -100) {
580b9767
LE
3144 /* FIXME this log message is not correct if we end up here
3145 * after an attempted attach on a diskless node.
3146 * We just refuse to attach -- well, we drop the "connection"
3147 * to that disk, in a way... */
d0180171 3148 drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
b30ab791 3149 drbd_khelper(device, "split-brain");
b411b363
PR
3150 return C_MASK;
3151 }
3152
3153 if (hg > 0 && mydisk <= D_INCONSISTENT) {
d0180171 3154 drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
b411b363
PR
3155 return C_MASK;
3156 }
3157
3158 if (hg < 0 && /* by intention we do not use mydisk here. */
b30ab791 3159 device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
44ed167d 3160 switch (rr_conflict) {
b411b363 3161 case ASB_CALL_HELPER:
b30ab791 3162 drbd_khelper(device, "pri-lost");
b411b363
PR
3163 /* fall through */
3164 case ASB_DISCONNECT:
d0180171 3165 drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
b411b363
PR
3166 return C_MASK;
3167 case ASB_VIOLENTLY:
d0180171 3168 drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
b411b363
PR
3169 "assumption\n");
3170 }
3171 }
3172
69a22773 3173 if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
cf14c2e9 3174 if (hg == 0)
d0180171 3175 drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
cf14c2e9 3176 else
d0180171 3177 drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.\n",
cf14c2e9
PR
3178 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3179 abs(hg) >= 2 ? "full" : "bit-map based");
3180 return C_MASK;
3181 }
3182
b411b363 3183 if (abs(hg) >= 2) {
d0180171 3184 drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
b30ab791 3185 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
20ceb2b2 3186 BM_LOCKED_SET_ALLOWED))
b411b363
PR
3187 return C_MASK;
3188 }
3189
3190 if (hg > 0) { /* become sync source. */
3191 rv = C_WF_BITMAP_S;
3192 } else if (hg < 0) { /* become sync target */
3193 rv = C_WF_BITMAP_T;
3194 } else {
3195 rv = C_CONNECTED;
b30ab791 3196 if (drbd_bm_total_weight(device)) {
d0180171 3197 drbd_info(device, "No resync, but %lu bits in bitmap!\n",
b30ab791 3198 drbd_bm_total_weight(device));
b411b363
PR
3199 }
3200 }
3201
3202 return rv;
3203}
3204
f179d76d 3205static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
b411b363
PR
3206{
3207 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
f179d76d
PR
3208 if (peer == ASB_DISCARD_REMOTE)
3209 return ASB_DISCARD_LOCAL;
b411b363
PR
3210
3211 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
f179d76d
PR
3212 if (peer == ASB_DISCARD_LOCAL)
3213 return ASB_DISCARD_REMOTE;
b411b363
PR
3214
3215 /* everything else is valid if they are equal on both sides. */
f179d76d 3216 return peer;
b411b363
PR
3217}
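
/* Example for the mapping above: a peer configured with after-sb-0pri
 * discard-remote arrives here as ASB_DISCARD_REMOTE and converts to
 * ASB_DISCARD_LOCAL, which is what our own nc->after_sb_0p must be
 * set to for the compatibility checks in receive_protocol() below;
 * all symmetric policies pass through and must match verbatim. */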
3218
bde89a9e 3219static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
b411b363 3220{
e658983a 3221 struct p_protocol *p = pi->data;
036b17ea
PR
3222 enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
3223 int p_proto, p_discard_my_data, p_two_primaries, cf;
3224 struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3225 char integrity_alg[SHARED_SECRET_MAX] = "";
accdbcc5 3226 struct crypto_hash *peer_integrity_tfm = NULL;
7aca6c75 3227 void *int_dig_in = NULL, *int_dig_vv = NULL;
b411b363 3228
b411b363
PR
3229 p_proto = be32_to_cpu(p->protocol);
3230 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
3231 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
3232 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
b411b363 3233 p_two_primaries = be32_to_cpu(p->two_primaries);
cf14c2e9 3234 cf = be32_to_cpu(p->conn_flags);
6139f60d 3235 p_discard_my_data = cf & CF_DISCARD_MY_DATA;
cf14c2e9 3236
bde89a9e 3237 if (connection->agreed_pro_version >= 87) {
86db0618 3238 int err;
cf14c2e9 3239
88104ca4 3240 if (pi->size > sizeof(integrity_alg))
86db0618 3241 return -EIO;
bde89a9e 3242 err = drbd_recv_all(connection, integrity_alg, pi->size);
86db0618
AG
3243 if (err)
3244 return err;
036b17ea 3245 integrity_alg[SHARED_SECRET_MAX - 1] = 0;
b411b363
PR
3246 }
3247
7d4c782c 3248 if (pi->cmd != P_PROTOCOL_UPDATE) {
bde89a9e 3249 clear_bit(CONN_DRY_RUN, &connection->flags);
b411b363 3250
fbc12f45 3251 if (cf & CF_DRY_RUN)
bde89a9e 3252 set_bit(CONN_DRY_RUN, &connection->flags);
b411b363 3253
fbc12f45 3254 rcu_read_lock();
bde89a9e 3255 nc = rcu_dereference(connection->net_conf);
b411b363 3256
fbc12f45 3257 if (p_proto != nc->wire_protocol) {
1ec861eb 3258 drbd_err(connection, "incompatible %s settings\n", "protocol");
fbc12f45
AG
3259 goto disconnect_rcu_unlock;
3260 }
b411b363 3261
fbc12f45 3262 if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
1ec861eb 3263 drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
fbc12f45
AG
3264 goto disconnect_rcu_unlock;
3265 }
b411b363 3266
fbc12f45 3267 if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
1ec861eb 3268 drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
fbc12f45
AG
3269 goto disconnect_rcu_unlock;
3270 }
b411b363 3271
fbc12f45 3272 if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
1ec861eb 3273 drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
fbc12f45
AG
3274 goto disconnect_rcu_unlock;
3275 }
b411b363 3276
fbc12f45 3277 if (p_discard_my_data && nc->discard_my_data) {
1ec861eb 3278 drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
fbc12f45
AG
3279 goto disconnect_rcu_unlock;
3280 }
b411b363 3281
fbc12f45 3282 if (p_two_primaries != nc->two_primaries) {
1ec861eb 3283 drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
fbc12f45
AG
3284 goto disconnect_rcu_unlock;
3285 }
b411b363 3286
fbc12f45 3287 if (strcmp(integrity_alg, nc->integrity_alg)) {
1ec861eb 3288 drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
fbc12f45
AG
3289 goto disconnect_rcu_unlock;
3290 }
b411b363 3291
fbc12f45 3292 rcu_read_unlock();
b411b363
PR
3293 }
3294
7d4c782c
AG
3295 if (integrity_alg[0]) {
3296 int hash_size;
3297
3298 /*
3299 * We can only change the peer data integrity algorithm
3300 * here. Changing our own data integrity algorithm
3301 * requires that we send a P_PROTOCOL_UPDATE packet at
3302 * the same time; otherwise, the peer has no way of
3303 * knowing between which packets the algorithm should
3304 * change.
3305 */
b411b363 3306
7d4c782c
AG
3307 peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
3308 if (!peer_integrity_tfm) {
1ec861eb 3309 drbd_err(connection, "peer data-integrity-alg %s not supported\n",
7d4c782c
AG
3310 integrity_alg);
3311 goto disconnect;
3312 }
b411b363 3313
7d4c782c
AG
3314 hash_size = crypto_hash_digestsize(peer_integrity_tfm);
3315 int_dig_in = kmalloc(hash_size, GFP_KERNEL);
3316 int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
3317 if (!(int_dig_in && int_dig_vv)) {
1ec861eb 3318 drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
b411b363
PR
3319 goto disconnect;
3320 }
b411b363
PR
3321 }
3322
7d4c782c
AG
3323 new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
3324 if (!new_net_conf) {
1ec861eb 3325 drbd_err(connection, "Allocation of new net_conf failed\n");
7d4c782c
AG
3326 goto disconnect;
3327 }
3328
bde89a9e 3329 mutex_lock(&connection->data.mutex);
0500813f 3330 mutex_lock(&connection->resource->conf_update);
bde89a9e 3331 old_net_conf = connection->net_conf;
7d4c782c
AG
3332 *new_net_conf = *old_net_conf;
3333
3334 new_net_conf->wire_protocol = p_proto;
3335 new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
3336 new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
3337 new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
3338 new_net_conf->two_primaries = p_two_primaries;
3339
bde89a9e 3340 rcu_assign_pointer(connection->net_conf, new_net_conf);
0500813f 3341 mutex_unlock(&connection->resource->conf_update);
bde89a9e 3342 mutex_unlock(&connection->data.mutex);
7d4c782c 3343
bde89a9e
AG
3344 crypto_free_hash(connection->peer_integrity_tfm);
3345 kfree(connection->int_dig_in);
3346 kfree(connection->int_dig_vv);
3347 connection->peer_integrity_tfm = peer_integrity_tfm;
3348 connection->int_dig_in = int_dig_in;
3349 connection->int_dig_vv = int_dig_vv;
7d4c782c
AG
3350
3351 if (strcmp(old_net_conf->integrity_alg, integrity_alg))
1ec861eb 3352 drbd_info(connection, "peer data-integrity-alg: %s\n",
7d4c782c
AG
3353 integrity_alg[0] ? integrity_alg : "(none)");
3354
3355 synchronize_rcu();
3356 kfree(old_net_conf);
82bc0194 3357 return 0;
b411b363 3358
44ed167d
PR
3359disconnect_rcu_unlock:
3360 rcu_read_unlock();
b411b363 3361disconnect:
b792c35c 3362 crypto_free_hash(peer_integrity_tfm);
036b17ea
PR
3363 kfree(int_dig_in);
3364 kfree(int_dig_vv);
bde89a9e 3365 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
82bc0194 3366 return -EIO;
b411b363
PR
3367}
3368
3369/* helper function
3370 * input: alg name, feature name
3371 * return: NULL (alg name was "")
3372 * ERR_PTR(error) if something goes wrong
3373 * or the crypto hash ptr, if it worked out ok. */
8ce953aa 3374static struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
b411b363
PR
3375 const char *alg, const char *name)
3376{
3377 struct crypto_hash *tfm;
3378
3379 if (!alg[0])
3380 return NULL;
3381
3382 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
3383 if (IS_ERR(tfm)) {
d0180171 3384 drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
b411b363
PR
3385 alg, name, PTR_ERR(tfm));
3386 return tfm;
3387 }
b411b363
PR
3388 return tfm;
3389}
3390
bde89a9e 3391static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
4a76b161 3392{
bde89a9e 3393 void *buffer = connection->data.rbuf;
4a76b161
AG
3394 int size = pi->size;
3395
3396 while (size) {
3397 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
bde89a9e 3398 s = drbd_recv(connection, buffer, s);
4a76b161
AG
3399 if (s <= 0) {
3400 if (s < 0)
3401 return s;
3402 break;
3403 }
3404 size -= s;
3405 }
3406 if (size)
3407 return -EIO;
3408 return 0;
3409}
3410
3411/*
3412 * config_unknown_volume - device configuration command for unknown volume
3413 *
3414 * When a device is added to an existing connection, the node on which the
3415 * device is added first will send configuration commands to its peer but the
3416 * peer will not know about the device yet. It will warn and ignore these
3417 * commands. Once the device is added on the second node, the second node will
3418 * send the same device configuration commands, but in the other direction.
3419 *
3420 * (We can also end up here if drbd is misconfigured.)
3421 */
bde89a9e 3422static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
4a76b161 3423{
1ec861eb 3424 drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
2fcb8f30 3425 cmdname(pi->cmd), pi->vnr);
bde89a9e 3426 return ignore_remaining_packet(connection, pi);
4a76b161
AG
3427}
3428
bde89a9e 3429static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
b411b363 3430{
9f4fe9ad 3431 struct drbd_peer_device *peer_device;
b30ab791 3432 struct drbd_device *device;
e658983a 3433 struct p_rs_param_95 *p;
b411b363
PR
3434 unsigned int header_size, data_size, exp_max_sz;
3435 struct crypto_hash *verify_tfm = NULL;
3436 struct crypto_hash *csums_tfm = NULL;
2ec91e0e 3437 struct net_conf *old_net_conf, *new_net_conf = NULL;
813472ce 3438 struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
bde89a9e 3439 const int apv = connection->agreed_pro_version;
813472ce 3440 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
778f271d 3441 int fifo_size = 0;
82bc0194 3442 int err;
b411b363 3443
9f4fe9ad
AG
3444 peer_device = conn_peer_device(connection, pi->vnr);
3445 if (!peer_device)
bde89a9e 3446 return config_unknown_volume(connection, pi);
9f4fe9ad 3447 device = peer_device->device;
b411b363
PR
3448
3449 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3450 : apv == 88 ? sizeof(struct p_rs_param)
3451 + SHARED_SECRET_MAX
8e26f9cc
PR
3452 : apv <= 94 ? sizeof(struct p_rs_param_89)
3453 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
b411b363 3454
e2857216 3455 if (pi->size > exp_max_sz) {
d0180171 3456 drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
e2857216 3457 pi->size, exp_max_sz);
82bc0194 3458 return -EIO;
b411b363
PR
3459 }
3460
3461 if (apv <= 88) {
e658983a 3462 header_size = sizeof(struct p_rs_param);
e2857216 3463 data_size = pi->size - header_size;
8e26f9cc 3464 } else if (apv <= 94) {
e658983a 3465 header_size = sizeof(struct p_rs_param_89);
e2857216 3466 data_size = pi->size - header_size;
0b0ba1ef 3467 D_ASSERT(device, data_size == 0);
8e26f9cc 3468 } else {
e658983a 3469 header_size = sizeof(struct p_rs_param_95);
e2857216 3470 data_size = pi->size - header_size;
0b0ba1ef 3471 D_ASSERT(device, data_size == 0);
b411b363
PR
3472 }
3473
3474 /* initialize verify_alg and csums_alg */
e658983a 3475 p = pi->data;
b411b363
PR
3476 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3477
9f4fe9ad 3478 err = drbd_recv_all(peer_device->connection, p, header_size);
82bc0194
AG
3479 if (err)
3480 return err;
b411b363 3481
0500813f 3482 mutex_lock(&connection->resource->conf_update);
9f4fe9ad 3483 old_net_conf = peer_device->connection->net_conf;
b30ab791 3484 if (get_ldev(device)) {
813472ce
PR
3485 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3486 if (!new_disk_conf) {
b30ab791 3487 put_ldev(device);
0500813f 3488 mutex_unlock(&connection->resource->conf_update);
d0180171 3489 drbd_err(device, "Allocation of new disk_conf failed\n");
813472ce
PR
3490 return -ENOMEM;
3491 }
daeda1cc 3492
b30ab791 3493 old_disk_conf = device->ldev->disk_conf;
813472ce 3494 *new_disk_conf = *old_disk_conf;
b411b363 3495
6394b935 3496 new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
813472ce 3497 }
b411b363
PR
3498
3499 if (apv >= 88) {
3500 if (apv == 88) {
5de73827 3501 if (data_size > SHARED_SECRET_MAX || data_size == 0) {
d0180171 3502 drbd_err(device, "verify-alg of wrong size, "
5de73827
PR
3503 "peer wants %u, accepting only up to %u byte\n",
3504 data_size, SHARED_SECRET_MAX);
813472ce
PR
3505 err = -EIO;
3506 goto reconnect;
b411b363
PR
3507 }
3508
9f4fe9ad 3509 err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
813472ce
PR
3510 if (err)
3511 goto reconnect;
b411b363
PR
3512 /* we expect NUL terminated string */
3513 /* but just in case someone tries to be evil */
0b0ba1ef 3514 D_ASSERT(device, p->verify_alg[data_size-1] == 0);
b411b363
PR
3515 p->verify_alg[data_size-1] = 0;
3516
3517 } else /* apv >= 89 */ {
3518 /* we still expect NUL terminated strings */
3519 /* but just in case someone tries to be evil */
0b0ba1ef
AG
3520 D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3521 D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
b411b363
PR
3522 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3523 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3524 }
3525
2ec91e0e 3526 if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
b30ab791 3527 if (device->state.conn == C_WF_REPORT_PARAMS) {
d0180171 3528 drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
2ec91e0e 3529 old_net_conf->verify_alg, p->verify_alg);
b411b363
PR
3530 goto disconnect;
3531 }
b30ab791 3532 verify_tfm = drbd_crypto_alloc_digest_safe(device,
b411b363
PR
3533 p->verify_alg, "verify-alg");
3534 if (IS_ERR(verify_tfm)) {
3535 verify_tfm = NULL;
3536 goto disconnect;
3537 }
3538 }
3539
2ec91e0e 3540 if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
b30ab791 3541 if (device->state.conn == C_WF_REPORT_PARAMS) {
d0180171 3542 drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
2ec91e0e 3543 old_net_conf->csums_alg, p->csums_alg);
b411b363
PR
3544 goto disconnect;
3545 }
b30ab791 3546 csums_tfm = drbd_crypto_alloc_digest_safe(device,
b411b363
PR
3547 p->csums_alg, "csums-alg");
3548 if (IS_ERR(csums_tfm)) {
3549 csums_tfm = NULL;
3550 goto disconnect;
3551 }
3552 }
3553
813472ce 3554 if (apv > 94 && new_disk_conf) {
daeda1cc
PR
3555 new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3556 new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3557 new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3558 new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
778f271d 3559
daeda1cc 3560 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
b30ab791 3561 if (fifo_size != device->rs_plan_s->size) {
813472ce
PR
3562 new_plan = fifo_alloc(fifo_size);
3563 if (!new_plan) {
d0180171 3564 drbd_err(device, "allocation of fifo_buffer failed\n");
b30ab791 3565 put_ldev(device);
778f271d
PR
3566 goto disconnect;
3567 }
3568 }
8e26f9cc 3569 }
b411b363 3570
91fd4dad 3571 if (verify_tfm || csums_tfm) {
2ec91e0e
PR
3572 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
3573 if (!new_net_conf) {
d0180171 3574 drbd_err(device, "Allocation of new net_conf failed\n");
91fd4dad
PR
3575 goto disconnect;
3576 }
3577
2ec91e0e 3578 *new_net_conf = *old_net_conf;
91fd4dad
PR
3579
3580 if (verify_tfm) {
2ec91e0e
PR
3581 strcpy(new_net_conf->verify_alg, p->verify_alg);
3582 new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
9f4fe9ad
AG
3583 crypto_free_hash(peer_device->connection->verify_tfm);
3584 peer_device->connection->verify_tfm = verify_tfm;
d0180171 3585 drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
91fd4dad
PR
3586 }
3587 if (csums_tfm) {
2ec91e0e
PR
3588 strcpy(new_net_conf->csums_alg, p->csums_alg);
3589 new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
9f4fe9ad
AG
3590 crypto_free_hash(peer_device->connection->csums_tfm);
3591 peer_device->connection->csums_tfm = csums_tfm;
d0180171 3592 drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
91fd4dad 3593 }
bde89a9e 3594 rcu_assign_pointer(connection->net_conf, new_net_conf);
778f271d 3595 }
b411b363
PR
3596 }
3597
813472ce 3598 if (new_disk_conf) {
b30ab791
AG
3599 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
3600 put_ldev(device);
813472ce
PR
3601 }
3602
3603 if (new_plan) {
b30ab791
AG
3604 old_plan = device->rs_plan_s;
3605 rcu_assign_pointer(device->rs_plan_s, new_plan);
b411b363 3606 }
daeda1cc 3607
0500813f 3608 mutex_unlock(&connection->resource->conf_update);
daeda1cc
PR
3609 synchronize_rcu();
3610 if (new_net_conf)
3611 kfree(old_net_conf);
3612 kfree(old_disk_conf);
813472ce 3613 kfree(old_plan);
daeda1cc 3614
82bc0194 3615 return 0;
b411b363 3616
813472ce
PR
3617reconnect:
3618 if (new_disk_conf) {
b30ab791 3619 put_ldev(device);
813472ce
PR
3620 kfree(new_disk_conf);
3621 }
0500813f 3622 mutex_unlock(&connection->resource->conf_update);
813472ce
PR
3623 return -EIO;
3624
b411b363 3625disconnect:
813472ce
PR
3626 kfree(new_plan);
3627 if (new_disk_conf) {
b30ab791 3628 put_ldev(device);
813472ce
PR
3629 kfree(new_disk_conf);
3630 }
0500813f 3631 mutex_unlock(&connection->resource->conf_update);
b411b363
PR
3632 /* just for completeness: actually not needed,
3633 * as this is not reached if csums_tfm was ok. */
3634 crypto_free_hash(csums_tfm);
3635 /* but free the verify_tfm again, if csums_tfm did not work out */
3636 crypto_free_hash(verify_tfm);
9f4fe9ad 3637 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
82bc0194 3638 return -EIO;
b411b363
PR
3639}
3640
b411b363 3641/* warn if the arguments differ by more than 12.5% */
b30ab791 3642static void warn_if_differ_considerably(struct drbd_device *device,
b411b363
PR
3643 const char *s, sector_t a, sector_t b)
3644{
3645 sector_t d;
3646 if (a == 0 || b == 0)
3647 return;
3648 d = (a > b) ? (a - b) : (b - a);
3649 if (d > (a>>3) || d > (b>>3))
d0180171 3650 drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
b411b363
PR
3651 (unsigned long long)a, (unsigned long long)b);
3652}
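
/* Worked example with made-up numbers: for a = 1000 and b = 870
 * sectors, d = 130 exceeds a>>3 = 125 (one eighth, i.e. 12.5%), so
 * the warning fires; for b = 900, d = 100 stays below both a>>3 and
 * b>>3 and nothing is logged. */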
3653
bde89a9e 3654static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
b411b363 3655{
9f4fe9ad 3656 struct drbd_peer_device *peer_device;
b30ab791 3657 struct drbd_device *device;
e658983a 3658 struct p_sizes *p = pi->data;
e96c9633 3659 enum determine_dev_size dd = DS_UNCHANGED;
6a8d68b1 3660 sector_t p_size, p_usize, p_csize, my_usize;
b411b363 3661 int ldsc = 0; /* local disk size changed */
e89b591c 3662 enum dds_flags ddsf;
b411b363 3663
9f4fe9ad
AG
3664 peer_device = conn_peer_device(connection, pi->vnr);
3665 if (!peer_device)
bde89a9e 3666 return config_unknown_volume(connection, pi);
9f4fe9ad 3667 device = peer_device->device;
4a76b161 3668
b411b363
PR
3669 p_size = be64_to_cpu(p->d_size);
3670 p_usize = be64_to_cpu(p->u_size);
6a8d68b1 3671 p_csize = be64_to_cpu(p->c_size);
b411b363 3672
b411b363
PR
3673 /* just store the peer's disk size for now.
3674 * we still need to figure out whether we accept that. */
b30ab791 3675 device->p_size = p_size;
b411b363 3676
b30ab791 3677 if (get_ldev(device)) {
daeda1cc 3678 rcu_read_lock();
b30ab791 3679 my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
daeda1cc
PR
3680 rcu_read_unlock();
3681
b30ab791
AG
3682 warn_if_differ_considerably(device, "lower level device sizes",
3683 p_size, drbd_get_max_capacity(device->ldev));
3684 warn_if_differ_considerably(device, "user requested size",
daeda1cc 3685 p_usize, my_usize);
b411b363
PR
3686
3687 /* if this is the first connect, or an otherwise expected
3688 * param exchange, choose the minimum */
b30ab791 3689 if (device->state.conn == C_WF_REPORT_PARAMS)
daeda1cc 3690 p_usize = min_not_zero(my_usize, p_usize);
b411b363
PR
3691
3692 /* Never shrink a device with usable data during connect.
3693 But allow online shrinking if we are connected. */
b30ab791
AG
3694 if (drbd_new_dev_size(device, device->ldev, p_usize, 0) <
3695 drbd_get_capacity(device->this_bdev) &&
3696 device->state.disk >= D_OUTDATED &&
3697 device->state.conn < C_CONNECTED) {
d0180171 3698 drbd_err(device, "The peer's disk size is too small!\n");
9f4fe9ad 3699 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
b30ab791 3700 put_ldev(device);
82bc0194 3701 return -EIO;
b411b363 3702 }
daeda1cc
PR
3703
3704 if (my_usize != p_usize) {
3705 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3706
3707 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3708 if (!new_disk_conf) {
d0180171 3709 drbd_err(device, "Allocation of new disk_conf failed\n");
b30ab791 3710 put_ldev(device);
daeda1cc
PR
3711 return -ENOMEM;
3712 }
3713
0500813f 3714 mutex_lock(&connection->resource->conf_update);
b30ab791 3715 old_disk_conf = device->ldev->disk_conf;
daeda1cc
PR
3716 *new_disk_conf = *old_disk_conf;
3717 new_disk_conf->disk_size = p_usize;
3718
b30ab791 3719 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
0500813f 3720 mutex_unlock(&connection->resource->conf_update);
daeda1cc
PR
3721 synchronize_rcu();
3722 kfree(old_disk_conf);
3723
d0180171 3724 drbd_info(device, "Peer sets u_size to %lu sectors\n",
daeda1cc 3725 (unsigned long)p_usize);
b411b363 3726 }
daeda1cc 3727
b30ab791 3728 put_ldev(device);
b411b363 3729 }
b411b363 3730
20c68fde 3731 device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
20c68fde
LE
3732 /* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size().
3733 In case we cleared the QUEUE_FLAG_DISCARD from our queue in
3734 drbd_reconsider_max_bio_size(), we can be sure that after
3735 drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */
3736
e89b591c 3737 ddsf = be16_to_cpu(p->dds_flags);
b30ab791 3738 if (get_ldev(device)) {
8fe39aac 3739 drbd_reconsider_max_bio_size(device, device->ldev);
b30ab791
AG
3740 dd = drbd_determine_dev_size(device, ddsf, NULL);
3741 put_ldev(device);
e96c9633 3742 if (dd == DS_ERROR)
82bc0194 3743 return -EIO;
b30ab791 3744 drbd_md_sync(device);
b411b363 3745 } else {
6a8d68b1
LE
3746 /*
3747 * I am diskless, need to accept the peer's *current* size.
3748 * I must NOT accept the peer's backing disk size,
3749 * it may have been larger than mine all along...
3750 *
3751 * At this point, the peer knows more about my disk, or at
3752 * least about what we last agreed upon, than I do.
3753 * So if his c_size is less than his d_size, the most likely
3754 * reason is that *my* d_size was smaller last time we checked.
3755 *
3756 * However, if he sends a zero current size,
3757 * take his (user-capped or) backing disk size anyway.
3758 */
8fe39aac 3759 drbd_reconsider_max_bio_size(device, NULL);
6a8d68b1 3760 drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size);
b411b363
PR
3761 }
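
/* Note on the gcc x ?: y shorthand above (a sketch of the fallback
 * order): the capacity becomes the peer's current size if non-zero,
 * else the peer's user-requested size, else the peer's backing disk
 * size, matching the comment above. */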
3762
b30ab791
AG
3763 if (get_ldev(device)) {
3764 if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
3765 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
b411b363
PR
3766 ldsc = 1;
3767 }
3768
b30ab791 3769 put_ldev(device);
b411b363
PR
3770 }
3771
b30ab791 3772 if (device->state.conn > C_WF_REPORT_PARAMS) {
b411b363 3773 if (be64_to_cpu(p->c_size) !=
b30ab791 3774 drbd_get_capacity(device->this_bdev) || ldsc) {
b411b363
PR
3775 /* we have different sizes, probably peer
3776 * needs to know my new size... */
69a22773 3777 drbd_send_sizes(peer_device, 0, ddsf);
b411b363 3778 }
b30ab791
AG
3779 if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
3780 (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
3781 if (device->state.pdsk >= D_INCONSISTENT &&
3782 device->state.disk >= D_INCONSISTENT) {
e89b591c 3783 if (ddsf & DDSF_NO_RESYNC)
d0180171 3784 drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
e89b591c 3785 else
b30ab791 3786 resync_after_online_grow(device);
e89b591c 3787 } else
b30ab791 3788 set_bit(RESYNC_AFTER_NEG, &device->flags);
b411b363
PR
3789 }
3790 }
3791
82bc0194 3792 return 0;
b411b363
PR
3793}
3794
bde89a9e 3795static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
b411b363 3796{
9f4fe9ad 3797 struct drbd_peer_device *peer_device;
b30ab791 3798 struct drbd_device *device;
e658983a 3799 struct p_uuids *p = pi->data;
b411b363 3800 u64 *p_uuid;
62b0da3a 3801 int i, updated_uuids = 0;
b411b363 3802
9f4fe9ad
AG
3803 peer_device = conn_peer_device(connection, pi->vnr);
3804 if (!peer_device)
bde89a9e 3805 return config_unknown_volume(connection, pi);
9f4fe9ad 3806 device = peer_device->device;
4a76b161 3807
b411b363 3808 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
063eacf8 3809 if (!p_uuid) {
d0180171 3810 drbd_err(device, "kmalloc of p_uuid failed\n");
063eacf8
JW
3811 return -ENOMEM;
3812 }
b411b363
PR
3813
3814 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3815 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3816
b30ab791
AG
3817 kfree(device->p_uuid);
3818 device->p_uuid = p_uuid;
b411b363 3819
b30ab791
AG
3820 if (device->state.conn < C_CONNECTED &&
3821 device->state.disk < D_INCONSISTENT &&
3822 device->state.role == R_PRIMARY &&
3823 (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
d0180171 3824 drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
b30ab791 3825 (unsigned long long)device->ed_uuid);
9f4fe9ad 3826 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
82bc0194 3827 return -EIO;
b411b363
PR
3828 }
3829
b30ab791 3830 if (get_ldev(device)) {
b411b363 3831 int skip_initial_sync =
b30ab791 3832 device->state.conn == C_CONNECTED &&
9f4fe9ad 3833 peer_device->connection->agreed_pro_version >= 90 &&
b30ab791 3834 device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
b411b363
PR
3835 (p_uuid[UI_FLAGS] & 8);
3836 if (skip_initial_sync) {
d0180171 3837 drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
b30ab791 3838 drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
20ceb2b2
LE
3839 "clear_n_write from receive_uuids",
3840 BM_LOCKED_TEST_ALLOWED);
b30ab791
AG
3841 _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
3842 _drbd_uuid_set(device, UI_BITMAP, 0);
3843 _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
b411b363 3844 CS_VERBOSE, NULL);
b30ab791 3845 drbd_md_sync(device);
62b0da3a 3846 updated_uuids = 1;
b411b363 3847 }
b30ab791
AG
3848 put_ldev(device);
3849 } else if (device->state.disk < D_INCONSISTENT &&
3850 device->state.role == R_PRIMARY) {
18a50fa2
PR
3851 /* I am a diskless primary, the peer just created a new current UUID
3852 for me. */
b30ab791 3853 updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
b411b363
PR
3854 }
3855
3856 /* Before we test for the disk state, we should wait until a possibly
3857 ongoing cluster wide state change has finished. That is important if
3858 we are primary and are detaching from our disk. We need to see the
3859 new disk state... */
b30ab791
AG
3860 mutex_lock(device->state_mutex);
3861 mutex_unlock(device->state_mutex);
3862 if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
3863 updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
62b0da3a
LE
3864
3865 if (updated_uuids)
b30ab791 3866 drbd_print_uuids(device, "receiver updated UUIDs to");
b411b363 3867
82bc0194 3868 return 0;
b411b363
PR
3869}
3870
3871/**
3872 * convert_state() - Converts the peer's view of the cluster state to our point of view
3873 * @ps: The state as seen by the peer.
3874 */
3875static union drbd_state convert_state(union drbd_state ps)
3876{
3877 union drbd_state ms;
3878
3879 static enum drbd_conns c_tab[] = {
369bea63 3880 [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
b411b363
PR
3881 [C_CONNECTED] = C_CONNECTED,
3882
3883 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3884 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3885 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3886 [C_VERIFY_S] = C_VERIFY_T,
3887 [C_MASK] = C_MASK,
3888 };
3889
3890 ms.i = ps.i;
3891
3892 ms.conn = c_tab[ps.conn];
3893 ms.peer = ps.role;
3894 ms.role = ps.peer;
3895 ms.pdsk = ps.disk;
3896 ms.disk = ps.pdsk;
3897 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3898
3899 return ms;
3900}
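
/* A hypothetical example of the conversion above: */
static union drbd_state convert_state_example(void)
{
	union drbd_state ps = { .i = 0 };

	ps.conn = C_STARTING_SYNC_S;	/* the peer starts a sync as source */
	ps.role = R_PRIMARY;
	ps.peer = R_SECONDARY;
	ps.disk = D_UP_TO_DATE;
	ps.pdsk = D_INCONSISTENT;

	/* convert_state() mirrors the view: conn maps through c_tab to
	 * C_STARTING_SYNC_T, role/peer and disk/pdsk swap, so from our
	 * side this reads Secondary/Inconsistent facing an UpToDate
	 * Primary acting as sync source. */
	return convert_state(ps);
}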
3901
bde89a9e 3902static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
b411b363 3903{
9f4fe9ad 3904 struct drbd_peer_device *peer_device;
b30ab791 3905 struct drbd_device *device;
e658983a 3906 struct p_req_state *p = pi->data;
b411b363 3907 union drbd_state mask, val;
bf885f8a 3908 enum drbd_state_rv rv;
b411b363 3909
9f4fe9ad
AG
3910 peer_device = conn_peer_device(connection, pi->vnr);
3911 if (!peer_device)
4a76b161 3912 return -EIO;
9f4fe9ad 3913 device = peer_device->device;
4a76b161 3914
b411b363
PR
3915 mask.i = be32_to_cpu(p->mask);
3916 val.i = be32_to_cpu(p->val);
3917
9f4fe9ad 3918 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
b30ab791 3919 mutex_is_locked(device->state_mutex)) {
69a22773 3920 drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
82bc0194 3921 return 0;
b411b363
PR
3922 }
3923
3924 mask = convert_state(mask);
3925 val = convert_state(val);
3926
b30ab791 3927 rv = drbd_change_state(device, CS_VERBOSE, mask, val);
69a22773 3928 drbd_send_sr_reply(peer_device, rv);
b411b363 3929
b30ab791 3930 drbd_md_sync(device);
b411b363 3931
82bc0194 3932 return 0;
b411b363
PR
3933}
3934
bde89a9e 3935static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
b411b363 3936{
e658983a 3937 struct p_req_state *p = pi->data;
b411b363 3938 union drbd_state mask, val;
bf885f8a 3939 enum drbd_state_rv rv;
b411b363 3940
b411b363
PR
3941 mask.i = be32_to_cpu(p->mask);
3942 val.i = be32_to_cpu(p->val);
3943
bde89a9e
AG
3944 if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
3945 mutex_is_locked(&connection->cstate_mutex)) {
3946 conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
82bc0194 3947 return 0;
b411b363
PR
3948 }
3949
3950 mask = convert_state(mask);
3951 val = convert_state(val);
3952
bde89a9e
AG
3953 rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
3954 conn_send_sr_reply(connection, rv);
b411b363 3955
82bc0194 3956 return 0;
b411b363
PR
3957}
3958
bde89a9e 3959static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
b411b363 3960{
9f4fe9ad 3961 struct drbd_peer_device *peer_device;
b30ab791 3962 struct drbd_device *device;
e658983a 3963 struct p_state *p = pi->data;
4ac4aada 3964 union drbd_state os, ns, peer_state;
b411b363 3965 enum drbd_disk_state real_peer_disk;
65d922c3 3966 enum chg_state_flags cs_flags;
b411b363
PR
3967 int rv;
3968
9f4fe9ad
AG
3969 peer_device = conn_peer_device(connection, pi->vnr);
3970 if (!peer_device)
bde89a9e 3971 return config_unknown_volume(connection, pi);
9f4fe9ad 3972 device = peer_device->device;
4a76b161 3973
b411b363
PR
3974 peer_state.i = be32_to_cpu(p->state);
3975
3976 real_peer_disk = peer_state.disk;
3977 if (peer_state.disk == D_NEGOTIATING) {
b30ab791 3978 real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
d0180171 3979 drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
b411b363
PR
3980 }
3981
0500813f 3982 spin_lock_irq(&device->resource->req_lock);
b411b363 3983 retry:
b30ab791 3984 os = ns = drbd_read_state(device);
0500813f 3985 spin_unlock_irq(&device->resource->req_lock);
b411b363 3986
545752d5
LE
3987 /* If some other part of the code (asender thread, timeout)
3988 * already decided to close the connection again,
3989 * we must not "re-establish" it here. */
3990 if (os.conn <= C_TEAR_DOWN)
58ffa580 3991 return -ECONNRESET;
545752d5 3992
40424e4a
LE
3993 /* If this is the "end of sync" confirmation, usually the peer disk
3994 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
3995 * set) resync started in PausedSyncT, or if the timing of pause-/
3996 * unpause-sync events has been "just right", the peer disk may
3997 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
3998 */
3999 if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
4000 real_peer_disk == D_UP_TO_DATE &&
e9ef7bb6
LE
4001 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
4002 /* If we are (becoming) SyncSource, but peer is still in sync
4003 * preparation, ignore its uptodate-ness to avoid flapping, it
4004 * will change to inconsistent once the peer reaches active
4005 * syncing states.
4006 * It may have changed syncer-paused flags, however, so we
4007 * cannot ignore this completely. */
4008 if (peer_state.conn > C_CONNECTED &&
4009 peer_state.conn < C_SYNC_SOURCE)
4010 real_peer_disk = D_INCONSISTENT;
4011
4012 /* if peer_state changes to connected at the same time,
4013 * it explicitly notifies us that it finished resync.
4014 * Maybe we should finish it up, too? */
4015 else if (os.conn >= C_SYNC_SOURCE &&
4016 peer_state.conn == C_CONNECTED) {
b30ab791
AG
4017 if (drbd_bm_total_weight(device) <= device->rs_failed)
4018 drbd_resync_finished(device);
82bc0194 4019 return 0;
e9ef7bb6
LE
4020 }
4021 }
4022
02b91b55
LE
4023 /* explicit verify finished notification, stop sector reached. */
4024 if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
4025 peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
b30ab791
AG
4026 ov_out_of_sync_print(device);
4027 drbd_resync_finished(device);
58ffa580 4028 return 0;
02b91b55
LE
4029 }
4030
e9ef7bb6
LE
4031 /* peer says his disk is inconsistent, while we think it is uptodate,
4032 * and this happens while the peer still thinks we have a sync going on,
4033 * but we think we are already done with the sync.
4034 * We ignore this to avoid flapping pdsk.
4035 * This should not happen, if the peer is a recent version of drbd. */
4036 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
4037 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
4038 real_peer_disk = D_UP_TO_DATE;
4039
4ac4aada
LE
4040 if (ns.conn == C_WF_REPORT_PARAMS)
4041 ns.conn = C_CONNECTED;
b411b363 4042
67531718
PR
4043 if (peer_state.conn == C_AHEAD)
4044 ns.conn = C_BEHIND;
4045
b30ab791
AG
4046 if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
4047 get_ldev_if_state(device, D_NEGOTIATING)) {
b411b363
PR
4048 int cr; /* consider resync */
4049
4050 /* if we established a new connection */
4ac4aada 4051 cr = (os.conn < C_CONNECTED);
b411b363
PR
4052 /* if we had an established connection
4053 * and one of the nodes newly attaches a disk */
4ac4aada 4054 cr |= (os.conn == C_CONNECTED &&
b411b363 4055 (peer_state.disk == D_NEGOTIATING ||
4ac4aada 4056 os.disk == D_NEGOTIATING));
b411b363
PR
4057 /* if we have both been inconsistent, and the peer has been
4058 * forced to be UpToDate with --overwrite-data */
b30ab791 4059 cr |= test_bit(CONSIDER_RESYNC, &device->flags);
b411b363
PR
4060 /* if we had been plain connected, and the admin requested to
4061 * start a sync by "invalidate" or "invalidate-remote" */
4ac4aada 4062 cr |= (os.conn == C_CONNECTED &&
b411b363
PR
4063 (peer_state.conn >= C_STARTING_SYNC_S &&
4064 peer_state.conn <= C_WF_BITMAP_T));
4065
4066 if (cr)
69a22773 4067 ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);
b411b363 4068
b30ab791 4069 put_ldev(device);
4ac4aada
LE
4070 if (ns.conn == C_MASK) {
4071 ns.conn = C_CONNECTED;
b30ab791
AG
4072 if (device->state.disk == D_NEGOTIATING) {
4073 drbd_force_state(device, NS(disk, D_FAILED));
b411b363 4074 } else if (peer_state.disk == D_NEGOTIATING) {
d0180171 4075 drbd_err(device, "Disk attach process on the peer node was aborted.\n");
b411b363 4076 peer_state.disk = D_DISKLESS;
580b9767 4077 real_peer_disk = D_DISKLESS;
b411b363 4078 } else {
9f4fe9ad 4079 if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
82bc0194 4080 return -EIO;
0b0ba1ef 4081 D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
9f4fe9ad 4082 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
82bc0194 4083 return -EIO;
b411b363
PR
4084 }
4085 }
4086 }
4087
0500813f 4088 spin_lock_irq(&device->resource->req_lock);
b30ab791 4089 if (os.i != drbd_read_state(device).i)
b411b363 4090 goto retry;
b30ab791 4091 clear_bit(CONSIDER_RESYNC, &device->flags);
b411b363
PR
4092 ns.peer = peer_state.role;
4093 ns.pdsk = real_peer_disk;
4094 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
4ac4aada 4095 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
b30ab791 4096 ns.disk = device->new_state_tmp.disk;
4ac4aada 4097 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
b30ab791
AG
4098 if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
4099 test_bit(NEW_CUR_UUID, &device->flags)) {
8554df1c 4100 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
481c6f50 4101 for temporary network outages! */
0500813f 4102 spin_unlock_irq(&device->resource->req_lock);
d0180171 4103 drbd_err(device, "Aborting Connect, cannot thaw IO with a peer that is only Consistent\n");
9f4fe9ad 4104 tl_clear(peer_device->connection);
b30ab791
AG
4105 drbd_uuid_new_current(device);
4106 clear_bit(NEW_CUR_UUID, &device->flags);
9f4fe9ad 4107 conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
82bc0194 4108 return -EIO;
481c6f50 4109 }
b30ab791
AG
4110 rv = _drbd_set_state(device, ns, cs_flags, NULL);
4111 ns = drbd_read_state(device);
0500813f 4112 spin_unlock_irq(&device->resource->req_lock);
b411b363
PR
4113
4114 if (rv < SS_SUCCESS) {
9f4fe9ad 4115 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
82bc0194 4116 return -EIO;
b411b363
PR
4117 }
4118
4ac4aada
LE
4119 if (os.conn > C_WF_REPORT_PARAMS) {
4120 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
b411b363
PR
4121 peer_state.disk != D_NEGOTIATING ) {
4122 /* we want resync, peer has not yet decided to sync... */
4123 /* Nowadays only used when forcing a node into primary role and
4124 setting its disk to UpToDate with that */
69a22773
AG
4125 drbd_send_uuids(peer_device);
4126 drbd_send_current_state(peer_device);
b411b363
PR
4127 }
4128 }
4129
b30ab791 4130 clear_bit(DISCARD_MY_DATA, &device->flags);
b411b363 4131
b30ab791 4132 drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */
b411b363 4133
82bc0194 4134 return 0;
b411b363
PR
4135}
4136
bde89a9e 4137static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
b411b363 4138{
9f4fe9ad 4139 struct drbd_peer_device *peer_device;
b30ab791 4140 struct drbd_device *device;
e658983a 4141 struct p_rs_uuid *p = pi->data;
4a76b161 4142
9f4fe9ad
AG
4143 peer_device = conn_peer_device(connection, pi->vnr);
4144 if (!peer_device)
4a76b161 4145 return -EIO;
9f4fe9ad 4146 device = peer_device->device;
b411b363 4147
b30ab791
AG
4148 wait_event(device->misc_wait,
4149 device->state.conn == C_WF_SYNC_UUID ||
4150 device->state.conn == C_BEHIND ||
4151 device->state.conn < C_CONNECTED ||
4152 device->state.disk < D_NEGOTIATING);
b411b363 4153
0b0ba1ef 4154 /* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */
b411b363 4155
b411b363
PR
4156 /* Here the _drbd_uuid_ functions are right, current should
4157 _not_ be rotated into the history */
b30ab791
AG
4158 if (get_ldev_if_state(device, D_NEGOTIATING)) {
4159 _drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4160 _drbd_uuid_set(device, UI_BITMAP, 0UL);
b411b363 4161
b30ab791
AG
4162 drbd_print_uuids(device, "updated sync uuid");
4163 drbd_start_resync(device, C_SYNC_TARGET);
b411b363 4164
b30ab791 4165 put_ldev(device);
b411b363 4166 } else
d0180171 4167 drbd_err(device, "Ignoring SyncUUID packet!\n");
b411b363 4168
82bc0194 4169 return 0;
b411b363
PR
4170}
4171
2c46407d
AG
4172/**
4173 * receive_bitmap_plain
4174 *
4175 * Return 0 when done, 1 when another iteration is needed, and a negative error
4176 * code upon failure.
4177 */
4178static int
69a22773 4179receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
e658983a 4180 unsigned long *p, struct bm_xfer_ctx *c)
b411b363 4181{
50d0b1ad 4182 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
69a22773 4183 drbd_header_size(peer_device->connection);
e658983a 4184 unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
50d0b1ad 4185 c->bm_words - c->word_offset);
e658983a 4186 unsigned int want = num_words * sizeof(*p);
2c46407d 4187 int err;
b411b363 4188
50d0b1ad 4189 if (want != size) {
69a22773 4190 drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
2c46407d 4191 return -EIO;
b411b363
PR
4192 }
4193 if (want == 0)
2c46407d 4194 return 0;
69a22773 4195 err = drbd_recv_all(peer_device->connection, p, want);
82bc0194 4196 if (err)
2c46407d 4197 return err;
b411b363 4198
69a22773 4199 drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
b411b363
PR
4200
4201 c->word_offset += num_words;
4202 c->bit_offset = c->word_offset * BITS_PER_LONG;
4203 if (c->bit_offset > c->bm_bits)
4204 c->bit_offset = c->bm_bits;
4205
2c46407d 4206 return 1;
b411b363
PR
4207}
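
/* Worked example for the sizing above, assuming a 4096 byte
 * DRBD_SOCKET_BUFFER_SIZE and an 8 byte header: data_size = 4088,
 * so up to 511 64 bit words travel per packet; every full packet
 * must arrive with pi->size == want == 511 * 8, and the last,
 * shorter packet covers whatever is left of
 * c->bm_words - c->word_offset. */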
4208
a02d1240
AG
4209static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4210{
4211 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4212}
4213
4214static int dcbp_get_start(struct p_compressed_bm *p)
4215{
4216 return (p->encoding & 0x80) != 0;
4217}
4218
4219static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4220{
4221 return (p->encoding >> 4) & 0x7;
4222}
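
/* Layout of p->encoding as decoded by the three helpers above:
 *
 *	bit 7      bits 6..4      bits 3..0
 *	start      pad_bits       code
 *
 * e.g. (sketch) encoding = 0x92 yields start = 1, pad_bits = 1,
 * code = 2. */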
4223
2c46407d
AG
4224/**
4225 * recv_bm_rle_bits
4226 *
4227 * Return 0 when done, 1 when another iteration is needed, and a negative error
4228 * code upon failure.
4229 */
4230static int
69a22773 4231recv_bm_rle_bits(struct drbd_peer_device *peer_device,
b411b363 4232 struct p_compressed_bm *p,
c6d25cfe
PR
4233 struct bm_xfer_ctx *c,
4234 unsigned int len)
b411b363
PR
4235{
4236 struct bitstream bs;
4237 u64 look_ahead;
4238 u64 rl;
4239 u64 tmp;
4240 unsigned long s = c->bit_offset;
4241 unsigned long e;
a02d1240 4242 int toggle = dcbp_get_start(p);
b411b363
PR
4243 int have;
4244 int bits;
4245
a02d1240 4246 bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
b411b363
PR
4247
4248 bits = bitstream_get_bits(&bs, &look_ahead, 64);
4249 if (bits < 0)
2c46407d 4250 return -EIO;
b411b363
PR
4251
4252 for (have = bits; have > 0; s += rl, toggle = !toggle) {
4253 bits = vli_decode_bits(&rl, look_ahead);
4254 if (bits <= 0)
2c46407d 4255 return -EIO;
b411b363
PR
4256
4257 if (toggle) {
4258 e = s + rl -1;
4259 if (e >= c->bm_bits) {
69a22773 4260 drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
2c46407d 4261 return -EIO;
b411b363 4262 }
69a22773 4263 _drbd_bm_set_bits(peer_device->device, s, e);
b411b363
PR
4264 }
4265
4266 if (have < bits) {
69a22773 4267 drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
b411b363
PR
4268 have, bits, look_ahead,
4269 (unsigned int)(bs.cur.b - p->code),
4270 (unsigned int)bs.buf_len);
2c46407d 4271 return -EIO;
b411b363 4272 }
d2da5b0c
LE
4273 /* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
4274 if (likely(bits < 64))
4275 look_ahead >>= bits;
4276 else
4277 look_ahead = 0;
b411b363
PR
4278 have -= bits;
4279
4280 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
4281 if (bits < 0)
2c46407d 4282 return -EIO;
b411b363
PR
4283 look_ahead |= tmp << have;
4284 have += bits;
4285 }
4286
4287 c->bit_offset = s;
4288 bm_xfer_ctx_bit_to_word_offset(c);
4289
2c46407d 4290 return (s != c->bm_bits);
b411b363
PR
4291}
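
/* A minimal sketch of the run-length scheme decoded above, using
 * hypothetical pre-decoded runs instead of a real VLI bitstream:
 * the toggle starts at dcbp_get_start(p) and flips after every run,
 * and only runs seen with the toggle set mark bits out of sync. */
static void rle_runs_example(struct drbd_peer_device *peer_device)
{
	static const u64 runs[] = { 5, 3, 10 };	/* decoded run lengths */
	unsigned long s = 0;
	int toggle = 0;	/* what dcbp_get_start() would have returned */
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(runs); i++, toggle = !toggle) {
		if (toggle)	/* here: only bits 5..7 get set */
			_drbd_bm_set_bits(peer_device->device, s, s + runs[i] - 1);
		s += runs[i];
	}
}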
4292
2c46407d
AG
4293/**
4294 * decode_bitmap_c
4295 *
4296 * Return 0 when done, 1 when another iteration is needed, and a negative error
4297 * code upon failure.
4298 */
4299static int
69a22773 4300decode_bitmap_c(struct drbd_peer_device *peer_device,
b411b363 4301 struct p_compressed_bm *p,
c6d25cfe
PR
4302 struct bm_xfer_ctx *c,
4303 unsigned int len)
b411b363 4304{
a02d1240 4305 if (dcbp_get_code(p) == RLE_VLI_Bits)
69a22773 4306 return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
b411b363
PR
4307
4308 /* other variants had been implemented for evaluation,
4309 * but have been dropped as this one turned out to be "best"
4310 * during all our tests. */
4311
69a22773
AG
4312 drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
4313 conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
2c46407d 4314 return -EIO;
b411b363
PR
4315}
4316
b30ab791 4317void INFO_bm_xfer_stats(struct drbd_device *device,
b411b363
PR
4318 const char *direction, struct bm_xfer_ctx *c)
4319{
4320 /* what would it take to transfer it "plaintext" */
a6b32bc3 4321 unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
50d0b1ad
AG
4322 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4323 unsigned int plain =
4324 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4325 c->bm_words * sizeof(unsigned long);
4326 unsigned int total = c->bytes[0] + c->bytes[1];
4327 unsigned int r;
b411b363
PR
4328
4329 /* total cannot be zero, but just in case: */
4330 if (total == 0)
4331 return;
4332
4333 /* don't report if not compressed */
4334 if (total >= plain)
4335 return;
4336
4337 /* total < plain. check for overflow, still */
4338 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4339 : (1000 * total / plain);
4340
4341 if (r > 1000)
4342 r = 1000;
4343
4344 r = 1000 - r;
d0180171 4345 drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
b411b363
PR
4346 "total %u; compression: %u.%u%%\n",
4347 direction,
4348 c->bytes[1], c->packets[1],
4349 c->bytes[0], c->packets[0],
4350 total, r/10, r % 10);
4351}
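
/* Worked example for the ratio above (illustrative numbers; 64 bit,
 * assuming an 8 byte header, so data_size = 4088): with bm_words =
 * 4096, plain = 8 * (DIV_ROUND_UP(4096, 4088) + 1) + 4096 * 8 =
 * 32792 bytes; an RLE transfer of total = 1200 bytes then gives
 * r = 1000 - 1000 * 1200 / 32792 = 964, logged as
 * "compression: 96.4%". */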
4352
4353 /* Since we are processing the bitfield from lower addresses to higher,
4354 it does not matter whether we process it in 32 bit chunks or 64 bit
4355 chunks, as long as it is little endian. (Understand it as a byte
4356 stream, beginning with the lowest byte...) If we used big endian
4357 we would need to process it from the highest address to the lowest,
4358 in order to be agnostic to the 32 vs 64 bit issue.
4359
4360 returns 0 on success, a negative error code otherwise. */
bde89a9e 4361static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
b411b363 4362{
9f4fe9ad 4363 struct drbd_peer_device *peer_device;
b30ab791 4364 struct drbd_device *device;
b411b363 4365 struct bm_xfer_ctx c;
2c46407d 4366 int err;
4a76b161 4367
9f4fe9ad
AG
4368 peer_device = conn_peer_device(connection, pi->vnr);
4369 if (!peer_device)
4a76b161 4370 return -EIO;
9f4fe9ad 4371 device = peer_device->device;
b411b363 4372
b30ab791 4373 drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
20ceb2b2
LE
4374 /* you are supposed to send additional out-of-sync information
4375 * if you actually set bits during this phase */
b411b363 4376
b411b363 4377 c = (struct bm_xfer_ctx) {
b30ab791
AG
4378 .bm_bits = drbd_bm_bits(device),
4379 .bm_words = drbd_bm_words(device),
b411b363
PR
4380 };
4381
2c46407d 4382 for(;;) {
e658983a 4383 if (pi->cmd == P_BITMAP)
69a22773 4384 err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
e658983a 4385 else if (pi->cmd == P_COMPRESSED_BITMAP) {
b411b363
PR
4386 /* MAYBE: sanity check that we speak proto >= 90,
4387 * and the feature is enabled! */
e658983a 4388 struct p_compressed_bm *p = pi->data;
b411b363 4389
bde89a9e 4390 if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
d0180171 4391 drbd_err(device, "ReportCBitmap packet too large\n");
82bc0194 4392 err = -EIO;
b411b363
PR
4393 goto out;
4394 }
e658983a 4395 if (pi->size <= sizeof(*p)) {
d0180171 4396 drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
82bc0194 4397 err = -EIO;
78fcbdae 4398 goto out;
b411b363 4399 }
9f4fe9ad 4400 err = drbd_recv_all(peer_device->connection, p, pi->size);
e658983a
AG
4401 if (err)
4402 goto out;
69a22773 4403 err = decode_bitmap_c(peer_device, p, &c, pi->size);
b411b363 4404 } else {
d0180171 4405 drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
82bc0194 4406 err = -EIO;
b411b363
PR
4407 goto out;
4408 }
4409
e2857216 4410 c.packets[pi->cmd == P_BITMAP]++;
bde89a9e 4411 c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
b411b363 4412
2c46407d
AG
4413 if (err <= 0) {
4414 if (err < 0)
4415 goto out;
b411b363 4416 break;
2c46407d 4417 }
9f4fe9ad 4418 err = drbd_recv_header(peer_device->connection, pi);
82bc0194 4419 if (err)
b411b363 4420 goto out;
2c46407d 4421 }
b411b363 4422
b30ab791 4423 INFO_bm_xfer_stats(device, "receive", &c);
b411b363 4424
b30ab791 4425 if (device->state.conn == C_WF_BITMAP_T) {
de1f8e4a
AG
4426 enum drbd_state_rv rv;
4427
b30ab791 4428 err = drbd_send_bitmap(device);
82bc0194 4429 if (err)
b411b363
PR
4430 goto out;
4431 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
b30ab791 4432 rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
0b0ba1ef 4433 D_ASSERT(device, rv == SS_SUCCESS);
b30ab791 4434 } else if (device->state.conn != C_WF_BITMAP_S) {
b411b363
PR
4435 /* admin may have requested C_DISCONNECTING,
4436 * other threads may have noticed network errors */
d0180171 4437 drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
b30ab791 4438 drbd_conn_str(device->state.conn));
b411b363 4439 }
82bc0194 4440 err = 0;
b411b363 4441
b411b363 4442 out:
b30ab791
AG
4443 drbd_bm_unlock(device);
4444 if (!err && device->state.conn == C_WF_BITMAP_S)
4445 drbd_start_resync(device, C_SYNC_SOURCE);
82bc0194 4446 return err;
b411b363
PR
4447}
4448
bde89a9e 4449static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
b411b363 4450{
1ec861eb 4451 drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
e2857216 4452 pi->cmd, pi->size);
b411b363 4453
bde89a9e 4454 return ignore_remaining_packet(connection, pi);
b411b363
PR
4455}
4456
bde89a9e 4457static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
0ced55a3 4458{
e7f52dfb
LE
4459 /* Make sure we've acked all the TCP data associated
4460 * with the data requests being unplugged */
bde89a9e 4461 drbd_tcp_quickack(connection->data.socket);
0ced55a3 4462
82bc0194 4463 return 0;
0ced55a3
PR
4464}
4465
bde89a9e 4466static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
73a01a18 4467{
9f4fe9ad 4468 struct drbd_peer_device *peer_device;
b30ab791 4469 struct drbd_device *device;
e658983a 4470 struct p_block_desc *p = pi->data;
4a76b161 4471
9f4fe9ad
AG
4472 peer_device = conn_peer_device(connection, pi->vnr);
4473 if (!peer_device)
4a76b161 4474 return -EIO;
9f4fe9ad 4475 device = peer_device->device;
73a01a18 4476
b30ab791 4477 switch (device->state.conn) {
f735e363
LE
4478 case C_WF_SYNC_UUID:
4479 case C_WF_BITMAP_T:
4480 case C_BEHIND:
4481 break;
4482 default:
d0180171 4483 drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
b30ab791 4484 drbd_conn_str(device->state.conn));
f735e363
LE
4485 }
4486
b30ab791 4487 drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
73a01a18 4488
82bc0194 4489 return 0;
73a01a18
PR
4490}
4491
02918be2
PR
4492struct data_cmd {
4493 int expect_payload;
4494 size_t pkt_size;
bde89a9e 4495 int (*fn)(struct drbd_connection *, struct packet_info *);
02918be2
PR
4496};
4497
4498static struct data_cmd drbd_cmd_handler[] = {
4499 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
4500 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
4501 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
4502 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
e658983a
AG
4503 [P_BITMAP] = { 1, 0, receive_bitmap } ,
4504 [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
4505 [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote },
02918be2
PR
4506 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4507 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
e658983a
AG
4508 [P_SYNC_PARAM] = { 1, 0, receive_SyncParam },
4509 [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam },
02918be2
PR
4510 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
4511 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
4512 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
4513 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
4514 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
4515 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
4516 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4517 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4518 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4519 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
73a01a18 4520 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
4a76b161 4521 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
036b17ea 4522 [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
a0fb3c47 4523 [P_TRIM] = { 0, sizeof(struct p_trim), receive_Data },
b411b363
PR
4524};
4525
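/* Receive loop of the data socket. Per iteration, roughly:
 *
 *	drbd_recv_header(connection, &pi);	// fills cmd, size, vnr
 *	cmd = &drbd_cmd_handler[pi.cmd];
 *	drbd_recv_all_warn(connection, pi.data, cmd->pkt_size);
 *	cmd->fn(connection, &pi);		// consumes remaining payload
 *
 * Any failure forces the connection into C_PROTOCOL_ERROR. */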
bde89a9e 4526static void drbdd(struct drbd_connection *connection)
b411b363 4527{
77351055 4528 struct packet_info pi;
02918be2 4529 size_t shs; /* sub header size */
82bc0194 4530 int err;
b411b363 4531
bde89a9e 4532 while (get_t_state(&connection->receiver) == RUNNING) {
deebe195 4533 struct data_cmd *cmd;
b411b363 4534
bde89a9e
AG
4535 drbd_thread_current_set_cpu(&connection->receiver);
4536 if (drbd_recv_header(connection, &pi))
02918be2 4537 goto err_out;
b411b363 4538
deebe195 4539 cmd = &drbd_cmd_handler[pi.cmd];
4a76b161 4540 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
1ec861eb 4541 drbd_err(connection, "Unexpected data packet %s (0x%04x)\n",
2fcb8f30 4542 cmdname(pi.cmd), pi.cmd);
02918be2 4543 goto err_out;
0b33a916 4544 }
b411b363 4545
e658983a
AG
4546 shs = cmd->pkt_size;
4547 if (pi.size > shs && !cmd->expect_payload) {
1ec861eb 4548 drbd_err(connection, "No payload expected %s l:%d\n",
2fcb8f30 4549 cmdname(pi.cmd), pi.size);
02918be2 4550 goto err_out;
b411b363 4551 }
b411b363 4552
c13f7e1a 4553 if (shs) {
bde89a9e 4554 err = drbd_recv_all_warn(connection, pi.data, shs);
a5c31904 4555 if (err)
c13f7e1a 4556 goto err_out;
e2857216 4557 pi.size -= shs;
c13f7e1a
LE
4558 }
4559
bde89a9e 4560 err = cmd->fn(connection, &pi);
4a76b161 4561 if (err) {
1ec861eb 4562 drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
9f5bdc33 4563 cmdname(pi.cmd), err, pi.size);
02918be2 4564 goto err_out;
b411b363
PR
4565 }
4566 }
82bc0194 4567 return;
b411b363 4568
82bc0194 4569 err_out:
bde89a9e 4570 conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
b411b363
PR
4571}
4572
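/* Tear down an established connection: force C_NETWORK_FAILURE, stop the
 * asender, close the sockets, run drbd_disconnected() on every volume,
 * try to outdate the peer's disk if we are Primary, and finally move on
 * to C_UNCONNECTED (or C_STANDALONE after an admin disconnect). */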
bde89a9e 4573static void conn_disconnect(struct drbd_connection *connection)
b411b363 4574{
c06ece6b 4575 struct drbd_peer_device *peer_device;
bbeb641c 4576 enum drbd_conns oc;
376694a0 4577 int vnr;
b411b363 4578
bde89a9e 4579 if (connection->cstate == C_STANDALONE)
b411b363 4580 return;
b411b363 4581
545752d5
LE
4582 /* We are about to start the cleanup after connection loss.
4583 * Make sure drbd_make_request knows about that.
4584 * Usually we should be in some network failure state already,
4585 * but just in case we are not, we fix it up here.
4586 */
bde89a9e 4587 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
545752d5 4588
b411b363 4589 /* asender does not clean up anything. it must not interfere, either */
bde89a9e
AG
4590 drbd_thread_stop(&connection->asender);
4591 drbd_free_sock(connection);
360cc740 4592
c141ebda 4593 rcu_read_lock();
c06ece6b
AG
4594 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
4595 struct drbd_device *device = peer_device->device;
b30ab791 4596 kref_get(&device->kref);
c141ebda 4597 rcu_read_unlock();
69a22773 4598 drbd_disconnected(peer_device);
c06ece6b 4599 kref_put(&device->kref, drbd_destroy_device);
c141ebda
PR
4600 rcu_read_lock();
4601 }
4602 rcu_read_unlock();
4603
bde89a9e 4604 if (!list_empty(&connection->current_epoch->list))
1ec861eb 4605 drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
12038a3a 4606 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
bde89a9e
AG
4607 atomic_set(&connection->current_epoch->epoch_size, 0);
4608 connection->send.seen_any_write_yet = false;
12038a3a 4609
1ec861eb 4610 drbd_info(connection, "Connection closed\n");
360cc740 4611
bde89a9e
AG
4612 if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
4613 conn_try_outdate_peer_async(connection);
cb703454 4614
0500813f 4615 spin_lock_irq(&connection->resource->req_lock);
bde89a9e 4616 oc = connection->cstate;
bbeb641c 4617 if (oc >= C_UNCONNECTED)
bde89a9e 4618 _conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
bbeb641c 4619
0500813f 4620 spin_unlock_irq(&connection->resource->req_lock);
360cc740 4621
f3dfa40a 4622 if (oc == C_DISCONNECTING)
bde89a9e 4623 conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
360cc740
PR
4624}
4625
69a22773 4626static int drbd_disconnected(struct drbd_peer_device *peer_device)
360cc740 4627{
69a22773 4628 struct drbd_device *device = peer_device->device;
360cc740 4629 unsigned int i;
b411b363 4630
85719573 4631 /* wait for current activity to cease. */
0500813f 4632 spin_lock_irq(&device->resource->req_lock);
b30ab791
AG
4633 _drbd_wait_ee_list_empty(device, &device->active_ee);
4634 _drbd_wait_ee_list_empty(device, &device->sync_ee);
4635 _drbd_wait_ee_list_empty(device, &device->read_ee);
0500813f 4636 spin_unlock_irq(&device->resource->req_lock);
b411b363
PR
4637
4638 /* We do not have data structures that would allow us to
4639 * get the rs_pending_cnt down to 0 again.
4640 * * On C_SYNC_TARGET we do not have any data structures describing
4641 * the pending RSDataRequest's we have sent.
4642 * * On C_SYNC_SOURCE there is no data structure that tracks
4643 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4644 * And no, it is not the sum of the reference counts in the
4645 * resync_LRU. The resync_LRU tracks the whole operation including
4646 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4647 * on the fly. */
b30ab791
AG
4648 drbd_rs_cancel_all(device);
4649 device->rs_total = 0;
4650 device->rs_failed = 0;
4651 atomic_set(&device->rs_pending_cnt, 0);
4652 wake_up(&device->misc_wait);
b411b363 4653
b30ab791
AG
4654 del_timer_sync(&device->resync_timer);
4655 resync_timer_fn((unsigned long)device);
b411b363 4656
b411b363
PR
4657 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4658 * w_make_resync_request etc. which may still be on the worker queue
4659 * to be "canceled" */
b5043c5e 4660 drbd_flush_workqueue(&peer_device->connection->sender_work);
b411b363 4661
b30ab791 4662 drbd_finish_peer_reqs(device);
b411b363 4663
d10b4ea3
PR
4664 /* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
4665 might have queued work again. The one before drbd_finish_peer_reqs() is
4666 necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
b5043c5e 4667 drbd_flush_workqueue(&peer_device->connection->sender_work);
d10b4ea3 4668
08332d73
LE
4669 /* need to do it again, drbd_finish_peer_reqs() may have populated it
4670 * again via drbd_try_clear_on_disk_bm(). */
b30ab791 4671 drbd_rs_cancel_all(device);
b411b363 4672
b30ab791
AG
4673 kfree(device->p_uuid);
4674 device->p_uuid = NULL;
b411b363 4675
b30ab791 4676 if (!drbd_suspended(device))
69a22773 4677 tl_clear(peer_device->connection);
b411b363 4678
b30ab791 4679 drbd_md_sync(device);
b411b363 4680
20ceb2b2
LE
4681 /* serialize with bitmap writeout triggered by the state change,
4682 * if any. */
b30ab791 4683 wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
20ceb2b2 4684
b411b363
PR
4685 /* tcp_close and release of sendpage pages can be deferred. I don't
4686 * want to use SO_LINGER, because apparently it can be deferred for
4687 * more than 20 seconds (longest time I checked).
4688 *
4689 * Actually we don't care for exactly when the network stack does its
4690 * put_page(), but release our reference on these pages right here.
4691 */
b30ab791 4692 i = drbd_free_peer_reqs(device, &device->net_ee);
b411b363 4693 if (i)
d0180171 4694 drbd_info(device, "net_ee not empty, killed %u entries\n", i);
b30ab791 4695 i = atomic_read(&device->pp_in_use_by_net);
435f0740 4696 if (i)
d0180171 4697 drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
b30ab791 4698 i = atomic_read(&device->pp_in_use);
b411b363 4699 if (i)
d0180171 4700 drbd_info(device, "pp_in_use = %d, expected 0\n", i);
b411b363 4701
0b0ba1ef
AG
4702 D_ASSERT(device, list_empty(&device->read_ee));
4703 D_ASSERT(device, list_empty(&device->active_ee));
4704 D_ASSERT(device, list_empty(&device->sync_ee));
4705 D_ASSERT(device, list_empty(&device->done_ee));
b411b363 4706
360cc740 4707 return 0;
b411b363
PR
4708}
4709
4710/*
4711 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4712 * we can agree on is stored in agreed_pro_version.
4713 *
4714 * feature flags and the reserved array should leave enough room for future
4715 * enhancements of the handshake protocol, and possible plugins...
4716 *
4717 * for now, they are expected to be zero, but ignored.
4718 */
bde89a9e 4719static int drbd_send_features(struct drbd_connection *connection)
b411b363 4720{
9f5bdc33
AG
4721 struct drbd_socket *sock;
4722 struct p_connection_features *p;
b411b363 4723
bde89a9e
AG
4724 sock = &connection->data;
4725 p = conn_prepare_command(connection, sock);
9f5bdc33 4726 if (!p)
e8d17b01 4727 return -EIO;
b411b363
PR
4728 memset(p, 0, sizeof(*p));
4729 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4730 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
20c68fde 4731 p->feature_flags = cpu_to_be32(PRO_FEATURES);
bde89a9e 4732 return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
b411b363
PR
4733}
4734
4735/*
4736 * return values:
4737 * 1 yes, we have a valid connection
4738 * 0 oops, did not work out, please try again
4739 * -1 peer talks different language,
4740 * no point in trying again, please go standalone.
4741 */
bde89a9e 4742static int drbd_do_features(struct drbd_connection *connection)
b411b363 4743{
bde89a9e 4744 /* ASSERT current == connection->receiver ... */
e658983a
AG
4745 struct p_connection_features *p;
4746 const int expect = sizeof(struct p_connection_features);
77351055 4747 struct packet_info pi;
a5c31904 4748 int err;
b411b363 4749
bde89a9e 4750 err = drbd_send_features(connection);
e8d17b01 4751 if (err)
b411b363
PR
4752 return 0;
4753
bde89a9e 4754 err = drbd_recv_header(connection, &pi);
69bc7bc3 4755 if (err)
b411b363
PR
4756 return 0;
4757
6038178e 4758 if (pi.cmd != P_CONNECTION_FEATURES) {
1ec861eb 4759 drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
2fcb8f30 4760 cmdname(pi.cmd), pi.cmd);
b411b363
PR
4761 return -1;
4762 }
4763
77351055 4764 if (pi.size != expect) {
1ec861eb 4765 drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
77351055 4766 expect, pi.size);
b411b363
PR
4767 return -1;
4768 }
4769
e658983a 4770 p = pi.data;
bde89a9e 4771 err = drbd_recv_all_warn(connection, p, expect);
a5c31904 4772 if (err)
b411b363 4773 return 0;
b411b363 4774
b411b363
PR
4775 p->protocol_min = be32_to_cpu(p->protocol_min);
4776 p->protocol_max = be32_to_cpu(p->protocol_max);
4777 if (p->protocol_max == 0)
4778 p->protocol_max = p->protocol_min;
4779
4780 if (PRO_VERSION_MAX < p->protocol_min ||
4781 PRO_VERSION_MIN > p->protocol_max)
4782 goto incompat;
4783
bde89a9e 4784 connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
20c68fde 4785 connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);
b411b363 4786
1ec861eb 4787 drbd_info(connection, "Handshake successful: "
bde89a9e 4788 "Agreed network protocol version %d\n", connection->agreed_pro_version);
b411b363 4789
20c68fde
LE
4790 drbd_info(connection, "Agreed to%ssupport TRIM on protocol level\n",
4791 connection->agreed_features & FF_TRIM ? " " : " not ");
4792
b411b363
PR
4793 return 1;
4794
4795 incompat:
1ec861eb 4796 drbd_err(connection, "incompatible DRBD dialects: "
b411b363
PR
4797 "I support %d-%d, peer supports %d-%d\n",
4798 PRO_VERSION_MIN, PRO_VERSION_MAX,
4799 p->protocol_min, p->protocol_max);
4800 return -1;
4801}
4802
4803#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
bde89a9e 4804static int drbd_do_auth(struct drbd_connection *connection)
b411b363 4805{
1ec861eb
AG
4806 drbd_err(connection, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
4807 drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
b10d96cb 4808 return -1;
b411b363
PR
4809}
4810#else
4811#define CHALLENGE_LEN 64
b10d96cb
JT
4812
4813/* Return value:
4814 1 - auth succeeded,
4815 0 - failed, try again (network error),
4816 -1 - auth failed, don't try again.
4817*/
4818
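/* Rough outline of the CRAM-HMAC exchange below:
 * send P_AUTH_CHALLENGE with our random challenge,
 * receive the peer's challenge,
 * send P_AUTH_RESPONSE with HMAC(secret, peer's challenge),
 * receive the peer's response,
 * and compare it with the locally computed HMAC(secret, our challenge). */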
bde89a9e 4819static int drbd_do_auth(struct drbd_connection *connection)
b411b363 4820{
9f5bdc33 4821 struct drbd_socket *sock;
b411b363
PR
4822 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4823 struct scatterlist sg;
4824 char *response = NULL;
4825 char *right_response = NULL;
4826 char *peers_ch = NULL;
44ed167d
PR
4827 unsigned int key_len;
4828 char secret[SHARED_SECRET_MAX]; /* 64 byte */
b411b363
PR
4829 unsigned int resp_size;
4830 struct hash_desc desc;
77351055 4831 struct packet_info pi;
44ed167d 4832 struct net_conf *nc;
69bc7bc3 4833 int err, rv;
b411b363 4834
9f5bdc33 4835 /* FIXME: Put the challenge/response into the preallocated socket buffer. */
b411b363 4836
44ed167d 4837 rcu_read_lock();
bde89a9e 4838 nc = rcu_dereference(connection->net_conf);
44ed167d
PR
4839 key_len = strlen(nc->shared_secret);
4840 memcpy(secret, nc->shared_secret, key_len);
4841 rcu_read_unlock();
4842
bde89a9e 4843 desc.tfm = connection->cram_hmac_tfm;
b411b363
PR
4844 desc.flags = 0;
4845
bde89a9e 4846 rv = crypto_hash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
b411b363 4847 if (rv) {
1ec861eb 4848 drbd_err(connection, "crypto_hash_setkey() failed with %d\n", rv);
b10d96cb 4849 rv = -1;
b411b363
PR
4850 goto fail;
4851 }
4852
4853 get_random_bytes(my_challenge, CHALLENGE_LEN);
4854
bde89a9e
AG
4855 sock = &connection->data;
4856 if (!conn_prepare_command(connection, sock)) {
9f5bdc33
AG
4857 rv = 0;
4858 goto fail;
4859 }
bde89a9e 4860 rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
9f5bdc33 4861 my_challenge, CHALLENGE_LEN);
b411b363
PR
4862 if (!rv)
4863 goto fail;
4864
bde89a9e 4865 err = drbd_recv_header(connection, &pi);
69bc7bc3
AG
4866 if (err) {
4867 rv = 0;
b411b363 4868 goto fail;
69bc7bc3 4869 }
b411b363 4870
77351055 4871 if (pi.cmd != P_AUTH_CHALLENGE) {
1ec861eb 4872 drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
2fcb8f30 4873 cmdname(pi.cmd), pi.cmd);
b411b363
PR
4874 rv = 0;
4875 goto fail;
4876 }
4877
77351055 4878 if (pi.size > CHALLENGE_LEN * 2) {
1ec861eb 4879 drbd_err(connection, "AuthChallenge payload too big.\n");
b10d96cb 4880 rv = -1;
b411b363
PR
4881 goto fail;
4882 }
4883
67cca286
PR
4884 if (pi.size < CHALLENGE_LEN) {
4885 drbd_err(connection, "AuthChallenge payload too small.\n");
4886 rv = -1;
4887 goto fail;
4888 }
4889
77351055 4890 peers_ch = kmalloc(pi.size, GFP_NOIO);
b411b363 4891 if (peers_ch == NULL) {
1ec861eb 4892 drbd_err(connection, "kmalloc of peers_ch failed\n");
b10d96cb 4893 rv = -1;
b411b363
PR
4894 goto fail;
4895 }
4896
bde89a9e 4897 err = drbd_recv_all_warn(connection, peers_ch, pi.size);
a5c31904 4898 if (err) {
b411b363
PR
4899 rv = 0;
4900 goto fail;
4901 }
4902
67cca286
PR
4903 if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
4904 drbd_err(connection, "Peer presented the same challenge!\n");
4905 rv = -1;
4906 goto fail;
4907 }
4908
bde89a9e 4909 resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
b411b363
PR
4910 response = kmalloc(resp_size, GFP_NOIO);
4911 if (response == NULL) {
1ec861eb 4912 drbd_err(connection, "kmalloc of response failed\n");
b10d96cb 4913 rv = -1;
b411b363
PR
4914 goto fail;
4915 }
4916
4917 sg_init_table(&sg, 1);
77351055 4918 sg_set_buf(&sg, peers_ch, pi.size);
b411b363
PR
4919
4920 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4921 if (rv) {
1ec861eb 4922 drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
b10d96cb 4923 rv = -1;
b411b363
PR
4924 goto fail;
4925 }
4926
bde89a9e 4927 if (!conn_prepare_command(connection, sock)) {
9f5bdc33 4928 rv = 0;
b411b363 4929 goto fail;
9f5bdc33 4930 }
bde89a9e 4931 rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
9f5bdc33 4932 response, resp_size);
b411b363
PR
4933 if (!rv)
4934 goto fail;
4935
bde89a9e 4936 err = drbd_recv_header(connection, &pi);
69bc7bc3 4937 if (err) {
b411b363
PR
4938 rv = 0;
4939 goto fail;
4940 }
4941
77351055 4942 if (pi.cmd != P_AUTH_RESPONSE) {
1ec861eb 4943 drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
2fcb8f30 4944 cmdname(pi.cmd), pi.cmd);
b411b363
PR
4945 rv = 0;
4946 goto fail;
4947 }
4948
77351055 4949 if (pi.size != resp_size) {
1ec861eb 4950 drbd_err(connection, "AuthResponse payload has unexpected size\n");
b411b363
PR
4951 rv = 0;
4952 goto fail;
4953 }
b411b363 4954
bde89a9e 4955 err = drbd_recv_all_warn(connection, response, resp_size);
a5c31904 4956 if (err) {
b411b363
PR
4957 rv = 0;
4958 goto fail;
4959 }
4960
4961 right_response = kmalloc(resp_size, GFP_NOIO);
2d1ee87d 4962 if (right_response == NULL) {
1ec861eb 4963 drbd_err(connection, "kmalloc of right_response failed\n");
b10d96cb 4964 rv = -1;
b411b363
PR
4965 goto fail;
4966 }
4967
4968 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4969
4970 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4971 if (rv) {
1ec861eb 4972 drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
b10d96cb 4973 rv = -1;
b411b363
PR
4974 goto fail;
4975 }
4976
4977 rv = !memcmp(response, right_response, resp_size);
4978
4979 if (rv)
1ec861eb 4980 drbd_info(connection, "Peer authenticated using %d bytes of HMAC\n",
44ed167d 4981 resp_size);
b10d96cb
JT
4982 else
4983 rv = -1;
b411b363
PR
4984
4985 fail:
4986 kfree(peers_ch);
4987 kfree(response);
4988 kfree(right_response);
4989
4990 return rv;
4991}
4992#endif
4993
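/* Entry point of the receiver thread: retry conn_connect() until a
 * connection is established or the configuration is discarded, then
 * run the drbdd() receive loop until the connection is lost. */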
8fe60551 4994int drbd_receiver(struct drbd_thread *thi)
b411b363 4995{
bde89a9e 4996 struct drbd_connection *connection = thi->connection;
b411b363
PR
4997 int h;
4998
1ec861eb 4999 drbd_info(connection, "receiver (re)started\n");
b411b363
PR
5000
5001 do {
bde89a9e 5002 h = conn_connect(connection);
b411b363 5003 if (h == 0) {
bde89a9e 5004 conn_disconnect(connection);
20ee6390 5005 schedule_timeout_interruptible(HZ);
b411b363
PR
5006 }
5007 if (h == -1) {
1ec861eb 5008 drbd_warn(connection, "Discarding network configuration.\n");
bde89a9e 5009 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363
PR
5010 }
5011 } while (h == 0);
5012
91fd4dad 5013 if (h > 0)
bde89a9e 5014 drbdd(connection);
b411b363 5015
bde89a9e 5016 conn_disconnect(connection);
b411b363 5017
1ec861eb 5018 drbd_info(connection, "receiver terminated\n");
b411b363
PR
5019 return 0;
5020}
5021
5022/* ********* acknowledge sender ******** */
5023
bde89a9e 5024static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5025{
e658983a 5026 struct p_req_state_reply *p = pi->data;
e4f78ede
PR
5027 int retcode = be32_to_cpu(p->retcode);
5028
5029 if (retcode >= SS_SUCCESS) {
bde89a9e 5030 set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
e4f78ede 5031 } else {
bde89a9e 5032 set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
1ec861eb 5033 drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
e4f78ede
PR
5034 drbd_set_st_err_str(retcode), retcode);
5035 }
bde89a9e 5036 wake_up(&connection->ping_wait);
e4f78ede 5037
2735a594 5038 return 0;
e4f78ede 5039}
b411b363 5040
bde89a9e 5041static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5042{
9f4fe9ad 5043 struct drbd_peer_device *peer_device;
b30ab791 5044 struct drbd_device *device;
e658983a 5045 struct p_req_state_reply *p = pi->data;
b411b363
PR
5046 int retcode = be32_to_cpu(p->retcode);
5047
9f4fe9ad
AG
5048 peer_device = conn_peer_device(connection, pi->vnr);
5049 if (!peer_device)
2735a594 5050 return -EIO;
9f4fe9ad 5051 device = peer_device->device;
1952e916 5052
bde89a9e 5053 if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
0b0ba1ef 5054 D_ASSERT(device, connection->agreed_pro_version < 100);
bde89a9e 5055 return got_conn_RqSReply(connection, pi);
4d0fc3fd
PR
5056 }
5057
b411b363 5058 if (retcode >= SS_SUCCESS) {
b30ab791 5059 set_bit(CL_ST_CHG_SUCCESS, &device->flags);
b411b363 5060 } else {
b30ab791 5061 set_bit(CL_ST_CHG_FAIL, &device->flags);
d0180171 5062 drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
e4f78ede 5063 drbd_set_st_err_str(retcode), retcode);
b411b363 5064 }
b30ab791 5065 wake_up(&device->state_wait);
b411b363 5066
2735a594 5067 return 0;
b411b363
PR
5068}
5069
bde89a9e 5070static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5071{
bde89a9e 5072 return drbd_send_ping_ack(connection);
b411b363
PR
5073
5074}
5075
bde89a9e 5076static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
b411b363
PR
5077{
5078 /* restore idle timeout */
bde89a9e
AG
5079 connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
5080 if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
5081 wake_up(&connection->ping_wait);
b411b363 5082
2735a594 5083 return 0;
b411b363
PR
5084}
5085
bde89a9e 5086static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5087{
9f4fe9ad 5088 struct drbd_peer_device *peer_device;
b30ab791 5089 struct drbd_device *device;
e658983a 5090 struct p_block_ack *p = pi->data;
b411b363
PR
5091 sector_t sector = be64_to_cpu(p->sector);
5092 int blksize = be32_to_cpu(p->blksize);
5093
9f4fe9ad
AG
5094 peer_device = conn_peer_device(connection, pi->vnr);
5095 if (!peer_device)
2735a594 5096 return -EIO;
9f4fe9ad 5097 device = peer_device->device;
1952e916 5098
9f4fe9ad 5099 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
b411b363 5100
69a22773 5101 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
b411b363 5102
b30ab791
AG
5103 if (get_ldev(device)) {
5104 drbd_rs_complete_io(device, sector);
5105 drbd_set_in_sync(device, sector, blksize);
1d53f09e 5106 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
b30ab791
AG
5107 device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
5108 put_ldev(device);
1d53f09e 5109 }
b30ab791
AG
5110 dec_rs_pending(device);
5111 atomic_add(blksize >> 9, &device->rs_sect_in);
b411b363 5112
2735a594 5113 return 0;
b411b363
PR
5114}
5115
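/* Look up the request an ack refers to (by block_id and sector) in the
 * given tree and apply the state transition 'what' to it; missing_ok
 * tolerates requests that have already been completed and removed. */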
bc9c5c41 5116static int
b30ab791 5117validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
bc9c5c41
AG
5118 struct rb_root *root, const char *func,
5119 enum drbd_req_event what, bool missing_ok)
b411b363
PR
5120{
5121 struct drbd_request *req;
5122 struct bio_and_error m;
5123
0500813f 5124 spin_lock_irq(&device->resource->req_lock);
b30ab791 5125 req = find_request(device, root, id, sector, missing_ok, func);
b411b363 5126 if (unlikely(!req)) {
0500813f 5127 spin_unlock_irq(&device->resource->req_lock);
85997675 5128 return -EIO;
b411b363
PR
5129 }
5130 __req_mod(req, what, &m);
0500813f 5131 spin_unlock_irq(&device->resource->req_lock);
b411b363
PR
5132
5133 if (m.bio)
b30ab791 5134 complete_master_bio(device, &m);
85997675 5135 return 0;
b411b363
PR
5136}
5137
bde89a9e 5138static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5139{
9f4fe9ad 5140 struct drbd_peer_device *peer_device;
b30ab791 5141 struct drbd_device *device;
e658983a 5142 struct p_block_ack *p = pi->data;
b411b363
PR
5143 sector_t sector = be64_to_cpu(p->sector);
5144 int blksize = be32_to_cpu(p->blksize);
5145 enum drbd_req_event what;
5146
9f4fe9ad
AG
5147 peer_device = conn_peer_device(connection, pi->vnr);
5148 if (!peer_device)
2735a594 5149 return -EIO;
9f4fe9ad 5150 device = peer_device->device;
1952e916 5151
69a22773 5152 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
b411b363 5153
579b57ed 5154 if (p->block_id == ID_SYNCER) {
b30ab791
AG
5155 drbd_set_in_sync(device, sector, blksize);
5156 dec_rs_pending(device);
2735a594 5157 return 0;
b411b363 5158 }
e05e1e59 5159 switch (pi->cmd) {
b411b363 5160 case P_RS_WRITE_ACK:
8554df1c 5161 what = WRITE_ACKED_BY_PEER_AND_SIS;
b411b363
PR
5162 break;
5163 case P_WRITE_ACK:
8554df1c 5164 what = WRITE_ACKED_BY_PEER;
b411b363
PR
5165 break;
5166 case P_RECV_ACK:
8554df1c 5167 what = RECV_ACKED_BY_PEER;
b411b363 5168 break;
d4dabbe2
LE
5169 case P_SUPERSEDED:
5170 what = CONFLICT_RESOLVED;
b411b363 5171 break;
7be8da07 5172 case P_RETRY_WRITE:
7be8da07 5173 what = POSTPONE_WRITE;
b411b363
PR
5174 break;
5175 default:
2735a594 5176 BUG();
b411b363
PR
5177 }
5178
b30ab791
AG
5179 return validate_req_change_req_state(device, p->block_id, sector,
5180 &device->write_requests, __func__,
2735a594 5181 what, false);
b411b363
PR
5182}
5183
bde89a9e 5184static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5185{
9f4fe9ad 5186 struct drbd_peer_device *peer_device;
b30ab791 5187 struct drbd_device *device;
e658983a 5188 struct p_block_ack *p = pi->data;
b411b363 5189 sector_t sector = be64_to_cpu(p->sector);
2deb8336 5190 int size = be32_to_cpu(p->blksize);
85997675 5191 int err;
b411b363 5192
9f4fe9ad
AG
5193 peer_device = conn_peer_device(connection, pi->vnr);
5194 if (!peer_device)
2735a594 5195 return -EIO;
9f4fe9ad 5196 device = peer_device->device;
b411b363 5197
69a22773 5198 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
b411b363 5199
579b57ed 5200 if (p->block_id == ID_SYNCER) {
b30ab791
AG
5201 dec_rs_pending(device);
5202 drbd_rs_failed_io(device, sector, size);
2735a594 5203 return 0;
b411b363 5204 }
2deb8336 5205
b30ab791
AG
5206 err = validate_req_change_req_state(device, p->block_id, sector,
5207 &device->write_requests, __func__,
303d1448 5208 NEG_ACKED, true);
85997675 5209 if (err) {
c3afd8f5
AG
5210 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
5211 The master bio might already be completed, therefore the
5212 request is no longer in the collision hash. */
5213 /* In Protocol B we might already have got a P_RECV_ACK
5214 but then get a P_NEG_ACK afterwards. */
b30ab791 5215 drbd_set_out_of_sync(device, sector, size);
2deb8336 5216 }
2735a594 5217 return 0;
b411b363
PR
5218}
5219
bde89a9e 5220static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5221{
9f4fe9ad 5222 struct drbd_peer_device *peer_device;
b30ab791 5223 struct drbd_device *device;
e658983a 5224 struct p_block_ack *p = pi->data;
b411b363
PR
5225 sector_t sector = be64_to_cpu(p->sector);
5226
9f4fe9ad
AG
5227 peer_device = conn_peer_device(connection, pi->vnr);
5228 if (!peer_device)
2735a594 5229 return -EIO;
9f4fe9ad 5230 device = peer_device->device;
1952e916 5231
69a22773 5232 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
7be8da07 5233
d0180171 5234 drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
b411b363
PR
5235 (unsigned long long)sector, be32_to_cpu(p->blksize));
5236
b30ab791
AG
5237 return validate_req_change_req_state(device, p->block_id, sector,
5238 &device->read_requests, __func__,
2735a594 5239 NEG_ACKED, false);
b411b363
PR
5240}
5241
bde89a9e 5242static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5243{
9f4fe9ad 5244 struct drbd_peer_device *peer_device;
b30ab791 5245 struct drbd_device *device;
b411b363
PR
5246 sector_t sector;
5247 int size;
e658983a 5248 struct p_block_ack *p = pi->data;
1952e916 5249
9f4fe9ad
AG
5250 peer_device = conn_peer_device(connection, pi->vnr);
5251 if (!peer_device)
2735a594 5252 return -EIO;
9f4fe9ad 5253 device = peer_device->device;
b411b363
PR
5254
5255 sector = be64_to_cpu(p->sector);
5256 size = be32_to_cpu(p->blksize);
b411b363 5257
69a22773 5258 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
b411b363 5259
b30ab791 5260 dec_rs_pending(device);
b411b363 5261
b30ab791
AG
5262 if (get_ldev_if_state(device, D_FAILED)) {
5263 drbd_rs_complete_io(device, sector);
e05e1e59 5264 switch (pi->cmd) {
d612d309 5265 case P_NEG_RS_DREPLY:
b30ab791 5266 drbd_rs_failed_io(device, sector, size);
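 /* fall through */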
d612d309
PR
5267 case P_RS_CANCEL:
5268 break;
5269 default:
2735a594 5270 BUG();
d612d309 5271 }
b30ab791 5272 put_ldev(device);
b411b363
PR
5273 }
5274
2735a594 5275 return 0;
b411b363
PR
5276}
5277
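/* A barrier ack confirms that the peer has processed a whole epoch.
 * Release it from the transfer log; volumes in Ahead mode with no
 * application requests in flight arm a timer to resync as SyncSource. */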
bde89a9e 5278static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5279{
e658983a 5280 struct p_barrier_ack *p = pi->data;
c06ece6b 5281 struct drbd_peer_device *peer_device;
9ed57dcb 5282 int vnr;
1952e916 5283
bde89a9e 5284 tl_release(connection, p->barrier, be32_to_cpu(p->set_size));
b411b363 5285
9ed57dcb 5286 rcu_read_lock();
c06ece6b
AG
5287 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5288 struct drbd_device *device = peer_device->device;
5289
b30ab791
AG
5290 if (device->state.conn == C_AHEAD &&
5291 atomic_read(&device->ap_in_flight) == 0 &&
5292 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
5293 device->start_resync_timer.expires = jiffies + HZ;
5294 add_timer(&device->start_resync_timer);
9ed57dcb 5295 }
c4752ef1 5296 }
9ed57dcb 5297 rcu_read_unlock();
c4752ef1 5298
2735a594 5299 return 0;
b411b363
PR
5300}
5301
bde89a9e 5302static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5303{
9f4fe9ad 5304 struct drbd_peer_device *peer_device;
b30ab791 5305 struct drbd_device *device;
e658983a 5306 struct p_block_ack *p = pi->data;
84b8c06b 5307 struct drbd_device_work *dw;
b411b363
PR
5308 sector_t sector;
5309 int size;
5310
9f4fe9ad
AG
5311 peer_device = conn_peer_device(connection, pi->vnr);
5312 if (!peer_device)
2735a594 5313 return -EIO;
9f4fe9ad 5314 device = peer_device->device;
1952e916 5315
b411b363
PR
5316 sector = be64_to_cpu(p->sector);
5317 size = be32_to_cpu(p->blksize);
5318
69a22773 5319 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
b411b363
PR
5320
5321 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
b30ab791 5322 drbd_ov_out_of_sync_found(device, sector, size);
b411b363 5323 else
b30ab791 5324 ov_out_of_sync_print(device);
b411b363 5325
b30ab791 5326 if (!get_ldev(device))
2735a594 5327 return 0;
1d53f09e 5328
b30ab791
AG
5329 drbd_rs_complete_io(device, sector);
5330 dec_rs_pending(device);
b411b363 5331
b30ab791 5332 --device->ov_left;
ea5442af
LE
5333
5334 /* let's advance progress step marks only for every other megabyte */
b30ab791
AG
5335 if ((device->ov_left & 0x200) == 0x200)
5336 drbd_advance_rs_marks(device, device->ov_left);
ea5442af 5337
b30ab791 5338 if (device->ov_left == 0) {
84b8c06b
AG
5339 dw = kmalloc(sizeof(*dw), GFP_NOIO);
5340 if (dw) {
5341 dw->w.cb = w_ov_finished;
5342 dw->device = device;
5343 drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
b411b363 5344 } else {
84b8c06b 5345 drbd_err(device, "kmalloc(dw) failed.");
b30ab791
AG
5346 ov_out_of_sync_print(device);
5347 drbd_resync_finished(device);
b411b363
PR
5348 }
5349 }
b30ab791 5350 put_ldev(device);
2735a594 5351 return 0;
b411b363
PR
5352}
5353
bde89a9e 5354static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
0ced55a3 5355{
2735a594 5356 return 0;
b411b363
PR
5357}
5358
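/* Process the done_ee lists of all volumes, looping until none of the
 * devices has freshly completed peer requests left over. Returns
 * non-zero if processing failed for any device. */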
bde89a9e 5359static int connection_finish_peer_reqs(struct drbd_connection *connection)
0ced55a3 5360{
c06ece6b 5361 struct drbd_peer_device *peer_device;
c141ebda 5362 int vnr, not_empty = 0;
32862ec7
PR
5363
5364 do {
bde89a9e 5365 clear_bit(SIGNAL_ASENDER, &connection->flags);
32862ec7 5366 flush_signals(current);
c141ebda
PR
5367
5368 rcu_read_lock();
c06ece6b
AG
5369 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5370 struct drbd_device *device = peer_device->device;
b30ab791 5371 kref_get(&device->kref);
c141ebda 5372 rcu_read_unlock();
b30ab791 5373 if (drbd_finish_peer_reqs(device)) {
05a10ec7 5374 kref_put(&device->kref, drbd_destroy_device);
c141ebda 5375 return 1;
d3fcb490 5376 }
05a10ec7 5377 kref_put(&device->kref, drbd_destroy_device);
c141ebda 5378 rcu_read_lock();
082a3439 5379 }
bde89a9e 5380 set_bit(SIGNAL_ASENDER, &connection->flags);
082a3439 5381
0500813f 5382 spin_lock_irq(&connection->resource->req_lock);
c06ece6b
AG
5383 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5384 struct drbd_device *device = peer_device->device;
b30ab791 5385 not_empty = !list_empty(&device->done_ee);
082a3439
PR
5386 if (not_empty)
5387 break;
5388 }
0500813f 5389 spin_unlock_irq(&connection->resource->req_lock);
c141ebda 5390 rcu_read_unlock();
32862ec7
PR
5391 } while (not_empty);
5392
5393 return 0;
0ced55a3
PR
5394}
5395
b411b363
PR
5396struct asender_cmd {
5397 size_t pkt_size;
bde89a9e 5398 int (*fn)(struct drbd_connection *connection, struct packet_info *);
b411b363
PR
5399};
5400
7201b972 5401static struct asender_cmd asender_tbl[] = {
e658983a
AG
5402 [P_PING] = { 0, got_Ping },
5403 [P_PING_ACK] = { 0, got_PingAck },
b411b363
PR
5404 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5405 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5406 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
d4dabbe2 5407 [P_SUPERSEDED] = { sizeof(struct p_block_ack), got_BlockAck },
b411b363
PR
5408 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
5409 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
1952e916 5410 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply },
b411b363
PR
5411 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
5412 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
5413 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
5414 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
02918be2 5415 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
1952e916
AG
5416 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply },
5417 [P_CONN_ST_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_conn_RqSReply },
5418 [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
7201b972 5419};
b411b363
PR
5420
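/* The asender thread serves the meta socket: it sends pings on request,
 * flushes acks for completed peer requests (corking the socket around
 * each batch), and dispatches incoming packets through asender_tbl[]. */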
5421int drbd_asender(struct drbd_thread *thi)
5422{
bde89a9e 5423 struct drbd_connection *connection = thi->connection;
b411b363 5424 struct asender_cmd *cmd = NULL;
77351055 5425 struct packet_info pi;
257d0af6 5426 int rv;
bde89a9e 5427 void *buf = connection->meta.rbuf;
b411b363 5428 int received = 0;
bde89a9e 5429 unsigned int header_size = drbd_header_size(connection);
52b061a4 5430 int expect = header_size;
44ed167d
PR
5431 bool ping_timeout_active = false;
5432 struct net_conf *nc;
bb77d34e 5433 int ping_timeo, tcp_cork, ping_int;
3990e04d 5434 struct sched_param param = { .sched_priority = 2 };
b411b363 5435
3990e04d
PR
5436 rv = sched_setscheduler(current, SCHED_RR, &param);
5437 if (rv < 0)
1ec861eb 5438 drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);
b411b363 5439
e77a0a5c 5440 while (get_t_state(thi) == RUNNING) {
80822284 5441 drbd_thread_current_set_cpu(thi);
b411b363 5442
44ed167d 5443 rcu_read_lock();
bde89a9e 5444 nc = rcu_dereference(connection->net_conf);
44ed167d 5445 ping_timeo = nc->ping_timeo;
bb77d34e 5446 tcp_cork = nc->tcp_cork;
44ed167d
PR
5447 ping_int = nc->ping_int;
5448 rcu_read_unlock();
5449
bde89a9e
AG
5450 if (test_and_clear_bit(SEND_PING, &connection->flags)) {
5451 if (drbd_send_ping(connection)) {
1ec861eb 5452 drbd_err(connection, "drbd_send_ping has failed\n");
b411b363 5453 goto reconnect;
841ce241 5454 }
bde89a9e 5455 connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
44ed167d 5456 ping_timeout_active = true;
b411b363
PR
5457 }
5458
32862ec7
PR
5459 /* TODO: conditionally cork; it may hurt latency if we cork without
5460 much to send */
bb77d34e 5461 if (tcp_cork)
bde89a9e
AG
5462 drbd_tcp_cork(connection->meta.socket);
5463 if (connection_finish_peer_reqs(connection)) {
1ec861eb 5464 drbd_err(connection, "connection_finish_peer_reqs() failed\n");
32862ec7 5465 goto reconnect;
b411b363
PR
5466 }
5467 /* but unconditionally uncork unless disabled */
bb77d34e 5468 if (tcp_cork)
bde89a9e 5469 drbd_tcp_uncork(connection->meta.socket);
b411b363
PR
5470
5471 /* short circuit, recv_msg would return EINTR anyway. */
5472 if (signal_pending(current))
5473 continue;
5474
bde89a9e
AG
5475 rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
5476 clear_bit(SIGNAL_ASENDER, &connection->flags);
b411b363
PR
5477
5478 flush_signals(current);
5479
5480 /* Note:
5481 * -EINTR (on meta) we got a signal
5482 * -EAGAIN (on meta) rcvtimeo expired
5483 * -ECONNRESET other side closed the connection
5484 * -ERESTARTSYS (on data) we got a signal
5485 * rv < 0 other than above: unexpected error!
5486 * rv == expected: full header or command
5487 * rv < expected: "woken" by signal during receive
5488 * rv == 0 : "connection shut down by peer"
5489 */
5490 if (likely(rv > 0)) {
5491 received += rv;
5492 buf += rv;
5493 } else if (rv == 0) {
bde89a9e 5494 if (test_bit(DISCONNECT_SENT, &connection->flags)) {
b66623e3
PR
5495 long t;
5496 rcu_read_lock();
bde89a9e 5497 t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
b66623e3
PR
5498 rcu_read_unlock();
5499
bde89a9e
AG
5500 t = wait_event_timeout(connection->ping_wait,
5501 connection->cstate < C_WF_REPORT_PARAMS,
b66623e3 5502 t);
599377ac
PR
5503 if (t)
5504 break;
5505 }
1ec861eb 5506 drbd_err(connection, "meta connection shut down by peer.\n");
b411b363
PR
5507 goto reconnect;
5508 } else if (rv == -EAGAIN) {
cb6518cb
LE
5509 /* If the data socket received something meanwhile,
5510 * that is good enough: peer is still alive. */
bde89a9e
AG
5511 if (time_after(connection->last_received,
5512 jiffies - connection->meta.socket->sk->sk_rcvtimeo))
cb6518cb 5513 continue;
f36af18c 5514 if (ping_timeout_active) {
1ec861eb 5515 drbd_err(connection, "PingAck did not arrive in time.\n");
b411b363
PR
5516 goto reconnect;
5517 }
bde89a9e 5518 set_bit(SEND_PING, &connection->flags);
b411b363
PR
5519 continue;
5520 } else if (rv == -EINTR) {
5521 continue;
5522 } else {
1ec861eb 5523 drbd_err(connection, "sock_recvmsg returned %d\n", rv);
b411b363
PR
5524 goto reconnect;
5525 }
5526
5527 if (received == expect && cmd == NULL) {
bde89a9e 5528 if (decode_header(connection, connection->meta.rbuf, &pi))
b411b363 5529 goto reconnect;
7201b972 5530 cmd = &asender_tbl[pi.cmd];
1952e916 5531 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
1ec861eb 5532 drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
2fcb8f30 5533 cmdname(pi.cmd), pi.cmd);
b411b363
PR
5534 goto disconnect;
5535 }
e658983a 5536 expect = header_size + cmd->pkt_size;
52b061a4 5537 if (pi.size != expect - header_size) {
1ec861eb 5538 drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
77351055 5539 pi.cmd, pi.size);
b411b363 5540 goto reconnect;
257d0af6 5541 }
b411b363
PR
5542 }
5543 if (received == expect) {
2735a594 5544 bool err;
a4fbda8e 5545
bde89a9e 5546 err = cmd->fn(connection, &pi);
2735a594 5547 if (err) {
1ec861eb 5548 drbd_err(connection, "%pf failed\n", cmd->fn);
b411b363 5549 goto reconnect;
1952e916 5550 }
b411b363 5551
bde89a9e 5552 connection->last_received = jiffies;
f36af18c 5553
44ed167d
PR
5554 if (cmd == &asender_tbl[P_PING_ACK]) {
5555 /* restore idle timeout */
bde89a9e 5556 connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
44ed167d
PR
5557 ping_timeout_active = false;
5558 }
f36af18c 5559
bde89a9e 5560 buf = connection->meta.rbuf;
b411b363 5561 received = 0;
52b061a4 5562 expect = header_size;
b411b363
PR
5563 cmd = NULL;
5564 }
5565 }
5566
5567 if (0) {
5568reconnect:
bde89a9e
AG
5569 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
5570 conn_md_sync(connection);
b411b363
PR
5571 }
5572 if (0) {
5573disconnect:
bde89a9e 5574 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363 5575 }
bde89a9e 5576 clear_bit(SIGNAL_ASENDER, &connection->flags);
b411b363 5577
1ec861eb 5578 drbd_info(connection, "asender terminated\n");
b411b363
PR
5579
5580 return 0;
5581}