drivers/block/drbd/drbd_main.c
/*
   drbd.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev
   from Logicworks, Inc. for making SDP replication support possible.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/module.h>
#include <linux/drbd.h>
#include <asm/uaccess.h>
#include <asm/types.h>
#include <net/sock.h>
#include <linux/ctype.h>
#include <linux/mutex.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/reboot.h>
#include <linux/notifier.h>
#include <linux/kthread.h>

#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>

#include <linux/drbd_limits.h>
#include "drbd_int.h"
#include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */

#include "drbd_vli.h"

static DEFINE_MUTEX(drbd_main_mutex);
int drbdd_init(struct drbd_thread *);
int drbd_worker(struct drbd_thread *);
int drbd_asender(struct drbd_thread *);

int drbd_init(void);
static int drbd_open(struct block_device *bdev, fmode_t mode);
static int drbd_release(struct gendisk *gd, fmode_t mode);
static int w_md_sync(struct drbd_work *w, int unused);
static void md_sync_timer_fn(unsigned long data);
static int w_bitmap_io(struct drbd_work *w, int unused);
static int w_go_diskless(struct drbd_work *w, int unused);

MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
	      "Lars Ellenberg <lars@linbit.com>");
MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
MODULE_VERSION(REL_VERSION);
MODULE_LICENSE("GPL");
MODULE_PARM_DESC(minor_count, "Approximate number of drbd devices ("
		 __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")");
MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);

#include <linux/moduleparam.h>
/* allow_open_on_secondary */
MODULE_PARM_DESC(allow_oos, "DONT USE!");
/* thanks to these macros, if compiled into the kernel (not-module),
 * this becomes the boot parameter drbd.minor_count */
module_param(minor_count, uint, 0444);
module_param(disable_sendpage, bool, 0644);
module_param(allow_oos, bool, 0);
module_param(proc_details, int, 0644);

#ifdef CONFIG_DRBD_FAULT_INJECTION
int enable_faults;
int fault_rate;
static int fault_count;
int fault_devs;
/* bitmap of enabled faults */
module_param(enable_faults, int, 0664);
/* fault rate % value - applies to all enabled faults */
module_param(fault_rate, int, 0664);
/* count of faults inserted */
module_param(fault_count, int, 0664);
/* bitmap of devices to insert faults on */
module_param(fault_devs, int, 0644);
#endif

/* module parameter, defined */
unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
int disable_sendpage;
int allow_oos;
int proc_details;	/* Detail level in proc drbd */

/* Module parameter for setting the user mode helper program
 * to run. Default is /sbin/drbdadm */
char usermode_helper[80] = "/sbin/drbdadm";

module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0644);

/* in 2.6.x, our device mapping and config info contains our virtual gendisks
 * as member "struct gendisk *vdisk;"
 */
struct idr minors;
struct list_head drbd_tconns;  /* list of struct drbd_tconn */
DEFINE_MUTEX(drbd_cfg_mutex);

struct kmem_cache *drbd_request_cache;
struct kmem_cache *drbd_ee_cache;	/* peer requests */
struct kmem_cache *drbd_bm_ext_cache;	/* bitmap extents */
struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */
mempool_t *drbd_request_mempool;
mempool_t *drbd_ee_mempool;
mempool_t *drbd_md_io_page_pool;
struct bio_set *drbd_md_io_bio_set;

/* I do not use a standard mempool, because:
   1) I want to hand out the pre-allocated objects first.
   2) I want to be able to interrupt sleeping allocation with a signal.
   Note: This is a single linked list, the next pointer is the private
	 member of struct page.
 */
struct page *drbd_pp_pool;
spinlock_t drbd_pp_lock;
int drbd_pp_vacant;
wait_queue_head_t drbd_pp_wait;

DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5);

static const struct block_device_operations drbd_ops = {
	.owner =   THIS_MODULE,
	.open =    drbd_open,
	.release = drbd_release,
};

static void bio_destructor_drbd(struct bio *bio)
{
	bio_free(bio, drbd_md_io_bio_set);
}

struct bio *bio_alloc_drbd(gfp_t gfp_mask)
{
	struct bio *bio;

	if (!drbd_md_io_bio_set)
		return bio_alloc(gfp_mask, 1);

	bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set);
	if (!bio)
		return NULL;
	bio->bi_destructor = bio_destructor_drbd;
	return bio;
}

#ifdef __CHECKER__
/* When checking with sparse, and this is an inline function, sparse will
   give tons of false positives. When this is a real function, sparse works.
 */
int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins)
{
	int io_allowed;

	atomic_inc(&mdev->local_cnt);
	io_allowed = (mdev->state.disk >= mins);
	if (!io_allowed) {
		if (atomic_dec_and_test(&mdev->local_cnt))
			wake_up(&mdev->misc_wait);
	}
	return io_allowed;
}

#endif

/**
 * DOC: The transfer log
 *
 * The transfer log is a singly linked list of &struct drbd_tl_epoch objects.
 * mdev->tconn->newest_tle points to the head, mdev->tconn->oldest_tle points to the tail
 * of the list. There is always at least one &struct drbd_tl_epoch object.
 *
 * Each &struct drbd_tl_epoch has a circular doubly linked list of requests
 * attached.
 */
static int tl_init(struct drbd_tconn *tconn)
{
	struct drbd_tl_epoch *b;

	/* during device minor initialization, we may well use GFP_KERNEL */
	b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_KERNEL);
	if (!b)
		return 0;
	INIT_LIST_HEAD(&b->requests);
	INIT_LIST_HEAD(&b->w.list);
	b->next = NULL;
	b->br_number = 4711;
	b->n_writes = 0;
	b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */

	tconn->oldest_tle = b;
	tconn->newest_tle = b;
	INIT_LIST_HEAD(&tconn->out_of_sequence_requests);

	return 1;
}

static void tl_cleanup(struct drbd_tconn *tconn)
{
	if (tconn->oldest_tle != tconn->newest_tle)
		conn_err(tconn, "ASSERT FAILED: oldest_tle == newest_tle\n");
	if (!list_empty(&tconn->out_of_sequence_requests))
		conn_err(tconn, "ASSERT FAILED: list_empty(out_of_sequence_requests)\n");
	kfree(tconn->oldest_tle);
	tconn->oldest_tle = NULL;
	kfree(tconn->unused_spare_tle);
	tconn->unused_spare_tle = NULL;
}

/**
 * _tl_add_barrier() - Adds a barrier to the transfer log
 * @mdev:	DRBD device.
 * @new:	Barrier to be added before the current head of the TL.
 *
 * The caller must hold the req_lock.
 */
void _tl_add_barrier(struct drbd_tconn *tconn, struct drbd_tl_epoch *new)
{
	struct drbd_tl_epoch *newest_before;

	INIT_LIST_HEAD(&new->requests);
	INIT_LIST_HEAD(&new->w.list);
	new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
	new->next = NULL;
	new->n_writes = 0;

	newest_before = tconn->newest_tle;
	/* never send a barrier number == 0, because that is special-cased
	 * when using TCQ for our write ordering code */
	new->br_number = (newest_before->br_number+1) ?: 1;
	if (tconn->newest_tle != new) {
		tconn->newest_tle->next = new;
		tconn->newest_tle = new;
	}
}

/**
 * tl_release() - Free or recycle the oldest &struct drbd_tl_epoch object of the TL
 * @mdev:	DRBD device.
 * @barrier_nr:	Expected identifier of the DRBD write barrier packet.
 * @set_size:	Expected number of requests before that barrier.
 *
 * In case the passed barrier_nr or set_size does not match the oldest
 * &struct drbd_tl_epoch objects this function will cause a termination
 * of the connection.
 */
void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr,
		unsigned int set_size)
{
	struct drbd_conf *mdev;
	struct drbd_tl_epoch *b, *nob; /* next old barrier */
	struct list_head *le, *tle;
	struct drbd_request *r;

	spin_lock_irq(&tconn->req_lock);

	b = tconn->oldest_tle;

	/* first some paranoia code */
	if (b == NULL) {
		conn_err(tconn, "BAD! BarrierAck #%u received, but no epoch in tl!?\n",
			 barrier_nr);
		goto bail;
	}
	if (b->br_number != barrier_nr) {
		conn_err(tconn, "BAD! BarrierAck #%u received, expected #%u!\n",
			 barrier_nr, b->br_number);
		goto bail;
	}
	if (b->n_writes != set_size) {
		conn_err(tconn, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n",
			 barrier_nr, set_size, b->n_writes);
		goto bail;
	}

	/* Clean up list of requests processed during current epoch */
	list_for_each_safe(le, tle, &b->requests) {
		r = list_entry(le, struct drbd_request, tl_requests);
		_req_mod(r, BARRIER_ACKED);
	}
	/* There could be requests on the list waiting for completion
	   of the write to the local disk. To avoid corruptions of
	   slab's data structures we have to remove the list's head.

	   Also there could have been a barrier ack out of sequence, overtaking
	   the write acks - which would be a bug and violating write ordering.
	   To not deadlock in case we lose connection while such requests are
	   still pending, we need some way to find them for the
	   _req_mod(CONNECTION_LOST_WHILE_PENDING).

	   These have been list_move'd to the out_of_sequence_requests list in
	   _req_mod(, BARRIER_ACKED) above.
	 */
	list_del_init(&b->requests);
	mdev = b->w.mdev;

	nob = b->next;
	if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
		_tl_add_barrier(tconn, b);
		if (nob)
			tconn->oldest_tle = nob;
		/* if nob == NULL b was the only barrier, and becomes the new
		   barrier. Therefore tconn->oldest_tle points already to b */
	} else {
		D_ASSERT(nob != NULL);
		tconn->oldest_tle = nob;
		kfree(b);
	}

	spin_unlock_irq(&tconn->req_lock);
	dec_ap_pending(mdev);

	return;

bail:
	spin_unlock_irq(&tconn->req_lock);
	conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
}

/**
 * _tl_restart() - Walks the transfer log, and applies an action to all requests
 * @mdev:	DRBD device.
 * @what:	The action/event to perform with all request objects
 *
 * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO,
 * RESTART_FROZEN_DISK_IO.
 */
void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
{
	struct drbd_tl_epoch *b, *tmp, **pn;
	struct list_head *le, *tle, carry_reads;
	struct drbd_request *req;
	int rv, n_writes, n_reads;

	b = tconn->oldest_tle;
	pn = &tconn->oldest_tle;
	while (b) {
		n_writes = 0;
		n_reads = 0;
		INIT_LIST_HEAD(&carry_reads);
		list_for_each_safe(le, tle, &b->requests) {
			req = list_entry(le, struct drbd_request, tl_requests);
			rv = _req_mod(req, what);

			n_writes += (rv & MR_WRITE) >> MR_WRITE_SHIFT;
			n_reads  += (rv & MR_READ) >> MR_READ_SHIFT;
		}
		tmp = b->next;

		if (n_writes) {
			if (what == RESEND) {
				b->n_writes = n_writes;
				if (b->w.cb == NULL) {
					b->w.cb = w_send_barrier;
					inc_ap_pending(b->w.mdev);
					set_bit(CREATE_BARRIER, &b->w.mdev->flags);
				}

				drbd_queue_work(&tconn->data.work, &b->w);
			}
			pn = &b->next;
		} else {
			if (n_reads)
				list_add(&carry_reads, &b->requests);
			/* there could still be requests on that ring list,
			 * in case local io is still pending */
			list_del(&b->requests);

			/* dec_ap_pending corresponding to queue_barrier.
			 * the newest barrier may not have been queued yet,
			 * in which case w.cb is still NULL. */
			if (b->w.cb != NULL)
				dec_ap_pending(b->w.mdev);

			if (b == tconn->newest_tle) {
				/* recycle, but reinit! */
				if (tmp != NULL)
					conn_err(tconn, "ASSERT FAILED tmp == NULL");
				INIT_LIST_HEAD(&b->requests);
				list_splice(&carry_reads, &b->requests);
				INIT_LIST_HEAD(&b->w.list);
				b->w.cb = NULL;
				b->br_number = net_random();
				b->n_writes = 0;

				*pn = b;
				break;
			}
			*pn = tmp;
			kfree(b);
		}
		b = tmp;
		list_splice(&carry_reads, &b->requests);
	}
}


/**
 * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
 * @mdev:	DRBD device.
 *
 * This is called after the connection to the peer was lost. The storage covered
 * by the requests on the transfer log gets marked as out of sync. Called from the
 * receiver thread and the worker thread.
 */
void tl_clear(struct drbd_tconn *tconn)
{
	struct drbd_conf *mdev;
	struct list_head *le, *tle;
	struct drbd_request *r;
	int vnr;

	spin_lock_irq(&tconn->req_lock);

	_tl_restart(tconn, CONNECTION_LOST_WHILE_PENDING);

	/* we expect this list to be empty. */
	if (!list_empty(&tconn->out_of_sequence_requests))
		conn_err(tconn, "ASSERT FAILED list_empty(&out_of_sequence_requests)\n");

	/* but just in case, clean it up anyways! */
	list_for_each_safe(le, tle, &tconn->out_of_sequence_requests) {
		r = list_entry(le, struct drbd_request, tl_requests);
		/* It would be nice to complete outside of spinlock.
		 * But this is easier for now. */
		_req_mod(r, CONNECTION_LOST_WHILE_PENDING);
	}

	/* ensure bit indicating barrier is required is clear */
	idr_for_each_entry(&tconn->volumes, mdev, vnr)
		clear_bit(CREATE_BARRIER, &mdev->flags);

	spin_unlock_irq(&tconn->req_lock);
}

void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
{
	spin_lock_irq(&tconn->req_lock);
	_tl_restart(tconn, what);
	spin_unlock_irq(&tconn->req_lock);
}

static int drbd_thread_setup(void *arg)
{
	struct drbd_thread *thi = (struct drbd_thread *) arg;
	struct drbd_tconn *tconn = thi->tconn;
	unsigned long flags;
	int retval;

	snprintf(current->comm, sizeof(current->comm), "drbd_%c_%s",
		 thi->name[0], thi->tconn->name);

restart:
	retval = thi->function(thi);

	spin_lock_irqsave(&thi->t_lock, flags);

	/* if the receiver has been "EXITING", the last thing it did
	 * was set the conn state to "StandAlone",
	 * if now a re-connect request comes in, conn state goes C_UNCONNECTED,
	 * and receiver thread will be "started".
	 * drbd_thread_start needs to set "RESTARTING" in that case.
	 * t_state check and assignment needs to be within the same spinlock,
	 * so either thread_start sees EXITING, and can remap to RESTARTING,
	 * or thread_start sees NONE, and can proceed as normal.
	 */

	if (thi->t_state == RESTARTING) {
		conn_info(tconn, "Restarting %s thread\n", thi->name);
		thi->t_state = RUNNING;
		spin_unlock_irqrestore(&thi->t_lock, flags);
		goto restart;
	}

	thi->task = NULL;
	thi->t_state = NONE;
	smp_mb();
	complete(&thi->stop);
	spin_unlock_irqrestore(&thi->t_lock, flags);

	conn_info(tconn, "Terminating %s\n", current->comm);

	/* Release mod reference taken when thread was started */
	module_put(THIS_MODULE);
	return retval;
}

static void drbd_thread_init(struct drbd_tconn *tconn, struct drbd_thread *thi,
			     int (*func) (struct drbd_thread *), char *name)
{
	spin_lock_init(&thi->t_lock);
	thi->task = NULL;
	thi->t_state = NONE;
	thi->function = func;
	thi->tconn = tconn;
	strncpy(thi->name, name, ARRAY_SIZE(thi->name));
}

int drbd_thread_start(struct drbd_thread *thi)
{
	struct drbd_tconn *tconn = thi->tconn;
	struct task_struct *nt;
	unsigned long flags;

	/* is used from state engine doing drbd_thread_stop_nowait,
	 * while holding the req lock irqsave */
	spin_lock_irqsave(&thi->t_lock, flags);

	switch (thi->t_state) {
	case NONE:
		conn_info(tconn, "Starting %s thread (from %s [%d])\n",
			  thi->name, current->comm, current->pid);

		/* Get ref on module for thread - this is released when thread exits */
		if (!try_module_get(THIS_MODULE)) {
			conn_err(tconn, "Failed to get module reference in drbd_thread_start\n");
			spin_unlock_irqrestore(&thi->t_lock, flags);
			return false;
		}

		init_completion(&thi->stop);
		thi->reset_cpu_mask = 1;
		thi->t_state = RUNNING;
		spin_unlock_irqrestore(&thi->t_lock, flags);
		flush_signals(current); /* otherw. may get -ERESTARTNOINTR */

		nt = kthread_create(drbd_thread_setup, (void *) thi,
				    "drbd_%c_%s", thi->name[0], thi->tconn->name);

		if (IS_ERR(nt)) {
			conn_err(tconn, "Couldn't start thread\n");

			module_put(THIS_MODULE);
			return false;
		}
		spin_lock_irqsave(&thi->t_lock, flags);
		thi->task = nt;
		thi->t_state = RUNNING;
		spin_unlock_irqrestore(&thi->t_lock, flags);
		wake_up_process(nt);
		break;
	case EXITING:
		thi->t_state = RESTARTING;
		conn_info(tconn, "Restarting %s thread (from %s [%d])\n",
			  thi->name, current->comm, current->pid);
		/* fall through */
	case RUNNING:
	case RESTARTING:
	default:
		spin_unlock_irqrestore(&thi->t_lock, flags);
		break;
	}

	return true;
}


void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait)
{
	unsigned long flags;

	enum drbd_thread_state ns = restart ? RESTARTING : EXITING;

	/* may be called from state engine, holding the req lock irqsave */
	spin_lock_irqsave(&thi->t_lock, flags);

	if (thi->t_state == NONE) {
		spin_unlock_irqrestore(&thi->t_lock, flags);
		if (restart)
			drbd_thread_start(thi);
		return;
	}

	if (thi->t_state != ns) {
		if (thi->task == NULL) {
			spin_unlock_irqrestore(&thi->t_lock, flags);
			return;
		}

		thi->t_state = ns;
		smp_mb();
		init_completion(&thi->stop);
		if (thi->task != current)
			force_sig(DRBD_SIGKILL, thi->task);
	}

	spin_unlock_irqrestore(&thi->t_lock, flags);

	if (wait)
		wait_for_completion(&thi->stop);
}

static struct drbd_thread *drbd_task_to_thread(struct drbd_tconn *tconn, struct task_struct *task)
{
	struct drbd_thread *thi =
		task == tconn->receiver.task ? &tconn->receiver :
		task == tconn->asender.task  ? &tconn->asender :
		task == tconn->worker.task   ? &tconn->worker : NULL;

	return thi;
}

char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *task)
{
	struct drbd_thread *thi = drbd_task_to_thread(tconn, task);
	return thi ? thi->name : task->comm;
}

int conn_lowest_minor(struct drbd_tconn *tconn)
{
	int vnr = 0;
	struct drbd_conf *mdev;

	mdev = idr_get_next(&tconn->volumes, &vnr);
	if (!mdev)
		return -1;
	return mdev_to_minor(mdev);
}

#ifdef CONFIG_SMP
/**
 * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs
 * @mdev:	DRBD device.
 *
 * Forces all threads of a device onto the same CPU. This is beneficial for
 * DRBD's performance. May be overwritten by user's configuration.
 */
void drbd_calc_cpu_mask(struct drbd_tconn *tconn)
{
	int ord, cpu;

	/* user override. */
	if (cpumask_weight(tconn->cpu_mask))
		return;

	ord = conn_lowest_minor(tconn) % cpumask_weight(cpu_online_mask);
	for_each_online_cpu(cpu) {
		if (ord-- == 0) {
			cpumask_set_cpu(cpu, tconn->cpu_mask);
			return;
		}
	}
	/* should not be reached */
	cpumask_setall(tconn->cpu_mask);
}

/**
 * drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread
 * @mdev:	DRBD device.
 * @thi:	drbd_thread object
 *
 * call in the "main loop" of _all_ threads, no need for any mutex, current won't die
 * prematurely.
 */
void drbd_thread_current_set_cpu(struct drbd_thread *thi)
{
	struct task_struct *p = current;

	if (!thi->reset_cpu_mask)
		return;
	thi->reset_cpu_mask = 0;
	set_cpus_allowed_ptr(p, thi->tconn->cpu_mask);
}
#endif

/**
 * drbd_header_size  -  size of a packet header
 *
 * The header size is a multiple of 8, so any payload following the header is
 * word aligned on 64-bit architectures.  (The bitmap send and receive code
 * relies on this.)
 */
unsigned int drbd_header_size(struct drbd_tconn *tconn)
{
	BUILD_BUG_ON(sizeof(struct p_header80) != sizeof(struct p_header95));
	BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header80), 8));
	return sizeof(struct p_header80);
}

static void prepare_header80(struct p_header80 *h, enum drbd_packet cmd, int size)
{
	h->magic   = cpu_to_be32(DRBD_MAGIC);
	h->command = cpu_to_be16(cmd);
	h->length  = cpu_to_be16(size);
}

static void prepare_header95(struct p_header95 *h, enum drbd_packet cmd, int size)
{
	h->magic   = cpu_to_be16(DRBD_MAGIC_BIG);
	h->command = cpu_to_be16(cmd);
	h->length  = cpu_to_be32(size);
}

static void prepare_header(struct drbd_tconn *tconn, int vnr, struct p_header *h,
			   enum drbd_packet cmd, int size)
{
	if (tconn->agreed_pro_version >= 95)
		prepare_header95(&h->h95, cmd, size);
	else
		prepare_header80(&h->h80, cmd, size);
}

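/*
 * Note: conn_prepare_command() (and the per-volume drbd_prepare_command()
 * wrapper) takes sock->mutex and returns the preallocated send buffer; the
 * matching conn_send_command()/drbd_send_command() transmits the packet and
 * releases the mutex again, so the buffer stays valid while the caller fills
 * in the payload in between.
 */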
void *conn_prepare_command(struct drbd_tconn *tconn, struct drbd_socket *sock)
{
	mutex_lock(&sock->mutex);
	if (!sock->socket) {
		mutex_unlock(&sock->mutex);
		return NULL;
	}
	return sock->sbuf;
}

void *drbd_prepare_command(struct drbd_conf *mdev, struct drbd_socket *sock)
{
	return conn_prepare_command(mdev->tconn, sock);
}

static int __send_command(struct drbd_tconn *tconn, int vnr,
			  struct drbd_socket *sock, enum drbd_packet cmd,
			  unsigned int header_size, void *data,
			  unsigned int size)
{
	int msg_flags;
	int err;

	/*
	 * Called with @data == NULL and the size of the data blocks in @size
	 * for commands that send data blocks.  For those commands, omit the
	 * MSG_MORE flag: this will increase the likelihood that data blocks
	 * which are page aligned on the sender will end up page aligned on the
	 * receiver.
	 */
	msg_flags = data ? MSG_MORE : 0;

	prepare_header(tconn, vnr, sock->sbuf, cmd,
		       header_size - sizeof(struct p_header) + size);
	err = drbd_send_all(tconn, sock->socket, sock->sbuf, header_size,
			    msg_flags);
	if (data && !err)
		err = drbd_send_all(tconn, sock->socket, data, size, 0);
	return err;
}

int conn_send_command(struct drbd_tconn *tconn, struct drbd_socket *sock,
		      enum drbd_packet cmd, unsigned int header_size,
		      void *data, unsigned int size)
{
	int err;

	err = __send_command(tconn, 0, sock, cmd, header_size, data, size);
	mutex_unlock(&sock->mutex);
	return err;
}

int drbd_send_command(struct drbd_conf *mdev, struct drbd_socket *sock,
		      enum drbd_packet cmd, unsigned int header_size,
		      void *data, unsigned int size)
{
	int err;

	err = __send_command(mdev->tconn, mdev->vnr, sock, cmd, header_size,
			     data, size);
	mutex_unlock(&sock->mutex);
	return err;
}

int drbd_send_ping(struct drbd_tconn *tconn)
{
	struct drbd_socket *sock;

	sock = &tconn->meta;
	if (!conn_prepare_command(tconn, sock))
		return -EIO;
	return conn_send_command(tconn, sock, P_PING, sizeof(struct p_header), NULL, 0);
}

int drbd_send_ping_ack(struct drbd_tconn *tconn)
{
	struct drbd_socket *sock;

	sock = &tconn->meta;
	if (!conn_prepare_command(tconn, sock))
		return -EIO;
	return conn_send_command(tconn, sock, P_PING_ACK, sizeof(struct p_header), NULL, 0);
}

int drbd_send_sync_param(struct drbd_conf *mdev)
{
	struct drbd_socket *sock;
	struct p_rs_param_95 *p;
	int size;
	const int apv = mdev->tconn->agreed_pro_version;
	enum drbd_packet cmd;

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p)
		return -EIO;

	size = apv <= 87 ? sizeof(struct p_rs_param)
		: apv == 88 ? sizeof(struct p_rs_param)
			+ strlen(mdev->tconn->net_conf->verify_alg) + 1
		: apv <= 94 ? sizeof(struct p_rs_param_89)
		: /* apv >= 95 */ sizeof(struct p_rs_param_95);

	cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM;

	/* initialize verify_alg and csums_alg */
	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);

	if (get_ldev(mdev)) {
		p->rate = cpu_to_be32(mdev->ldev->dc.resync_rate);
		p->c_plan_ahead = cpu_to_be32(mdev->ldev->dc.c_plan_ahead);
		p->c_delay_target = cpu_to_be32(mdev->ldev->dc.c_delay_target);
		p->c_fill_target = cpu_to_be32(mdev->ldev->dc.c_fill_target);
		p->c_max_rate = cpu_to_be32(mdev->ldev->dc.c_max_rate);
		put_ldev(mdev);
	} else {
		p->rate = cpu_to_be32(DRBD_RATE_DEF);
		p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF);
		p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF);
		p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF);
		p->c_max_rate = cpu_to_be32(DRBD_C_MAX_RATE_DEF);
	}

	if (apv >= 88)
		strcpy(p->verify_alg, mdev->tconn->net_conf->verify_alg);
	if (apv >= 89)
		strcpy(p->csums_alg, mdev->tconn->net_conf->csums_alg);

	return drbd_send_command(mdev, sock, cmd, size, NULL, 0);
}

int drbd_send_protocol(struct drbd_tconn *tconn)
{
	struct drbd_socket *sock;
	struct p_protocol *p;
	int size, cf;

	if (tconn->net_conf->dry_run && tconn->agreed_pro_version < 92) {
		conn_err(tconn, "--dry-run is not supported by peer");
		return -EOPNOTSUPP;
	}

	sock = &tconn->data;
	p = conn_prepare_command(tconn, sock);
	if (!p)
		return -EIO;

	size = sizeof(*p);
	if (tconn->agreed_pro_version >= 87)
		size += strlen(tconn->net_conf->integrity_alg) + 1;

	p->protocol      = cpu_to_be32(tconn->net_conf->wire_protocol);
	p->after_sb_0p   = cpu_to_be32(tconn->net_conf->after_sb_0p);
	p->after_sb_1p   = cpu_to_be32(tconn->net_conf->after_sb_1p);
	p->after_sb_2p   = cpu_to_be32(tconn->net_conf->after_sb_2p);
	p->two_primaries = cpu_to_be32(tconn->net_conf->two_primaries);
	cf = 0;
	if (tconn->net_conf->want_lose)
		cf |= CF_WANT_LOSE;
	if (tconn->net_conf->dry_run)
		cf |= CF_DRY_RUN;
	p->conn_flags    = cpu_to_be32(cf);

	if (tconn->agreed_pro_version >= 87)
		strcpy(p->integrity_alg, tconn->net_conf->integrity_alg);
	return conn_send_command(tconn, sock, P_PROTOCOL, size, NULL, 0);
}

int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags)
{
	struct drbd_socket *sock;
	struct p_uuids *p;
	int i;

	if (!get_ldev_if_state(mdev, D_NEGOTIATING))
		return 0;

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p) {
		put_ldev(mdev);
		return -EIO;
	}
	for (i = UI_CURRENT; i < UI_SIZE; i++)
		p->uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 0;

	mdev->comm_bm_set = drbd_bm_total_weight(mdev);
	p->uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set);
	uuid_flags |= mdev->tconn->net_conf->want_lose ? 1 : 0;
	uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0;
	uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0;
	p->uuid[UI_FLAGS] = cpu_to_be64(uuid_flags);

	put_ldev(mdev);
	return drbd_send_command(mdev, sock, P_UUIDS, sizeof(*p), NULL, 0);
}

int drbd_send_uuids(struct drbd_conf *mdev)
{
	return _drbd_send_uuids(mdev, 0);
}

int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev)
{
	return _drbd_send_uuids(mdev, 8);
}

void drbd_print_uuids(struct drbd_conf *mdev, const char *text)
{
	if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
		u64 *uuid = mdev->ldev->md.uuid;
		dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX\n",
			 text,
			 (unsigned long long)uuid[UI_CURRENT],
			 (unsigned long long)uuid[UI_BITMAP],
			 (unsigned long long)uuid[UI_HISTORY_START],
			 (unsigned long long)uuid[UI_HISTORY_END]);
		put_ldev(mdev);
	} else {
		dev_info(DEV, "%s effective data uuid: %016llX\n",
			 text,
			 (unsigned long long)mdev->ed_uuid);
	}
}

void drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev)
{
	struct drbd_socket *sock;
	struct p_rs_uuid *p;
	u64 uuid;

	D_ASSERT(mdev->state.disk == D_UP_TO_DATE);

	uuid = mdev->ldev->md.uuid[UI_BITMAP] + UUID_NEW_BM_OFFSET;
	drbd_uuid_set(mdev, UI_BITMAP, uuid);
	drbd_print_uuids(mdev, "updated sync UUID");
	drbd_md_sync(mdev);

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (p) {
		p->uuid = cpu_to_be64(uuid);
		drbd_send_command(mdev, sock, P_SYNC_UUID, sizeof(*p), NULL, 0);
	}
}

int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags)
{
	struct drbd_socket *sock;
	struct p_sizes *p;
	sector_t d_size, u_size;
	int q_order_type, max_bio_size;

	if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
		D_ASSERT(mdev->ldev->backing_bdev);
		d_size = drbd_get_max_capacity(mdev->ldev);
		u_size = mdev->ldev->dc.disk_size;
		q_order_type = drbd_queue_order_type(mdev);
		max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
		max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE);
		put_ldev(mdev);
	} else {
		d_size = 0;
		u_size = 0;
		q_order_type = QUEUE_ORDERED_NONE;
		max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
	}

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p)
		return -EIO;
	p->d_size = cpu_to_be64(d_size);
	p->u_size = cpu_to_be64(u_size);
	p->c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
	p->max_bio_size = cpu_to_be32(max_bio_size);
	p->queue_order_type = cpu_to_be16(q_order_type);
	p->dds_flags = cpu_to_be16(flags);
	return drbd_send_command(mdev, sock, P_SIZES, sizeof(*p), NULL, 0);
}

/**
 * drbd_send_state() - Sends the drbd state to the peer
 * @mdev:	DRBD device.
 */
int drbd_send_state(struct drbd_conf *mdev)
{
	struct drbd_socket *sock;
	struct p_state *p;

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p)
		return -EIO;
	p->state = cpu_to_be32(mdev->state.i); /* Within the send mutex */
	return drbd_send_command(mdev, sock, P_STATE, sizeof(*p), NULL, 0);
}

int drbd_send_state_req(struct drbd_conf *mdev, union drbd_state mask, union drbd_state val)
{
	struct drbd_socket *sock;
	struct p_req_state *p;

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p)
		return -EIO;
	p->mask = cpu_to_be32(mask.i);
	p->val = cpu_to_be32(val.i);
	return drbd_send_command(mdev, sock, P_STATE_CHG_REQ, sizeof(*p), NULL, 0);
}

int conn_send_state_req(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val)
{
	enum drbd_packet cmd;
	struct drbd_socket *sock;
	struct p_req_state *p;

	cmd = tconn->agreed_pro_version < 100 ? P_STATE_CHG_REQ : P_CONN_ST_CHG_REQ;
	sock = &tconn->data;
	p = conn_prepare_command(tconn, sock);
	if (!p)
		return -EIO;
	p->mask = cpu_to_be32(mask.i);
	p->val = cpu_to_be32(val.i);
	return conn_send_command(tconn, sock, cmd, sizeof(*p), NULL, 0);
}

void drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode)
{
	struct drbd_socket *sock;
	struct p_req_state_reply *p;

	sock = &mdev->tconn->meta;
	p = drbd_prepare_command(mdev, sock);
	if (p) {
		p->retcode = cpu_to_be32(retcode);
		drbd_send_command(mdev, sock, P_STATE_CHG_REPLY, sizeof(*p), NULL, 0);
	}
}

void conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode)
{
	struct drbd_socket *sock;
	struct p_req_state_reply *p;
	enum drbd_packet cmd = tconn->agreed_pro_version < 100 ? P_STATE_CHG_REPLY : P_CONN_ST_CHG_REPLY;

	sock = &tconn->meta;
	p = conn_prepare_command(tconn, sock);
	if (p) {
		p->retcode = cpu_to_be32(retcode);
		conn_send_command(tconn, sock, cmd, sizeof(*p), NULL, 0);
	}
}

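/*
 * Note: the dcbp_* helpers below pack the "encoding" byte of a
 * p_compressed_bm packet: bits 0-3 carry the drbd_bitmap_code, bits 4-6 the
 * number of pad bits at the end of the code stream, and bit 7 says whether
 * the first run length describes set bits.
 */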
static void dcbp_set_code(struct p_compressed_bm *p, enum drbd_bitmap_code code)
{
	BUG_ON(code & ~0xf);
	p->encoding = (p->encoding & ~0xf) | code;
}

static void dcbp_set_start(struct p_compressed_bm *p, int set)
{
	p->encoding = (p->encoding & ~0x80) | (set ? 0x80 : 0);
}

static void dcbp_set_pad_bits(struct p_compressed_bm *p, int n)
{
	BUG_ON(n & ~0x7);
	p->encoding = (p->encoding & (~0x7 << 4)) | (n << 4);
}

int fill_bitmap_rle_bits(struct drbd_conf *mdev,
			 struct p_compressed_bm *p,
			 struct bm_xfer_ctx *c)
{
	struct bitstream bs;
	unsigned long plain_bits;
	unsigned long tmp;
	unsigned long rl;
	unsigned len;
	unsigned toggle;
	int bits;

	/* may we use this feature? */
	if ((mdev->tconn->net_conf->use_rle == 0) ||
	    (mdev->tconn->agreed_pro_version < 90))
		return 0;

	if (c->bit_offset >= c->bm_bits)
		return 0; /* nothing to do. */

	/* use at most this many bytes */
	bitstream_init(&bs, p->code, BM_PACKET_VLI_BYTES_MAX, 0);
	memset(p->code, 0, BM_PACKET_VLI_BYTES_MAX);
	/* plain bits covered in this code string */
	plain_bits = 0;

	/* p->encoding & 0x80 stores whether the first run length is set.
	 * bit offset is implicit.
	 * start with toggle == 2 to be able to tell the first iteration */
	toggle = 2;

	/* see how many plain bits we can stuff into one packet
	 * using RLE and VLI. */
	do {
		tmp = (toggle == 0) ? _drbd_bm_find_next_zero(mdev, c->bit_offset)
				    : _drbd_bm_find_next(mdev, c->bit_offset);
		if (tmp == -1UL)
			tmp = c->bm_bits;
		rl = tmp - c->bit_offset;

		if (toggle == 2) { /* first iteration */
			if (rl == 0) {
				/* the first checked bit was set,
				 * store start value, */
				dcbp_set_start(p, 1);
				/* but skip encoding of zero run length */
				toggle = !toggle;
				continue;
			}
			dcbp_set_start(p, 0);
		}

		/* paranoia: catch zero runlength.
		 * can only happen if bitmap is modified while we scan it. */
		if (rl == 0) {
			dev_err(DEV, "unexpected zero runlength while encoding bitmap "
			    "t:%u bo:%lu\n", toggle, c->bit_offset);
			return -1;
		}

		bits = vli_encode_bits(&bs, rl);
		if (bits == -ENOBUFS) /* buffer full */
			break;
		if (bits <= 0) {
			dev_err(DEV, "error while encoding bitmap: %d\n", bits);
			return 0;
		}

		toggle = !toggle;
		plain_bits += rl;
		c->bit_offset = tmp;
	} while (c->bit_offset < c->bm_bits);

	len = bs.cur.b - p->code + !!bs.cur.bit;

	if (plain_bits < (len << 3)) {
		/* incompressible with this method.
		 * we need to rewind both word and bit position. */
		c->bit_offset -= plain_bits;
		bm_xfer_ctx_bit_to_word_offset(c);
		c->bit_offset = c->word_offset * BITS_PER_LONG;
		return 0;
	}

	/* RLE + VLI was able to compress it just fine.
	 * update c->word_offset. */
	bm_xfer_ctx_bit_to_word_offset(c);

	/* store pad_bits */
	dcbp_set_pad_bits(p, (8 - bs.cur.bit) & 0x7);

	return len;
}

/**
 * send_bitmap_rle_or_plain
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
send_bitmap_rle_or_plain(struct drbd_conf *mdev, struct bm_xfer_ctx *c)
{
	struct drbd_socket *sock = &mdev->tconn->data;
	struct p_compressed_bm *p = sock->sbuf;
	unsigned long num_words;
	int len, err;

	len = fill_bitmap_rle_bits(mdev, p, c);
	if (len < 0)
		return -EIO;

	if (len) {
		dcbp_set_code(p, RLE_VLI_Bits);
		err = __send_command(mdev->tconn, mdev->vnr, sock,
				     P_COMPRESSED_BITMAP, sizeof(*p) + len,
				     NULL, 0);
		c->packets[0]++;
		c->bytes[0] += sizeof(*p) + len;

		if (c->bit_offset >= c->bm_bits)
			len = 0; /* DONE */
	} else {
		/* was not compressible.
		 * send a buffer full of plain text bits instead. */
		struct p_header *h = sock->sbuf;
		num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
		len = num_words * sizeof(long);
		if (len)
			drbd_bm_get_lel(mdev, c->word_offset, num_words,
					(unsigned long *)h->payload);
		err = __send_command(mdev->tconn, mdev->vnr, sock, P_BITMAP,
				     sizeof(*h) + len, NULL, 0);
		c->word_offset += num_words;
		c->bit_offset = c->word_offset * BITS_PER_LONG;

		c->packets[1]++;
		c->bytes[1] += sizeof(struct p_header80) + len;

		if (c->bit_offset > c->bm_bits)
			c->bit_offset = c->bm_bits;
	}
	if (!err) {
		if (len == 0) {
			INFO_bm_xfer_stats(mdev, "send", c);
			return 0;
		} else
			return 1;
	}
	return -EIO;
}

/* See the comment at receive_bitmap() */
static int _drbd_send_bitmap(struct drbd_conf *mdev)
{
	struct bm_xfer_ctx c;
	int err;

	if (!expect(mdev->bitmap))
		return false;

	if (get_ldev(mdev)) {
		if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
			dev_info(DEV, "Writing the whole bitmap, MDF_FullSync was set.\n");
			drbd_bm_set_all(mdev);
			if (drbd_bm_write(mdev)) {
				/* write_bm did fail! Leave full sync flag set in Meta P_DATA
				 * but otherwise process as per normal - need to tell other
				 * side that a full resync is required! */
				dev_err(DEV, "Failed to write bitmap to disk!\n");
			} else {
				drbd_md_clear_flag(mdev, MDF_FULL_SYNC);
				drbd_md_sync(mdev);
			}
		}
		put_ldev(mdev);
	}

	c = (struct bm_xfer_ctx) {
		.bm_bits = drbd_bm_bits(mdev),
		.bm_words = drbd_bm_words(mdev),
	};

	do {
		err = send_bitmap_rle_or_plain(mdev, &c);
	} while (err > 0);

	return err == 0;
}

int drbd_send_bitmap(struct drbd_conf *mdev)
{
	struct drbd_socket *sock = &mdev->tconn->data;
	int err = -1;

	mutex_lock(&sock->mutex);
	if (sock->socket)
		err = !_drbd_send_bitmap(mdev);
	mutex_unlock(&sock->mutex);
	return err;
}

void drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size)
{
	struct drbd_socket *sock;
	struct p_barrier_ack *p;

	if (mdev->state.conn < C_CONNECTED)
		return;

	sock = &mdev->tconn->meta;
	p = drbd_prepare_command(mdev, sock);
	if (!p)
		return;
	p->barrier = barrier_nr;
	p->set_size = cpu_to_be32(set_size);
	drbd_send_command(mdev, sock, P_BARRIER_ACK, sizeof(*p), NULL, 0);
}

/**
 * _drbd_send_ack() - Sends an ack packet
 * @mdev:	DRBD device.
 * @cmd:	Packet command code.
 * @sector:	sector, needs to be in big endian byte order
 * @blksize:	size in byte, needs to be in big endian byte order
 * @block_id:	Id, big endian byte order
 */
static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd,
			  u64 sector, u32 blksize, u64 block_id)
{
	struct drbd_socket *sock;
	struct p_block_ack *p;

	if (mdev->state.conn < C_CONNECTED)
		return -EIO;

	sock = &mdev->tconn->meta;
	p = drbd_prepare_command(mdev, sock);
	if (!p)
		return -EIO;
	p->sector = sector;
	p->block_id = block_id;
	p->blksize = blksize;
	p->seq_num = cpu_to_be32(atomic_inc_return(&mdev->packet_seq));
	return drbd_send_command(mdev, sock, cmd, sizeof(*p), NULL, 0);
}

/* dp->sector and dp->block_id already/still in network byte order,
 * data_size is payload size according to dp->head,
 * and may need to be corrected for digest size. */
void drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd,
		      struct p_data *dp, int data_size)
{
	data_size -= (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
		crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
	_drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size),
		       dp->block_id);
}

void drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd,
		      struct p_block_req *rp)
{
	_drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id);
}

/**
 * drbd_send_ack() - Sends an ack packet
 * @mdev:	DRBD device
 * @cmd:	packet command code
 * @peer_req:	peer request
 */
int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd,
		  struct drbd_peer_request *peer_req)
{
	return _drbd_send_ack(mdev, cmd,
			      cpu_to_be64(peer_req->i.sector),
			      cpu_to_be32(peer_req->i.size),
			      peer_req->block_id);
}

/* This function misuses the block_id field to signal if the blocks
 * are in sync or not. */
int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packet cmd,
		     sector_t sector, int blksize, u64 block_id)
{
	return _drbd_send_ack(mdev, cmd,
			      cpu_to_be64(sector),
			      cpu_to_be32(blksize),
			      cpu_to_be64(block_id));
}

int drbd_send_drequest(struct drbd_conf *mdev, int cmd,
		       sector_t sector, int size, u64 block_id)
{
	struct drbd_socket *sock;
	struct p_block_req *p;

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p)
		return -EIO;
	p->sector = cpu_to_be64(sector);
	p->block_id = block_id;
	p->blksize = cpu_to_be32(size);
	return drbd_send_command(mdev, sock, cmd, sizeof(*p), NULL, 0);
}

int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector, int size,
			    void *digest, int digest_size, enum drbd_packet cmd)
{
	struct drbd_socket *sock;
	struct p_block_req *p;

	/* FIXME: Put the digest into the preallocated socket buffer. */

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p)
		return -EIO;
	p->sector = cpu_to_be64(sector);
	p->block_id = ID_SYNCER /* unused */;
	p->blksize = cpu_to_be32(size);
	return drbd_send_command(mdev, sock, cmd, sizeof(*p),
				 digest, digest_size);
}

int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
{
	struct drbd_socket *sock;
	struct p_block_req *p;

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p)
		return -EIO;
	p->sector = cpu_to_be64(sector);
	p->block_id = ID_SYNCER /* unused */;
	p->blksize = cpu_to_be32(size);
	return drbd_send_command(mdev, sock, P_OV_REQUEST, sizeof(*p), NULL, 0);
}

/* called on sndtimeo
 * returns false if we should retry,
 * true if we think connection is dead
 */
static int we_should_drop_the_connection(struct drbd_tconn *tconn, struct socket *sock)
{
	int drop_it;
	/* long elapsed = (long)(jiffies - mdev->last_received); */

	drop_it =   tconn->meta.socket == sock
		|| !tconn->asender.task
		|| get_t_state(&tconn->asender) != RUNNING
		|| tconn->cstate < C_WF_REPORT_PARAMS;

	if (drop_it)
		return true;

	drop_it = !--tconn->ko_count;
	if (!drop_it) {
		conn_err(tconn, "[%s/%d] sock_sendmsg time expired, ko = %u\n",
			 current->comm, current->pid, tconn->ko_count);
		request_ping(tconn);
	}

	return drop_it; /* && (mdev->state == R_PRIMARY) */;
}

static void drbd_update_congested(struct drbd_tconn *tconn)
{
	struct sock *sk = tconn->data.socket->sk;
	if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5)
		set_bit(NET_CONGESTED, &tconn->flags);
}

/* The idea of sendpage seems to be to put some kind of reference
 * to the page into the skb, and to hand it over to the NIC. In
 * this process get_page() gets called.
 *
 * As soon as the page was really sent over the network put_page()
 * gets called by some part of the network layer. [ NIC driver? ]
 *
 * [ get_page() / put_page() increment/decrement the count. If count
 *   reaches 0 the page will be freed. ]
 *
 * This works nicely with pages from FSs.
 * But this means that in protocol A we might signal IO completion too early!
 *
 * In order not to corrupt data during a resync we must make sure
 * that we do not reuse our own buffer pages (EEs) too early, therefore
 * we have the net_ee list.
 *
 * XFS seems to have problems, still, it submits pages with page_count == 0!
 * As a workaround, we disable sendpage on pages
 * with page_count == 0 or PageSlab.
 */
static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
			      int offset, size_t size, unsigned msg_flags)
{
	struct socket *socket;
	void *addr;
	int err;

	socket = mdev->tconn->data.socket;
	addr = kmap(page) + offset;
	err = drbd_send_all(mdev->tconn, socket, addr, size, msg_flags);
	kunmap(page);
	if (!err)
		mdev->send_cnt += size >> 9;
	return err;
}

static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
			   int offset, size_t size, unsigned msg_flags)
{
	struct socket *socket = mdev->tconn->data.socket;
	mm_segment_t oldfs = get_fs();
	int len = size;
	int err = -EIO;

	/* e.g. XFS meta- & log-data is in slab pages, which have a
	 * page_count of 0 and/or have PageSlab() set.
	 * we cannot use send_page for those, as that does get_page();
	 * put_page(); and would cause either a VM_BUG directly, or
	 * __page_cache_release a page that would actually still be referenced
	 * by someone, leading to some obscure delayed Oops somewhere else. */
	if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
		return _drbd_no_send_page(mdev, page, offset, size, msg_flags);

	msg_flags |= MSG_NOSIGNAL;
	drbd_update_congested(mdev->tconn);
	set_fs(KERNEL_DS);
	do {
		int sent;

		sent = socket->ops->sendpage(socket, page, offset, len, msg_flags);
		if (sent <= 0) {
			if (sent == -EAGAIN) {
				if (we_should_drop_the_connection(mdev->tconn, socket))
					break;
				continue;
			}
			dev_warn(DEV, "%s: size=%d len=%d sent=%d\n",
			     __func__, (int)size, len, sent);
			if (sent < 0)
				err = sent;
			break;
		}
		len    -= sent;
		offset += sent;
	} while (len > 0 /* THINK && mdev->cstate >= C_CONNECTED*/);
	set_fs(oldfs);
	clear_bit(NET_CONGESTED, &mdev->tconn->flags);

	if (len == 0) {
		err = 0;
		mdev->send_cnt += size >> 9;
	}
	return err;
}

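/*
 * _drbd_send_bio() copies each bvec through _drbd_no_send_page(), while
 * _drbd_send_zc_bio() and _drbd_send_zc_ee() go through _drbd_send_page(),
 * which uses sendpage() zero-copy where that is safe (see above).
 */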
static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
{
	struct bio_vec *bvec;
	int i;
	/* hint all but last page with MSG_MORE */
	__bio_for_each_segment(bvec, bio, i, 0) {
		int err;

		err = _drbd_no_send_page(mdev, bvec->bv_page,
					 bvec->bv_offset, bvec->bv_len,
					 i == bio->bi_vcnt - 1 ? 0 : MSG_MORE);
		if (err)
			return err;
	}
	return 0;
}

static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
{
	struct bio_vec *bvec;
	int i;
	/* hint all but last page with MSG_MORE */
	__bio_for_each_segment(bvec, bio, i, 0) {
		int err;

		err = _drbd_send_page(mdev, bvec->bv_page,
				      bvec->bv_offset, bvec->bv_len,
				      i == bio->bi_vcnt - 1 ? 0 : MSG_MORE);
		if (err)
			return err;
	}
	return 0;
}

static int _drbd_send_zc_ee(struct drbd_conf *mdev,
			    struct drbd_peer_request *peer_req)
{
	struct page *page = peer_req->pages;
	unsigned len = peer_req->i.size;
	int err;

	/* hint all but last page with MSG_MORE */
	page_chain_for_each(page) {
		unsigned l = min_t(unsigned, len, PAGE_SIZE);

		err = _drbd_send_page(mdev, page, 0, l,
				      page_chain_next(page) ? MSG_MORE : 0);
		if (err)
			return err;
		len -= l;
	}
	return 0;
}

static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw)
{
	if (mdev->tconn->agreed_pro_version >= 95)
		return  (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
			(bi_rw & REQ_FUA ? DP_FUA : 0) |
			(bi_rw & REQ_FLUSH ? DP_FLUSH : 0) |
			(bi_rw & REQ_DISCARD ? DP_DISCARD : 0);
	else
		return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
}

1628/* Used to send write requests
1629 * R_PRIMARY -> Peer (P_DATA)
1630 */
1631int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
1632{
9f5bdc33
AG
1633 struct drbd_socket *sock;
1634 struct p_data *p;
b411b363 1635 unsigned int dp_flags = 0;
b411b363 1636 int dgs;
9f5bdc33 1637 int err;
b411b363 1638
a0638456
PR
1639 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ?
1640 crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0;
b411b363 1641
9f5bdc33
AG
1642 sock = &mdev->tconn->data;
1643 p = drbd_prepare_command(mdev, sock);
1644 if (!p)
1645 return -EIO;
1646 p->sector = cpu_to_be64(req->i.sector);
1647 p->block_id = (unsigned long)req;
1648 p->seq_num = cpu_to_be32(req->seq_num = atomic_inc_return(&mdev->packet_seq));
76d2e7ec 1649 dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw);
b411b363
PR
1650 if (mdev->state.conn >= C_SYNC_SOURCE &&
1651 mdev->state.conn <= C_PAUSED_SYNC_T)
1652 dp_flags |= DP_MAY_SET_IN_SYNC;
9f5bdc33
AG
1653 p->dp_flags = cpu_to_be32(dp_flags);
1654 if (dgs)
1655 drbd_csum_bio(mdev, mdev->tconn->integrity_w_tfm, req->master_bio, p + 1);
1656 err = __send_command(mdev->tconn, mdev->vnr, sock, P_DATA, sizeof(*p) + dgs, NULL, req->i.size);
6bdb9b0e 1657 if (!err) {
470be44a
LE
1658 /* For protocol A, we have to memcpy the payload into
1659 * socket buffers, as the request may complete right away
1660 * as soon as we have handed it over to TCP, at which point the
1661 * data pages may become invalid.
1662 *
1663 * With data integrity enabled, we copy it as well, so we can be
1664 * sure that even if the bio pages are still being modified, it
1665 * won't change the data on the wire; thus if the digest checks
1666 * out ok after sending on this side, but does not match on the
1667 * receiving side, we know the corruption happened elsewhere.
1668 */
89e58e75 1669 if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A || dgs)
6bdb9b0e 1670 err = _drbd_send_bio(mdev, req->master_bio);
b411b363 1671 else
6bdb9b0e 1672 err = _drbd_send_zc_bio(mdev, req->master_bio);
470be44a
LE
1673
1674 /* double check digest, sometimes buffers have been modified in flight. */
1675 if (dgs > 0 && dgs <= 64) {
24c4830c 1676 /* 64 byte, 512 bit, is the largest digest size
470be44a
LE
1677 * currently supported in kernel crypto. */
1678 unsigned char digest[64];
a0638456 1679 drbd_csum_bio(mdev, mdev->tconn->integrity_w_tfm, req->master_bio, digest);
9f5bdc33 1680 if (memcmp(p + 1, digest, dgs)) {
470be44a
LE
1681 dev_warn(DEV,
1682 "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n",
ace652ac 1683 (unsigned long long)req->i.sector, req->i.size);
470be44a
LE
1684 }
1685 } /* else if (dgs > 64) {
1686 ... Be noisy about digest too large ...
1687 } */
b411b363 1688 }
9f5bdc33 1689 mutex_unlock(&sock->mutex); /* locked by drbd_prepare_command() */
bd26bfc5 1690
6bdb9b0e 1691 return err;
b411b363
PR
1692}
1693
1694/* answer packet, used to send data back for read requests:
1695 * Peer -> (diskless) R_PRIMARY (P_DATA_REPLY)
1696 * C_SYNC_SOURCE -> C_SYNC_TARGET (P_RS_DATA_REPLY)
1697 */
d8763023 1698int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd,
db830c46 1699 struct drbd_peer_request *peer_req)
b411b363 1700{
9f5bdc33
AG
1701 struct drbd_socket *sock;
1702 struct p_data *p;
7b57b89d 1703 int err;
b411b363
PR
1704 int dgs;
1705
a0638456
PR
1706 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ?
1707 crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0;
b411b363 1708
9f5bdc33
AG
1709 sock = &mdev->tconn->data;
1710 p = drbd_prepare_command(mdev, sock);
1711 if (!p)
1712 return -EIO;
1713 p->sector = cpu_to_be64(peer_req->i.sector);
1714 p->block_id = peer_req->block_id;
1715 p->seq_num = 0; /* unused */
1716 if (dgs)
1717 drbd_csum_ee(mdev, mdev->tconn->integrity_w_tfm, peer_req, p + 1);
1718 err = __send_command(mdev->tconn, mdev->vnr, sock, cmd, sizeof(*p) + dgs, NULL, peer_req->i.size);
7b57b89d
AG
1719 if (!err)
1720 err = _drbd_send_zc_ee(mdev, peer_req);
9f5bdc33 1721 mutex_unlock(&sock->mutex); /* locked by drbd_prepare_command() */
bd26bfc5 1722
7b57b89d 1723 return err;
b411b363
PR
1724}
1725
8f7bed77 1726int drbd_send_out_of_sync(struct drbd_conf *mdev, struct drbd_request *req)
73a01a18 1727{
9f5bdc33
AG
1728 struct drbd_socket *sock;
1729 struct p_block_desc *p;
73a01a18 1730
9f5bdc33
AG
1731 sock = &mdev->tconn->data;
1732 p = drbd_prepare_command(mdev, sock);
1733 if (!p)
1734 return -EIO;
1735 p->sector = cpu_to_be64(req->i.sector);
1736 p->blksize = cpu_to_be32(req->i.size);
1737 return drbd_send_command(mdev, sock, P_OUT_OF_SYNC, sizeof(*p), NULL, 0);
73a01a18
PR
1738}
1739
b411b363
PR
1740/*
1741 drbd_send distinguishes two cases:
1742
1743 Packets sent via the data socket "sock"
1744 and packets sent via the meta data socket "msock"
1745
1746 sock msock
1747 -----------------+-------------------------+------------------------------
1748 timeout conf.timeout / 2 conf.timeout / 2
1749 timeout action send a ping via msock Abort communication
1750 and close all sockets
1751*/
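/* In code terms (illustrative note, not new behaviour): request payloads go
 * out via drbd_send(tconn, tconn->data.socket, ...), while the asender
 * answers on tconn->meta.socket; only the data socket toggles the
 * NET_CONGESTED flag (see drbd_update_congested()/clear_bit() in
 * drbd_send() below).
 */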
1752
1753/*
1754 * you must have down()ed the appropriate [m]sock_mutex elsewhere!
1755 */
bedbd2a5 1756int drbd_send(struct drbd_tconn *tconn, struct socket *sock,
b411b363
PR
1757 void *buf, size_t size, unsigned msg_flags)
1758{
1759 struct kvec iov;
1760 struct msghdr msg;
1761 int rv, sent = 0;
1762
1763 if (!sock)
c0d42c8e 1764 return -EBADR;
b411b363
PR
1765
1766 /* THINK if (signal_pending) return ... ? */
1767
1768 iov.iov_base = buf;
1769 iov.iov_len = size;
1770
1771 msg.msg_name = NULL;
1772 msg.msg_namelen = 0;
1773 msg.msg_control = NULL;
1774 msg.msg_controllen = 0;
1775 msg.msg_flags = msg_flags | MSG_NOSIGNAL;
1776
bedbd2a5
PR
1777 if (sock == tconn->data.socket) {
1778 tconn->ko_count = tconn->net_conf->ko_count;
1779 drbd_update_congested(tconn);
b411b363
PR
1780 }
1781 do {
1782 /* STRANGE
1783 * tcp_sendmsg does _not_ use its size parameter at all ?
1784 *
1785 * -EAGAIN on timeout, -EINTR on signal.
1786 */
1787/* THINK
1788 * do we need to block DRBD_SIG if sock == &meta.socket ??
1789 * otherwise wake_asender() might interrupt some send_*Ack !
1790 */
1791 rv = kernel_sendmsg(sock, &msg, &iov, 1, size);
1792 if (rv == -EAGAIN) {
bedbd2a5 1793 if (we_should_drop_the_connection(tconn, sock))
b411b363
PR
1794 break;
1795 else
1796 continue;
1797 }
b411b363
PR
1798 if (rv == -EINTR) {
1799 flush_signals(current);
1800 rv = 0;
1801 }
1802 if (rv < 0)
1803 break;
1804 sent += rv;
1805 iov.iov_base += rv;
1806 iov.iov_len -= rv;
1807 } while (sent < size);
1808
bedbd2a5
PR
1809 if (sock == tconn->data.socket)
1810 clear_bit(NET_CONGESTED, &tconn->flags);
b411b363
PR
1811
1812 if (rv <= 0) {
1813 if (rv != -EAGAIN) {
bedbd2a5
PR
1814 conn_err(tconn, "%s_sendmsg returned %d\n",
1815 sock == tconn->meta.socket ? "msock" : "sock",
1816 rv);
bbeb641c 1817 conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);
b411b363 1818 } else
bbeb641c 1819 conn_request_state(tconn, NS(conn, C_TIMEOUT), CS_HARD);
b411b363
PR
1820 }
1821
1822 return sent;
1823}
1824
fb708e40
AG
1825/**
1826 * drbd_send_all - Send an entire buffer
1827 *
1828 * Returns 0 upon success and a negative error value otherwise.
1829 */
1830int drbd_send_all(struct drbd_tconn *tconn, struct socket *sock, void *buffer,
1831 size_t size, unsigned msg_flags)
1832{
1833 int err;
1834
1835 err = drbd_send(tconn, sock, buffer, size, msg_flags);
1836 if (err < 0)
1837 return err;
1838 if (err != size)
1839 return -EIO;
1840 return 0;
1841}
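/* Usage sketch (illustrative only; buffer and error handling are assumed):
 * sending a fixed-size buffer over the data socket, where a short send has
 * already been mapped to -EIO by drbd_send_all():
 *
 *	err = drbd_send_all(tconn, tconn->data.socket, buf, sizeof(buf), 0);
 *	if (err)
 *		return err;	// negative errno, or -EIO on partial send
 */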
1842
b411b363
PR
1843static int drbd_open(struct block_device *bdev, fmode_t mode)
1844{
1845 struct drbd_conf *mdev = bdev->bd_disk->private_data;
1846 unsigned long flags;
1847 int rv = 0;
1848
2a48fc0a 1849 mutex_lock(&drbd_main_mutex);
87eeee41 1850 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
b411b363
PR
1851 /* to have a stable mdev->state.role
1852 * and no race with updating open_cnt */
1853
1854 if (mdev->state.role != R_PRIMARY) {
1855 if (mode & FMODE_WRITE)
1856 rv = -EROFS;
1857 else if (!allow_oos)
1858 rv = -EMEDIUMTYPE;
1859 }
1860
1861 if (!rv)
1862 mdev->open_cnt++;
87eeee41 1863 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
2a48fc0a 1864 mutex_unlock(&drbd_main_mutex);
b411b363
PR
1865
1866 return rv;
1867}
1868
1869static int drbd_release(struct gendisk *gd, fmode_t mode)
1870{
1871 struct drbd_conf *mdev = gd->private_data;
2a48fc0a 1872 mutex_lock(&drbd_main_mutex);
b411b363 1873 mdev->open_cnt--;
2a48fc0a 1874 mutex_unlock(&drbd_main_mutex);
b411b363
PR
1875 return 0;
1876}
1877
b411b363
PR
1878static void drbd_set_defaults(struct drbd_conf *mdev)
1879{
f399002e
LE
1880 /* Beware! The actual layout differs
1881 * between big endian and little endian */
da9fbc27 1882 mdev->state = (union drbd_dev_state) {
b411b363
PR
1883 { .role = R_SECONDARY,
1884 .peer = R_UNKNOWN,
1885 .conn = C_STANDALONE,
1886 .disk = D_DISKLESS,
1887 .pdsk = D_UNKNOWN,
b411b363
PR
1888 } };
1889}
1890
1891void drbd_init_set_defaults(struct drbd_conf *mdev)
1892{
1893 /* the memset(,0,) did most of this.
1894 * note: only assignments, no allocation in here */
1895
1896 drbd_set_defaults(mdev);
1897
b411b363
PR
1898 atomic_set(&mdev->ap_bio_cnt, 0);
1899 atomic_set(&mdev->ap_pending_cnt, 0);
1900 atomic_set(&mdev->rs_pending_cnt, 0);
1901 atomic_set(&mdev->unacked_cnt, 0);
1902 atomic_set(&mdev->local_cnt, 0);
435f0740 1903 atomic_set(&mdev->pp_in_use_by_net, 0);
778f271d 1904 atomic_set(&mdev->rs_sect_in, 0);
0f0601f4 1905 atomic_set(&mdev->rs_sect_ev, 0);
759fbdfb 1906 atomic_set(&mdev->ap_in_flight, 0);
b411b363
PR
1907
1908 mutex_init(&mdev->md_io_mutex);
8410da8f
PR
1909 mutex_init(&mdev->own_state_mutex);
1910 mdev->state_mutex = &mdev->own_state_mutex;
b411b363 1911
b411b363 1912 spin_lock_init(&mdev->al_lock);
b411b363
PR
1913 spin_lock_init(&mdev->peer_seq_lock);
1914 spin_lock_init(&mdev->epoch_lock);
1915
1916 INIT_LIST_HEAD(&mdev->active_ee);
1917 INIT_LIST_HEAD(&mdev->sync_ee);
1918 INIT_LIST_HEAD(&mdev->done_ee);
1919 INIT_LIST_HEAD(&mdev->read_ee);
1920 INIT_LIST_HEAD(&mdev->net_ee);
1921 INIT_LIST_HEAD(&mdev->resync_reads);
b411b363
PR
1922 INIT_LIST_HEAD(&mdev->resync_work.list);
1923 INIT_LIST_HEAD(&mdev->unplug_work.list);
e9e6f3ec 1924 INIT_LIST_HEAD(&mdev->go_diskless.list);
b411b363 1925 INIT_LIST_HEAD(&mdev->md_sync_work.list);
c4752ef1 1926 INIT_LIST_HEAD(&mdev->start_resync_work.list);
b411b363 1927 INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
0ced55a3 1928
794abb75 1929 mdev->resync_work.cb = w_resync_timer;
b411b363 1930 mdev->unplug_work.cb = w_send_write_hint;
e9e6f3ec 1931 mdev->go_diskless.cb = w_go_diskless;
b411b363
PR
1932 mdev->md_sync_work.cb = w_md_sync;
1933 mdev->bm_io_work.w.cb = w_bitmap_io;
370a43e7 1934 mdev->start_resync_work.cb = w_start_resync;
a21e9298
PR
1935
1936 mdev->resync_work.mdev = mdev;
1937 mdev->unplug_work.mdev = mdev;
1938 mdev->go_diskless.mdev = mdev;
1939 mdev->md_sync_work.mdev = mdev;
1940 mdev->bm_io_work.w.mdev = mdev;
1941 mdev->start_resync_work.mdev = mdev;
1942
b411b363
PR
1943 init_timer(&mdev->resync_timer);
1944 init_timer(&mdev->md_sync_timer);
370a43e7 1945 init_timer(&mdev->start_resync_timer);
7fde2be9 1946 init_timer(&mdev->request_timer);
b411b363
PR
1947 mdev->resync_timer.function = resync_timer_fn;
1948 mdev->resync_timer.data = (unsigned long) mdev;
1949 mdev->md_sync_timer.function = md_sync_timer_fn;
1950 mdev->md_sync_timer.data = (unsigned long) mdev;
370a43e7
PR
1951 mdev->start_resync_timer.function = start_resync_timer_fn;
1952 mdev->start_resync_timer.data = (unsigned long) mdev;
7fde2be9
PR
1953 mdev->request_timer.function = request_timer_fn;
1954 mdev->request_timer.data = (unsigned long) mdev;
b411b363
PR
1955
1956 init_waitqueue_head(&mdev->misc_wait);
1957 init_waitqueue_head(&mdev->state_wait);
1958 init_waitqueue_head(&mdev->ee_wait);
1959 init_waitqueue_head(&mdev->al_wait);
1960 init_waitqueue_head(&mdev->seq_wait);
1961
fd340c12 1962 /* mdev->tconn->agreed_pro_version gets initialized in drbd_connect() */
2451fc3b 1963 mdev->write_ordering = WO_bdev_flush;
b411b363 1964 mdev->resync_wenr = LC_FREE;
99432fcc
PR
1965 mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
1966 mdev->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
b411b363
PR
1967}
1968
1969void drbd_mdev_cleanup(struct drbd_conf *mdev)
1970{
1d7734a0 1971 int i;
e6b3ea83 1972 if (mdev->tconn->receiver.t_state != NONE)
b411b363 1973 dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n",
e6b3ea83 1974 mdev->tconn->receiver.t_state);
b411b363
PR
1975
1976 /* no need to lock it, I'm the only thread alive */
1977 if (atomic_read(&mdev->current_epoch->epoch_size) != 0)
1978 dev_err(DEV, "epoch_size:%d\n", atomic_read(&mdev->current_epoch->epoch_size));
1979 mdev->al_writ_cnt =
1980 mdev->bm_writ_cnt =
1981 mdev->read_cnt =
1982 mdev->recv_cnt =
1983 mdev->send_cnt =
1984 mdev->writ_cnt =
1985 mdev->p_size =
1986 mdev->rs_start =
1987 mdev->rs_total =
1d7734a0
LE
1988 mdev->rs_failed = 0;
1989 mdev->rs_last_events = 0;
0f0601f4 1990 mdev->rs_last_sect_ev = 0;
1d7734a0
LE
1991 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
1992 mdev->rs_mark_left[i] = 0;
1993 mdev->rs_mark_time[i] = 0;
1994 }
89e58e75 1995 D_ASSERT(mdev->tconn->net_conf == NULL);
b411b363
PR
1996
1997 drbd_set_my_capacity(mdev, 0);
1998 if (mdev->bitmap) {
1999 /* maybe never allocated. */
02d9a94b 2000 drbd_bm_resize(mdev, 0, 1);
b411b363
PR
2001 drbd_bm_cleanup(mdev);
2002 }
2003
2004 drbd_free_resources(mdev);
0778286a 2005 clear_bit(AL_SUSPENDED, &mdev->flags);
b411b363
PR
2006
2007 /*
2008 * currently we call drbd_init_ee only on module load, so
2009 * we may call drbd_release_ee only on module unload!
2010 */
2011 D_ASSERT(list_empty(&mdev->active_ee));
2012 D_ASSERT(list_empty(&mdev->sync_ee));
2013 D_ASSERT(list_empty(&mdev->done_ee));
2014 D_ASSERT(list_empty(&mdev->read_ee));
2015 D_ASSERT(list_empty(&mdev->net_ee));
2016 D_ASSERT(list_empty(&mdev->resync_reads));
e42325a5
PR
2017 D_ASSERT(list_empty(&mdev->tconn->data.work.q));
2018 D_ASSERT(list_empty(&mdev->tconn->meta.work.q));
b411b363
PR
2019 D_ASSERT(list_empty(&mdev->resync_work.list));
2020 D_ASSERT(list_empty(&mdev->unplug_work.list));
e9e6f3ec 2021 D_ASSERT(list_empty(&mdev->go_diskless.list));
2265b473
LE
2022
2023 drbd_set_defaults(mdev);
b411b363
PR
2024}
2025
2026
2027static void drbd_destroy_mempools(void)
2028{
2029 struct page *page;
2030
2031 while (drbd_pp_pool) {
2032 page = drbd_pp_pool;
2033 drbd_pp_pool = (struct page *)page_private(page);
2034 __free_page(page);
2035 drbd_pp_vacant--;
2036 }
2037
2038 /* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */
2039
da4a75d2
LE
2040 if (drbd_md_io_bio_set)
2041 bioset_free(drbd_md_io_bio_set);
35abf594
LE
2042 if (drbd_md_io_page_pool)
2043 mempool_destroy(drbd_md_io_page_pool);
b411b363
PR
2044 if (drbd_ee_mempool)
2045 mempool_destroy(drbd_ee_mempool);
2046 if (drbd_request_mempool)
2047 mempool_destroy(drbd_request_mempool);
2048 if (drbd_ee_cache)
2049 kmem_cache_destroy(drbd_ee_cache);
2050 if (drbd_request_cache)
2051 kmem_cache_destroy(drbd_request_cache);
2052 if (drbd_bm_ext_cache)
2053 kmem_cache_destroy(drbd_bm_ext_cache);
2054 if (drbd_al_ext_cache)
2055 kmem_cache_destroy(drbd_al_ext_cache);
2056
da4a75d2 2057 drbd_md_io_bio_set = NULL;
35abf594 2058 drbd_md_io_page_pool = NULL;
b411b363
PR
2059 drbd_ee_mempool = NULL;
2060 drbd_request_mempool = NULL;
2061 drbd_ee_cache = NULL;
2062 drbd_request_cache = NULL;
2063 drbd_bm_ext_cache = NULL;
2064 drbd_al_ext_cache = NULL;
2065
2066 return;
2067}
2068
2069static int drbd_create_mempools(void)
2070{
2071 struct page *page;
1816a2b4 2072 const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count;
b411b363
PR
2073 int i;
2074
2075 /* prepare our caches and mempools */
2076 drbd_request_mempool = NULL;
2077 drbd_ee_cache = NULL;
2078 drbd_request_cache = NULL;
2079 drbd_bm_ext_cache = NULL;
2080 drbd_al_ext_cache = NULL;
2081 drbd_pp_pool = NULL;
35abf594 2082 drbd_md_io_page_pool = NULL;
da4a75d2 2083 drbd_md_io_bio_set = NULL;
b411b363
PR
2084
2085 /* caches */
2086 drbd_request_cache = kmem_cache_create(
2087 "drbd_req", sizeof(struct drbd_request), 0, 0, NULL);
2088 if (drbd_request_cache == NULL)
2089 goto Enomem;
2090
2091 drbd_ee_cache = kmem_cache_create(
f6ffca9f 2092 "drbd_ee", sizeof(struct drbd_peer_request), 0, 0, NULL);
b411b363
PR
2093 if (drbd_ee_cache == NULL)
2094 goto Enomem;
2095
2096 drbd_bm_ext_cache = kmem_cache_create(
2097 "drbd_bm", sizeof(struct bm_extent), 0, 0, NULL);
2098 if (drbd_bm_ext_cache == NULL)
2099 goto Enomem;
2100
2101 drbd_al_ext_cache = kmem_cache_create(
2102 "drbd_al", sizeof(struct lc_element), 0, 0, NULL);
2103 if (drbd_al_ext_cache == NULL)
2104 goto Enomem;
2105
2106 /* mempools */
da4a75d2
LE
2107 drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0);
2108 if (drbd_md_io_bio_set == NULL)
2109 goto Enomem;
2110
35abf594
LE
2111 drbd_md_io_page_pool = mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0);
2112 if (drbd_md_io_page_pool == NULL)
2113 goto Enomem;
2114
b411b363
PR
2115 drbd_request_mempool = mempool_create(number,
2116 mempool_alloc_slab, mempool_free_slab, drbd_request_cache);
2117 if (drbd_request_mempool == NULL)
2118 goto Enomem;
2119
2120 drbd_ee_mempool = mempool_create(number,
2121 mempool_alloc_slab, mempool_free_slab, drbd_ee_cache);
2027ae1f 2122 if (drbd_ee_mempool == NULL)
b411b363
PR
2123 goto Enomem;
2124
2125 /* drbd's page pool */
2126 spin_lock_init(&drbd_pp_lock);
2127
2128 for (i = 0; i < number; i++) {
2129 page = alloc_page(GFP_HIGHUSER);
2130 if (!page)
2131 goto Enomem;
2132 set_page_private(page, (unsigned long)drbd_pp_pool);
2133 drbd_pp_pool = page;
2134 }
2135 drbd_pp_vacant = number;
2136
2137 return 0;
2138
2139Enomem:
2140 drbd_destroy_mempools(); /* in case we allocated some */
2141 return -ENOMEM;
2142}
2143
2144static int drbd_notify_sys(struct notifier_block *this, unsigned long code,
2145 void *unused)
2146{
2147 /* just so we have it. you never know what interesting things we
2148 * might want to do here some day...
2149 */
2150
2151 return NOTIFY_DONE;
2152}
2153
2154static struct notifier_block drbd_notifier = {
2155 .notifier_call = drbd_notify_sys,
2156};
2157
2158static void drbd_release_ee_lists(struct drbd_conf *mdev)
2159{
2160 int rr;
2161
2162 rr = drbd_release_ee(mdev, &mdev->active_ee);
2163 if (rr)
2164 dev_err(DEV, "%d EEs in active list found!\n", rr);
2165
2166 rr = drbd_release_ee(mdev, &mdev->sync_ee);
2167 if (rr)
2168 dev_err(DEV, "%d EEs in sync list found!\n", rr);
2169
2170 rr = drbd_release_ee(mdev, &mdev->read_ee);
2171 if (rr)
2172 dev_err(DEV, "%d EEs in read list found!\n", rr);
2173
2174 rr = drbd_release_ee(mdev, &mdev->done_ee);
2175 if (rr)
2176 dev_err(DEV, "%d EEs in done list found!\n", rr);
2177
2178 rr = drbd_release_ee(mdev, &mdev->net_ee);
2179 if (rr)
2180 dev_err(DEV, "%d EEs in net list found!\n", rr);
2181}
2182
774b3055
PR
2183/* caution. no locking. */
2184void drbd_delete_device(unsigned int minor)
b411b363
PR
2185{
2186 struct drbd_conf *mdev = minor_to_mdev(minor);
2187
2188 if (!mdev)
2189 return;
2190
569083c0
LE
2191 idr_remove(&mdev->tconn->volumes, mdev->vnr);
2192 idr_remove(&minors, minor);
2193 synchronize_rcu();
774b3055 2194
b411b363 2195 /* paranoia asserts */
70dc65e1 2196 D_ASSERT(mdev->open_cnt == 0);
e42325a5 2197 D_ASSERT(list_empty(&mdev->tconn->data.work.q));
b411b363
PR
2198 /* end paranoia asserts */
2199
2200 del_gendisk(mdev->vdisk);
2201
2202 /* cleanup stuff that may have been allocated during
2203 * device (re-)configuration or state changes */
2204
2205 if (mdev->this_bdev)
2206 bdput(mdev->this_bdev);
2207
2208 drbd_free_resources(mdev);
2209
2210 drbd_release_ee_lists(mdev);
2211
b411b363
PR
2212 lc_destroy(mdev->act_log);
2213 lc_destroy(mdev->resync);
2214
2215 kfree(mdev->p_uuid);
2216 /* mdev->p_uuid = NULL; */
2217
b411b363
PR
2218 /* cleanup the rest that has been
2219 * allocated from drbd_new_device
2220 * and actually free the mdev itself */
2221 drbd_free_mdev(mdev);
2222}
2223
2224static void drbd_cleanup(void)
2225{
2226 unsigned int i;
81a5d60e 2227 struct drbd_conf *mdev;
b411b363
PR
2228
2229 unregister_reboot_notifier(&drbd_notifier);
2230
17a93f30
LE
2231 /* first remove proc,
2232 * drbdsetup uses its presence to detect
2233 * whether DRBD is loaded.
2234 * If we got stuck in proc removal,
2235 * but have netlink already deregistered,
2236 * some drbdsetup commands may wait forever
2237 * for an answer.
2238 */
2239 if (drbd_proc)
2240 remove_proc_entry("drbd", NULL);
2241
3b98c0c2 2242 drbd_genl_unregister();
b411b363 2243
81a5d60e
PR
2244 idr_for_each_entry(&minors, mdev, i)
2245 drbd_delete_device(i);
2246 drbd_destroy_mempools();
b411b363
PR
2247 unregister_blkdev(DRBD_MAJOR, "drbd");
2248
81a5d60e
PR
2249 idr_destroy(&minors);
2250
b411b363
PR
2251 printk(KERN_INFO "drbd: module cleanup done.\n");
2252}
2253
2254/**
2255 * drbd_congested() - Callback for pdflush
2256 * @congested_data: User data
2257 * @bdi_bits: Bits pdflush is currently interested in
2258 *
2259 * Returns 1<<BDI_async_congested and/or 1<<BDI_sync_congested if we are congested.
2260 */
2261static int drbd_congested(void *congested_data, int bdi_bits)
2262{
2263 struct drbd_conf *mdev = congested_data;
2264 struct request_queue *q;
2265 char reason = '-';
2266 int r = 0;
2267
1b881ef7 2268 if (!may_inc_ap_bio(mdev)) {
b411b363
PR
2269 /* DRBD has frozen IO */
2270 r = bdi_bits;
2271 reason = 'd';
2272 goto out;
2273 }
2274
2275 if (get_ldev(mdev)) {
2276 q = bdev_get_queue(mdev->ldev->backing_bdev);
2277 r = bdi_congested(&q->backing_dev_info, bdi_bits);
2278 put_ldev(mdev);
2279 if (r)
2280 reason = 'b';
2281 }
2282
01a311a5 2283 if (bdi_bits & (1 << BDI_async_congested) && test_bit(NET_CONGESTED, &mdev->tconn->flags)) {
b411b363
PR
2284 r |= (1 << BDI_async_congested);
2285 reason = reason == 'b' ? 'a' : 'n';
2286 }
2287
2288out:
2289 mdev->congestion_reason = reason;
2290 return r;
2291}
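/* Illustrative behaviour of the callback above: while IO is frozen
 * (may_inc_ap_bio() returns false) every requested bit is reported back,
 * e.g. bdi_bits == (1 << BDI_async_congested) | (1 << BDI_sync_congested)
 * yields both bits with reason 'd'; a congested TCP link by itself only
 * adds (1 << BDI_async_congested), reason 'n'.
 */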
2292
6699b655
PR
2293static void drbd_init_workqueue(struct drbd_work_queue* wq)
2294{
2295 sema_init(&wq->s, 0);
2296 spin_lock_init(&wq->q_lock);
2297 INIT_LIST_HEAD(&wq->q);
2298}
2299
1aba4d7f
PR
2300struct drbd_tconn *conn_by_name(const char *name)
2301{
2302 struct drbd_tconn *tconn;
2303
3b98c0c2
LE
2304 if (!name || !name[0])
2305 return NULL;
2306
543cc10b 2307 mutex_lock(&drbd_cfg_mutex);
1aba4d7f
PR
2308 list_for_each_entry(tconn, &drbd_tconns, all_tconn) {
2309 if (!strcmp(tconn->name, name))
2310 goto found;
2311 }
2312 tconn = NULL;
2313found:
543cc10b 2314 mutex_unlock(&drbd_cfg_mutex);
1aba4d7f
PR
2315 return tconn;
2316}
2317
e6ef8a5c
AG
2318static int drbd_alloc_socket(struct drbd_socket *socket)
2319{
2320 socket->rbuf = (void *) __get_free_page(GFP_KERNEL);
2321 if (!socket->rbuf)
2322 return -ENOMEM;
5a87d920
AG
2323 socket->sbuf = (void *) __get_free_page(GFP_KERNEL);
2324 if (!socket->sbuf)
2325 return -ENOMEM;
e6ef8a5c
AG
2326 return 0;
2327}
2328
2329static void drbd_free_socket(struct drbd_socket *socket)
2330{
5a87d920 2331 free_page((unsigned long) socket->sbuf);
e6ef8a5c
AG
2332 free_page((unsigned long) socket->rbuf);
2333}
2334
3b98c0c2 2335struct drbd_tconn *drbd_new_tconn(const char *name)
2111438b
PR
2336{
2337 struct drbd_tconn *tconn;
2338
2339 tconn = kzalloc(sizeof(struct drbd_tconn), GFP_KERNEL);
2340 if (!tconn)
2341 return NULL;
2342
2343 tconn->name = kstrdup(name, GFP_KERNEL);
2344 if (!tconn->name)
2345 goto fail;
2346
e6ef8a5c
AG
2347 if (drbd_alloc_socket(&tconn->data))
2348 goto fail;
2349 if (drbd_alloc_socket(&tconn->meta))
2350 goto fail;
2351
774b3055
PR
2352 if (!zalloc_cpumask_var(&tconn->cpu_mask, GFP_KERNEL))
2353 goto fail;
2354
2f5cdd0b
PR
2355 if (!tl_init(tconn))
2356 goto fail;
2357
bbeb641c 2358 tconn->cstate = C_STANDALONE;
8410da8f 2359 mutex_init(&tconn->cstate_mutex);
6699b655 2360 spin_lock_init(&tconn->req_lock);
b2fb6dbe
PR
2361 atomic_set(&tconn->net_cnt, 0);
2362 init_waitqueue_head(&tconn->net_cnt_wait);
2a67d8b9 2363 init_waitqueue_head(&tconn->ping_wait);
062e879c 2364 idr_init(&tconn->volumes);
b2fb6dbe 2365
6699b655
PR
2366 drbd_init_workqueue(&tconn->data.work);
2367 mutex_init(&tconn->data.mutex);
2368
2369 drbd_init_workqueue(&tconn->meta.work);
2370 mutex_init(&tconn->meta.mutex);
2371
392c8801
PR
2372 drbd_thread_init(tconn, &tconn->receiver, drbdd_init, "receiver");
2373 drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker");
2374 drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender");
2375
f399002e
LE
2376 tconn->res_opts = (struct res_opts) {
2377 {}, 0, /* cpu_mask */
2378 DRBD_ON_NO_DATA_DEF, /* on_no_data */
2379 };
2380
543cc10b
LE
2381 mutex_lock(&drbd_cfg_mutex);
2382 list_add_tail(&tconn->all_tconn, &drbd_tconns);
2383 mutex_unlock(&drbd_cfg_mutex);
2111438b
PR
2384
2385 return tconn;
2386
2387fail:
2f5cdd0b 2388 tl_cleanup(tconn);
774b3055 2389 free_cpumask_var(tconn->cpu_mask);
e6ef8a5c
AG
2390 drbd_free_socket(&tconn->meta);
2391 drbd_free_socket(&tconn->data);
2111438b
PR
2392 kfree(tconn->name);
2393 kfree(tconn);
2394
2395 return NULL;
2396}
2397
2398void drbd_free_tconn(struct drbd_tconn *tconn)
2399{
2111438b 2400 list_del(&tconn->all_tconn);
062e879c 2401 idr_destroy(&tconn->volumes);
2111438b 2402
774b3055 2403 free_cpumask_var(tconn->cpu_mask);
e6ef8a5c
AG
2404 drbd_free_socket(&tconn->meta);
2405 drbd_free_socket(&tconn->data);
2111438b 2406 kfree(tconn->name);
b42a70ad
PR
2407 kfree(tconn->int_dig_in);
2408 kfree(tconn->int_dig_vv);
2111438b
PR
2409 kfree(tconn);
2410}
2411
774b3055 2412enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr)
b411b363
PR
2413{
2414 struct drbd_conf *mdev;
2415 struct gendisk *disk;
2416 struct request_queue *q;
774b3055 2417 int vnr_got = vnr;
81a5d60e 2418 int minor_got = minor;
8432b314 2419 enum drbd_ret_code err = ERR_NOMEM;
774b3055
PR
2420
2421 mdev = minor_to_mdev(minor);
2422 if (mdev)
2423 return ERR_MINOR_EXISTS;
b411b363
PR
2424
2425 /* GFP_KERNEL, we are outside of all write-out paths */
2426 mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL);
2427 if (!mdev)
774b3055
PR
2428 return ERR_NOMEM;
2429
2430 mdev->tconn = tconn;
b411b363 2431 mdev->minor = minor;
3b98c0c2 2432 mdev->vnr = vnr;
b411b363
PR
2433
2434 drbd_init_set_defaults(mdev);
2435
2436 q = blk_alloc_queue(GFP_KERNEL);
2437 if (!q)
2438 goto out_no_q;
2439 mdev->rq_queue = q;
2440 q->queuedata = mdev;
b411b363
PR
2441
2442 disk = alloc_disk(1);
2443 if (!disk)
2444 goto out_no_disk;
2445 mdev->vdisk = disk;
2446
81e84650 2447 set_disk_ro(disk, true);
b411b363
PR
2448
2449 disk->queue = q;
2450 disk->major = DRBD_MAJOR;
2451 disk->first_minor = minor;
2452 disk->fops = &drbd_ops;
2453 sprintf(disk->disk_name, "drbd%d", minor);
2454 disk->private_data = mdev;
2455
2456 mdev->this_bdev = bdget(MKDEV(DRBD_MAJOR, minor));
2457 /* we have no partitions. we contain only ourselves. */
2458 mdev->this_bdev->bd_contains = mdev->this_bdev;
2459
2460 q->backing_dev_info.congested_fn = drbd_congested;
2461 q->backing_dev_info.congested_data = mdev;
2462
2f58dcfc 2463 blk_queue_make_request(q, drbd_make_request);
99432fcc
PR
2464 /* Setting the max_hw_sectors to an odd value of 8 KiB here;
2465 this triggers a max_bio_size message upon first attach or connect */
2466 blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
b411b363
PR
2467 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
2468 blk_queue_merge_bvec(q, drbd_merge_bvec);
87eeee41 2469 q->queue_lock = &mdev->tconn->req_lock; /* needed since we use */
b411b363
PR
2470
2471 mdev->md_io_page = alloc_page(GFP_KERNEL);
2472 if (!mdev->md_io_page)
2473 goto out_no_io_page;
2474
2475 if (drbd_bm_init(mdev))
2476 goto out_no_bitmap;
dac1389c 2477 mdev->read_requests = RB_ROOT;
de696716 2478 mdev->write_requests = RB_ROOT;
b411b363 2479
b411b363
PR
2480 mdev->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL);
2481 if (!mdev->current_epoch)
2482 goto out_no_epoch;
2483
2484 INIT_LIST_HEAD(&mdev->current_epoch->list);
2485 mdev->epochs = 1;
2486
81a5d60e 2487 if (!idr_pre_get(&minors, GFP_KERNEL))
8432b314
LE
2488 goto out_no_minor_idr;
2489 if (idr_get_new_above(&minors, mdev, minor, &minor_got))
2490 goto out_no_minor_idr;
81a5d60e 2491 if (minor_got != minor) {
8432b314
LE
2492 err = ERR_MINOR_EXISTS;
2493 drbd_msg_put_info("requested minor exists already");
569083c0 2494 goto out_idr_remove_minor;
81a5d60e 2495 }
8432b314
LE
2496
2497 if (!idr_pre_get(&tconn->volumes, GFP_KERNEL))
2498 goto out_idr_remove_minor;
2499 if (idr_get_new_above(&tconn->volumes, mdev, vnr, &vnr_got))
2500 goto out_idr_remove_minor;
2501 if (vnr_got != vnr) {
2502 err = ERR_INVALID_REQUEST;
2503 drbd_msg_put_info("requested volume exists already");
2504 goto out_idr_remove_vol;
2505 }
774b3055
PR
2506 add_disk(disk);
2507
2325eb66
PR
2508 /* inherit the connection state */
2509 mdev->state.conn = tconn->cstate;
2510 if (mdev->state.conn == C_WF_REPORT_PARAMS)
2511 drbd_connected(vnr, mdev, tconn);
2512
774b3055 2513 return NO_ERROR;
b411b363 2514
569083c0
LE
2515out_idr_remove_vol:
2516 idr_remove(&tconn->volumes, vnr_got);
8432b314
LE
2517out_idr_remove_minor:
2518 idr_remove(&minors, minor_got);
569083c0 2519 synchronize_rcu();
8432b314 2520out_no_minor_idr:
81a5d60e 2521 kfree(mdev->current_epoch);
b411b363 2522out_no_epoch:
b411b363
PR
2523 drbd_bm_cleanup(mdev);
2524out_no_bitmap:
2525 __free_page(mdev->md_io_page);
2526out_no_io_page:
2527 put_disk(disk);
2528out_no_disk:
2529 blk_cleanup_queue(q);
2530out_no_q:
b411b363 2531 kfree(mdev);
8432b314 2532 return err;
b411b363
PR
2533}
2534
2535/* counterpart of drbd_new_device.
2536 * last part of drbd_delete_device. */
2537void drbd_free_mdev(struct drbd_conf *mdev)
2538{
2539 kfree(mdev->current_epoch);
b411b363
PR
2540 if (mdev->bitmap) /* should no longer be there. */
2541 drbd_bm_cleanup(mdev);
2542 __free_page(mdev->md_io_page);
2543 put_disk(mdev->vdisk);
2544 blk_cleanup_queue(mdev->rq_queue);
b411b363
PR
2545 kfree(mdev);
2546}
2547
2548
2549int __init drbd_init(void)
2550{
2551 int err;
2552
fd340c12 2553 BUILD_BUG_ON(sizeof(struct p_header80) != sizeof(struct p_header95));
6038178e 2554 BUILD_BUG_ON(sizeof(struct p_connection_features) != 80);
b411b363 2555
2b8a90b5 2556 if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
b411b363 2557 printk(KERN_ERR
81a5d60e 2558 "drbd: invalid minor_count (%d)\n", minor_count);
b411b363
PR
2559#ifdef MODULE
2560 return -EINVAL;
2561#else
2562 minor_count = 8;
2563#endif
2564 }
2565
b411b363
PR
2566 err = register_blkdev(DRBD_MAJOR, "drbd");
2567 if (err) {
2568 printk(KERN_ERR
2569 "drbd: unable to register block device major %d\n",
2570 DRBD_MAJOR);
2571 return err;
2572 }
2573
3b98c0c2
LE
2574 err = drbd_genl_register();
2575 if (err) {
2576 printk(KERN_ERR "drbd: unable to register generic netlink family\n");
2577 goto fail;
2578 }
2579
2580
b411b363
PR
2581 register_reboot_notifier(&drbd_notifier);
2582
2583 /*
2584 * allocate all necessary structs
2585 */
2586 err = -ENOMEM;
2587
2588 init_waitqueue_head(&drbd_pp_wait);
2589
2590 drbd_proc = NULL; /* play safe for drbd_cleanup */
81a5d60e 2591 idr_init(&minors);
b411b363
PR
2592
2593 err = drbd_create_mempools();
2594 if (err)
3b98c0c2 2595 goto fail;
b411b363 2596
8c484ee4 2597 drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
b411b363
PR
2598 if (!drbd_proc) {
2599 printk(KERN_ERR "drbd: unable to register proc file\n");
3b98c0c2 2600 goto fail;
b411b363
PR
2601 }
2602
2603 rwlock_init(&global_state_lock);
2111438b 2604 INIT_LIST_HEAD(&drbd_tconns);
b411b363
PR
2605
2606 printk(KERN_INFO "drbd: initialized. "
2607 "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n",
2608 API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX);
2609 printk(KERN_INFO "drbd: %s\n", drbd_buildtag());
2610 printk(KERN_INFO "drbd: registered as block device major %d\n",
2611 DRBD_MAJOR);
b411b363
PR
2612
2613 return 0; /* Success! */
2614
3b98c0c2 2615fail:
b411b363
PR
2616 drbd_cleanup();
2617 if (err == -ENOMEM)
2618 /* currently always the case */
2619 printk(KERN_ERR "drbd: ran out of memory\n");
2620 else
2621 printk(KERN_ERR "drbd: initialization failure\n");
2622 return err;
2623}
2624
2625void drbd_free_bc(struct drbd_backing_dev *ldev)
2626{
2627 if (ldev == NULL)
2628 return;
2629
e525fd89
TH
2630 blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
2631 blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
b411b363
PR
2632
2633 kfree(ldev);
2634}
2635
360cc740
PR
2636void drbd_free_sock(struct drbd_tconn *tconn)
2637{
2638 if (tconn->data.socket) {
2639 mutex_lock(&tconn->data.mutex);
2640 kernel_sock_shutdown(tconn->data.socket, SHUT_RDWR);
2641 sock_release(tconn->data.socket);
2642 tconn->data.socket = NULL;
2643 mutex_unlock(&tconn->data.mutex);
b411b363 2644 }
360cc740
PR
2645 if (tconn->meta.socket) {
2646 mutex_lock(&tconn->meta.mutex);
2647 kernel_sock_shutdown(tconn->meta.socket, SHUT_RDWR);
2648 sock_release(tconn->meta.socket);
2649 tconn->meta.socket = NULL;
2650 mutex_unlock(&tconn->meta.mutex);
b411b363
PR
2651 }
2652}
2653
2654
2655void drbd_free_resources(struct drbd_conf *mdev)
2656{
f399002e
LE
2657 crypto_free_hash(mdev->tconn->csums_tfm);
2658 mdev->tconn->csums_tfm = NULL;
2659 crypto_free_hash(mdev->tconn->verify_tfm);
2660 mdev->tconn->verify_tfm = NULL;
a0638456
PR
2661 crypto_free_hash(mdev->tconn->cram_hmac_tfm);
2662 mdev->tconn->cram_hmac_tfm = NULL;
2663 crypto_free_hash(mdev->tconn->integrity_w_tfm);
2664 mdev->tconn->integrity_w_tfm = NULL;
2665 crypto_free_hash(mdev->tconn->integrity_r_tfm);
2666 mdev->tconn->integrity_r_tfm = NULL;
b411b363 2667
360cc740 2668 drbd_free_sock(mdev->tconn);
b411b363
PR
2669
2670 __no_warn(local,
2671 drbd_free_bc(mdev->ldev);
2672 mdev->ldev = NULL;);
2673}
2674
2675/* meta data management */
2676
2677struct meta_data_on_disk {
2678 u64 la_size; /* last agreed size. */
2679 u64 uuid[UI_SIZE]; /* UUIDs. */
2680 u64 device_uuid;
2681 u64 reserved_u64_1;
2682 u32 flags; /* MDF */
2683 u32 magic;
2684 u32 md_size_sect;
2685 u32 al_offset; /* offset to this block */
2686 u32 al_nr_extents; /* important for restoring the AL */
f399002e 2687 /* `-- act_log->nr_elements <-- ldev->dc.al_extents */
b411b363
PR
2688 u32 bm_offset; /* offset to the bitmap, from here */
2689 u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */
99432fcc
PR
2690 u32 la_peer_max_bio_size; /* last peer max_bio_size */
2691 u32 reserved_u32[3];
b411b363
PR
2692
2693} __packed;
2694
2695/**
2696 * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
2697 * @mdev: DRBD device.
2698 */
2699void drbd_md_sync(struct drbd_conf *mdev)
2700{
2701 struct meta_data_on_disk *buffer;
2702 sector_t sector;
2703 int i;
2704
ee15b038
LE
2705 del_timer(&mdev->md_sync_timer);
2706 /* timer may be rearmed by drbd_md_mark_dirty() now. */
b411b363
PR
2707 if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
2708 return;
b411b363
PR
2709
2710 /* We use D_FAILED here and not D_ATTACHING because we try to write
2711 * metadata even if we detach due to a disk failure! */
2712 if (!get_ldev_if_state(mdev, D_FAILED))
2713 return;
2714
b411b363
PR
2715 mutex_lock(&mdev->md_io_mutex);
2716 buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
2717 memset(buffer, 0, 512);
2718
2719 buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
2720 for (i = UI_CURRENT; i < UI_SIZE; i++)
2721 buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]);
2722 buffer->flags = cpu_to_be32(mdev->ldev->md.flags);
2723 buffer->magic = cpu_to_be32(DRBD_MD_MAGIC);
2724
2725 buffer->md_size_sect = cpu_to_be32(mdev->ldev->md.md_size_sect);
2726 buffer->al_offset = cpu_to_be32(mdev->ldev->md.al_offset);
2727 buffer->al_nr_extents = cpu_to_be32(mdev->act_log->nr_elements);
2728 buffer->bm_bytes_per_bit = cpu_to_be32(BM_BLOCK_SIZE);
2729 buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid);
2730
2731 buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset);
99432fcc 2732 buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size);
b411b363
PR
2733
2734 D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset);
2735 sector = mdev->ldev->md.md_offset;
2736
3fbf4d21 2737 if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
b411b363
PR
2738 /* this was a try anyways ... */
2739 dev_err(DEV, "meta data update failed!\n");
81e84650 2740 drbd_chk_io_error(mdev, 1, true);
b411b363
PR
2741 }
2742
2743 /* Update mdev->ldev->md.la_size_sect,
2744 * since we just updated it in the on-disk metadata. */
2745 mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev);
2746
2747 mutex_unlock(&mdev->md_io_mutex);
2748 put_ldev(mdev);
2749}
2750
2751/**
2752 * drbd_md_read() - Reads in the meta data super block
2753 * @mdev: DRBD device.
2754 * @bdev: Device from which the meta data should be read in.
2755 *
116676ca 2756 * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case
b411b363
PR
2757 * something goes wrong. Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID.
2758 */
2759int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
2760{
2761 struct meta_data_on_disk *buffer;
2762 int i, rv = NO_ERROR;
2763
2764 if (!get_ldev_if_state(mdev, D_ATTACHING))
2765 return ERR_IO_MD_DISK;
2766
b411b363
PR
2767 mutex_lock(&mdev->md_io_mutex);
2768 buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
2769
3fbf4d21 2770 if (drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
25985edc 2771 /* NOTE: can't do normal error processing here as this is
b411b363
PR
2772 called BEFORE disk is attached */
2773 dev_err(DEV, "Error while reading metadata.\n");
2774 rv = ERR_IO_MD_DISK;
2775 goto err;
2776 }
2777
e7fad8af 2778 if (buffer->magic != cpu_to_be32(DRBD_MD_MAGIC)) {
b411b363
PR
2779 dev_err(DEV, "Error while reading metadata, magic not found.\n");
2780 rv = ERR_MD_INVALID;
2781 goto err;
2782 }
2783 if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) {
2784 dev_err(DEV, "unexpected al_offset: %d (expected %d)\n",
2785 be32_to_cpu(buffer->al_offset), bdev->md.al_offset);
2786 rv = ERR_MD_INVALID;
2787 goto err;
2788 }
2789 if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) {
2790 dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n",
2791 be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset);
2792 rv = ERR_MD_INVALID;
2793 goto err;
2794 }
2795 if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) {
2796 dev_err(DEV, "unexpected md_size: %u (expected %u)\n",
2797 be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect);
2798 rv = ERR_MD_INVALID;
2799 goto err;
2800 }
2801
2802 if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) {
2803 dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n",
2804 be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE);
2805 rv = ERR_MD_INVALID;
2806 goto err;
2807 }
2808
2809 bdev->md.la_size_sect = be64_to_cpu(buffer->la_size);
2810 for (i = UI_CURRENT; i < UI_SIZE; i++)
2811 bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
2812 bdev->md.flags = be32_to_cpu(buffer->flags);
f399002e 2813 bdev->dc.al_extents = be32_to_cpu(buffer->al_nr_extents);
b411b363
PR
2814 bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
2815
87eeee41 2816 spin_lock_irq(&mdev->tconn->req_lock);
99432fcc
PR
2817 if (mdev->state.conn < C_CONNECTED) {
2818 int peer;
2819 peer = be32_to_cpu(buffer->la_peer_max_bio_size);
2820 peer = max_t(int, peer, DRBD_MAX_BIO_SIZE_SAFE);
2821 mdev->peer_max_bio_size = peer;
2822 }
87eeee41 2823 spin_unlock_irq(&mdev->tconn->req_lock);
99432fcc 2824
f399002e
LE
2825 if (bdev->dc.al_extents < 7)
2826 bdev->dc.al_extents = 127;
b411b363
PR
2827
2828 err:
2829 mutex_unlock(&mdev->md_io_mutex);
2830 put_ldev(mdev);
2831
2832 return rv;
2833}
2834
2835/**
2836 * drbd_md_mark_dirty() - Mark meta data super block as dirty
2837 * @mdev: DRBD device.
2838 *
2839 * Call this function if you change anything that should be written to
2840 * the meta-data super block. This function sets MD_DIRTY and starts a
2841 * timer that ensures drbd_md_sync() gets called within five seconds.
2842 */
ca0e6098 2843#ifdef DEBUG
ee15b038
LE
2844void drbd_md_mark_dirty_(struct drbd_conf *mdev, unsigned int line, const char *func)
2845{
2846 if (!test_and_set_bit(MD_DIRTY, &mdev->flags)) {
2847 mod_timer(&mdev->md_sync_timer, jiffies + HZ);
2848 mdev->last_md_mark_dirty.line = line;
2849 mdev->last_md_mark_dirty.func = func;
2850 }
2851}
2852#else
b411b363
PR
2853void drbd_md_mark_dirty(struct drbd_conf *mdev)
2854{
ee15b038 2855 if (!test_and_set_bit(MD_DIRTY, &mdev->flags))
ca0e6098 2856 mod_timer(&mdev->md_sync_timer, jiffies + 5*HZ);
b411b363 2857}
ee15b038 2858#endif
b411b363
PR
2859
2860static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local)
2861{
2862 int i;
2863
62b0da3a 2864 for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++)
b411b363 2865 mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i];
b411b363
PR
2866}
2867
2868void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
2869{
2870 if (idx == UI_CURRENT) {
2871 if (mdev->state.role == R_PRIMARY)
2872 val |= 1;
2873 else
2874 val &= ~((u64)1);
2875
2876 drbd_set_ed_uuid(mdev, val);
2877 }
2878
2879 mdev->ldev->md.uuid[idx] = val;
b411b363
PR
2880 drbd_md_mark_dirty(mdev);
2881}
2882
2883
2884void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
2885{
2886 if (mdev->ldev->md.uuid[idx]) {
2887 drbd_uuid_move_history(mdev);
2888 mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx];
b411b363
PR
2889 }
2890 _drbd_uuid_set(mdev, idx, val);
2891}
2892
2893/**
2894 * drbd_uuid_new_current() - Creates a new current UUID
2895 * @mdev: DRBD device.
2896 *
2897 * Creates a new current UUID, and rotates the old current UUID into
2898 * the bitmap slot. Causes an incremental resync upon next connect.
2899 */
2900void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
2901{
2902 u64 val;
62b0da3a
LE
2903 unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
2904
2905 if (bm_uuid)
2906 dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
b411b363 2907
b411b363 2908 mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT];
b411b363
PR
2909
2910 get_random_bytes(&val, sizeof(u64));
2911 _drbd_uuid_set(mdev, UI_CURRENT, val);
62b0da3a 2912 drbd_print_uuids(mdev, "new current UUID");
aaa8e2b3
LE
2913 /* get it to stable storage _now_ */
2914 drbd_md_sync(mdev);
b411b363
PR
2915}
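/* Net effect, spelled out for clarity (descriptive note, not new behaviour):
 * the previous UI_CURRENT value now sits in UI_BITMAP, and a fresh random
 * value becomes UI_CURRENT; since _drbd_uuid_set() is used, bit 0 of the new
 * UUID reflects whether this node is R_PRIMARY at the time of the call.
 */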
2916
2917void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
2918{
2919 if (mdev->ldev->md.uuid[UI_BITMAP] == 0 && val == 0)
2920 return;
2921
2922 if (val == 0) {
2923 drbd_uuid_move_history(mdev);
2924 mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
2925 mdev->ldev->md.uuid[UI_BITMAP] = 0;
b411b363 2926 } else {
62b0da3a
LE
2927 unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
2928 if (bm_uuid)
2929 dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
b411b363 2930
62b0da3a 2931 mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1);
b411b363
PR
2932 }
2933 drbd_md_mark_dirty(mdev);
2934}
2935
2936/**
2937 * drbd_bmio_set_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
2938 * @mdev: DRBD device.
2939 *
2940 * Sets all bits in the bitmap and writes the whole bitmap to stable storage.
2941 */
2942int drbd_bmio_set_n_write(struct drbd_conf *mdev)
2943{
2944 int rv = -EIO;
2945
2946 if (get_ldev_if_state(mdev, D_ATTACHING)) {
2947 drbd_md_set_flag(mdev, MDF_FULL_SYNC);
2948 drbd_md_sync(mdev);
2949 drbd_bm_set_all(mdev);
2950
2951 rv = drbd_bm_write(mdev);
2952
2953 if (!rv) {
2954 drbd_md_clear_flag(mdev, MDF_FULL_SYNC);
2955 drbd_md_sync(mdev);
2956 }
2957
2958 put_ldev(mdev);
2959 }
2960
2961 return rv;
2962}
2963
2964/**
2965 * drbd_bmio_clear_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
2966 * @mdev: DRBD device.
2967 *
2968 * Clears all bits in the bitmap and writes the whole bitmap to stable storage.
2969 */
2970int drbd_bmio_clear_n_write(struct drbd_conf *mdev)
2971{
2972 int rv = -EIO;
2973
0778286a 2974 drbd_resume_al(mdev);
b411b363
PR
2975 if (get_ldev_if_state(mdev, D_ATTACHING)) {
2976 drbd_bm_clear_all(mdev);
2977 rv = drbd_bm_write(mdev);
2978 put_ldev(mdev);
2979 }
2980
2981 return rv;
2982}
2983
99920dc5 2984static int w_bitmap_io(struct drbd_work *w, int unused)
b411b363
PR
2985{
2986 struct bm_io_work *work = container_of(w, struct bm_io_work, w);
00d56944 2987 struct drbd_conf *mdev = w->mdev;
02851e9f 2988 int rv = -EIO;
b411b363
PR
2989
2990 D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);
2991
02851e9f 2992 if (get_ldev(mdev)) {
20ceb2b2 2993 drbd_bm_lock(mdev, work->why, work->flags);
02851e9f
LE
2994 rv = work->io_fn(mdev);
2995 drbd_bm_unlock(mdev);
2996 put_ldev(mdev);
2997 }
b411b363 2998
4738fa16 2999 clear_bit_unlock(BITMAP_IO, &mdev->flags);
b411b363
PR
3000 wake_up(&mdev->misc_wait);
3001
3002 if (work->done)
3003 work->done(mdev, rv);
3004
3005 clear_bit(BITMAP_IO_QUEUED, &mdev->flags);
3006 work->why = NULL;
20ceb2b2 3007 work->flags = 0;
b411b363 3008
99920dc5 3009 return 0;
b411b363
PR
3010}
3011
82f59cc6
LE
3012void drbd_ldev_destroy(struct drbd_conf *mdev)
3013{
3014 lc_destroy(mdev->resync);
3015 mdev->resync = NULL;
3016 lc_destroy(mdev->act_log);
3017 mdev->act_log = NULL;
3018 __no_warn(local,
3019 drbd_free_bc(mdev->ldev);
3020 mdev->ldev = NULL;);
3021
82f59cc6
LE
3022 clear_bit(GO_DISKLESS, &mdev->flags);
3023}
3024
99920dc5 3025static int w_go_diskless(struct drbd_work *w, int unused)
e9e6f3ec 3026{
00d56944
PR
3027 struct drbd_conf *mdev = w->mdev;
3028
e9e6f3ec 3029 D_ASSERT(mdev->state.disk == D_FAILED);
9d282875
LE
3030 /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
3031 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
82f59cc6
LE
3032 * the protected members anymore, though, so once put_ldev reaches zero
3033 * again, it will be safe to free them. */
e9e6f3ec 3034 drbd_force_state(mdev, NS(disk, D_DISKLESS));
99920dc5 3035 return 0;
e9e6f3ec
LE
3036}
3037
3038void drbd_go_diskless(struct drbd_conf *mdev)
3039{
3040 D_ASSERT(mdev->state.disk == D_FAILED);
3041 if (!test_and_set_bit(GO_DISKLESS, &mdev->flags))
e42325a5 3042 drbd_queue_work(&mdev->tconn->data.work, &mdev->go_diskless);
e9e6f3ec
LE
3043}
3044
b411b363
PR
3045/**
3046 * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap
3047 * @mdev: DRBD device.
3048 * @io_fn: IO callback to be called when bitmap IO is possible
3049 * @done: callback to be called after the bitmap IO was performed
3050 * @why: Descriptive text of the reason for doing the IO
3051 *
3052 * While IO on the bitmap happens we freeze application IO, thus ensuring
3053 * that drbd_set_out_of_sync() cannot be called. This function MAY ONLY be
3054 * called from worker context. It MUST NOT be used while a previous such
3055 * work is still pending!
3056 */
3057void drbd_queue_bitmap_io(struct drbd_conf *mdev,
3058 int (*io_fn)(struct drbd_conf *),
3059 void (*done)(struct drbd_conf *, int),
20ceb2b2 3060 char *why, enum bm_flag flags)
b411b363 3061{
e6b3ea83 3062 D_ASSERT(current == mdev->tconn->worker.task);
b411b363
PR
3063
3064 D_ASSERT(!test_bit(BITMAP_IO_QUEUED, &mdev->flags));
3065 D_ASSERT(!test_bit(BITMAP_IO, &mdev->flags));
3066 D_ASSERT(list_empty(&mdev->bm_io_work.w.list));
3067 if (mdev->bm_io_work.why)
3068 dev_err(DEV, "FIXME going to queue '%s' but '%s' still pending?\n",
3069 why, mdev->bm_io_work.why);
3070
3071 mdev->bm_io_work.io_fn = io_fn;
3072 mdev->bm_io_work.done = done;
3073 mdev->bm_io_work.why = why;
20ceb2b2 3074 mdev->bm_io_work.flags = flags;
b411b363 3075
87eeee41 3076 spin_lock_irq(&mdev->tconn->req_lock);
b411b363
PR
3077 set_bit(BITMAP_IO, &mdev->flags);
3078 if (atomic_read(&mdev->ap_bio_cnt) == 0) {
127b3178 3079 if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
e42325a5 3080 drbd_queue_work(&mdev->tconn->data.work, &mdev->bm_io_work.w);
b411b363 3081 }
87eeee41 3082 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363
PR
3083}
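/* Usage sketch (illustrative only; the "why" string and flags value are
 * assumptions, not taken from a real call site): queueing a full
 * "set all bits and write out" cycle from worker context could look like
 *
 *	drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
 *			     "example set_n_write", BM_LOCKED_SET_ALLOWED);
 *
 * callers outside the worker use drbd_bitmap_io() below instead.
 */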
3084
3085/**
3086 * drbd_bitmap_io() - Does an IO operation on the whole bitmap
3087 * @mdev: DRBD device.
3088 * @io_fn: IO callback to be called when bitmap IO is possible
3089 * @why: Descriptive text of the reason for doing the IO
3090 *
3091 * Freezes application IO while the actual IO operation runs. This
3092 * function MAY NOT be called from worker context.
3093 */
20ceb2b2
LE
3094int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *),
3095 char *why, enum bm_flag flags)
b411b363
PR
3096{
3097 int rv;
3098
e6b3ea83 3099 D_ASSERT(current != mdev->tconn->worker.task);
b411b363 3100
20ceb2b2
LE
3101 if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
3102 drbd_suspend_io(mdev);
b411b363 3103
20ceb2b2 3104 drbd_bm_lock(mdev, why, flags);
b411b363
PR
3105 rv = io_fn(mdev);
3106 drbd_bm_unlock(mdev);
3107
20ceb2b2
LE
3108 if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
3109 drbd_resume_io(mdev);
b411b363
PR
3110
3111 return rv;
3112}
3113
3114void drbd_md_set_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
3115{
3116 if ((mdev->ldev->md.flags & flag) != flag) {
3117 drbd_md_mark_dirty(mdev);
3118 mdev->ldev->md.flags |= flag;
3119 }
3120}
3121
3122void drbd_md_clear_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
3123{
3124 if ((mdev->ldev->md.flags & flag) != 0) {
3125 drbd_md_mark_dirty(mdev);
3126 mdev->ldev->md.flags &= ~flag;
3127 }
3128}
3129int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag)
3130{
3131 return (bdev->md.flags & flag) != 0;
3132}
3133
3134static void md_sync_timer_fn(unsigned long data)
3135{
3136 struct drbd_conf *mdev = (struct drbd_conf *) data;
3137
e42325a5 3138 drbd_queue_work_front(&mdev->tconn->data.work, &mdev->md_sync_work);
b411b363
PR
3139}
3140
99920dc5 3141static int w_md_sync(struct drbd_work *w, int unused)
b411b363 3142{
00d56944
PR
3143 struct drbd_conf *mdev = w->mdev;
3144
b411b363 3145 dev_warn(DEV, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
ee15b038
LE
3146#ifdef DEBUG
3147 dev_warn(DEV, "last md_mark_dirty: %s:%u\n",
3148 mdev->last_md_mark_dirty.func, mdev->last_md_mark_dirty.line);
3149#endif
b411b363 3150 drbd_md_sync(mdev);
99920dc5 3151 return 0;
b411b363
PR
3152}
3153
d8763023 3154const char *cmdname(enum drbd_packet cmd)
f2ad9063
AG
3155{
3156 /* THINK may need to become several global tables
3157 * when we want to support more than
3158 * one PRO_VERSION */
3159 static const char *cmdnames[] = {
3160 [P_DATA] = "Data",
3161 [P_DATA_REPLY] = "DataReply",
3162 [P_RS_DATA_REPLY] = "RSDataReply",
3163 [P_BARRIER] = "Barrier",
3164 [P_BITMAP] = "ReportBitMap",
3165 [P_BECOME_SYNC_TARGET] = "BecomeSyncTarget",
3166 [P_BECOME_SYNC_SOURCE] = "BecomeSyncSource",
3167 [P_UNPLUG_REMOTE] = "UnplugRemote",
3168 [P_DATA_REQUEST] = "DataRequest",
3169 [P_RS_DATA_REQUEST] = "RSDataRequest",
3170 [P_SYNC_PARAM] = "SyncParam",
3171 [P_SYNC_PARAM89] = "SyncParam89",
3172 [P_PROTOCOL] = "ReportProtocol",
3173 [P_UUIDS] = "ReportUUIDs",
3174 [P_SIZES] = "ReportSizes",
3175 [P_STATE] = "ReportState",
3176 [P_SYNC_UUID] = "ReportSyncUUID",
3177 [P_AUTH_CHALLENGE] = "AuthChallenge",
3178 [P_AUTH_RESPONSE] = "AuthResponse",
3179 [P_PING] = "Ping",
3180 [P_PING_ACK] = "PingAck",
3181 [P_RECV_ACK] = "RecvAck",
3182 [P_WRITE_ACK] = "WriteAck",
3183 [P_RS_WRITE_ACK] = "RSWriteAck",
7be8da07 3184 [P_DISCARD_WRITE] = "DiscardWrite",
f2ad9063
AG
3185 [P_NEG_ACK] = "NegAck",
3186 [P_NEG_DREPLY] = "NegDReply",
3187 [P_NEG_RS_DREPLY] = "NegRSDReply",
3188 [P_BARRIER_ACK] = "BarrierAck",
3189 [P_STATE_CHG_REQ] = "StateChgRequest",
3190 [P_STATE_CHG_REPLY] = "StateChgReply",
3191 [P_OV_REQUEST] = "OVRequest",
3192 [P_OV_REPLY] = "OVReply",
3193 [P_OV_RESULT] = "OVResult",
3194 [P_CSUM_RS_REQUEST] = "CsumRSRequest",
3195 [P_RS_IS_IN_SYNC] = "CsumRSIsInSync",
3196 [P_COMPRESSED_BITMAP] = "CBitmap",
3197 [P_DELAY_PROBE] = "DelayProbe",
3198 [P_OUT_OF_SYNC] = "OutOfSync",
7be8da07 3199 [P_RETRY_WRITE] = "RetryWrite",
f2ad9063
AG
3200 };
3201
e5d6f33a
AG
3202 if (cmd == P_INITIAL_META)
3203 return "InitialMeta";
3204 if (cmd == P_INITIAL_DATA)
3205 return "InitialData";
6038178e
AG
3206 if (cmd == P_CONNECTION_FEATURES)
3207 return "ConnectionFeatures";
6e849ce8 3208 if (cmd >= ARRAY_SIZE(cmdnames))
f2ad9063
AG
3209 return "Unknown";
3210 return cmdnames[cmd];
3211}
3212
7be8da07
AG
3213/**
3214 * drbd_wait_misc - wait for a request to make progress
3215 * @mdev: device associated with the request
3216 * @i: the struct drbd_interval embedded in struct drbd_request or
3217 * struct drbd_peer_request
3218 */
3219int drbd_wait_misc(struct drbd_conf *mdev, struct drbd_interval *i)
3220{
3221 struct net_conf *net_conf = mdev->tconn->net_conf;
3222 DEFINE_WAIT(wait);
3223 long timeout;
3224
3225 if (!net_conf)
3226 return -ETIMEDOUT;
3227 timeout = MAX_SCHEDULE_TIMEOUT;
3228 if (net_conf->ko_count)
3229 timeout = net_conf->timeout * HZ / 10 * net_conf->ko_count;
3230
3231 /* Indicate to wake up mdev->misc_wait on progress. */
3232 i->waiting = true;
3233 prepare_to_wait(&mdev->misc_wait, &wait, TASK_INTERRUPTIBLE);
3234 spin_unlock_irq(&mdev->tconn->req_lock);
3235 timeout = schedule_timeout(timeout);
3236 finish_wait(&mdev->misc_wait, &wait);
3237 spin_lock_irq(&mdev->tconn->req_lock);
3238 if (!timeout || mdev->state.conn < C_CONNECTED)
3239 return -ETIMEDOUT;
3240 if (signal_pending(current))
3241 return -ERESTARTSYS;
3242 return 0;
3243}
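/* Worked example (values assumed for illustration): with net_conf->timeout
 * == 60 (i.e. 6.0 seconds, expressed in units of 0.1 s) and ko_count == 7,
 * the schedule_timeout() above is bounded by
 *
 *	timeout = 60 * HZ / 10 * 7 == 42 * HZ	(42 seconds in jiffies)
 *
 * while ko_count == 0 leaves it at MAX_SCHEDULE_TIMEOUT.
 */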
3244
b411b363
PR
3245#ifdef CONFIG_DRBD_FAULT_INJECTION
3246/* Fault insertion support including random number generator shamelessly
3247 * stolen from kernel/rcutorture.c */
3248struct fault_random_state {
3249 unsigned long state;
3250 unsigned long count;
3251};
3252
3253#define FAULT_RANDOM_MULT 39916801 /* prime */
3254#define FAULT_RANDOM_ADD 479001701 /* prime */
3255#define FAULT_RANDOM_REFRESH 10000
3256
3257/*
3258 * Crude but fast random-number generator. Uses a linear congruential
3259 * generator, with occasional help from get_random_bytes().
3260 */
3261static unsigned long
3262_drbd_fault_random(struct fault_random_state *rsp)
3263{
3264 long refresh;
3265
49829ea7 3266 if (!rsp->count--) {
b411b363
PR
3267 get_random_bytes(&refresh, sizeof(refresh));
3268 rsp->state += refresh;
3269 rsp->count = FAULT_RANDOM_REFRESH;
3270 }
3271 rsp->state = rsp->state * FAULT_RANDOM_MULT + FAULT_RANDOM_ADD;
3272 return swahw32(rsp->state);
3273}
3274
3275static char *
3276_drbd_fault_str(unsigned int type) {
3277 static char *_faults[] = {
3278 [DRBD_FAULT_MD_WR] = "Meta-data write",
3279 [DRBD_FAULT_MD_RD] = "Meta-data read",
3280 [DRBD_FAULT_RS_WR] = "Resync write",
3281 [DRBD_FAULT_RS_RD] = "Resync read",
3282 [DRBD_FAULT_DT_WR] = "Data write",
3283 [DRBD_FAULT_DT_RD] = "Data read",
3284 [DRBD_FAULT_DT_RA] = "Data read ahead",
3285 [DRBD_FAULT_BM_ALLOC] = "BM allocation",
6b4388ac
PR
3286 [DRBD_FAULT_AL_EE] = "EE allocation",
3287 [DRBD_FAULT_RECEIVE] = "receive data corruption",
b411b363
PR
3288 };
3289
3290 return (type < DRBD_FAULT_MAX) ? _faults[type] : "**Unknown**";
3291}
3292
3293unsigned int
3294_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type)
3295{
3296 static struct fault_random_state rrs = {0, 0};
3297
3298 unsigned int ret = (
3299 (fault_devs == 0 ||
3300 ((1 << mdev_to_minor(mdev)) & fault_devs) != 0) &&
3301 (((_drbd_fault_random(&rrs) % 100) + 1) <= fault_rate));
3302
3303 if (ret) {
3304 fault_count++;
3305
7383506c 3306 if (__ratelimit(&drbd_ratelimit_state))
b411b363
PR
3307 dev_warn(DEV, "***Simulating %s failure\n",
3308 _drbd_fault_str(type));
3309 }
3310
3311 return ret;
3312}
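/* Illustrative numbers (module parameter values assumed): with fault_rate = 5
 * the check above becomes ((_drbd_fault_random(&rrs) % 100) + 1) <= 5, i.e.
 * roughly a 5% chance per IO of simulating the given fault type; fault_devs
 * acts as a minor-number bitmask ((1 << minor) & fault_devs), with 0
 * meaning "all devices".
 */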
3313#endif
3314
3315const char *drbd_buildtag(void)
3316{
3317 /* A DRBD module built from external sources contains a reference to the
3318 git hash of its source code here. */
3319
3320 static char buildtag[38] = "\0uilt-in";
3321
3322 if (buildtag[0] == 0) {
3323#ifdef CONFIG_MODULES
3324 if (THIS_MODULE != NULL)
3325 sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion);
3326 else
3327#endif
3328 buildtag[0] = 'b';
3329 }
3330
3331 return buildtag;
3332}
3333
3334module_init(drbd_init)
3335module_exit(drbd_cleanup)
3336
b411b363
PR
3337EXPORT_SYMBOL(drbd_conn_str);
3338EXPORT_SYMBOL(drbd_role_str);
3339EXPORT_SYMBOL(drbd_disk_str);
3340EXPORT_SYMBOL(drbd_set_st_err_str);