]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blob - drivers/block/drbd/drbd_nl.c
drbd: re-add lost conf_mutex protection in drbd_set_role
[mirror_ubuntu-zesty-kernel.git] / drivers / block / drbd / drbd_nl.c
1 /*
2 drbd_nl.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24 */
25
26 #include <linux/module.h>
27 #include <linux/drbd.h>
28 #include <linux/in.h>
29 #include <linux/fs.h>
30 #include <linux/file.h>
31 #include <linux/slab.h>
32 #include <linux/blkpg.h>
33 #include <linux/cpumask.h>
34 #include "drbd_int.h"
35 #include "drbd_protocol.h"
36 #include "drbd_req.h"
37 #include <asm/unaligned.h>
38 #include <linux/drbd_limits.h>
39 #include <linux/kthread.h>
40
41 #include <net/genetlink.h>
42
43 /* .doit */
44 // int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
45 // int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);
46
47 int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info);
48 int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info);
49
50 int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info);
51 int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info);
52 int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);
53
54 int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
55 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
56 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
57 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
58 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
59 int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
60 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
61 int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
62 int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
63 int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
64 int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
65 int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
66 int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
67 int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
68 int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
69 int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
70 int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
71 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
72 int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
73 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
74 /* .dumpit */
75 int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
76
77 #include <linux/drbd_genl_api.h>
78 #include "drbd_nla.h"
79 #include <linux/genl_magic_func.h>
80
81 /* used blkdev_get_by_path, to claim our meta data device(s) */
82 static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
83
84 static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
85 {
86 genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
87 if (genlmsg_reply(skb, info))
88 printk(KERN_ERR "drbd: error sending genl reply\n");
89 }
90
91 /* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
92 * reason it could fail was no space in skb, and there are 4k available. */
93 int drbd_msg_put_info(struct sk_buff *skb, const char *info)
94 {
95 struct nlattr *nla;
96 int err = -EMSGSIZE;
97
98 if (!info || !info[0])
99 return 0;
100
101 nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
102 if (!nla)
103 return err;
104
105 err = nla_put_string(skb, T_info_text, info);
106 if (err) {
107 nla_nest_cancel(skb, nla);
108 return err;
109 } else
110 nla_nest_end(skb, nla);
111 return 0;
112 }
113
114 /* This would be a good candidate for a "pre_doit" hook,
115 * and per-family private info->pointers.
116 * But we need to stay compatible with older kernels.
117 * If it returns successfully, adm_ctx members are valid.
118 *
119 * At this point, we still rely on the global genl_lock().
120 * If we want to avoid that, and allow "genl_family.parallel_ops", we may need
121 * to add additional synchronization against object destruction/modification.
122 */
123 #define DRBD_ADM_NEED_MINOR 1
124 #define DRBD_ADM_NEED_RESOURCE 2
125 #define DRBD_ADM_NEED_CONNECTION 4
126 static int drbd_adm_prepare(struct drbd_config_context *adm_ctx,
127 struct sk_buff *skb, struct genl_info *info, unsigned flags)
128 {
129 struct drbd_genlmsghdr *d_in = info->userhdr;
130 const u8 cmd = info->genlhdr->cmd;
131 int err;
132
133 memset(adm_ctx, 0, sizeof(*adm_ctx));
134
135 /* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
136 if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
137 return -EPERM;
138
139 adm_ctx->reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
140 if (!adm_ctx->reply_skb) {
141 err = -ENOMEM;
142 goto fail;
143 }
144
145 adm_ctx->reply_dh = genlmsg_put_reply(adm_ctx->reply_skb,
146 info, &drbd_genl_family, 0, cmd);
147 /* put of a few bytes into a fresh skb of >= 4k will always succeed.
148 * but anyways */
149 if (!adm_ctx->reply_dh) {
150 err = -ENOMEM;
151 goto fail;
152 }
153
154 adm_ctx->reply_dh->minor = d_in->minor;
155 adm_ctx->reply_dh->ret_code = NO_ERROR;
156
157 adm_ctx->volume = VOLUME_UNSPECIFIED;
158 if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
159 struct nlattr *nla;
160 /* parse and validate only */
161 err = drbd_cfg_context_from_attrs(NULL, info);
162 if (err)
163 goto fail;
164
165 /* It was present, and valid,
166 * copy it over to the reply skb. */
167 err = nla_put_nohdr(adm_ctx->reply_skb,
168 info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
169 info->attrs[DRBD_NLA_CFG_CONTEXT]);
170 if (err)
171 goto fail;
172
173 /* and assign stuff to the adm_ctx */
174 nla = nested_attr_tb[__nla_type(T_ctx_volume)];
175 if (nla)
176 adm_ctx->volume = nla_get_u32(nla);
177 nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
178 if (nla)
179 adm_ctx->resource_name = nla_data(nla);
180 adm_ctx->my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
181 adm_ctx->peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
182 if ((adm_ctx->my_addr &&
183 nla_len(adm_ctx->my_addr) > sizeof(adm_ctx->connection->my_addr)) ||
184 (adm_ctx->peer_addr &&
185 nla_len(adm_ctx->peer_addr) > sizeof(adm_ctx->connection->peer_addr))) {
186 err = -EINVAL;
187 goto fail;
188 }
189 }
190
191 adm_ctx->minor = d_in->minor;
192 adm_ctx->device = minor_to_device(d_in->minor);
193
194 /* We are protected by the global genl_lock().
195 * But we may explicitly drop it/retake it in drbd_adm_set_role(),
196 * so make sure this object stays around. */
197 if (adm_ctx->device)
198 kref_get(&adm_ctx->device->kref);
199
200 if (adm_ctx->resource_name) {
201 adm_ctx->resource = drbd_find_resource(adm_ctx->resource_name);
202 }
203
204 if (!adm_ctx->device && (flags & DRBD_ADM_NEED_MINOR)) {
205 drbd_msg_put_info(adm_ctx->reply_skb, "unknown minor");
206 return ERR_MINOR_INVALID;
207 }
208 if (!adm_ctx->resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
209 drbd_msg_put_info(adm_ctx->reply_skb, "unknown resource");
210 if (adm_ctx->resource_name)
211 return ERR_RES_NOT_KNOWN;
212 return ERR_INVALID_REQUEST;
213 }
214
215 if (flags & DRBD_ADM_NEED_CONNECTION) {
216 if (adm_ctx->resource) {
217 drbd_msg_put_info(adm_ctx->reply_skb, "no resource name expected");
218 return ERR_INVALID_REQUEST;
219 }
220 if (adm_ctx->device) {
221 drbd_msg_put_info(adm_ctx->reply_skb, "no minor number expected");
222 return ERR_INVALID_REQUEST;
223 }
224 if (adm_ctx->my_addr && adm_ctx->peer_addr)
225 adm_ctx->connection = conn_get_by_addrs(nla_data(adm_ctx->my_addr),
226 nla_len(adm_ctx->my_addr),
227 nla_data(adm_ctx->peer_addr),
228 nla_len(adm_ctx->peer_addr));
229 if (!adm_ctx->connection) {
230 drbd_msg_put_info(adm_ctx->reply_skb, "unknown connection");
231 return ERR_INVALID_REQUEST;
232 }
233 }
234
235 /* some more paranoia, if the request was over-determined */
236 if (adm_ctx->device && adm_ctx->resource &&
237 adm_ctx->device->resource != adm_ctx->resource) {
238 pr_warning("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
239 adm_ctx->minor, adm_ctx->resource->name,
240 adm_ctx->device->resource->name);
241 drbd_msg_put_info(adm_ctx->reply_skb, "minor exists in different resource");
242 return ERR_INVALID_REQUEST;
243 }
244 if (adm_ctx->device &&
245 adm_ctx->volume != VOLUME_UNSPECIFIED &&
246 adm_ctx->volume != adm_ctx->device->vnr) {
247 pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
248 adm_ctx->minor, adm_ctx->volume,
249 adm_ctx->device->vnr,
250 adm_ctx->device->resource->name);
251 drbd_msg_put_info(adm_ctx->reply_skb, "minor exists as different volume");
252 return ERR_INVALID_REQUEST;
253 }
254
255 /* still, provide adm_ctx->resource always, if possible. */
256 if (!adm_ctx->resource) {
257 adm_ctx->resource = adm_ctx->device ? adm_ctx->device->resource
258 : adm_ctx->connection ? adm_ctx->connection->resource : NULL;
259 if (adm_ctx->resource)
260 kref_get(&adm_ctx->resource->kref);
261 }
262
263 return NO_ERROR;
264
265 fail:
266 nlmsg_free(adm_ctx->reply_skb);
267 adm_ctx->reply_skb = NULL;
268 return err;
269 }
270
271 static int drbd_adm_finish(struct drbd_config_context *adm_ctx,
272 struct genl_info *info, int retcode)
273 {
274 if (adm_ctx->device) {
275 kref_put(&adm_ctx->device->kref, drbd_destroy_device);
276 adm_ctx->device = NULL;
277 }
278 if (adm_ctx->connection) {
279 kref_put(&adm_ctx->connection->kref, &drbd_destroy_connection);
280 adm_ctx->connection = NULL;
281 }
282 if (adm_ctx->resource) {
283 kref_put(&adm_ctx->resource->kref, drbd_destroy_resource);
284 adm_ctx->resource = NULL;
285 }
286
287 if (!adm_ctx->reply_skb)
288 return -ENOMEM;
289
290 adm_ctx->reply_dh->ret_code = retcode;
291 drbd_adm_send_reply(adm_ctx->reply_skb, info);
292 return 0;
293 }
294
295 static void setup_khelper_env(struct drbd_connection *connection, char **envp)
296 {
297 char *afs;
298
299 /* FIXME: A future version will not allow this case. */
300 if (connection->my_addr_len == 0 || connection->peer_addr_len == 0)
301 return;
302
303 switch (((struct sockaddr *)&connection->peer_addr)->sa_family) {
304 case AF_INET6:
305 afs = "ipv6";
306 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
307 &((struct sockaddr_in6 *)&connection->peer_addr)->sin6_addr);
308 break;
309 case AF_INET:
310 afs = "ipv4";
311 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
312 &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
313 break;
314 default:
315 afs = "ssocks";
316 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
317 &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
318 }
319 snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
320 }
321
322 int drbd_khelper(struct drbd_device *device, char *cmd)
323 {
324 char *envp[] = { "HOME=/",
325 "TERM=linux",
326 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
327 (char[20]) { }, /* address family */
328 (char[60]) { }, /* address */
329 NULL };
330 char mb[12];
331 char *argv[] = {usermode_helper, cmd, mb, NULL };
332 struct drbd_connection *connection = first_peer_device(device)->connection;
333 struct sib_info sib;
334 int ret;
335
336 if (current == connection->worker.task)
337 set_bit(CALLBACK_PENDING, &connection->flags);
338
339 snprintf(mb, 12, "minor-%d", device_to_minor(device));
340 setup_khelper_env(connection, envp);
341
342 /* The helper may take some time.
343 * write out any unsynced meta data changes now */
344 drbd_md_sync(device);
345
346 drbd_info(device, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
347 sib.sib_reason = SIB_HELPER_PRE;
348 sib.helper_name = cmd;
349 drbd_bcast_event(device, &sib);
350 ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
351 if (ret)
352 drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
353 usermode_helper, cmd, mb,
354 (ret >> 8) & 0xff, ret);
355 else
356 drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n",
357 usermode_helper, cmd, mb,
358 (ret >> 8) & 0xff, ret);
359 sib.sib_reason = SIB_HELPER_POST;
360 sib.helper_exit_code = ret;
361 drbd_bcast_event(device, &sib);
362
363 if (current == connection->worker.task)
364 clear_bit(CALLBACK_PENDING, &connection->flags);
365
366 if (ret < 0) /* Ignore any ERRNOs we got. */
367 ret = 0;
368
369 return ret;
370 }
371
372 static int conn_khelper(struct drbd_connection *connection, char *cmd)
373 {
374 char *envp[] = { "HOME=/",
375 "TERM=linux",
376 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
377 (char[20]) { }, /* address family */
378 (char[60]) { }, /* address */
379 NULL };
380 char *resource_name = connection->resource->name;
381 char *argv[] = {usermode_helper, cmd, resource_name, NULL };
382 int ret;
383
384 setup_khelper_env(connection, envp);
385 conn_md_sync(connection);
386
387 drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name);
388 /* TODO: conn_bcast_event() ?? */
389
390 ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
391 if (ret)
392 drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
393 usermode_helper, cmd, resource_name,
394 (ret >> 8) & 0xff, ret);
395 else
396 drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
397 usermode_helper, cmd, resource_name,
398 (ret >> 8) & 0xff, ret);
399 /* TODO: conn_bcast_event() ?? */
400
401 if (ret < 0) /* Ignore any ERRNOs we got. */
402 ret = 0;
403
404 return ret;
405 }
406
407 static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connection)
408 {
409 enum drbd_fencing_p fp = FP_NOT_AVAIL;
410 struct drbd_peer_device *peer_device;
411 int vnr;
412
413 rcu_read_lock();
414 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
415 struct drbd_device *device = peer_device->device;
416 if (get_ldev_if_state(device, D_CONSISTENT)) {
417 struct disk_conf *disk_conf =
418 rcu_dereference(peer_device->device->ldev->disk_conf);
419 fp = max_t(enum drbd_fencing_p, fp, disk_conf->fencing);
420 put_ldev(device);
421 }
422 }
423 rcu_read_unlock();
424
425 if (fp == FP_NOT_AVAIL) {
426 /* IO Suspending works on the whole resource.
427 Do it only for one device. */
428 vnr = 0;
429 peer_device = idr_get_next(&connection->peer_devices, &vnr);
430 drbd_change_state(peer_device->device, CS_VERBOSE | CS_HARD, NS(susp_fen, 0));
431 }
432
433 return fp;
434 }
435
436 bool conn_try_outdate_peer(struct drbd_connection *connection)
437 {
438 unsigned int connect_cnt;
439 union drbd_state mask = { };
440 union drbd_state val = { };
441 enum drbd_fencing_p fp;
442 char *ex_to_string;
443 int r;
444
445 spin_lock_irq(&connection->resource->req_lock);
446 if (connection->cstate >= C_WF_REPORT_PARAMS) {
447 drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n");
448 spin_unlock_irq(&connection->resource->req_lock);
449 return false;
450 }
451
452 connect_cnt = connection->connect_cnt;
453 spin_unlock_irq(&connection->resource->req_lock);
454
455 fp = highest_fencing_policy(connection);
456 switch (fp) {
457 case FP_NOT_AVAIL:
458 drbd_warn(connection, "Not fencing peer, I'm not even Consistent myself.\n");
459 goto out;
460 case FP_DONT_CARE:
461 return true;
462 default: ;
463 }
464
465 r = conn_khelper(connection, "fence-peer");
466
467 switch ((r>>8) & 0xff) {
468 case 3: /* peer is inconsistent */
469 ex_to_string = "peer is inconsistent or worse";
470 mask.pdsk = D_MASK;
471 val.pdsk = D_INCONSISTENT;
472 break;
473 case 4: /* peer got outdated, or was already outdated */
474 ex_to_string = "peer was fenced";
475 mask.pdsk = D_MASK;
476 val.pdsk = D_OUTDATED;
477 break;
478 case 5: /* peer was down */
479 if (conn_highest_disk(connection) == D_UP_TO_DATE) {
480 /* we will(have) create(d) a new UUID anyways... */
481 ex_to_string = "peer is unreachable, assumed to be dead";
482 mask.pdsk = D_MASK;
483 val.pdsk = D_OUTDATED;
484 } else {
485 ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
486 }
487 break;
488 case 6: /* Peer is primary, voluntarily outdate myself.
489 * This is useful when an unconnected R_SECONDARY is asked to
490 * become R_PRIMARY, but finds the other peer being active. */
491 ex_to_string = "peer is active";
492 drbd_warn(connection, "Peer is primary, outdating myself.\n");
493 mask.disk = D_MASK;
494 val.disk = D_OUTDATED;
495 break;
496 case 7:
497 if (fp != FP_STONITH)
498 drbd_err(connection, "fence-peer() = 7 && fencing != Stonith !!!\n");
499 ex_to_string = "peer was stonithed";
500 mask.pdsk = D_MASK;
501 val.pdsk = D_OUTDATED;
502 break;
503 default:
504 /* The script is broken ... */
505 drbd_err(connection, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
506 return false; /* Eventually leave IO frozen */
507 }
508
509 drbd_info(connection, "fence-peer helper returned %d (%s)\n",
510 (r>>8) & 0xff, ex_to_string);
511
512 out:
513
514 /* Not using
515 conn_request_state(connection, mask, val, CS_VERBOSE);
516 here, because we might were able to re-establish the connection in the
517 meantime. */
518 spin_lock_irq(&connection->resource->req_lock);
519 if (connection->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &connection->flags)) {
520 if (connection->connect_cnt != connect_cnt)
521 /* In case the connection was established and droped
522 while the fence-peer handler was running, ignore it */
523 drbd_info(connection, "Ignoring fence-peer exit code\n");
524 else
525 _conn_request_state(connection, mask, val, CS_VERBOSE);
526 }
527 spin_unlock_irq(&connection->resource->req_lock);
528
529 return conn_highest_pdsk(connection) <= D_OUTDATED;
530 }
531
532 static int _try_outdate_peer_async(void *data)
533 {
534 struct drbd_connection *connection = (struct drbd_connection *)data;
535
536 conn_try_outdate_peer(connection);
537
538 kref_put(&connection->kref, drbd_destroy_connection);
539 return 0;
540 }
541
542 void conn_try_outdate_peer_async(struct drbd_connection *connection)
543 {
544 struct task_struct *opa;
545
546 kref_get(&connection->kref);
547 opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h");
548 if (IS_ERR(opa)) {
549 drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n");
550 kref_put(&connection->kref, drbd_destroy_connection);
551 }
552 }
553
554 enum drbd_state_rv
555 drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int force)
556 {
557 struct drbd_peer_device *const peer_device = first_peer_device(device);
558 struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
559 const int max_tries = 4;
560 enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
561 struct net_conf *nc;
562 int try = 0;
563 int forced = 0;
564 union drbd_state mask, val;
565
566 if (new_role == R_PRIMARY) {
567 struct drbd_connection *connection;
568
569 /* Detect dead peers as soon as possible. */
570
571 rcu_read_lock();
572 for_each_connection(connection, device->resource)
573 request_ping(connection);
574 rcu_read_unlock();
575 }
576
577 mutex_lock(device->state_mutex);
578
579 mask.i = 0; mask.role = R_MASK;
580 val.i = 0; val.role = new_role;
581
582 while (try++ < max_tries) {
583 rv = _drbd_request_state(device, mask, val, CS_WAIT_COMPLETE);
584
585 /* in case we first succeeded to outdate,
586 * but now suddenly could establish a connection */
587 if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
588 val.pdsk = 0;
589 mask.pdsk = 0;
590 continue;
591 }
592
593 if (rv == SS_NO_UP_TO_DATE_DISK && force &&
594 (device->state.disk < D_UP_TO_DATE &&
595 device->state.disk >= D_INCONSISTENT)) {
596 mask.disk = D_MASK;
597 val.disk = D_UP_TO_DATE;
598 forced = 1;
599 continue;
600 }
601
602 if (rv == SS_NO_UP_TO_DATE_DISK &&
603 device->state.disk == D_CONSISTENT && mask.pdsk == 0) {
604 D_ASSERT(device, device->state.pdsk == D_UNKNOWN);
605
606 if (conn_try_outdate_peer(connection)) {
607 val.disk = D_UP_TO_DATE;
608 mask.disk = D_MASK;
609 }
610 continue;
611 }
612
613 if (rv == SS_NOTHING_TO_DO)
614 goto out;
615 if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
616 if (!conn_try_outdate_peer(connection) && force) {
617 drbd_warn(device, "Forced into split brain situation!\n");
618 mask.pdsk = D_MASK;
619 val.pdsk = D_OUTDATED;
620
621 }
622 continue;
623 }
624 if (rv == SS_TWO_PRIMARIES) {
625 /* Maybe the peer is detected as dead very soon...
626 retry at most once more in this case. */
627 int timeo;
628 rcu_read_lock();
629 nc = rcu_dereference(connection->net_conf);
630 timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
631 rcu_read_unlock();
632 schedule_timeout_interruptible(timeo);
633 if (try < max_tries)
634 try = max_tries - 1;
635 continue;
636 }
637 if (rv < SS_SUCCESS) {
638 rv = _drbd_request_state(device, mask, val,
639 CS_VERBOSE + CS_WAIT_COMPLETE);
640 if (rv < SS_SUCCESS)
641 goto out;
642 }
643 break;
644 }
645
646 if (rv < SS_SUCCESS)
647 goto out;
648
649 if (forced)
650 drbd_warn(device, "Forced to consider local data as UpToDate!\n");
651
652 /* Wait until nothing is on the fly :) */
653 wait_event(device->misc_wait, atomic_read(&device->ap_pending_cnt) == 0);
654
655 /* FIXME also wait for all pending P_BARRIER_ACK? */
656
657 if (new_role == R_SECONDARY) {
658 set_disk_ro(device->vdisk, true);
659 if (get_ldev(device)) {
660 device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
661 put_ldev(device);
662 }
663 } else {
664 mutex_lock(&device->resource->conf_update);
665 nc = connection->net_conf;
666 if (nc)
667 nc->discard_my_data = 0; /* without copy; single bit op is atomic */
668 mutex_unlock(&device->resource->conf_update);
669
670 set_disk_ro(device->vdisk, false);
671 if (get_ldev(device)) {
672 if (((device->state.conn < C_CONNECTED ||
673 device->state.pdsk <= D_FAILED)
674 && device->ldev->md.uuid[UI_BITMAP] == 0) || forced)
675 drbd_uuid_new_current(device);
676
677 device->ldev->md.uuid[UI_CURRENT] |= (u64)1;
678 put_ldev(device);
679 }
680 }
681
682 /* writeout of activity log covered areas of the bitmap
683 * to stable storage done in after state change already */
684
685 if (device->state.conn >= C_WF_REPORT_PARAMS) {
686 /* if this was forced, we should consider sync */
687 if (forced)
688 drbd_send_uuids(peer_device);
689 drbd_send_current_state(peer_device);
690 }
691
692 drbd_md_sync(device);
693
694 kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
695 out:
696 mutex_unlock(device->state_mutex);
697 return rv;
698 }
699
/* Map an error code returned by a *_from_attrs() parser to a short,
 * human readable explanation.  Any code not listed explicitly is reported
 * as "invalid attribute value". */
static const char *from_attrs_err_to_txt(int err)
{
	switch (err) {
	case -ENOMSG:
		return "required attribute missing";
	case -EOPNOTSUPP:
		return "unknown mandatory attribute";
	case -EEXIST:
		return "can not change invariant setting";
	default:
		return "invalid attribute value";
	}
}
707
708 int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
709 {
710 struct drbd_config_context adm_ctx;
711 struct set_role_parms parms;
712 int err;
713 enum drbd_ret_code retcode;
714
715 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
716 if (!adm_ctx.reply_skb)
717 return retcode;
718 if (retcode != NO_ERROR)
719 goto out;
720
721 memset(&parms, 0, sizeof(parms));
722 if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
723 err = set_role_parms_from_attrs(&parms, info);
724 if (err) {
725 retcode = ERR_MANDATORY_TAG;
726 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
727 goto out;
728 }
729 }
730 genl_unlock();
731 mutex_lock(&adm_ctx.resource->adm_mutex);
732
733 if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
734 retcode = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
735 else
736 retcode = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
737
738 mutex_unlock(&adm_ctx.resource->adm_mutex);
739 genl_lock();
740 out:
741 drbd_adm_finish(&adm_ctx, info, retcode);
742 return 0;
743 }
744
745 /* Initializes the md.*_offset members, so we are able to find
746 * the on disk meta data.
747 *
748 * We currently have two possible layouts:
749 * external:
750 * |----------- md_size_sect ------------------|
751 * [ 4k superblock ][ activity log ][ Bitmap ]
752 * | al_offset == 8 |
753 * | bm_offset = al_offset + X |
754 * ==> bitmap sectors = md_size_sect - bm_offset
755 *
756 * internal:
757 * |----------- md_size_sect ------------------|
758 * [data.....][ Bitmap ][ activity log ][ 4k superblock ]
759 * | al_offset < 0 |
760 * | bm_offset = al_offset - Y |
761 * ==> bitmap sectors = Y = al_offset - bm_offset
762 *
763 * Activity log size used to be fixed 32kB,
764 * but is about to become configurable.
765 */
766 static void drbd_md_set_sector_offsets(struct drbd_device *device,
767 struct drbd_backing_dev *bdev)
768 {
769 sector_t md_size_sect = 0;
770 unsigned int al_size_sect = bdev->md.al_size_4k * 8;
771
772 bdev->md.md_offset = drbd_md_ss(bdev);
773
774 switch (bdev->md.meta_dev_idx) {
775 default:
776 /* v07 style fixed size indexed meta data */
777 bdev->md.md_size_sect = MD_128MB_SECT;
778 bdev->md.al_offset = MD_4kB_SECT;
779 bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
780 break;
781 case DRBD_MD_INDEX_FLEX_EXT:
782 /* just occupy the full device; unit: sectors */
783 bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
784 bdev->md.al_offset = MD_4kB_SECT;
785 bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
786 break;
787 case DRBD_MD_INDEX_INTERNAL:
788 case DRBD_MD_INDEX_FLEX_INT:
789 /* al size is still fixed */
790 bdev->md.al_offset = -al_size_sect;
791 /* we need (slightly less than) ~ this much bitmap sectors: */
792 md_size_sect = drbd_get_capacity(bdev->backing_bdev);
793 md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
794 md_size_sect = BM_SECT_TO_EXT(md_size_sect);
795 md_size_sect = ALIGN(md_size_sect, 8);
796
797 /* plus the "drbd meta data super block",
798 * and the activity log; */
799 md_size_sect += MD_4kB_SECT + al_size_sect;
800
801 bdev->md.md_size_sect = md_size_sect;
802 /* bitmap offset is adjusted by 'super' block size */
803 bdev->md.bm_offset = -md_size_sect + MD_4kB_SECT;
804 break;
805 }
806 }
807
/* Pretty-print @size into @buf as "<number> <unit>B" and return @buf.
 *
 * Input size is expected to be in KB.  While the value is >= 10000 and a
 * larger unit exists, it is divided by 1024 (rounded on the highest bit
 * shifted out) and the unit advances through K, M, G, T, P, E.
 *
 * @buf needs 9 bytes at max including trailing NUL:
 * -1ULL ==> "16384 EB" */
char *ppsize(char *buf, unsigned long long size)
{
	static char unit_prefix[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
	const int last_unit = sizeof(unit_prefix) - 1;
	int idx;

	for (idx = 0; idx < last_unit && size >= 10000; idx++) {
		/* shift + round */
		const unsigned long long round_up = (size >> 9) & 1;

		size = (size >> 10) + round_up;
	}

	sprintf(buf, "%u %cB", (unsigned)size, unit_prefix[idx]);
	return buf;
}
824
825 /* there is still a theoretical deadlock when called from receiver
826 * on an D_INCONSISTENT R_PRIMARY:
827 * remote READ does inc_ap_bio, receiver would need to receive answer
828 * packet from remote to dec_ap_bio again.
829 * receiver receive_sizes(), comes here,
830 * waits for ap_bio_cnt == 0. -> deadlock.
831 * but this cannot happen, actually, because:
832 * R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
833 * (not connected, or bad/no disk on peer):
834 * see drbd_fail_request_early, ap_bio_cnt is zero.
835 * R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
836 * peer may not initiate a resize.
837 */
838 /* Note these are not to be confused with
839 * drbd_adm_suspend_io/drbd_adm_resume_io,
840 * which are (sub) state changes triggered by admin (drbdsetup),
841 * and can be long lived.
842 * This changes a bit in device->flags, is triggered by drbd internals,
843 * and should be short-lived. */
844 void drbd_suspend_io(struct drbd_device *device)
845 {
846 set_bit(SUSPEND_IO, &device->flags);
847 if (drbd_suspended(device))
848 return;
849 wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));
850 }
851
852 void drbd_resume_io(struct drbd_device *device)
853 {
854 clear_bit(SUSPEND_IO, &device->flags);
855 wake_up(&device->misc_wait);
856 }
857
858 /**
859 * drbd_determine_dev_size() - Sets the right device size obeying all constraints
860 * @device: DRBD device.
861 *
862 * Returns 0 on success, negative return values indicate errors.
863 * You should call drbd_md_sync() after calling this function.
864 */
865 enum determine_dev_size
866 drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
867 {
868 sector_t prev_first_sect, prev_size; /* previous meta location */
869 sector_t la_size_sect, u_size;
870 struct drbd_md *md = &device->ldev->md;
871 u32 prev_al_stripe_size_4k;
872 u32 prev_al_stripes;
873 sector_t size;
874 char ppb[10];
875 void *buffer;
876
877 int md_moved, la_size_changed;
878 enum determine_dev_size rv = DS_UNCHANGED;
879
880 /* race:
881 * application request passes inc_ap_bio,
882 * but then cannot get an AL-reference.
883 * this function later may wait on ap_bio_cnt == 0. -> deadlock.
884 *
885 * to avoid that:
886 * Suspend IO right here.
887 * still lock the act_log to not trigger ASSERTs there.
888 */
889 drbd_suspend_io(device);
890 buffer = drbd_md_get_buffer(device); /* Lock meta-data IO */
891 if (!buffer) {
892 drbd_resume_io(device);
893 return DS_ERROR;
894 }
895
896 /* no wait necessary anymore, actually we could assert that */
897 wait_event(device->al_wait, lc_try_lock(device->act_log));
898
899 prev_first_sect = drbd_md_first_sector(device->ldev);
900 prev_size = device->ldev->md.md_size_sect;
901 la_size_sect = device->ldev->md.la_size_sect;
902
903 if (rs) {
904 /* rs is non NULL if we should change the AL layout only */
905
906 prev_al_stripes = md->al_stripes;
907 prev_al_stripe_size_4k = md->al_stripe_size_4k;
908
909 md->al_stripes = rs->al_stripes;
910 md->al_stripe_size_4k = rs->al_stripe_size / 4;
911 md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
912 }
913
914 drbd_md_set_sector_offsets(device, device->ldev);
915
916 rcu_read_lock();
917 u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
918 rcu_read_unlock();
919 size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED);
920
921 if (size < la_size_sect) {
922 if (rs && u_size == 0) {
923 /* Remove "rs &&" later. This check should always be active, but
924 right now the receiver expects the permissive behavior */
925 drbd_warn(device, "Implicit shrink not allowed. "
926 "Use --size=%llus for explicit shrink.\n",
927 (unsigned long long)size);
928 rv = DS_ERROR_SHRINK;
929 }
930 if (u_size > size)
931 rv = DS_ERROR_SPACE_MD;
932 if (rv != DS_UNCHANGED)
933 goto err_out;
934 }
935
936 if (drbd_get_capacity(device->this_bdev) != size ||
937 drbd_bm_capacity(device) != size) {
938 int err;
939 err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC));
940 if (unlikely(err)) {
941 /* currently there is only one error: ENOMEM! */
942 size = drbd_bm_capacity(device)>>1;
943 if (size == 0) {
944 drbd_err(device, "OUT OF MEMORY! "
945 "Could not allocate bitmap!\n");
946 } else {
947 drbd_err(device, "BM resizing failed. "
948 "Leaving size unchanged at size = %lu KB\n",
949 (unsigned long)size);
950 }
951 rv = DS_ERROR;
952 }
953 /* racy, see comments above. */
954 drbd_set_my_capacity(device, size);
955 device->ldev->md.la_size_sect = size;
956 drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
957 (unsigned long long)size>>1);
958 }
959 if (rv <= DS_ERROR)
960 goto err_out;
961
962 la_size_changed = (la_size_sect != device->ldev->md.la_size_sect);
963
964 md_moved = prev_first_sect != drbd_md_first_sector(device->ldev)
965 || prev_size != device->ldev->md.md_size_sect;
966
967 if (la_size_changed || md_moved || rs) {
968 u32 prev_flags;
969
970 /* We do some synchronous IO below, which may take some time.
971 * Clear the timer, to avoid scary "timer expired!" messages,
972 * "Superblock" is written out at least twice below, anyways. */
973 del_timer(&device->md_sync_timer);
974 drbd_al_shrink(device); /* All extents inactive. */
975
976 prev_flags = md->flags;
977 md->flags &= ~MDF_PRIMARY_IND;
978 drbd_md_write(device, buffer);
979
980 drbd_info(device, "Writing the whole bitmap, %s\n",
981 la_size_changed && md_moved ? "size changed and md moved" :
982 la_size_changed ? "size changed" : "md moved");
983 /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
984 drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
985 "size changed", BM_LOCKED_MASK);
986 drbd_initialize_al(device, buffer);
987
988 md->flags = prev_flags;
989 drbd_md_write(device, buffer);
990
991 if (rs)
992 drbd_info(device, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
993 md->al_stripes, md->al_stripe_size_4k * 4);
994 }
995
996 if (size > la_size_sect)
997 rv = la_size_sect ? DS_GREW : DS_GREW_FROM_ZERO;
998 if (size < la_size_sect)
999 rv = DS_SHRUNK;
1000
1001 if (0) {
1002 err_out:
1003 if (rs) {
1004 md->al_stripes = prev_al_stripes;
1005 md->al_stripe_size_4k = prev_al_stripe_size_4k;
1006 md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k;
1007
1008 drbd_md_set_sector_offsets(device, device->ldev);
1009 }
1010 }
1011 lc_unlock(device->act_log);
1012 wake_up(&device->al_wait);
1013 drbd_md_put_buffer(device);
1014 drbd_resume_io(device);
1015
1016 return rv;
1017 }
1018
1019 sector_t
1020 drbd_new_dev_size(struct drbd_device *device, struct drbd_backing_dev *bdev,
1021 sector_t u_size, int assume_peer_has_space)
1022 {
1023 sector_t p_size = device->p_size; /* partner's disk size. */
1024 sector_t la_size_sect = bdev->md.la_size_sect; /* last agreed size. */
1025 sector_t m_size; /* my size */
1026 sector_t size = 0;
1027
1028 m_size = drbd_get_max_capacity(bdev);
1029
1030 if (device->state.conn < C_CONNECTED && assume_peer_has_space) {
1031 drbd_warn(device, "Resize while not connected was forced by the user!\n");
1032 p_size = m_size;
1033 }
1034
1035 if (p_size && m_size) {
1036 size = min_t(sector_t, p_size, m_size);
1037 } else {
1038 if (la_size_sect) {
1039 size = la_size_sect;
1040 if (m_size && m_size < size)
1041 size = m_size;
1042 if (p_size && p_size < size)
1043 size = p_size;
1044 } else {
1045 if (m_size)
1046 size = m_size;
1047 if (p_size)
1048 size = p_size;
1049 }
1050 }
1051
1052 if (size == 0)
1053 drbd_err(device, "Both nodes diskless!\n");
1054
1055 if (u_size) {
1056 if (u_size > size)
1057 drbd_err(device, "Requested disk size is too big (%lu > %lu)\n",
1058 (unsigned long)u_size>>1, (unsigned long)size>>1);
1059 else
1060 size = u_size;
1061 }
1062
1063 return size;
1064 }
1065
1066 /**
1067 * drbd_check_al_size() - Ensures that the AL is of the right size
1068 * @device: DRBD device.
1069 *
1070 * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
1071 * failed, and 0 on success. You should call drbd_md_sync() after you called
1072 * this function.
1073 */
1074 static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
1075 {
1076 struct lru_cache *n, *t;
1077 struct lc_element *e;
1078 unsigned int in_use;
1079 int i;
1080
1081 if (device->act_log &&
1082 device->act_log->nr_elements == dc->al_extents)
1083 return 0;
1084
1085 in_use = 0;
1086 t = device->act_log;
1087 n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
1088 dc->al_extents, sizeof(struct lc_element), 0);
1089
1090 if (n == NULL) {
1091 drbd_err(device, "Cannot allocate act_log lru!\n");
1092 return -ENOMEM;
1093 }
1094 spin_lock_irq(&device->al_lock);
1095 if (t) {
1096 for (i = 0; i < t->nr_elements; i++) {
1097 e = lc_element_by_index(t, i);
1098 if (e->refcnt)
1099 drbd_err(device, "refcnt(%d)==%d\n",
1100 e->lc_number, e->refcnt);
1101 in_use += e->refcnt;
1102 }
1103 }
1104 if (!in_use)
1105 device->act_log = n;
1106 spin_unlock_irq(&device->al_lock);
1107 if (in_use) {
1108 drbd_err(device, "Activity log still in use!\n");
1109 lc_destroy(n);
1110 return -EBUSY;
1111 } else {
1112 if (t)
1113 lc_destroy(t);
1114 }
1115 drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elemens */
1116 return 0;
1117 }
1118
1119 static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev,
1120 unsigned int max_bio_size)
1121 {
1122 struct request_queue * const q = device->rq_queue;
1123 unsigned int max_hw_sectors = max_bio_size >> 9;
1124 unsigned int max_segments = 0;
1125 struct request_queue *b = NULL;
1126
1127 if (bdev) {
1128 b = bdev->backing_bdev->bd_disk->queue;
1129
1130 max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
1131 rcu_read_lock();
1132 max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs;
1133 rcu_read_unlock();
1134
1135 blk_set_stacking_limits(&q->limits);
1136 blk_queue_max_write_same_sectors(q, 0);
1137 }
1138
1139 blk_queue_logical_block_size(q, 512);
1140 blk_queue_max_hw_sectors(q, max_hw_sectors);
1141 /* This is the workaround for "bio would need to, but cannot, be split" */
1142 blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
1143 blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
1144
1145 if (b) {
1146 struct drbd_connection *connection = first_peer_device(device)->connection;
1147
1148 if (blk_queue_discard(b) &&
1149 (connection->cstate < C_CONNECTED || connection->agreed_features & FF_TRIM)) {
1150 /* For now, don't allow more than one activity log extent worth of data
1151 * to be discarded in one go. We may need to rework drbd_al_begin_io()
1152 * to allow for even larger discard ranges */
1153 q->limits.max_discard_sectors = DRBD_MAX_DISCARD_SECTORS;
1154
1155 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
1156 /* REALLY? Is stacking secdiscard "legal"? */
1157 if (blk_queue_secdiscard(b))
1158 queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, q);
1159 } else {
1160 q->limits.max_discard_sectors = 0;
1161 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
1162 queue_flag_clear_unlocked(QUEUE_FLAG_SECDISCARD, q);
1163 }
1164
1165 blk_queue_stack_limits(q, b);
1166
1167 if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
1168 drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
1169 q->backing_dev_info.ra_pages,
1170 b->backing_dev_info.ra_pages);
1171 q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
1172 }
1173 }
1174 }
1175
1176 void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev)
1177 {
1178 unsigned int now, new, local, peer;
1179
1180 now = queue_max_hw_sectors(device->rq_queue) << 9;
1181 local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */
1182 peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */
1183
1184 if (bdev) {
1185 local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9;
1186 device->local_max_bio_size = local;
1187 }
1188 local = min(local, DRBD_MAX_BIO_SIZE);
1189
1190 /* We may ignore peer limits if the peer is modern enough.
1191 Because new from 8.3.8 onwards the peer can use multiple
1192 BIOs for a single peer_request */
1193 if (device->state.conn >= C_WF_REPORT_PARAMS) {
1194 if (first_peer_device(device)->connection->agreed_pro_version < 94)
1195 peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
1196 /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
1197 else if (first_peer_device(device)->connection->agreed_pro_version == 94)
1198 peer = DRBD_MAX_SIZE_H80_PACKET;
1199 else if (first_peer_device(device)->connection->agreed_pro_version < 100)
1200 peer = DRBD_MAX_BIO_SIZE_P95; /* drbd 8.3.8 onwards, before 8.4.0 */
1201 else
1202 peer = DRBD_MAX_BIO_SIZE;
1203
1204 /* We may later detach and re-attach on a disconnected Primary.
1205 * Avoid this setting to jump back in that case.
1206 * We want to store what we know the peer DRBD can handle,
1207 * not what the peer IO backend can handle. */
1208 if (peer > device->peer_max_bio_size)
1209 device->peer_max_bio_size = peer;
1210 }
1211 new = min(local, peer);
1212
1213 if (device->state.role == R_PRIMARY && new < now)
1214 drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now);
1215
1216 if (new != now)
1217 drbd_info(device, "max BIO size = %u\n", new);
1218
1219 drbd_setup_queue_param(device, bdev, new);
1220 }
1221
1222 /* Starts the worker thread */
1223 static void conn_reconfig_start(struct drbd_connection *connection)
1224 {
1225 drbd_thread_start(&connection->worker);
1226 drbd_flush_workqueue(&connection->sender_work);
1227 }
1228
1229 /* if still unconfigured, stops worker again. */
1230 static void conn_reconfig_done(struct drbd_connection *connection)
1231 {
1232 bool stop_threads;
1233 spin_lock_irq(&connection->resource->req_lock);
1234 stop_threads = conn_all_vols_unconf(connection) &&
1235 connection->cstate == C_STANDALONE;
1236 spin_unlock_irq(&connection->resource->req_lock);
1237 if (stop_threads) {
1238 /* asender is implicitly stopped by receiver
1239 * in conn_disconnect() */
1240 drbd_thread_stop(&connection->receiver);
1241 drbd_thread_stop(&connection->worker);
1242 }
1243 }
1244
1245 /* Make sure IO is suspended before calling this function(). */
1246 static void drbd_suspend_al(struct drbd_device *device)
1247 {
1248 int s = 0;
1249
1250 if (!lc_try_lock(device->act_log)) {
1251 drbd_warn(device, "Failed to lock al in drbd_suspend_al()\n");
1252 return;
1253 }
1254
1255 drbd_al_shrink(device);
1256 spin_lock_irq(&device->resource->req_lock);
1257 if (device->state.conn < C_CONNECTED)
1258 s = !test_and_set_bit(AL_SUSPENDED, &device->flags);
1259 spin_unlock_irq(&device->resource->req_lock);
1260 lc_unlock(device->act_log);
1261
1262 if (s)
1263 drbd_info(device, "Suspended AL updates\n");
1264 }
1265
1266
1267 static bool should_set_defaults(struct genl_info *info)
1268 {
1269 unsigned flags = ((struct drbd_genlmsghdr*)info->userhdr)->flags;
1270 return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS);
1271 }
1272
1273 static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
1274 {
1275 /* This is limited by 16 bit "slot" numbers,
1276 * and by available on-disk context storage.
1277 *
1278 * Also (u16)~0 is special (denotes a "free" extent).
1279 *
1280 * One transaction occupies one 4kB on-disk block,
1281 * we have n such blocks in the on disk ring buffer,
1282 * the "current" transaction may fail (n-1),
1283 * and there is 919 slot numbers context information per transaction.
1284 *
1285 * 72 transaction blocks amounts to more than 2**16 context slots,
1286 * so cap there first.
1287 */
1288 const unsigned int max_al_nr = DRBD_AL_EXTENTS_MAX;
1289 const unsigned int sufficient_on_disk =
1290 (max_al_nr + AL_CONTEXT_PER_TRANSACTION -1)
1291 /AL_CONTEXT_PER_TRANSACTION;
1292
1293 unsigned int al_size_4k = bdev->md.al_size_4k;
1294
1295 if (al_size_4k > sufficient_on_disk)
1296 return max_al_nr;
1297
1298 return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
1299 }
1300
1301 static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b)
1302 {
1303 return a->disk_barrier != b->disk_barrier ||
1304 a->disk_flushes != b->disk_flushes ||
1305 a->disk_drain != b->disk_drain;
1306 }
1307
1308 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
1309 {
1310 struct drbd_config_context adm_ctx;
1311 enum drbd_ret_code retcode;
1312 struct drbd_device *device;
1313 struct disk_conf *new_disk_conf, *old_disk_conf;
1314 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
1315 int err, fifo_size;
1316
1317 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1318 if (!adm_ctx.reply_skb)
1319 return retcode;
1320 if (retcode != NO_ERROR)
1321 goto finish;
1322
1323 device = adm_ctx.device;
1324 mutex_lock(&adm_ctx.resource->adm_mutex);
1325
1326 /* we also need a disk
1327 * to change the options on */
1328 if (!get_ldev(device)) {
1329 retcode = ERR_NO_DISK;
1330 goto out;
1331 }
1332
1333 new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
1334 if (!new_disk_conf) {
1335 retcode = ERR_NOMEM;
1336 goto fail;
1337 }
1338
1339 mutex_lock(&device->resource->conf_update);
1340 old_disk_conf = device->ldev->disk_conf;
1341 *new_disk_conf = *old_disk_conf;
1342 if (should_set_defaults(info))
1343 set_disk_conf_defaults(new_disk_conf);
1344
1345 err = disk_conf_from_attrs_for_change(new_disk_conf, info);
1346 if (err && err != -ENOMSG) {
1347 retcode = ERR_MANDATORY_TAG;
1348 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1349 goto fail_unlock;
1350 }
1351
1352 if (!expect(new_disk_conf->resync_rate >= 1))
1353 new_disk_conf->resync_rate = 1;
1354
1355 if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
1356 new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
1357 if (new_disk_conf->al_extents > drbd_al_extents_max(device->ldev))
1358 new_disk_conf->al_extents = drbd_al_extents_max(device->ldev);
1359
1360 if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1361 new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1362
1363 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
1364 if (fifo_size != device->rs_plan_s->size) {
1365 new_plan = fifo_alloc(fifo_size);
1366 if (!new_plan) {
1367 drbd_err(device, "kmalloc of fifo_buffer failed");
1368 retcode = ERR_NOMEM;
1369 goto fail_unlock;
1370 }
1371 }
1372
1373 drbd_suspend_io(device);
1374 wait_event(device->al_wait, lc_try_lock(device->act_log));
1375 drbd_al_shrink(device);
1376 err = drbd_check_al_size(device, new_disk_conf);
1377 lc_unlock(device->act_log);
1378 wake_up(&device->al_wait);
1379 drbd_resume_io(device);
1380
1381 if (err) {
1382 retcode = ERR_NOMEM;
1383 goto fail_unlock;
1384 }
1385
1386 write_lock_irq(&global_state_lock);
1387 retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1388 if (retcode == NO_ERROR) {
1389 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
1390 drbd_resync_after_changed(device);
1391 }
1392 write_unlock_irq(&global_state_lock);
1393
1394 if (retcode != NO_ERROR)
1395 goto fail_unlock;
1396
1397 if (new_plan) {
1398 old_plan = device->rs_plan_s;
1399 rcu_assign_pointer(device->rs_plan_s, new_plan);
1400 }
1401
1402 mutex_unlock(&device->resource->conf_update);
1403
1404 if (new_disk_conf->al_updates)
1405 device->ldev->md.flags &= ~MDF_AL_DISABLED;
1406 else
1407 device->ldev->md.flags |= MDF_AL_DISABLED;
1408
1409 if (new_disk_conf->md_flushes)
1410 clear_bit(MD_NO_FUA, &device->flags);
1411 else
1412 set_bit(MD_NO_FUA, &device->flags);
1413
1414 if (write_ordering_changed(old_disk_conf, new_disk_conf))
1415 drbd_bump_write_ordering(device->resource, NULL, WO_bdev_flush);
1416
1417 drbd_md_sync(device);
1418
1419 if (device->state.conn >= C_CONNECTED) {
1420 struct drbd_peer_device *peer_device;
1421
1422 for_each_peer_device(peer_device, device)
1423 drbd_send_sync_param(peer_device);
1424 }
1425
1426 synchronize_rcu();
1427 kfree(old_disk_conf);
1428 kfree(old_plan);
1429 mod_timer(&device->request_timer, jiffies + HZ);
1430 goto success;
1431
1432 fail_unlock:
1433 mutex_unlock(&device->resource->conf_update);
1434 fail:
1435 kfree(new_disk_conf);
1436 kfree(new_plan);
1437 success:
1438 put_ldev(device);
1439 out:
1440 mutex_unlock(&adm_ctx.resource->adm_mutex);
1441 finish:
1442 drbd_adm_finish(&adm_ctx, info, retcode);
1443 return 0;
1444 }
1445
/*
 * drbd_adm_attach() - admin request: attach a backing device and a meta-data
 * device to a minor that currently has no local disk.
 *
 * Rough sequence, as implemented below:
 *  - refuse if a disk is already configured, and wait until local_cnt
 *    has dropped to zero;
 *  - allocate the new backing-dev object, disk_conf and resync plan, fill
 *    the disk_conf from the request attributes and validate it;
 *  - open the backing and the meta-data block device, read the meta data,
 *    and check the various size constraints;
 *  - request disk state D_ATTACHING, then hand the new objects over to the
 *    device ("point of no return");
 *  - determine the device size, read (or set) the bitmap, and move to the
 *    disk state derived from the meta-data flags.
 *
 * Returns the value of drbd_adm_prepare() if no reply skb could be set up;
 * otherwise 0, with the result code delivered via drbd_adm_finish().
 */
1446 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1447 {
1448 struct drbd_config_context adm_ctx;
1449 struct drbd_device *device;
1450 struct drbd_peer_device *peer_device;
1451 struct drbd_connection *connection;
1452 int err;
1453 enum drbd_ret_code retcode;
1454 enum determine_dev_size dd;
1455 sector_t max_possible_sectors;
1456 sector_t min_md_device_sectors;
1457 struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
1458 struct disk_conf *new_disk_conf = NULL;
1459 struct block_device *bdev;
1460 struct lru_cache *resync_lru = NULL;
1461 struct fifo_buffer *new_plan = NULL;
1462 union drbd_state ns, os;
1463 enum drbd_state_rv rv;
1464 struct net_conf *nc;
1465
1466 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1467 if (!adm_ctx.reply_skb)
1468 return retcode;
1469 if (retcode != NO_ERROR)
1470 goto finish;
1471
1472 device = adm_ctx.device;
1473 mutex_lock(&adm_ctx.resource->adm_mutex);
1474 peer_device = first_peer_device(device);
1475 connection = peer_device ? peer_device->connection : NULL;
/* NOTE(review): the ternary above allows connection to be NULL, but
 * conn_reconfig_start(), the net_conf lookup, drbd_flush_workqueue() and
 * conn_reconfig_done() below all use it unconditionally - confirm that a
 * peer device always exists when we get here. */
1476 conn_reconfig_start(connection);
1477
1478 /* if you want to reconfigure, please tear down first */
1479 if (device->state.disk > D_DISKLESS) {
1480 retcode = ERR_DISK_CONFIGURED;
1481 goto fail;
1482 }
1483 /* It may just now have detached because of IO error. Make sure
1484 * drbd_ldev_destroy is done already, we may end up here very fast,
1485 * e.g. if someone calls attach from the on-io-error handler,
1486 * to realize a "hot spare" feature (not that I'd recommend that) */
1487 wait_event(device->misc_wait, !atomic_read(&device->local_cnt));
1488
1489 /* make sure there is no leftover from previous force-detach attempts */
1490 clear_bit(FORCE_DETACH, &device->flags);
1491 clear_bit(WAS_IO_ERROR, &device->flags);
1492 clear_bit(WAS_READ_ERROR, &device->flags);
1493
1494 /* and no leftover from previously aborted resync or verify, either */
1495 device->rs_total = 0;
1496 device->rs_failed = 0;
1497 atomic_set(&device->rs_pending_cnt, 0);
1498
1499 /* allocation not in the IO path, drbdsetup context */
1500 nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
1501 if (!nbc) {
1502 retcode = ERR_NOMEM;
1503 goto fail;
1504 }
1505 spin_lock_init(&nbc->md.uuid_lock);
1506
1507 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
1508 if (!new_disk_conf) {
1509 retcode = ERR_NOMEM;
1510 goto fail;
1511 }
1512 nbc->disk_conf = new_disk_conf;
1513
1514 set_disk_conf_defaults(new_disk_conf);
1515 err = disk_conf_from_attrs(new_disk_conf, info);
1516 if (err) {
1517 retcode = ERR_MANDATORY_TAG;
1518 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1519 goto fail;
1520 }
1521
1522 if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1523 new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1524
1525 new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ);
1526 if (!new_plan) {
1527 retcode = ERR_NOMEM;
1528 goto fail;
1529 }
1530
1531 if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
1532 retcode = ERR_MD_IDX_INVALID;
1533 goto fail;
1534 }
1535
1536 write_lock_irq(&global_state_lock);
1537 retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1538 write_unlock_irq(&global_state_lock);
1539 if (retcode != NO_ERROR)
1540 goto fail;
1541
/* Reject FP_STONITH fencing combined with wire protocol DRBD_PROT_A. */
1542 rcu_read_lock();
1543 nc = rcu_dereference(connection->net_conf);
1544 if (nc) {
1545 if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
1546 rcu_read_unlock();
1547 retcode = ERR_STONITH_AND_PROT_A;
1548 goto fail;
1549 }
1550 }
1551 rcu_read_unlock();
1552
1553 bdev = blkdev_get_by_path(new_disk_conf->backing_dev,
1554 FMODE_READ | FMODE_WRITE | FMODE_EXCL, device);
1555 if (IS_ERR(bdev)) {
1556 drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->backing_dev,
1557 PTR_ERR(bdev));
1558 retcode = ERR_OPEN_DISK;
1559 goto fail;
1560 }
1561 nbc->backing_bdev = bdev;
1562
1563 /*
1564 * meta_dev_idx >= 0: external fixed size, possibly multiple
1565 * drbd sharing one meta device. TODO in that case, paranoia
1566 * check that [md_bdev, meta_dev_idx] is not yet used by some
1567 * other drbd minor! (if you use drbd.conf + drbdadm, that
1568 * should check it for you already; but if you don't, or
1569 * someone fooled it, we need to double check here)
1570 */
1571 bdev = blkdev_get_by_path(new_disk_conf->meta_dev,
1572 FMODE_READ | FMODE_WRITE | FMODE_EXCL,
1573 (new_disk_conf->meta_dev_idx < 0) ?
1574 (void *)device : (void *)drbd_m_holder);
1575 if (IS_ERR(bdev)) {
1576 drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->meta_dev,
1577 PTR_ERR(bdev));
1578 retcode = ERR_OPEN_MD_DISK;
1579 goto fail;
1580 }
1581 nbc->md_bdev = bdev;
1582
/* Data and meta data must share one block device exactly when an
 * internal or flexible-internal meta-data index was requested. */
1583 if ((nbc->backing_bdev == nbc->md_bdev) !=
1584 (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
1585 new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
1586 retcode = ERR_MD_IDX_INVALID;
1587 goto fail;
1588 }
1589
1590 resync_lru = lc_create("resync", drbd_bm_ext_cache,
1591 1, 61, sizeof(struct bm_extent),
1592 offsetof(struct bm_extent, lce));
1593 if (!resync_lru) {
1594 retcode = ERR_NOMEM;
1595 goto fail;
1596 }
1597
1598 /* Read our meta data super block early.
1599 * This also sets other on-disk offsets. */
1600 retcode = drbd_md_read(device, nbc);
1601 if (retcode != NO_ERROR)
1602 goto fail;
1603
1604 if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
1605 new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
1606 if (new_disk_conf->al_extents > drbd_al_extents_max(nbc))
1607 new_disk_conf->al_extents = drbd_al_extents_max(nbc);
1608
1609 if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
1610 drbd_err(device, "max capacity %llu smaller than disk size %llu\n",
1611 (unsigned long long) drbd_get_max_capacity(nbc),
1612 (unsigned long long) new_disk_conf->disk_size);
1613 retcode = ERR_DISK_TOO_SMALL;
1614 goto fail;
1615 }
1616
1617 if (new_disk_conf->meta_dev_idx < 0) {
1618 max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
1619 /* at least one MB, otherwise it does not make sense */
1620 min_md_device_sectors = (2<<10);
1621 } else {
1622 max_possible_sectors = DRBD_MAX_SECTORS;
1623 min_md_device_sectors = MD_128MB_SECT * (new_disk_conf->meta_dev_idx + 1);
1624 }
1625
1626 if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
1627 retcode = ERR_MD_DISK_TOO_SMALL;
1628 drbd_warn(device, "refusing attach: md-device too small, "
1629 "at least %llu sectors needed for this meta-disk type\n",
1630 (unsigned long long) min_md_device_sectors);
1631 goto fail;
1632 }
1633
1634 /* Make sure the new disk is big enough
1635 * (we may currently be R_PRIMARY with no local disk...) */
1636 if (drbd_get_max_capacity(nbc) <
1637 drbd_get_capacity(device->this_bdev)) {
1638 retcode = ERR_DISK_TOO_SMALL;
1639 goto fail;
1640 }
1641
1642 nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
1643
/* Only warnings are issued in this block; nothing is changed here. */
1644 if (nbc->known_size > max_possible_sectors) {
1645 drbd_warn(device, "==> truncating very big lower level device "
1646 "to currently maximum possible %llu sectors <==\n",
1647 (unsigned long long) max_possible_sectors);
1648 if (new_disk_conf->meta_dev_idx >= 0)
1649 drbd_warn(device, "==>> using internal or flexible "
1650 "meta data may help <<==\n");
1651 }
1652
1653 drbd_suspend_io(device);
1654 /* also wait for the last barrier ack. */
1655 /* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171
1656 * We need a way to either ignore barrier acks for barriers sent before a device
1657 * was attached, or a way to wait for all pending barrier acks to come in.
1658 * As barriers are counted per resource,
1659 * we'd need to suspend io on all devices of a resource.
1660 */
1661 wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device));
1662 /* and for any other previously queued work */
1663 drbd_flush_workqueue(&connection->sender_work);
1664
1665 rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
1666 retcode = rv; /* FIXME: Type mismatch. */
1667 drbd_resume_io(device);
1668 if (rv < SS_SUCCESS)
1669 goto fail;
1670
/* NOTE(review): if this fails, retcode still holds the value of the
 * state request above - confirm that is the intended reply. */
1671 if (!get_ldev_if_state(device, D_ATTACHING))
1672 goto force_diskless;
1673
1674 if (!device->bitmap) {
1675 if (drbd_bm_init(device)) {
1676 retcode = ERR_NOMEM;
1677 goto force_diskless_dec;
1678 }
1679 }
1680
/* The comparison below ignores the lowest bit of both UUIDs. */
1681 if (device->state.conn < C_CONNECTED &&
1682 device->state.role == R_PRIMARY && device->ed_uuid &&
1683 (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
1684 drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
1685 (unsigned long long)device->ed_uuid);
1686 retcode = ERR_DATA_NOT_CURRENT;
1687 goto force_diskless_dec;
1688 }
1689
1690 /* Since we are diskless, fix the activity log first... */
1691 if (drbd_check_al_size(device, new_disk_conf)) {
1692 retcode = ERR_NOMEM;
1693 goto force_diskless_dec;
1694 }
1695
1696 /* Prevent shrinking of consistent devices ! */
1697 if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
1698 drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) {
1699 drbd_warn(device, "refusing to truncate a consistent device\n");
1700 retcode = ERR_DISK_TOO_SMALL;
1701 goto force_diskless_dec;
1702 }
1703
1704 /* Reset the "barriers don't work" bits here, then force meta data to
1705 * be written, to ensure we determine if barriers are supported. */
1706 if (new_disk_conf->md_flushes)
1707 clear_bit(MD_NO_FUA, &device->flags);
1708 else
1709 set_bit(MD_NO_FUA, &device->flags);
1710
1711 /* Point of no return reached.
1712 * Devices and memory are no longer released by error cleanup below.
1713 * now device takes over responsibility, and the state engine should
1714 * clean it up somewhere. */
1715 D_ASSERT(device, device->ldev == NULL);
1716 device->ldev = nbc;
1717 device->resync = resync_lru;
1718 device->rs_plan_s = new_plan;
1719 nbc = NULL;
1720 resync_lru = NULL;
1721 new_disk_conf = NULL;
1722 new_plan = NULL;
1723
1724 drbd_bump_write_ordering(device->resource, device->ldev, WO_bdev_flush);
1725
1726 if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
1727 set_bit(CRASHED_PRIMARY, &device->flags);
1728 else
1729 clear_bit(CRASHED_PRIMARY, &device->flags);
1730
1731 if (drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1732 !(device->state.role == R_PRIMARY && device->resource->susp_nod))
1733 set_bit(CRASHED_PRIMARY, &device->flags);
1734
1735 device->send_cnt = 0;
1736 device->recv_cnt = 0;
1737 device->read_cnt = 0;
1738 device->writ_cnt = 0;
1739
1740 drbd_reconsider_max_bio_size(device, device->ldev);
1741
1742 /* If I am currently not R_PRIMARY,
1743 * but meta data primary indicator is set,
1744 * I just now recover from a hard crash,
1745 * and have been R_PRIMARY before that crash.
1746 *
1747 * Now, if I had no connection before that crash
1748 * (have been degraded R_PRIMARY), chances are that
1749 * I won't find my peer now either.
1750 *
1751 * In that case, and _only_ in that case,
1752 * we use the degr-wfc-timeout instead of the default,
1753 * so we can automatically recover from a crash of a
1754 * degraded but active "cluster" after a certain timeout.
1755 */
1756 clear_bit(USE_DEGR_WFC_T, &device->flags);
1757 if (device->state.role != R_PRIMARY &&
1758 drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1759 !drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND))
1760 set_bit(USE_DEGR_WFC_T, &device->flags);
1761
/* Every result <= DS_ERROR is reported as ERR_NOMEM_BITMAP. */
1762 dd = drbd_determine_dev_size(device, 0, NULL);
1763 if (dd <= DS_ERROR) {
1764 retcode = ERR_NOMEM_BITMAP;
1765 goto force_diskless_dec;
1766 } else if (dd == DS_GREW)
1767 set_bit(RESYNC_AFTER_NEG, &device->flags);
1768
1769 if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ||
1770 (test_bit(CRASHED_PRIMARY, &device->flags) &&
1771 drbd_md_test_flag(device->ldev, MDF_AL_DISABLED))) {
1772 drbd_info(device, "Assuming that all blocks are out of sync "
1773 "(aka FullSync)\n");
1774 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
1775 "set_n_write from attaching", BM_LOCKED_MASK)) {
1776 retcode = ERR_IO_MD_DISK;
1777 goto force_diskless_dec;
1778 }
1779 } else {
1780 if (drbd_bitmap_io(device, &drbd_bm_read,
1781 "read from attaching", BM_LOCKED_MASK)) {
1782 retcode = ERR_IO_MD_DISK;
1783 goto force_diskless_dec;
1784 }
1785 }
1786
/* NOTE(review): drbd_resume_io() was already called above, right after
 * the D_ATTACHING state request - confirm the claim in the comment on the
 * call below. */
1787 if (_drbd_bm_total_weight(device) == drbd_bm_bits(device))
1788 drbd_suspend_al(device); /* IO is still suspended here... */
1789
1790 spin_lock_irq(&device->resource->req_lock);
1791 os = drbd_read_state(device);
1792 ns = os;
1793 /* If MDF_CONSISTENT is not set go into inconsistent state,
1794 otherwise investigate MDF_WasUpToDate...
1795 If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state,
1796 otherwise into D_CONSISTENT state.
1797 */
1798 if (drbd_md_test_flag(device->ldev, MDF_CONSISTENT)) {
1799 if (drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE))
1800 ns.disk = D_CONSISTENT;
1801 else
1802 ns.disk = D_OUTDATED;
1803 } else {
1804 ns.disk = D_INCONSISTENT;
1805 }
1806
1807 if (drbd_md_test_flag(device->ldev, MDF_PEER_OUT_DATED))
1808 ns.pdsk = D_OUTDATED;
1809
1810 rcu_read_lock();
1811 if (ns.disk == D_CONSISTENT &&
1812 (ns.pdsk == D_OUTDATED || rcu_dereference(device->ldev->disk_conf)->fencing == FP_DONT_CARE))
1813 ns.disk = D_UP_TO_DATE;
1814
1815 /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
1816 MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
1817 this point, because drbd_request_state() modifies these
1818 flags. */
1819
1820 if (rcu_dereference(device->ldev->disk_conf)->al_updates)
1821 device->ldev->md.flags &= ~MDF_AL_DISABLED;
1822 else
1823 device->ldev->md.flags |= MDF_AL_DISABLED;
1824
1825 rcu_read_unlock();
1826
1827 /* In case we are C_CONNECTED postpone any decision on the new disk
1828 state after the negotiation phase. */
1829 if (device->state.conn == C_CONNECTED) {
1830 device->new_state_tmp.i = ns.i;
1831 ns.i = os.i;
1832 ns.disk = D_NEGOTIATING;
1833
1834 /* We expect to receive up-to-date UUIDs soon.
1835 To avoid a race in receive_state, free p_uuid while
1836 holding req_lock. I.e. atomic with the state change */
1837 kfree(device->p_uuid);
1838 device->p_uuid = NULL;
1839 }
1840
1841 rv = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
1842 spin_unlock_irq(&device->resource->req_lock);
1843
1844 if (rv < SS_SUCCESS)
1845 goto force_diskless_dec;
1846
1847 mod_timer(&device->request_timer, jiffies + HZ);
1848
/* Bit 0 of the current UUID is set while R_PRIMARY and cleared otherwise. */
1849 if (device->state.role == R_PRIMARY)
1850 device->ldev->md.uuid[UI_CURRENT] |= (u64)1;
1851 else
1852 device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
1853
1854 drbd_md_mark_dirty(device);
1855 drbd_md_sync(device);
1856
1857 kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
1858 put_ldev(device);
1859 conn_reconfig_done(connection);
1860 mutex_unlock(&adm_ctx.resource->adm_mutex);
1861 drbd_adm_finish(&adm_ctx, info, retcode);
1862 return 0;
1863
/* Error exits. force_diskless_dec additionally drops the reference taken
 * by get_ldev_if_state() above before forcing the disk state back to
 * D_DISKLESS. */
1864 force_diskless_dec:
1865 put_ldev(device);
1866 force_diskless:
1867 drbd_force_state(device, NS(disk, D_DISKLESS));
1868 drbd_md_sync(device);
/* When reached after the point of no return, nbc, new_disk_conf,
 * resync_lru and new_plan have already been reset to NULL above. */
1869 fail:
1870 conn_reconfig_done(connection);
1871 if (nbc) {
1872 if (nbc->backing_bdev)
1873 blkdev_put(nbc->backing_bdev,
1874 FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1875 if (nbc->md_bdev)
1876 blkdev_put(nbc->md_bdev,
1877 FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1878 kfree(nbc);
1879 }
1880 kfree(new_disk_conf);
1881 lc_destroy(resync_lru);
1882 kfree(new_plan);
1883 mutex_unlock(&adm_ctx.resource->adm_mutex);
1884 finish:
1885 drbd_adm_finish(&adm_ctx, info, retcode);
1886 return 0;
1887 }
1888
1889 static int adm_detach(struct drbd_device *device, int force)
1890 {
1891 enum drbd_state_rv retcode;
1892 int ret;
1893
1894 if (force) {
1895 set_bit(FORCE_DETACH, &device->flags);
1896 drbd_force_state(device, NS(disk, D_FAILED));
1897 retcode = SS_SUCCESS;
1898 goto out;
1899 }
1900
1901 drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
1902 drbd_md_get_buffer(device); /* make sure there is no in-flight meta-data IO */
1903 retcode = drbd_request_state(device, NS(disk, D_FAILED));
1904 drbd_md_put_buffer(device);
1905 /* D_FAILED will transition to DISKLESS. */
1906 ret = wait_event_interruptible(device->misc_wait,
1907 device->state.disk != D_FAILED);
1908 drbd_resume_io(device);
1909 if ((int)retcode == (int)SS_IS_DISKLESS)
1910 retcode = SS_NOTHING_TO_DO;
1911 if (ret)
1912 retcode = ERR_INTR;
1913 out:
1914 return retcode;
1915 }
1916
1917 /* Detaching the disk is a process in multiple stages. First we need to lock
1918 * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
1919 * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
1920 * internal references as well.
1921 * Only then we have finally detached. */
1922 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
1923 {
1924 struct drbd_config_context adm_ctx;
1925 enum drbd_ret_code retcode;
1926 struct detach_parms parms = { };
1927 int err;
1928
1929 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1930 if (!adm_ctx.reply_skb)
1931 return retcode;
1932 if (retcode != NO_ERROR)
1933 goto out;
1934
1935 if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
1936 err = detach_parms_from_attrs(&parms, info);
1937 if (err) {
1938 retcode = ERR_MANDATORY_TAG;
1939 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1940 goto out;
1941 }
1942 }
1943
1944 mutex_lock(&adm_ctx.resource->adm_mutex);
1945 retcode = adm_detach(adm_ctx.device, parms.force_detach);
1946 mutex_unlock(&adm_ctx.resource->adm_mutex);
1947 out:
1948 drbd_adm_finish(&adm_ctx, info, retcode);
1949 return 0;
1950 }
1951
1952 static bool conn_resync_running(struct drbd_connection *connection)
1953 {
1954 struct drbd_peer_device *peer_device;
1955 bool rv = false;
1956 int vnr;
1957
1958 rcu_read_lock();
1959 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1960 struct drbd_device *device = peer_device->device;
1961 if (device->state.conn == C_SYNC_SOURCE ||
1962 device->state.conn == C_SYNC_TARGET ||
1963 device->state.conn == C_PAUSED_SYNC_S ||
1964 device->state.conn == C_PAUSED_SYNC_T) {
1965 rv = true;
1966 break;
1967 }
1968 }
1969 rcu_read_unlock();
1970
1971 return rv;
1972 }
1973
1974 static bool conn_ov_running(struct drbd_connection *connection)
1975 {
1976 struct drbd_peer_device *peer_device;
1977 bool rv = false;
1978 int vnr;
1979
1980 rcu_read_lock();
1981 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1982 struct drbd_device *device = peer_device->device;
1983 if (device->state.conn == C_VERIFY_S ||
1984 device->state.conn == C_VERIFY_T) {
1985 rv = true;
1986 break;
1987 }
1988 }
1989 rcu_read_unlock();
1990
1991 return rv;
1992 }
1993
1994 static enum drbd_ret_code
1995 _check_net_options(struct drbd_connection *connection, struct net_conf *old_net_conf, struct net_conf *new_net_conf)
1996 {
1997 struct drbd_peer_device *peer_device;
1998 int i;
1999
2000 if (old_net_conf && connection->cstate == C_WF_REPORT_PARAMS && connection->agreed_pro_version < 100) {
2001 if (new_net_conf->wire_protocol != old_net_conf->wire_protocol)
2002 return ERR_NEED_APV_100;
2003
2004 if (new_net_conf->two_primaries != old_net_conf->two_primaries)
2005 return ERR_NEED_APV_100;
2006
2007 if (strcmp(new_net_conf->integrity_alg, old_net_conf->integrity_alg))
2008 return ERR_NEED_APV_100;
2009 }
2010
2011 if (!new_net_conf->two_primaries &&
2012 conn_highest_role(connection) == R_PRIMARY &&
2013 conn_highest_peer(connection) == R_PRIMARY)
2014 return ERR_NEED_ALLOW_TWO_PRI;
2015
2016 if (new_net_conf->two_primaries &&
2017 (new_net_conf->wire_protocol != DRBD_PROT_C))
2018 return ERR_NOT_PROTO_C;
2019
2020 idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2021 struct drbd_device *device = peer_device->device;
2022 if (get_ldev(device)) {
2023 enum drbd_fencing_p fp = rcu_dereference(device->ldev->disk_conf)->fencing;
2024 put_ldev(device);
2025 if (new_net_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH)
2026 return ERR_STONITH_AND_PROT_A;
2027 }
2028 if (device->state.role == R_PRIMARY && new_net_conf->discard_my_data)
2029 return ERR_DISCARD_IMPOSSIBLE;
2030 }
2031
2032 if (new_net_conf->on_congestion != OC_BLOCK && new_net_conf->wire_protocol != DRBD_PROT_A)
2033 return ERR_CONG_NOT_PROTO_A;
2034
2035 return NO_ERROR;
2036 }
2037
2038 static enum drbd_ret_code
2039 check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
2040 {
2041 static enum drbd_ret_code rv;
2042 struct drbd_peer_device *peer_device;
2043 int i;
2044
2045 rcu_read_lock();
2046 rv = _check_net_options(connection, rcu_dereference(connection->net_conf), new_net_conf);
2047 rcu_read_unlock();
2048
2049 /* connection->volumes protected by genl_lock() here */
2050 idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2051 struct drbd_device *device = peer_device->device;
2052 if (!device->bitmap) {
2053 if (drbd_bm_init(device))
2054 return ERR_NOMEM;
2055 }
2056 }
2057
2058 return rv;
2059 }
2060
/*
 * Scratch set of hash transforms, filled by alloc_crypto() from the
 * algorithm names in a struct net_conf and released with free_crypto().
 */
struct crypto {
	struct crypto_hash *verify_tfm;		/* from net_conf->verify_alg */
	struct crypto_hash *csums_tfm;		/* from net_conf->csums_alg */
	struct crypto_hash *cram_hmac_tfm;	/* "hmac(<net_conf->cram_hmac_alg>)" */
	struct crypto_hash *integrity_tfm;	/* from net_conf->integrity_alg */
};
2067
2068 static int
2069 alloc_hash(struct crypto_hash **tfm, char *tfm_name, int err_alg)
2070 {
2071 if (!tfm_name[0])
2072 return NO_ERROR;
2073
2074 *tfm = crypto_alloc_hash(tfm_name, 0, CRYPTO_ALG_ASYNC);
2075 if (IS_ERR(*tfm)) {
2076 *tfm = NULL;
2077 return err_alg;
2078 }
2079
2080 return NO_ERROR;
2081 }
2082
2083 static enum drbd_ret_code
2084 alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf)
2085 {
2086 char hmac_name[CRYPTO_MAX_ALG_NAME];
2087 enum drbd_ret_code rv;
2088
2089 rv = alloc_hash(&crypto->csums_tfm, new_net_conf->csums_alg,
2090 ERR_CSUMS_ALG);
2091 if (rv != NO_ERROR)
2092 return rv;
2093 rv = alloc_hash(&crypto->verify_tfm, new_net_conf->verify_alg,
2094 ERR_VERIFY_ALG);
2095 if (rv != NO_ERROR)
2096 return rv;
2097 rv = alloc_hash(&crypto->integrity_tfm, new_net_conf->integrity_alg,
2098 ERR_INTEGRITY_ALG);
2099 if (rv != NO_ERROR)
2100 return rv;
2101 if (new_net_conf->cram_hmac_alg[0] != 0) {
2102 snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
2103 new_net_conf->cram_hmac_alg);
2104
2105 rv = alloc_hash(&crypto->cram_hmac_tfm, hmac_name,
2106 ERR_AUTH_ALG);
2107 }
2108
2109 return rv;
2110 }
2111
2112 static void free_crypto(struct crypto *crypto)
2113 {
2114 crypto_free_hash(crypto->cram_hmac_tfm);
2115 crypto_free_hash(crypto->integrity_tfm);
2116 crypto_free_hash(crypto->csums_tfm);
2117 crypto_free_hash(crypto->verify_tfm);
2118 }
2119
2120 int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
2121 {
2122 struct drbd_config_context adm_ctx;
2123 enum drbd_ret_code retcode;
2124 struct drbd_connection *connection;
2125 struct net_conf *old_net_conf, *new_net_conf = NULL;
2126 int err;
2127 int ovr; /* online verify running */
2128 int rsr; /* re-sync running */
2129 struct crypto crypto = { };
2130
2131 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
2132 if (!adm_ctx.reply_skb)
2133 return retcode;
2134 if (retcode != NO_ERROR)
2135 goto finish;
2136
2137 connection = adm_ctx.connection;
2138 mutex_lock(&adm_ctx.resource->adm_mutex);
2139
2140 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
2141 if (!new_net_conf) {
2142 retcode = ERR_NOMEM;
2143 goto out;
2144 }
2145
2146 conn_reconfig_start(connection);
2147
2148 mutex_lock(&connection->data.mutex);
2149 mutex_lock(&connection->resource->conf_update);
2150 old_net_conf = connection->net_conf;
2151
2152 if (!old_net_conf) {
2153 drbd_msg_put_info(adm_ctx.reply_skb, "net conf missing, try connect");
2154 retcode = ERR_INVALID_REQUEST;
2155 goto fail;
2156 }
2157
2158 *new_net_conf = *old_net_conf;
2159 if (should_set_defaults(info))
2160 set_net_conf_defaults(new_net_conf);
2161
2162 err = net_conf_from_attrs_for_change(new_net_conf, info);
2163 if (err && err != -ENOMSG) {
2164 retcode = ERR_MANDATORY_TAG;
2165 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2166 goto fail;
2167 }
2168
2169 retcode = check_net_options(connection, new_net_conf);
2170 if (retcode != NO_ERROR)
2171 goto fail;
2172
2173 /* re-sync running */
2174 rsr = conn_resync_running(connection);
2175 if (rsr && strcmp(new_net_conf->csums_alg, old_net_conf->csums_alg)) {
2176 retcode = ERR_CSUMS_RESYNC_RUNNING;
2177 goto fail;
2178 }
2179
2180 /* online verify running */
2181 ovr = conn_ov_running(connection);
2182 if (ovr && strcmp(new_net_conf->verify_alg, old_net_conf->verify_alg)) {
2183 retcode = ERR_VERIFY_RUNNING;
2184 goto fail;
2185 }
2186
2187 retcode = alloc_crypto(&crypto, new_net_conf);
2188 if (retcode != NO_ERROR)
2189 goto fail;
2190
2191 rcu_assign_pointer(connection->net_conf, new_net_conf);
2192
2193 if (!rsr) {
2194 crypto_free_hash(connection->csums_tfm);
2195 connection->csums_tfm = crypto.csums_tfm;
2196 crypto.csums_tfm = NULL;
2197 }
2198 if (!ovr) {
2199 crypto_free_hash(connection->verify_tfm);
2200 connection->verify_tfm = crypto.verify_tfm;
2201 crypto.verify_tfm = NULL;
2202 }
2203
2204 crypto_free_hash(connection->integrity_tfm);
2205 connection->integrity_tfm = crypto.integrity_tfm;
2206 if (connection->cstate >= C_WF_REPORT_PARAMS && connection->agreed_pro_version >= 100)
2207 /* Do this without trying to take connection->data.mutex again. */
2208 __drbd_send_protocol(connection, P_PROTOCOL_UPDATE);
2209
2210 crypto_free_hash(connection->cram_hmac_tfm);
2211 connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2212
2213 mutex_unlock(&connection->resource->conf_update);
2214 mutex_unlock(&connection->data.mutex);
2215 synchronize_rcu();
2216 kfree(old_net_conf);
2217
2218 if (connection->cstate >= C_WF_REPORT_PARAMS) {
2219 struct drbd_peer_device *peer_device;
2220 int vnr;
2221
2222 idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
2223 drbd_send_sync_param(peer_device);
2224 }
2225
2226 goto done;
2227
2228 fail:
2229 mutex_unlock(&connection->resource->conf_update);
2230 mutex_unlock(&connection->data.mutex);
2231 free_crypto(&crypto);
2232 kfree(new_net_conf);
2233 done:
2234 conn_reconfig_done(connection);
2235 out:
2236 mutex_unlock(&adm_ctx.resource->adm_mutex);
2237 finish:
2238 drbd_adm_finish(&adm_ctx, info, retcode);
2239 return 0;
2240 }
2241
2242 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
2243 {
2244 struct drbd_config_context adm_ctx;
2245 struct drbd_peer_device *peer_device;
2246 struct net_conf *old_net_conf, *new_net_conf = NULL;
2247 struct crypto crypto = { };
2248 struct drbd_resource *resource;
2249 struct drbd_connection *connection;
2250 enum drbd_ret_code retcode;
2251 int i;
2252 int err;
2253
2254 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
2255
2256 if (!adm_ctx.reply_skb)
2257 return retcode;
2258 if (retcode != NO_ERROR)
2259 goto out;
2260 if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) {
2261 drbd_msg_put_info(adm_ctx.reply_skb, "connection endpoint(s) missing");
2262 retcode = ERR_INVALID_REQUEST;
2263 goto out;
2264 }
2265
2266 /* No need for _rcu here. All reconfiguration is
2267 * strictly serialized on genl_lock(). We are protected against
2268 * concurrent reconfiguration/addition/deletion */
2269 for_each_resource(resource, &drbd_resources) {
2270 for_each_connection(connection, resource) {
2271 if (nla_len(adm_ctx.my_addr) == connection->my_addr_len &&
2272 !memcmp(nla_data(adm_ctx.my_addr), &connection->my_addr,
2273 connection->my_addr_len)) {
2274 retcode = ERR_LOCAL_ADDR;
2275 goto out;
2276 }
2277
2278 if (nla_len(adm_ctx.peer_addr) == connection->peer_addr_len &&
2279 !memcmp(nla_data(adm_ctx.peer_addr), &connection->peer_addr,
2280 connection->peer_addr_len)) {
2281 retcode = ERR_PEER_ADDR;
2282 goto out;
2283 }
2284 }
2285 }
2286
2287 mutex_lock(&adm_ctx.resource->adm_mutex);
2288 connection = first_connection(adm_ctx.resource);
2289 conn_reconfig_start(connection);
2290
2291 if (connection->cstate > C_STANDALONE) {
2292 retcode = ERR_NET_CONFIGURED;
2293 goto fail;
2294 }
2295
2296 /* allocation not in the IO path, drbdsetup / netlink process context */
2297 new_net_conf = kzalloc(sizeof(*new_net_conf), GFP_KERNEL);
2298 if (!new_net_conf) {
2299 retcode = ERR_NOMEM;
2300 goto fail;
2301 }
2302
2303 set_net_conf_defaults(new_net_conf);
2304
2305 err = net_conf_from_attrs(new_net_conf, info);
2306 if (err && err != -ENOMSG) {
2307 retcode = ERR_MANDATORY_TAG;
2308 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2309 goto fail;
2310 }
2311
2312 retcode = check_net_options(connection, new_net_conf);
2313 if (retcode != NO_ERROR)
2314 goto fail;
2315
2316 retcode = alloc_crypto(&crypto, new_net_conf);
2317 if (retcode != NO_ERROR)
2318 goto fail;
2319
2320 ((char *)new_net_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;
2321
2322 drbd_flush_workqueue(&connection->sender_work);
2323
2324 mutex_lock(&adm_ctx.resource->conf_update);
2325 old_net_conf = connection->net_conf;
2326 if (old_net_conf) {
2327 retcode = ERR_NET_CONFIGURED;
2328 mutex_unlock(&adm_ctx.resource->conf_update);
2329 goto fail;
2330 }
2331 rcu_assign_pointer(connection->net_conf, new_net_conf);
2332
2333 conn_free_crypto(connection);
2334 connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2335 connection->integrity_tfm = crypto.integrity_tfm;
2336 connection->csums_tfm = crypto.csums_tfm;
2337 connection->verify_tfm = crypto.verify_tfm;
2338
2339 connection->my_addr_len = nla_len(adm_ctx.my_addr);
2340 memcpy(&connection->my_addr, nla_data(adm_ctx.my_addr), connection->my_addr_len);
2341 connection->peer_addr_len = nla_len(adm_ctx.peer_addr);
2342 memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len);
2343
2344 mutex_unlock(&adm_ctx.resource->conf_update);
2345
2346 rcu_read_lock();
2347 idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2348 struct drbd_device *device = peer_device->device;
2349 device->send_cnt = 0;
2350 device->recv_cnt = 0;
2351 }
2352 rcu_read_unlock();
2353
2354 retcode = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
2355
2356 conn_reconfig_done(connection);
2357 mutex_unlock(&adm_ctx.resource->adm_mutex);
2358 drbd_adm_finish(&adm_ctx, info, retcode);
2359 return 0;
2360
2361 fail:
2362 free_crypto(&crypto);
2363 kfree(new_net_conf);
2364
2365 conn_reconfig_done(connection);
2366 mutex_unlock(&adm_ctx.resource->adm_mutex);
2367 out:
2368 drbd_adm_finish(&adm_ctx, info, retcode);
2369 return 0;
2370 }
2371
2372 static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force)
2373 {
2374 enum drbd_state_rv rv;
2375
2376 rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2377 force ? CS_HARD : 0);
2378
2379 switch (rv) {
2380 case SS_NOTHING_TO_DO:
2381 break;
2382 case SS_ALREADY_STANDALONE:
2383 return SS_SUCCESS;
2384 case SS_PRIMARY_NOP:
2385 /* Our state checking code wants to see the peer outdated. */
2386 rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), 0);
2387
2388 if (rv == SS_OUTDATE_WO_CONN) /* lost connection before graceful disconnect succeeded */
2389 rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_VERBOSE);
2390
2391 break;
2392 case SS_CW_FAILED_BY_PEER:
2393 /* The peer probably wants to see us outdated. */
2394 rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING,
2395 disk, D_OUTDATED), 0);
2396 if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
2397 rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2398 CS_HARD);
2399 }
2400 break;
2401 default:;
2402 /* no special handling necessary */
2403 }
2404
2405 if (rv >= SS_SUCCESS) {
2406 enum drbd_state_rv rv2;
2407 /* No one else can reconfigure the network while I am here.
2408 * The state handling only uses drbd_thread_stop_nowait(),
2409 * we want to really wait here until the receiver is no more.
2410 */
2411 drbd_thread_stop(&connection->receiver);
2412
2413 /* Race breaker. This additional state change request may be
2414 * necessary, if this was a forced disconnect during a receiver
2415 * restart. We may have "killed" the receiver thread just
2416 * after drbd_receiver() returned. Typically, we should be
2417 * C_STANDALONE already, now, and this becomes a no-op.
2418 */
2419 rv2 = conn_request_state(connection, NS(conn, C_STANDALONE),
2420 CS_VERBOSE | CS_HARD);
2421 if (rv2 < SS_SUCCESS)
2422 drbd_err(connection,
2423 "unexpected rv2=%d in conn_try_disconnect()\n",
2424 rv2);
2425 }
2426 return rv;
2427 }
2428
2429 int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
2430 {
2431 struct drbd_config_context adm_ctx;
2432 struct disconnect_parms parms;
2433 struct drbd_connection *connection;
2434 enum drbd_state_rv rv;
2435 enum drbd_ret_code retcode;
2436 int err;
2437
2438 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
2439 if (!adm_ctx.reply_skb)
2440 return retcode;
2441 if (retcode != NO_ERROR)
2442 goto fail;
2443
2444 connection = adm_ctx.connection;
2445 memset(&parms, 0, sizeof(parms));
2446 if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
2447 err = disconnect_parms_from_attrs(&parms, info);
2448 if (err) {
2449 retcode = ERR_MANDATORY_TAG;
2450 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2451 goto fail;
2452 }
2453 }
2454
2455 mutex_lock(&adm_ctx.resource->adm_mutex);
2456 rv = conn_try_disconnect(connection, parms.force_disconnect);
2457 if (rv < SS_SUCCESS)
2458 retcode = rv; /* FIXME: Type mismatch. */
2459 else
2460 retcode = NO_ERROR;
2461 mutex_unlock(&adm_ctx.resource->adm_mutex);
2462 fail:
2463 drbd_adm_finish(&adm_ctx, info, retcode);
2464 return 0;
2465 }
2466
2467 void resync_after_online_grow(struct drbd_device *device)
2468 {
2469 int iass; /* I am sync source */
2470
2471 drbd_info(device, "Resync of new storage after online grow\n");
2472 if (device->state.role != device->state.peer)
2473 iass = (device->state.role == R_PRIMARY);
2474 else
2475 iass = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
2476
2477 if (iass)
2478 drbd_start_resync(device, C_SYNC_SOURCE);
2479 else
2480 _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
2481 }
2482
2483 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
2484 {
2485 struct drbd_config_context adm_ctx;
2486 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
2487 struct resize_parms rs;
2488 struct drbd_device *device;
2489 enum drbd_ret_code retcode;
2490 enum determine_dev_size dd;
2491 bool change_al_layout = false;
2492 enum dds_flags ddsf;
2493 sector_t u_size;
2494 int err;
2495
2496 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2497 if (!adm_ctx.reply_skb)
2498 return retcode;
2499 if (retcode != NO_ERROR)
2500 goto finish;
2501
2502 mutex_lock(&adm_ctx.resource->adm_mutex);
2503 device = adm_ctx.device;
2504 if (!get_ldev(device)) {
2505 retcode = ERR_NO_DISK;
2506 goto fail;
2507 }
2508
2509 memset(&rs, 0, sizeof(struct resize_parms));
2510 rs.al_stripes = device->ldev->md.al_stripes;
2511 rs.al_stripe_size = device->ldev->md.al_stripe_size_4k * 4;
2512 if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
2513 err = resize_parms_from_attrs(&rs, info);
2514 if (err) {
2515 retcode = ERR_MANDATORY_TAG;
2516 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2517 goto fail_ldev;
2518 }
2519 }
2520
2521 if (device->state.conn > C_CONNECTED) {
2522 retcode = ERR_RESIZE_RESYNC;
2523 goto fail_ldev;
2524 }
2525
2526 if (device->state.role == R_SECONDARY &&
2527 device->state.peer == R_SECONDARY) {
2528 retcode = ERR_NO_PRIMARY;
2529 goto fail_ldev;
2530 }
2531
2532 if (rs.no_resync && first_peer_device(device)->connection->agreed_pro_version < 93) {
2533 retcode = ERR_NEED_APV_93;
2534 goto fail_ldev;
2535 }
2536
2537 rcu_read_lock();
2538 u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
2539 rcu_read_unlock();
2540 if (u_size != (sector_t)rs.resize_size) {
2541 new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
2542 if (!new_disk_conf) {
2543 retcode = ERR_NOMEM;
2544 goto fail_ldev;
2545 }
2546 }
2547
2548 if (device->ldev->md.al_stripes != rs.al_stripes ||
2549 device->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
2550 u32 al_size_k = rs.al_stripes * rs.al_stripe_size;
2551
2552 if (al_size_k > (16 * 1024 * 1024)) {
2553 retcode = ERR_MD_LAYOUT_TOO_BIG;
2554 goto fail_ldev;
2555 }
2556
2557 if (al_size_k < MD_32kB_SECT/2) {
2558 retcode = ERR_MD_LAYOUT_TOO_SMALL;
2559 goto fail_ldev;
2560 }
2561
2562 if (device->state.conn != C_CONNECTED && !rs.resize_force) {
2563 retcode = ERR_MD_LAYOUT_CONNECTED;
2564 goto fail_ldev;
2565 }
2566
2567 change_al_layout = true;
2568 }
2569
2570 if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev))
2571 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
2572
2573 if (new_disk_conf) {
2574 mutex_lock(&device->resource->conf_update);
2575 old_disk_conf = device->ldev->disk_conf;
2576 *new_disk_conf = *old_disk_conf;
2577 new_disk_conf->disk_size = (sector_t)rs.resize_size;
2578 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
2579 mutex_unlock(&device->resource->conf_update);
2580 synchronize_rcu();
2581 kfree(old_disk_conf);
2582 }
2583
2584 ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
2585 dd = drbd_determine_dev_size(device, ddsf, change_al_layout ? &rs : NULL);
2586 drbd_md_sync(device);
2587 put_ldev(device);
2588 if (dd == DS_ERROR) {
2589 retcode = ERR_NOMEM_BITMAP;
2590 goto fail;
2591 } else if (dd == DS_ERROR_SPACE_MD) {
2592 retcode = ERR_MD_LAYOUT_NO_FIT;
2593 goto fail;
2594 } else if (dd == DS_ERROR_SHRINK) {
2595 retcode = ERR_IMPLICIT_SHRINK;
2596 goto fail;
2597 }
2598
2599 if (device->state.conn == C_CONNECTED) {
2600 if (dd == DS_GREW)
2601 set_bit(RESIZE_PENDING, &device->flags);
2602
2603 drbd_send_uuids(first_peer_device(device));
2604 drbd_send_sizes(first_peer_device(device), 1, ddsf);
2605 }
2606
2607 fail:
2608 mutex_unlock(&adm_ctx.resource->adm_mutex);
2609 finish:
2610 drbd_adm_finish(&adm_ctx, info, retcode);
2611 return 0;
2612
2613 fail_ldev:
2614 put_ldev(device);
2615 goto fail;
2616 }
2617
2618 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
2619 {
2620 struct drbd_config_context adm_ctx;
2621 enum drbd_ret_code retcode;
2622 struct res_opts res_opts;
2623 int err;
2624
2625 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
2626 if (!adm_ctx.reply_skb)
2627 return retcode;
2628 if (retcode != NO_ERROR)
2629 goto fail;
2630
2631 res_opts = adm_ctx.resource->res_opts;
2632 if (should_set_defaults(info))
2633 set_res_opts_defaults(&res_opts);
2634
2635 err = res_opts_from_attrs(&res_opts, info);
2636 if (err && err != -ENOMSG) {
2637 retcode = ERR_MANDATORY_TAG;
2638 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2639 goto fail;
2640 }
2641
2642 mutex_lock(&adm_ctx.resource->adm_mutex);
2643 err = set_resource_options(adm_ctx.resource, &res_opts);
2644 if (err) {
2645 retcode = ERR_INVALID_REQUEST;
2646 if (err == -ENOMEM)
2647 retcode = ERR_NOMEM;
2648 }
2649 mutex_unlock(&adm_ctx.resource->adm_mutex);
2650
2651 fail:
2652 drbd_adm_finish(&adm_ctx, info, retcode);
2653 return 0;
2654 }
2655
2656 int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
2657 {
2658 struct drbd_config_context adm_ctx;
2659 struct drbd_device *device;
2660 int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2661
2662 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2663 if (!adm_ctx.reply_skb)
2664 return retcode;
2665 if (retcode != NO_ERROR)
2666 goto out;
2667
2668 device = adm_ctx.device;
2669 if (!get_ldev(device)) {
2670 retcode = ERR_NO_DISK;
2671 goto out;
2672 }
2673
2674 mutex_lock(&adm_ctx.resource->adm_mutex);
2675
2676 /* If there is still bitmap IO pending, probably because of a previous
2677 * resync just being finished, wait for it before requesting a new resync.
2678 * Also wait for it's after_state_ch(). */
2679 drbd_suspend_io(device);
2680 wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2681 drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
2682
2683 /* If we happen to be C_STANDALONE R_SECONDARY, just change to
2684 * D_INCONSISTENT, and set all bits in the bitmap. Otherwise,
2685 * try to start a resync handshake as sync target for full sync.
2686 */
2687 if (device->state.conn == C_STANDALONE && device->state.role == R_SECONDARY) {
2688 retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT));
2689 if (retcode >= SS_SUCCESS) {
2690 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
2691 "set_n_write from invalidate", BM_LOCKED_MASK))
2692 retcode = ERR_IO_MD_DISK;
2693 }
2694 } else
2695 retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
2696 drbd_resume_io(device);
2697 mutex_unlock(&adm_ctx.resource->adm_mutex);
2698 put_ldev(device);
2699 out:
2700 drbd_adm_finish(&adm_ctx, info, retcode);
2701 return 0;
2702 }
2703
2704 static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
2705 union drbd_state mask, union drbd_state val)
2706 {
2707 struct drbd_config_context adm_ctx;
2708 enum drbd_ret_code retcode;
2709
2710 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2711 if (!adm_ctx.reply_skb)
2712 return retcode;
2713 if (retcode != NO_ERROR)
2714 goto out;
2715
2716 mutex_lock(&adm_ctx.resource->adm_mutex);
2717 retcode = drbd_request_state(adm_ctx.device, mask, val);
2718 mutex_unlock(&adm_ctx.resource->adm_mutex);
2719 out:
2720 drbd_adm_finish(&adm_ctx, info, retcode);
2721 return 0;
2722 }
2723
2724 static int drbd_bmio_set_susp_al(struct drbd_device *device) __must_hold(local)
2725 {
2726 int rv;
2727
2728 rv = drbd_bmio_set_n_write(device);
2729 drbd_suspend_al(device);
2730 return rv;
2731 }
2732
2733 int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
2734 {
2735 struct drbd_config_context adm_ctx;
2736 int retcode; /* drbd_ret_code, drbd_state_rv */
2737 struct drbd_device *device;
2738
2739 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2740 if (!adm_ctx.reply_skb)
2741 return retcode;
2742 if (retcode != NO_ERROR)
2743 goto out;
2744
2745 device = adm_ctx.device;
2746 if (!get_ldev(device)) {
2747 retcode = ERR_NO_DISK;
2748 goto out;
2749 }
2750
2751 mutex_lock(&adm_ctx.resource->adm_mutex);
2752
2753 /* If there is still bitmap IO pending, probably because of a previous
2754 * resync just being finished, wait for it before requesting a new resync.
2755 * Also wait for it's after_state_ch(). */
2756 drbd_suspend_io(device);
2757 wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2758 drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
2759
2760 /* If we happen to be C_STANDALONE R_PRIMARY, just set all bits
2761 * in the bitmap. Otherwise, try to start a resync handshake
2762 * as sync source for full sync.
2763 */
2764 if (device->state.conn == C_STANDALONE && device->state.role == R_PRIMARY) {
2765 /* The peer will get a resync upon connect anyways. Just make that
2766 into a full resync. */
2767 retcode = drbd_request_state(device, NS(pdsk, D_INCONSISTENT));
2768 if (retcode >= SS_SUCCESS) {
2769 if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al,
2770 "set_n_write from invalidate_peer",
2771 BM_LOCKED_SET_ALLOWED))
2772 retcode = ERR_IO_MD_DISK;
2773 }
2774 } else
2775 retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
2776 drbd_resume_io(device);
2777 mutex_unlock(&adm_ctx.resource->adm_mutex);
2778 put_ldev(device);
2779 out:
2780 drbd_adm_finish(&adm_ctx, info, retcode);
2781 return 0;
2782 }
2783
2784 int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
2785 {
2786 struct drbd_config_context adm_ctx;
2787 enum drbd_ret_code retcode;
2788
2789 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2790 if (!adm_ctx.reply_skb)
2791 return retcode;
2792 if (retcode != NO_ERROR)
2793 goto out;
2794
2795 mutex_lock(&adm_ctx.resource->adm_mutex);
2796 if (drbd_request_state(adm_ctx.device, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
2797 retcode = ERR_PAUSE_IS_SET;
2798 mutex_unlock(&adm_ctx.resource->adm_mutex);
2799 out:
2800 drbd_adm_finish(&adm_ctx, info, retcode);
2801 return 0;
2802 }
2803
2804 int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
2805 {
2806 struct drbd_config_context adm_ctx;
2807 union drbd_dev_state s;
2808 enum drbd_ret_code retcode;
2809
2810 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2811 if (!adm_ctx.reply_skb)
2812 return retcode;
2813 if (retcode != NO_ERROR)
2814 goto out;
2815
2816 mutex_lock(&adm_ctx.resource->adm_mutex);
2817 if (drbd_request_state(adm_ctx.device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
2818 s = adm_ctx.device->state;
2819 if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
2820 retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
2821 s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
2822 } else {
2823 retcode = ERR_PAUSE_IS_CLEAR;
2824 }
2825 }
2826 mutex_unlock(&adm_ctx.resource->adm_mutex);
2827 out:
2828 drbd_adm_finish(&adm_ctx, info, retcode);
2829 return 0;
2830 }
2831
2832 int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
2833 {
2834 return drbd_adm_simple_request_state(skb, info, NS(susp, 1));
2835 }
2836
2837 int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
2838 {
2839 struct drbd_config_context adm_ctx;
2840 struct drbd_device *device;
2841 int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2842
2843 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2844 if (!adm_ctx.reply_skb)
2845 return retcode;
2846 if (retcode != NO_ERROR)
2847 goto out;
2848
2849 mutex_lock(&adm_ctx.resource->adm_mutex);
2850 device = adm_ctx.device;
2851 if (test_bit(NEW_CUR_UUID, &device->flags)) {
2852 drbd_uuid_new_current(device);
2853 clear_bit(NEW_CUR_UUID, &device->flags);
2854 }
2855 drbd_suspend_io(device);
2856 retcode = drbd_request_state(device, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
2857 if (retcode == SS_SUCCESS) {
2858 if (device->state.conn < C_CONNECTED)
2859 tl_clear(first_peer_device(device)->connection);
2860 if (device->state.disk == D_DISKLESS || device->state.disk == D_FAILED)
2861 tl_restart(first_peer_device(device)->connection, FAIL_FROZEN_DISK_IO);
2862 }
2863 drbd_resume_io(device);
2864 mutex_unlock(&adm_ctx.resource->adm_mutex);
2865 out:
2866 drbd_adm_finish(&adm_ctx, info, retcode);
2867 return 0;
2868 }
2869
2870 int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
2871 {
2872 return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
2873 }
2874
2875 static int nla_put_drbd_cfg_context(struct sk_buff *skb,
2876 struct drbd_resource *resource,
2877 struct drbd_connection *connection,
2878 struct drbd_device *device)
2879 {
2880 struct nlattr *nla;
2881 nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
2882 if (!nla)
2883 goto nla_put_failure;
2884 if (device &&
2885 nla_put_u32(skb, T_ctx_volume, device->vnr))
2886 goto nla_put_failure;
2887 if (nla_put_string(skb, T_ctx_resource_name, resource->name))
2888 goto nla_put_failure;
2889 if (connection) {
2890 if (connection->my_addr_len &&
2891 nla_put(skb, T_ctx_my_addr, connection->my_addr_len, &connection->my_addr))
2892 goto nla_put_failure;
2893 if (connection->peer_addr_len &&
2894 nla_put(skb, T_ctx_peer_addr, connection->peer_addr_len, &connection->peer_addr))
2895 goto nla_put_failure;
2896 }
2897 nla_nest_end(skb, nla);
2898 return 0;
2899
2900 nla_put_failure:
2901 if (nla)
2902 nla_nest_cancel(skb, nla);
2903 return -EMSGSIZE;
2904 }
2905
2906 /*
2907 * Return the connection of @resource if @resource has exactly one connection.
2908 */
2909 static struct drbd_connection *the_only_connection(struct drbd_resource *resource)
2910 {
2911 struct list_head *connections = &resource->connections;
2912
2913 if (list_empty(connections) || connections->next->next != connections)
2914 return NULL;
2915 return list_first_entry(&resource->connections, struct drbd_connection, connections);
2916 }
2917
2918 int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
2919 const struct sib_info *sib)
2920 {
2921 struct drbd_resource *resource = device->resource;
2922 struct state_info *si = NULL; /* for sizeof(si->member); */
2923 struct nlattr *nla;
2924 int got_ldev;
2925 int err = 0;
2926 int exclude_sensitive;
2927
2928 /* If sib != NULL, this is drbd_bcast_event, which anyone can listen
2929 * to. So we better exclude_sensitive information.
2930 *
2931 * If sib == NULL, this is drbd_adm_get_status, executed synchronously
2932 * in the context of the requesting user process. Exclude sensitive
2933 * information, unless current has superuser.
2934 *
2935 * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
2936 * relies on the current implementation of netlink_dump(), which
2937 * executes the dump callback successively from netlink_recvmsg(),
2938 * always in the context of the receiving process */
2939 exclude_sensitive = sib || !capable(CAP_SYS_ADMIN);
2940
2941 got_ldev = get_ldev(device);
2942
2943 /* We need to add connection name and volume number information still.
2944 * Minor number is in drbd_genlmsghdr. */
2945 if (nla_put_drbd_cfg_context(skb, resource, the_only_connection(resource), device))
2946 goto nla_put_failure;
2947
2948 if (res_opts_to_skb(skb, &device->resource->res_opts, exclude_sensitive))
2949 goto nla_put_failure;
2950
2951 rcu_read_lock();
2952 if (got_ldev) {
2953 struct disk_conf *disk_conf;
2954
2955 disk_conf = rcu_dereference(device->ldev->disk_conf);
2956 err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
2957 }
2958 if (!err) {
2959 struct net_conf *nc;
2960
2961 nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
2962 if (nc)
2963 err = net_conf_to_skb(skb, nc, exclude_sensitive);
2964 }
2965 rcu_read_unlock();
2966 if (err)
2967 goto nla_put_failure;
2968
2969 nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
2970 if (!nla)
2971 goto nla_put_failure;
2972 if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
2973 nla_put_u32(skb, T_current_state, device->state.i) ||
2974 nla_put_u64(skb, T_ed_uuid, device->ed_uuid) ||
2975 nla_put_u64(skb, T_capacity, drbd_get_capacity(device->this_bdev)) ||
2976 nla_put_u64(skb, T_send_cnt, device->send_cnt) ||
2977 nla_put_u64(skb, T_recv_cnt, device->recv_cnt) ||
2978 nla_put_u64(skb, T_read_cnt, device->read_cnt) ||
2979 nla_put_u64(skb, T_writ_cnt, device->writ_cnt) ||
2980 nla_put_u64(skb, T_al_writ_cnt, device->al_writ_cnt) ||
2981 nla_put_u64(skb, T_bm_writ_cnt, device->bm_writ_cnt) ||
2982 nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&device->ap_bio_cnt)) ||
2983 nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&device->ap_pending_cnt)) ||
2984 nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&device->rs_pending_cnt)))
2985 goto nla_put_failure;
2986
2987 if (got_ldev) {
2988 int err;
2989
2990 spin_lock_irq(&device->ldev->md.uuid_lock);
2991 err = nla_put(skb, T_uuids, sizeof(si->uuids), device->ldev->md.uuid);
2992 spin_unlock_irq(&device->ldev->md.uuid_lock);
2993
2994 if (err)
2995 goto nla_put_failure;
2996
2997 if (nla_put_u32(skb, T_disk_flags, device->ldev->md.flags) ||
2998 nla_put_u64(skb, T_bits_total, drbd_bm_bits(device)) ||
2999 nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(device)))
3000 goto nla_put_failure;
3001 if (C_SYNC_SOURCE <= device->state.conn &&
3002 C_PAUSED_SYNC_T >= device->state.conn) {
3003 if (nla_put_u64(skb, T_bits_rs_total, device->rs_total) ||
3004 nla_put_u64(skb, T_bits_rs_failed, device->rs_failed))
3005 goto nla_put_failure;
3006 }
3007 }
3008
3009 if (sib) {
3010 switch(sib->sib_reason) {
3011 case SIB_SYNC_PROGRESS:
3012 case SIB_GET_STATUS_REPLY:
3013 break;
3014 case SIB_STATE_CHANGE:
3015 if (nla_put_u32(skb, T_prev_state, sib->os.i) ||
3016 nla_put_u32(skb, T_new_state, sib->ns.i))
3017 goto nla_put_failure;
3018 break;
3019 case SIB_HELPER_POST:
3020 if (nla_put_u32(skb, T_helper_exit_code,
3021 sib->helper_exit_code))
3022 goto nla_put_failure;
3023 /* fall through */
3024 case SIB_HELPER_PRE:
3025 if (nla_put_string(skb, T_helper, sib->helper_name))
3026 goto nla_put_failure;
3027 break;
3028 }
3029 }
3030 nla_nest_end(skb, nla);
3031
3032 if (0)
3033 nla_put_failure:
3034 err = -EMSGSIZE;
3035 if (got_ldev)
3036 put_ldev(device);
3037 return err;
3038 }
3039
3040 int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
3041 {
3042 struct drbd_config_context adm_ctx;
3043 enum drbd_ret_code retcode;
3044 int err;
3045
3046 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3047 if (!adm_ctx.reply_skb)
3048 return retcode;
3049 if (retcode != NO_ERROR)
3050 goto out;
3051
3052 err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.device, NULL);
3053 if (err) {
3054 nlmsg_free(adm_ctx.reply_skb);
3055 return err;
3056 }
3057 out:
3058 drbd_adm_finish(&adm_ctx, info, retcode);
3059 return 0;
3060 }
3061
/*
 * Dump callback worker: add at most one status message to @skb and record
 * in @cb where the next invocation has to continue.
 *
 * Iterator state kept in @cb:
 *   cb->args[0]  resource to look at next (a struct drbd_resource pointer)
 *   cb->args[1]  volume number to continue with inside that resource
 *   cb->args[2]  non-zero if only a single resource is to be dumped
 *
 * Returns skb->len; if nothing was added to an empty @skb this is zero.
 */
static int get_one_status(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct drbd_device *device;
	struct drbd_genlmsghdr *dh;
	struct drbd_resource *pos = (struct drbd_resource *)cb->args[0];
	struct drbd_resource *resource = NULL;
	struct drbd_resource *tmp;
	unsigned volume = cb->args[1];

	/* Open coded, deferred, iteration:
	 * for_each_resource_safe(resource, tmp, &drbd_resources) {
	 *      connection = "first connection of resource or undefined";
	 *      idr_for_each_entry(&resource->devices, device, i) {
	 *        ...
	 *      }
	 * }
	 * where resource is cb->args[0];
	 * and i is cb->args[1];
	 *
	 * cb->args[2] indicates if we shall loop over all resources,
	 * or just dump all volumes of a single resource.
	 *
	 * This may miss entries inserted after this dump started,
	 * or entries deleted before they are reached.
	 *
	 * We need to make sure the device won't disappear while
	 * we are looking at it, and revalidate our iterators
	 * on each iteration.
	 */

	/* synchronize with conn_create()/drbd_destroy_connection() */
	rcu_read_lock();
	/* revalidate iterator position:
	 * only continue with pos if it is still found on drbd_resources;
	 * otherwise resource stays NULL and nothing is dumped. */
	for_each_resource_rcu(tmp, &drbd_resources) {
		if (pos == NULL) {
			/* first iteration */
			pos = tmp;
			resource = pos;
			break;
		}
		if (tmp == pos) {
			resource = pos;
			break;
		}
	}
	if (resource) {
next_resource:
		device = idr_get_next(&resource->devices, &volume);
		if (!device) {
			/* No more volumes to dump on this resource.
			 * Advance resource iterator. */
			pos = list_entry_rcu(resource->resources.next,
					     struct drbd_resource, resources);
			/* Did we dump any volume of this resource yet? */
			if (volume != 0) {
				/* If we reached the end of the list,
				 * or only a single resource dump was requested,
				 * we are done. */
				if (&pos->resources == &drbd_resources || cb->args[2])
					goto out;
				volume = 0;
				resource = pos;
				goto next_resource;
			}
			/* volume == 0: no volume was found for this resource;
			 * fall through and report the resource itself below. */
		}

		dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
				cb->nlh->nlmsg_seq, &drbd_genl_family,
				NLM_F_MULTI, DRBD_ADM_GET_STATUS);
		if (!dh)
			goto out;

		if (!device) {
			/* This is a resource without a single volume.
			 * Surprisingly enough, it may have a network
			 * configuration. */
			struct drbd_connection *connection;

			dh->minor = -1U;
			dh->ret_code = NO_ERROR;
			connection = the_only_connection(resource);
			if (nla_put_drbd_cfg_context(skb, resource, connection, NULL))
				goto cancel;
			if (connection) {
				struct net_conf *nc;

				nc = rcu_dereference(connection->net_conf);
				/* third argument 1: exclude sensitive members */
				if (nc && net_conf_to_skb(skb, nc, 1) != 0)
					goto cancel;
			}
			goto done;
		}

		D_ASSERT(device, device->vnr == volume);
		D_ASSERT(device, device->resource == resource);

		dh->minor = device_to_minor(device);
		dh->ret_code = NO_ERROR;

		if (nla_put_status_info(skb, device, NULL)) {
cancel:
			/* Drop the partially built message again. */
			genlmsg_cancel(skb, dh);
			goto out;
		}
done:
		genlmsg_end(skb, dh);
	}

out:
	rcu_read_unlock();
	/* where to start the next iteration:
	 * still within the same resource, continue after this volume;
	 * if the resource iterator was advanced, restart at volume 0. */
	cb->args[0] = (long)pos;
	cb->args[1] = (pos == resource) ? volume + 1 : 0;

	/* No more resources/volumes/minors found results in an empty skb.
	 * Which will terminate the dump. */
	return skb->len;
}
3180
3181 /*
3182 * Request status of all resources, or of all volumes within a single resource.
3183 *
3184 * This is a dump, as the answer may not fit in a single reply skb otherwise.
3185 * Which means we cannot use the family->attrbuf or other such members, because
3186 * dump is NOT protected by the genl_lock(). During dump, we only have access
3187 * to the incoming skb, and need to opencode "parsing" of the nlattr payload.
3188 *
3189 * Once things are setup properly, we call into get_one_status().
3190 */
3191 int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
3192 {
3193 const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
3194 struct nlattr *nla;
3195 const char *resource_name;
3196 struct drbd_resource *resource;
3197 int maxtype;
3198
3199 /* Is this a followup call? */
3200 if (cb->args[0]) {
3201 /* ... of a single resource dump,
3202 * and the resource iterator has been advanced already? */
3203 if (cb->args[2] && cb->args[2] != cb->args[0])
3204 return 0; /* DONE. */
3205 goto dump;
3206 }
3207
3208 /* First call (from netlink_dump_start). We need to figure out
3209 * which resource(s) the user wants us to dump. */
3210 nla = nla_find(nlmsg_attrdata(cb->nlh, hdrlen),
3211 nlmsg_attrlen(cb->nlh, hdrlen),
3212 DRBD_NLA_CFG_CONTEXT);
3213
3214 /* No explicit context given. Dump all. */
3215 if (!nla)
3216 goto dump;
3217 maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
3218 nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
3219 if (IS_ERR(nla))
3220 return PTR_ERR(nla);
3221 /* context given, but no name present? */
3222 if (!nla)
3223 return -EINVAL;
3224 resource_name = nla_data(nla);
3225 if (!*resource_name)
3226 return -ENODEV;
3227 resource = drbd_find_resource(resource_name);
3228 if (!resource)
3229 return -ENODEV;
3230
3231 kref_put(&resource->kref, drbd_destroy_resource); /* get_one_status() revalidates the resource */
3232
3233 /* prime iterators, and set "filter" mode mark:
3234 * only dump this connection. */
3235 cb->args[0] = (long)resource;
3236 /* cb->args[1] = 0; passed in this way. */
3237 cb->args[2] = (long)resource;
3238
3239 dump:
3240 return get_one_status(skb, cb);
3241 }
3242
3243 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
3244 {
3245 struct drbd_config_context adm_ctx;
3246 enum drbd_ret_code retcode;
3247 struct timeout_parms tp;
3248 int err;
3249
3250 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3251 if (!adm_ctx.reply_skb)
3252 return retcode;
3253 if (retcode != NO_ERROR)
3254 goto out;
3255
3256 tp.timeout_type =
3257 adm_ctx.device->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
3258 test_bit(USE_DEGR_WFC_T, &adm_ctx.device->flags) ? UT_DEGRADED :
3259 UT_DEFAULT;
3260
3261 err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
3262 if (err) {
3263 nlmsg_free(adm_ctx.reply_skb);
3264 return err;
3265 }
3266 out:
3267 drbd_adm_finish(&adm_ctx, info, retcode);
3268 return 0;
3269 }
3270
3271 int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
3272 {
3273 struct drbd_config_context adm_ctx;
3274 struct drbd_device *device;
3275 enum drbd_ret_code retcode;
3276 struct start_ov_parms parms;
3277
3278 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3279 if (!adm_ctx.reply_skb)
3280 return retcode;
3281 if (retcode != NO_ERROR)
3282 goto out;
3283
3284 device = adm_ctx.device;
3285
3286 /* resume from last known position, if possible */
3287 parms.ov_start_sector = device->ov_start_sector;
3288 parms.ov_stop_sector = ULLONG_MAX;
3289 if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
3290 int err = start_ov_parms_from_attrs(&parms, info);
3291 if (err) {
3292 retcode = ERR_MANDATORY_TAG;
3293 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3294 goto out;
3295 }
3296 }
3297 mutex_lock(&adm_ctx.resource->adm_mutex);
3298
3299 /* w_make_ov_request expects position to be aligned */
3300 device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
3301 device->ov_stop_sector = parms.ov_stop_sector;
3302
3303 /* If there is still bitmap IO pending, e.g. previous resync or verify
3304 * just being finished, wait for it before requesting a new resync. */
3305 drbd_suspend_io(device);
3306 wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
3307 retcode = drbd_request_state(device, NS(conn, C_VERIFY_S));
3308 drbd_resume_io(device);
3309
3310 mutex_unlock(&adm_ctx.resource->adm_mutex);
3311 out:
3312 drbd_adm_finish(&adm_ctx, info, retcode);
3313 return 0;
3314 }
3315
3316
3317 int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
3318 {
3319 struct drbd_config_context adm_ctx;
3320 struct drbd_device *device;
3321 enum drbd_ret_code retcode;
3322 int skip_initial_sync = 0;
3323 int err;
3324 struct new_c_uuid_parms args;
3325
3326 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3327 if (!adm_ctx.reply_skb)
3328 return retcode;
3329 if (retcode != NO_ERROR)
3330 goto out_nolock;
3331
3332 device = adm_ctx.device;
3333 memset(&args, 0, sizeof(args));
3334 if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
3335 err = new_c_uuid_parms_from_attrs(&args, info);
3336 if (err) {
3337 retcode = ERR_MANDATORY_TAG;
3338 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3339 goto out_nolock;
3340 }
3341 }
3342
3343 mutex_lock(&adm_ctx.resource->adm_mutex);
3344 mutex_lock(device->state_mutex); /* Protects us against serialized state changes. */
3345
3346 if (!get_ldev(device)) {
3347 retcode = ERR_NO_DISK;
3348 goto out;
3349 }
3350
3351 /* this is "skip initial sync", assume to be clean */
3352 if (device->state.conn == C_CONNECTED &&
3353 first_peer_device(device)->connection->agreed_pro_version >= 90 &&
3354 device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
3355 drbd_info(device, "Preparing to skip initial sync\n");
3356 skip_initial_sync = 1;
3357 } else if (device->state.conn != C_STANDALONE) {
3358 retcode = ERR_CONNECTED;
3359 goto out_dec;
3360 }
3361
3362 drbd_uuid_set(device, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
3363 drbd_uuid_new_current(device); /* New current, previous to UI_BITMAP */
3364
3365 if (args.clear_bm) {
3366 err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
3367 "clear_n_write from new_c_uuid", BM_LOCKED_MASK);
3368 if (err) {
3369 drbd_err(device, "Writing bitmap failed with %d\n", err);
3370 retcode = ERR_IO_MD_DISK;
3371 }
3372 if (skip_initial_sync) {
3373 drbd_send_uuids_skip_initial_sync(first_peer_device(device));
3374 _drbd_uuid_set(device, UI_BITMAP, 0);
3375 drbd_print_uuids(device, "cleared bitmap UUID");
3376 spin_lock_irq(&device->resource->req_lock);
3377 _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3378 CS_VERBOSE, NULL);
3379 spin_unlock_irq(&device->resource->req_lock);
3380 }
3381 }
3382
3383 drbd_md_sync(device);
3384 out_dec:
3385 put_ldev(device);
3386 out:
3387 mutex_unlock(device->state_mutex);
3388 mutex_unlock(&adm_ctx.resource->adm_mutex);
3389 out_nolock:
3390 drbd_adm_finish(&adm_ctx, info, retcode);
3391 return 0;
3392 }
3393
3394 static enum drbd_ret_code
3395 drbd_check_resource_name(struct drbd_config_context *adm_ctx)
3396 {
3397 const char *name = adm_ctx->resource_name;
3398 if (!name || !name[0]) {
3399 drbd_msg_put_info(adm_ctx->reply_skb, "resource name missing");
3400 return ERR_MANDATORY_TAG;
3401 }
3402 /* if we want to use these in sysfs/configfs/debugfs some day,
3403 * we must not allow slashes */
3404 if (strchr(name, '/')) {
3405 drbd_msg_put_info(adm_ctx->reply_skb, "invalid resource name");
3406 return ERR_INVALID_REQUEST;
3407 }
3408 return NO_ERROR;
3409 }
3410
3411 int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
3412 {
3413 struct drbd_config_context adm_ctx;
3414 enum drbd_ret_code retcode;
3415 struct res_opts res_opts;
3416 int err;
3417
3418 retcode = drbd_adm_prepare(&adm_ctx, skb, info, 0);
3419 if (!adm_ctx.reply_skb)
3420 return retcode;
3421 if (retcode != NO_ERROR)
3422 goto out;
3423
3424 set_res_opts_defaults(&res_opts);
3425 err = res_opts_from_attrs(&res_opts, info);
3426 if (err && err != -ENOMSG) {
3427 retcode = ERR_MANDATORY_TAG;
3428 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3429 goto out;
3430 }
3431
3432 retcode = drbd_check_resource_name(&adm_ctx);
3433 if (retcode != NO_ERROR)
3434 goto out;
3435
3436 if (adm_ctx.resource) {
3437 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
3438 retcode = ERR_INVALID_REQUEST;
3439 drbd_msg_put_info(adm_ctx.reply_skb, "resource exists");
3440 }
3441 /* else: still NO_ERROR */
3442 goto out;
3443 }
3444
3445 /* not yet safe for genl_family.parallel_ops */
3446 if (!conn_create(adm_ctx.resource_name, &res_opts))
3447 retcode = ERR_NOMEM;
3448 out:
3449 drbd_adm_finish(&adm_ctx, info, retcode);
3450 return 0;
3451 }
3452
3453 int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
3454 {
3455 struct drbd_config_context adm_ctx;
3456 struct drbd_genlmsghdr *dh = info->userhdr;
3457 enum drbd_ret_code retcode;
3458
3459 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
3460 if (!adm_ctx.reply_skb)
3461 return retcode;
3462 if (retcode != NO_ERROR)
3463 goto out;
3464
3465 if (dh->minor > MINORMASK) {
3466 drbd_msg_put_info(adm_ctx.reply_skb, "requested minor out of range");
3467 retcode = ERR_INVALID_REQUEST;
3468 goto out;
3469 }
3470 if (adm_ctx.volume > DRBD_VOLUME_MAX) {
3471 drbd_msg_put_info(adm_ctx.reply_skb, "requested volume id out of range");
3472 retcode = ERR_INVALID_REQUEST;
3473 goto out;
3474 }
3475
3476 /* drbd_adm_prepare made sure already
3477 * that first_peer_device(device)->connection and device->vnr match the request. */
3478 if (adm_ctx.device) {
3479 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
3480 retcode = ERR_MINOR_EXISTS;
3481 /* else: still NO_ERROR */
3482 goto out;
3483 }
3484
3485 mutex_lock(&adm_ctx.resource->adm_mutex);
3486 retcode = drbd_create_device(&adm_ctx, dh->minor);
3487 mutex_unlock(&adm_ctx.resource->adm_mutex);
3488 out:
3489 drbd_adm_finish(&adm_ctx, info, retcode);
3490 return 0;
3491 }
3492
3493 static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
3494 {
3495 if (device->state.disk == D_DISKLESS &&
3496 /* no need to be device->state.conn == C_STANDALONE &&
3497 * we may want to delete a minor from a live replication group.
3498 */
3499 device->state.role == R_SECONDARY) {
3500 _drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS),
3501 CS_VERBOSE + CS_WAIT_COMPLETE);
3502 drbd_delete_device(device);
3503 return NO_ERROR;
3504 } else
3505 return ERR_MINOR_CONFIGURED;
3506 }
3507
3508 int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info)
3509 {
3510 struct drbd_config_context adm_ctx;
3511 enum drbd_ret_code retcode;
3512
3513 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3514 if (!adm_ctx.reply_skb)
3515 return retcode;
3516 if (retcode != NO_ERROR)
3517 goto out;
3518
3519 mutex_lock(&adm_ctx.resource->adm_mutex);
3520 retcode = adm_del_minor(adm_ctx.device);
3521 mutex_unlock(&adm_ctx.resource->adm_mutex);
3522 out:
3523 drbd_adm_finish(&adm_ctx, info, retcode);
3524 return 0;
3525 }
3526
3527 int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
3528 {
3529 struct drbd_config_context adm_ctx;
3530 struct drbd_resource *resource;
3531 struct drbd_connection *connection;
3532 struct drbd_device *device;
3533 int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
3534 unsigned i;
3535
3536 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
3537 if (!adm_ctx.reply_skb)
3538 return retcode;
3539 if (retcode != NO_ERROR)
3540 goto finish;
3541
3542 resource = adm_ctx.resource;
3543 mutex_lock(&resource->adm_mutex);
3544 /* demote */
3545 for_each_connection(connection, resource) {
3546 struct drbd_peer_device *peer_device;
3547
3548 idr_for_each_entry(&connection->peer_devices, peer_device, i) {
3549 retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0);
3550 if (retcode < SS_SUCCESS) {
3551 drbd_msg_put_info(adm_ctx.reply_skb, "failed to demote");
3552 goto out;
3553 }
3554 }
3555
3556 retcode = conn_try_disconnect(connection, 0);
3557 if (retcode < SS_SUCCESS) {
3558 drbd_msg_put_info(adm_ctx.reply_skb, "failed to disconnect");
3559 goto out;
3560 }
3561 }
3562
3563 /* detach */
3564 idr_for_each_entry(&resource->devices, device, i) {
3565 retcode = adm_detach(device, 0);
3566 if (retcode < SS_SUCCESS || retcode > NO_ERROR) {
3567 drbd_msg_put_info(adm_ctx.reply_skb, "failed to detach");
3568 goto out;
3569 }
3570 }
3571
3572 /* If we reach this, all volumes (of this connection) are Secondary,
3573 * Disconnected, Diskless, aka Unconfigured. Make sure all threads have
3574 * actually stopped, state handling only does drbd_thread_stop_nowait(). */
3575 for_each_connection(connection, resource)
3576 drbd_thread_stop(&connection->worker);
3577
3578 /* Now, nothing can fail anymore */
3579
3580 /* delete volumes */
3581 idr_for_each_entry(&resource->devices, device, i) {
3582 retcode = adm_del_minor(device);
3583 if (retcode != NO_ERROR) {
3584 /* "can not happen" */
3585 drbd_msg_put_info(adm_ctx.reply_skb, "failed to delete volume");
3586 goto out;
3587 }
3588 }
3589
3590 list_del_rcu(&resource->resources);
3591 synchronize_rcu();
3592 drbd_free_resource(resource);
3593 retcode = NO_ERROR;
3594 out:
3595 mutex_unlock(&resource->adm_mutex);
3596 finish:
3597 drbd_adm_finish(&adm_ctx, info, retcode);
3598 return 0;
3599 }
3600
3601 int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
3602 {
3603 struct drbd_config_context adm_ctx;
3604 struct drbd_resource *resource;
3605 struct drbd_connection *connection;
3606 enum drbd_ret_code retcode;
3607
3608 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
3609 if (!adm_ctx.reply_skb)
3610 return retcode;
3611 if (retcode != NO_ERROR)
3612 goto finish;
3613
3614 resource = adm_ctx.resource;
3615 mutex_lock(&resource->adm_mutex);
3616 for_each_connection(connection, resource) {
3617 if (connection->cstate > C_STANDALONE) {
3618 retcode = ERR_NET_CONFIGURED;
3619 goto out;
3620 }
3621 }
3622 if (!idr_is_empty(&resource->devices)) {
3623 retcode = ERR_RES_IN_USE;
3624 goto out;
3625 }
3626
3627 list_del_rcu(&resource->resources);
3628 for_each_connection(connection, resource)
3629 drbd_thread_stop(&connection->worker);
3630 synchronize_rcu();
3631 drbd_free_resource(resource);
3632 retcode = NO_ERROR;
3633 out:
3634 mutex_unlock(&resource->adm_mutex);
3635 finish:
3636 drbd_adm_finish(&adm_ctx, info, retcode);
3637 return 0;
3638 }
3639
3640 void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
3641 {
3642 static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
3643 struct sk_buff *msg;
3644 struct drbd_genlmsghdr *d_out;
3645 unsigned seq;
3646 int err = -ENOMEM;
3647
3648 seq = atomic_inc_return(&drbd_genl_seq);
3649 msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
3650 if (!msg)
3651 goto failed;
3652
3653 err = -EMSGSIZE;
3654 d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT);
3655 if (!d_out) /* cannot happen, but anyways. */
3656 goto nla_put_failure;
3657 d_out->minor = device_to_minor(device);
3658 d_out->ret_code = NO_ERROR;
3659
3660 if (nla_put_status_info(msg, device, sib))
3661 goto nla_put_failure;
3662 genlmsg_end(msg, d_out);
3663 err = drbd_genl_multicast_events(msg, 0);
3664 /* msg has been consumed or freed in netlink_broadcast() */
3665 if (err && err != -ESRCH)
3666 goto failed;
3667
3668 return;
3669
3670 nla_put_failure:
3671 nlmsg_free(msg);
3672 failed:
3673 drbd_err(device, "Error %d while broadcasting event. "
3674 "Event seq:%u sib_reason:%u\n",
3675 err, seq, sib->sib_reason);
3676 }