1 /*
2 drbd_nl.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24 */
25
26 #include <linux/module.h>
27 #include <linux/drbd.h>
28 #include <linux/in.h>
29 #include <linux/fs.h>
30 #include <linux/file.h>
31 #include <linux/slab.h>
32 #include <linux/blkpg.h>
33 #include <linux/cpumask.h>
34 #include "drbd_int.h"
35 #include "drbd_protocol.h"
36 #include "drbd_req.h"
37 #include <asm/unaligned.h>
38 #include <linux/drbd_limits.h>
39 #include <linux/kthread.h>
40
41 #include <net/genetlink.h>
42
43 /* .doit */
44 // int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
45 // int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);
46
47 int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info);
48 int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info);
49
50 int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info);
51 int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info);
52 int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);
53
54 int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
55 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
56 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
57 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
58 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
59 int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
60 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
61 int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
62 int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
63 int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
64 int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
65 int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
66 int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
67 int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
68 int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
69 int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
70 int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
71 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
72 int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
73 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
74 /* .dumpit */
75 int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
76
77 #include <linux/drbd_genl_api.h>
78 #include "drbd_nla.h"
79 #include <linux/genl_magic_func.h>
80
82 /* Used with blkdev_get_by_path() to claim our meta data device(s). */
82 static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
83
84 static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
85 {
86 genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
87 if (genlmsg_reply(skb, info))
88 printk(KERN_ERR "drbd: error sending genl reply\n");
89 }
90
91 /* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
92  * reason it could fail would be lack of space in the skb, and there are 4k available. */
93 int drbd_msg_put_info(struct sk_buff *skb, const char *info)
94 {
95 struct nlattr *nla;
96 int err = -EMSGSIZE;
97
98 if (!info || !info[0])
99 return 0;
100
101 nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
102 if (!nla)
103 return err;
104
105 err = nla_put_string(skb, T_info_text, info);
106 if (err) {
107 nla_nest_cancel(skb, nla);
108 return err;
109 } else
110 nla_nest_end(skb, nla);
111 return 0;
112 }
113
114 /* This would be a good candidate for a "pre_doit" hook,
115 * and per-family private info->pointers.
116 * But we need to stay compatible with older kernels.
117 * If it returns successfully, adm_ctx members are valid.
118 *
119 * At this point, we still rely on the global genl_lock().
120 * If we want to avoid that, and allow "genl_family.parallel_ops", we may need
121 * to add additional synchronization against object destruction/modification.
122 */
123 #define DRBD_ADM_NEED_MINOR 1
124 #define DRBD_ADM_NEED_RESOURCE 2
125 #define DRBD_ADM_NEED_CONNECTION 4
126 static int drbd_adm_prepare(struct drbd_config_context *adm_ctx,
127 struct sk_buff *skb, struct genl_info *info, unsigned flags)
128 {
129 struct drbd_genlmsghdr *d_in = info->userhdr;
130 const u8 cmd = info->genlhdr->cmd;
131 int err;
132
133 memset(adm_ctx, 0, sizeof(*adm_ctx));
134
135 /* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
136 if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
137 return -EPERM;
138
139 adm_ctx->reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
140 if (!adm_ctx->reply_skb) {
141 err = -ENOMEM;
142 goto fail;
143 }
144
145 adm_ctx->reply_dh = genlmsg_put_reply(adm_ctx->reply_skb,
146 info, &drbd_genl_family, 0, cmd);
147 /* A put of a few bytes into a fresh skb of >= 4k will always succeed,
148 * but check anyway */
149 if (!adm_ctx->reply_dh) {
150 err = -ENOMEM;
151 goto fail;
152 }
153
154 adm_ctx->reply_dh->minor = d_in->minor;
155 adm_ctx->reply_dh->ret_code = NO_ERROR;
156
157 adm_ctx->volume = VOLUME_UNSPECIFIED;
158 if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
159 struct nlattr *nla;
160 /* parse and validate only */
161 err = drbd_cfg_context_from_attrs(NULL, info);
162 if (err)
163 goto fail;
164
165 /* It was present, and valid,
166 * copy it over to the reply skb. */
167 err = nla_put_nohdr(adm_ctx->reply_skb,
168 info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
169 info->attrs[DRBD_NLA_CFG_CONTEXT]);
170 if (err)
171 goto fail;
172
173 /* and assign stuff to the adm_ctx */
174 nla = nested_attr_tb[__nla_type(T_ctx_volume)];
175 if (nla)
176 adm_ctx->volume = nla_get_u32(nla);
177 nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
178 if (nla)
179 adm_ctx->resource_name = nla_data(nla);
180 adm_ctx->my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
181 adm_ctx->peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
182 if ((adm_ctx->my_addr &&
183 nla_len(adm_ctx->my_addr) > sizeof(adm_ctx->connection->my_addr)) ||
184 (adm_ctx->peer_addr &&
185 nla_len(adm_ctx->peer_addr) > sizeof(adm_ctx->connection->peer_addr))) {
186 err = -EINVAL;
187 goto fail;
188 }
189 }
190
191 adm_ctx->minor = d_in->minor;
192 adm_ctx->device = minor_to_device(d_in->minor);
193
194 /* We are protected by the global genl_lock().
195 * But we may explicitly drop it/retake it in drbd_adm_set_role(),
196 * so make sure this object stays around. */
197 if (adm_ctx->device)
198 kref_get(&adm_ctx->device->kref);
199
200 if (adm_ctx->resource_name) {
201 adm_ctx->resource = drbd_find_resource(adm_ctx->resource_name);
202 }
203
204 if (!adm_ctx->device && (flags & DRBD_ADM_NEED_MINOR)) {
205 drbd_msg_put_info(adm_ctx->reply_skb, "unknown minor");
206 return ERR_MINOR_INVALID;
207 }
208 if (!adm_ctx->resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
209 drbd_msg_put_info(adm_ctx->reply_skb, "unknown resource");
210 if (adm_ctx->resource_name)
211 return ERR_RES_NOT_KNOWN;
212 return ERR_INVALID_REQUEST;
213 }
214
215 if (flags & DRBD_ADM_NEED_CONNECTION) {
216 if (adm_ctx->resource) {
217 drbd_msg_put_info(adm_ctx->reply_skb, "no resource name expected");
218 return ERR_INVALID_REQUEST;
219 }
220 if (adm_ctx->device) {
221 drbd_msg_put_info(adm_ctx->reply_skb, "no minor number expected");
222 return ERR_INVALID_REQUEST;
223 }
224 if (adm_ctx->my_addr && adm_ctx->peer_addr)
225 adm_ctx->connection = conn_get_by_addrs(nla_data(adm_ctx->my_addr),
226 nla_len(adm_ctx->my_addr),
227 nla_data(adm_ctx->peer_addr),
228 nla_len(adm_ctx->peer_addr));
229 if (!adm_ctx->connection) {
230 drbd_msg_put_info(adm_ctx->reply_skb, "unknown connection");
231 return ERR_INVALID_REQUEST;
232 }
233 }
234
235 /* some more paranoia, if the request was over-determined */
236 if (adm_ctx->device && adm_ctx->resource &&
237 adm_ctx->device->resource != adm_ctx->resource) {
238 pr_warning("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
239 adm_ctx->minor, adm_ctx->resource->name,
240 adm_ctx->device->resource->name);
241 drbd_msg_put_info(adm_ctx->reply_skb, "minor exists in different resource");
242 return ERR_INVALID_REQUEST;
243 }
244 if (adm_ctx->device &&
245 adm_ctx->volume != VOLUME_UNSPECIFIED &&
246 adm_ctx->volume != adm_ctx->device->vnr) {
247 pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
248 adm_ctx->minor, adm_ctx->volume,
249 adm_ctx->device->vnr,
250 adm_ctx->device->resource->name);
251 drbd_msg_put_info(adm_ctx->reply_skb, "minor exists as different volume");
252 return ERR_INVALID_REQUEST;
253 }
254
255 /* still, provide adm_ctx->resource always, if possible. */
256 if (!adm_ctx->resource) {
257 adm_ctx->resource = adm_ctx->device ? adm_ctx->device->resource
258 : adm_ctx->connection ? adm_ctx->connection->resource : NULL;
259 if (adm_ctx->resource)
260 kref_get(&adm_ctx->resource->kref);
261 }
262
263 return NO_ERROR;
264
265 fail:
266 nlmsg_free(adm_ctx->reply_skb);
267 adm_ctx->reply_skb = NULL;
268 return err;
269 }
270
271 static int drbd_adm_finish(struct drbd_config_context *adm_ctx,
272 struct genl_info *info, int retcode)
273 {
274 if (adm_ctx->device) {
275 kref_put(&adm_ctx->device->kref, drbd_destroy_device);
276 adm_ctx->device = NULL;
277 }
278 if (adm_ctx->connection) {
279 kref_put(&adm_ctx->connection->kref, &drbd_destroy_connection);
280 adm_ctx->connection = NULL;
281 }
282 if (adm_ctx->resource) {
283 kref_put(&adm_ctx->resource->kref, drbd_destroy_resource);
284 adm_ctx->resource = NULL;
285 }
286
287 if (!adm_ctx->reply_skb)
288 return -ENOMEM;
289
290 adm_ctx->reply_dh->ret_code = retcode;
291 drbd_adm_send_reply(adm_ctx->reply_skb, info);
292 return 0;
293 }
294
295 static void setup_khelper_env(struct drbd_connection *connection, char **envp)
296 {
297 char *afs;
298
299 /* FIXME: A future version will not allow this case. */
300 if (connection->my_addr_len == 0 || connection->peer_addr_len == 0)
301 return;
302
303 switch (((struct sockaddr *)&connection->peer_addr)->sa_family) {
304 case AF_INET6:
305 afs = "ipv6";
306 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
307 &((struct sockaddr_in6 *)&connection->peer_addr)->sin6_addr);
308 break;
309 case AF_INET:
310 afs = "ipv4";
311 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
312 &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
313 break;
314 default:
315 afs = "ssocks";
316 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
317 &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
318 }
319 snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
320 }
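/*
 * For illustration: with an IPv4 peer at, say, 192.0.2.1, the handler
 * environment built above ends up roughly as
 *   envp[3] = "DRBD_PEER_AF=ipv4"
 *   envp[4] = "DRBD_PEER_ADDRESS=192.0.2.1"
 * (the example address is arbitrary; only the format is prescribed by the
 * snprintf() calls above).
 */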
321
322 int drbd_khelper(struct drbd_device *device, char *cmd)
323 {
324 char *envp[] = { "HOME=/",
325 "TERM=linux",
326 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
327 (char[20]) { }, /* address family */
328 (char[60]) { }, /* address */
329 NULL };
330 char mb[12];
331 char *argv[] = {usermode_helper, cmd, mb, NULL };
332 struct drbd_connection *connection = first_peer_device(device)->connection;
333 struct sib_info sib;
334 int ret;
335
336 if (current == connection->worker.task)
337 set_bit(CALLBACK_PENDING, &connection->flags);
338
339 snprintf(mb, 12, "minor-%d", device_to_minor(device));
340 setup_khelper_env(connection, envp);
341
342 /* The helper may take some time.
343 * write out any unsynced meta data changes now */
344 drbd_md_sync(device);
345
346 drbd_info(device, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
347 sib.sib_reason = SIB_HELPER_PRE;
348 sib.helper_name = cmd;
349 drbd_bcast_event(device, &sib);
350 ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
351 if (ret)
352 drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
353 usermode_helper, cmd, mb,
354 (ret >> 8) & 0xff, ret);
355 else
356 drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n",
357 usermode_helper, cmd, mb,
358 (ret >> 8) & 0xff, ret);
359 sib.sib_reason = SIB_HELPER_POST;
360 sib.helper_exit_code = ret;
361 drbd_bcast_event(device, &sib);
362
363 if (current == connection->worker.task)
364 clear_bit(CALLBACK_PENDING, &connection->flags);
365
366 if (ret < 0) /* Ignore any ERRNOs we got. */
367 ret = 0;
368
369 return ret;
370 }
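/*
 * Illustrative invocation, assuming the module default usermode_helper
 * (typically /sbin/drbdadm): drbd_khelper(device, "pri-on-incon-degr") for
 * minor 0 runs
 *   /sbin/drbdadm pri-on-incon-degr minor-0
 * synchronously (UMH_WAIT_PROC), with DRBD_PEER_AF / DRBD_PEER_ADDRESS in
 * the environment when the connection has addresses configured.
 */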
371
372 static int conn_khelper(struct drbd_connection *connection, char *cmd)
373 {
374 char *envp[] = { "HOME=/",
375 "TERM=linux",
376 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
377 (char[20]) { }, /* address family */
378 (char[60]) { }, /* address */
379 NULL };
380 char *resource_name = connection->resource->name;
381 char *argv[] = {usermode_helper, cmd, resource_name, NULL };
382 int ret;
383
384 setup_khelper_env(connection, envp);
385 conn_md_sync(connection);
386
387 drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name);
388 /* TODO: conn_bcast_event() ?? */
389
390 ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
391 if (ret)
392 drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
393 usermode_helper, cmd, resource_name,
394 (ret >> 8) & 0xff, ret);
395 else
396 drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
397 usermode_helper, cmd, resource_name,
398 (ret >> 8) & 0xff, ret);
399 /* TODO: conn_bcast_event() ?? */
400
401 if (ret < 0) /* Ignore any ERRNOs we got. */
402 ret = 0;
403
404 return ret;
405 }
406
407 static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connection)
408 {
409 enum drbd_fencing_p fp = FP_NOT_AVAIL;
410 struct drbd_peer_device *peer_device;
411 int vnr;
412
413 rcu_read_lock();
414 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
415 struct drbd_device *device = peer_device->device;
416 if (get_ldev_if_state(device, D_CONSISTENT)) {
417 struct disk_conf *disk_conf =
418 rcu_dereference(peer_device->device->ldev->disk_conf);
419 fp = max_t(enum drbd_fencing_p, fp, disk_conf->fencing);
420 put_ldev(device);
421 }
422 }
423 rcu_read_unlock();
424
425 if (fp == FP_NOT_AVAIL) {
426 /* IO Suspending works on the whole resource.
427 Do it only for one device. */
428 vnr = 0;
429 peer_device = idr_get_next(&connection->peer_devices, &vnr);
430 drbd_change_state(peer_device->device, CS_VERBOSE | CS_HARD, NS(susp_fen, 0));
431 }
432
433 return fp;
434 }
435
436 bool conn_try_outdate_peer(struct drbd_connection *connection)
437 {
438 unsigned int connect_cnt;
439 union drbd_state mask = { };
440 union drbd_state val = { };
441 enum drbd_fencing_p fp;
442 char *ex_to_string;
443 int r;
444
445 spin_lock_irq(&connection->resource->req_lock);
446 if (connection->cstate >= C_WF_REPORT_PARAMS) {
447 drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n");
448 spin_unlock_irq(&connection->resource->req_lock);
449 return false;
450 }
451
452 connect_cnt = connection->connect_cnt;
453 spin_unlock_irq(&connection->resource->req_lock);
454
455 fp = highest_fencing_policy(connection);
456 switch (fp) {
457 case FP_NOT_AVAIL:
458 drbd_warn(connection, "Not fencing peer, I'm not even Consistent myself.\n");
459 goto out;
460 case FP_DONT_CARE:
461 return true;
462 default: ;
463 }
464
465 r = conn_khelper(connection, "fence-peer");
466
467 switch ((r>>8) & 0xff) {
468 case 3: /* peer is inconsistent */
469 ex_to_string = "peer is inconsistent or worse";
470 mask.pdsk = D_MASK;
471 val.pdsk = D_INCONSISTENT;
472 break;
473 case 4: /* peer got outdated, or was already outdated */
474 ex_to_string = "peer was fenced";
475 mask.pdsk = D_MASK;
476 val.pdsk = D_OUTDATED;
477 break;
478 case 5: /* peer was down */
479 if (conn_highest_disk(connection) == D_UP_TO_DATE) {
480 /* we will(have) create(d) a new UUID anyways... */
481 ex_to_string = "peer is unreachable, assumed to be dead";
482 mask.pdsk = D_MASK;
483 val.pdsk = D_OUTDATED;
484 } else {
485 ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
486 }
487 break;
488 case 6: /* Peer is primary, voluntarily outdate myself.
489 * This is useful when an unconnected R_SECONDARY is asked to
490 * become R_PRIMARY, but finds the other peer being active. */
491 ex_to_string = "peer is active";
492 drbd_warn(connection, "Peer is primary, outdating myself.\n");
493 mask.disk = D_MASK;
494 val.disk = D_OUTDATED;
495 break;
496 case 7:
497 if (fp != FP_STONITH)
498 drbd_err(connection, "fence-peer() = 7 && fencing != Stonith !!!\n");
499 ex_to_string = "peer was stonithed";
500 mask.pdsk = D_MASK;
501 val.pdsk = D_OUTDATED;
502 break;
503 default:
504 /* The script is broken ... */
505 drbd_err(connection, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
506 return false; /* Eventually leave IO frozen */
507 }
508
509 drbd_info(connection, "fence-peer helper returned %d (%s)\n",
510 (r>>8) & 0xff, ex_to_string);
511
512 out:
513
514 /* Not using
515 conn_request_state(connection, mask, val, CS_VERBOSE);
516 here, because we might have been able to re-establish the connection in the
517 meantime. */
518 spin_lock_irq(&connection->resource->req_lock);
519 if (connection->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &connection->flags)) {
520 if (connection->connect_cnt != connect_cnt)
521 /* In case the connection was established and dropped
522 while the fence-peer handler was running, ignore it */
523 drbd_info(connection, "Ignoring fence-peer exit code\n");
524 else
525 _conn_request_state(connection, mask, val, CS_VERBOSE);
526 }
527 spin_unlock_irq(&connection->resource->req_lock);
528
529 return conn_highest_pdsk(connection) <= D_OUTDATED;
530 }
531
532 static int _try_outdate_peer_async(void *data)
533 {
534 struct drbd_connection *connection = (struct drbd_connection *)data;
535
536 conn_try_outdate_peer(connection);
537
538 kref_put(&connection->kref, drbd_destroy_connection);
539 return 0;
540 }
541
542 void conn_try_outdate_peer_async(struct drbd_connection *connection)
543 {
544 struct task_struct *opa;
545
546 kref_get(&connection->kref);
547 opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h");
548 if (IS_ERR(opa)) {
549 drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n");
550 kref_put(&connection->kref, drbd_destroy_connection);
551 }
552 }
553
554 enum drbd_state_rv
555 drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int force)
556 {
557 struct drbd_peer_device *const peer_device = first_peer_device(device);
558 struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
559 const int max_tries = 4;
560 enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
561 struct net_conf *nc;
562 int try = 0;
563 int forced = 0;
564 union drbd_state mask, val;
565
566 if (new_role == R_PRIMARY) {
567 struct drbd_connection *connection;
568
569 /* Detect dead peers as soon as possible. */
570
571 rcu_read_lock();
572 for_each_connection(connection, device->resource)
573 request_ping(connection);
574 rcu_read_unlock();
575 }
576
577 mutex_lock(device->state_mutex);
578
579 mask.i = 0; mask.role = R_MASK;
580 val.i = 0; val.role = new_role;
581
582 while (try++ < max_tries) {
583 rv = _drbd_request_state(device, mask, val, CS_WAIT_COMPLETE);
584
585 /* in case we first succeeded in outdating the peer,
586 * but now suddenly could establish a connection */
587 if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
588 val.pdsk = 0;
589 mask.pdsk = 0;
590 continue;
591 }
592
593 if (rv == SS_NO_UP_TO_DATE_DISK && force &&
594 (device->state.disk < D_UP_TO_DATE &&
595 device->state.disk >= D_INCONSISTENT)) {
596 mask.disk = D_MASK;
597 val.disk = D_UP_TO_DATE;
598 forced = 1;
599 continue;
600 }
601
602 if (rv == SS_NO_UP_TO_DATE_DISK &&
603 device->state.disk == D_CONSISTENT && mask.pdsk == 0) {
604 D_ASSERT(device, device->state.pdsk == D_UNKNOWN);
605
606 if (conn_try_outdate_peer(connection)) {
607 val.disk = D_UP_TO_DATE;
608 mask.disk = D_MASK;
609 }
610 continue;
611 }
612
613 if (rv == SS_NOTHING_TO_DO)
614 goto out;
615 if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
616 if (!conn_try_outdate_peer(connection) && force) {
617 drbd_warn(device, "Forced into split brain situation!\n");
618 mask.pdsk = D_MASK;
619 val.pdsk = D_OUTDATED;
620
621 }
622 continue;
623 }
624 if (rv == SS_TWO_PRIMARIES) {
625 /* Maybe the peer is detected as dead very soon...
626 retry at most once more in this case. */
627 int timeo;
628 rcu_read_lock();
629 nc = rcu_dereference(connection->net_conf);
630 timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
631 rcu_read_unlock();
632 schedule_timeout_interruptible(timeo);
633 if (try < max_tries)
634 try = max_tries - 1;
635 continue;
636 }
637 if (rv < SS_SUCCESS) {
638 rv = _drbd_request_state(device, mask, val,
639 CS_VERBOSE + CS_WAIT_COMPLETE);
640 if (rv < SS_SUCCESS)
641 goto out;
642 }
643 break;
644 }
645
646 if (rv < SS_SUCCESS)
647 goto out;
648
649 if (forced)
650 drbd_warn(device, "Forced to consider local data as UpToDate!\n");
651
652 /* Wait until nothing is in flight :) */
653 wait_event(device->misc_wait, atomic_read(&device->ap_pending_cnt) == 0);
654
655 /* FIXME also wait for all pending P_BARRIER_ACK? */
656
657 if (new_role == R_SECONDARY) {
658 if (get_ldev(device)) {
659 device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
660 put_ldev(device);
661 }
662 } else {
663 mutex_lock(&device->resource->conf_update);
664 nc = connection->net_conf;
665 if (nc)
666 nc->discard_my_data = 0; /* without copy; single bit op is atomic */
667 mutex_unlock(&device->resource->conf_update);
668
669 if (get_ldev(device)) {
670 if (((device->state.conn < C_CONNECTED ||
671 device->state.pdsk <= D_FAILED)
672 && device->ldev->md.uuid[UI_BITMAP] == 0) || forced)
673 drbd_uuid_new_current(device);
674
675 device->ldev->md.uuid[UI_CURRENT] |= (u64)1;
676 put_ldev(device);
677 }
678 }
679
680 /* writeout of the bitmap areas covered by the activity log
681 * to stable storage is already done in the after-state-change work */
682
683 if (device->state.conn >= C_WF_REPORT_PARAMS) {
684 /* if this was forced, we should consider sync */
685 if (forced)
686 drbd_send_uuids(peer_device);
687 drbd_send_current_state(peer_device);
688 }
689
690 drbd_md_sync(device);
691 set_disk_ro(device->vdisk, new_role == R_SECONDARY);
692 kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
693 out:
694 mutex_unlock(device->state_mutex);
695 return rv;
696 }
697
698 static const char *from_attrs_err_to_txt(int err)
699 {
700 return err == -ENOMSG ? "required attribute missing" :
701 err == -EOPNOTSUPP ? "unknown mandatory attribute" :
702 err == -EEXIST ? "can not change invariant setting" :
703 "invalid attribute value";
704 }
705
706 int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
707 {
708 struct drbd_config_context adm_ctx;
709 struct set_role_parms parms;
710 int err;
711 enum drbd_ret_code retcode;
712
713 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
714 if (!adm_ctx.reply_skb)
715 return retcode;
716 if (retcode != NO_ERROR)
717 goto out;
718
719 memset(&parms, 0, sizeof(parms));
720 if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
721 err = set_role_parms_from_attrs(&parms, info);
722 if (err) {
723 retcode = ERR_MANDATORY_TAG;
724 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
725 goto out;
726 }
727 }
728 genl_unlock();
729 mutex_lock(&adm_ctx.resource->adm_mutex);
730
731 if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
732 retcode = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
733 else
734 retcode = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
735
736 mutex_unlock(&adm_ctx.resource->adm_mutex);
737 genl_lock();
738 out:
739 drbd_adm_finish(&adm_ctx, info, retcode);
740 return 0;
741 }
742
743 /* Initializes the md.*_offset members, so we are able to find
744 * the on disk meta data.
745 *
746 * We currently have two possible layouts:
747 * external:
748 * |----------- md_size_sect ------------------|
749 * [ 4k superblock ][ activity log ][ Bitmap ]
750 * | al_offset == 8 |
751 * | bm_offset = al_offset + X |
752 * ==> bitmap sectors = md_size_sect - bm_offset
753 *
754 * internal:
755 * |----------- md_size_sect ------------------|
756 * [data.....][ Bitmap ][ activity log ][ 4k superblock ]
757 * | al_offset < 0 |
758 * | bm_offset = al_offset - Y |
759 * ==> bitmap sectors = Y = al_offset - bm_offset
760 *
761 * Activity log size used to be fixed 32kB,
762 * but is about to become configurable.
763 */
764 static void drbd_md_set_sector_offsets(struct drbd_device *device,
765 struct drbd_backing_dev *bdev)
766 {
767 sector_t md_size_sect = 0;
768 unsigned int al_size_sect = bdev->md.al_size_4k * 8;
769
770 bdev->md.md_offset = drbd_md_ss(bdev);
771
772 switch (bdev->md.meta_dev_idx) {
773 default:
774 /* v07 style fixed size indexed meta data */
775 bdev->md.md_size_sect = MD_128MB_SECT;
776 bdev->md.al_offset = MD_4kB_SECT;
777 bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
778 break;
779 case DRBD_MD_INDEX_FLEX_EXT:
780 /* just occupy the full device; unit: sectors */
781 bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
782 bdev->md.al_offset = MD_4kB_SECT;
783 bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
784 break;
785 case DRBD_MD_INDEX_INTERNAL:
786 case DRBD_MD_INDEX_FLEX_INT:
787 /* al size is still fixed */
788 bdev->md.al_offset = -al_size_sect;
789 /* we need (slightly less than) ~ this many bitmap sectors: */
790 md_size_sect = drbd_get_capacity(bdev->backing_bdev);
791 md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
792 md_size_sect = BM_SECT_TO_EXT(md_size_sect);
793 md_size_sect = ALIGN(md_size_sect, 8);
794
795 /* plus the "drbd meta data super block",
796 * and the activity log; */
797 md_size_sect += MD_4kB_SECT + al_size_sect;
798
799 bdev->md.md_size_sect = md_size_sect;
800 /* bitmap offset is adjusted by 'super' block size */
801 bdev->md.bm_offset = -md_size_sect + MD_4kB_SECT;
802 break;
803 }
804 }
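/*
 * Worked example, assuming the historical fixed 32kB activity log
 * (al_size_4k = 8, hence al_size_sect = 64) and the v07-style external
 * index ("default:" branch above), all offsets relative to md_offset:
 *   md_size_sect = MD_128MB_SECT
 *   al_offset    = 8            superblock occupies sectors 0..7
 *   bm_offset    = 8 + 64 = 72  bitmap from sector 72 to the end of the slot
 * For internal meta data the same quantities become negative offsets from
 * the superblock at the end of the device, as sketched in the layout
 * comment above.
 */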
805
806 /* input size is expected to be in KB */
807 char *ppsize(char *buf, unsigned long long size)
808 {
809 /* Needs 9 bytes at max including trailing NUL:
810 * -1ULL ==> "16384 EB" */
811 static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
812 int base = 0;
813 while (size >= 10000 && base < sizeof(units)-1) {
814 /* shift + round */
815 size = (size >> 10) + !!(size & (1<<9));
816 base++;
817 }
818 sprintf(buf, "%u %cB", (unsigned)size, units[base]);
819
820 return buf;
821 }
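/*
 * Examples (input in KB, as noted above):
 *   ppsize(buf, 4096)    -> "4096 KB"  (below 10000, no shift)
 *   ppsize(buf, 10240)   -> "10 MB"
 *   ppsize(buf, 1048576) -> "1024 MB"
 *   ppsize(buf, -1ULL)   -> "16384 EB" (the documented worst case)
 */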
822
823 /* there is still a theoretical deadlock when called from receiver
824 * on a D_INCONSISTENT R_PRIMARY:
825 * remote READ does inc_ap_bio, receiver would need to receive answer
826 * packet from remote to dec_ap_bio again.
827 * receiver receive_sizes(), comes here,
828 * waits for ap_bio_cnt == 0. -> deadlock.
829 * but this cannot happen, actually, because:
830 * R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
831 * (not connected, or bad/no disk on peer):
832 * see drbd_fail_request_early, ap_bio_cnt is zero.
833 * R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
834 * peer may not initiate a resize.
835 */
836 /* Note these are not to be confused with
837 * drbd_adm_suspend_io/drbd_adm_resume_io,
838 * which are (sub) state changes triggered by admin (drbdsetup),
839 * and can be long lived.
840 * This changes a device->flags bit, is triggered by drbd internals,
841 * and should be short-lived. */
842 void drbd_suspend_io(struct drbd_device *device)
843 {
844 set_bit(SUSPEND_IO, &device->flags);
845 if (drbd_suspended(device))
846 return;
847 wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));
848 }
849
850 void drbd_resume_io(struct drbd_device *device)
851 {
852 clear_bit(SUSPEND_IO, &device->flags);
853 wake_up(&device->misc_wait);
854 }
855
856 /**
857 * drbd_determine_dev_size() - Sets the right device size obeying all constraints
858 * @device: DRBD device.
859 *
860 * Returns a determine_dev_size code; negative return values indicate errors.
861 * You should call drbd_md_sync() after calling this function.
862 */
863 enum determine_dev_size
864 drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
865 {
866 sector_t prev_first_sect, prev_size; /* previous meta location */
867 sector_t la_size_sect, u_size;
868 struct drbd_md *md = &device->ldev->md;
869 u32 prev_al_stripe_size_4k;
870 u32 prev_al_stripes;
871 sector_t size;
872 char ppb[10];
873 void *buffer;
874
875 int md_moved, la_size_changed;
876 enum determine_dev_size rv = DS_UNCHANGED;
877
878 /* race:
879 * application request passes inc_ap_bio,
880 * but then cannot get an AL-reference.
881 * this function later may wait on ap_bio_cnt == 0. -> deadlock.
882 *
883 * to avoid that:
884 * Suspend IO right here.
885 * still lock the act_log to not trigger ASSERTs there.
886 */
887 drbd_suspend_io(device);
888 buffer = drbd_md_get_buffer(device); /* Lock meta-data IO */
889 if (!buffer) {
890 drbd_resume_io(device);
891 return DS_ERROR;
892 }
893
894 /* no wait necessary anymore, actually we could assert that */
895 wait_event(device->al_wait, lc_try_lock(device->act_log));
896
897 prev_first_sect = drbd_md_first_sector(device->ldev);
898 prev_size = device->ldev->md.md_size_sect;
899 la_size_sect = device->ldev->md.la_size_sect;
900
901 if (rs) {
902 /* rs is non NULL if we should change the AL layout only */
903
904 prev_al_stripes = md->al_stripes;
905 prev_al_stripe_size_4k = md->al_stripe_size_4k;
906
907 md->al_stripes = rs->al_stripes;
908 md->al_stripe_size_4k = rs->al_stripe_size / 4;
909 md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
910 }
911
912 drbd_md_set_sector_offsets(device, device->ldev);
913
914 rcu_read_lock();
915 u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
916 rcu_read_unlock();
917 size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED);
918
919 if (size < la_size_sect) {
920 if (rs && u_size == 0) {
921 /* Remove "rs &&" later. This check should always be active, but
922 right now the receiver expects the permissive behavior */
923 drbd_warn(device, "Implicit shrink not allowed. "
924 "Use --size=%llus for explicit shrink.\n",
925 (unsigned long long)size);
926 rv = DS_ERROR_SHRINK;
927 }
928 if (u_size > size)
929 rv = DS_ERROR_SPACE_MD;
930 if (rv != DS_UNCHANGED)
931 goto err_out;
932 }
933
934 if (drbd_get_capacity(device->this_bdev) != size ||
935 drbd_bm_capacity(device) != size) {
936 int err;
937 err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC));
938 if (unlikely(err)) {
939 /* currently there is only one error: ENOMEM! */
940 size = drbd_bm_capacity(device)>>1;
941 if (size == 0) {
942 drbd_err(device, "OUT OF MEMORY! "
943 "Could not allocate bitmap!\n");
944 } else {
945 drbd_err(device, "BM resizing failed. "
946 "Leaving size unchanged at size = %lu KB\n",
947 (unsigned long)size);
948 }
949 rv = DS_ERROR;
950 }
951 /* racy, see comments above. */
952 drbd_set_my_capacity(device, size);
953 device->ldev->md.la_size_sect = size;
954 drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
955 (unsigned long long)size>>1);
956 }
957 if (rv <= DS_ERROR)
958 goto err_out;
959
960 la_size_changed = (la_size_sect != device->ldev->md.la_size_sect);
961
962 md_moved = prev_first_sect != drbd_md_first_sector(device->ldev)
963 || prev_size != device->ldev->md.md_size_sect;
964
965 if (la_size_changed || md_moved || rs) {
966 u32 prev_flags;
967
968 /* We do some synchronous IO below, which may take some time.
969 * Clear the timer, to avoid scary "timer expired!" messages;
970 * the "Superblock" is written out at least twice below anyway. */
971 del_timer(&device->md_sync_timer);
972 drbd_al_shrink(device); /* All extents inactive. */
973
974 prev_flags = md->flags;
975 md->flags &= ~MDF_PRIMARY_IND;
976 drbd_md_write(device, buffer);
977
978 drbd_info(device, "Writing the whole bitmap, %s\n",
979 la_size_changed && md_moved ? "size changed and md moved" :
980 la_size_changed ? "size changed" : "md moved");
981 /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
982 drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
983 "size changed", BM_LOCKED_MASK);
984 drbd_initialize_al(device, buffer);
985
986 md->flags = prev_flags;
987 drbd_md_write(device, buffer);
988
989 if (rs)
990 drbd_info(device, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
991 md->al_stripes, md->al_stripe_size_4k * 4);
992 }
993
994 if (size > la_size_sect)
995 rv = la_size_sect ? DS_GREW : DS_GREW_FROM_ZERO;
996 if (size < la_size_sect)
997 rv = DS_SHRUNK;
998
999 if (0) {
1000 err_out:
1001 if (rs) {
1002 md->al_stripes = prev_al_stripes;
1003 md->al_stripe_size_4k = prev_al_stripe_size_4k;
1004 md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k;
1005
1006 drbd_md_set_sector_offsets(device, device->ldev);
1007 }
1008 }
1009 lc_unlock(device->act_log);
1010 wake_up(&device->al_wait);
1011 drbd_md_put_buffer(device);
1012 drbd_resume_io(device);
1013
1014 return rv;
1015 }
1016
1017 sector_t
1018 drbd_new_dev_size(struct drbd_device *device, struct drbd_backing_dev *bdev,
1019 sector_t u_size, int assume_peer_has_space)
1020 {
1021 sector_t p_size = device->p_size; /* partner's disk size. */
1022 sector_t la_size_sect = bdev->md.la_size_sect; /* last agreed size. */
1023 sector_t m_size; /* my size */
1024 sector_t size = 0;
1025
1026 m_size = drbd_get_max_capacity(bdev);
1027
1028 if (device->state.conn < C_CONNECTED && assume_peer_has_space) {
1029 drbd_warn(device, "Resize while not connected was forced by the user!\n");
1030 p_size = m_size;
1031 }
1032
1033 if (p_size && m_size) {
1034 size = min_t(sector_t, p_size, m_size);
1035 } else {
1036 if (la_size_sect) {
1037 size = la_size_sect;
1038 if (m_size && m_size < size)
1039 size = m_size;
1040 if (p_size && p_size < size)
1041 size = p_size;
1042 } else {
1043 if (m_size)
1044 size = m_size;
1045 if (p_size)
1046 size = p_size;
1047 }
1048 }
1049
1050 if (size == 0)
1051 drbd_err(device, "Both nodes diskless!\n");
1052
1053 if (u_size) {
1054 if (u_size > size)
1055 drbd_err(device, "Requested disk size is too big (%lu > %lu)\n",
1056 (unsigned long)u_size>>1, (unsigned long)size>>1);
1057 else
1058 size = u_size;
1059 }
1060
1061 return size;
1062 }
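/*
 * For illustration of the negotiation above: with a 100 GiB local backing
 * device and an 80 GiB peer, the agreed size is min(p_size, m_size), i.e.
 * 80 GiB.  While disconnected (and not forced), a previously agreed size
 * (la_size_sect) wins, clipped to whatever local/peer sizes are known.
 * A user-configured size (u_size) then replaces the result, unless it is
 * larger than the negotiated size, in which case it is rejected with an
 * error message.
 */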
1063
1064 /**
1065 * drbd_check_al_size() - Ensures that the AL is of the right size
1066 * @device: DRBD device.
1067 *
1068 * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
1069 * failed, and 0 on success. You should call drbd_md_sync() after you called
1070 * this function.
1071 */
1072 static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
1073 {
1074 struct lru_cache *n, *t;
1075 struct lc_element *e;
1076 unsigned int in_use;
1077 int i;
1078
1079 if (device->act_log &&
1080 device->act_log->nr_elements == dc->al_extents)
1081 return 0;
1082
1083 in_use = 0;
1084 t = device->act_log;
1085 n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
1086 dc->al_extents, sizeof(struct lc_element), 0);
1087
1088 if (n == NULL) {
1089 drbd_err(device, "Cannot allocate act_log lru!\n");
1090 return -ENOMEM;
1091 }
1092 spin_lock_irq(&device->al_lock);
1093 if (t) {
1094 for (i = 0; i < t->nr_elements; i++) {
1095 e = lc_element_by_index(t, i);
1096 if (e->refcnt)
1097 drbd_err(device, "refcnt(%d)==%d\n",
1098 e->lc_number, e->refcnt);
1099 in_use += e->refcnt;
1100 }
1101 }
1102 if (!in_use)
1103 device->act_log = n;
1104 spin_unlock_irq(&device->al_lock);
1105 if (in_use) {
1106 drbd_err(device, "Activity log still in use!\n");
1107 lc_destroy(n);
1108 return -EBUSY;
1109 } else {
1110 if (t)
1111 lc_destroy(t);
1112 }
1113 drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elements */
1114 return 0;
1115 }
1116
1117 static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev,
1118 unsigned int max_bio_size)
1119 {
1120 struct request_queue * const q = device->rq_queue;
1121 unsigned int max_hw_sectors = max_bio_size >> 9;
1122 unsigned int max_segments = 0;
1123 struct request_queue *b = NULL;
1124
1125 if (bdev) {
1126 b = bdev->backing_bdev->bd_disk->queue;
1127
1128 max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
1129 rcu_read_lock();
1130 max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs;
1131 rcu_read_unlock();
1132
1133 blk_set_stacking_limits(&q->limits);
1134 blk_queue_max_write_same_sectors(q, 0);
1135 }
1136
1137 blk_queue_logical_block_size(q, 512);
1138 blk_queue_max_hw_sectors(q, max_hw_sectors);
1139 /* This is the workaround for "bio would need to, but cannot, be split" */
1140 blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
1141 blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
1142
1143 if (b) {
1144 struct drbd_connection *connection = first_peer_device(device)->connection;
1145
1146 if (blk_queue_discard(b) &&
1147 (connection->cstate < C_CONNECTED || connection->agreed_features & FF_TRIM)) {
1148 /* For now, don't allow more than one activity log extent worth of data
1149 * to be discarded in one go. We may need to rework drbd_al_begin_io()
1150 * to allow for even larger discard ranges */
1151 q->limits.max_discard_sectors = DRBD_MAX_DISCARD_SECTORS;
1152
1153 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
1154 /* REALLY? Is stacking secdiscard "legal"? */
1155 if (blk_queue_secdiscard(b))
1156 queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, q);
1157 } else {
1158 q->limits.max_discard_sectors = 0;
1159 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
1160 queue_flag_clear_unlocked(QUEUE_FLAG_SECDISCARD, q);
1161 }
1162
1163 blk_queue_stack_limits(q, b);
1164
1165 if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
1166 drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
1167 q->backing_dev_info.ra_pages,
1168 b->backing_dev_info.ra_pages);
1169 q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
1170 }
1171 }
1172 }
1173
1174 void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev)
1175 {
1176 unsigned int now, new, local, peer;
1177
1178 now = queue_max_hw_sectors(device->rq_queue) << 9;
1179 local = device->local_max_bio_size; /* possibly a last known value, from volatile memory */
1180 peer = device->peer_max_bio_size; /* possibly a last known value, from meta data */
1181
1182 if (bdev) {
1183 local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9;
1184 device->local_max_bio_size = local;
1185 }
1186 local = min(local, DRBD_MAX_BIO_SIZE);
1187
1188 /* We may ignore peer limits if the peer is modern enough.
1189 From 8.3.8 onwards the peer can use multiple
1190 BIOs for a single peer_request */
1191 if (device->state.conn >= C_WF_REPORT_PARAMS) {
1192 if (first_peer_device(device)->connection->agreed_pro_version < 94)
1193 peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
1194 /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
1195 else if (first_peer_device(device)->connection->agreed_pro_version == 94)
1196 peer = DRBD_MAX_SIZE_H80_PACKET;
1197 else if (first_peer_device(device)->connection->agreed_pro_version < 100)
1198 peer = DRBD_MAX_BIO_SIZE_P95; /* drbd 8.3.8 onwards, before 8.4.0 */
1199 else
1200 peer = DRBD_MAX_BIO_SIZE;
1201
1202 /* We may later detach and re-attach on a disconnected Primary.
1203 * Avoid having this setting jump back in that case.
1204 * We want to store what we know the peer DRBD can handle,
1205 * not what the peer IO backend can handle. */
1206 if (peer > device->peer_max_bio_size)
1207 device->peer_max_bio_size = peer;
1208 }
1209 new = min(local, peer);
1210
1211 if (device->state.role == R_PRIMARY && new < now)
1212 drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now);
1213
1214 if (new != now)
1215 drbd_info(device, "max BIO size = %u\n", new);
1216
1217 drbd_setup_queue_param(device, bdev, new);
1218 }
1219
1220 /* Starts the worker thread */
1221 static void conn_reconfig_start(struct drbd_connection *connection)
1222 {
1223 drbd_thread_start(&connection->worker);
1224 drbd_flush_workqueue(&connection->sender_work);
1225 }
1226
1227 /* if still unconfigured, stops worker again. */
1228 static void conn_reconfig_done(struct drbd_connection *connection)
1229 {
1230 bool stop_threads;
1231 spin_lock_irq(&connection->resource->req_lock);
1232 stop_threads = conn_all_vols_unconf(connection) &&
1233 connection->cstate == C_STANDALONE;
1234 spin_unlock_irq(&connection->resource->req_lock);
1235 if (stop_threads) {
1236 /* asender is implicitly stopped by receiver
1237 * in conn_disconnect() */
1238 drbd_thread_stop(&connection->receiver);
1239 drbd_thread_stop(&connection->worker);
1240 }
1241 }
1242
1243 /* Make sure IO is suspended before calling this function. */
1244 static void drbd_suspend_al(struct drbd_device *device)
1245 {
1246 int s = 0;
1247
1248 if (!lc_try_lock(device->act_log)) {
1249 drbd_warn(device, "Failed to lock al in drbd_suspend_al()\n");
1250 return;
1251 }
1252
1253 drbd_al_shrink(device);
1254 spin_lock_irq(&device->resource->req_lock);
1255 if (device->state.conn < C_CONNECTED)
1256 s = !test_and_set_bit(AL_SUSPENDED, &device->flags);
1257 spin_unlock_irq(&device->resource->req_lock);
1258 lc_unlock(device->act_log);
1259
1260 if (s)
1261 drbd_info(device, "Suspended AL updates\n");
1262 }
1263
1264
1265 static bool should_set_defaults(struct genl_info *info)
1266 {
1267 unsigned flags = ((struct drbd_genlmsghdr*)info->userhdr)->flags;
1268 return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS);
1269 }
1270
1271 static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
1272 {
1273 /* This is limited by 16 bit "slot" numbers,
1274 * and by available on-disk context storage.
1275 *
1276 * Also (u16)~0 is special (denotes a "free" extent).
1277 *
1278 * One transaction occupies one 4kB on-disk block,
1279 * we have n such blocks in the on disk ring buffer,
1280 * the "current" transaction may fail (n-1),
1281 * and there are 919 slot numbers of context information per transaction.
1282 *
1283 * 72 transaction blocks amount to more than 2**16 context slots,
1284 * so cap there first.
1285 */
1286 const unsigned int max_al_nr = DRBD_AL_EXTENTS_MAX;
1287 const unsigned int sufficient_on_disk =
1288 (max_al_nr + AL_CONTEXT_PER_TRANSACTION -1)
1289 /AL_CONTEXT_PER_TRANSACTION;
1290
1291 unsigned int al_size_4k = bdev->md.al_size_4k;
1292
1293 if (al_size_4k > sufficient_on_disk)
1294 return max_al_nr;
1295
1296 return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
1297 }
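/*
 * Spelling out the arithmetic from the comment above: with 919 context
 * slots per 4kB transaction block, 72 * 919 = 66168 > 2**16, so once the
 * on-disk ring holds more than roughly 72 transaction blocks the 16 bit
 * slot numbers, not the on-disk context storage, become the limiting
 * factor and the function returns max_al_nr.
 */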
1298
1299 static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b)
1300 {
1301 return a->disk_barrier != b->disk_barrier ||
1302 a->disk_flushes != b->disk_flushes ||
1303 a->disk_drain != b->disk_drain;
1304 }
1305
1306 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
1307 {
1308 struct drbd_config_context adm_ctx;
1309 enum drbd_ret_code retcode;
1310 struct drbd_device *device;
1311 struct disk_conf *new_disk_conf, *old_disk_conf;
1312 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
1313 int err, fifo_size;
1314
1315 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1316 if (!adm_ctx.reply_skb)
1317 return retcode;
1318 if (retcode != NO_ERROR)
1319 goto finish;
1320
1321 device = adm_ctx.device;
1322 mutex_lock(&adm_ctx.resource->adm_mutex);
1323
1324 /* we also need a disk
1325 * to change the options on */
1326 if (!get_ldev(device)) {
1327 retcode = ERR_NO_DISK;
1328 goto out;
1329 }
1330
1331 new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
1332 if (!new_disk_conf) {
1333 retcode = ERR_NOMEM;
1334 goto fail;
1335 }
1336
1337 mutex_lock(&device->resource->conf_update);
1338 old_disk_conf = device->ldev->disk_conf;
1339 *new_disk_conf = *old_disk_conf;
1340 if (should_set_defaults(info))
1341 set_disk_conf_defaults(new_disk_conf);
1342
1343 err = disk_conf_from_attrs_for_change(new_disk_conf, info);
1344 if (err && err != -ENOMSG) {
1345 retcode = ERR_MANDATORY_TAG;
1346 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1347 goto fail_unlock;
1348 }
1349
1350 if (!expect(new_disk_conf->resync_rate >= 1))
1351 new_disk_conf->resync_rate = 1;
1352
1353 if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
1354 new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
1355 if (new_disk_conf->al_extents > drbd_al_extents_max(device->ldev))
1356 new_disk_conf->al_extents = drbd_al_extents_max(device->ldev);
1357
1358 if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1359 new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1360
1361 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
1362 if (fifo_size != device->rs_plan_s->size) {
1363 new_plan = fifo_alloc(fifo_size);
1364 if (!new_plan) {
1365 drbd_err(device, "kmalloc of fifo_buffer failed");
1366 retcode = ERR_NOMEM;
1367 goto fail_unlock;
1368 }
1369 }
1370
1371 drbd_suspend_io(device);
1372 wait_event(device->al_wait, lc_try_lock(device->act_log));
1373 drbd_al_shrink(device);
1374 err = drbd_check_al_size(device, new_disk_conf);
1375 lc_unlock(device->act_log);
1376 wake_up(&device->al_wait);
1377 drbd_resume_io(device);
1378
1379 if (err) {
1380 retcode = ERR_NOMEM;
1381 goto fail_unlock;
1382 }
1383
1384 write_lock_irq(&global_state_lock);
1385 retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1386 if (retcode == NO_ERROR) {
1387 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
1388 drbd_resync_after_changed(device);
1389 }
1390 write_unlock_irq(&global_state_lock);
1391
1392 if (retcode != NO_ERROR)
1393 goto fail_unlock;
1394
1395 if (new_plan) {
1396 old_plan = device->rs_plan_s;
1397 rcu_assign_pointer(device->rs_plan_s, new_plan);
1398 }
1399
1400 mutex_unlock(&device->resource->conf_update);
1401
1402 if (new_disk_conf->al_updates)
1403 device->ldev->md.flags &= ~MDF_AL_DISABLED;
1404 else
1405 device->ldev->md.flags |= MDF_AL_DISABLED;
1406
1407 if (new_disk_conf->md_flushes)
1408 clear_bit(MD_NO_FUA, &device->flags);
1409 else
1410 set_bit(MD_NO_FUA, &device->flags);
1411
1412 if (write_ordering_changed(old_disk_conf, new_disk_conf))
1413 drbd_bump_write_ordering(device->resource, NULL, WO_bdev_flush);
1414
1415 drbd_md_sync(device);
1416
1417 if (device->state.conn >= C_CONNECTED) {
1418 struct drbd_peer_device *peer_device;
1419
1420 for_each_peer_device(peer_device, device)
1421 drbd_send_sync_param(peer_device);
1422 }
1423
1424 synchronize_rcu();
1425 kfree(old_disk_conf);
1426 kfree(old_plan);
1427 mod_timer(&device->request_timer, jiffies + HZ);
1428 goto success;
1429
1430 fail_unlock:
1431 mutex_unlock(&device->resource->conf_update);
1432 fail:
1433 kfree(new_disk_conf);
1434 kfree(new_plan);
1435 success:
1436 put_ldev(device);
1437 out:
1438 mutex_unlock(&adm_ctx.resource->adm_mutex);
1439 finish:
1440 drbd_adm_finish(&adm_ctx, info, retcode);
1441 return 0;
1442 }
1443
1444 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1445 {
1446 struct drbd_config_context adm_ctx;
1447 struct drbd_device *device;
1448 struct drbd_peer_device *peer_device;
1449 struct drbd_connection *connection;
1450 int err;
1451 enum drbd_ret_code retcode;
1452 enum determine_dev_size dd;
1453 sector_t max_possible_sectors;
1454 sector_t min_md_device_sectors;
1455 struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
1456 struct disk_conf *new_disk_conf = NULL;
1457 struct block_device *bdev;
1458 struct lru_cache *resync_lru = NULL;
1459 struct fifo_buffer *new_plan = NULL;
1460 union drbd_state ns, os;
1461 enum drbd_state_rv rv;
1462 struct net_conf *nc;
1463
1464 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1465 if (!adm_ctx.reply_skb)
1466 return retcode;
1467 if (retcode != NO_ERROR)
1468 goto finish;
1469
1470 device = adm_ctx.device;
1471 mutex_lock(&adm_ctx.resource->adm_mutex);
1472 peer_device = first_peer_device(device);
1473 connection = peer_device ? peer_device->connection : NULL;
1474 conn_reconfig_start(connection);
1475
1476 /* if you want to reconfigure, please tear down first */
1477 if (device->state.disk > D_DISKLESS) {
1478 retcode = ERR_DISK_CONFIGURED;
1479 goto fail;
1480 }
1481 /* It may just now have detached because of IO error. Make sure
1482 * drbd_ldev_destroy is done already; we may end up here very fast,
1483 * e.g. if someone calls attach from the on-io-error handler,
1484 * to realize a "hot spare" feature (not that I'd recommend that) */
1485 wait_event(device->misc_wait, !atomic_read(&device->local_cnt));
1486
1487 /* make sure there is no leftover from previous force-detach attempts */
1488 clear_bit(FORCE_DETACH, &device->flags);
1489 clear_bit(WAS_IO_ERROR, &device->flags);
1490 clear_bit(WAS_READ_ERROR, &device->flags);
1491
1492 /* and no leftover from previously aborted resync or verify, either */
1493 device->rs_total = 0;
1494 device->rs_failed = 0;
1495 atomic_set(&device->rs_pending_cnt, 0);
1496
1497 /* allocation not in the IO path, drbdsetup context */
1498 nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
1499 if (!nbc) {
1500 retcode = ERR_NOMEM;
1501 goto fail;
1502 }
1503 spin_lock_init(&nbc->md.uuid_lock);
1504
1505 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
1506 if (!new_disk_conf) {
1507 retcode = ERR_NOMEM;
1508 goto fail;
1509 }
1510 nbc->disk_conf = new_disk_conf;
1511
1512 set_disk_conf_defaults(new_disk_conf);
1513 err = disk_conf_from_attrs(new_disk_conf, info);
1514 if (err) {
1515 retcode = ERR_MANDATORY_TAG;
1516 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1517 goto fail;
1518 }
1519
1520 if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1521 new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1522
1523 new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ);
1524 if (!new_plan) {
1525 retcode = ERR_NOMEM;
1526 goto fail;
1527 }
1528
1529 if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
1530 retcode = ERR_MD_IDX_INVALID;
1531 goto fail;
1532 }
1533
1534 write_lock_irq(&global_state_lock);
1535 retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1536 write_unlock_irq(&global_state_lock);
1537 if (retcode != NO_ERROR)
1538 goto fail;
1539
1540 rcu_read_lock();
1541 nc = rcu_dereference(connection->net_conf);
1542 if (nc) {
1543 if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
1544 rcu_read_unlock();
1545 retcode = ERR_STONITH_AND_PROT_A;
1546 goto fail;
1547 }
1548 }
1549 rcu_read_unlock();
1550
1551 bdev = blkdev_get_by_path(new_disk_conf->backing_dev,
1552 FMODE_READ | FMODE_WRITE | FMODE_EXCL, device);
1553 if (IS_ERR(bdev)) {
1554 drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->backing_dev,
1555 PTR_ERR(bdev));
1556 retcode = ERR_OPEN_DISK;
1557 goto fail;
1558 }
1559 nbc->backing_bdev = bdev;
1560
1561 /*
1562 * meta_dev_idx >= 0: external fixed size, possibly multiple
1563 * drbd minors sharing one meta device. TODO in that case, paranoia
1564 * check that [md_bdev, meta_dev_idx] is not yet used by some
1565 * other drbd minor! (if you use drbd.conf + drbdadm, that
1566 * should check it for you already; but if you don't, or
1567 * someone fooled it, we need to double check here)
1568 */
1569 bdev = blkdev_get_by_path(new_disk_conf->meta_dev,
1570 FMODE_READ | FMODE_WRITE | FMODE_EXCL,
1571 (new_disk_conf->meta_dev_idx < 0) ?
1572 (void *)device : (void *)drbd_m_holder);
1573 if (IS_ERR(bdev)) {
1574 drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->meta_dev,
1575 PTR_ERR(bdev));
1576 retcode = ERR_OPEN_MD_DISK;
1577 goto fail;
1578 }
1579 nbc->md_bdev = bdev;
1580
1581 if ((nbc->backing_bdev == nbc->md_bdev) !=
1582 (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
1583 new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
1584 retcode = ERR_MD_IDX_INVALID;
1585 goto fail;
1586 }
1587
1588 resync_lru = lc_create("resync", drbd_bm_ext_cache,
1589 1, 61, sizeof(struct bm_extent),
1590 offsetof(struct bm_extent, lce));
1591 if (!resync_lru) {
1592 retcode = ERR_NOMEM;
1593 goto fail;
1594 }
1595
1596 /* Read our meta data super block early.
1597 * This also sets other on-disk offsets. */
1598 retcode = drbd_md_read(device, nbc);
1599 if (retcode != NO_ERROR)
1600 goto fail;
1601
1602 if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
1603 new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
1604 if (new_disk_conf->al_extents > drbd_al_extents_max(nbc))
1605 new_disk_conf->al_extents = drbd_al_extents_max(nbc);
1606
1607 if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
1608 drbd_err(device, "max capacity %llu smaller than disk size %llu\n",
1609 (unsigned long long) drbd_get_max_capacity(nbc),
1610 (unsigned long long) new_disk_conf->disk_size);
1611 retcode = ERR_DISK_TOO_SMALL;
1612 goto fail;
1613 }
1614
1615 if (new_disk_conf->meta_dev_idx < 0) {
1616 max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
1617 /* at least one MB, otherwise it does not make sense */
1618 min_md_device_sectors = (2<<10);
1619 } else {
1620 max_possible_sectors = DRBD_MAX_SECTORS;
1621 min_md_device_sectors = MD_128MB_SECT * (new_disk_conf->meta_dev_idx + 1);
1622 }
1623
1624 if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
1625 retcode = ERR_MD_DISK_TOO_SMALL;
1626 drbd_warn(device, "refusing attach: md-device too small, "
1627 "at least %llu sectors needed for this meta-disk type\n",
1628 (unsigned long long) min_md_device_sectors);
1629 goto fail;
1630 }
1631
1632 /* Make sure the new disk is big enough
1633 * (we may currently be R_PRIMARY with no local disk...) */
1634 if (drbd_get_max_capacity(nbc) <
1635 drbd_get_capacity(device->this_bdev)) {
1636 retcode = ERR_DISK_TOO_SMALL;
1637 goto fail;
1638 }
1639
1640 nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
1641
1642 if (nbc->known_size > max_possible_sectors) {
1643 drbd_warn(device, "==> truncating very big lower level device "
1644 "to currently maximum possible %llu sectors <==\n",
1645 (unsigned long long) max_possible_sectors);
1646 if (new_disk_conf->meta_dev_idx >= 0)
1647 drbd_warn(device, "==>> using internal or flexible "
1648 "meta data may help <<==\n");
1649 }
1650
1651 drbd_suspend_io(device);
1652 /* also wait for the last barrier ack. */
1653 /* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171
1654 * We need a way to either ignore barrier acks for barriers sent before a device
1655 * was attached, or a way to wait for all pending barrier acks to come in.
1656 * As barriers are counted per resource,
1657 * we'd need to suspend io on all devices of a resource.
1658 */
1659 wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device));
1660 /* and for any other previously queued work */
1661 drbd_flush_workqueue(&connection->sender_work);
1662
1663 rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
1664 retcode = rv; /* FIXME: Type mismatch. */
1665 drbd_resume_io(device);
1666 if (rv < SS_SUCCESS)
1667 goto fail;
1668
1669 if (!get_ldev_if_state(device, D_ATTACHING))
1670 goto force_diskless;
1671
1672 if (!device->bitmap) {
1673 if (drbd_bm_init(device)) {
1674 retcode = ERR_NOMEM;
1675 goto force_diskless_dec;
1676 }
1677 }
1678
1679 if (device->state.conn < C_CONNECTED &&
1680 device->state.role == R_PRIMARY && device->ed_uuid &&
1681 (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
1682 drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
1683 (unsigned long long)device->ed_uuid);
1684 retcode = ERR_DATA_NOT_CURRENT;
1685 goto force_diskless_dec;
1686 }
1687
1688 /* Since we are diskless, fix the activity log first... */
1689 if (drbd_check_al_size(device, new_disk_conf)) {
1690 retcode = ERR_NOMEM;
1691 goto force_diskless_dec;
1692 }
1693
1694 /* Prevent shrinking of consistent devices! */
1695 if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
1696 drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) {
1697 drbd_warn(device, "refusing to truncate a consistent device\n");
1698 retcode = ERR_DISK_TOO_SMALL;
1699 goto force_diskless_dec;
1700 }
1701
1702 /* Reset the "barriers don't work" bits here, then force meta data to
1703 * be written, to ensure we determine if barriers are supported. */
1704 if (new_disk_conf->md_flushes)
1705 clear_bit(MD_NO_FUA, &device->flags);
1706 else
1707 set_bit(MD_NO_FUA, &device->flags);
1708
1709 /* Point of no return reached.
1710 * Devices and memory are no longer released by error cleanup below.
1711 * Now the device takes over responsibility, and the state engine should
1712 * clean it up somewhere. */
1713 D_ASSERT(device, device->ldev == NULL);
1714 device->ldev = nbc;
1715 device->resync = resync_lru;
1716 device->rs_plan_s = new_plan;
1717 nbc = NULL;
1718 resync_lru = NULL;
1719 new_disk_conf = NULL;
1720 new_plan = NULL;
1721
1722 drbd_bump_write_ordering(device->resource, device->ldev, WO_bdev_flush);
1723
1724 if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
1725 set_bit(CRASHED_PRIMARY, &device->flags);
1726 else
1727 clear_bit(CRASHED_PRIMARY, &device->flags);
1728
1729 if (drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1730 !(device->state.role == R_PRIMARY && device->resource->susp_nod))
1731 set_bit(CRASHED_PRIMARY, &device->flags);
1732
1733 device->send_cnt = 0;
1734 device->recv_cnt = 0;
1735 device->read_cnt = 0;
1736 device->writ_cnt = 0;
1737
1738 drbd_reconsider_max_bio_size(device, device->ldev);
1739
1740 /* If I am currently not R_PRIMARY,
1741 * but the meta data primary indicator is set,
1742 * I have just now recovered from a hard crash,
1743 * and was R_PRIMARY before that crash.
1744 *
1745 * Now, if I had no connection before that crash
1746 * (have been degraded R_PRIMARY), chances are that
1747 * I won't find my peer now either.
1748 *
1749 * In that case, and _only_ in that case,
1750 * we use the degr-wfc-timeout instead of the default,
1751 * so we can automatically recover from a crash of a
1752 * degraded but active "cluster" after a certain timeout.
1753 */
1754 clear_bit(USE_DEGR_WFC_T, &device->flags);
1755 if (device->state.role != R_PRIMARY &&
1756 drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1757 !drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND))
1758 set_bit(USE_DEGR_WFC_T, &device->flags);
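/* Editor's note (hedged sketch): the flag set above is what makes
 * drbd_adm_get_timeout_type() further down report UT_DEGRADED, so that the
 * wait-for-connection path can use the shorter degr-wfc-timeout instead of
 * the regular wfc-timeout.  A drbd.conf fragment might look roughly like
 * this (option names per 8.x-era user space; exact syntax may differ):
 *
 *	startup {
 *		wfc-timeout       120;	# normal wait-for-connection timeout
 *		degr-wfc-timeout   60;	# used when USE_DEGR_WFC_T is set
 *	}
 */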
1759
1760 dd = drbd_determine_dev_size(device, 0, NULL);
1761 if (dd <= DS_ERROR) {
1762 retcode = ERR_NOMEM_BITMAP;
1763 goto force_diskless_dec;
1764 } else if (dd == DS_GREW)
1765 set_bit(RESYNC_AFTER_NEG, &device->flags);
1766
1767 if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ||
1768 (test_bit(CRASHED_PRIMARY, &device->flags) &&
1769 drbd_md_test_flag(device->ldev, MDF_AL_DISABLED))) {
1770 drbd_info(device, "Assuming that all blocks are out of sync "
1771 "(aka FullSync)\n");
1772 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
1773 "set_n_write from attaching", BM_LOCKED_MASK)) {
1774 retcode = ERR_IO_MD_DISK;
1775 goto force_diskless_dec;
1776 }
1777 } else {
1778 if (drbd_bitmap_io(device, &drbd_bm_read,
1779 "read from attaching", BM_LOCKED_MASK)) {
1780 retcode = ERR_IO_MD_DISK;
1781 goto force_diskless_dec;
1782 }
1783 }
1784
1785 if (_drbd_bm_total_weight(device) == drbd_bm_bits(device))
1786 drbd_suspend_al(device); /* IO is still suspended here... */
1787
1788 spin_lock_irq(&device->resource->req_lock);
1789 os = drbd_read_state(device);
1790 ns = os;
1791 /* If MDF_CONSISTENT is not set, go into D_INCONSISTENT;
1792 otherwise check MDF_WAS_UP_TO_DATE:
1793 if it is not set, go into the D_OUTDATED disk state,
1794 otherwise into D_CONSISTENT.
1795 */
1796 if (drbd_md_test_flag(device->ldev, MDF_CONSISTENT)) {
1797 if (drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE))
1798 ns.disk = D_CONSISTENT;
1799 else
1800 ns.disk = D_OUTDATED;
1801 } else {
1802 ns.disk = D_INCONSISTENT;
1803 }
1804
1805 if (drbd_md_test_flag(device->ldev, MDF_PEER_OUT_DATED))
1806 ns.pdsk = D_OUTDATED;
1807
1808 rcu_read_lock();
1809 if (ns.disk == D_CONSISTENT &&
1810 (ns.pdsk == D_OUTDATED || rcu_dereference(device->ldev->disk_conf)->fencing == FP_DONT_CARE))
1811 ns.disk = D_UP_TO_DATE;
1812
1813 /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
1814 MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
1815 this point, because drbd_request_state() modifies these
1816 flags. */
1817
1818 if (rcu_dereference(device->ldev->disk_conf)->al_updates)
1819 device->ldev->md.flags &= ~MDF_AL_DISABLED;
1820 else
1821 device->ldev->md.flags |= MDF_AL_DISABLED;
1822
1823 rcu_read_unlock();
1824
1825 /* In case we are C_CONNECTED, postpone any decision on the new disk
1826 state until after the negotiation phase. */
1827 if (device->state.conn == C_CONNECTED) {
1828 device->new_state_tmp.i = ns.i;
1829 ns.i = os.i;
1830 ns.disk = D_NEGOTIATING;
1831
1832 /* We expect to receive up-to-date UUIDs soon.
1833 To avoid a race in receive_state, free p_uuid while
1834 holding req_lock. I.e. atomic with the state change */
1835 kfree(device->p_uuid);
1836 device->p_uuid = NULL;
1837 }
1838
1839 rv = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
1840 spin_unlock_irq(&device->resource->req_lock);
1841
1842 if (rv < SS_SUCCESS)
1843 goto force_diskless_dec;
1844
1845 mod_timer(&device->request_timer, jiffies + HZ);
1846
1847 if (device->state.role == R_PRIMARY)
1848 device->ldev->md.uuid[UI_CURRENT] |= (u64)1;
1849 else
1850 device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
1851
1852 drbd_md_mark_dirty(device);
1853 drbd_md_sync(device);
1854
1855 kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
1856 put_ldev(device);
1857 conn_reconfig_done(connection);
1858 mutex_unlock(&adm_ctx.resource->adm_mutex);
1859 drbd_adm_finish(&adm_ctx, info, retcode);
1860 return 0;
1861
1862 force_diskless_dec:
1863 put_ldev(device);
1864 force_diskless:
1865 drbd_force_state(device, NS(disk, D_DISKLESS));
1866 drbd_md_sync(device);
1867 fail:
1868 conn_reconfig_done(connection);
1869 if (nbc) {
1870 if (nbc->backing_bdev)
1871 blkdev_put(nbc->backing_bdev,
1872 FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1873 if (nbc->md_bdev)
1874 blkdev_put(nbc->md_bdev,
1875 FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1876 kfree(nbc);
1877 }
1878 kfree(new_disk_conf);
1879 lc_destroy(resync_lru);
1880 kfree(new_plan);
1881 mutex_unlock(&adm_ctx.resource->adm_mutex);
1882 finish:
1883 drbd_adm_finish(&adm_ctx, info, retcode);
1884 return 0;
1885 }
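/* Editor's note (hedged sketch): the attach handler above uses the usual
 * kernel "goto ladder" error unwinding, combined with an explicit ownership
 * hand-over at the point of no return.  Stripped of all DRBD specifics, the
 * pattern looks like this (names are illustrative only):
 *
 *	obj = allocate_and_prepare();
 *	if (some_check_fails)
 *		goto fail;		// we still own obj, release it below
 *	...
 *	device->ldev = obj;		// point of no return: ownership moves
 *	obj = NULL;			// so the fail path must not free it
 *	...
 *	return 0;
 * fail:
 *	kfree(obj);			// safe: NULL after the hand-over
 *	return 0;			// genl handlers report errors via retcode
 */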
1886
1887 static int adm_detach(struct drbd_device *device, int force)
1888 {
1889 enum drbd_state_rv retcode;
1890 int ret;
1891
1892 if (force) {
1893 set_bit(FORCE_DETACH, &device->flags);
1894 drbd_force_state(device, NS(disk, D_FAILED));
1895 retcode = SS_SUCCESS;
1896 goto out;
1897 }
1898
1899 drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
1900 drbd_md_get_buffer(device); /* make sure there is no in-flight meta-data IO */
1901 retcode = drbd_request_state(device, NS(disk, D_FAILED));
1902 drbd_md_put_buffer(device);
1903 /* D_FAILED will transition to DISKLESS. */
1904 ret = wait_event_interruptible(device->misc_wait,
1905 device->state.disk != D_FAILED);
1906 drbd_resume_io(device);
1907 if ((int)retcode == (int)SS_IS_DISKLESS)
1908 retcode = SS_NOTHING_TO_DO;
1909 if (ret)
1910 retcode = ERR_INTR;
1911 out:
1912 return retcode;
1913 }
1914
1915 /* Detaching the disk is a multi-stage process. First we need to lock
1916 * out application IO, in-flight IO, and IO stuck in drbd_al_begin_io.
1917 * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
1918 * internal references as well.
1919 * Only then have we finally detached. */
1920 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
1921 {
1922 struct drbd_config_context adm_ctx;
1923 enum drbd_ret_code retcode;
1924 struct detach_parms parms = { };
1925 int err;
1926
1927 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1928 if (!adm_ctx.reply_skb)
1929 return retcode;
1930 if (retcode != NO_ERROR)
1931 goto out;
1932
1933 if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
1934 err = detach_parms_from_attrs(&parms, info);
1935 if (err) {
1936 retcode = ERR_MANDATORY_TAG;
1937 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1938 goto out;
1939 }
1940 }
1941
1942 mutex_lock(&adm_ctx.resource->adm_mutex);
1943 retcode = adm_detach(adm_ctx.device, parms.force_detach);
1944 mutex_unlock(&adm_ctx.resource->adm_mutex);
1945 out:
1946 drbd_adm_finish(&adm_ctx, info, retcode);
1947 return 0;
1948 }
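/* Editor's note (hedged sketch): this handler is what a user space detach
 * request (e.g. "drbdsetup detach <minor> [--force]" in 8.4-era tooling;
 * exact syntax may differ) ends up in via generic netlink.  A graceful
 * detach requests D_FAILED and waits for the disk to leave that state,
 * while a forced detach sets FORCE_DETACH and forces D_FAILED immediately,
 * without the suspend/meta-data-buffer wait of the graceful path.
 */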
1949
1950 static bool conn_resync_running(struct drbd_connection *connection)
1951 {
1952 struct drbd_peer_device *peer_device;
1953 bool rv = false;
1954 int vnr;
1955
1956 rcu_read_lock();
1957 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1958 struct drbd_device *device = peer_device->device;
1959 if (device->state.conn == C_SYNC_SOURCE ||
1960 device->state.conn == C_SYNC_TARGET ||
1961 device->state.conn == C_PAUSED_SYNC_S ||
1962 device->state.conn == C_PAUSED_SYNC_T) {
1963 rv = true;
1964 break;
1965 }
1966 }
1967 rcu_read_unlock();
1968
1969 return rv;
1970 }
1971
1972 static bool conn_ov_running(struct drbd_connection *connection)
1973 {
1974 struct drbd_peer_device *peer_device;
1975 bool rv = false;
1976 int vnr;
1977
1978 rcu_read_lock();
1979 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1980 struct drbd_device *device = peer_device->device;
1981 if (device->state.conn == C_VERIFY_S ||
1982 device->state.conn == C_VERIFY_T) {
1983 rv = true;
1984 break;
1985 }
1986 }
1987 rcu_read_unlock();
1988
1989 return rv;
1990 }
1991
1992 static enum drbd_ret_code
1993 _check_net_options(struct drbd_connection *connection, struct net_conf *old_net_conf, struct net_conf *new_net_conf)
1994 {
1995 struct drbd_peer_device *peer_device;
1996 int i;
1997
1998 if (old_net_conf && connection->cstate == C_WF_REPORT_PARAMS && connection->agreed_pro_version < 100) {
1999 if (new_net_conf->wire_protocol != old_net_conf->wire_protocol)
2000 return ERR_NEED_APV_100;
2001
2002 if (new_net_conf->two_primaries != old_net_conf->two_primaries)
2003 return ERR_NEED_APV_100;
2004
2005 if (strcmp(new_net_conf->integrity_alg, old_net_conf->integrity_alg))
2006 return ERR_NEED_APV_100;
2007 }
2008
2009 if (!new_net_conf->two_primaries &&
2010 conn_highest_role(connection) == R_PRIMARY &&
2011 conn_highest_peer(connection) == R_PRIMARY)
2012 return ERR_NEED_ALLOW_TWO_PRI;
2013
2014 if (new_net_conf->two_primaries &&
2015 (new_net_conf->wire_protocol != DRBD_PROT_C))
2016 return ERR_NOT_PROTO_C;
2017
2018 idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2019 struct drbd_device *device = peer_device->device;
2020 if (get_ldev(device)) {
2021 enum drbd_fencing_p fp = rcu_dereference(device->ldev->disk_conf)->fencing;
2022 put_ldev(device);
2023 if (new_net_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH)
2024 return ERR_STONITH_AND_PROT_A;
2025 }
2026 if (device->state.role == R_PRIMARY && new_net_conf->discard_my_data)
2027 return ERR_DISCARD_IMPOSSIBLE;
2028 }
2029
2030 if (new_net_conf->on_congestion != OC_BLOCK && new_net_conf->wire_protocol != DRBD_PROT_A)
2031 return ERR_CONG_NOT_PROTO_A;
2032
2033 return NO_ERROR;
2034 }
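/* Editor's note (hedged sketch): the checks above correspond to drbd.conf
 * style net options (names per 8.4-era user space; exact syntax may differ).
 * For instance, dual-primary setups must use protocol C:
 *
 *	net {
 *		protocol C;		# any other protocol with two primaries
 *		allow-two-primaries yes;	# is rejected (ERR_NOT_PROTO_C)
 *	}
 *
 * and protocol A combined with fencing resource-and-stonith (FP_STONITH in
 * the disk configuration) is rejected with ERR_STONITH_AND_PROT_A.
 */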
2035
2036 static enum drbd_ret_code
2037 check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
2038 {
2039 static enum drbd_ret_code rv;
2040 struct drbd_peer_device *peer_device;
2041 int i;
2042
2043 rcu_read_lock();
2044 rv = _check_net_options(connection, rcu_dereference(connection->net_conf), new_net_conf);
2045 rcu_read_unlock();
2046
2047 /* connection->peer_devices protected by genl_lock() here */
2048 idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2049 struct drbd_device *device = peer_device->device;
2050 if (!device->bitmap) {
2051 if (drbd_bm_init(device))
2052 return ERR_NOMEM;
2053 }
2054 }
2055
2056 return rv;
2057 }
2058
2059 struct crypto {
2060 struct crypto_hash *verify_tfm;
2061 struct crypto_hash *csums_tfm;
2062 struct crypto_hash *cram_hmac_tfm;
2063 struct crypto_hash *integrity_tfm;
2064 };
2065
2066 static int
2067 alloc_hash(struct crypto_hash **tfm, char *tfm_name, int err_alg)
2068 {
2069 if (!tfm_name[0])
2070 return NO_ERROR;
2071
2072 *tfm = crypto_alloc_hash(tfm_name, 0, CRYPTO_ALG_ASYNC);
2073 if (IS_ERR(*tfm)) {
2074 *tfm = NULL;
2075 return err_alg;
2076 }
2077
2078 return NO_ERROR;
2079 }
2080
2081 static enum drbd_ret_code
2082 alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf)
2083 {
2084 char hmac_name[CRYPTO_MAX_ALG_NAME];
2085 enum drbd_ret_code rv;
2086
2087 rv = alloc_hash(&crypto->csums_tfm, new_net_conf->csums_alg,
2088 ERR_CSUMS_ALG);
2089 if (rv != NO_ERROR)
2090 return rv;
2091 rv = alloc_hash(&crypto->verify_tfm, new_net_conf->verify_alg,
2092 ERR_VERIFY_ALG);
2093 if (rv != NO_ERROR)
2094 return rv;
2095 rv = alloc_hash(&crypto->integrity_tfm, new_net_conf->integrity_alg,
2096 ERR_INTEGRITY_ALG);
2097 if (rv != NO_ERROR)
2098 return rv;
2099 if (new_net_conf->cram_hmac_alg[0] != 0) {
2100 snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
2101 new_net_conf->cram_hmac_alg);
2102
2103 rv = alloc_hash(&crypto->cram_hmac_tfm, hmac_name,
2104 ERR_AUTH_ALG);
2105 }
2106
2107 return rv;
2108 }
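/* Editor's note: for example, with cram_hmac_alg set to "sha1" the code above
 * builds the string "hmac(sha1)" and asks the crypto API for that transform;
 * an empty algorithm name simply leaves the corresponding tfm NULL, since
 * alloc_hash() returns NO_ERROR without allocating anything.
 */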
2109
2110 static void free_crypto(struct crypto *crypto)
2111 {
2112 crypto_free_hash(crypto->cram_hmac_tfm);
2113 crypto_free_hash(crypto->integrity_tfm);
2114 crypto_free_hash(crypto->csums_tfm);
2115 crypto_free_hash(crypto->verify_tfm);
2116 }
2117
2118 int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
2119 {
2120 struct drbd_config_context adm_ctx;
2121 enum drbd_ret_code retcode;
2122 struct drbd_connection *connection;
2123 struct net_conf *old_net_conf, *new_net_conf = NULL;
2124 int err;
2125 int ovr; /* online verify running */
2126 int rsr; /* re-sync running */
2127 struct crypto crypto = { };
2128
2129 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
2130 if (!adm_ctx.reply_skb)
2131 return retcode;
2132 if (retcode != NO_ERROR)
2133 goto finish;
2134
2135 connection = adm_ctx.connection;
2136 mutex_lock(&adm_ctx.resource->adm_mutex);
2137
2138 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
2139 if (!new_net_conf) {
2140 retcode = ERR_NOMEM;
2141 goto out;
2142 }
2143
2144 conn_reconfig_start(connection);
2145
2146 mutex_lock(&connection->data.mutex);
2147 mutex_lock(&connection->resource->conf_update);
2148 old_net_conf = connection->net_conf;
2149
2150 if (!old_net_conf) {
2151 drbd_msg_put_info(adm_ctx.reply_skb, "net conf missing, try connect");
2152 retcode = ERR_INVALID_REQUEST;
2153 goto fail;
2154 }
2155
2156 *new_net_conf = *old_net_conf;
2157 if (should_set_defaults(info))
2158 set_net_conf_defaults(new_net_conf);
2159
2160 err = net_conf_from_attrs_for_change(new_net_conf, info);
2161 if (err && err != -ENOMSG) {
2162 retcode = ERR_MANDATORY_TAG;
2163 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2164 goto fail;
2165 }
2166
2167 retcode = check_net_options(connection, new_net_conf);
2168 if (retcode != NO_ERROR)
2169 goto fail;
2170
2171 /* re-sync running */
2172 rsr = conn_resync_running(connection);
2173 if (rsr && strcmp(new_net_conf->csums_alg, old_net_conf->csums_alg)) {
2174 retcode = ERR_CSUMS_RESYNC_RUNNING;
2175 goto fail;
2176 }
2177
2178 /* online verify running */
2179 ovr = conn_ov_running(connection);
2180 if (ovr && strcmp(new_net_conf->verify_alg, old_net_conf->verify_alg)) {
2181 retcode = ERR_VERIFY_RUNNING;
2182 goto fail;
2183 }
2184
2185 retcode = alloc_crypto(&crypto, new_net_conf);
2186 if (retcode != NO_ERROR)
2187 goto fail;
2188
2189 rcu_assign_pointer(connection->net_conf, new_net_conf);
2190
2191 if (!rsr) {
2192 crypto_free_hash(connection->csums_tfm);
2193 connection->csums_tfm = crypto.csums_tfm;
2194 crypto.csums_tfm = NULL;
2195 }
2196 if (!ovr) {
2197 crypto_free_hash(connection->verify_tfm);
2198 connection->verify_tfm = crypto.verify_tfm;
2199 crypto.verify_tfm = NULL;
2200 }
2201
2202 crypto_free_hash(connection->integrity_tfm);
2203 connection->integrity_tfm = crypto.integrity_tfm;
2204 if (connection->cstate >= C_WF_REPORT_PARAMS && connection->agreed_pro_version >= 100)
2205 /* Do this without trying to take connection->data.mutex again. */
2206 __drbd_send_protocol(connection, P_PROTOCOL_UPDATE);
2207
2208 crypto_free_hash(connection->cram_hmac_tfm);
2209 connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2210
2211 mutex_unlock(&connection->resource->conf_update);
2212 mutex_unlock(&connection->data.mutex);
2213 synchronize_rcu();
2214 kfree(old_net_conf);
2215
2216 if (connection->cstate >= C_WF_REPORT_PARAMS) {
2217 struct drbd_peer_device *peer_device;
2218 int vnr;
2219
2220 idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
2221 drbd_send_sync_param(peer_device);
2222 }
2223
2224 goto done;
2225
2226 fail:
2227 mutex_unlock(&connection->resource->conf_update);
2228 mutex_unlock(&connection->data.mutex);
2229 free_crypto(&crypto);
2230 kfree(new_net_conf);
2231 done:
2232 conn_reconfig_done(connection);
2233 out:
2234 mutex_unlock(&adm_ctx.resource->adm_mutex);
2235 finish:
2236 drbd_adm_finish(&adm_ctx, info, retcode);
2237 return 0;
2238 }
2239
2240 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
2241 {
2242 struct drbd_config_context adm_ctx;
2243 struct drbd_peer_device *peer_device;
2244 struct net_conf *old_net_conf, *new_net_conf = NULL;
2245 struct crypto crypto = { };
2246 struct drbd_resource *resource;
2247 struct drbd_connection *connection;
2248 enum drbd_ret_code retcode;
2249 int i;
2250 int err;
2251
2252 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
2253
2254 if (!adm_ctx.reply_skb)
2255 return retcode;
2256 if (retcode != NO_ERROR)
2257 goto out;
2258 if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) {
2259 drbd_msg_put_info(adm_ctx.reply_skb, "connection endpoint(s) missing");
2260 retcode = ERR_INVALID_REQUEST;
2261 goto out;
2262 }
2263
2264 /* No need for _rcu here. All reconfiguration is
2265 * strictly serialized on genl_lock(). We are protected against
2266 * concurrent reconfiguration/addition/deletion */
2267 for_each_resource(resource, &drbd_resources) {
2268 for_each_connection(connection, resource) {
2269 if (nla_len(adm_ctx.my_addr) == connection->my_addr_len &&
2270 !memcmp(nla_data(adm_ctx.my_addr), &connection->my_addr,
2271 connection->my_addr_len)) {
2272 retcode = ERR_LOCAL_ADDR;
2273 goto out;
2274 }
2275
2276 if (nla_len(adm_ctx.peer_addr) == connection->peer_addr_len &&
2277 !memcmp(nla_data(adm_ctx.peer_addr), &connection->peer_addr,
2278 connection->peer_addr_len)) {
2279 retcode = ERR_PEER_ADDR;
2280 goto out;
2281 }
2282 }
2283 }
2284
2285 mutex_lock(&adm_ctx.resource->adm_mutex);
2286 connection = first_connection(adm_ctx.resource);
2287 conn_reconfig_start(connection);
2288
2289 if (connection->cstate > C_STANDALONE) {
2290 retcode = ERR_NET_CONFIGURED;
2291 goto fail;
2292 }
2293
2294 /* allocation not in the IO path, drbdsetup / netlink process context */
2295 new_net_conf = kzalloc(sizeof(*new_net_conf), GFP_KERNEL);
2296 if (!new_net_conf) {
2297 retcode = ERR_NOMEM;
2298 goto fail;
2299 }
2300
2301 set_net_conf_defaults(new_net_conf);
2302
2303 err = net_conf_from_attrs(new_net_conf, info);
2304 if (err && err != -ENOMSG) {
2305 retcode = ERR_MANDATORY_TAG;
2306 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2307 goto fail;
2308 }
2309
2310 retcode = check_net_options(connection, new_net_conf);
2311 if (retcode != NO_ERROR)
2312 goto fail;
2313
2314 retcode = alloc_crypto(&crypto, new_net_conf);
2315 if (retcode != NO_ERROR)
2316 goto fail;
2317
2318 ((char *)new_net_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;
2319
2320 drbd_flush_workqueue(&connection->sender_work);
2321
2322 mutex_lock(&adm_ctx.resource->conf_update);
2323 old_net_conf = connection->net_conf;
2324 if (old_net_conf) {
2325 retcode = ERR_NET_CONFIGURED;
2326 mutex_unlock(&adm_ctx.resource->conf_update);
2327 goto fail;
2328 }
2329 rcu_assign_pointer(connection->net_conf, new_net_conf);
2330
2331 conn_free_crypto(connection);
2332 connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2333 connection->integrity_tfm = crypto.integrity_tfm;
2334 connection->csums_tfm = crypto.csums_tfm;
2335 connection->verify_tfm = crypto.verify_tfm;
2336
2337 connection->my_addr_len = nla_len(adm_ctx.my_addr);
2338 memcpy(&connection->my_addr, nla_data(adm_ctx.my_addr), connection->my_addr_len);
2339 connection->peer_addr_len = nla_len(adm_ctx.peer_addr);
2340 memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len);
2341
2342 mutex_unlock(&adm_ctx.resource->conf_update);
2343
2344 rcu_read_lock();
2345 idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2346 struct drbd_device *device = peer_device->device;
2347 device->send_cnt = 0;
2348 device->recv_cnt = 0;
2349 }
2350 rcu_read_unlock();
2351
2352 retcode = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
2353
2354 conn_reconfig_done(connection);
2355 mutex_unlock(&adm_ctx.resource->adm_mutex);
2356 drbd_adm_finish(&adm_ctx, info, retcode);
2357 return 0;
2358
2359 fail:
2360 free_crypto(&crypto);
2361 kfree(new_net_conf);
2362
2363 conn_reconfig_done(connection);
2364 mutex_unlock(&adm_ctx.resource->adm_mutex);
2365 out:
2366 drbd_adm_finish(&adm_ctx, info, retcode);
2367 return 0;
2368 }
2369
2370 static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force)
2371 {
2372 enum drbd_state_rv rv;
2373
2374 rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2375 force ? CS_HARD : 0);
2376
2377 switch (rv) {
2378 case SS_NOTHING_TO_DO:
2379 break;
2380 case SS_ALREADY_STANDALONE:
2381 return SS_SUCCESS;
2382 case SS_PRIMARY_NOP:
2383 /* Our state checking code wants to see the peer outdated. */
2384 rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), 0);
2385
2386 if (rv == SS_OUTDATE_WO_CONN) /* lost connection before graceful disconnect succeeded */
2387 rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_VERBOSE);
2388
2389 break;
2390 case SS_CW_FAILED_BY_PEER:
2391 /* The peer probably wants to see us outdated. */
2392 rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING,
2393 disk, D_OUTDATED), 0);
2394 if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
2395 rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2396 CS_HARD);
2397 }
2398 break;
2399 default:;
2400 /* no special handling necessary */
2401 }
2402
2403 if (rv >= SS_SUCCESS) {
2404 enum drbd_state_rv rv2;
2405 /* No one else can reconfigure the network while I am here.
2406 * The state handling only uses drbd_thread_stop_nowait();
2407 * here we really want to wait until the receiver thread is gone.
2408 */
2409 drbd_thread_stop(&connection->receiver);
2410
2411 /* Race breaker. This additional state change request may be
2412 * necessary if this was a forced disconnect during a receiver
2413 * restart. We may have "killed" the receiver thread just
2414 * after drbd_receiver() returned. Typically we are already
2415 * C_STANDALONE by now, and this becomes a no-op.
2416 */
2417 rv2 = conn_request_state(connection, NS(conn, C_STANDALONE),
2418 CS_VERBOSE | CS_HARD);
2419 if (rv2 < SS_SUCCESS)
2420 drbd_err(connection,
2421 "unexpected rv2=%d in conn_try_disconnect()\n",
2422 rv2);
2423 }
2424 return rv;
2425 }
2426
2427 int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
2428 {
2429 struct drbd_config_context adm_ctx;
2430 struct disconnect_parms parms;
2431 struct drbd_connection *connection;
2432 enum drbd_state_rv rv;
2433 enum drbd_ret_code retcode;
2434 int err;
2435
2436 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
2437 if (!adm_ctx.reply_skb)
2438 return retcode;
2439 if (retcode != NO_ERROR)
2440 goto fail;
2441
2442 connection = adm_ctx.connection;
2443 memset(&parms, 0, sizeof(parms));
2444 if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
2445 err = disconnect_parms_from_attrs(&parms, info);
2446 if (err) {
2447 retcode = ERR_MANDATORY_TAG;
2448 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2449 goto fail;
2450 }
2451 }
2452
2453 mutex_lock(&adm_ctx.resource->adm_mutex);
2454 rv = conn_try_disconnect(connection, parms.force_disconnect);
2455 if (rv < SS_SUCCESS)
2456 retcode = rv; /* FIXME: Type mismatch. */
2457 else
2458 retcode = NO_ERROR;
2459 mutex_unlock(&adm_ctx.resource->adm_mutex);
2460 fail:
2461 drbd_adm_finish(&adm_ctx, info, retcode);
2462 return 0;
2463 }
2464
2465 void resync_after_online_grow(struct drbd_device *device)
2466 {
2467 int iass; /* I am sync source */
2468
2469 drbd_info(device, "Resync of new storage after online grow\n");
2470 if (device->state.role != device->state.peer)
2471 iass = (device->state.role == R_PRIMARY);
2472 else
2473 iass = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
2474
2475 if (iass)
2476 drbd_start_resync(device, C_SYNC_SOURCE);
2477 else
2478 _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
2479 }
2480
2481 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
2482 {
2483 struct drbd_config_context adm_ctx;
2484 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
2485 struct resize_parms rs;
2486 struct drbd_device *device;
2487 enum drbd_ret_code retcode;
2488 enum determine_dev_size dd;
2489 bool change_al_layout = false;
2490 enum dds_flags ddsf;
2491 sector_t u_size;
2492 int err;
2493
2494 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2495 if (!adm_ctx.reply_skb)
2496 return retcode;
2497 if (retcode != NO_ERROR)
2498 goto finish;
2499
2500 mutex_lock(&adm_ctx.resource->adm_mutex);
2501 device = adm_ctx.device;
2502 if (!get_ldev(device)) {
2503 retcode = ERR_NO_DISK;
2504 goto fail;
2505 }
2506
2507 memset(&rs, 0, sizeof(struct resize_parms));
2508 rs.al_stripes = device->ldev->md.al_stripes;
2509 rs.al_stripe_size = device->ldev->md.al_stripe_size_4k * 4;
2510 if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
2511 err = resize_parms_from_attrs(&rs, info);
2512 if (err) {
2513 retcode = ERR_MANDATORY_TAG;
2514 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2515 goto fail_ldev;
2516 }
2517 }
2518
2519 if (device->state.conn > C_CONNECTED) {
2520 retcode = ERR_RESIZE_RESYNC;
2521 goto fail_ldev;
2522 }
2523
2524 if (device->state.role == R_SECONDARY &&
2525 device->state.peer == R_SECONDARY) {
2526 retcode = ERR_NO_PRIMARY;
2527 goto fail_ldev;
2528 }
2529
2530 if (rs.no_resync && first_peer_device(device)->connection->agreed_pro_version < 93) {
2531 retcode = ERR_NEED_APV_93;
2532 goto fail_ldev;
2533 }
2534
2535 rcu_read_lock();
2536 u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
2537 rcu_read_unlock();
2538 if (u_size != (sector_t)rs.resize_size) {
2539 new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
2540 if (!new_disk_conf) {
2541 retcode = ERR_NOMEM;
2542 goto fail_ldev;
2543 }
2544 }
2545
2546 if (device->ldev->md.al_stripes != rs.al_stripes ||
2547 device->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
2548 u32 al_size_k = rs.al_stripes * rs.al_stripe_size;
2549
2550 if (al_size_k > (16 * 1024 * 1024)) {
2551 retcode = ERR_MD_LAYOUT_TOO_BIG;
2552 goto fail_ldev;
2553 }
2554
2555 if (al_size_k < MD_32kB_SECT/2) {
2556 retcode = ERR_MD_LAYOUT_TOO_SMALL;
2557 goto fail_ldev;
2558 }
2559
2560 if (device->state.conn != C_CONNECTED && !rs.resize_force) {
2561 retcode = ERR_MD_LAYOUT_CONNECTED;
2562 goto fail_ldev;
2563 }
2564
2565 change_al_layout = true;
2566 }
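/* Editor's note (worked example, hedged): rs.al_stripe_size is in kB here
 * (al_stripe_size_4k * 4 above), so al_size_k = al_stripes * al_stripe_size
 * is the total activity-log size in kB.  The accepted range is
 * [MD_32kB_SECT/2, 16 * 1024 * 1024] kB, i.e. presumably 32 kB up to 16 GiB.
 * E.g. al_stripes = 4, al_stripe_size = 32 gives al_size_k = 128 (accepted),
 * while al_stripes = 1, al_stripe_size = 16 gives 16 kB and would be
 * rejected with ERR_MD_LAYOUT_TOO_SMALL.
 */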
2567
2568 if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev))
2569 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
2570
2571 if (new_disk_conf) {
2572 mutex_lock(&device->resource->conf_update);
2573 old_disk_conf = device->ldev->disk_conf;
2574 *new_disk_conf = *old_disk_conf;
2575 new_disk_conf->disk_size = (sector_t)rs.resize_size;
2576 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
2577 mutex_unlock(&device->resource->conf_update);
2578 synchronize_rcu();
2579 kfree(old_disk_conf);
2580 }
2581
2582 ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
2583 dd = drbd_determine_dev_size(device, ddsf, change_al_layout ? &rs : NULL);
2584 drbd_md_sync(device);
2585 put_ldev(device);
2586 if (dd == DS_ERROR) {
2587 retcode = ERR_NOMEM_BITMAP;
2588 goto fail;
2589 } else if (dd == DS_ERROR_SPACE_MD) {
2590 retcode = ERR_MD_LAYOUT_NO_FIT;
2591 goto fail;
2592 } else if (dd == DS_ERROR_SHRINK) {
2593 retcode = ERR_IMPLICIT_SHRINK;
2594 goto fail;
2595 }
2596
2597 if (device->state.conn == C_CONNECTED) {
2598 if (dd == DS_GREW)
2599 set_bit(RESIZE_PENDING, &device->flags);
2600
2601 drbd_send_uuids(first_peer_device(device));
2602 drbd_send_sizes(first_peer_device(device), 1, ddsf);
2603 }
2604
2605 fail:
2606 mutex_unlock(&adm_ctx.resource->adm_mutex);
2607 finish:
2608 drbd_adm_finish(&adm_ctx, info, retcode);
2609 return 0;
2610
2611 fail_ldev:
2612 put_ldev(device);
2613 goto fail;
2614 }
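/* Editor's note (hedged sketch): a typical online-grow sequence driven
 * through this handler looks roughly like: grow the backing device on both
 * nodes, then issue a resize (e.g. "drbdsetup resize <minor>" or
 * "drbdadm resize <res>"; exact user space syntax may differ).  If
 * drbd_determine_dev_size() reports DS_GREW while connected, RESIZE_PENDING
 * is set and the new UUIDs and sizes are sent to the peer; the resync of the
 * newly added area is started later (see resync_after_online_grow() above).
 */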
2615
2616 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
2617 {
2618 struct drbd_config_context adm_ctx;
2619 enum drbd_ret_code retcode;
2620 struct res_opts res_opts;
2621 int err;
2622
2623 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
2624 if (!adm_ctx.reply_skb)
2625 return retcode;
2626 if (retcode != NO_ERROR)
2627 goto fail;
2628
2629 res_opts = adm_ctx.resource->res_opts;
2630 if (should_set_defaults(info))
2631 set_res_opts_defaults(&res_opts);
2632
2633 err = res_opts_from_attrs(&res_opts, info);
2634 if (err && err != -ENOMSG) {
2635 retcode = ERR_MANDATORY_TAG;
2636 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2637 goto fail;
2638 }
2639
2640 mutex_lock(&adm_ctx.resource->adm_mutex);
2641 err = set_resource_options(adm_ctx.resource, &res_opts);
2642 if (err) {
2643 retcode = ERR_INVALID_REQUEST;
2644 if (err == -ENOMEM)
2645 retcode = ERR_NOMEM;
2646 }
2647 mutex_unlock(&adm_ctx.resource->adm_mutex);
2648
2649 fail:
2650 drbd_adm_finish(&adm_ctx, info, retcode);
2651 return 0;
2652 }
2653
2654 int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
2655 {
2656 struct drbd_config_context adm_ctx;
2657 struct drbd_device *device;
2658 int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2659
2660 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2661 if (!adm_ctx.reply_skb)
2662 return retcode;
2663 if (retcode != NO_ERROR)
2664 goto out;
2665
2666 device = adm_ctx.device;
2667 if (!get_ldev(device)) {
2668 retcode = ERR_NO_DISK;
2669 goto out;
2670 }
2671
2672 mutex_lock(&adm_ctx.resource->adm_mutex);
2673
2674 /* If there is still bitmap IO pending, probably because a previous
2675 * resync has just finished, wait for it before requesting a new resync.
2676 * Also wait for its after_state_ch(). */
2677 drbd_suspend_io(device);
2678 wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2679 drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
2680
2681 /* If we happen to be C_STANDALONE R_SECONDARY, just change to
2682 * D_INCONSISTENT, and set all bits in the bitmap. Otherwise,
2683 * try to start a resync handshake as sync target for full sync.
2684 */
2685 if (device->state.conn == C_STANDALONE && device->state.role == R_SECONDARY) {
2686 retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT));
2687 if (retcode >= SS_SUCCESS) {
2688 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
2689 "set_n_write from invalidate", BM_LOCKED_MASK))
2690 retcode = ERR_IO_MD_DISK;
2691 }
2692 } else
2693 retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
2694 drbd_resume_io(device);
2695 mutex_unlock(&adm_ctx.resource->adm_mutex);
2696 put_ldev(device);
2697 out:
2698 drbd_adm_finish(&adm_ctx, info, retcode);
2699 return 0;
2700 }
2701
2702 static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
2703 union drbd_state mask, union drbd_state val)
2704 {
2705 struct drbd_config_context adm_ctx;
2706 enum drbd_ret_code retcode;
2707
2708 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2709 if (!adm_ctx.reply_skb)
2710 return retcode;
2711 if (retcode != NO_ERROR)
2712 goto out;
2713
2714 mutex_lock(&adm_ctx.resource->adm_mutex);
2715 retcode = drbd_request_state(adm_ctx.device, mask, val);
2716 mutex_unlock(&adm_ctx.resource->adm_mutex);
2717 out:
2718 drbd_adm_finish(&adm_ctx, info, retcode);
2719 return 0;
2720 }
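/* Editor's note (hedged): NS(field, value), as used above and throughout this
 * file, apparently expands to a (mask, val) pair of union drbd_state with
 * only the named field selected, so e.g. drbd_adm_outdate() below effectively
 * asks the state engine to change nothing but the disk state, to D_OUTDATED.
 */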
2721
2722 static int drbd_bmio_set_susp_al(struct drbd_device *device) __must_hold(local)
2723 {
2724 int rv;
2725
2726 rv = drbd_bmio_set_n_write(device);
2727 drbd_suspend_al(device);
2728 return rv;
2729 }
2730
2731 int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
2732 {
2733 struct drbd_config_context adm_ctx;
2734 int retcode; /* drbd_ret_code, drbd_state_rv */
2735 struct drbd_device *device;
2736
2737 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2738 if (!adm_ctx.reply_skb)
2739 return retcode;
2740 if (retcode != NO_ERROR)
2741 goto out;
2742
2743 device = adm_ctx.device;
2744 if (!get_ldev(device)) {
2745 retcode = ERR_NO_DISK;
2746 goto out;
2747 }
2748
2749 mutex_lock(&adm_ctx.resource->adm_mutex);
2750
2751 /* If there is still bitmap IO pending, probably because a previous
2752 * resync has just finished, wait for it before requesting a new resync.
2753 * Also wait for its after_state_ch(). */
2754 drbd_suspend_io(device);
2755 wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2756 drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
2757
2758 /* If we happen to be C_STANDALONE R_PRIMARY, just set all bits
2759 * in the bitmap. Otherwise, try to start a resync handshake
2760 * as sync source for full sync.
2761 */
2762 if (device->state.conn == C_STANDALONE && device->state.role == R_PRIMARY) {
2763 /* The peer will get a resync upon connect anyway. Just make that
2764 into a full resync. */
2765 retcode = drbd_request_state(device, NS(pdsk, D_INCONSISTENT));
2766 if (retcode >= SS_SUCCESS) {
2767 if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al,
2768 "set_n_write from invalidate_peer",
2769 BM_LOCKED_SET_ALLOWED))
2770 retcode = ERR_IO_MD_DISK;
2771 }
2772 } else
2773 retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
2774 drbd_resume_io(device);
2775 mutex_unlock(&adm_ctx.resource->adm_mutex);
2776 put_ldev(device);
2777 out:
2778 drbd_adm_finish(&adm_ctx, info, retcode);
2779 return 0;
2780 }
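/* Editor's note (hedged): drbd_adm_invalidate() above makes the local disk
 * the sync target (C_STARTING_SYNC_T, full sync *from* the peer), while
 * drbd_adm_invalidate_peer() makes it the sync source (C_STARTING_SYNC_S,
 * full sync *to* the peer).  In user space these correspond roughly to
 * "drbdsetup invalidate" and "drbdsetup invalidate-remote" (naming may
 * differ between versions).
 */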
2781
2782 int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
2783 {
2784 struct drbd_config_context adm_ctx;
2785 enum drbd_ret_code retcode;
2786
2787 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2788 if (!adm_ctx.reply_skb)
2789 return retcode;
2790 if (retcode != NO_ERROR)
2791 goto out;
2792
2793 mutex_lock(&adm_ctx.resource->adm_mutex);
2794 if (drbd_request_state(adm_ctx.device, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
2795 retcode = ERR_PAUSE_IS_SET;
2796 mutex_unlock(&adm_ctx.resource->adm_mutex);
2797 out:
2798 drbd_adm_finish(&adm_ctx, info, retcode);
2799 return 0;
2800 }
2801
2802 int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
2803 {
2804 struct drbd_config_context adm_ctx;
2805 union drbd_dev_state s;
2806 enum drbd_ret_code retcode;
2807
2808 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2809 if (!adm_ctx.reply_skb)
2810 return retcode;
2811 if (retcode != NO_ERROR)
2812 goto out;
2813
2814 mutex_lock(&adm_ctx.resource->adm_mutex);
2815 if (drbd_request_state(adm_ctx.device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
2816 s = adm_ctx.device->state;
2817 if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
2818 retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
2819 s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
2820 } else {
2821 retcode = ERR_PAUSE_IS_CLEAR;
2822 }
2823 }
2824 mutex_unlock(&adm_ctx.resource->adm_mutex);
2825 out:
2826 drbd_adm_finish(&adm_ctx, info, retcode);
2827 return 0;
2828 }
2829
2830 int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
2831 {
2832 return drbd_adm_simple_request_state(skb, info, NS(susp, 1));
2833 }
2834
2835 int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
2836 {
2837 struct drbd_config_context adm_ctx;
2838 struct drbd_device *device;
2839 int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2840
2841 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2842 if (!adm_ctx.reply_skb)
2843 return retcode;
2844 if (retcode != NO_ERROR)
2845 goto out;
2846
2847 mutex_lock(&adm_ctx.resource->adm_mutex);
2848 device = adm_ctx.device;
2849 if (test_bit(NEW_CUR_UUID, &device->flags)) {
2850 drbd_uuid_new_current(device);
2851 clear_bit(NEW_CUR_UUID, &device->flags);
2852 }
2853 drbd_suspend_io(device);
2854 retcode = drbd_request_state(device, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
2855 if (retcode == SS_SUCCESS) {
2856 if (device->state.conn < C_CONNECTED)
2857 tl_clear(first_peer_device(device)->connection);
2858 if (device->state.disk == D_DISKLESS || device->state.disk == D_FAILED)
2859 tl_restart(first_peer_device(device)->connection, FAIL_FROZEN_DISK_IO);
2860 }
2861 drbd_resume_io(device);
2862 mutex_unlock(&adm_ctx.resource->adm_mutex);
2863 out:
2864 drbd_adm_finish(&adm_ctx, info, retcode);
2865 return 0;
2866 }
2867
2868 int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
2869 {
2870 return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
2871 }
2872
2873 static int nla_put_drbd_cfg_context(struct sk_buff *skb,
2874 struct drbd_resource *resource,
2875 struct drbd_connection *connection,
2876 struct drbd_device *device)
2877 {
2878 struct nlattr *nla;
2879 nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
2880 if (!nla)
2881 goto nla_put_failure;
2882 if (device &&
2883 nla_put_u32(skb, T_ctx_volume, device->vnr))
2884 goto nla_put_failure;
2885 if (nla_put_string(skb, T_ctx_resource_name, resource->name))
2886 goto nla_put_failure;
2887 if (connection) {
2888 if (connection->my_addr_len &&
2889 nla_put(skb, T_ctx_my_addr, connection->my_addr_len, &connection->my_addr))
2890 goto nla_put_failure;
2891 if (connection->peer_addr_len &&
2892 nla_put(skb, T_ctx_peer_addr, connection->peer_addr_len, &connection->peer_addr))
2893 goto nla_put_failure;
2894 }
2895 nla_nest_end(skb, nla);
2896 return 0;
2897
2898 nla_put_failure:
2899 if (nla)
2900 nla_nest_cancel(skb, nla);
2901 return -EMSGSIZE;
2902 }
2903
2904 /*
2905 * Return the connection of @resource if @resource has exactly one connection.
2906 */
2907 static struct drbd_connection *the_only_connection(struct drbd_resource *resource)
2908 {
2909 struct list_head *connections = &resource->connections;
2910
2911 if (list_empty(connections) || connections->next->next != connections)
2912 return NULL;
2913 return list_first_entry(&resource->connections, struct drbd_connection, connections);
2914 }
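/* Editor's note (hedged sketch): the open-coded test above checks for
 * "exactly one list entry"; if list_is_singular() from <linux/list.h> is
 * available in this tree, the same could be written as:
 *
 *	if (!list_is_singular(&resource->connections))
 *		return NULL;
 *	return list_first_entry(&resource->connections,
 *				struct drbd_connection, connections);
 */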
2915
2916 int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
2917 const struct sib_info *sib)
2918 {
2919 struct drbd_resource *resource = device->resource;
2920 struct state_info *si = NULL; /* for sizeof(si->member); */
2921 struct nlattr *nla;
2922 int got_ldev;
2923 int err = 0;
2924 int exclude_sensitive;
2925
2926 /* If sib != NULL, this is drbd_bcast_event, which anyone can listen
2927 * to. So we had better exclude sensitive information.
2928 *
2929 * If sib == NULL, this is drbd_adm_get_status, executed synchronously
2930 * in the context of the requesting user process. Exclude sensitive
2931 * information, unless current is capable of CAP_SYS_ADMIN.
2932 *
2933 * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
2934 * relies on the current implementation of netlink_dump(), which
2935 * executes the dump callback successively from netlink_recvmsg(),
2936 * always in the context of the receiving process */
2937 exclude_sensitive = sib || !capable(CAP_SYS_ADMIN);
2938
2939 got_ldev = get_ldev(device);
2940
2941 /* We still need to add the resource name and volume number information.
2942 * The minor number is in drbd_genlmsghdr. */
2943 if (nla_put_drbd_cfg_context(skb, resource, the_only_connection(resource), device))
2944 goto nla_put_failure;
2945
2946 if (res_opts_to_skb(skb, &device->resource->res_opts, exclude_sensitive))
2947 goto nla_put_failure;
2948
2949 rcu_read_lock();
2950 if (got_ldev) {
2951 struct disk_conf *disk_conf;
2952
2953 disk_conf = rcu_dereference(device->ldev->disk_conf);
2954 err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
2955 }
2956 if (!err) {
2957 struct net_conf *nc;
2958
2959 nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
2960 if (nc)
2961 err = net_conf_to_skb(skb, nc, exclude_sensitive);
2962 }
2963 rcu_read_unlock();
2964 if (err)
2965 goto nla_put_failure;
2966
2967 nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
2968 if (!nla)
2969 goto nla_put_failure;
2970 if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
2971 nla_put_u32(skb, T_current_state, device->state.i) ||
2972 nla_put_u64(skb, T_ed_uuid, device->ed_uuid) ||
2973 nla_put_u64(skb, T_capacity, drbd_get_capacity(device->this_bdev)) ||
2974 nla_put_u64(skb, T_send_cnt, device->send_cnt) ||
2975 nla_put_u64(skb, T_recv_cnt, device->recv_cnt) ||
2976 nla_put_u64(skb, T_read_cnt, device->read_cnt) ||
2977 nla_put_u64(skb, T_writ_cnt, device->writ_cnt) ||
2978 nla_put_u64(skb, T_al_writ_cnt, device->al_writ_cnt) ||
2979 nla_put_u64(skb, T_bm_writ_cnt, device->bm_writ_cnt) ||
2980 nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&device->ap_bio_cnt)) ||
2981 nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&device->ap_pending_cnt)) ||
2982 nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&device->rs_pending_cnt)))
2983 goto nla_put_failure;
2984
2985 if (got_ldev) {
2986 int err;
2987
2988 spin_lock_irq(&device->ldev->md.uuid_lock);
2989 err = nla_put(skb, T_uuids, sizeof(si->uuids), device->ldev->md.uuid);
2990 spin_unlock_irq(&device->ldev->md.uuid_lock);
2991
2992 if (err)
2993 goto nla_put_failure;
2994
2995 if (nla_put_u32(skb, T_disk_flags, device->ldev->md.flags) ||
2996 nla_put_u64(skb, T_bits_total, drbd_bm_bits(device)) ||
2997 nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(device)))
2998 goto nla_put_failure;
2999 if (C_SYNC_SOURCE <= device->state.conn &&
3000 C_PAUSED_SYNC_T >= device->state.conn) {
3001 if (nla_put_u64(skb, T_bits_rs_total, device->rs_total) ||
3002 nla_put_u64(skb, T_bits_rs_failed, device->rs_failed))
3003 goto nla_put_failure;
3004 }
3005 }
3006
3007 if (sib) {
3008 switch(sib->sib_reason) {
3009 case SIB_SYNC_PROGRESS:
3010 case SIB_GET_STATUS_REPLY:
3011 break;
3012 case SIB_STATE_CHANGE:
3013 if (nla_put_u32(skb, T_prev_state, sib->os.i) ||
3014 nla_put_u32(skb, T_new_state, sib->ns.i))
3015 goto nla_put_failure;
3016 break;
3017 case SIB_HELPER_POST:
3018 if (nla_put_u32(skb, T_helper_exit_code,
3019 sib->helper_exit_code))
3020 goto nla_put_failure;
3021 /* fall through */
3022 case SIB_HELPER_PRE:
3023 if (nla_put_string(skb, T_helper, sib->helper_name))
3024 goto nla_put_failure;
3025 break;
3026 }
3027 }
3028 nla_nest_end(skb, nla);
3029
3030 if (0)
3031 nla_put_failure:
3032 err = -EMSGSIZE;
3033 if (got_ldev)
3034 put_ldev(device);
3035 return err;
3036 }
3037
3038 int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
3039 {
3040 struct drbd_config_context adm_ctx;
3041 enum drbd_ret_code retcode;
3042 int err;
3043
3044 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3045 if (!adm_ctx.reply_skb)
3046 return retcode;
3047 if (retcode != NO_ERROR)
3048 goto out;
3049
3050 err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.device, NULL);
3051 if (err) {
3052 nlmsg_free(adm_ctx.reply_skb);
3053 return err;
3054 }
3055 out:
3056 drbd_adm_finish(&adm_ctx, info, retcode);
3057 return 0;
3058 }
3059
3060 static int get_one_status(struct sk_buff *skb, struct netlink_callback *cb)
3061 {
3062 struct drbd_device *device;
3063 struct drbd_genlmsghdr *dh;
3064 struct drbd_resource *pos = (struct drbd_resource *)cb->args[0];
3065 struct drbd_resource *resource = NULL;
3066 struct drbd_resource *tmp;
3067 unsigned volume = cb->args[1];
3068
3069 /* Open coded, deferred, iteration:
3070 * for_each_resource_safe(resource, tmp, &drbd_resources) {
3071 * connection = "first connection of resource or undefined";
3072 * idr_for_each_entry(&resource->devices, device, i) {
3073 * ...
3074 * }
3075 * }
3076 * where resource is cb->args[0];
3077 * and i is cb->args[1];
3078 *
3079 * cb->args[2] indicates if we shall loop over all resources,
3080 * or just dump all volumes of a single resource.
3081 *
3082 * This may miss entries inserted after this dump started,
3083 * or entries deleted before they are reached.
3084 *
3085 * We need to make sure the device won't disappear while
3086 * we are looking at it, and revalidate our iterators
3087 * on each iteration.
3088 */
3089
3090 /* synchronize with conn_create()/drbd_destroy_connection() */
3091 rcu_read_lock();
3092 /* revalidate iterator position */
3093 for_each_resource_rcu(tmp, &drbd_resources) {
3094 if (pos == NULL) {
3095 /* first iteration */
3096 pos = tmp;
3097 resource = pos;
3098 break;
3099 }
3100 if (tmp == pos) {
3101 resource = pos;
3102 break;
3103 }
3104 }
3105 if (resource) {
3106 next_resource:
3107 device = idr_get_next(&resource->devices, &volume);
3108 if (!device) {
3109 /* No more volumes to dump on this resource.
3110 * Advance resource iterator. */
3111 pos = list_entry_rcu(resource->resources.next,
3112 struct drbd_resource, resources);
3113 /* Did we dump any volume of this resource yet? */
3114 if (volume != 0) {
3115 /* If we reached the end of the list,
3116 * or only a single resource dump was requested,
3117 * we are done. */
3118 if (&pos->resources == &drbd_resources || cb->args[2])
3119 goto out;
3120 volume = 0;
3121 resource = pos;
3122 goto next_resource;
3123 }
3124 }
3125
3126 dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3127 cb->nlh->nlmsg_seq, &drbd_genl_family,
3128 NLM_F_MULTI, DRBD_ADM_GET_STATUS);
3129 if (!dh)
3130 goto out;
3131
3132 if (!device) {
3133 /* This is a connection without a single volume.
3134 * Surprisingly enough, it may have a network
3135 * configuration. */
3136 struct drbd_connection *connection;
3137
3138 dh->minor = -1U;
3139 dh->ret_code = NO_ERROR;
3140 connection = the_only_connection(resource);
3141 if (nla_put_drbd_cfg_context(skb, resource, connection, NULL))
3142 goto cancel;
3143 if (connection) {
3144 struct net_conf *nc;
3145
3146 nc = rcu_dereference(connection->net_conf);
3147 if (nc && net_conf_to_skb(skb, nc, 1) != 0)
3148 goto cancel;
3149 }
3150 goto done;
3151 }
3152
3153 D_ASSERT(device, device->vnr == volume);
3154 D_ASSERT(device, device->resource == resource);
3155
3156 dh->minor = device_to_minor(device);
3157 dh->ret_code = NO_ERROR;
3158
3159 if (nla_put_status_info(skb, device, NULL)) {
3160 cancel:
3161 genlmsg_cancel(skb, dh);
3162 goto out;
3163 }
3164 done:
3165 genlmsg_end(skb, dh);
3166 }
3167
3168 out:
3169 rcu_read_unlock();
3170 /* where to start the next iteration */
3171 cb->args[0] = (long)pos;
3172 cb->args[1] = (pos == resource) ? volume + 1 : 0;
3173
3174 /* If no more resources/volumes/minors are found, the skb stays empty,
3175 * which terminates the dump. */
3176 return skb->len;
3177 }
3178
3179 /*
3180 * Request status of all resources, or of all volumes within a single resource.
3181 *
3182 * This is a dump, as the answer may not fit in a single reply skb otherwise.
3183 * Which means we cannot use the family->attrbuf or other such members, because
3184 * dump is NOT protected by the genl_lock(). During dump, we only have access
3185 * to the incoming skb, and need to opencode "parsing" of the nlattr payload.
3186 *
3187 * Once things are setup properly, we call into get_one_status().
3188 */
3189 int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
3190 {
3191 const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
3192 struct nlattr *nla;
3193 const char *resource_name;
3194 struct drbd_resource *resource;
3195 int maxtype;
3196
3197 /* Is this a follow-up call? */
3198 if (cb->args[0]) {
3199 /* ... of a single resource dump,
3200 * and the resource iterator has been advanced already? */
3201 if (cb->args[2] && cb->args[2] != cb->args[0])
3202 return 0; /* DONE. */
3203 goto dump;
3204 }
3205
3206 /* First call (from netlink_dump_start). We need to figure out
3207 * which resource(s) the user wants us to dump. */
3208 nla = nla_find(nlmsg_attrdata(cb->nlh, hdrlen),
3209 nlmsg_attrlen(cb->nlh, hdrlen),
3210 DRBD_NLA_CFG_CONTEXT);
3211
3212 /* No explicit context given. Dump all. */
3213 if (!nla)
3214 goto dump;
3215 maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
3216 nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
3217 if (IS_ERR(nla))
3218 return PTR_ERR(nla);
3219 /* context given, but no name present? */
3220 if (!nla)
3221 return -EINVAL;
3222 resource_name = nla_data(nla);
3223 if (!*resource_name)
3224 return -ENODEV;
3225 resource = drbd_find_resource(resource_name);
3226 if (!resource)
3227 return -ENODEV;
3228
3229 kref_put(&resource->kref, drbd_destroy_resource); /* get_one_status() revalidates the resource */
3230
3231 /* prime iterators, and set "filter" mode mark:
3232 * only dump this connection. */
3233 cb->args[0] = (long)resource;
3234 /* cb->args[1] = 0; passed in this way. */
3235 cb->args[2] = (long)resource;
3236
3237 dump:
3238 return get_one_status(skb, cb);
3239 }
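/* Editor's note (hedged sketch): get_one_status() above is a standard netlink
 * dump callback: cb->args[] persist between invocations and act as a resume
 * cursor, a non-empty skb (return skb->len) means "call me again", and an
 * empty skb ends the dump.  A minimal generic skeleton of the idiom, with
 * illustrative names only:
 *
 *	static int example_dump(struct sk_buff *skb, struct netlink_callback *cb)
 *	{
 *		unsigned long idx = cb->args[0];	// resume position
 *
 *		for (; idx < nr_items; idx++) {
 *			if (put_one_item(skb, idx) < 0)
 *				break;		// skb full, resume here next time
 *		}
 *		cb->args[0] = idx;			// remember cursor
 *		return skb->len;			// 0 once nothing was added
 *	}
 */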
3240
3241 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
3242 {
3243 struct drbd_config_context adm_ctx;
3244 enum drbd_ret_code retcode;
3245 struct timeout_parms tp;
3246 int err;
3247
3248 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3249 if (!adm_ctx.reply_skb)
3250 return retcode;
3251 if (retcode != NO_ERROR)
3252 goto out;
3253
3254 tp.timeout_type =
3255 adm_ctx.device->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
3256 test_bit(USE_DEGR_WFC_T, &adm_ctx.device->flags) ? UT_DEGRADED :
3257 UT_DEFAULT;
3258
3259 err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
3260 if (err) {
3261 nlmsg_free(adm_ctx.reply_skb);
3262 return err;
3263 }
3264 out:
3265 drbd_adm_finish(&adm_ctx, info, retcode);
3266 return 0;
3267 }
3268
3269 int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
3270 {
3271 struct drbd_config_context adm_ctx;
3272 struct drbd_device *device;
3273 enum drbd_ret_code retcode;
3274 struct start_ov_parms parms;
3275
3276 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3277 if (!adm_ctx.reply_skb)
3278 return retcode;
3279 if (retcode != NO_ERROR)
3280 goto out;
3281
3282 device = adm_ctx.device;
3283
3284 /* resume from last known position, if possible */
3285 parms.ov_start_sector = device->ov_start_sector;
3286 parms.ov_stop_sector = ULLONG_MAX;
3287 if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
3288 int err = start_ov_parms_from_attrs(&parms, info);
3289 if (err) {
3290 retcode = ERR_MANDATORY_TAG;
3291 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3292 goto out;
3293 }
3294 }
3295 mutex_lock(&adm_ctx.resource->adm_mutex);
3296
3297 /* w_make_ov_request expects position to be aligned */
3298 device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
3299 device->ov_stop_sector = parms.ov_stop_sector;
3300
3301 /* If there is still bitmap IO pending, e.g. a previous resync or verify
3302 * that has just finished, wait for it before requesting a new resync. */
3303 drbd_suspend_io(device);
3304 wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
3305 retcode = drbd_request_state(device, NS(conn, C_VERIFY_S));
3306 drbd_resume_io(device);
3307
3308 mutex_unlock(&adm_ctx.resource->adm_mutex);
3309 out:
3310 drbd_adm_finish(&adm_ctx, info, retcode);
3311 return 0;
3312 }
3313
3314
3315 int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
3316 {
3317 struct drbd_config_context adm_ctx;
3318 struct drbd_device *device;
3319 enum drbd_ret_code retcode;
3320 int skip_initial_sync = 0;
3321 int err;
3322 struct new_c_uuid_parms args;
3323
3324 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3325 if (!adm_ctx.reply_skb)
3326 return retcode;
3327 if (retcode != NO_ERROR)
3328 goto out_nolock;
3329
3330 device = adm_ctx.device;
3331 memset(&args, 0, sizeof(args));
3332 if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
3333 err = new_c_uuid_parms_from_attrs(&args, info);
3334 if (err) {
3335 retcode = ERR_MANDATORY_TAG;
3336 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3337 goto out_nolock;
3338 }
3339 }
3340
3341 mutex_lock(&adm_ctx.resource->adm_mutex);
3342 mutex_lock(device->state_mutex); /* Protects us against serialized state changes. */
3343
3344 if (!get_ldev(device)) {
3345 retcode = ERR_NO_DISK;
3346 goto out;
3347 }
3348
3349 /* this is "skip initial sync", assume the data to be clean */
3350 if (device->state.conn == C_CONNECTED &&
3351 first_peer_device(device)->connection->agreed_pro_version >= 90 &&
3352 device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
3353 drbd_info(device, "Preparing to skip initial sync\n");
3354 skip_initial_sync = 1;
3355 } else if (device->state.conn != C_STANDALONE) {
3356 retcode = ERR_CONNECTED;
3357 goto out_dec;
3358 }
3359
3360 drbd_uuid_set(device, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
3361 drbd_uuid_new_current(device); /* New current, previous to UI_BITMAP */
3362
3363 if (args.clear_bm) {
3364 err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
3365 "clear_n_write from new_c_uuid", BM_LOCKED_MASK);
3366 if (err) {
3367 drbd_err(device, "Writing bitmap failed with %d\n", err);
3368 retcode = ERR_IO_MD_DISK;
3369 }
3370 if (skip_initial_sync) {
3371 drbd_send_uuids_skip_initial_sync(first_peer_device(device));
3372 _drbd_uuid_set(device, UI_BITMAP, 0);
3373 drbd_print_uuids(device, "cleared bitmap UUID");
3374 spin_lock_irq(&device->resource->req_lock);
3375 _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3376 CS_VERBOSE, NULL);
3377 spin_unlock_irq(&device->resource->req_lock);
3378 }
3379 }
3380
3381 drbd_md_sync(device);
3382 out_dec:
3383 put_ldev(device);
3384 out:
3385 mutex_unlock(device->state_mutex);
3386 mutex_unlock(&adm_ctx.resource->adm_mutex);
3387 out_nolock:
3388 drbd_adm_finish(&adm_ctx, info, retcode);
3389 return 0;
3390 }
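/* Editor's note (hedged): the "skip initial sync" branch above is what allows
 * a freshly created, connected device (UI_CURRENT still UUID_JUST_CREATED) to
 * be declared identical to its peer without a full initial sync; in user
 * space this is typically driven by a new-current-uuid request with the
 * clear-bitmap option set (something like
 * "drbdadm -- --clear-bitmap new-current-uuid <res>" in 8.4-era tooling;
 * exact syntax may differ).
 */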
3391
3392 static enum drbd_ret_code
3393 drbd_check_resource_name(struct drbd_config_context *adm_ctx)
3394 {
3395 const char *name = adm_ctx->resource_name;
3396 if (!name || !name[0]) {
3397 drbd_msg_put_info(adm_ctx->reply_skb, "resource name missing");
3398 return ERR_MANDATORY_TAG;
3399 }
3400 /* if we want to use these in sysfs/configfs/debugfs some day,
3401 * we must not allow slashes */
3402 if (strchr(name, '/')) {
3403 drbd_msg_put_info(adm_ctx->reply_skb, "invalid resource name");
3404 return ERR_INVALID_REQUEST;
3405 }
3406 return NO_ERROR;
3407 }
3408
3409 int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
3410 {
3411 struct drbd_config_context adm_ctx;
3412 enum drbd_ret_code retcode;
3413 struct res_opts res_opts;
3414 int err;
3415
3416 retcode = drbd_adm_prepare(&adm_ctx, skb, info, 0);
3417 if (!adm_ctx.reply_skb)
3418 return retcode;
3419 if (retcode != NO_ERROR)
3420 goto out;
3421
3422 set_res_opts_defaults(&res_opts);
3423 err = res_opts_from_attrs(&res_opts, info);
3424 if (err && err != -ENOMSG) {
3425 retcode = ERR_MANDATORY_TAG;
3426 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3427 goto out;
3428 }
3429
3430 retcode = drbd_check_resource_name(&adm_ctx);
3431 if (retcode != NO_ERROR)
3432 goto out;
3433
3434 if (adm_ctx.resource) {
3435 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
3436 retcode = ERR_INVALID_REQUEST;
3437 drbd_msg_put_info(adm_ctx.reply_skb, "resource exists");
3438 }
3439 /* else: still NO_ERROR */
3440 goto out;
3441 }
3442
3443 /* not yet safe for genl_family.parallel_ops */
3444 if (!conn_create(adm_ctx.resource_name, &res_opts))
3445 retcode = ERR_NOMEM;
3446 out:
3447 drbd_adm_finish(&adm_ctx, info, retcode);
3448 return 0;
3449 }
3450
3451 int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
3452 {
3453 struct drbd_config_context adm_ctx;
3454 struct drbd_genlmsghdr *dh = info->userhdr;
3455 enum drbd_ret_code retcode;
3456
3457 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
3458 if (!adm_ctx.reply_skb)
3459 return retcode;
3460 if (retcode != NO_ERROR)
3461 goto out;
3462
3463 if (dh->minor > MINORMASK) {
3464 drbd_msg_put_info(adm_ctx.reply_skb, "requested minor out of range");
3465 retcode = ERR_INVALID_REQUEST;
3466 goto out;
3467 }
3468 if (adm_ctx.volume > DRBD_VOLUME_MAX) {
3469 drbd_msg_put_info(adm_ctx.reply_skb, "requested volume id out of range");
3470 retcode = ERR_INVALID_REQUEST;
3471 goto out;
3472 }
3473
3474 /* drbd_adm_prepare made sure already
3475 * that first_peer_device(device)->connection and device->vnr match the request. */
3476 if (adm_ctx.device) {
3477 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
3478 retcode = ERR_MINOR_EXISTS;
3479 /* else: still NO_ERROR */
3480 goto out;
3481 }
3482
3483 mutex_lock(&adm_ctx.resource->adm_mutex);
3484 retcode = drbd_create_device(&adm_ctx, dh->minor);
3485 mutex_unlock(&adm_ctx.resource->adm_mutex);
3486 out:
3487 drbd_adm_finish(&adm_ctx, info, retcode);
3488 return 0;
3489 }
3490
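/* A minor may be deleted only while it is Diskless and Secondary; it does
 * not have to be StandAlone, so a volume can be removed from a live
 * replication group. */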
3491 static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
3492 {
3493 if (device->state.disk == D_DISKLESS &&
3494 /* device->state.conn == C_STANDALONE is intentionally not required here:
3495 * we may want to delete a minor from a live replication group.
3496 */
3497 device->state.role == R_SECONDARY) {
3498 _drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS),
3499 CS_VERBOSE + CS_WAIT_COMPLETE);
3500 drbd_delete_device(device);
3501 return NO_ERROR;
3502 } else
3503 return ERR_MINOR_CONFIGURED;
3504 }
3505
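/* Handle the "del-minor" netlink request: delete a single device, serialized
 * against other configuration changes via the resource's adm_mutex. */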
3506 int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info)
3507 {
3508 struct drbd_config_context adm_ctx;
3509 enum drbd_ret_code retcode;
3510
3511 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3512 if (!adm_ctx.reply_skb)
3513 return retcode;
3514 if (retcode != NO_ERROR)
3515 goto out;
3516
3517 mutex_lock(&adm_ctx.resource->adm_mutex);
3518 retcode = adm_del_minor(adm_ctx.device);
3519 mutex_unlock(&adm_ctx.resource->adm_mutex);
3520 out:
3521 drbd_adm_finish(&adm_ctx, info, retcode);
3522 return 0;
3523 }
3524
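/* Handle the "down" netlink request: tear down a complete resource.
 * All volumes are demoted to Secondary, all connections are disconnected,
 * all disks are detached, the worker threads are stopped, and finally the
 * now unconfigured volumes and the resource itself are deleted. */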
3525 int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
3526 {
3527 struct drbd_config_context adm_ctx;
3528 struct drbd_resource *resource;
3529 struct drbd_connection *connection;
3530 struct drbd_device *device;
3531 int retcode; /* enum drbd_ret_code or enum drbd_state_rv */
3532 unsigned i;
3533
3534 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
3535 if (!adm_ctx.reply_skb)
3536 return retcode;
3537 if (retcode != NO_ERROR)
3538 goto finish;
3539
3540 resource = adm_ctx.resource;
3541 mutex_lock(&resource->adm_mutex);
3542 /* demote */
3543 for_each_connection(connection, resource) {
3544 struct drbd_peer_device *peer_device;
3545
3546 idr_for_each_entry(&connection->peer_devices, peer_device, i) {
3547 retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0);
3548 if (retcode < SS_SUCCESS) {
3549 drbd_msg_put_info(adm_ctx.reply_skb, "failed to demote");
3550 goto out;
3551 }
3552 }
3553
3554 retcode = conn_try_disconnect(connection, 0);
3555 if (retcode < SS_SUCCESS) {
3556 drbd_msg_put_info(adm_ctx.reply_skb, "failed to disconnect");
3557 goto out;
3558 }
3559 }
3560
3561 /* detach */
3562 idr_for_each_entry(&resource->devices, device, i) {
3563 retcode = adm_detach(device, 0);
3564 if (retcode < SS_SUCCESS || retcode > NO_ERROR) {
3565 drbd_msg_put_info(adm_ctx.reply_skb, "failed to detach");
3566 goto out;
3567 }
3568 }
3569
3570 /* If we reach this, all volumes (of this resource) are Secondary,
3571 * Disconnected, Diskless, aka Unconfigured. Make sure all threads have
3572 * actually stopped; state handling only does drbd_thread_stop_nowait(). */
3573 for_each_connection(connection, resource)
3574 drbd_thread_stop(&connection->worker);
3575
3576 /* Now, nothing can fail anymore */
3577
3578 /* delete volumes */
3579 idr_for_each_entry(&resource->devices, device, i) {
3580 retcode = adm_del_minor(device);
3581 if (retcode != NO_ERROR) {
3582 /* "can not happen" */
3583 drbd_msg_put_info(adm_ctx.reply_skb, "failed to delete volume");
3584 goto out;
3585 }
3586 }
3587
3588 list_del_rcu(&resource->resources);
3589 synchronize_rcu();
3590 drbd_free_resource(resource);
3591 retcode = NO_ERROR;
3592 out:
3593 mutex_unlock(&resource->adm_mutex);
3594 finish:
3595 drbd_adm_finish(&adm_ctx, info, retcode);
3596 return 0;
3597 }
3598
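/* Handle the "del-resource" netlink request: delete a resource that has no
 * devices left and whose connections are all StandAlone. */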
3599 int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
3600 {
3601 struct drbd_config_context adm_ctx;
3602 struct drbd_resource *resource;
3603 struct drbd_connection *connection;
3604 enum drbd_ret_code retcode;
3605
3606 retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
3607 if (!adm_ctx.reply_skb)
3608 return retcode;
3609 if (retcode != NO_ERROR)
3610 goto finish;
3611
3612 resource = adm_ctx.resource;
3613 mutex_lock(&resource->adm_mutex);
3614 for_each_connection(connection, resource) {
3615 if (connection->cstate > C_STANDALONE) {
3616 retcode = ERR_NET_CONFIGURED;
3617 goto out;
3618 }
3619 }
3620 if (!idr_is_empty(&resource->devices)) {
3621 retcode = ERR_RES_IN_USE;
3622 goto out;
3623 }
3624
3625 list_del_rcu(&resource->resources);
3626 for_each_connection(connection, resource)
3627 drbd_thread_stop(&connection->worker);
3628 synchronize_rcu();
3629 drbd_free_resource(resource);
3630 retcode = NO_ERROR;
3631 out:
3632 mutex_unlock(&resource->adm_mutex);
3633 finish:
3634 drbd_adm_finish(&adm_ctx, info, retcode);
3635 return 0;
3636 }
3637
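/* Broadcast an event to userspace: build a generic netlink message carrying
 * the device's minor and the state info in sib, and multicast it to the
 * registered event listeners.  -ESRCH (no listeners) is ignored; any other
 * failure is only logged, so events may be lost. */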
3638 void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
3639 {
3640 static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
3641 struct sk_buff *msg;
3642 struct drbd_genlmsghdr *d_out;
3643 unsigned seq;
3644 int err = -ENOMEM;
3645
3646 seq = atomic_inc_return(&drbd_genl_seq);
3647 msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
3648 if (!msg)
3649 goto failed;
3650
3651 err = -EMSGSIZE;
3652 d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT);
3653 if (!d_out) /* cannot happen, but anyway. */
3654 goto nla_put_failure;
3655 d_out->minor = device_to_minor(device);
3656 d_out->ret_code = NO_ERROR;
3657
3658 if (nla_put_status_info(msg, device, sib))
3659 goto nla_put_failure;
3660 genlmsg_end(msg, d_out);
3661 err = drbd_genl_multicast_events(msg, 0);
3662 /* msg has been consumed or freed in netlink_broadcast() */
3663 if (err && err != -ESRCH)
3664 goto failed;
3665
3666 return;
3667
3668 nla_put_failure:
3669 nlmsg_free(msg);
3670 failed:
3671 drbd_err(device, "Error %d while broadcasting event. "
3672 "Event seq:%u sib_reason:%u\n",
3673 err, seq, sib->sib_reason);
3674 }