/*
   drbd_nl.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/blkpg.h>
#include <linux/cpumask.h>
#include "drbd_int.h"
#include "drbd_req.h"
#include "drbd_wrappers.h"
#include <asm/unaligned.h>
#include <linux/drbd_limits.h>
#include <linux/kthread.h>

#include <net/genetlink.h>

/* .doit */
// int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
// int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
/* .dumpit */
int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);

#include <linux/drbd_genl_api.h>
#include <linux/genl_magic_func.h>

/* used blkdev_get_by_path, to claim our meta data device(s) */
static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";

/* Configuration is strictly serialized, because generic netlink message
 * processing is strictly serialized by the genl_lock().
 * Which means we can use one static global drbd_config_context struct.
 */
static struct drbd_config_context {
	/* assigned from drbd_genlmsghdr */
	unsigned int minor;
	/* assigned from request attributes, if present */
	unsigned int volume;
#define VOLUME_UNSPECIFIED	(-1U)
	/* pointer into the request skb,
	 * limited lifetime! */
	char *conn_name;

	/* reply buffer */
	struct sk_buff *reply_skb;
	/* pointer into reply buffer */
	struct drbd_genlmsghdr *reply_dh;
	/* resolved from attributes, if possible */
	struct drbd_conf *mdev;
	struct drbd_tconn *tconn;
} adm_ctx;

static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
{
	genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
	if (genlmsg_reply(skb, info))
		printk(KERN_ERR "drbd: error sending genl reply\n");
}

/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
 * reason it could fail was no space in skb, and there are 4k available. */
int drbd_msg_put_info(const char *info)
{
	struct sk_buff *skb = adm_ctx.reply_skb;
	struct nlattr *nla;
	int err = -EMSGSIZE;

	if (!info || !info[0])
		return 0;

	nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
	if (!nla)
		return err;

	err = nla_put_string(skb, T_info_text, info);
	if (err) {
		nla_nest_cancel(skb, nla);
		return err;
	} else
		nla_nest_end(skb, nla);
	return 0;
}

/* This would be a good candidate for a "pre_doit" hook,
 * and per-family private info->pointers.
 * But we need to stay compatible with older kernels.
 * If it returns successfully, adm_ctx members are valid.
 */
#define DRBD_ADM_NEED_MINOR	1
#define DRBD_ADM_NEED_CONN	2
static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
		unsigned flags)
{
	struct drbd_genlmsghdr *d_in = info->userhdr;
	const u8 cmd = info->genlhdr->cmd;
	int err;

	memset(&adm_ctx, 0, sizeof(adm_ctx));

	/* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
	if (cmd != DRBD_ADM_GET_STATUS
	    && security_netlink_recv(skb, CAP_SYS_ADMIN))
		return -EPERM;

	adm_ctx.reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!adm_ctx.reply_skb)
		goto fail;

	adm_ctx.reply_dh = genlmsg_put_reply(adm_ctx.reply_skb,
					info, &drbd_genl_family, 0, cmd);
	/* put of a few bytes into a fresh skb of >= 4k will always succeed.
	 * but anyways */
	if (!adm_ctx.reply_dh)
		goto fail;

	adm_ctx.reply_dh->minor = d_in->minor;
	adm_ctx.reply_dh->ret_code = NO_ERROR;

	if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
		struct nlattr *nla;
		/* parse and validate only */
		err = drbd_cfg_context_from_attrs(NULL, info);
		if (err)
			goto fail;

		/* It was present, and valid,
		 * copy it over to the reply skb. */
		err = nla_put_nohdr(adm_ctx.reply_skb,
				info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
				info->attrs[DRBD_NLA_CFG_CONTEXT]);
		if (err)
			goto fail;

		/* and assign stuff to the global adm_ctx */
		nla = nested_attr_tb[__nla_type(T_ctx_volume)];
		adm_ctx.volume = nla ? nla_get_u32(nla) : VOLUME_UNSPECIFIED;
		nla = nested_attr_tb[__nla_type(T_ctx_conn_name)];
		if (nla)
			adm_ctx.conn_name = nla_data(nla);
	} else
		adm_ctx.volume = VOLUME_UNSPECIFIED;

	adm_ctx.minor = d_in->minor;
	adm_ctx.mdev = minor_to_mdev(d_in->minor);
	adm_ctx.tconn = conn_by_name(adm_ctx.conn_name);

	if (!adm_ctx.mdev && (flags & DRBD_ADM_NEED_MINOR)) {
		drbd_msg_put_info("unknown minor");
		return ERR_MINOR_INVALID;
	}
	if (!adm_ctx.tconn && (flags & DRBD_ADM_NEED_CONN)) {
		drbd_msg_put_info("unknown connection");
		return ERR_INVALID_REQUEST;
	}

	/* some more paranoia, if the request was over-determined */
	if (adm_ctx.mdev && adm_ctx.tconn &&
	    adm_ctx.mdev->tconn != adm_ctx.tconn) {
		pr_warning("request: minor=%u, conn=%s; but that minor belongs to connection %s\n",
				adm_ctx.minor, adm_ctx.conn_name, adm_ctx.mdev->tconn->name);
		drbd_msg_put_info("minor exists in different connection");
		return ERR_INVALID_REQUEST;
	}
	if (adm_ctx.mdev &&
	    adm_ctx.volume != VOLUME_UNSPECIFIED &&
	    adm_ctx.volume != adm_ctx.mdev->vnr) {
		pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
				adm_ctx.minor, adm_ctx.volume,
				adm_ctx.mdev->vnr, adm_ctx.mdev->tconn->name);
		drbd_msg_put_info("minor exists as different volume");
		return ERR_INVALID_REQUEST;
	}
	if (adm_ctx.mdev && !adm_ctx.tconn)
		adm_ctx.tconn = adm_ctx.mdev->tconn;
	return NO_ERROR;

fail:
	nlmsg_free(adm_ctx.reply_skb);
	adm_ctx.reply_skb = NULL;
	return -ENOMEM;
}

static int drbd_adm_finish(struct genl_info *info, int retcode)
{
	struct nlattr *nla;
	const char *conn_name = NULL;

	if (!adm_ctx.reply_skb)
		return -ENOMEM;

	adm_ctx.reply_dh->ret_code = retcode;

	nla = info->attrs[DRBD_NLA_CFG_CONTEXT];
	if (nla) {
		nla = nla_find_nested(nla, __nla_type(T_ctx_conn_name));
		if (nla)
			conn_name = nla_data(nla);
	}

	drbd_adm_send_reply(adm_ctx.reply_skb, info);
	return 0;
}

static void setup_khelper_env(struct drbd_tconn *tconn, char **envp)
{
	char *afs;

	if (get_net_conf(tconn)) {
		switch (((struct sockaddr *)tconn->net_conf->peer_addr)->sa_family) {
		case AF_INET6:
			afs = "ipv6";
			snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
				 &((struct sockaddr_in6 *)tconn->net_conf->peer_addr)->sin6_addr);
			break;
		case AF_INET:
			afs = "ipv4";
			snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
				 &((struct sockaddr_in *)tconn->net_conf->peer_addr)->sin_addr);
			break;
		default:
			afs = "ssocks";
			snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
				 &((struct sockaddr_in *)tconn->net_conf->peer_addr)->sin_addr);
		}
		snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
		put_net_conf(tconn);
	}
}
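/* Note: the two scratch slots filled in above are envp[3] (DRBD_PEER_AF) and
 * envp[4] (DRBD_PEER_ADDRESS); envp[0..2] carry the fixed HOME/TERM/PATH
 * entries set up by the callers below. */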

int drbd_khelper(struct drbd_conf *mdev, char *cmd)
{
	char *envp[] = { "HOME=/",
			"TERM=linux",
			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
			 (char[20]) { }, /* address family */
			 (char[60]) { }, /* address */
			NULL };
	char mb[12];
	char *argv[] = {usermode_helper, cmd, mb, NULL };
	struct sib_info sib;
	int ret;

	snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev));
	setup_khelper_env(mdev->tconn, envp);

	/* The helper may take some time.
	 * write out any unsynced meta data changes now */
	drbd_md_sync(mdev);

	dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
	sib.sib_reason = SIB_HELPER_PRE;
	sib.helper_name = cmd;
	drbd_bcast_event(mdev, &sib);
	ret = call_usermodehelper(usermode_helper, argv, envp, 1);
	if (ret)
		dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
				usermode_helper, cmd, mb,
				(ret >> 8) & 0xff, ret);
	else
		dev_info(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
				usermode_helper, cmd, mb,
				(ret >> 8) & 0xff, ret);
	sib.sib_reason = SIB_HELPER_POST;
	sib.helper_exit_code = ret;
	drbd_bcast_event(mdev, &sib);

	if (ret < 0) /* Ignore any ERRNOs we got. */
		ret = 0;

	return ret;
}
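/* call_usermodehelper() returns the helper's wait status; the log lines above
 * extract the conventional exit code from the high byte via (ret >> 8) & 0xff.
 * A sketch of the typical invocation, assuming /sbin/drbdadm is the configured
 * usermode_helper (an assumption, not enforced here):
 *   drbd_khelper(mdev, "fence-peer");   ->   /sbin/drbdadm fence-peer minor-0
 */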

static void conn_md_sync(struct drbd_tconn *tconn)
{
	struct drbd_conf *mdev;
	int vnr;

	idr_for_each_entry(&tconn->volumes, mdev, vnr)
		drbd_md_sync(mdev);
}

int conn_khelper(struct drbd_tconn *tconn, char *cmd)
{
	char *envp[] = { "HOME=/",
			"TERM=linux",
			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
			 (char[20]) { }, /* address family */
			 (char[60]) { }, /* address */
			NULL };
	char *argv[] = {usermode_helper, cmd, tconn->name, NULL };
	int ret;

	setup_khelper_env(tconn, envp);
	conn_md_sync(tconn);

	conn_info(tconn, "helper command: %s %s %s\n", usermode_helper, cmd, tconn->name);
	/* TODO: conn_bcast_event() ?? */

	ret = call_usermodehelper(usermode_helper, argv, envp, 1);
	if (ret)
		conn_warn(tconn, "helper command: %s %s %s exit code %u (0x%x)\n",
			  usermode_helper, cmd, tconn->name,
			  (ret >> 8) & 0xff, ret);
	else
		conn_info(tconn, "helper command: %s %s %s exit code %u (0x%x)\n",
			  usermode_helper, cmd, tconn->name,
			  (ret >> 8) & 0xff, ret);
	/* TODO: conn_bcast_event() ?? */

	if (ret < 0) /* Ignore any ERRNOs we got. */
		ret = 0;

	return ret;
}

static enum drbd_fencing_p highest_fencing_policy(struct drbd_tconn *tconn)
{
	enum drbd_fencing_p fp = FP_NOT_AVAIL;
	struct drbd_conf *mdev;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
		if (get_ldev_if_state(mdev, D_CONSISTENT)) {
			fp = max_t(enum drbd_fencing_p, fp, mdev->ldev->dc.fencing);
			put_ldev(mdev);
		}
	}
	rcu_read_unlock();

	return fp;
}

bool conn_try_outdate_peer(struct drbd_tconn *tconn)
{
	union drbd_state mask = { };
	union drbd_state val = { };
	enum drbd_fencing_p fp;
	char *ex_to_string;
	int r;

	if (tconn->cstate >= C_WF_REPORT_PARAMS) {
		conn_err(tconn, "Expected cstate < C_WF_REPORT_PARAMS\n");
		return false;
	}

	fp = highest_fencing_policy(tconn);
	switch (fp) {
	case FP_NOT_AVAIL:
		conn_warn(tconn, "Not fencing peer, I'm not even Consistent myself.\n");
		goto out;
	case FP_DONT_CARE:
		return true;
	default: ;
	}

	r = conn_khelper(tconn, "fence-peer");

	switch ((r>>8) & 0xff) {
	case 3: /* peer is inconsistent */
		ex_to_string = "peer is inconsistent or worse";
		mask.pdsk = D_MASK;
		val.pdsk = D_INCONSISTENT;
		break;
	case 4: /* peer got outdated, or was already outdated */
		ex_to_string = "peer was fenced";
		mask.pdsk = D_MASK;
		val.pdsk = D_OUTDATED;
		break;
	case 5: /* peer was down */
		if (conn_highest_disk(tconn) == D_UP_TO_DATE) {
			/* we will(have) create(d) a new UUID anyways... */
			ex_to_string = "peer is unreachable, assumed to be dead";
			mask.pdsk = D_MASK;
			val.pdsk = D_OUTDATED;
		} else {
			ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
		}
		break;
	case 6: /* Peer is primary, voluntarily outdate myself.
		 * This is useful when an unconnected R_SECONDARY is asked to
		 * become R_PRIMARY, but finds the other peer being active. */
		ex_to_string = "peer is active";
		conn_warn(tconn, "Peer is primary, outdating myself.\n");
		mask.disk = D_MASK;
		val.disk = D_OUTDATED;
		break;
	case 7:
		if (fp != FP_STONITH)
			conn_err(tconn, "fence-peer() = 7 && fencing != Stonith !!!\n");
		ex_to_string = "peer was stonithed";
		mask.pdsk = D_MASK;
		val.pdsk = D_OUTDATED;
		break;
	default:
		/* The script is broken ... */
		conn_err(tconn, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
		return false; /* Eventually leave IO frozen */
	}

	conn_info(tconn, "fence-peer helper returned %d (%s)\n",
		  (r>>8) & 0xff, ex_to_string);

 out:

	/* Not using
	   conn_request_state(tconn, mask, val, CS_VERBOSE);
	   here, because we might were able to re-establish the connection in the
	   meantime. */
	spin_lock_irq(&tconn->req_lock);
	if (tconn->cstate < C_WF_REPORT_PARAMS)
		_conn_request_state(tconn, mask, val, CS_VERBOSE);
	spin_unlock_irq(&tconn->req_lock);

	return conn_highest_pdsk(tconn) <= D_OUTDATED;
}
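/* Summary of the fence-peer helper exit codes handled above, mirroring the
 * switch: 3 = peer inconsistent, 4 = peer outdated/fenced, 5 = peer down
 * (outdate only if our own disk is UpToDate), 6 = peer is primary so we
 * outdate ourselves, 7 = peer was stonithed; anything else is treated as a
 * broken helper and leaves IO frozen. */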

static int _try_outdate_peer_async(void *data)
{
	struct drbd_tconn *tconn = (struct drbd_tconn *)data;

	conn_try_outdate_peer(tconn);

	return 0;
}

void conn_try_outdate_peer_async(struct drbd_tconn *tconn)
{
	struct task_struct *opa;

	opa = kthread_run(_try_outdate_peer_async, tconn, "drbd_async_h");
	if (IS_ERR(opa))
		conn_err(tconn, "out of mem, failed to invoke fence-peer helper\n");
}

enum drbd_state_rv
drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
{
	const int max_tries = 4;
	enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
	int try = 0;
	int forced = 0;
	union drbd_state mask, val;

	if (new_role == R_PRIMARY)
		request_ping(mdev->tconn); /* Detect a dead peer ASAP */

	mutex_lock(mdev->state_mutex);

	mask.i = 0; mask.role = R_MASK;
	val.i  = 0; val.role  = new_role;

	while (try++ < max_tries) {
		rv = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE);

		/* in case we first succeeded to outdate,
		 * but now suddenly could establish a connection */
		if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
			val.pdsk = 0;
			mask.pdsk = 0;
			continue;
		}

		if (rv == SS_NO_UP_TO_DATE_DISK && force &&
		    (mdev->state.disk < D_UP_TO_DATE &&
		     mdev->state.disk >= D_INCONSISTENT)) {
			mask.disk = D_MASK;
			val.disk  = D_UP_TO_DATE;
			forced = 1;
			continue;
		}

		if (rv == SS_NO_UP_TO_DATE_DISK &&
		    mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) {
			D_ASSERT(mdev->state.pdsk == D_UNKNOWN);

			if (conn_try_outdate_peer(mdev->tconn)) {
				val.disk = D_UP_TO_DATE;
				mask.disk = D_MASK;
			}
			continue;
		}

		if (rv == SS_NOTHING_TO_DO)
			goto out;
		if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
			if (!conn_try_outdate_peer(mdev->tconn) && force) {
				dev_warn(DEV, "Forced into split brain situation!\n");
				mask.pdsk = D_MASK;
				val.pdsk  = D_OUTDATED;

			}
			continue;
		}
		if (rv == SS_TWO_PRIMARIES) {
			/* Maybe the peer is detected as dead very soon...
			   retry at most once more in this case. */
			schedule_timeout_interruptible((mdev->tconn->net_conf->ping_timeo+1)*HZ/10);
			if (try < max_tries)
				try = max_tries - 1;
			continue;
		}
		if (rv < SS_SUCCESS) {
			rv = _drbd_request_state(mdev, mask, val,
						CS_VERBOSE + CS_WAIT_COMPLETE);
			if (rv < SS_SUCCESS)
				goto out;
		}
		break;
	}

	if (rv < SS_SUCCESS)
		goto out;

	if (forced)
		dev_warn(DEV, "Forced to consider local data as UpToDate!\n");

	/* Wait until nothing is on the fly :) */
	wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0);

	if (new_role == R_SECONDARY) {
		set_disk_ro(mdev->vdisk, true);
		if (get_ldev(mdev)) {
			mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
			put_ldev(mdev);
		}
	} else {
		if (get_net_conf(mdev->tconn)) {
			mdev->tconn->net_conf->want_lose = 0;
			put_net_conf(mdev->tconn);
		}
		set_disk_ro(mdev->vdisk, false);
		if (get_ldev(mdev)) {
			if (((mdev->state.conn < C_CONNECTED ||
			       mdev->state.pdsk <= D_FAILED)
			      && mdev->ldev->md.uuid[UI_BITMAP] == 0) || forced)
				drbd_uuid_new_current(mdev);

			mdev->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
			put_ldev(mdev);
		}
	}

	/* writeout of activity log covered areas of the bitmap
	 * to stable storage done in after state change already */

	if (mdev->state.conn >= C_WF_REPORT_PARAMS) {
		/* if this was forced, we should consider sync */
		if (forced)
			drbd_send_uuids(mdev);
		drbd_send_state(mdev);
	}

	drbd_md_sync(mdev);

	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
out:
	mutex_unlock(mdev->state_mutex);
	return rv;
}

static const char *from_attrs_err_to_txt(int err)
{
	return	err == -ENOMSG ? "required attribute missing" :
		err == -EOPNOTSUPP ? "unknown mandatory attribute" :
		err == -EEXIST ? "can not change invariant setting" :
		"invalid attribute value";
}

int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
{
	struct set_role_parms parms;
	int err;
	enum drbd_ret_code retcode;

	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	memset(&parms, 0, sizeof(parms));
	if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
		err = set_role_parms_from_attrs(&parms, info);
		if (err) {
			retcode = ERR_MANDATORY_TAG;
			drbd_msg_put_info(from_attrs_err_to_txt(err));
			goto out;
		}
	}

	if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
		retcode = drbd_set_role(adm_ctx.mdev, R_PRIMARY, parms.assume_uptodate);
	else
		retcode = drbd_set_role(adm_ctx.mdev, R_SECONDARY, 0);
out:
	drbd_adm_finish(info, retcode);
	return 0;
}

/* initializes the md.*_offset members, so we are able to find
 * the on disk meta data */
static void drbd_md_set_sector_offsets(struct drbd_conf *mdev,
				       struct drbd_backing_dev *bdev)
{
	sector_t md_size_sect = 0;
	switch (bdev->dc.meta_dev_idx) {
	default:
		/* v07 style fixed size indexed meta data */
		bdev->md.md_size_sect = MD_RESERVED_SECT;
		bdev->md.md_offset = drbd_md_ss__(mdev, bdev);
		bdev->md.al_offset = MD_AL_OFFSET;
		bdev->md.bm_offset = MD_BM_OFFSET;
		break;
	case DRBD_MD_INDEX_FLEX_EXT:
		/* just occupy the full device; unit: sectors */
		bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
		bdev->md.md_offset = 0;
		bdev->md.al_offset = MD_AL_OFFSET;
		bdev->md.bm_offset = MD_BM_OFFSET;
		break;
	case DRBD_MD_INDEX_INTERNAL:
	case DRBD_MD_INDEX_FLEX_INT:
		bdev->md.md_offset = drbd_md_ss__(mdev, bdev);
		/* al size is still fixed */
		bdev->md.al_offset = -MD_AL_SECTORS;
		/* we need (slightly less than) ~ this much bitmap sectors: */
		md_size_sect = drbd_get_capacity(bdev->backing_bdev);
		md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
		md_size_sect = BM_SECT_TO_EXT(md_size_sect);
		md_size_sect = ALIGN(md_size_sect, 8);

		/* plus the "drbd meta data super block",
		 * and the activity log; */
		md_size_sect += MD_BM_OFFSET;

		bdev->md.md_size_sect = md_size_sect;
		/* bitmap offset is adjusted by 'super' block size */
		bdev->md.bm_offset   = -md_size_sect + MD_AL_OFFSET;
		break;
	}
}
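/* Rough sketch of the internal/flexible-internal case above: al_offset and
 * bm_offset come out negative, i.e. they are interpreted relative to the
 * meta data "super block" sector rather than as absolute positions, and the
 * overall meta data size is derived from the backing device capacity rounded
 * up to extent granularity plus the super block and activity log. */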

/* input size is expected to be in KB */
char *ppsize(char *buf, unsigned long long size)
{
	/* Needs 9 bytes at max including trailing NUL:
	 * -1ULL ==> "16384 EB" */
	static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
	int base = 0;
	while (size >= 10000 && base < sizeof(units)-1) {
		/* shift + round */
		size = (size >> 10) + !!(size & (1<<9));
		base++;
	}
	sprintf(buf, "%u %cB", (unsigned)size, units[base]);

	return buf;
}
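/* Worked example (input in KB, as noted above): ppsize(buf, 1048576) shifts
 * once (1048576 >> 10 == 1024, rounding bit clear) and then stops because
 * 1024 < 10000, yielding "1024 MB". */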

/* there is still a theoretical deadlock when called from receiver
 * on an D_INCONSISTENT R_PRIMARY:
 * remote READ does inc_ap_bio, receiver would need to receive answer
 * packet from remote to dec_ap_bio again.
 * receiver receive_sizes(), comes here,
 * waits for ap_bio_cnt == 0. -> deadlock.
 * but this cannot happen, actually, because:
 * R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
 * (not connected, or bad/no disk on peer):
 * see drbd_fail_request_early, ap_bio_cnt is zero.
 * R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
 * peer may not initiate a resize.
 */
/* Note these are not to be confused with
 * drbd_adm_suspend_io/drbd_adm_resume_io,
 * which are (sub) state changes triggered by admin (drbdsetup),
 * and can be long lived.
 * This changes an mdev->flag, is triggered by drbd internals,
 * and should be short-lived. */
void drbd_suspend_io(struct drbd_conf *mdev)
{
	set_bit(SUSPEND_IO, &mdev->flags);
	if (drbd_suspended(mdev))
		return;
	wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
}

void drbd_resume_io(struct drbd_conf *mdev)
{
	clear_bit(SUSPEND_IO, &mdev->flags);
	wake_up(&mdev->misc_wait);
}

/**
 * drbd_determine_dev_size() -  Sets the right device size obeying all constraints
 * @mdev:	DRBD device.
 *
 * Returns 0 on success, negative return values indicate errors.
 * You should call drbd_md_sync() after calling this function.
 */
enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
{
	sector_t prev_first_sect, prev_size; /* previous meta location */
	sector_t la_size;
	sector_t size;
	char ppb[10];

	int md_moved, la_size_changed;
	enum determine_dev_size rv = unchanged;

	/* race:
	 * application request passes inc_ap_bio,
	 * but then cannot get an AL-reference.
	 * this function later may wait on ap_bio_cnt == 0. -> deadlock.
	 *
	 * to avoid that:
	 * Suspend IO right here.
	 * still lock the act_log to not trigger ASSERTs there.
	 */
	drbd_suspend_io(mdev);

	/* no wait necessary anymore, actually we could assert that */
	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));

	prev_first_sect = drbd_md_first_sector(mdev->ldev);
	prev_size = mdev->ldev->md.md_size_sect;
	la_size = mdev->ldev->md.la_size_sect;

	/* TODO: should only be some assert here, not (re)init... */
	drbd_md_set_sector_offsets(mdev, mdev->ldev);

	size = drbd_new_dev_size(mdev, mdev->ldev, flags & DDSF_FORCED);

	if (drbd_get_capacity(mdev->this_bdev) != size ||
	    drbd_bm_capacity(mdev) != size) {
		int err;
		err = drbd_bm_resize(mdev, size, !(flags & DDSF_NO_RESYNC));
		if (unlikely(err)) {
			/* currently there is only one error: ENOMEM! */
			size = drbd_bm_capacity(mdev)>>1;
			if (size == 0) {
				dev_err(DEV, "OUT OF MEMORY! "
				    "Could not allocate bitmap!\n");
			} else {
				dev_err(DEV, "BM resizing failed. "
				    "Leaving size unchanged at size = %lu KB\n",
				    (unsigned long)size);
			}
			rv = dev_size_error;
		}
		/* racy, see comments above. */
		drbd_set_my_capacity(mdev, size);
		mdev->ldev->md.la_size_sect = size;
		dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
			(unsigned long long)size>>1);
	}
	if (rv == dev_size_error)
		goto out;

	la_size_changed = (la_size != mdev->ldev->md.la_size_sect);

	md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev)
		|| prev_size	   != mdev->ldev->md.md_size_sect;

	if (la_size_changed || md_moved) {
		int err;

		drbd_al_shrink(mdev); /* All extents inactive. */
		dev_info(DEV, "Writing the whole bitmap, %s\n",
			 la_size_changed && md_moved ? "size changed and md moved" :
			 la_size_changed ? "size changed" : "md moved");
		/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
		err = drbd_bitmap_io(mdev, &drbd_bm_write,
				"size changed", BM_LOCKED_MASK);
		if (err) {
			rv = dev_size_error;
			goto out;
		}
		drbd_md_mark_dirty(mdev);
	}

	if (size > la_size)
		rv = grew;
	if (size < la_size)
		rv = shrunk;
out:
	lc_unlock(mdev->act_log);
	wake_up(&mdev->al_wait);
	drbd_resume_io(mdev);

	return rv;
}

sector_t
drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int assume_peer_has_space)
{
	sector_t p_size = mdev->p_size;   /* partner's disk size. */
	sector_t la_size = bdev->md.la_size_sect; /* last agreed size. */
	sector_t m_size; /* my size */
	sector_t u_size = bdev->dc.disk_size; /* size requested by user. */
	sector_t size = 0;

	m_size = drbd_get_max_capacity(bdev);

	if (mdev->state.conn < C_CONNECTED && assume_peer_has_space) {
		dev_warn(DEV, "Resize while not connected was forced by the user!\n");
		p_size = m_size;
	}

	if (p_size && m_size) {
		size = min_t(sector_t, p_size, m_size);
	} else {
		if (la_size) {
			size = la_size;
			if (m_size && m_size < size)
				size = m_size;
			if (p_size && p_size < size)
				size = p_size;
		} else {
			if (m_size)
				size = m_size;
			if (p_size)
				size = p_size;
		}
	}

	if (size == 0)
		dev_err(DEV, "Both nodes diskless!\n");

	if (u_size) {
		if (u_size > size)
			dev_err(DEV, "Requested disk size is too big (%lu > %lu)\n",
			    (unsigned long)u_size>>1, (unsigned long)size>>1);
		else
			size = u_size;
	}

	return size;
}
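/* Decision summary for the helper above: when both peer size and local size
 * are known, the smaller of the two wins; otherwise fall back to the last
 * agreed size, bounded by whichever of the two is known.  An explicit
 * user-requested size is honoured only if it does not exceed that result. */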

/**
 * drbd_check_al_size() - Ensures that the AL is of the right size
 * @mdev:	DRBD device.
 *
 * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
 * failed, and 0 on success. You should call drbd_md_sync() after you called
 * this function.
 */
static int drbd_check_al_size(struct drbd_conf *mdev, struct disk_conf *dc)
{
	struct lru_cache *n, *t;
	struct lc_element *e;
	unsigned int in_use;
	int i;

	if (!expect(dc->al_extents >= DRBD_AL_EXTENTS_MIN))
		dc->al_extents = DRBD_AL_EXTENTS_MIN;

	if (mdev->act_log &&
	    mdev->act_log->nr_elements == dc->al_extents)
		return 0;

	in_use = 0;
	t = mdev->act_log;
	n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
		dc->al_extents, sizeof(struct lc_element), 0);

	if (n == NULL) {
		dev_err(DEV, "Cannot allocate act_log lru!\n");
		return -ENOMEM;
	}
	spin_lock_irq(&mdev->al_lock);
	if (t) {
		for (i = 0; i < t->nr_elements; i++) {
			e = lc_element_by_index(t, i);
			if (e->refcnt)
				dev_err(DEV, "refcnt(%d)==%d\n",
				    e->lc_number, e->refcnt);
			in_use += e->refcnt;
		}
	}
	if (!in_use)
		mdev->act_log = n;
	spin_unlock_irq(&mdev->al_lock);
	if (in_use) {
		dev_err(DEV, "Activity log still in use!\n");
		lc_destroy(n);
		return -EBUSY;
	} else {
		if (t)
			lc_destroy(t);
	}
	drbd_md_mark_dirty(mdev); /* we changed mdev->act_log->nr_elemens */
	return 0;
}
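/* In short: a replacement LRU of the requested size is allocated first and
 * only swapped in under al_lock if no element of the old activity log is
 * still referenced; otherwise the new cache is discarded and -EBUSY is
 * returned, leaving the old AL in place. */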

static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size)
{
	struct request_queue * const q = mdev->rq_queue;
	int max_hw_sectors = max_bio_size >> 9;
	int max_segments = 0;

	if (get_ldev_if_state(mdev, D_ATTACHING)) {
		struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;

		max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
		max_segments = mdev->ldev->dc.max_bio_bvecs;
		put_ldev(mdev);
	}

	blk_queue_logical_block_size(q, 512);
	blk_queue_max_hw_sectors(q, max_hw_sectors);
	/* This is the workaround for "bio would need to, but cannot, be split" */
	blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
	blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);

	if (get_ldev_if_state(mdev, D_ATTACHING)) {
		struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;

		blk_queue_stack_limits(q, b);

		if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
			dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
				 q->backing_dev_info.ra_pages,
				 b->backing_dev_info.ra_pages);
			q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
		}
		put_ldev(mdev);
	}
}

void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
{
	int now, new, local, peer;

	now = queue_max_hw_sectors(mdev->rq_queue) << 9;
	local = mdev->local_max_bio_size; /* Eventually last known value, from volatile memory */
	peer = mdev->peer_max_bio_size; /* Eventually last known value, from meta data */

	if (get_ldev_if_state(mdev, D_ATTACHING)) {
		local = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
		mdev->local_max_bio_size = local;
		put_ldev(mdev);
	}

	/* We may ignore peer limits if the peer is modern enough.
	   Because new from 8.3.8 onwards the peer can use multiple
	   BIOs for a single peer_request */
	if (mdev->state.conn >= C_CONNECTED) {
		if (mdev->tconn->agreed_pro_version < 94)
			peer = mdev->peer_max_bio_size;
		else if (mdev->tconn->agreed_pro_version == 94)
			peer = DRBD_MAX_SIZE_H80_PACKET;
		else /* drbd 8.3.8 onwards */
			peer = DRBD_MAX_BIO_SIZE;
	}

	new = min_t(int, local, peer);

	if (mdev->state.role == R_PRIMARY && new < now)
		dev_err(DEV, "ASSERT FAILED new < now; (%d < %d)\n", new, now);

	if (new != now)
		dev_info(DEV, "max BIO size = %u\n", new);

	drbd_setup_queue_param(mdev, new);
}
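/* The protocol-version thresholds above: peers older than protocol 94 are
 * limited to whatever they last advertised, protocol 94 peers to
 * DRBD_MAX_SIZE_H80_PACKET, and anything newer (drbd 8.3.8 onwards) to
 * DRBD_MAX_BIO_SIZE, since such peers can split a request into multiple
 * BIOs themselves. */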

/* serialize deconfig (worker exiting, doing cleanup)
 * and reconfig (drbdsetup disk, drbdsetup net)
 *
 * Wait for a potentially exiting worker, then restart it,
 * or start a new one.  Flush any pending work, there may still be an
 * after_state_change queued.
 */
static void conn_reconfig_start(struct drbd_tconn *tconn)
{
	wait_event(tconn->ping_wait, !test_and_set_bit(CONFIG_PENDING, &tconn->flags));
	wait_event(tconn->ping_wait, !test_bit(OBJECT_DYING, &tconn->flags));
	drbd_thread_start(&tconn->worker);
	conn_flush_workqueue(tconn);
}

/* if still unconfigured, stops worker again.
 * if configured now, clears CONFIG_PENDING.
 * wakes potential waiters */
static void conn_reconfig_done(struct drbd_tconn *tconn)
{
	spin_lock_irq(&tconn->req_lock);
	if (conn_all_vols_unconf(tconn)) {
		set_bit(OBJECT_DYING, &tconn->flags);
		drbd_thread_stop_nowait(&tconn->worker);
	} else
		clear_bit(CONFIG_PENDING, &tconn->flags);
	spin_unlock_irq(&tconn->req_lock);
	wake_up(&tconn->ping_wait);
}

/* Make sure IO is suspended before calling this function(). */
static void drbd_suspend_al(struct drbd_conf *mdev)
{
	int s = 0;

	if (!lc_try_lock(mdev->act_log)) {
		dev_warn(DEV, "Failed to lock al in drbd_suspend_al()\n");
		return;
	}

	drbd_al_shrink(mdev);
	spin_lock_irq(&mdev->tconn->req_lock);
	if (mdev->state.conn < C_CONNECTED)
		s = !test_and_set_bit(AL_SUSPENDED, &mdev->flags);
	spin_unlock_irq(&mdev->tconn->req_lock);
	lc_unlock(mdev->act_log);

	if (s)
		dev_info(DEV, "Suspended AL updates\n");
}
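/* AL_SUSPENDED is only set while we are disconnected (conn < C_CONNECTED);
 * the flag and the log message are skipped if it was already set or if a
 * connection was established in the meantime. */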

int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
{
	enum drbd_ret_code retcode;
	struct drbd_conf *mdev;
	struct disk_conf *ndc; /* new disk conf */
	int err, fifo_size;
	int *rs_plan_s = NULL;

	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	mdev = adm_ctx.mdev;

	/* we also need a disk
	 * to change the options on */
	if (!get_ldev(mdev)) {
		retcode = ERR_NO_DISK;
		goto out;
	}

/* FIXME freeze IO, cluster wide.
 *
 * We should make sure no-one uses
 * some half-updated struct when we
 * assign it later. */

	ndc = kmalloc(sizeof(*ndc), GFP_KERNEL);
	if (!ndc) {
		retcode = ERR_NOMEM;
		goto fail;
	}

	memcpy(ndc, &mdev->ldev->dc, sizeof(*ndc));
	err = disk_conf_from_attrs_for_change(ndc, info);
	if (err) {
		retcode = ERR_MANDATORY_TAG;
		drbd_msg_put_info(from_attrs_err_to_txt(err));
	}

	if (!expect(ndc->resync_rate >= 1))
		ndc->resync_rate = 1;

	/* clip to allowed range */
	if (!expect(ndc->al_extents >= DRBD_AL_EXTENTS_MIN))
		ndc->al_extents = DRBD_AL_EXTENTS_MIN;
	if (!expect(ndc->al_extents <= DRBD_AL_EXTENTS_MAX))
		ndc->al_extents = DRBD_AL_EXTENTS_MAX;

	/* most sanity checks done, try to assign the new sync-after
	 * dependency.  need to hold the global lock in there,
	 * to avoid a race in the dependency loop check. */
	retcode = drbd_alter_sa(mdev, ndc->resync_after);
	if (retcode != NO_ERROR)
		goto fail;

	fifo_size = (ndc->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
	if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
		rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
		if (!rs_plan_s) {
			dev_err(DEV, "kmalloc of fifo_buffer failed");
			retcode = ERR_NOMEM;
			goto fail;
		}
	}

	if (fifo_size != mdev->rs_plan_s.size) {
		kfree(mdev->rs_plan_s.values);
		mdev->rs_plan_s.values = rs_plan_s;
		mdev->rs_plan_s.size   = fifo_size;
		mdev->rs_planed = 0;
		rs_plan_s = NULL;
	}

	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
	drbd_al_shrink(mdev);
	err = drbd_check_al_size(mdev, ndc);
	lc_unlock(mdev->act_log);
	wake_up(&mdev->al_wait);

	if (err) {
		retcode = ERR_NOMEM;
		goto fail;
	}

	/* FIXME
	 * To avoid someone looking at a half-updated struct, we probably
	 * should have a rw-semaphor on net_conf and disk_conf.
	 */
	mdev->ldev->dc = *ndc;

	drbd_md_sync(mdev);


	if (mdev->state.conn >= C_CONNECTED)
		drbd_send_sync_param(mdev);

 fail:
	put_ldev(mdev);
	kfree(ndc);
	kfree(rs_plan_s);
 out:
	drbd_adm_finish(info, retcode);
	return 0;
}

int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_conf *mdev;
	int err;
	enum drbd_ret_code retcode;
	enum determine_dev_size dd;
	sector_t max_possible_sectors;
	sector_t min_md_device_sectors;
	struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
	struct block_device *bdev;
	struct lru_cache *resync_lru = NULL;
	union drbd_state ns, os;
	enum drbd_state_rv rv;
	int cp_discovered = 0;

	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto finish;

	mdev = adm_ctx.mdev;
	conn_reconfig_start(mdev->tconn);

	/* if you want to reconfigure, please tear down first */
	if (mdev->state.disk > D_DISKLESS) {
		retcode = ERR_DISK_CONFIGURED;
		goto fail;
	}
	/* It may just now have detached because of IO error.  Make sure
	 * drbd_ldev_destroy is done already, we may end up here very fast,
	 * e.g. if someone calls attach from the on-io-error handler,
	 * to realize a "hot spare" feature (not that I'd recommend that) */
	wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));

	/* allocation not in the IO path, drbdsetup context */
	nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
	if (!nbc) {
		retcode = ERR_NOMEM;
		goto fail;
	}

	nbc->dc = (struct disk_conf) {
		{}, 0, /* backing_dev */
		{}, 0, /* meta_dev */
		0, /* meta_dev_idx */
		DRBD_DISK_SIZE_SECT_DEF, /* disk_size */
		DRBD_MAX_BIO_BVECS_DEF, /* max_bio_bvecs */
		DRBD_ON_IO_ERROR_DEF, /* on_io_error */
		DRBD_FENCING_DEF, /* fencing */
		DRBD_RATE_DEF, /* resync_rate */
		DRBD_AFTER_DEF, /* resync_after */
		DRBD_AL_EXTENTS_DEF, /* al_extents */
		DRBD_C_PLAN_AHEAD_DEF, /* c_plan_ahead */
		DRBD_C_DELAY_TARGET_DEF, /* c_delay_target */
		DRBD_C_FILL_TARGET_DEF, /* c_fill_target */
		DRBD_C_MAX_RATE_DEF, /* c_max_rate */
		DRBD_C_MIN_RATE_DEF, /* c_min_rate */
		0, /* no_disk_barrier */
		0, /* no_disk_flush */
		0, /* no_disk_drain */
		0, /* no_md_flush */
	};

	err = disk_conf_from_attrs(&nbc->dc, info);
	if (err) {
		retcode = ERR_MANDATORY_TAG;
		drbd_msg_put_info(from_attrs_err_to_txt(err));
		goto fail;
	}

	if ((int)nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
		retcode = ERR_MD_IDX_INVALID;
		goto fail;
	}

	if (get_net_conf(mdev->tconn)) {
		int prot = mdev->tconn->net_conf->wire_protocol;
		put_net_conf(mdev->tconn);
		if (nbc->dc.fencing == FP_STONITH && prot == DRBD_PROT_A) {
			retcode = ERR_STONITH_AND_PROT_A;
			goto fail;
		}
	}

	bdev = blkdev_get_by_path(nbc->dc.backing_dev,
				  FMODE_READ | FMODE_WRITE | FMODE_EXCL, mdev);
	if (IS_ERR(bdev)) {
		dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev,
			PTR_ERR(bdev));
		retcode = ERR_OPEN_DISK;
		goto fail;
	}
	nbc->backing_bdev = bdev;

	/*
	 * meta_dev_idx >= 0: external fixed size, possibly multiple
	 * drbd sharing one meta device.  TODO in that case, paranoia
	 * check that [md_bdev, meta_dev_idx] is not yet used by some
	 * other drbd minor!  (if you use drbd.conf + drbdadm, that
	 * should check it for you already; but if you don't, or
	 * someone fooled it, we need to double check here)
	 */
	bdev = blkdev_get_by_path(nbc->dc.meta_dev,
				  FMODE_READ | FMODE_WRITE | FMODE_EXCL,
				  ((int)nbc->dc.meta_dev_idx < 0) ?
				  (void *)mdev : (void *)drbd_m_holder);
	if (IS_ERR(bdev)) {
		dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev,
			PTR_ERR(bdev));
		retcode = ERR_OPEN_MD_DISK;
		goto fail;
	}
	nbc->md_bdev = bdev;

	if ((nbc->backing_bdev == nbc->md_bdev) !=
	    (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
	     nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
		retcode = ERR_MD_IDX_INVALID;
		goto fail;
	}

	resync_lru = lc_create("resync", drbd_bm_ext_cache,
			1, 61, sizeof(struct bm_extent),
			offsetof(struct bm_extent, lce));
	if (!resync_lru) {
		retcode = ERR_NOMEM;
		goto fail;
	}

	/* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */
	drbd_md_set_sector_offsets(mdev, nbc);

	if (drbd_get_max_capacity(nbc) < nbc->dc.disk_size) {
		dev_err(DEV, "max capacity %llu smaller than disk size %llu\n",
			(unsigned long long) drbd_get_max_capacity(nbc),
			(unsigned long long) nbc->dc.disk_size);
		retcode = ERR_DISK_TO_SMALL;
		goto fail;
	}

	if ((int)nbc->dc.meta_dev_idx < 0) {
		max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
		/* at least one MB, otherwise it does not make sense */
		min_md_device_sectors = (2<<10);
	} else {
		max_possible_sectors = DRBD_MAX_SECTORS;
		min_md_device_sectors = MD_RESERVED_SECT * (nbc->dc.meta_dev_idx + 1);
	}

	if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
		retcode = ERR_MD_DISK_TO_SMALL;
		dev_warn(DEV, "refusing attach: md-device too small, "
			 "at least %llu sectors needed for this meta-disk type\n",
			 (unsigned long long) min_md_device_sectors);
		goto fail;
	}

	/* Make sure the new disk is big enough
	 * (we may currently be R_PRIMARY with no local disk...) */
	if (drbd_get_max_capacity(nbc) <
	    drbd_get_capacity(mdev->this_bdev)) {
		retcode = ERR_DISK_TO_SMALL;
		goto fail;
	}

	nbc->known_size = drbd_get_capacity(nbc->backing_bdev);

	if (nbc->known_size > max_possible_sectors) {
		dev_warn(DEV, "==> truncating very big lower level device "
			"to currently maximum possible %llu sectors <==\n",
			(unsigned long long) max_possible_sectors);
		if ((int)nbc->dc.meta_dev_idx >= 0)
			dev_warn(DEV, "==>> using internal or flexible "
				"meta data may help <<==\n");
	}

	drbd_suspend_io(mdev);
	/* also wait for the last barrier ack. */
	wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || drbd_suspended(mdev));
	/* and for any other previously queued work */
	drbd_flush_workqueue(mdev);

	rv = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE);
	retcode = rv;  /* FIXME: Type mismatch. */
	drbd_resume_io(mdev);
	if (rv < SS_SUCCESS)
		goto fail;

	if (!get_ldev_if_state(mdev, D_ATTACHING))
		goto force_diskless;

	drbd_md_set_sector_offsets(mdev, nbc);

	if (!mdev->bitmap) {
		if (drbd_bm_init(mdev)) {
			retcode = ERR_NOMEM;
			goto force_diskless_dec;
		}
	}

	retcode = drbd_md_read(mdev, nbc);
	if (retcode != NO_ERROR)
		goto force_diskless_dec;

	if (mdev->state.conn < C_CONNECTED &&
	    mdev->state.role == R_PRIMARY &&
	    (mdev->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
		dev_err(DEV, "Can only attach to data with current UUID=%016llX\n",
		    (unsigned long long)mdev->ed_uuid);
		retcode = ERR_DATA_NOT_CURRENT;
		goto force_diskless_dec;
	}

	/* Since we are diskless, fix the activity log first... */
	if (drbd_check_al_size(mdev, &nbc->dc)) {
		retcode = ERR_NOMEM;
		goto force_diskless_dec;
	}

	/* Prevent shrinking of consistent devices ! */
	if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
	    drbd_new_dev_size(mdev, nbc, 0) < nbc->md.la_size_sect) {
		dev_warn(DEV, "refusing to truncate a consistent device\n");
		retcode = ERR_DISK_TO_SMALL;
		goto force_diskless_dec;
	}

	if (!drbd_al_read_log(mdev, nbc)) {
		retcode = ERR_IO_MD_DISK;
		goto force_diskless_dec;
	}

	/* Reset the "barriers don't work" bits here, then force meta data to
	 * be written, to ensure we determine if barriers are supported. */
	if (nbc->dc.no_md_flush)
		set_bit(MD_NO_FUA, &mdev->flags);
	else
		clear_bit(MD_NO_FUA, &mdev->flags);

	/* Point of no return reached.
	 * Devices and memory are no longer released by error cleanup below.
	 * now mdev takes over responsibility, and the state engine should
	 * clean it up somewhere.  */
	D_ASSERT(mdev->ldev == NULL);
	mdev->ldev = nbc;
	mdev->resync = resync_lru;
	nbc = NULL;
	resync_lru = NULL;

	mdev->write_ordering = WO_bdev_flush;
	drbd_bump_write_ordering(mdev, WO_bdev_flush);

	if (drbd_md_test_flag(mdev->ldev, MDF_CRASHED_PRIMARY))
		set_bit(CRASHED_PRIMARY, &mdev->flags);
	else
		clear_bit(CRASHED_PRIMARY, &mdev->flags);

	if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
	    !(mdev->state.role == R_PRIMARY && mdev->tconn->susp_nod)) {
		set_bit(CRASHED_PRIMARY, &mdev->flags);
		cp_discovered = 1;
	}

	mdev->send_cnt = 0;
	mdev->recv_cnt = 0;
	mdev->read_cnt = 0;
	mdev->writ_cnt = 0;

	drbd_reconsider_max_bio_size(mdev);

	/* If I am currently not R_PRIMARY,
	 * but meta data primary indicator is set,
	 * I just now recover from a hard crash,
	 * and have been R_PRIMARY before that crash.
	 *
	 * Now, if I had no connection before that crash
	 * (have been degraded R_PRIMARY), chances are that
	 * I won't find my peer now either.
	 *
	 * In that case, and _only_ in that case,
	 * we use the degr-wfc-timeout instead of the default,
	 * so we can automatically recover from a crash of a
	 * degraded but active "cluster" after a certain timeout.
	 */
	clear_bit(USE_DEGR_WFC_T, &mdev->flags);
	if (mdev->state.role != R_PRIMARY &&
	     drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
	    !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
		set_bit(USE_DEGR_WFC_T, &mdev->flags);

	dd = drbd_determine_dev_size(mdev, 0);
	if (dd == dev_size_error) {
		retcode = ERR_NOMEM_BITMAP;
		goto force_diskless_dec;
	} else if (dd == grew)
		set_bit(RESYNC_AFTER_NEG, &mdev->flags);

	if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
		dev_info(DEV, "Assuming that all blocks are out of sync "
		     "(aka FullSync)\n");
		if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write,
			"set_n_write from attaching", BM_LOCKED_MASK)) {
			retcode = ERR_IO_MD_DISK;
			goto force_diskless_dec;
		}
	} else {
		if (drbd_bitmap_io(mdev, &drbd_bm_read,
			"read from attaching", BM_LOCKED_MASK)) {
			retcode = ERR_IO_MD_DISK;
			goto force_diskless_dec;
		}
	}

	if (cp_discovered) {
		drbd_al_apply_to_bm(mdev);
		if (drbd_bitmap_io(mdev, &drbd_bm_write,
			"crashed primary apply AL", BM_LOCKED_MASK)) {
			retcode = ERR_IO_MD_DISK;
			goto force_diskless_dec;
		}
	}

	if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev))
		drbd_suspend_al(mdev); /* IO is still suspended here... */

	spin_lock_irq(&mdev->tconn->req_lock);
	os = drbd_read_state(mdev);
	ns = os;
	/* If MDF_CONSISTENT is not set go into inconsistent state,
	   otherwise investigate MDF_WasUpToDate...
	   If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state,
	   otherwise into D_CONSISTENT state.
	*/
	if (drbd_md_test_flag(mdev->ldev, MDF_CONSISTENT)) {
		if (drbd_md_test_flag(mdev->ldev, MDF_WAS_UP_TO_DATE))
			ns.disk = D_CONSISTENT;
		else
			ns.disk = D_OUTDATED;
	} else {
		ns.disk = D_INCONSISTENT;
	}

	if (drbd_md_test_flag(mdev->ldev, MDF_PEER_OUT_DATED))
		ns.pdsk = D_OUTDATED;

	if ( ns.disk == D_CONSISTENT &&
	    (ns.pdsk == D_OUTDATED || mdev->ldev->dc.fencing == FP_DONT_CARE))
		ns.disk = D_UP_TO_DATE;

	/* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
	   MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
	   this point, because drbd_request_state() modifies these
	   flags. */

	/* In case we are C_CONNECTED postpone any decision on the new disk
	   state after the negotiation phase. */
	if (mdev->state.conn == C_CONNECTED) {
		mdev->new_state_tmp.i = ns.i;
		ns.i = os.i;
		ns.disk = D_NEGOTIATING;

		/* We expect to receive up-to-date UUIDs soon.
		   To avoid a race in receive_state, free p_uuid while
		   holding req_lock. I.e. atomic with the state change */
		kfree(mdev->p_uuid);
		mdev->p_uuid = NULL;
	}

	rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
	spin_unlock_irq(&mdev->tconn->req_lock);

	if (rv < SS_SUCCESS)
		goto force_diskless_dec;

	if (mdev->state.role == R_PRIMARY)
		mdev->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
	else
		mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;

	drbd_md_mark_dirty(mdev);
	drbd_md_sync(mdev);

	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
	put_ldev(mdev);
	conn_reconfig_done(mdev->tconn);
	drbd_adm_finish(info, retcode);
	return 0;

 force_diskless_dec:
	put_ldev(mdev);
 force_diskless:
	drbd_force_state(mdev, NS(disk, D_FAILED));
	drbd_md_sync(mdev);
 fail:
	conn_reconfig_done(mdev->tconn);
	if (nbc) {
		if (nbc->backing_bdev)
			blkdev_put(nbc->backing_bdev,
				   FMODE_READ | FMODE_WRITE | FMODE_EXCL);
		if (nbc->md_bdev)
			blkdev_put(nbc->md_bdev,
				   FMODE_READ | FMODE_WRITE | FMODE_EXCL);
		kfree(nbc);
	}
	lc_destroy(resync_lru);

 finish:
	drbd_adm_finish(info, retcode);
	return 0;
}

static int adm_detach(struct drbd_conf *mdev)
{
	enum drbd_state_rv retcode;
	drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
	retcode = drbd_request_state(mdev, NS(disk, D_DISKLESS));
	wait_event(mdev->misc_wait,
		   mdev->state.disk != D_DISKLESS ||
		   !atomic_read(&mdev->local_cnt));
	drbd_resume_io(mdev);
	return retcode;
}
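/* The wait above returns once all internal references have been put
 * (local_cnt reached zero) or once it is clear the device did not end up
 * D_DISKLESS after all; see the staged-detach description below. */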
1604
82f59cc6
LE
1605/* Detaching the disk is a process in multiple stages. First we need to lock
1606 * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
1607 * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
1608 * internal references as well.
1609 * Only then we have finally detached. */
3b98c0c2 1610int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
b411b363 1611{
9a0d9d03 1612 enum drbd_ret_code retcode;
3b98c0c2
LE
1613
1614 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
1615 if (!adm_ctx.reply_skb)
1616 return retcode;
1617 if (retcode != NO_ERROR)
1618 goto out;
1619
85f75dd7 1620 retcode = adm_detach(adm_ctx.mdev);
3b98c0c2
LE
1621out:
1622 drbd_adm_finish(info, retcode);
b411b363
PR
1623 return 0;
1624}
1625
f399002e
LE
1626static bool conn_resync_running(struct drbd_tconn *tconn)
1627{
1628 struct drbd_conf *mdev;
695d08fa 1629 bool rv = false;
f399002e
LE
1630 int vnr;
1631
695d08fa 1632 rcu_read_lock();
f399002e
LE
1633 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
1634 if (mdev->state.conn == C_SYNC_SOURCE ||
1635 mdev->state.conn == C_SYNC_TARGET ||
1636 mdev->state.conn == C_PAUSED_SYNC_S ||
695d08fa
PR
1637 mdev->state.conn == C_PAUSED_SYNC_T) {
1638 rv = true;
1639 break;
1640 }
f399002e 1641 }
695d08fa
PR
1642 rcu_read_unlock();
1643
1644 return rv;
f399002e
LE
1645}
1646
1647static bool conn_ov_running(struct drbd_tconn *tconn)
1648{
1649 struct drbd_conf *mdev;
695d08fa 1650 bool rv = false;
f399002e
LE
1651 int vnr;
1652
695d08fa 1653 rcu_read_lock();
f399002e
LE
1654 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
1655 if (mdev->state.conn == C_VERIFY_S ||
695d08fa
PR
1656 mdev->state.conn == C_VERIFY_T) {
1657 rv = true;
1658 break;
1659 }
f399002e 1660 }
695d08fa
PR
1661 rcu_read_unlock();
1662
1663 return rv;
f399002e
LE
1664}
1665
1666int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
1667{
1668 enum drbd_ret_code retcode;
1669 struct drbd_tconn *tconn;
1670 struct net_conf *new_conf = NULL;
1671 int err;
1672 int ovr; /* online verify running */
1673 int rsr; /* re-sync running */
1674 struct crypto_hash *verify_tfm = NULL;
1675 struct crypto_hash *csums_tfm = NULL;
1676
1677
1678 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
1679 if (!adm_ctx.reply_skb)
1680 return retcode;
1681 if (retcode != NO_ERROR)
1682 goto out;
1683
1684 tconn = adm_ctx.tconn;
1685
1686 new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
1687 if (!new_conf) {
1688 retcode = ERR_NOMEM;
1689 goto out;
1690 }
1691
1692 /* we also need a net config
1693 * to change the options on */
1694 if (!get_net_conf(tconn)) {
1695 drbd_msg_put_info("net conf missing, try connect");
1696 retcode = ERR_INVALID_REQUEST;
1697 goto out;
1698 }
1699
1700 conn_reconfig_start(tconn);
1701
1702 memcpy(new_conf, tconn->net_conf, sizeof(*new_conf));
1703 err = net_conf_from_attrs_for_change(new_conf, info);
1704 if (err) {
1705 retcode = ERR_MANDATORY_TAG;
1706 drbd_msg_put_info(from_attrs_err_to_txt(err));
1707 goto fail;
1708 }
1709
1710 /* re-sync running */
1711 rsr = conn_resync_running(tconn);
1712 if (rsr && strcmp(new_conf->csums_alg, tconn->net_conf->csums_alg)) {
1713 retcode = ERR_CSUMS_RESYNC_RUNNING;
1714 goto fail;
1715 }
1716
1717 if (!rsr && new_conf->csums_alg[0]) {
1718 csums_tfm = crypto_alloc_hash(new_conf->csums_alg, 0, CRYPTO_ALG_ASYNC);
1719 if (IS_ERR(csums_tfm)) {
1720 csums_tfm = NULL;
1721 retcode = ERR_CSUMS_ALG;
1722 goto fail;
1723 }
1724
1725 if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) {
1726 retcode = ERR_CSUMS_ALG_ND;
1727 goto fail;
1728 }
1729 }
1730
1731 /* online verify running */
1732 ovr = conn_ov_running(tconn);
1733 if (ovr) {
1734 if (strcmp(new_conf->verify_alg, tconn->net_conf->verify_alg)) {
1735 retcode = ERR_VERIFY_RUNNING;
1736 goto fail;
1737 }
1738 }
1739
1740 if (!ovr && new_conf->verify_alg[0]) {
1741 verify_tfm = crypto_alloc_hash(new_conf->verify_alg, 0, CRYPTO_ALG_ASYNC);
1742 if (IS_ERR(verify_tfm)) {
1743 verify_tfm = NULL;
1744 retcode = ERR_VERIFY_ALG;
1745 goto fail;
1746 }
1747
1748 if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) {
1749 retcode = ERR_VERIFY_ALG_ND;
1750 goto fail;
1751 }
1752 }
1753
1754
1755 /* For now, use struct assignment, not pointer assignment.
1756 * We don't have any means to determine who might still
1757 * keep a local alias into the struct,
1758 * so we cannot just free it and hope for the best :(
1759 * FIXME
1760 * To avoid someone looking at a half-updated struct, we probably
1761 * should have a rw-semaphore on net_conf and disk_conf.
1762 */
1763 *tconn->net_conf = *new_conf;
1764
1765 if (!rsr) {
1766 crypto_free_hash(tconn->csums_tfm);
1767 tconn->csums_tfm = csums_tfm;
1768 csums_tfm = NULL;
1769 }
1770 if (!ovr) {
1771 crypto_free_hash(tconn->verify_tfm);
1772 tconn->verify_tfm = verify_tfm;
1773 verify_tfm = NULL;
1774 }
1775
1776 if (tconn->cstate >= C_WF_REPORT_PARAMS)
1777 drbd_send_sync_param(minor_to_mdev(conn_lowest_minor(tconn)));
1778
1779 fail:
1780 crypto_free_hash(csums_tfm);
1781 crypto_free_hash(verify_tfm);
1782 kfree(new_conf);
1783 put_net_conf(tconn);
1784 conn_reconfig_done(tconn);
1785 out:
1786 drbd_adm_finish(info, retcode);
1787 return 0;
1788}
1789
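/* Set up the network configuration of a connection: fill a new net_conf
 * from the compiled-in defaults plus the netlink attributes, check it
 * against every volume (fencing vs. protocol A, discard-my-data on a
 * primary) and against addresses already used by other connections,
 * allocate the cram-hmac and integrity transforms plus digest buffers,
 * then install everything under req_lock and request C_UNCONNECTED. */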
3b98c0c2 1790int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
b411b363 1791{
3b98c0c2
LE
1792 char hmac_name[CRYPTO_MAX_ALG_NAME];
1793 struct drbd_conf *mdev;
b411b363
PR
1794 struct net_conf *new_conf = NULL;
1795 struct crypto_hash *tfm = NULL;
1796 struct crypto_hash *integrity_w_tfm = NULL;
1797 struct crypto_hash *integrity_r_tfm = NULL;
b411b363
PR
1798 void *int_dig_in = NULL;
1799 void *int_dig_vv = NULL;
80883197 1800 struct drbd_tconn *oconn;
3b98c0c2 1801 struct drbd_tconn *tconn;
b411b363 1802 struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr;
3b98c0c2
LE
1803 enum drbd_ret_code retcode;
1804 int i;
1805 int err;
b411b363 1806
3b98c0c2
LE
1807 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
1808 if (!adm_ctx.reply_skb)
1809 return retcode;
1810 if (retcode != NO_ERROR)
1811 goto out;
1812
1813 tconn = adm_ctx.tconn;
80883197 1814 conn_reconfig_start(tconn);
b411b363 1815
80883197 1816 if (tconn->cstate > C_STANDALONE) {
b411b363
PR
1817 retcode = ERR_NET_CONFIGURED;
1818 goto fail;
1819 }
1820
1821 /* allocation not in the IO path, cqueue thread context */
f399002e 1822 new_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
b411b363
PR
1823 if (!new_conf) {
1824 retcode = ERR_NOMEM;
1825 goto fail;
1826 }
1827
f399002e
LE
1828 *new_conf = (struct net_conf) {
1829 {}, 0, /* my_addr */
1830 {}, 0, /* peer_addr */
1831 {}, 0, /* shared_secret */
1832 {}, 0, /* cram_hmac_alg */
1833 {}, 0, /* integrity_alg */
1834 {}, 0, /* verify_alg */
1835 {}, 0, /* csums_alg */
1836 DRBD_PROTOCOL_DEF, /* wire_protocol */
1837 DRBD_CONNECT_INT_DEF, /* try_connect_int */
1838 DRBD_TIMEOUT_DEF, /* timeout */
1839 DRBD_PING_INT_DEF, /* ping_int */
1840 DRBD_PING_TIMEO_DEF, /* ping_timeo */
1841 DRBD_SNDBUF_SIZE_DEF, /* sndbuf_size */
1842 DRBD_RCVBUF_SIZE_DEF, /* rcvbuf_size */
1843 DRBD_KO_COUNT_DEF, /* ko_count */
1844 DRBD_MAX_BUFFERS_DEF, /* max_buffers */
1845 DRBD_MAX_EPOCH_SIZE_DEF, /* max_epoch_size */
1846 DRBD_UNPLUG_WATERMARK_DEF, /* unplug_watermark */
1847 DRBD_AFTER_SB_0P_DEF, /* after_sb_0p */
1848 DRBD_AFTER_SB_1P_DEF, /* after_sb_1p */
1849 DRBD_AFTER_SB_2P_DEF, /* after_sb_2p */
1850 DRBD_RR_CONFLICT_DEF, /* rr_conflict */
1851 DRBD_ON_CONGESTION_DEF, /* on_congestion */
1852 DRBD_CONG_FILL_DEF, /* cong_fill */
1853 DRBD_CONG_EXTENTS_DEF, /* cong_extents */
1854 0, /* two_primaries */
1855 0, /* want_lose */
1856 0, /* no_cork */
1857 0, /* always_asbp */
1858 0, /* dry_run */
1859 0, /* use_rle */
1860 };
1861
1862 err = net_conf_from_attrs(new_conf, info);
3b98c0c2 1863 if (err) {
b411b363 1864 retcode = ERR_MANDATORY_TAG;
3b98c0c2 1865 drbd_msg_put_info(from_attrs_err_to_txt(err));
b411b363
PR
1866 goto fail;
1867 }
1868
1869 if (new_conf->two_primaries
1870 && (new_conf->wire_protocol != DRBD_PROT_C)) {
1871 retcode = ERR_NOT_PROTO_C;
1872 goto fail;
47ff2d0a
PR
1873 }
1874
695d08fa 1875 rcu_read_lock();
80883197
PR
1876 idr_for_each_entry(&tconn->volumes, mdev, i) {
1877 if (get_ldev(mdev)) {
1878 enum drbd_fencing_p fp = mdev->ldev->dc.fencing;
1879 put_ldev(mdev);
1880 if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) {
1881 retcode = ERR_STONITH_AND_PROT_A;
695d08fa 1882 goto fail_rcu_unlock;
80883197
PR
1883 }
1884 }
1885 if (mdev->state.role == R_PRIMARY && new_conf->want_lose) {
1886 retcode = ERR_DISCARD;
695d08fa 1887 goto fail_rcu_unlock;
47ff2d0a 1888 }
80883197
PR
1889 if (!mdev->bitmap) {
1890 if(drbd_bm_init(mdev)) {
1891 retcode = ERR_NOMEM;
695d08fa 1892 goto fail_rcu_unlock;
80883197
PR
1893 }
1894 }
47ff2d0a 1895 }
695d08fa 1896 rcu_read_unlock();
b411b363 1897
422028b1
PR
1898 if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) {
1899 retcode = ERR_CONG_NOT_PROTO_A;
1900 goto fail;
1901 }
1902
b411b363
PR
1903 retcode = NO_ERROR;
1904
1905 new_my_addr = (struct sockaddr *)&new_conf->my_addr;
1906 new_peer_addr = (struct sockaddr *)&new_conf->peer_addr;
543cc10b 1907
ef356262 1908 /* No need to take drbd_cfg_rwsem here. All reconfiguration is
543cc10b
LE
1909 * strictly serialized on genl_lock(). We are protected against
1910 * concurrent reconfiguration/addition/deletion */
80883197
PR
1911 list_for_each_entry(oconn, &drbd_tconns, all_tconn) {
1912 if (oconn == tconn)
b411b363 1913 continue;
80883197
PR
1914 if (get_net_conf(oconn)) {
1915 taken_addr = (struct sockaddr *)&oconn->net_conf->my_addr;
1916 if (new_conf->my_addr_len == oconn->net_conf->my_addr_len &&
b411b363
PR
1917 !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len))
1918 retcode = ERR_LOCAL_ADDR;
1919
80883197
PR
1920 taken_addr = (struct sockaddr *)&oconn->net_conf->peer_addr;
1921 if (new_conf->peer_addr_len == oconn->net_conf->peer_addr_len &&
b411b363
PR
1922 !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len))
1923 retcode = ERR_PEER_ADDR;
1924
80883197 1925 put_net_conf(oconn);
b411b363
PR
1926 if (retcode != NO_ERROR)
1927 goto fail;
1928 }
1929 }
1930
1931 if (new_conf->cram_hmac_alg[0] != 0) {
1932 snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
1933 new_conf->cram_hmac_alg);
1934 tfm = crypto_alloc_hash(hmac_name, 0, CRYPTO_ALG_ASYNC);
1935 if (IS_ERR(tfm)) {
1936 tfm = NULL;
1937 retcode = ERR_AUTH_ALG;
1938 goto fail;
1939 }
1940
0798219f 1941 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
b411b363
PR
1942 retcode = ERR_AUTH_ALG_ND;
1943 goto fail;
1944 }
1945 }
1946
1947 if (new_conf->integrity_alg[0]) {
1948 integrity_w_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC);
1949 if (IS_ERR(integrity_w_tfm)) {
1950 integrity_w_tfm = NULL;
1951 retcode=ERR_INTEGRITY_ALG;
1952 goto fail;
1953 }
1954
1955 if (!drbd_crypto_is_hash(crypto_hash_tfm(integrity_w_tfm))) {
1956 retcode=ERR_INTEGRITY_ALG_ND;
1957 goto fail;
1958 }
1959
1960 integrity_r_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC);
1961 if (IS_ERR(integrity_r_tfm)) {
1962 integrity_r_tfm = NULL;
1963 retcode=ERR_INTEGRITY_ALG;
1964 goto fail;
1965 }
1966 }
1967
b411b363
PR
1968 ((char *)new_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;
1969
80883197 1970 /* allocation not in the IO path, cqueue thread context */
b411b363
PR
1971 if (integrity_w_tfm) {
1972 i = crypto_hash_digestsize(integrity_w_tfm);
b411b363
PR
1973 int_dig_in = kmalloc(i, GFP_KERNEL);
1974 if (!int_dig_in) {
1975 retcode = ERR_NOMEM;
1976 goto fail;
1977 }
1978 int_dig_vv = kmalloc(i, GFP_KERNEL);
1979 if (!int_dig_vv) {
1980 retcode = ERR_NOMEM;
1981 goto fail;
1982 }
1983 }
1984
80883197
PR
1985 conn_flush_workqueue(tconn);
1986 spin_lock_irq(&tconn->req_lock);
1987 if (tconn->net_conf != NULL) {
b411b363 1988 retcode = ERR_NET_CONFIGURED;
80883197 1989 spin_unlock_irq(&tconn->req_lock);
b411b363
PR
1990 goto fail;
1991 }
80883197 1992 tconn->net_conf = new_conf;
b411b363 1993
80883197
PR
1994 crypto_free_hash(tconn->cram_hmac_tfm);
1995 tconn->cram_hmac_tfm = tfm;
b411b363 1996
80883197
PR
1997 crypto_free_hash(tconn->integrity_w_tfm);
1998 tconn->integrity_w_tfm = integrity_w_tfm;
b411b363 1999
80883197
PR
2000 crypto_free_hash(tconn->integrity_r_tfm);
2001 tconn->integrity_r_tfm = integrity_r_tfm;
b411b363 2002
80883197
PR
2003 kfree(tconn->int_dig_in);
2004 kfree(tconn->int_dig_vv);
80883197
PR
2005 tconn->int_dig_in=int_dig_in;
2006 tconn->int_dig_vv=int_dig_vv;
2007 retcode = _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);
2008 spin_unlock_irq(&tconn->req_lock);
b411b363 2009
695d08fa 2010 rcu_read_lock();
80883197
PR
2011 idr_for_each_entry(&tconn->volumes, mdev, i) {
2012 mdev->send_cnt = 0;
2013 mdev->recv_cnt = 0;
2014 kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
2015 }
695d08fa 2016 rcu_read_unlock();
80883197 2017 conn_reconfig_done(tconn);
3b98c0c2 2018 drbd_adm_finish(info, retcode);
b411b363
PR
2019 return 0;
2020
695d08fa
PR
2021fail_rcu_unlock:
2022 rcu_read_unlock();
b411b363 2023fail:
b411b363
PR
2024 kfree(int_dig_in);
2025 kfree(int_dig_vv);
2026 crypto_free_hash(tfm);
2027 crypto_free_hash(integrity_w_tfm);
2028 crypto_free_hash(integrity_r_tfm);
b411b363
PR
2029 kfree(new_conf);
2030
80883197 2031 conn_reconfig_done(tconn);
3b98c0c2
LE
2032out:
2033 drbd_adm_finish(info, retcode);
b411b363
PR
2034 return 0;
2035}
2036
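/* Try to reach C_DISCONNECTING.  With force this is a CS_HARD state change
 * that always succeeds; otherwise a few special results are handled:
 * SS_PRIMARY_NOP additionally outdates the peer's disk, SS_CW_FAILED_BY_PEER
 * outdates our own disk and may fall back to a hard disconnect. */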
85f75dd7
LE
2037static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool force)
2038{
2039 enum drbd_state_rv rv;
2040 if (force) {
2041 spin_lock_irq(&tconn->req_lock);
2042 if (tconn->cstate >= C_WF_CONNECTION)
2043 _conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
2044 spin_unlock_irq(&tconn->req_lock);
2045 return SS_SUCCESS;
2046 }
2047
2048 rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), 0);
2049
2050 switch (rv) {
2051 case SS_NOTHING_TO_DO:
2052 case SS_ALREADY_STANDALONE:
2053 return SS_SUCCESS;
2054 case SS_PRIMARY_NOP:
2055 /* Our state checking code wants to see the peer outdated. */
2056 rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING,
2057 pdsk, D_OUTDATED), CS_VERBOSE);
2058 break;
2059 case SS_CW_FAILED_BY_PEER:
2060 /* The peer probably wants to see us outdated. */
2061 rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING,
2062 disk, D_OUTDATED), 0);
2063 if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
2064 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
2065 rv = SS_SUCCESS;
2066 }
2067 break;
2068 default:;
2069 /* no special handling necessary */
2070 }
2071
2072 return rv;
2073}
2074
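/* Netlink handler: parse the optional disconnect parameters, try the
 * disconnect, then wait (interruptibly) on ping_wait until we have left
 * C_DISCONNECTING. */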
3b98c0c2 2075int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
b411b363 2076{
3b98c0c2
LE
2077 struct disconnect_parms parms;
2078 struct drbd_tconn *tconn;
85f75dd7 2079 enum drbd_state_rv rv;
3b98c0c2
LE
2080 enum drbd_ret_code retcode;
2081 int err;
2561b9c1 2082
3b98c0c2
LE
2083 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
2084 if (!adm_ctx.reply_skb)
2085 return retcode;
2086 if (retcode != NO_ERROR)
2561b9c1 2087 goto fail;
3b98c0c2
LE
2088
2089 tconn = adm_ctx.tconn;
2090 memset(&parms, 0, sizeof(parms));
2091 if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
f399002e 2092 err = disconnect_parms_from_attrs(&parms, info);
3b98c0c2
LE
2093 if (err) {
2094 retcode = ERR_MANDATORY_TAG;
2095 drbd_msg_put_info(from_attrs_err_to_txt(err));
2096 goto fail;
2097 }
2561b9c1
PR
2098 }
2099
85f75dd7
LE
2100 rv = conn_try_disconnect(tconn, parms.force_disconnect);
2101 if (rv < SS_SUCCESS)
b411b363
PR
2102 goto fail;
2103
df24aa45
PR
2104 if (wait_event_interruptible(tconn->ping_wait,
2105 tconn->cstate != C_DISCONNECTING)) {
b411b363
PR
2106 /* Do not test for mdev->state.conn == C_STANDALONE, since
2107 someone else might connect us in the meantime! */
2108 retcode = ERR_INTR;
2109 goto fail;
2110 }
2111
b411b363
PR
2112 retcode = NO_ERROR;
2113 fail:
3b98c0c2 2114 drbd_adm_finish(info, retcode);
b411b363
PR
2115 return 0;
2116}
2117
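/* After an online grow, decide which side resyncs the new area: the
 * primary if the roles differ, otherwise DISCARD_CONCURRENT breaks the
 * tie.  The chosen side starts the resync, the other waits for the
 * sync UUID. */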
2118void resync_after_online_grow(struct drbd_conf *mdev)
2119{
2120 int iass; /* I am sync source */
2121
2122 dev_info(DEV, "Resync of new storage after online grow\n");
2123 if (mdev->state.role != mdev->state.peer)
2124 iass = (mdev->state.role == R_PRIMARY);
2125 else
25703f83 2126 iass = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
b411b363
PR
2127
2128 if (iass)
2129 drbd_start_resync(mdev, C_SYNC_SOURCE);
2130 else
2131 _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
2132}
2133
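/* Resize handler: refused while a resync is running or while both nodes
 * are secondary; skipping the resync of new storage needs protocol 93.
 * Updates the configured disk size, re-determines the device size and,
 * if connected, sends new UUIDs and sizes (RESIZE_PENDING when grown). */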
3b98c0c2 2134int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
b411b363 2135{
3b98c0c2
LE
2136 struct resize_parms rs;
2137 struct drbd_conf *mdev;
2138 enum drbd_ret_code retcode;
b411b363 2139 enum determine_dev_size dd;
6495d2c6 2140 enum dds_flags ddsf;
3b98c0c2 2141 int err;
b411b363 2142
3b98c0c2
LE
2143 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2144 if (!adm_ctx.reply_skb)
2145 return retcode;
2146 if (retcode != NO_ERROR)
b411b363 2147 goto fail;
3b98c0c2
LE
2148
2149 memset(&rs, 0, sizeof(struct resize_parms));
2150 if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
f399002e 2151 err = resize_parms_from_attrs(&rs, info);
3b98c0c2
LE
2152 if (err) {
2153 retcode = ERR_MANDATORY_TAG;
2154 drbd_msg_put_info(from_attrs_err_to_txt(err));
2155 goto fail;
2156 }
b411b363
PR
2157 }
2158
3b98c0c2 2159 mdev = adm_ctx.mdev;
b411b363
PR
2160 if (mdev->state.conn > C_CONNECTED) {
2161 retcode = ERR_RESIZE_RESYNC;
2162 goto fail;
2163 }
2164
2165 if (mdev->state.role == R_SECONDARY &&
2166 mdev->state.peer == R_SECONDARY) {
2167 retcode = ERR_NO_PRIMARY;
2168 goto fail;
2169 }
2170
2171 if (!get_ldev(mdev)) {
2172 retcode = ERR_NO_DISK;
2173 goto fail;
2174 }
2175
31890f4a 2176 if (rs.no_resync && mdev->tconn->agreed_pro_version < 93) {
6495d2c6
PR
2177 retcode = ERR_NEED_APV_93;
2178 goto fail;
2179 }
2180
087c2492 2181 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev))
b411b363 2182 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
b411b363
PR
2183
2184 mdev->ldev->dc.disk_size = (sector_t)rs.resize_size;
6495d2c6 2185 ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
24c4830c 2186 dd = drbd_determine_dev_size(mdev, ddsf);
b411b363
PR
2187 drbd_md_sync(mdev);
2188 put_ldev(mdev);
2189 if (dd == dev_size_error) {
2190 retcode = ERR_NOMEM_BITMAP;
2191 goto fail;
2192 }
2193
087c2492 2194 if (mdev->state.conn == C_CONNECTED) {
b411b363
PR
2195 if (dd == grew)
2196 set_bit(RESIZE_PENDING, &mdev->flags);
2197
2198 drbd_send_uuids(mdev);
6495d2c6 2199 drbd_send_sizes(mdev, 1, ddsf);
b411b363
PR
2200 }
2201
2202 fail:
3b98c0c2 2203 drbd_adm_finish(info, retcode);
b411b363
PR
2204 return 0;
2205}
2206
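/* Update the per-connection res_opts from the netlink attributes and, if
 * the CPU mask changed, ask the receiver, asender and worker threads to
 * migrate to the new mask on their next wakeup. */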
f399002e 2207int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
b411b363 2208{
3b98c0c2 2209 enum drbd_ret_code retcode;
b411b363 2210 cpumask_var_t new_cpu_mask;
f399002e 2211 struct drbd_tconn *tconn;
778f271d 2212 int *rs_plan_s = NULL;
f399002e
LE
2213 struct res_opts sc;
2214 int err;
b411b363 2215
f399002e 2216 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
3b98c0c2
LE
2217 if (!adm_ctx.reply_skb)
2218 return retcode;
2219 if (retcode != NO_ERROR)
2220 goto fail;
f399002e 2221 tconn = adm_ctx.tconn;
3b98c0c2 2222
b411b363
PR
2223 if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
2224 retcode = ERR_NOMEM;
3b98c0c2 2225 drbd_msg_put_info("unable to allocate cpumask");
b411b363
PR
2226 goto fail;
2227 }
2228
3b98c0c2
LE
2229 if (((struct drbd_genlmsghdr*)info->userhdr)->flags
2230 & DRBD_GENL_F_SET_DEFAULTS) {
f399002e 2231 memset(&sc, 0, sizeof(struct res_opts));
265be2d0 2232 sc.on_no_data = DRBD_ON_NO_DATA_DEF;
b411b363 2233 } else
f399002e 2234 sc = tconn->res_opts;
b411b363 2235
f399002e 2236 err = res_opts_from_attrs(&sc, info);
3b98c0c2 2237 if (err) {
b411b363 2238 retcode = ERR_MANDATORY_TAG;
3b98c0c2 2239 drbd_msg_put_info(from_attrs_err_to_txt(err));
b411b363
PR
2240 goto fail;
2241 }
2242
b411b363
PR
2243 /* silently ignore cpu mask on UP kernel */
2244 if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) {
2245 err = __bitmap_parse(sc.cpu_mask, 32, 0,
2246 cpumask_bits(new_cpu_mask), nr_cpu_ids);
2247 if (err) {
f399002e 2248 conn_warn(tconn, "__bitmap_parse() failed with %d\n", err);
b411b363
PR
2249 retcode = ERR_CPU_MASK_PARSE;
2250 goto fail;
2251 }
2252 }
2253
b411b363 2254
f399002e 2255 tconn->res_opts = sc;
b411b363 2256
f399002e
LE
2257 if (!cpumask_equal(tconn->cpu_mask, new_cpu_mask)) {
2258 cpumask_copy(tconn->cpu_mask, new_cpu_mask);
2259 drbd_calc_cpu_mask(tconn);
2260 tconn->receiver.reset_cpu_mask = 1;
2261 tconn->asender.reset_cpu_mask = 1;
2262 tconn->worker.reset_cpu_mask = 1;
b411b363
PR
2263 }
2264
b411b363 2265fail:
778f271d 2266 kfree(rs_plan_s);
b411b363 2267 free_cpumask_var(new_cpu_mask);
3b98c0c2
LE
2268
2269 drbd_adm_finish(info, retcode);
b411b363
PR
2270 return 0;
2271}
2272
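/* Invalidate the local disk and become sync target.  Pending bitmap IO is
 * waited for first; while no connection is available the disk is only
 * marked D_INCONSISTENT. */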
3b98c0c2 2273int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
b411b363 2274{
3b98c0c2
LE
2275 struct drbd_conf *mdev;
2276 int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2277
2278 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2279 if (!adm_ctx.reply_skb)
2280 return retcode;
2281 if (retcode != NO_ERROR)
2282 goto out;
2283
2284 mdev = adm_ctx.mdev;
b411b363 2285
194bfb32
LE
2286 /* If there is still bitmap IO pending, probably because of a previous
2287 * resync just being finished, wait for it before requesting a new resync. */
2288 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
2289
b411b363
PR
2290 retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED);
2291
2292 if (retcode < SS_SUCCESS && retcode != SS_NEED_CONNECTION)
2293 retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
2294
2295 while (retcode == SS_NEED_CONNECTION) {
87eeee41 2296 spin_lock_irq(&mdev->tconn->req_lock);
b411b363
PR
2297 if (mdev->state.conn < C_CONNECTED)
2298 retcode = _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_VERBOSE, NULL);
87eeee41 2299 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363
PR
2300
2301 if (retcode != SS_NEED_CONNECTION)
2302 break;
2303
2304 retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
2305 }
2306
3b98c0c2
LE
2307out:
2308 drbd_adm_finish(info, retcode);
b411b363
PR
2309 return 0;
2310}
2311
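/* Set all bits in the bitmap (and write it out) and suspend activity-log
 * updates; returns the result of the bitmap operation. */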
0778286a
PR
2312static int drbd_bmio_set_susp_al(struct drbd_conf *mdev)
2313{
2314 int rv;
2315
2316 rv = drbd_bmio_set_n_write(mdev);
2317 drbd_suspend_al(mdev);
2318 return rv;
2319}
2320
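/* Common helper for the simple admin requests below that map straight to
 * a single drbd_request_state() call (invalidate-peer, suspend-io,
 * outdate, ...). */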
3b98c0c2
LE
2321static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
2322 union drbd_state mask, union drbd_state val)
b411b363 2323{
3b98c0c2 2324 enum drbd_ret_code retcode;
194bfb32 2325
3b98c0c2
LE
2326 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2327 if (!adm_ctx.reply_skb)
2328 return retcode;
2329 if (retcode != NO_ERROR)
2330 goto out;
b411b363 2331
3b98c0c2
LE
2332 retcode = drbd_request_state(adm_ctx.mdev, mask, val);
2333out:
2334 drbd_adm_finish(info, retcode);
b411b363
PR
2335 return 0;
2336}
2337
3b98c0c2 2338int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
b411b363 2339{
3b98c0c2
LE
2340 return drbd_adm_simple_request_state(skb, info, NS(conn, C_STARTING_SYNC_S));
2341}
b411b363 2342
3b98c0c2
LE
2343int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
2344{
2345 enum drbd_ret_code retcode;
2346
2347 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2348 if (!adm_ctx.reply_skb)
2349 return retcode;
2350 if (retcode != NO_ERROR)
2351 goto out;
b411b363 2352
3b98c0c2
LE
2353 if (drbd_request_state(adm_ctx.mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
2354 retcode = ERR_PAUSE_IS_SET;
2355out:
2356 drbd_adm_finish(info, retcode);
b411b363
PR
2357 return 0;
2358}
2359
3b98c0c2 2360int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
b411b363 2361{
da9fbc27 2362 union drbd_dev_state s;
3b98c0c2
LE
2363 enum drbd_ret_code retcode;
2364
2365 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2366 if (!adm_ctx.reply_skb)
2367 return retcode;
2368 if (retcode != NO_ERROR)
2369 goto out;
b411b363 2370
3b98c0c2
LE
2371 if (drbd_request_state(adm_ctx.mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
2372 s = adm_ctx.mdev->state;
cd88d030
PR
2373 if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
2374 retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
2375 s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
2376 } else {
2377 retcode = ERR_PAUSE_IS_CLEAR;
2378 }
2379 }
b411b363 2380
3b98c0c2
LE
2381out:
2382 drbd_adm_finish(info, retcode);
b411b363
PR
2383 return 0;
2384}
2385
3b98c0c2 2386int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
b411b363 2387{
3b98c0c2 2388 return drbd_adm_simple_request_state(skb, info, NS(susp, 1));
b411b363
PR
2389}
2390
3b98c0c2 2391int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
b411b363 2392{
3b98c0c2
LE
2393 struct drbd_conf *mdev;
2394 int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2395
2396 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2397 if (!adm_ctx.reply_skb)
2398 return retcode;
2399 if (retcode != NO_ERROR)
2400 goto out;
2401
2402 mdev = adm_ctx.mdev;
43a5182c
PR
2403 if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
2404 drbd_uuid_new_current(mdev);
2405 clear_bit(NEW_CUR_UUID, &mdev->flags);
43a5182c 2406 }
265be2d0 2407 drbd_suspend_io(mdev);
3b98c0c2
LE
2408 retcode = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
2409 if (retcode == SS_SUCCESS) {
265be2d0 2410 if (mdev->state.conn < C_CONNECTED)
2f5cdd0b 2411 tl_clear(mdev->tconn);
265be2d0 2412 if (mdev->state.disk == D_DISKLESS || mdev->state.disk == D_FAILED)
2f5cdd0b 2413 tl_restart(mdev->tconn, FAIL_FROZEN_DISK_IO);
265be2d0
PR
2414 }
2415 drbd_resume_io(mdev);
2416
3b98c0c2
LE
2417out:
2418 drbd_adm_finish(info, retcode);
b411b363
PR
2419 return 0;
2420}
2421
3b98c0c2 2422int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
b411b363 2423{
3b98c0c2 2424 return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
b411b363
PR
2425}
2426
543cc10b
LE
2427int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *conn_name, unsigned vnr)
2428{
2429 struct nlattr *nla;
2430 nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
2431 if (!nla)
2432 goto nla_put_failure;
2433 if (vnr != VOLUME_UNSPECIFIED)
2434 NLA_PUT_U32(skb, T_ctx_volume, vnr);
2435 NLA_PUT_STRING(skb, T_ctx_conn_name, conn_name);
2436 nla_nest_end(skb, nla);
2437 return 0;
2438
2439nla_put_failure:
2440 if (nla)
2441 nla_nest_cancel(skb, nla);
2442 return -EMSGSIZE;
2443}
2444
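/* Serialize the current status of one device into a netlink skb: the
 * config context, the resource/disk/net options and a DRBD_NLA_STATE_INFO
 * nest.  Sensitive fields are omitted for broadcasts and for requesters
 * without CAP_SYS_ADMIN. */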
3b98c0c2
LE
2445int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
2446 const struct sib_info *sib)
b411b363 2447{
3b98c0c2
LE
2448 struct state_info *si = NULL; /* for sizeof(si->member); */
2449 struct nlattr *nla;
2450 int got_ldev;
2451 int got_net;
2452 int err = 0;
2453 int exclude_sensitive;
2454
2455 /* If sib != NULL, this is drbd_bcast_event, which anyone can listen
2456 * to. So we better exclude_sensitive information.
2457 *
2458 * If sib == NULL, this is drbd_adm_get_status, executed synchronously
2459 * in the context of the requesting user process. Exclude sensitive
2460 * information, unless current has superuser.
2461 *
2462 * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
2463 * relies on the current implementation of netlink_dump(), which
2464 * executes the dump callback successively from netlink_recvmsg(),
2465 * always in the context of the receiving process */
2466 exclude_sensitive = sib || !capable(CAP_SYS_ADMIN);
2467
2468 got_ldev = get_ldev(mdev);
2469 got_net = get_net_conf(mdev->tconn);
2470
2471 /* We need to add connection name and volume number information still.
2472 * Minor number is in drbd_genlmsghdr. */
543cc10b 2473 if (nla_put_drbd_cfg_context(skb, mdev->tconn->name, mdev->vnr))
3b98c0c2 2474 goto nla_put_failure;
3b98c0c2 2475
f399002e
LE
2476 if (res_opts_to_skb(skb, &mdev->tconn->res_opts, exclude_sensitive))
2477 goto nla_put_failure;
2478
3b98c0c2
LE
2479 if (got_ldev)
2480 if (disk_conf_to_skb(skb, &mdev->ldev->dc, exclude_sensitive))
2481 goto nla_put_failure;
2482 if (got_net)
2483 if (net_conf_to_skb(skb, mdev->tconn->net_conf, exclude_sensitive))
2484 goto nla_put_failure;
2485
3b98c0c2
LE
2486 nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
2487 if (!nla)
2488 goto nla_put_failure;
2489 NLA_PUT_U32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY);
2490 NLA_PUT_U32(skb, T_current_state, mdev->state.i);
2491 NLA_PUT_U64(skb, T_ed_uuid, mdev->ed_uuid);
2492 NLA_PUT_U64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev));
2493
2494 if (got_ldev) {
2495 NLA_PUT_U32(skb, T_disk_flags, mdev->ldev->md.flags);
2496 NLA_PUT(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid);
2497 NLA_PUT_U64(skb, T_bits_total, drbd_bm_bits(mdev));
2498 NLA_PUT_U64(skb, T_bits_oos, drbd_bm_total_weight(mdev));
2499 if (C_SYNC_SOURCE <= mdev->state.conn &&
2500 C_PAUSED_SYNC_T >= mdev->state.conn) {
2501 NLA_PUT_U64(skb, T_bits_rs_total, mdev->rs_total);
2502 NLA_PUT_U64(skb, T_bits_rs_failed, mdev->rs_failed);
2503 }
b411b363
PR
2504 }
2505
3b98c0c2
LE
2506 if (sib) {
2507 switch(sib->sib_reason) {
2508 case SIB_SYNC_PROGRESS:
2509 case SIB_GET_STATUS_REPLY:
2510 break;
2511 case SIB_STATE_CHANGE:
2512 NLA_PUT_U32(skb, T_prev_state, sib->os.i);
2513 NLA_PUT_U32(skb, T_new_state, sib->ns.i);
2514 break;
2515 case SIB_HELPER_POST:
2516 NLA_PUT_U32(skb,
2517 T_helper_exit_code, sib->helper_exit_code);
2518 /* fall through */
2519 case SIB_HELPER_PRE:
2520 NLA_PUT_STRING(skb, T_helper, sib->helper_name);
2521 break;
2522 }
b411b363 2523 }
3b98c0c2 2524 nla_nest_end(skb, nla);
b411b363 2525
3b98c0c2
LE
2526 if (0)
2527nla_put_failure:
2528 err = -EMSGSIZE;
2529 if (got_ldev)
2530 put_ldev(mdev);
2531 if (got_net)
2532 put_net_conf(mdev->tconn);
2533 return err;
b411b363
PR
2534}
2535
3b98c0c2 2536int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
b411b363 2537{
3b98c0c2
LE
2538 enum drbd_ret_code retcode;
2539 int err;
b411b363 2540
3b98c0c2
LE
2541 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2542 if (!adm_ctx.reply_skb)
2543 return retcode;
2544 if (retcode != NO_ERROR)
2545 goto out;
b411b363 2546
3b98c0c2
LE
2547 err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.mdev, NULL);
2548 if (err) {
2549 nlmsg_free(adm_ctx.reply_skb);
2550 return err;
b411b363 2551 }
3b98c0c2
LE
2552out:
2553 drbd_adm_finish(info, retcode);
2554 return 0;
b411b363
PR
2555}
2556
3b98c0c2 2557int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
b411b363 2558{
3b98c0c2
LE
2559 struct drbd_conf *mdev;
2560 struct drbd_genlmsghdr *dh;
543cc10b
LE
2561 struct drbd_tconn *pos = (struct drbd_tconn*)cb->args[0];
2562 struct drbd_tconn *tconn = NULL;
2563 struct drbd_tconn *tmp;
2564 unsigned volume = cb->args[1];
2565
2566 /* Open coded, deferred, iteration:
2567 * list_for_each_entry_safe(tconn, tmp, &drbd_tconns, all_tconn) {
2568 * idr_for_each_entry(&tconn->volumes, mdev, i) {
2569 * ...
2570 * }
2571 * }
2572 * where tconn is cb->args[0];
2573 * and i is cb->args[1];
2574 *
3b98c0c2
LE
2575 * This may miss entries inserted after this dump started,
2576 * or entries deleted before they are reached.
543cc10b
LE
2577 *
2578 * We need to make sure the mdev won't disappear while
2579 * we are looking at it, and revalidate our iterators
2580 * on each iteration.
2581 */
3b98c0c2 2582
543cc10b 2583 /* synchronize with drbd_new_tconn/drbd_free_tconn */
ef356262 2584 down_read(&drbd_cfg_rwsem);
543cc10b
LE
2585next_tconn:
2586 /* revalidate iterator position */
2587 list_for_each_entry(tmp, &drbd_tconns, all_tconn) {
2588 if (pos == NULL) {
2589 /* first iteration */
2590 pos = tmp;
2591 tconn = pos;
2592 break;
2593 }
2594 if (tmp == pos) {
2595 tconn = pos;
2596 break;
2597 }
2598 }
2599 if (tconn) {
2600 mdev = idr_get_next(&tconn->volumes, &volume);
2601 if (!mdev) {
2602 /* No more volumes to dump on this tconn.
2603 * Advance tconn iterator. */
2604 pos = list_entry(tconn->all_tconn.next,
2605 struct drbd_tconn, all_tconn);
2606 /* But, did we dump any volume on this tconn yet? */
2607 if (volume != 0) {
2608 tconn = NULL;
2609 volume = 0;
2610 goto next_tconn;
2611 }
2612 }
2613
3b98c0c2
LE
2614 dh = genlmsg_put(skb, NETLINK_CB(cb->skb).pid,
2615 cb->nlh->nlmsg_seq, &drbd_genl_family,
2616 NLM_F_MULTI, DRBD_ADM_GET_STATUS);
2617 if (!dh)
543cc10b
LE
2618 goto out;
2619
2620 if (!mdev) {
2621 /* this is a tconn without a single volume */
2622 dh->minor = -1U;
2623 dh->ret_code = NO_ERROR;
2624 if (nla_put_drbd_cfg_context(skb, tconn->name, VOLUME_UNSPECIFIED))
2625 genlmsg_cancel(skb, dh);
2626 else
2627 genlmsg_end(skb, dh);
2628 goto out;
2629 }
3b98c0c2 2630
543cc10b
LE
2631 D_ASSERT(mdev->vnr == volume);
2632 D_ASSERT(mdev->tconn == tconn);
3b98c0c2 2633
543cc10b 2634 dh->minor = mdev_to_minor(mdev);
3b98c0c2
LE
2635 dh->ret_code = NO_ERROR;
2636
2637 if (nla_put_status_info(skb, mdev, NULL)) {
2638 genlmsg_cancel(skb, dh);
543cc10b 2639 goto out;
3b98c0c2
LE
2640 }
2641 genlmsg_end(skb, dh);
2642 }
b411b363 2643
543cc10b 2644out:
ef356262 2645 up_read(&drbd_cfg_rwsem);
543cc10b
LE
2646 /* where to start the next iteration */
2647 cb->args[0] = (long)pos;
2648 cb->args[1] = (pos == tconn) ? volume + 1 : 0;
b411b363 2649
543cc10b
LE
2650 /* No more tconns/volumes/minors found results in an empty skb.
2651 * Which will terminate the dump. */
3b98c0c2 2652 return skb->len;
b411b363
PR
2653}
2654
3b98c0c2 2655int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
b411b363 2656{
3b98c0c2
LE
2657 enum drbd_ret_code retcode;
2658 struct timeout_parms tp;
2659 int err;
b411b363 2660
3b98c0c2
LE
2661 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2662 if (!adm_ctx.reply_skb)
2663 return retcode;
2664 if (retcode != NO_ERROR)
2665 goto out;
b411b363 2666
3b98c0c2
LE
2667 tp.timeout_type =
2668 adm_ctx.mdev->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
2669 test_bit(USE_DEGR_WFC_T, &adm_ctx.mdev->flags) ? UT_DEGRADED :
2670 UT_DEFAULT;
b411b363 2671
3b98c0c2
LE
2672 err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
2673 if (err) {
2674 nlmsg_free(adm_ctx.reply_skb);
2675 return err;
2676 }
2677out:
2678 drbd_adm_finish(info, retcode);
2679 return 0;
b411b363
PR
2680}
2681
3b98c0c2 2682int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
b411b363 2683{
3b98c0c2
LE
2684 struct drbd_conf *mdev;
2685 enum drbd_ret_code retcode;
b411b363 2686
3b98c0c2
LE
2687 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2688 if (!adm_ctx.reply_skb)
2689 return retcode;
2690 if (retcode != NO_ERROR)
2691 goto out;
873b0d5f 2692
3b98c0c2
LE
2693 mdev = adm_ctx.mdev;
2694 if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
2695 /* resume from last known position, if possible */
2696 struct start_ov_parms parms =
2697 { .ov_start_sector = mdev->ov_start_sector };
f399002e 2698 int err = start_ov_parms_from_attrs(&parms, info);
3b98c0c2
LE
2699 if (err) {
2700 retcode = ERR_MANDATORY_TAG;
2701 drbd_msg_put_info(from_attrs_err_to_txt(err));
2702 goto out;
2703 }
2704 /* w_make_ov_request expects position to be aligned */
2705 mdev->ov_start_sector = parms.ov_start_sector & ~BM_SECT_PER_BIT;
2706 }
873b0d5f
LE
2707 /* If there is still bitmap IO pending, e.g. previous resync or verify
2708 * just being finished, wait for it before requesting a new resync. */
2709 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
3b98c0c2
LE
2710 retcode = drbd_request_state(mdev,NS(conn,C_VERIFY_S));
2711out:
2712 drbd_adm_finish(info, retcode);
b411b363
PR
2713 return 0;
2714}
2715
2716
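/* Generate a new current UUID.  If the device is connected, was just
 * created and clear_bm was requested, the initial sync is skipped by
 * clearing the bitmap and going straight to UpToDate/UpToDate. */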
3b98c0c2 2717int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
b411b363 2718{
3b98c0c2
LE
2719 struct drbd_conf *mdev;
2720 enum drbd_ret_code retcode;
b411b363
PR
2721 int skip_initial_sync = 0;
2722 int err;
3b98c0c2 2723 struct new_c_uuid_parms args;
b411b363 2724
3b98c0c2
LE
2725 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2726 if (!adm_ctx.reply_skb)
2727 return retcode;
2728 if (retcode != NO_ERROR)
2729 goto out_nolock;
b411b363 2730
3b98c0c2
LE
2731 mdev = adm_ctx.mdev;
2732 memset(&args, 0, sizeof(args));
2733 if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
f399002e 2734 err = new_c_uuid_parms_from_attrs(&args, info);
3b98c0c2
LE
2735 if (err) {
2736 retcode = ERR_MANDATORY_TAG;
2737 drbd_msg_put_info(from_attrs_err_to_txt(err));
2738 goto out_nolock;
2739 }
b411b363
PR
2740 }
2741
8410da8f 2742 mutex_lock(mdev->state_mutex); /* Protects us against serialized state changes. */
b411b363
PR
2743
2744 if (!get_ldev(mdev)) {
2745 retcode = ERR_NO_DISK;
2746 goto out;
2747 }
2748
2749 /* this is "skip initial sync", assume to be clean */
31890f4a 2750 if (mdev->state.conn == C_CONNECTED && mdev->tconn->agreed_pro_version >= 90 &&
b411b363
PR
2751 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
2752 dev_info(DEV, "Preparing to skip initial sync\n");
2753 skip_initial_sync = 1;
2754 } else if (mdev->state.conn != C_STANDALONE) {
2755 retcode = ERR_CONNECTED;
2756 goto out_dec;
2757 }
2758
2759 drbd_uuid_set(mdev, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
2760 drbd_uuid_new_current(mdev); /* New current, previous to UI_BITMAP */
2761
2762 if (args.clear_bm) {
20ceb2b2
LE
2763 err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
2764 "clear_n_write from new_c_uuid", BM_LOCKED_MASK);
b411b363
PR
2765 if (err) {
2766 dev_err(DEV, "Writing bitmap failed with %d\n",err);
2767 retcode = ERR_IO_MD_DISK;
2768 }
2769 if (skip_initial_sync) {
2770 drbd_send_uuids_skip_initial_sync(mdev);
2771 _drbd_uuid_set(mdev, UI_BITMAP, 0);
62b0da3a 2772 drbd_print_uuids(mdev, "cleared bitmap UUID");
87eeee41 2773 spin_lock_irq(&mdev->tconn->req_lock);
b411b363
PR
2774 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
2775 CS_VERBOSE, NULL);
87eeee41 2776 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363
PR
2777 }
2778 }
2779
2780 drbd_md_sync(mdev);
2781out_dec:
2782 put_ldev(mdev);
2783out:
8410da8f 2784 mutex_unlock(mdev->state_mutex);
3b98c0c2
LE
2785out_nolock:
2786 drbd_adm_finish(info, retcode);
774b3055
PR
2787 return 0;
2788}
2789
3b98c0c2
LE
2790static enum drbd_ret_code
2791drbd_check_conn_name(const char *name)
774b3055 2792{
3b98c0c2
LE
2793 if (!name || !name[0]) {
2794 drbd_msg_put_info("connection name missing");
2795 return ERR_MANDATORY_TAG;
774b3055 2796 }
3b98c0c2
LE
2797 /* if we want to use these in sysfs/configfs/debugfs some day,
2798 * we must not allow slashes */
2799 if (strchr(name, '/')) {
2800 drbd_msg_put_info("invalid connection name");
2801 return ERR_INVALID_REQUEST;
774b3055 2802 }
3b98c0c2 2803 return NO_ERROR;
774b3055
PR
2804}
2805
3b98c0c2 2806int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info)
b411b363 2807{
3b98c0c2 2808 enum drbd_ret_code retcode;
9f5180e5 2809
3b98c0c2
LE
2810 retcode = drbd_adm_prepare(skb, info, 0);
2811 if (!adm_ctx.reply_skb)
2812 return retcode;
2813 if (retcode != NO_ERROR)
2814 goto out;
b411b363 2815
3b98c0c2
LE
2816 retcode = drbd_check_conn_name(adm_ctx.conn_name);
2817 if (retcode != NO_ERROR)
2818 goto out;
b411b363 2819
3b98c0c2 2820 if (adm_ctx.tconn) {
38f19616
LE
2821 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
2822 retcode = ERR_INVALID_REQUEST;
2823 drbd_msg_put_info("connection exists");
2824 }
2825 /* else: still NO_ERROR */
3b98c0c2 2826 goto out;
b411b363
PR
2827 }
2828
3b98c0c2 2829 if (!drbd_new_tconn(adm_ctx.conn_name))
b411b363 2830 retcode = ERR_NOMEM;
3b98c0c2
LE
2831out:
2832 drbd_adm_finish(info, retcode);
2833 return 0;
b411b363
PR
2834}
2835
3b98c0c2 2836int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info)
b411b363 2837{
3b98c0c2
LE
2838 struct drbd_genlmsghdr *dh = info->userhdr;
2839 enum drbd_ret_code retcode;
b411b363 2840
3b98c0c2
LE
2841 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
2842 if (!adm_ctx.reply_skb)
2843 return retcode;
2844 if (retcode != NO_ERROR)
2845 goto out;
b411b363 2846
3b98c0c2
LE
2847 /* FIXME drop minor_count parameter, limit to MINORMASK */
2848 if (dh->minor >= minor_count) {
2849 drbd_msg_put_info("requested minor out of range");
2850 retcode = ERR_INVALID_REQUEST;
2851 goto out;
b411b363 2852 }
0c8e36d9 2853 if (adm_ctx.volume > DRBD_VOLUME_MAX) {
3b98c0c2
LE
2854 drbd_msg_put_info("requested volume id out of range");
2855 retcode = ERR_INVALID_REQUEST;
2856 goto out;
b411b363 2857 }
b411b363 2858
38f19616
LE
2859 /* drbd_adm_prepare made sure already
2860 * that mdev->tconn and mdev->vnr match the request. */
2861 if (adm_ctx.mdev) {
2862 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
2863 retcode = ERR_MINOR_EXISTS;
2864 /* else: still NO_ERROR */
2865 goto out;
2866 }
2867
3b98c0c2
LE
2868 retcode = conn_new_minor(adm_ctx.tconn, dh->minor, adm_ctx.volume);
2869out:
2870 drbd_adm_finish(info, retcode);
2871 return 0;
b411b363
PR
2872}
2873
85f75dd7
LE
2874static enum drbd_ret_code adm_delete_minor(struct drbd_conf *mdev)
2875{
2876 if (mdev->state.disk == D_DISKLESS &&
2877 /* no need to be mdev->state.conn == C_STANDALONE &&
2878 * we may want to delete a minor from a live replication group.
2879 */
2880 mdev->state.role == R_SECONDARY) {
ff370e5a 2881 drbd_delete_device(mdev);
85f75dd7
LE
2882 return NO_ERROR;
2883 } else
2884 return ERR_MINOR_CONFIGURED;
2885}
2886
3b98c0c2 2887int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info)
b411b363 2888{
3b98c0c2 2889 enum drbd_ret_code retcode;
b411b363 2890
3b98c0c2
LE
2891 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2892 if (!adm_ctx.reply_skb)
2893 return retcode;
2894 if (retcode != NO_ERROR)
2895 goto out;
b411b363 2896
ef356262 2897 down_write(&drbd_cfg_rwsem);
85f75dd7 2898 retcode = adm_delete_minor(adm_ctx.mdev);
ef356262 2899 up_write(&drbd_cfg_rwsem);
85f75dd7
LE
2900 /* if this was the last volume of this connection,
2901 * this will terminate all threads */
2902 if (retcode == NO_ERROR)
cffec5b2 2903 conn_reconfig_done(adm_ctx.tconn);
85f75dd7
LE
2904out:
2905 drbd_adm_finish(info, retcode);
2906 return 0;
2907}
2908
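/* Tear a whole connection down, in order: demote every volume to
 * secondary, disconnect, detach all disks, delete the volumes and finally
 * free the connection.  drbd_cfg_rwsem is held shared for the state
 * changes and exclusively for the deletions. */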
2909int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
2910{
2911 enum drbd_ret_code retcode;
2912 enum drbd_state_rv rv;
2913 struct drbd_conf *mdev;
2914 unsigned i;
2915
2916 retcode = drbd_adm_prepare(skb, info, 0);
2917 if (!adm_ctx.reply_skb)
2918 return retcode;
2919 if (retcode != NO_ERROR)
2920 goto out;
2921
2922 if (!adm_ctx.tconn) {
2923 retcode = ERR_CONN_NOT_KNOWN;
2924 goto out;
2925 }
2926
ef356262 2927 down_read(&drbd_cfg_rwsem);
85f75dd7
LE
2928 /* demote */
2929 idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
2930 retcode = drbd_set_role(mdev, R_SECONDARY, 0);
2931 if (retcode < SS_SUCCESS) {
2932 drbd_msg_put_info("failed to demote");
2933 goto out_unlock;
2934 }
2935 }
2936
2937 /* disconnect */
2938 rv = conn_try_disconnect(adm_ctx.tconn, 0);
2939 if (rv < SS_SUCCESS) {
2940 retcode = rv; /* enum type mismatch! */
2941 drbd_msg_put_info("failed to disconnect");
2942 goto out_unlock;
2943 }
2944
2945 /* detach */
2946 idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
2947 rv = adm_detach(mdev);
2948 if (rv < SS_SUCCESS) {
2949 retcode = rv; /* enum type mismatch! */
2950 drbd_msg_put_info("failed to detach");
2951 goto out_unlock;
2952 }
2953 }
ef356262 2954 up_read(&drbd_cfg_rwsem);
85f75dd7
LE
2955
2956 /* delete volumes */
ef356262 2957 down_write(&drbd_cfg_rwsem);
85f75dd7
LE
2958 idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
2959 retcode = adm_delete_minor(mdev);
2960 if (retcode != NO_ERROR) {
2961 /* "can not happen" */
2962 drbd_msg_put_info("failed to delete volume");
ef356262
PR
2963 up_write(&drbd_cfg_rwsem);
2964 goto out;
85f75dd7
LE
2965 }
2966 }
2967
2968 /* stop all threads */
2969 conn_reconfig_done(adm_ctx.tconn);
2970
2971 /* delete connection */
2972 if (conn_lowest_minor(adm_ctx.tconn) < 0) {
2973 drbd_free_tconn(adm_ctx.tconn);
2974 retcode = NO_ERROR;
2975 } else {
2976 /* "can not happen" */
2977 retcode = ERR_CONN_IN_USE;
2978 drbd_msg_put_info("failed to delete connection");
85f75dd7 2979 }
ef356262
PR
2980
2981 up_write(&drbd_cfg_rwsem);
2982 goto out;
85f75dd7 2983out_unlock:
ef356262 2984 up_read(&drbd_cfg_rwsem);
3b98c0c2
LE
2985out:
2986 drbd_adm_finish(info, retcode);
2987 return 0;
b411b363
PR
2988}
2989
3b98c0c2 2990int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info)
b411b363 2991{
3b98c0c2 2992 enum drbd_ret_code retcode;
b411b363 2993
3b98c0c2
LE
2994 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
2995 if (!adm_ctx.reply_skb)
2996 return retcode;
2997 if (retcode != NO_ERROR)
2998 goto out;
2999
ef356262 3000 down_write(&drbd_cfg_rwsem);
3b98c0c2
LE
3001 if (conn_lowest_minor(adm_ctx.tconn) < 0) {
3002 drbd_free_tconn(adm_ctx.tconn);
3003 retcode = NO_ERROR;
3004 } else {
3005 retcode = ERR_CONN_IN_USE;
b411b363 3006 }
ef356262 3007 up_write(&drbd_cfg_rwsem);
b411b363 3008
3b98c0c2
LE
3009out:
3010 drbd_adm_finish(info, retcode);
b411b363
PR
3011 return 0;
3012}
3013
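/* Broadcast a state info block to the drbd events multicast group;
 * -ESRCH (no listeners) is not treated as an error. */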
3b98c0c2 3014void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib)
b411b363 3015{
3b98c0c2
LE
3016 static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
3017 struct sk_buff *msg;
3018 struct drbd_genlmsghdr *d_out;
3019 unsigned seq;
3020 int err = -ENOMEM;
3021
3022 seq = atomic_inc_return(&drbd_genl_seq);
3023 msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
3024 if (!msg)
3025 goto failed;
3026
3027 err = -EMSGSIZE;
3028 d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT);
3029 if (!d_out) /* cannot happen, but anyways. */
3030 goto nla_put_failure;
3031 d_out->minor = mdev_to_minor(mdev);
3032 d_out->ret_code = 0;
3033
3034 if (nla_put_status_info(msg, mdev, sib))
3035 goto nla_put_failure;
3036 genlmsg_end(msg, d_out);
3037 err = drbd_genl_multicast_events(msg, 0);
3038 /* msg has been consumed or freed in netlink_broadcast() */
3039 if (err && err != -ESRCH)
3040 goto failed;
b411b363 3041
3b98c0c2 3042 return;
b411b363 3043
3b98c0c2
LE
3044nla_put_failure:
3045 nlmsg_free(msg);
3046failed:
3047 dev_err(DEV, "Error %d while broadcasting event. "
3048 "Event seq:%u sib_reason:%u\n",
3049 err, seq, sib->sib_reason);
b411b363 3050}