drivers/block/drbd/drbd_nl.c

   1 /*
   2    drbd_nl.c
   3
   4    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
   5
   6    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   7    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   8    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
   9
  10    drbd is free software; you can redistribute it and/or modify
  11    it under the terms of the GNU General Public License as published by
  12    the Free Software Foundation; either version 2, or (at your option)
  13    any later version.
  14
  15    drbd is distributed in the hope that it will be useful,
  16    but WITHOUT ANY WARRANTY; without even the implied warranty of
  17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18    GNU General Public License for more details.
  19
  20    You should have received a copy of the GNU General Public License
  21    along with drbd; see the file COPYING.  If not, write to
  22    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  23
  24  */
  25
  26 #include <linux/module.h>
  27 #include <linux/drbd.h>
  28 #include <linux/in.h>
  29 #include <linux/fs.h>
  30 #include <linux/file.h>
  31 #include <linux/slab.h>
  32 #include <linux/blkpg.h>
  33 #include <linux/cpumask.h>
  34 #include "drbd_int.h"
  35 #include "drbd_protocol.h"
  36 #include "drbd_req.h"
  37 #include <asm/unaligned.h>
  38 #include <linux/drbd_limits.h>
  39 #include <linux/kthread.h>
  40
  41 #include <net/genetlink.h>
  42
/*
 * Prototypes of the generic netlink command handlers implemented by DRBD.
 * NOTE(review): they are declared ahead of the <linux/genl_magic_func.h>
 * include below, presumably because the code generated there references
 * them -- confirm before moving or removing any of them.
 */
/* .doit */
// int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
// int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
/* .dumpit */
int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
  76
  77 #include <linux/drbd_genl_api.h>
  78 #include "drbd_nla.h"
  79 #include <linux/genl_magic_func.h>
  80
/* Used with blkdev_get_by_path() to claim our meta data device(s). */
static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
  83
  84 static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
  85 {
  86         genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
  87         if (genlmsg_reply(skb, info))
  88                 printk(KERN_ERR "drbd: error sending genl reply\n");
  89 }
  90
  91 /* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
  92  * reason it could fail was no space in skb, and there are 4k available. */
  93 int drbd_msg_put_info(struct sk_buff *skb, const char *info)
  94 {
  95         struct nlattr *nla;
  96         int err = -EMSGSIZE;
  97
  98         if (!info || !info[0])
  99                 return 0;
 100
 101         nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
 102         if (!nla)
 103                 return err;
 104
 105         err = nla_put_string(skb, T_info_text, info);
 106         if (err) {
 107                 nla_nest_cancel(skb, nla);
 108                 return err;
 109         } else
 110                 nla_nest_end(skb, nla);
 111         return 0;
 112 }
 113
 114 /* This would be a good candidate for a "pre_doit" hook,
 115  * and per-family private info->pointers.
 116  * But we need to stay compatible with older kernels.
 117  * If it returns successfully, adm_ctx members are valid.
 118  *
 119  * At this point, we still rely on the global genl_lock().
 120  * If we want to avoid that, and allow "genl_family.parallel_ops", we may need
 121  * to add additional synchronization against object destruction/modification.
 122  */
 123 #define DRBD_ADM_NEED_MINOR     1
 124 #define DRBD_ADM_NEED_RESOURCE  2
 125 #define DRBD_ADM_NEED_CONNECTION 4
 126 static int drbd_adm_prepare(struct drbd_config_context *adm_ctx,
 127         struct sk_buff *skb, struct genl_info *info, unsigned flags)
 128 {
 129         struct drbd_genlmsghdr *d_in = info->userhdr;
 130         const u8 cmd = info->genlhdr->cmd;
 131         int err;
 132
 133         memset(adm_ctx, 0, sizeof(*adm_ctx));
 134
 135         /* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
 136         if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
 137                return -EPERM;
 138
 139         adm_ctx->reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
 140         if (!adm_ctx->reply_skb) {
 141                 err = -ENOMEM;
 142                 goto fail;
 143         }
 144
 145         adm_ctx->reply_dh = genlmsg_put_reply(adm_ctx->reply_skb,
 146                                         info, &drbd_genl_family, 0, cmd);
 147         /* put of a few bytes into a fresh skb of >= 4k will always succeed.
 148          * but anyways */
 149         if (!adm_ctx->reply_dh) {
 150                 err = -ENOMEM;
 151                 goto fail;
 152         }
 153
 154         adm_ctx->reply_dh->minor = d_in->minor;
 155         adm_ctx->reply_dh->ret_code = NO_ERROR;
 156
 157         adm_ctx->volume = VOLUME_UNSPECIFIED;
 158         if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
 159                 struct nlattr *nla;
 160                 /* parse and validate only */
 161                 err = drbd_cfg_context_from_attrs(NULL, info);
 162                 if (err)
 163                         goto fail;
 164
 165                 /* It was present, and valid,
 166                  * copy it over to the reply skb. */
 167                 err = nla_put_nohdr(adm_ctx->reply_skb,
 168                                 info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
 169                                 info->attrs[DRBD_NLA_CFG_CONTEXT]);
 170                 if (err)
 171                         goto fail;
 172
 173                 /* and assign stuff to the adm_ctx */
 174                 nla = nested_attr_tb[__nla_type(T_ctx_volume)];
 175                 if (nla)
 176                         adm_ctx->volume = nla_get_u32(nla);
 177                 nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
 178                 if (nla)
 179                         adm_ctx->resource_name = nla_data(nla);
 180                 adm_ctx->my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
 181                 adm_ctx->peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
 182                 if ((adm_ctx->my_addr &&
 183                      nla_len(adm_ctx->my_addr) > sizeof(adm_ctx->connection->my_addr)) ||
 184                     (adm_ctx->peer_addr &&
 185                      nla_len(adm_ctx->peer_addr) > sizeof(adm_ctx->connection->peer_addr))) {
 186                         err = -EINVAL;
 187                         goto fail;
 188                 }
 189         }
 190
 191         adm_ctx->minor = d_in->minor;
 192         adm_ctx->device = minor_to_device(d_in->minor);
 193
 194         /* We are protected by the global genl_lock().
 195          * But we may explicitly drop it/retake it in drbd_adm_set_role(),
 196          * so make sure this object stays around. */
 197         if (adm_ctx->device)
 198                 kref_get(&adm_ctx->device->kref);
 199
 200         if (adm_ctx->resource_name) {
 201                 adm_ctx->resource = drbd_find_resource(adm_ctx->resource_name);
 202         }
 203
 204         if (!adm_ctx->device && (flags & DRBD_ADM_NEED_MINOR)) {
 205                 drbd_msg_put_info(adm_ctx->reply_skb, "unknown minor");
 206                 return ERR_MINOR_INVALID;
 207         }
 208         if (!adm_ctx->resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
 209                 drbd_msg_put_info(adm_ctx->reply_skb, "unknown resource");
 210                 if (adm_ctx->resource_name)
 211                         return ERR_RES_NOT_KNOWN;
 212                 return ERR_INVALID_REQUEST;
 213         }
 214
 215         if (flags & DRBD_ADM_NEED_CONNECTION) {
 216                 if (adm_ctx->resource) {
 217                         drbd_msg_put_info(adm_ctx->reply_skb, "no resource name expected");
 218                         return ERR_INVALID_REQUEST;
 219                 }
 220                 if (adm_ctx->device) {
 221                         drbd_msg_put_info(adm_ctx->reply_skb, "no minor number expected");
 222                         return ERR_INVALID_REQUEST;
 223                 }
 224                 if (adm_ctx->my_addr && adm_ctx->peer_addr)
 225                         adm_ctx->connection = conn_get_by_addrs(nla_data(adm_ctx->my_addr),
 226                                                           nla_len(adm_ctx->my_addr),
 227                                                           nla_data(adm_ctx->peer_addr),
 228                                                           nla_len(adm_ctx->peer_addr));
 229                 if (!adm_ctx->connection) {
 230                         drbd_msg_put_info(adm_ctx->reply_skb, "unknown connection");
 231                         return ERR_INVALID_REQUEST;
 232                 }
 233         }
 234
 235         /* some more paranoia, if the request was over-determined */
 236         if (adm_ctx->device && adm_ctx->resource &&
 237             adm_ctx->device->resource != adm_ctx->resource) {
 238                 pr_warning("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
 239                                 adm_ctx->minor, adm_ctx->resource->name,
 240                                 adm_ctx->device->resource->name);
 241                 drbd_msg_put_info(adm_ctx->reply_skb, "minor exists in different resource");
 242                 return ERR_INVALID_REQUEST;
 243         }
 244         if (adm_ctx->device &&
 245             adm_ctx->volume != VOLUME_UNSPECIFIED &&
 246             adm_ctx->volume != adm_ctx->device->vnr) {
 247                 pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
 248                                 adm_ctx->minor, adm_ctx->volume,
 249                                 adm_ctx->device->vnr,
 250                                 adm_ctx->device->resource->name);
 251                 drbd_msg_put_info(adm_ctx->reply_skb, "minor exists as different volume");
 252                 return ERR_INVALID_REQUEST;
 253         }
 254
 255         /* still, provide adm_ctx->resource always, if possible. */
 256         if (!adm_ctx->resource) {
 257                 adm_ctx->resource = adm_ctx->device ? adm_ctx->device->resource
 258                         : adm_ctx->connection ? adm_ctx->connection->resource : NULL;
 259                 if (adm_ctx->resource)
 260                         kref_get(&adm_ctx->resource->kref);
 261         }
 262
 263         return NO_ERROR;
 264
 265 fail:
 266         nlmsg_free(adm_ctx->reply_skb);
 267         adm_ctx->reply_skb = NULL;
 268         return err;
 269 }
 270
 271 static int drbd_adm_finish(struct drbd_config_context *adm_ctx,
 272         struct genl_info *info, int retcode)
 273 {
 274         if (adm_ctx->device) {
 275                 kref_put(&adm_ctx->device->kref, drbd_destroy_device);
 276                 adm_ctx->device = NULL;
 277         }
 278         if (adm_ctx->connection) {
 279                 kref_put(&adm_ctx->connection->kref, &drbd_destroy_connection);
 280                 adm_ctx->connection = NULL;
 281         }
 282         if (adm_ctx->resource) {
 283                 kref_put(&adm_ctx->resource->kref, drbd_destroy_resource);
 284                 adm_ctx->resource = NULL;
 285         }
 286
 287         if (!adm_ctx->reply_skb)
 288                 return -ENOMEM;
 289
 290         adm_ctx->reply_dh->ret_code = retcode;
 291         drbd_adm_send_reply(adm_ctx->reply_skb, info);
 292         return 0;
 293 }
 294
 295 static void setup_khelper_env(struct drbd_connection *connection, char **envp)
 296 {
 297         char *afs;
 298
 299         /* FIXME: A future version will not allow this case. */
 300         if (connection->my_addr_len == 0 || connection->peer_addr_len == 0)
 301                 return;
 302
 303         switch (((struct sockaddr *)&connection->peer_addr)->sa_family) {
 304         case AF_INET6:
 305                 afs = "ipv6";
 306                 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
 307                          &((struct sockaddr_in6 *)&connection->peer_addr)->sin6_addr);
 308                 break;
 309         case AF_INET:
 310                 afs = "ipv4";
 311                 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
 312                          &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
 313                 break;
 314         default:
 315                 afs = "ssocks";
 316                 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
 317                          &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
 318         }
 319         snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
 320 }
 321
 322 int drbd_khelper(struct drbd_device *device, char *cmd)
 323 {
 324         char *envp[] = { "HOME=/",
 325                         "TERM=linux",
 326                         "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
 327                          (char[20]) { }, /* address family */
 328                          (char[60]) { }, /* address */
 329                         NULL };
 330         char mb[12];
 331         char *argv[] = {usermode_helper, cmd, mb, NULL };
 332         struct drbd_connection *connection = first_peer_device(device)->connection;
 333         struct sib_info sib;
 334         int ret;
 335
 336         if (current == connection->worker.task)
 337                 set_bit(CALLBACK_PENDING, &connection->flags);
 338
 339         snprintf(mb, 12, "minor-%d", device_to_minor(device));
 340         setup_khelper_env(connection, envp);
 341
 342         /* The helper may take some time.
 343          * write out any unsynced meta data changes now */
 344         drbd_md_sync(device);
 345
 346         drbd_info(device, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
 347         sib.sib_reason = SIB_HELPER_PRE;
 348         sib.helper_name = cmd;
 349         drbd_bcast_event(device, &sib);
 350         ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
 351         if (ret)
 352                 drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
 353                                 usermode_helper, cmd, mb,
 354                                 (ret >> 8) & 0xff, ret);
 355         else
 356                 drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n",
 357                                 usermode_helper, cmd, mb,
 358                                 (ret >> 8) & 0xff, ret);
 359         sib.sib_reason = SIB_HELPER_POST;
 360         sib.helper_exit_code = ret;
 361         drbd_bcast_event(device, &sib);
 362
 363         if (current == connection->worker.task)
 364                 clear_bit(CALLBACK_PENDING, &connection->flags);
 365
 366         if (ret < 0) /* Ignore any ERRNOs we got. */
 367                 ret = 0;
 368
 369         return ret;
 370 }
 371
 372 static int conn_khelper(struct drbd_connection *connection, char *cmd)
 373 {
 374         char *envp[] = { "HOME=/",
 375                         "TERM=linux",
 376                         "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
 377                          (char[20]) { }, /* address family */
 378                          (char[60]) { }, /* address */
 379                         NULL };
 380         char *resource_name = connection->resource->name;
 381         char *argv[] = {usermode_helper, cmd, resource_name, NULL };
 382         int ret;
 383
 384         setup_khelper_env(connection, envp);
 385         conn_md_sync(connection);
 386
 387         drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name);
 388         /* TODO: conn_bcast_event() ?? */
 389
 390         ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
 391         if (ret)
 392                 drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
 393                           usermode_helper, cmd, resource_name,
 394                           (ret >> 8) & 0xff, ret);
 395         else
 396                 drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
 397                           usermode_helper, cmd, resource_name,
 398                           (ret >> 8) & 0xff, ret);
 399         /* TODO: conn_bcast_event() ?? */
 400
 401         if (ret < 0) /* Ignore any ERRNOs we got. */
 402                 ret = 0;
 403
 404         return ret;
 405 }
 406
 407 static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connection)
 408 {
 409         enum drbd_fencing_p fp = FP_NOT_AVAIL;
 410         struct drbd_peer_device *peer_device;
 411         int vnr;
 412
 413         rcu_read_lock();
 414         idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
 415                 struct drbd_device *device = peer_device->device;
 416                 if (get_ldev_if_state(device, D_CONSISTENT)) {
 417                         struct disk_conf *disk_conf =
 418                                 rcu_dereference(peer_device->device->ldev->disk_conf);
 419                         fp = max_t(enum drbd_fencing_p, fp, disk_conf->fencing);
 420                         put_ldev(device);
 421                 }
 422         }
 423         rcu_read_unlock();
 424
 425         if (fp == FP_NOT_AVAIL) {
 426                 /* IO Suspending works on the whole resource.
 427                    Do it only for one device. */
 428                 vnr = 0;
 429                 peer_device = idr_get_next(&connection->peer_devices, &vnr);
 430                 drbd_change_state(peer_device->device, CS_VERBOSE | CS_HARD, NS(susp_fen, 0));
 431         }
 432
 433         return fp;
 434 }
 435
 436 bool conn_try_outdate_peer(struct drbd_connection *connection)
 437 {
 438         unsigned int connect_cnt;
 439         union drbd_state mask = { };
 440         union drbd_state val = { };
 441         enum drbd_fencing_p fp;
 442         char *ex_to_string;
 443         int r;
 444
 445         spin_lock_irq(&connection->resource->req_lock);
 446         if (connection->cstate >= C_WF_REPORT_PARAMS) {
 447                 drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n");
 448                 spin_unlock_irq(&connection->resource->req_lock);
 449                 return false;
 450         }
 451
 452         connect_cnt = connection->connect_cnt;
 453         spin_unlock_irq(&connection->resource->req_lock);
 454
 455         fp = highest_fencing_policy(connection);
 456         switch (fp) {
 457         case FP_NOT_AVAIL:
 458                 drbd_warn(connection, "Not fencing peer, I'm not even Consistent myself.\n");
 459                 goto out;
 460         case FP_DONT_CARE:
 461                 return true;
 462         default: ;
 463         }
 464
 465         r = conn_khelper(connection, "fence-peer");
 466
 467         switch ((r>>8) & 0xff) {
 468         case 3: /* peer is inconsistent */
 469                 ex_to_string = "peer is inconsistent or worse";
 470                 mask.pdsk = D_MASK;
 471                 val.pdsk = D_INCONSISTENT;
 472                 break;
 473         case 4: /* peer got outdated, or was already outdated */
 474                 ex_to_string = "peer was fenced";
 475                 mask.pdsk = D_MASK;
 476                 val.pdsk = D_OUTDATED;
 477                 break;
 478         case 5: /* peer was down */
 479                 if (conn_highest_disk(connection) == D_UP_TO_DATE) {
 480                         /* we will(have) create(d) a new UUID anyways... */
 481                         ex_to_string = "peer is unreachable, assumed to be dead";
 482                         mask.pdsk = D_MASK;
 483                         val.pdsk = D_OUTDATED;
 484                 } else {
 485                         ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
 486                 }
 487                 break;
 488         case 6: /* Peer is primary, voluntarily outdate myself.
 489                  * This is useful when an unconnected R_SECONDARY is asked to
 490                  * become R_PRIMARY, but finds the other peer being active. */
 491                 ex_to_string = "peer is active";
 492                 drbd_warn(connection, "Peer is primary, outdating myself.\n");
 493                 mask.disk = D_MASK;
 494                 val.disk = D_OUTDATED;
 495                 break;
 496         case 7:
 497                 if (fp != FP_STONITH)
 498                         drbd_err(connection, "fence-peer() = 7 && fencing != Stonith !!!\n");
 499                 ex_to_string = "peer was stonithed";
 500                 mask.pdsk = D_MASK;
 501                 val.pdsk = D_OUTDATED;
 502                 break;
 503         default:
 504                 /* The script is broken ... */
 505                 drbd_err(connection, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
 506                 return false; /* Eventually leave IO frozen */
 507         }
 508
 509         drbd_info(connection, "fence-peer helper returned %d (%s)\n",
 510                   (r>>8) & 0xff, ex_to_string);
 511
 512  out:
 513
 514         /* Not using
 515            conn_request_state(connection, mask, val, CS_VERBOSE);
 516            here, because we might were able to re-establish the connection in the
 517            meantime. */
 518         spin_lock_irq(&connection->resource->req_lock);
 519         if (connection->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &connection->flags)) {
 520                 if (connection->connect_cnt != connect_cnt)
 521                         /* In case the connection was established and droped
 522                            while the fence-peer handler was running, ignore it */
 523                         drbd_info(connection, "Ignoring fence-peer exit code\n");
 524                 else
 525                         _conn_request_state(connection, mask, val, CS_VERBOSE);
 526         }
 527         spin_unlock_irq(&connection->resource->req_lock);
 528
 529         return conn_highest_pdsk(connection) <= D_OUTDATED;
 530 }
 531
 532 static int _try_outdate_peer_async(void *data)
 533 {
 534         struct drbd_connection *connection = (struct drbd_connection *)data;
 535
 536         conn_try_outdate_peer(connection);
 537
 538         kref_put(&connection->kref, drbd_destroy_connection);
 539         return 0;
 540 }
 541
 542 void conn_try_outdate_peer_async(struct drbd_connection *connection)
 543 {
 544         struct task_struct *opa;
 545
 546         kref_get(&connection->kref);
 547         opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h");
 548         if (IS_ERR(opa)) {
 549                 drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n");
 550                 kref_put(&connection->kref, drbd_destroy_connection);
 551         }
 552 }
 553
 554 enum drbd_state_rv
 555 drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int force)
 556 {
 557         struct drbd_peer_device *const peer_device = first_peer_device(device);
 558         struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
 559         const int max_tries = 4;
 560         enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
 561         struct net_conf *nc;
 562         int try = 0;
 563         int forced = 0;
 564         union drbd_state mask, val;
 565
 566         if (new_role == R_PRIMARY) {
 567                 struct drbd_connection *connection;
 568
 569                 /* Detect dead peers as soon as possible.  */
 570
 571                 rcu_read_lock();
 572                 for_each_connection(connection, device->resource)
 573                         request_ping(connection);
 574                 rcu_read_unlock();
 575         }
 576
 577         mutex_lock(device->state_mutex);
 578
 579         mask.i = 0; mask.role = R_MASK;
 580         val.i  = 0; val.role  = new_role;
 581
 582         while (try++ < max_tries) {
 583                 rv = _drbd_request_state(device, mask, val, CS_WAIT_COMPLETE);
 584
 585                 /* in case we first succeeded to outdate,
 586                  * but now suddenly could establish a connection */
 587                 if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
 588                         val.pdsk = 0;
 589                         mask.pdsk = 0;
 590                         continue;
 591                 }
 592
 593                 if (rv == SS_NO_UP_TO_DATE_DISK && force &&
 594                     (device->state.disk < D_UP_TO_DATE &&
 595                      device->state.disk >= D_INCONSISTENT)) {
 596                         mask.disk = D_MASK;
 597                         val.disk  = D_UP_TO_DATE;
 598                         forced = 1;
 599                         continue;
 600                 }
 601
 602                 if (rv == SS_NO_UP_TO_DATE_DISK &&
 603                     device->state.disk == D_CONSISTENT && mask.pdsk == 0) {
 604                         D_ASSERT(device, device->state.pdsk == D_UNKNOWN);
 605
 606                         if (conn_try_outdate_peer(connection)) {
 607                                 val.disk = D_UP_TO_DATE;
 608                                 mask.disk = D_MASK;
 609                         }
 610                         continue;
 611                 }
 612
 613                 if (rv == SS_NOTHING_TO_DO)
 614                         goto out;
 615                 if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
 616                         if (!conn_try_outdate_peer(connection) && force) {
 617                                 drbd_warn(device, "Forced into split brain situation!\n");
 618                                 mask.pdsk = D_MASK;
 619                                 val.pdsk  = D_OUTDATED;
 620
 621                         }
 622                         continue;
 623                 }
 624                 if (rv == SS_TWO_PRIMARIES) {
 625                         /* Maybe the peer is detected as dead very soon...
 626                            retry at most once more in this case. */
 627                         int timeo;
 628                         rcu_read_lock();
 629                         nc = rcu_dereference(connection->net_conf);
 630                         timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
 631                         rcu_read_unlock();
 632                         schedule_timeout_interruptible(timeo);
 633                         if (try < max_tries)
 634                                 try = max_tries - 1;
 635                         continue;
 636                 }
 637                 if (rv < SS_SUCCESS) {
 638                         rv = _drbd_request_state(device, mask, val,
 639                                                 CS_VERBOSE + CS_WAIT_COMPLETE);
 640                         if (rv < SS_SUCCESS)
 641                                 goto out;
 642                 }
 643                 break;
 644         }
 645
 646         if (rv < SS_SUCCESS)
 647                 goto out;
 648
 649         if (forced)
 650                 drbd_warn(device, "Forced to consider local data as UpToDate!\n");
 651
 652         /* Wait until nothing is on the fly :) */
 653         wait_event(device->misc_wait, atomic_read(&device->ap_pending_cnt) == 0);
 654
 655         /* FIXME also wait for all pending P_BARRIER_ACK? */
 656
 657         if (new_role == R_SECONDARY) {
 658                 if (get_ldev(device)) {
 659                         device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
 660                         put_ldev(device);
 661                 }
 662         } else {
 663                 mutex_lock(&device->resource->conf_update);
 664                 nc = connection->net_conf;
 665                 if (nc)
 666                         nc->discard_my_data = 0; /* without copy; single bit op is atomic */
 667                 mutex_unlock(&device->resource->conf_update);
 668
 669                 if (get_ldev(device)) {
 670                         if (((device->state.conn < C_CONNECTED ||
 671                                device->state.pdsk <= D_FAILED)
 672                               && device->ldev->md.uuid[UI_BITMAP] == 0) || forced)
 673                                 drbd_uuid_new_current(device);
 674
 675                         device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
 676                         put_ldev(device);
 677                 }
 678         }
 679
 680         /* writeout of activity log covered areas of the bitmap
 681          * to stable storage done in after state change already */
 682
 683         if (device->state.conn >= C_WF_REPORT_PARAMS) {
 684                 /* if this was forced, we should consider sync */
 685                 if (forced)
 686                         drbd_send_uuids(peer_device);
 687                 drbd_send_current_state(peer_device);
 688         }
 689
 690         drbd_md_sync(device);
 691         set_disk_ro(device->vdisk, new_role == R_SECONDARY);
 692         kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
 693 out:
 694         mutex_unlock(device->state_mutex);
 695         return rv;
 696 }
 697
 698 static const char *from_attrs_err_to_txt(int err)
 699 {
 700         return  err == -ENOMSG ? "required attribute missing" :
 701                 err == -EOPNOTSUPP ? "unknown mandatory attribute" :
 702                 err == -EEXIST ? "can not change invariant setting" :
 703                 "invalid attribute value";
 704 }
 705
 706 int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
 707 {
 708         struct drbd_config_context adm_ctx;
 709         struct set_role_parms parms;
 710         int err;
 711         enum drbd_ret_code retcode;
 712
 713         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
 714         if (!adm_ctx.reply_skb)
 715                 return retcode;
 716         if (retcode != NO_ERROR)
 717                 goto out;
 718
 719         memset(&parms, 0, sizeof(parms));
 720         if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
 721                 err = set_role_parms_from_attrs(&parms, info);
 722                 if (err) {
 723                         retcode = ERR_MANDATORY_TAG;
 724                         drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
 725                         goto out;
 726                 }
 727         }
 728         genl_unlock();
 729         mutex_lock(&adm_ctx.resource->adm_mutex);
 730
 731         if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
 732                 retcode = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
 733         else
 734                 retcode = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
 735
 736         mutex_unlock(&adm_ctx.resource->adm_mutex);
 737         genl_lock();
 738 out:
 739         drbd_adm_finish(&adm_ctx, info, retcode);
 740         return 0;
 741 }
 742
 743 /* Initializes the md.*_offset members, so we are able to find
 744  * the on disk meta data.
 745  *
 746  * We currently have two possible layouts:
 747  * external:
 748  *   |----------- md_size_sect ------------------|
 749  *   [ 4k superblock ][ activity log ][  Bitmap  ]
 750  *   | al_offset == 8 |
 751  *   | bm_offset = al_offset + X      |
 752  *  ==> bitmap sectors = md_size_sect - bm_offset
 753  *
 754  * internal:
 755  *            |----------- md_size_sect ------------------|
 756  * [data.....][  Bitmap  ][ activity log ][ 4k superblock ]
 757  *                        | al_offset < 0 |
 758  *            | bm_offset = al_offset - Y |
 759  *  ==> bitmap sectors = Y = al_offset - bm_offset
 760  *
 761  *  Activity log size used to be fixed 32kB,
 762  *  but is about to become configurable.
 763  */
 764 static void drbd_md_set_sector_offsets(struct drbd_device *device,
 765                                        struct drbd_backing_dev *bdev)
 766 {
 767         sector_t md_size_sect = 0;
 768         unsigned int al_size_sect = bdev->md.al_size_4k * 8;
 769
 770         bdev->md.md_offset = drbd_md_ss(bdev);
 771
 772         switch (bdev->md.meta_dev_idx) {
 773         default:
 774                 /* v07 style fixed size indexed meta data */
 775                 bdev->md.md_size_sect = MD_128MB_SECT;
 776                 bdev->md.al_offset = MD_4kB_SECT;
 777                 bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
 778                 break;
 779         case DRBD_MD_INDEX_FLEX_EXT:
 780                 /* just occupy the full device; unit: sectors */
 781                 bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
 782                 bdev->md.al_offset = MD_4kB_SECT;
 783                 bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
 784                 break;
 785         case DRBD_MD_INDEX_INTERNAL:
 786         case DRBD_MD_INDEX_FLEX_INT:
 787                 /* al size is still fixed */
 788                 bdev->md.al_offset = -al_size_sect;
 789                 /* we need (slightly less than) ~ this much bitmap sectors: */
 790                 md_size_sect = drbd_get_capacity(bdev->backing_bdev);
 791                 md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
 792                 md_size_sect = BM_SECT_TO_EXT(md_size_sect);
 793                 md_size_sect = ALIGN(md_size_sect, 8);
 794
 795                 /* plus the "drbd meta data super block",
 796                  * and the activity log; */
 797                 md_size_sect += MD_4kB_SECT + al_size_sect;
 798
 799                 bdev->md.md_size_sect = md_size_sect;
 800                 /* bitmap offset is adjusted by 'super' block size */
 801                 bdev->md.bm_offset   = -md_size_sect + MD_4kB_SECT;
 802                 break;
 803         }
 804 }
 805
 806 /* input size is expected to be in KB */
 807 char *ppsize(char *buf, unsigned long long size)
 808 {
 809         /* Needs 9 bytes at max including trailing NUL:
 810          * -1ULL ==> "16384 EB" */
 811         static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
 812         int base = 0;
 813         while (size >= 10000 && base < sizeof(units)-1) {
 814                 /* shift + round */
 815                 size = (size >> 10) + !!(size & (1<<9));
 816                 base++;
 817         }
 818         sprintf(buf, "%u %cB", (unsigned)size, units[base]);
 819
 820         return buf;
 821 }
 822
/* There is still a theoretical deadlock when called from the receiver
 * on a D_INCONSISTENT R_PRIMARY:
 *  remote READ does inc_ap_bio, receiver would need to receive the answer
 *  packet from remote to dec_ap_bio again.
 *  receiver receive_sizes() comes here,
 *  waits for ap_bio_cnt == 0. -> deadlock.
 * But this cannot actually happen, because:
 *  R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
 *  (not connected, or bad/no disk on peer):
 *  see drbd_fail_request_early, ap_bio_cnt is zero.
 *  R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
 *  peer may not initiate a resize.
 */
 836 /* Note these are not to be confused with
 837  * drbd_adm_suspend_io/drbd_adm_resume_io,
 838  * which are (sub) state changes triggered by admin (drbdsetup),
 839  * and can be long lived.
 840  * This changes an device->flag, is triggered by drbd internals,
 841  * and should be short-lived. */
 842 void drbd_suspend_io(struct drbd_device *device)
 843 {
 844         set_bit(SUSPEND_IO, &device->flags);
 845         if (drbd_suspended(device))
 846                 return;
 847         wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));
 848 }
 849
 850 void drbd_resume_io(struct drbd_device *device)
 851 {
 852         clear_bit(SUSPEND_IO, &device->flags);
 853         wake_up(&device->misc_wait);
 854 }
 855
 856 /**
 857  * drbd_determine_dev_size() -  Sets the right device size obeying all constraints
 858  * @device:     DRBD device.
 859  *
 860  * Returns 0 on success, negative return values indicate errors.
 861  * You should call drbd_md_sync() after calling this function.
 862  */
 863 enum determine_dev_size
 864 drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
 865 {
 866         sector_t prev_first_sect, prev_size; /* previous meta location */
 867         sector_t la_size_sect, u_size;
 868         struct drbd_md *md = &device->ldev->md;
 869         u32 prev_al_stripe_size_4k;
 870         u32 prev_al_stripes;
 871         sector_t size;
 872         char ppb[10];
 873         void *buffer;
 874
 875         int md_moved, la_size_changed;
 876         enum determine_dev_size rv = DS_UNCHANGED;
 877
 878         /* race:
 879          * application request passes inc_ap_bio,
 880          * but then cannot get an AL-reference.
 881          * this function later may wait on ap_bio_cnt == 0. -> deadlock.
 882          *
 883          * to avoid that:
 884          * Suspend IO right here.
 885          * still lock the act_log to not trigger ASSERTs there.
 886          */
 887         drbd_suspend_io(device);
 888         buffer = drbd_md_get_buffer(device); /* Lock meta-data IO */
 889         if (!buffer) {
 890                 drbd_resume_io(device);
 891                 return DS_ERROR;
 892         }
 893
 894         /* no wait necessary anymore, actually we could assert that */
 895         wait_event(device->al_wait, lc_try_lock(device->act_log));
 896
 897         prev_first_sect = drbd_md_first_sector(device->ldev);
 898         prev_size = device->ldev->md.md_size_sect;
 899         la_size_sect = device->ldev->md.la_size_sect;
 900
 901         if (rs) {
 902                 /* rs is non NULL if we should change the AL layout only */
 903
 904                 prev_al_stripes = md->al_stripes;
 905                 prev_al_stripe_size_4k = md->al_stripe_size_4k;
 906
 907                 md->al_stripes = rs->al_stripes;
 908                 md->al_stripe_size_4k = rs->al_stripe_size / 4;
 909                 md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
 910         }
 911
 912         drbd_md_set_sector_offsets(device, device->ldev);
 913
 914         rcu_read_lock();
 915         u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
 916         rcu_read_unlock();
 917         size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED);
 918
 919         if (size < la_size_sect) {
 920                 if (rs && u_size == 0) {
 921                         /* Remove "rs &&" later. This check should always be active, but
 922                            right now the receiver expects the permissive behavior */
 923                         drbd_warn(device, "Implicit shrink not allowed. "
 924                                  "Use --size=%llus for explicit shrink.\n",
 925                                  (unsigned long long)size);
 926                         rv = DS_ERROR_SHRINK;
 927                 }
 928                 if (u_size > size)
 929                         rv = DS_ERROR_SPACE_MD;
 930                 if (rv != DS_UNCHANGED)
 931                         goto err_out;
 932         }
 933
 934         if (drbd_get_capacity(device->this_bdev) != size ||
 935             drbd_bm_capacity(device) != size) {
 936                 int err;
 937                 err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC));
 938                 if (unlikely(err)) {
 939                         /* currently there is only one error: ENOMEM! */
 940                         size = drbd_bm_capacity(device)>>1;
 941                         if (size == 0) {
 942                                 drbd_err(device, "OUT OF MEMORY! "
 943                                     "Could not allocate bitmap!\n");
 944                         } else {
 945                                 drbd_err(device, "BM resizing failed. "
 946                                     "Leaving size unchanged at size = %lu KB\n",
 947                                     (unsigned long)size);
 948                         }
 949                         rv = DS_ERROR;
 950                 }
 951                 /* racy, see comments above. */
 952                 drbd_set_my_capacity(device, size);
 953                 device->ldev->md.la_size_sect = size;
 954                 drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
 955                      (unsigned long long)size>>1);
 956         }
 957         if (rv <= DS_ERROR)
 958                 goto err_out;
 959
 960         la_size_changed = (la_size_sect != device->ldev->md.la_size_sect);
 961
 962         md_moved = prev_first_sect != drbd_md_first_sector(device->ldev)
 963                 || prev_size       != device->ldev->md.md_size_sect;
 964
 965         if (la_size_changed || md_moved || rs) {
 966                 u32 prev_flags;
 967
 968                 /* We do some synchronous IO below, which may take some time.
 969                  * Clear the timer, to avoid scary "timer expired!" messages,
 970                  * "Superblock" is written out at least twice below, anyways. */
 971                 del_timer(&device->md_sync_timer);
 972                 drbd_al_shrink(device); /* All extents inactive. */
 973
 974                 prev_flags = md->flags;
 975                 md->flags &= ~MDF_PRIMARY_IND;
 976                 drbd_md_write(device, buffer);
 977
 978                 drbd_info(device, "Writing the whole bitmap, %s\n",
 979                          la_size_changed && md_moved ? "size changed and md moved" :
 980                          la_size_changed ? "size changed" : "md moved");
 981                 /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
 982                 drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
 983                                "size changed", BM_LOCKED_MASK);
 984                 drbd_initialize_al(device, buffer);
 985
 986                 md->flags = prev_flags;
 987                 drbd_md_write(device, buffer);
 988
 989                 if (rs)
 990                         drbd_info(device, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
 991                                   md->al_stripes, md->al_stripe_size_4k * 4);
 992         }
 993
 994         if (size > la_size_sect)
 995                 rv = la_size_sect ? DS_GREW : DS_GREW_FROM_ZERO;
 996         if (size < la_size_sect)
 997                 rv = DS_SHRUNK;
 998
 999         if (0) {
1000         err_out:
1001                 if (rs) {
1002                         md->al_stripes = prev_al_stripes;
1003                         md->al_stripe_size_4k = prev_al_stripe_size_4k;
1004                         md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k;
1005
1006                         drbd_md_set_sector_offsets(device, device->ldev);
1007                 }
1008         }
1009         lc_unlock(device->act_log);
1010         wake_up(&device->al_wait);
1011         drbd_md_put_buffer(device);
1012         drbd_resume_io(device);
1013
1014         return rv;
1015 }
1016
1017 sector_t
1018 drbd_new_dev_size(struct drbd_device *device, struct drbd_backing_dev *bdev,
1019                   sector_t u_size, int assume_peer_has_space)
1020 {
1021         sector_t p_size = device->p_size;   /* partner's disk size. */
1022         sector_t la_size_sect = bdev->md.la_size_sect; /* last agreed size. */
1023         sector_t m_size; /* my size */
1024         sector_t size = 0;
1025
1026         m_size = drbd_get_max_capacity(bdev);
1027
1028         if (device->state.conn < C_CONNECTED && assume_peer_has_space) {
1029                 drbd_warn(device, "Resize while not connected was forced by the user!\n");
1030                 p_size = m_size;
1031         }
1032
1033         if (p_size && m_size) {
1034                 size = min_t(sector_t, p_size, m_size);
1035         } else {
1036                 if (la_size_sect) {
1037                         size = la_size_sect;
1038                         if (m_size && m_size < size)
1039                                 size = m_size;
1040                         if (p_size && p_size < size)
1041                                 size = p_size;
1042                 } else {
1043                         if (m_size)
1044                                 size = m_size;
1045                         if (p_size)
1046                                 size = p_size;
1047                 }
1048         }
1049
1050         if (size == 0)
1051                 drbd_err(device, "Both nodes diskless!\n");
1052
1053         if (u_size) {
1054                 if (u_size > size)
1055                         drbd_err(device, "Requested disk size is too big (%lu > %lu)\n",
1056                             (unsigned long)u_size>>1, (unsigned long)size>>1);
1057                 else
1058                         size = u_size;
1059         }
1060
1061         return size;
1062 }
1063
1064 /**
1065  * drbd_check_al_size() - Ensures that the AL is of the right size
1066  * @device:     DRBD device.
1067  *
1068  * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
1069  * failed, and 0 on success. You should call drbd_md_sync() after you called
1070  * this function.
1071  */
1072 static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
1073 {
1074         struct lru_cache *n, *t;
1075         struct lc_element *e;
1076         unsigned int in_use;
1077         int i;
1078
1079         if (device->act_log &&
1080             device->act_log->nr_elements == dc->al_extents)
1081                 return 0;
1082
1083         in_use = 0;
1084         t = device->act_log;
1085         n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
1086                 dc->al_extents, sizeof(struct lc_element), 0);
1087
1088         if (n == NULL) {
1089                 drbd_err(device, "Cannot allocate act_log lru!\n");
1090                 return -ENOMEM;
1091         }
1092         spin_lock_irq(&device->al_lock);
1093         if (t) {
1094                 for (i = 0; i < t->nr_elements; i++) {
1095                         e = lc_element_by_index(t, i);
1096                         if (e->refcnt)
1097                                 drbd_err(device, "refcnt(%d)==%d\n",
1098                                     e->lc_number, e->refcnt);
1099                         in_use += e->refcnt;
1100                 }
1101         }
1102         if (!in_use)
1103                 device->act_log = n;
1104         spin_unlock_irq(&device->al_lock);
1105         if (in_use) {
1106                 drbd_err(device, "Activity log still in use!\n");
1107                 lc_destroy(n);
1108                 return -EBUSY;
1109         } else {
1110                 if (t)
1111                         lc_destroy(t);
1112         }
1113         drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elemens */
1114         return 0;
1115 }
1116
1117 static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev,
1118                                    unsigned int max_bio_size)
1119 {
1120         struct request_queue * const q = device->rq_queue;
1121         unsigned int max_hw_sectors = max_bio_size >> 9;
1122         unsigned int max_segments = 0;
1123         struct request_queue *b = NULL;
1124
1125         if (bdev) {
1126                 b = bdev->backing_bdev->bd_disk->queue;
1127
1128                 max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
1129                 rcu_read_lock();
1130                 max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs;
1131                 rcu_read_unlock();
1132
1133                 blk_set_stacking_limits(&q->limits);
1134                 blk_queue_max_write_same_sectors(q, 0);
1135         }
1136
1137         blk_queue_logical_block_size(q, 512);
1138         blk_queue_max_hw_sectors(q, max_hw_sectors);
1139         /* This is the workaround for "bio would need to, but cannot, be split" */
1140         blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
1141         blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
1142
1143         if (b) {
1144                 struct drbd_connection *connection = first_peer_device(device)->connection;
1145
1146                 if (blk_queue_discard(b) &&
1147                     (connection->cstate < C_CONNECTED || connection->agreed_features & FF_TRIM)) {
1148                         /* For now, don't allow more than one activity log extent worth of data
1149                          * to be discarded in one go. We may need to rework drbd_al_begin_io()
1150                          * to allow for even larger discard ranges */
1151                         q->limits.max_discard_sectors = DRBD_MAX_DISCARD_SECTORS;
1152
1153                         queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
1154                         /* REALLY? Is stacking secdiscard "legal"? */
1155                         if (blk_queue_secdiscard(b))
1156                                 queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, q);
1157                 } else {
1158                         q->limits.max_discard_sectors = 0;
1159                         queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
1160                         queue_flag_clear_unlocked(QUEUE_FLAG_SECDISCARD, q);
1161                 }
1162
1163                 blk_queue_stack_limits(q, b);
1164
1165                 if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
1166                         drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
1167                                  q->backing_dev_info.ra_pages,
1168                                  b->backing_dev_info.ra_pages);
1169                         q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
1170                 }
1171         }
1172 }
1173
1174 void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev)
1175 {
1176         unsigned int now, new, local, peer;
1177
1178         now = queue_max_hw_sectors(device->rq_queue) << 9;
1179         local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */
1180         peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */
1181
1182         if (bdev) {
1183                 local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9;
1184                 device->local_max_bio_size = local;
1185         }
1186         local = min(local, DRBD_MAX_BIO_SIZE);
1187
1188         /* We may ignore peer limits if the peer is modern enough.
1189            Because new from 8.3.8 onwards the peer can use multiple
1190            BIOs for a single peer_request */
1191         if (device->state.conn >= C_WF_REPORT_PARAMS) {
1192                 if (first_peer_device(device)->connection->agreed_pro_version < 94)
1193                         peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
1194                         /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
1195                 else if (first_peer_device(device)->connection->agreed_pro_version == 94)
1196                         peer = DRBD_MAX_SIZE_H80_PACKET;
1197                 else if (first_peer_device(device)->connection->agreed_pro_version < 100)
1198                         peer = DRBD_MAX_BIO_SIZE_P95;  /* drbd 8.3.8 onwards, before 8.4.0 */
1199                 else
1200                         peer = DRBD_MAX_BIO_SIZE;
1201
1202                 /* We may later detach and re-attach on a disconnected Primary.
1203                  * Avoid this setting to jump back in that case.
1204                  * We want to store what we know the peer DRBD can handle,
1205                  * not what the peer IO backend can handle. */
1206                 if (peer > device->peer_max_bio_size)
1207                         device->peer_max_bio_size = peer;
1208         }
1209         new = min(local, peer);
1210
1211         if (device->state.role == R_PRIMARY && new < now)
1212                 drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now);
1213
1214         if (new != now)
1215                 drbd_info(device, "max BIO size = %u\n", new);
1216
1217         drbd_setup_queue_param(device, bdev, new);
1218 }
1219
1220 /* Starts the worker thread */
1221 static void conn_reconfig_start(struct drbd_connection *connection)
1222 {
1223         drbd_thread_start(&connection->worker);
1224         drbd_flush_workqueue(&connection->sender_work);
1225 }
1226
1227 /* if still unconfigured, stops worker again. */
1228 static void conn_reconfig_done(struct drbd_connection *connection)
1229 {
1230         bool stop_threads;
1231         spin_lock_irq(&connection->resource->req_lock);
1232         stop_threads = conn_all_vols_unconf(connection) &&
1233                 connection->cstate == C_STANDALONE;
1234         spin_unlock_irq(&connection->resource->req_lock);
1235         if (stop_threads) {
1236                 /* asender is implicitly stopped by receiver
1237                  * in conn_disconnect() */
1238                 drbd_thread_stop(&connection->receiver);
1239                 drbd_thread_stop(&connection->worker);
1240         }
1241 }
1242
1243 /* Make sure IO is suspended before calling this function(). */
1244 static void drbd_suspend_al(struct drbd_device *device)
1245 {
1246         int s = 0;
1247
1248         if (!lc_try_lock(device->act_log)) {
1249                 drbd_warn(device, "Failed to lock al in drbd_suspend_al()\n");
1250                 return;
1251         }
1252
1253         drbd_al_shrink(device);
1254         spin_lock_irq(&device->resource->req_lock);
1255         if (device->state.conn < C_CONNECTED)
1256                 s = !test_and_set_bit(AL_SUSPENDED, &device->flags);
1257         spin_unlock_irq(&device->resource->req_lock);
1258         lc_unlock(device->act_log);
1259
1260         if (s)
1261                 drbd_info(device, "Suspended AL updates\n");
1262 }
1263
1264
1265 static bool should_set_defaults(struct genl_info *info)
1266 {
1267         unsigned flags = ((struct drbd_genlmsghdr*)info->userhdr)->flags;
1268         return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS);
1269 }
1270
1271 static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
1272 {
1273         /* This is limited by 16 bit "slot" numbers,
1274          * and by available on-disk context storage.
1275          *
1276          * Also (u16)~0 is special (denotes a "free" extent).
1277          *
1278          * One transaction occupies one 4kB on-disk block,
1279          * we have n such blocks in the on disk ring buffer,
1280          * the "current" transaction may fail (n-1),
1281          * and there is 919 slot numbers context information per transaction.
1282          *
1283          * 72 transaction blocks amounts to more than 2**16 context slots,
1284          * so cap there first.
1285          */
1286         const unsigned int max_al_nr = DRBD_AL_EXTENTS_MAX;
1287         const unsigned int sufficient_on_disk =
1288                 (max_al_nr + AL_CONTEXT_PER_TRANSACTION -1)
1289                 /AL_CONTEXT_PER_TRANSACTION;
1290
1291         unsigned int al_size_4k = bdev->md.al_size_4k;
1292
1293         if (al_size_4k > sufficient_on_disk)
1294                 return max_al_nr;
1295
1296         return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
1297 }
1298
1299 static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b)
1300 {
1301         return  a->disk_barrier != b->disk_barrier ||
1302                 a->disk_flushes != b->disk_flushes ||
1303                 a->disk_drain != b->disk_drain;
1304 }
1305
1306 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
1307 {
1308         struct drbd_config_context adm_ctx;
1309         enum drbd_ret_code retcode;
1310         struct drbd_device *device;
1311         struct disk_conf *new_disk_conf, *old_disk_conf;
1312         struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
1313         int err, fifo_size;
1314
1315         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1316         if (!adm_ctx.reply_skb)
1317                 return retcode;
1318         if (retcode != NO_ERROR)
1319                 goto finish;
1320
1321         device = adm_ctx.device;
1322         mutex_lock(&adm_ctx.resource->adm_mutex);
1323
1324         /* we also need a disk
1325          * to change the options on */
1326         if (!get_ldev(device)) {
1327                 retcode = ERR_NO_DISK;
1328                 goto out;
1329         }
1330
1331         new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
1332         if (!new_disk_conf) {
1333                 retcode = ERR_NOMEM;
1334                 goto fail;
1335         }
1336
1337         mutex_lock(&device->resource->conf_update);
1338         old_disk_conf = device->ldev->disk_conf;
1339         *new_disk_conf = *old_disk_conf;
1340         if (should_set_defaults(info))
1341                 set_disk_conf_defaults(new_disk_conf);
1342
1343         err = disk_conf_from_attrs_for_change(new_disk_conf, info);
1344         if (err && err != -ENOMSG) {
1345                 retcode = ERR_MANDATORY_TAG;
1346                 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1347                 goto fail_unlock;
1348         }
1349
1350         if (!expect(new_disk_conf->resync_rate >= 1))
1351                 new_disk_conf->resync_rate = 1;
1352
1353         if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
1354                 new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
1355         if (new_disk_conf->al_extents > drbd_al_extents_max(device->ldev))
1356                 new_disk_conf->al_extents = drbd_al_extents_max(device->ldev);
1357
1358         if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1359                 new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1360
1361         fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
1362         if (fifo_size != device->rs_plan_s->size) {
1363                 new_plan = fifo_alloc(fifo_size);
1364                 if (!new_plan) {
1365                         drbd_err(device, "kmalloc of fifo_buffer failed");
1366                         retcode = ERR_NOMEM;
1367                         goto fail_unlock;
1368                 }
1369         }
1370
1371         drbd_suspend_io(device);
1372         wait_event(device->al_wait, lc_try_lock(device->act_log));
1373         drbd_al_shrink(device);
1374         err = drbd_check_al_size(device, new_disk_conf);
1375         lc_unlock(device->act_log);
1376         wake_up(&device->al_wait);
1377         drbd_resume_io(device);
1378
1379         if (err) {
1380                 retcode = ERR_NOMEM;
1381                 goto fail_unlock;
1382         }
1383
1384         write_lock_irq(&global_state_lock);
1385         retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1386         if (retcode == NO_ERROR) {
1387                 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
1388                 drbd_resync_after_changed(device);
1389         }
1390         write_unlock_irq(&global_state_lock);
1391
1392         if (retcode != NO_ERROR)
1393                 goto fail_unlock;
1394
1395         if (new_plan) {
1396                 old_plan = device->rs_plan_s;
1397                 rcu_assign_pointer(device->rs_plan_s, new_plan);
1398         }
1399
1400         mutex_unlock(&device->resource->conf_update);
1401
1402         if (new_disk_conf->al_updates)
1403                 device->ldev->md.flags &= ~MDF_AL_DISABLED;
1404         else
1405                 device->ldev->md.flags |= MDF_AL_DISABLED;
1406
1407         if (new_disk_conf->md_flushes)
1408                 clear_bit(MD_NO_FUA, &device->flags);
1409         else
1410                 set_bit(MD_NO_FUA, &device->flags);
1411
1412         if (write_ordering_changed(old_disk_conf, new_disk_conf))
1413                 drbd_bump_write_ordering(device->resource, NULL, WO_bdev_flush);
1414
1415         drbd_md_sync(device);
1416
1417         if (device->state.conn >= C_CONNECTED) {
1418                 struct drbd_peer_device *peer_device;
1419
1420                 for_each_peer_device(peer_device, device)
1421                         drbd_send_sync_param(peer_device);
1422         }
1423
1424         synchronize_rcu();
1425         kfree(old_disk_conf);
1426         kfree(old_plan);
1427         mod_timer(&device->request_timer, jiffies + HZ);
1428         goto success;
1429
1430 fail_unlock:
1431         mutex_unlock(&device->resource->conf_update);
1432  fail:
1433         kfree(new_disk_conf);
1434         kfree(new_plan);
1435 success:
1436         put_ldev(device);
1437  out:
1438         mutex_unlock(&adm_ctx.resource->adm_mutex);
1439  finish:
1440         drbd_adm_finish(&adm_ctx, info, retcode);
1441         return 0;
1442 }
1443
/*
 * drbd_adm_attach() - netlink admin request: attach a backing device to a minor.
 *
 * Builds a new struct drbd_backing_dev plus disk_conf from the netlink
 * attributes, opens the data and meta-data block devices, reads the meta data
 * super block, validates the sizes, and then drives the disk state from
 * D_ATTACHING to the state derived from the on-disk meta-data flags
 * (or to D_NEGOTIATING while connected).
 *
 * Until the "point of no return" every resource acquired here is released by
 * the fail: path.  After it, ownership has moved to the device and failures
 * force the disk back to D_DISKLESS instead.
 *
 * The result code is delivered to the requester via drbd_adm_finish().
 * Returns 0 once a reply skb exists, otherwise the value handed back by
 * drbd_adm_prepare().
 */
1444 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1445 {
1446         struct drbd_config_context adm_ctx;
1447         struct drbd_device *device;
1448         struct drbd_peer_device *peer_device;
1449         struct drbd_connection *connection;
1450         int err;
1451         enum drbd_ret_code retcode;
1452         enum determine_dev_size dd;
1453         sector_t max_possible_sectors;
1454         sector_t min_md_device_sectors;
1455         struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
1456         struct disk_conf *new_disk_conf = NULL;
1457         struct block_device *bdev;
1458         struct lru_cache *resync_lru = NULL;
1459         struct fifo_buffer *new_plan = NULL;
1460         union drbd_state ns, os;
1461         enum drbd_state_rv rv;
1462         struct net_conf *nc;
1463
             /* Without a reply skb there is nothing to report into; hand the
              * prepare result straight back. */
1464         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1465         if (!adm_ctx.reply_skb)
1466                 return retcode;
1467         if (retcode != NO_ERROR)
1468                 goto finish;
1469
1470         device = adm_ctx.device;
1471         mutex_lock(&adm_ctx.resource->adm_mutex);
1472         peer_device = first_peer_device(device);
1473         connection = peer_device ? peer_device->connection : NULL;
1474         conn_reconfig_start(connection);
1475
1476         /* if you want to reconfigure, please tear down first */
1477         if (device->state.disk > D_DISKLESS) {
1478                 retcode = ERR_DISK_CONFIGURED;
1479                 goto fail;
1480         }
1481         /* It may just now have detached because of IO error.  Make sure
1482          * drbd_ldev_destroy is done already, we may end up here very fast,
1483          * e.g. if someone calls attach from the on-io-error handler,
1484          * to realize a "hot spare" feature (not that I'd recommend that) */
1485         wait_event(device->misc_wait, !atomic_read(&device->local_cnt));
1486
1487         /* make sure there is no leftover from previous force-detach attempts */
1488         clear_bit(FORCE_DETACH, &device->flags);
1489         clear_bit(WAS_IO_ERROR, &device->flags);
1490         clear_bit(WAS_READ_ERROR, &device->flags);
1491
1492         /* and no leftover from previously aborted resync or verify, either */
1493         device->rs_total = 0;
1494         device->rs_failed = 0;
1495         atomic_set(&device->rs_pending_cnt, 0);
1496
1497         /* allocation not in the IO path, drbdsetup context */
1498         nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
1499         if (!nbc) {
1500                 retcode = ERR_NOMEM;
1501                 goto fail;
1502         }
1503         spin_lock_init(&nbc->md.uuid_lock);
1504
1505         new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
1506         if (!new_disk_conf) {
1507                 retcode = ERR_NOMEM;
1508                 goto fail;
1509         }
             /* nbc only points at new_disk_conf; the fail: path frees the two
              * allocations separately. */
1510         nbc->disk_conf = new_disk_conf;
1511
1512         set_disk_conf_defaults(new_disk_conf);
1513         err = disk_conf_from_attrs(new_disk_conf, info);
1514         if (err) {
1515                 retcode = ERR_MANDATORY_TAG;
1516                 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1517                 goto fail;
1518         }
1519
             /* Clamp c_plan_ahead before it is used to size the plan fifo. */
1520         if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1521                 new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1522
1523         new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ);
1524         if (!new_plan) {
1525                 retcode = ERR_NOMEM;
1526                 goto fail;
1527         }
1528
1529         if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
1530                 retcode = ERR_MD_IDX_INVALID;
1531                 goto fail;
1532         }
1533
1534         write_lock_irq(&global_state_lock);
1535         retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1536         write_unlock_irq(&global_state_lock);
1537         if (retcode != NO_ERROR)
1538                 goto fail;
1539
             /* Reject fencing == FP_STONITH together with wire protocol A. */
1540         rcu_read_lock();
1541         nc = rcu_dereference(connection->net_conf);
1542         if (nc) {
1543                 if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
1544                         rcu_read_unlock();
1545                         retcode = ERR_STONITH_AND_PROT_A;
1546                         goto fail;
1547                 }
1548         }
1549         rcu_read_unlock();
1550
             /* Open the data device exclusively, with this device as holder. */
1551         bdev = blkdev_get_by_path(new_disk_conf->backing_dev,
1552                                   FMODE_READ | FMODE_WRITE | FMODE_EXCL, device);
1553         if (IS_ERR(bdev)) {
1554                 drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->backing_dev,
1555                         PTR_ERR(bdev));
1556                 retcode = ERR_OPEN_DISK;
1557                 goto fail;
1558         }
1559         nbc->backing_bdev = bdev;
1560
1561         /*
1562          * meta_dev_idx >= 0: external fixed size, possibly multiple
1563          * drbd sharing one meta device.  TODO in that case, paranoia
1564          * check that [md_bdev, meta_dev_idx] is not yet used by some
1565          * other drbd minor!  (if you use drbd.conf + drbdadm, that
1566          * should check it for you already; but if you don't, or
1567          * someone fooled it, we need to double check here)
1568          */
             /* Holder of the exclusive open: the device itself for a negative
              * meta_dev_idx, the common drbd_m_holder otherwise. */
1569         bdev = blkdev_get_by_path(new_disk_conf->meta_dev,
1570                                   FMODE_READ | FMODE_WRITE | FMODE_EXCL,
1571                                   (new_disk_conf->meta_dev_idx < 0) ?
1572                                   (void *)device : (void *)drbd_m_holder);
1573         if (IS_ERR(bdev)) {
1574                 drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->meta_dev,
1575                         PTR_ERR(bdev));
1576                 retcode = ERR_OPEN_MD_DISK;
1577                 goto fail;
1578         }
1579         nbc->md_bdev = bdev;
1580
             /* Data and meta data must live on the same block device exactly
              * when the index says internal or flexible-internal. */
1581         if ((nbc->backing_bdev == nbc->md_bdev) !=
1582             (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
1583              new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
1584                 retcode = ERR_MD_IDX_INVALID;
1585                 goto fail;
1586         }
1587
1588         resync_lru = lc_create("resync", drbd_bm_ext_cache,
1589                         1, 61, sizeof(struct bm_extent),
1590                         offsetof(struct bm_extent, lce));
1591         if (!resync_lru) {
1592                 retcode = ERR_NOMEM;
1593                 goto fail;
1594         }
1595
1596         /* Read our meta data super block early.
1597          * This also sets other on-disk offsets. */
1598         retcode = drbd_md_read(device, nbc);
1599         if (retcode != NO_ERROR)
1600                 goto fail;
1601
             /* Clamp al_extents into [DRBD_AL_EXTENTS_MIN, drbd_al_extents_max(nbc)]. */
1602         if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
1603                 new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
1604         if (new_disk_conf->al_extents > drbd_al_extents_max(nbc))
1605                 new_disk_conf->al_extents = drbd_al_extents_max(nbc);
1606
1607         if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
1608                 drbd_err(device, "max capacity %llu smaller than disk size %llu\n",
1609                         (unsigned long long) drbd_get_max_capacity(nbc),
1610                         (unsigned long long) new_disk_conf->disk_size);
1611                 retcode = ERR_DISK_TOO_SMALL;
1612                 goto fail;
1613         }
1614
1615         if (new_disk_conf->meta_dev_idx < 0) {
1616                 max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
1617                 /* at least one MB, otherwise it does not make sense */
1618                 min_md_device_sectors = (2<<10);
1619         } else {
1620                 max_possible_sectors = DRBD_MAX_SECTORS;
1621                 min_md_device_sectors = MD_128MB_SECT * (new_disk_conf->meta_dev_idx + 1);
1622         }
1623
1624         if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
1625                 retcode = ERR_MD_DISK_TOO_SMALL;
1626                 drbd_warn(device, "refusing attach: md-device too small, "
1627                      "at least %llu sectors needed for this meta-disk type\n",
1628                      (unsigned long long) min_md_device_sectors);
1629                 goto fail;
1630         }
1631
1632         /* Make sure the new disk is big enough
1633          * (we may currently be R_PRIMARY with no local disk...) */
1634         if (drbd_get_max_capacity(nbc) <
1635             drbd_get_capacity(device->this_bdev)) {
1636                 retcode = ERR_DISK_TOO_SMALL;
1637                 goto fail;
1638         }
1639
1640         nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
1641
             /* Oversized lower level device: only warned about here. */
1642         if (nbc->known_size > max_possible_sectors) {
1643                 drbd_warn(device, "==> truncating very big lower level device "
1644                         "to currently maximum possible %llu sectors <==\n",
1645                         (unsigned long long) max_possible_sectors);
1646                 if (new_disk_conf->meta_dev_idx >= 0)
1647                         drbd_warn(device, "==>> using internal or flexible "
1648                                       "meta data may help <<==\n");
1649         }
1650
1651         drbd_suspend_io(device);
1652         /* also wait for the last barrier ack. */
1653         /* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171
1654          * We need a way to either ignore barrier acks for barriers sent before a device
1655          * was attached, or a way to wait for all pending barrier acks to come in.
1656          * As barriers are counted per resource,
1657          * we'd need to suspend io on all devices of a resource.
1658          */
1659         wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device));
1660         /* and for any other previously queued work */
1661         drbd_flush_workqueue(&connection->sender_work);
1662
1663         rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
1664         retcode = rv;  /* FIXME: Type mismatch. */
1665         drbd_resume_io(device);
1666         if (rv < SS_SUCCESS)
1667                 goto fail;
1668
             /* No local-disk reference was obtained on this branch, hence the
              * jump past the put_ldev() at force_diskless_dec. */
1669         if (!get_ldev_if_state(device, D_ATTACHING))
1670                 goto force_diskless;
1671
1672         if (!device->bitmap) {
1673                 if (drbd_bm_init(device)) {
1674                         retcode = ERR_NOMEM;
1675                         goto force_diskless_dec;
1676                 }
1677         }
1678
             /* The UUID comparison ignores bit 0 on both sides. */
1679         if (device->state.conn < C_CONNECTED &&
1680             device->state.role == R_PRIMARY && device->ed_uuid &&
1681             (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
1682                 drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
1683                     (unsigned long long)device->ed_uuid);
1684                 retcode = ERR_DATA_NOT_CURRENT;
1685                 goto force_diskless_dec;
1686         }
1687
1688         /* Since we are diskless, fix the activity log first... */
1689         if (drbd_check_al_size(device, new_disk_conf)) {
1690                 retcode = ERR_NOMEM;
1691                 goto force_diskless_dec;
1692         }
1693
1694         /* Prevent shrinking of consistent devices ! */
1695         if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
1696             drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) {
1697                 drbd_warn(device, "refusing to truncate a consistent device\n");
1698                 retcode = ERR_DISK_TOO_SMALL;
1699                 goto force_diskless_dec;
1700         }
1701
1702         /* Reset the "barriers don't work" bits here, then force meta data to
1703          * be written, to ensure we determine if barriers are supported. */
1704         if (new_disk_conf->md_flushes)
1705                 clear_bit(MD_NO_FUA, &device->flags);
1706         else
1707                 set_bit(MD_NO_FUA, &device->flags);
1708
1709         /* Point of no return reached.
1710          * Devices and memory are no longer released by error cleanup below.
1711          * now device takes over responsibility, and the state engine should
1712          * clean it up somewhere.  */
1713         D_ASSERT(device, device->ldev == NULL);
1714         device->ldev = nbc;
1715         device->resync = resync_lru;
1716         device->rs_plan_s = new_plan;
             /* Clearing the locals is what keeps the fail: path from releasing
              * objects that now belong to the device. */
1717         nbc = NULL;
1718         resync_lru = NULL;
1719         new_disk_conf = NULL;
1720         new_plan = NULL;
1721
1722         drbd_bump_write_ordering(device->resource, device->ldev, WO_bdev_flush);
1723
1724         if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
1725                 set_bit(CRASHED_PRIMARY, &device->flags);
1726         else
1727                 clear_bit(CRASHED_PRIMARY, &device->flags);
1728
1729         if (drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1730             !(device->state.role == R_PRIMARY && device->resource->susp_nod))
1731                 set_bit(CRASHED_PRIMARY, &device->flags);
1732
             /* Start the transfer counters from zero for the new disk. */
1733         device->send_cnt = 0;
1734         device->recv_cnt = 0;
1735         device->read_cnt = 0;
1736         device->writ_cnt = 0;
1737
1738         drbd_reconsider_max_bio_size(device, device->ldev);
1739
1740         /* If I am currently not R_PRIMARY,
1741          * but meta data primary indicator is set,
1742          * I just now recover from a hard crash,
1743          * and have been R_PRIMARY before that crash.
1744          *
1745          * Now, if I had no connection before that crash
1746          * (have been degraded R_PRIMARY), chances are that
1747          * I won't find my peer now either.
1748          *
1749          * In that case, and _only_ in that case,
1750          * we use the degr-wfc-timeout instead of the default,
1751          * so we can automatically recover from a crash of a
1752          * degraded but active "cluster" after a certain timeout.
1753          */
1754         clear_bit(USE_DEGR_WFC_T, &device->flags);
1755         if (device->state.role != R_PRIMARY &&
1756              drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1757             !drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND))
1758                 set_bit(USE_DEGR_WFC_T, &device->flags);
1759
1760         dd = drbd_determine_dev_size(device, 0, NULL);
1761         if (dd <= DS_ERROR) {
1762                 retcode = ERR_NOMEM_BITMAP;
1763                 goto force_diskless_dec;
1764         } else if (dd == DS_GREW)
1765                 set_bit(RESYNC_AFTER_NEG, &device->flags);
1766
             /* Either set and write out the whole bitmap, or read the bitmap
              * from disk; both go through drbd_bitmap_io(). */
1767         if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ||
1768             (test_bit(CRASHED_PRIMARY, &device->flags) &&
1769              drbd_md_test_flag(device->ldev, MDF_AL_DISABLED))) {
1770                 drbd_info(device, "Assuming that all blocks are out of sync "
1771                      "(aka FullSync)\n");
1772                 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
1773                         "set_n_write from attaching", BM_LOCKED_MASK)) {
1774                         retcode = ERR_IO_MD_DISK;
1775                         goto force_diskless_dec;
1776                 }
1777         } else {
1778                 if (drbd_bitmap_io(device, &drbd_bm_read,
1779                         "read from attaching", BM_LOCKED_MASK)) {
1780                         retcode = ERR_IO_MD_DISK;
1781                         goto force_diskless_dec;
1782                 }
1783         }
1784
1785         if (_drbd_bm_total_weight(device) == drbd_bm_bits(device))
1786                 drbd_suspend_al(device); /* IO is still suspended here... */
1787
1788         spin_lock_irq(&device->resource->req_lock);
1789         os = drbd_read_state(device);
1790         ns = os;
1791         /* If MDF_CONSISTENT is not set go into inconsistent state,
1792            otherwise investigate MDF_WasUpToDate...
1793            If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state,
1794            otherwise into D_CONSISTENT state.
1795         */
1796         if (drbd_md_test_flag(device->ldev, MDF_CONSISTENT)) {
1797                 if (drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE))
1798                         ns.disk = D_CONSISTENT;
1799                 else
1800                         ns.disk = D_OUTDATED;
1801         } else {
1802                 ns.disk = D_INCONSISTENT;
1803         }
1804
1805         if (drbd_md_test_flag(device->ldev, MDF_PEER_OUT_DATED))
1806                 ns.pdsk = D_OUTDATED;
1807
             /* D_CONSISTENT is promoted to D_UP_TO_DATE when the peer disk is
              * outdated or fencing is FP_DONT_CARE. */
1808         rcu_read_lock();
1809         if (ns.disk == D_CONSISTENT &&
1810             (ns.pdsk == D_OUTDATED || rcu_dereference(device->ldev->disk_conf)->fencing == FP_DONT_CARE))
1811                 ns.disk = D_UP_TO_DATE;
1812
1813         /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
1814            MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
1815            this point, because drbd_request_state() modifies these
1816            flags. */
1817
1818         if (rcu_dereference(device->ldev->disk_conf)->al_updates)
1819                 device->ldev->md.flags &= ~MDF_AL_DISABLED;
1820         else
1821                 device->ldev->md.flags |= MDF_AL_DISABLED;
1822
1823         rcu_read_unlock();
1824
1825         /* In case we are C_CONNECTED postpone any decision on the new disk
1826            state after the negotiation phase. */
1827         if (device->state.conn == C_CONNECTED) {
                     /* Stash the computed state; for now only the disk state
                      * changes, to D_NEGOTIATING. */
1828                 device->new_state_tmp.i = ns.i;
1829                 ns.i = os.i;
1830                 ns.disk = D_NEGOTIATING;
1831
1832                 /* We expect to receive up-to-date UUIDs soon.
1833                    To avoid a race in receive_state, free p_uuid while
1834                    holding req_lock. I.e. atomic with the state change */
1835                 kfree(device->p_uuid);
1836                 device->p_uuid = NULL;
1837         }
1838
1839         rv = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
1840         spin_unlock_irq(&device->resource->req_lock);
1841
1842         if (rv < SS_SUCCESS)
1843                 goto force_diskless_dec;
1844
1845         mod_timer(&device->request_timer, jiffies + HZ);
1846
             /* Bit 0 of the current UUID is set while R_PRIMARY, cleared otherwise. */
1847         if (device->state.role == R_PRIMARY)
1848                 device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
1849         else
1850                 device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
1851
1852         drbd_md_mark_dirty(device);
1853         drbd_md_sync(device);
1854
             /* Success path: drop the ldev reference taken above and report. */
1855         kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
1856         put_ldev(device);
1857         conn_reconfig_done(connection);
1858         mutex_unlock(&adm_ctx.resource->adm_mutex);
1859         drbd_adm_finish(&adm_ctx, info, retcode);
1860         return 0;
1861
             /* Error after get_ldev_if_state() succeeded: release that
              * reference, then continue with the forced transition below. */
1862  force_diskless_dec:
1863         put_ldev(device);
1864  force_diskless:
1865         drbd_force_state(device, NS(disk, D_DISKLESS));
1866         drbd_md_sync(device);
             /* Common cleanup.  Whatever was handed over to the device at the
              * point of no return is NULL here and therefore left alone. */
1867  fail:
1868         conn_reconfig_done(connection);
1869         if (nbc) {
1870                 if (nbc->backing_bdev)
1871                         blkdev_put(nbc->backing_bdev,
1872                                    FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1873                 if (nbc->md_bdev)
1874                         blkdev_put(nbc->md_bdev,
1875                                    FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1876                 kfree(nbc);
1877         }
1878         kfree(new_disk_conf);
1879         lc_destroy(resync_lru);
1880         kfree(new_plan);
1881         mutex_unlock(&adm_ctx.resource->adm_mutex);
1882  finish:
1883         drbd_adm_finish(&adm_ctx, info, retcode);
1884         return 0;
1885 }
1886
1887 static int adm_detach(struct drbd_device *device, int force)
1888 {
1889         enum drbd_state_rv retcode;
1890         int ret;
1891
1892         if (force) {
1893                 set_bit(FORCE_DETACH, &device->flags);
1894                 drbd_force_state(device, NS(disk, D_FAILED));
1895                 retcode = SS_SUCCESS;
1896                 goto out;
1897         }
1898
1899         drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
1900         drbd_md_get_buffer(device); /* make sure there is no in-flight meta-data IO */
1901         retcode = drbd_request_state(device, NS(disk, D_FAILED));
1902         drbd_md_put_buffer(device);
1903         /* D_FAILED will transition to DISKLESS. */
1904         ret = wait_event_interruptible(device->misc_wait,
1905                         device->state.disk != D_FAILED);
1906         drbd_resume_io(device);
1907         if ((int)retcode == (int)SS_IS_DISKLESS)
1908                 retcode = SS_NOTHING_TO_DO;
1909         if (ret)
1910                 retcode = ERR_INTR;
1911 out:
1912         return retcode;
1913 }
1914
1915 /* Detaching the disk is a process in multiple stages.  First we need to lock
1916  * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
1917  * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
1918  * internal references as well.
1919  * Only then we have finally detached. */
1920 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
1921 {
1922         struct drbd_config_context adm_ctx;
1923         enum drbd_ret_code retcode;
1924         struct detach_parms parms = { };
1925         int err;
1926
1927         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1928         if (!adm_ctx.reply_skb)
1929                 return retcode;
1930         if (retcode != NO_ERROR)
1931                 goto out;
1932
1933         if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
1934                 err = detach_parms_from_attrs(&parms, info);
1935                 if (err) {
1936                         retcode = ERR_MANDATORY_TAG;
1937                         drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1938                         goto out;
1939                 }
1940         }
1941
1942         mutex_lock(&adm_ctx.resource->adm_mutex);
1943         retcode = adm_detach(adm_ctx.device, parms.force_detach);
1944         mutex_unlock(&adm_ctx.resource->adm_mutex);
1945 out:
1946         drbd_adm_finish(&adm_ctx, info, retcode);
1947         return 0;
1948 }
1949
1950 static bool conn_resync_running(struct drbd_connection *connection)
1951 {
1952         struct drbd_peer_device *peer_device;
1953         bool rv = false;
1954         int vnr;
1955
1956         rcu_read_lock();
1957         idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1958                 struct drbd_device *device = peer_device->device;
1959                 if (device->state.conn == C_SYNC_SOURCE ||
1960                     device->state.conn == C_SYNC_TARGET ||
1961                     device->state.conn == C_PAUSED_SYNC_S ||
1962                     device->state.conn == C_PAUSED_SYNC_T) {
1963                         rv = true;
1964                         break;
1965                 }
1966         }
1967         rcu_read_unlock();
1968
1969         return rv;
1970 }
1971
1972 static bool conn_ov_running(struct drbd_connection *connection)
1973 {
1974         struct drbd_peer_device *peer_device;
1975         bool rv = false;
1976         int vnr;
1977
1978         rcu_read_lock();
1979         idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1980                 struct drbd_device *device = peer_device->device;
1981                 if (device->state.conn == C_VERIFY_S ||
1982                     device->state.conn == C_VERIFY_T) {
1983                         rv = true;
1984                         break;
1985                 }
1986         }
1987         rcu_read_unlock();
1988
1989         return rv;
1990 }
1991
1992 static enum drbd_ret_code
1993 _check_net_options(struct drbd_connection *connection, struct net_conf *old_net_conf, struct net_conf *new_net_conf)
1994 {
1995         struct drbd_peer_device *peer_device;
1996         int i;
1997
1998         if (old_net_conf && connection->cstate == C_WF_REPORT_PARAMS && connection->agreed_pro_version < 100) {
1999                 if (new_net_conf->wire_protocol != old_net_conf->wire_protocol)
2000                         return ERR_NEED_APV_100;
2001
2002                 if (new_net_conf->two_primaries != old_net_conf->two_primaries)
2003                         return ERR_NEED_APV_100;
2004
2005                 if (strcmp(new_net_conf->integrity_alg, old_net_conf->integrity_alg))
2006                         return ERR_NEED_APV_100;
2007         }
2008
2009         if (!new_net_conf->two_primaries &&
2010             conn_highest_role(connection) == R_PRIMARY &&
2011             conn_highest_peer(connection) == R_PRIMARY)
2012                 return ERR_NEED_ALLOW_TWO_PRI;
2013
2014         if (new_net_conf->two_primaries &&
2015             (new_net_conf->wire_protocol != DRBD_PROT_C))
2016                 return ERR_NOT_PROTO_C;
2017
2018         idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2019                 struct drbd_device *device = peer_device->device;
2020                 if (get_ldev(device)) {
2021                         enum drbd_fencing_p fp = rcu_dereference(device->ldev->disk_conf)->fencing;
2022                         put_ldev(device);
2023                         if (new_net_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH)
2024                                 return ERR_STONITH_AND_PROT_A;
2025                 }
2026                 if (device->state.role == R_PRIMARY && new_net_conf->discard_my_data)
2027                         return ERR_DISCARD_IMPOSSIBLE;
2028         }
2029
2030         if (new_net_conf->on_congestion != OC_BLOCK && new_net_conf->wire_protocol != DRBD_PROT_A)
2031                 return ERR_CONG_NOT_PROTO_A;
2032
2033         return NO_ERROR;
2034 }
2035
2036 static enum drbd_ret_code
2037 check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
2038 {
2039         static enum drbd_ret_code rv;
2040         struct drbd_peer_device *peer_device;
2041         int i;
2042
2043         rcu_read_lock();
2044         rv = _check_net_options(connection, rcu_dereference(connection->net_conf), new_net_conf);
2045         rcu_read_unlock();
2046
2047         /* connection->volumes protected by genl_lock() here */
2048         idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2049                 struct drbd_device *device = peer_device->device;
2050                 if (!device->bitmap) {
2051                         if (drbd_bm_init(device))
2052                                 return ERR_NOMEM;
2053                 }
2054         }
2055
2056         return rv;
2057 }
2058
/*
 * Hash transforms allocated for a new net_conf by alloc_crypto(), held here
 * until they are either installed on the connection or released again with
 * free_crypto().  A NULL member means nothing was allocated for it.
 */
struct crypto {
	/* from net_conf->verify_alg */
	struct crypto_hash *verify_tfm;
	/* from net_conf->csums_alg */
	struct crypto_hash *csums_tfm;
	/* from "hmac(<net_conf->cram_hmac_alg>)" */
	struct crypto_hash *cram_hmac_tfm;
	/* from net_conf->integrity_alg */
	struct crypto_hash *integrity_tfm;
};
2065
2066 static int
2067 alloc_hash(struct crypto_hash **tfm, char *tfm_name, int err_alg)
2068 {
2069         if (!tfm_name[0])
2070                 return NO_ERROR;
2071
2072         *tfm = crypto_alloc_hash(tfm_name, 0, CRYPTO_ALG_ASYNC);
2073         if (IS_ERR(*tfm)) {
2074                 *tfm = NULL;
2075                 return err_alg;
2076         }
2077
2078         return NO_ERROR;
2079 }
2080
2081 static enum drbd_ret_code
2082 alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf)
2083 {
2084         char hmac_name[CRYPTO_MAX_ALG_NAME];
2085         enum drbd_ret_code rv;
2086
2087         rv = alloc_hash(&crypto->csums_tfm, new_net_conf->csums_alg,
2088                        ERR_CSUMS_ALG);
2089         if (rv != NO_ERROR)
2090                 return rv;
2091         rv = alloc_hash(&crypto->verify_tfm, new_net_conf->verify_alg,
2092                        ERR_VERIFY_ALG);
2093         if (rv != NO_ERROR)
2094                 return rv;
2095         rv = alloc_hash(&crypto->integrity_tfm, new_net_conf->integrity_alg,
2096                        ERR_INTEGRITY_ALG);
2097         if (rv != NO_ERROR)
2098                 return rv;
2099         if (new_net_conf->cram_hmac_alg[0] != 0) {
2100                 snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
2101                          new_net_conf->cram_hmac_alg);
2102
2103                 rv = alloc_hash(&crypto->cram_hmac_tfm, hmac_name,
2104                                ERR_AUTH_ALG);
2105         }
2106
2107         return rv;
2108 }
2109
2110 static void free_crypto(struct crypto *crypto)
2111 {
2112         crypto_free_hash(crypto->cram_hmac_tfm);
2113         crypto_free_hash(crypto->integrity_tfm);
2114         crypto_free_hash(crypto->csums_tfm);
2115         crypto_free_hash(crypto->verify_tfm);
2116 }
2117
2118 int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
2119 {
2120         struct drbd_config_context adm_ctx;
2121         enum drbd_ret_code retcode;
2122         struct drbd_connection *connection;
2123         struct net_conf *old_net_conf, *new_net_conf = NULL;
2124         int err;
2125         int ovr; /* online verify running */
2126         int rsr; /* re-sync running */
2127         struct crypto crypto = { };
2128
2129         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
2130         if (!adm_ctx.reply_skb)
2131                 return retcode;
2132         if (retcode != NO_ERROR)
2133                 goto finish;
2134
2135         connection = adm_ctx.connection;
2136         mutex_lock(&adm_ctx.resource->adm_mutex);
2137
2138         new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
2139         if (!new_net_conf) {
2140                 retcode = ERR_NOMEM;
2141                 goto out;
2142         }
2143
2144         conn_reconfig_start(connection);
2145
2146         mutex_lock(&connection->data.mutex);
2147         mutex_lock(&connection->resource->conf_update);
2148         old_net_conf = connection->net_conf;
2149
2150         if (!old_net_conf) {
2151                 drbd_msg_put_info(adm_ctx.reply_skb, "net conf missing, try connect");
2152                 retcode = ERR_INVALID_REQUEST;
2153                 goto fail;
2154         }
2155
2156         *new_net_conf = *old_net_conf;
2157         if (should_set_defaults(info))
2158                 set_net_conf_defaults(new_net_conf);
2159
2160         err = net_conf_from_attrs_for_change(new_net_conf, info);
2161         if (err && err != -ENOMSG) {
2162                 retcode = ERR_MANDATORY_TAG;
2163                 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2164                 goto fail;
2165         }
2166
2167         retcode = check_net_options(connection, new_net_conf);
2168         if (retcode != NO_ERROR)
2169                 goto fail;
2170
2171         /* re-sync running */
2172         rsr = conn_resync_running(connection);
2173         if (rsr && strcmp(new_net_conf->csums_alg, old_net_conf->csums_alg)) {
2174                 retcode = ERR_CSUMS_RESYNC_RUNNING;
2175                 goto fail;
2176         }
2177
2178         /* online verify running */
2179         ovr = conn_ov_running(connection);
2180         if (ovr && strcmp(new_net_conf->verify_alg, old_net_conf->verify_alg)) {
2181                 retcode = ERR_VERIFY_RUNNING;
2182                 goto fail;
2183         }
2184
2185         retcode = alloc_crypto(&crypto, new_net_conf);
2186         if (retcode != NO_ERROR)
2187                 goto fail;
2188
2189         rcu_assign_pointer(connection->net_conf, new_net_conf);
2190
2191         if (!rsr) {
2192                 crypto_free_hash(connection->csums_tfm);
2193                 connection->csums_tfm = crypto.csums_tfm;
2194                 crypto.csums_tfm = NULL;
2195         }
2196         if (!ovr) {
2197                 crypto_free_hash(connection->verify_tfm);
2198                 connection->verify_tfm = crypto.verify_tfm;
2199                 crypto.verify_tfm = NULL;
2200         }
2201
2202         crypto_free_hash(connection->integrity_tfm);
2203         connection->integrity_tfm = crypto.integrity_tfm;
2204         if (connection->cstate >= C_WF_REPORT_PARAMS && connection->agreed_pro_version >= 100)
2205                 /* Do this without trying to take connection->data.mutex again.  */
2206                 __drbd_send_protocol(connection, P_PROTOCOL_UPDATE);
2207
2208         crypto_free_hash(connection->cram_hmac_tfm);
2209         connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2210
2211         mutex_unlock(&connection->resource->conf_update);
2212         mutex_unlock(&connection->data.mutex);
2213         synchronize_rcu();
2214         kfree(old_net_conf);
2215
2216         if (connection->cstate >= C_WF_REPORT_PARAMS) {
2217                 struct drbd_peer_device *peer_device;
2218                 int vnr;
2219
2220                 idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
2221                         drbd_send_sync_param(peer_device);
2222         }
2223
2224         goto done;
2225
2226  fail:
2227         mutex_unlock(&connection->resource->conf_update);
2228         mutex_unlock(&connection->data.mutex);
2229         free_crypto(&crypto);
2230         kfree(new_net_conf);
2231  done:
2232         conn_reconfig_done(connection);
2233  out:
2234         mutex_unlock(&adm_ctx.resource->adm_mutex);
2235  finish:
2236         drbd_adm_finish(&adm_ctx, info, retcode);
2237         return 0;
2238 }
2239
2240 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
2241 {
2242         struct drbd_config_context adm_ctx;
2243         struct drbd_peer_device *peer_device;
2244         struct net_conf *old_net_conf, *new_net_conf = NULL;
2245         struct crypto crypto = { };
2246         struct drbd_resource *resource;
2247         struct drbd_connection *connection;
2248         enum drbd_ret_code retcode;
2249         int i;
2250         int err;
2251
2252         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
2253
2254         if (!adm_ctx.reply_skb)
2255                 return retcode;
2256         if (retcode != NO_ERROR)
2257                 goto out;
2258         if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) {
2259                 drbd_msg_put_info(adm_ctx.reply_skb, "connection endpoint(s) missing");
2260                 retcode = ERR_INVALID_REQUEST;
2261                 goto out;
2262         }
2263
2264         /* No need for _rcu here. All reconfiguration is
2265          * strictly serialized on genl_lock(). We are protected against
2266          * concurrent reconfiguration/addition/deletion */
2267         for_each_resource(resource, &drbd_resources) {
2268                 for_each_connection(connection, resource) {
2269                         if (nla_len(adm_ctx.my_addr) == connection->my_addr_len &&
2270                             !memcmp(nla_data(adm_ctx.my_addr), &connection->my_addr,
2271                                     connection->my_addr_len)) {
2272                                 retcode = ERR_LOCAL_ADDR;
2273                                 goto out;
2274                         }
2275
2276                         if (nla_len(adm_ctx.peer_addr) == connection->peer_addr_len &&
2277                             !memcmp(nla_data(adm_ctx.peer_addr), &connection->peer_addr,
2278                                     connection->peer_addr_len)) {
2279                                 retcode = ERR_PEER_ADDR;
2280                                 goto out;
2281                         }
2282                 }
2283         }
2284
2285         mutex_lock(&adm_ctx.resource->adm_mutex);
2286         connection = first_connection(adm_ctx.resource);
2287         conn_reconfig_start(connection);
2288
2289         if (connection->cstate > C_STANDALONE) {
2290                 retcode = ERR_NET_CONFIGURED;
2291                 goto fail;
2292         }
2293
2294         /* allocation not in the IO path, drbdsetup / netlink process context */
2295         new_net_conf = kzalloc(sizeof(*new_net_conf), GFP_KERNEL);
2296         if (!new_net_conf) {
2297                 retcode = ERR_NOMEM;
2298                 goto fail;
2299         }
2300
2301         set_net_conf_defaults(new_net_conf);
2302
2303         err = net_conf_from_attrs(new_net_conf, info);
2304         if (err && err != -ENOMSG) {
2305                 retcode = ERR_MANDATORY_TAG;
2306                 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2307                 goto fail;
2308         }
2309
2310         retcode = check_net_options(connection, new_net_conf);
2311         if (retcode != NO_ERROR)
2312                 goto fail;
2313
2314         retcode = alloc_crypto(&crypto, new_net_conf);
2315         if (retcode != NO_ERROR)
2316                 goto fail;
2317
2318         ((char *)new_net_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;
2319
2320         drbd_flush_workqueue(&connection->sender_work);
2321
2322         mutex_lock(&adm_ctx.resource->conf_update);
2323         old_net_conf = connection->net_conf;
2324         if (old_net_conf) {
2325                 retcode = ERR_NET_CONFIGURED;
2326                 mutex_unlock(&adm_ctx.resource->conf_update);
2327                 goto fail;
2328         }
2329         rcu_assign_pointer(connection->net_conf, new_net_conf);
2330
2331         conn_free_crypto(connection);
2332         connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2333         connection->integrity_tfm = crypto.integrity_tfm;
2334         connection->csums_tfm = crypto.csums_tfm;
2335         connection->verify_tfm = crypto.verify_tfm;
2336
2337         connection->my_addr_len = nla_len(adm_ctx.my_addr);
2338         memcpy(&connection->my_addr, nla_data(adm_ctx.my_addr), connection->my_addr_len);
2339         connection->peer_addr_len = nla_len(adm_ctx.peer_addr);
2340         memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len);
2341
2342         mutex_unlock(&adm_ctx.resource->conf_update);
2343
2344         rcu_read_lock();
2345         idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2346                 struct drbd_device *device = peer_device->device;
2347                 device->send_cnt = 0;
2348                 device->recv_cnt = 0;
2349         }
2350         rcu_read_unlock();
2351
2352         retcode = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
2353
2354         conn_reconfig_done(connection);
2355         mutex_unlock(&adm_ctx.resource->adm_mutex);
2356         drbd_adm_finish(&adm_ctx, info, retcode);
2357         return 0;
2358
2359 fail:
2360         free_crypto(&crypto);
2361         kfree(new_net_conf);
2362
2363         conn_reconfig_done(connection);
2364         mutex_unlock(&adm_ctx.resource->adm_mutex);
2365 out:
2366         drbd_adm_finish(&adm_ctx, info, retcode);
2367         return 0;
2368 }
2369
2370 static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force)
2371 {
2372         enum drbd_state_rv rv;
2373
2374         rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2375                         force ? CS_HARD : 0);
2376
2377         switch (rv) {
2378         case SS_NOTHING_TO_DO:
2379                 break;
2380         case SS_ALREADY_STANDALONE:
2381                 return SS_SUCCESS;
2382         case SS_PRIMARY_NOP:
2383                 /* Our state checking code wants to see the peer outdated. */
2384                 rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), 0);
2385
2386                 if (rv == SS_OUTDATE_WO_CONN) /* lost connection before graceful disconnect succeeded */
2387                         rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_VERBOSE);
2388
2389                 break;
2390         case SS_CW_FAILED_BY_PEER:
2391                 /* The peer probably wants to see us outdated. */
2392                 rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING,
2393                                                         disk, D_OUTDATED), 0);
2394                 if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
2395                         rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2396                                         CS_HARD);
2397                 }
2398                 break;
2399         default:;
2400                 /* no special handling necessary */
2401         }
2402
2403         if (rv >= SS_SUCCESS) {
2404                 enum drbd_state_rv rv2;
2405                 /* No one else can reconfigure the network while I am here.
2406                  * The state handling only uses drbd_thread_stop_nowait(),
2407                  * we want to really wait here until the receiver is no more.
2408                  */
2409                 drbd_thread_stop(&connection->receiver);
2410
2411                 /* Race breaker.  This additional state change request may be
2412                  * necessary, if this was a forced disconnect during a receiver
2413                  * restart.  We may have "killed" the receiver thread just
2414                  * after drbd_receiver() returned.  Typically, we should be
2415                  * C_STANDALONE already, now, and this becomes a no-op.
2416                  */
2417                 rv2 = conn_request_state(connection, NS(conn, C_STANDALONE),
2418                                 CS_VERBOSE | CS_HARD);
2419                 if (rv2 < SS_SUCCESS)
2420                         drbd_err(connection,
2421                                 "unexpected rv2=%d in conn_try_disconnect()\n",
2422                                 rv2);
2423         }
2424         return rv;
2425 }
2426
2427 int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
2428 {
2429         struct drbd_config_context adm_ctx;
2430         struct disconnect_parms parms;
2431         struct drbd_connection *connection;
2432         enum drbd_state_rv rv;
2433         enum drbd_ret_code retcode;
2434         int err;
2435
2436         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
2437         if (!adm_ctx.reply_skb)
2438                 return retcode;
2439         if (retcode != NO_ERROR)
2440                 goto fail;
2441
2442         connection = adm_ctx.connection;
2443         memset(&parms, 0, sizeof(parms));
2444         if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
2445                 err = disconnect_parms_from_attrs(&parms, info);
2446                 if (err) {
2447                         retcode = ERR_MANDATORY_TAG;
2448                         drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2449                         goto fail;
2450                 }
2451         }
2452
2453         mutex_lock(&adm_ctx.resource->adm_mutex);
2454         rv = conn_try_disconnect(connection, parms.force_disconnect);
2455         if (rv < SS_SUCCESS)
2456                 retcode = rv;  /* FIXME: Type mismatch. */
2457         else
2458                 retcode = NO_ERROR;
2459         mutex_unlock(&adm_ctx.resource->adm_mutex);
2460  fail:
2461         drbd_adm_finish(&adm_ctx, info, retcode);
2462         return 0;
2463 }
2464
2465 void resync_after_online_grow(struct drbd_device *device)
2466 {
2467         int iass; /* I am sync source */
2468
2469         drbd_info(device, "Resync of new storage after online grow\n");
2470         if (device->state.role != device->state.peer)
2471                 iass = (device->state.role == R_PRIMARY);
2472         else
2473                 iass = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
2474
2475         if (iass)
2476                 drbd_start_resync(device, C_SYNC_SOURCE);
2477         else
2478                 _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
2479 }
2480
2481 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
2482 {
2483         struct drbd_config_context adm_ctx;
2484         struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
2485         struct resize_parms rs;
2486         struct drbd_device *device;
2487         enum drbd_ret_code retcode;
2488         enum determine_dev_size dd;
2489         bool change_al_layout = false;
2490         enum dds_flags ddsf;
2491         sector_t u_size;
2492         int err;
2493
2494         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2495         if (!adm_ctx.reply_skb)
2496                 return retcode;
2497         if (retcode != NO_ERROR)
2498                 goto finish;
2499
2500         mutex_lock(&adm_ctx.resource->adm_mutex);
2501         device = adm_ctx.device;
2502         if (!get_ldev(device)) {
2503                 retcode = ERR_NO_DISK;
2504                 goto fail;
2505         }
2506
2507         memset(&rs, 0, sizeof(struct resize_parms));
2508         rs.al_stripes = device->ldev->md.al_stripes;
2509         rs.al_stripe_size = device->ldev->md.al_stripe_size_4k * 4;
2510         if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
2511                 err = resize_parms_from_attrs(&rs, info);
2512                 if (err) {
2513                         retcode = ERR_MANDATORY_TAG;
2514                         drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2515                         goto fail_ldev;
2516                 }
2517         }
2518
2519         if (device->state.conn > C_CONNECTED) {
2520                 retcode = ERR_RESIZE_RESYNC;
2521                 goto fail_ldev;
2522         }
2523
2524         if (device->state.role == R_SECONDARY &&
2525             device->state.peer == R_SECONDARY) {
2526                 retcode = ERR_NO_PRIMARY;
2527                 goto fail_ldev;
2528         }
2529
2530         if (rs.no_resync && first_peer_device(device)->connection->agreed_pro_version < 93) {
2531                 retcode = ERR_NEED_APV_93;
2532                 goto fail_ldev;
2533         }
2534
2535         rcu_read_lock();
2536         u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
2537         rcu_read_unlock();
2538         if (u_size != (sector_t)rs.resize_size) {
2539                 new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
2540                 if (!new_disk_conf) {
2541                         retcode = ERR_NOMEM;
2542                         goto fail_ldev;
2543                 }
2544         }
2545
2546         if (device->ldev->md.al_stripes != rs.al_stripes ||
2547             device->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
2548                 u32 al_size_k = rs.al_stripes * rs.al_stripe_size;
2549
2550                 if (al_size_k > (16 * 1024 * 1024)) {
2551                         retcode = ERR_MD_LAYOUT_TOO_BIG;
2552                         goto fail_ldev;
2553                 }
2554
2555                 if (al_size_k < MD_32kB_SECT/2) {
2556                         retcode = ERR_MD_LAYOUT_TOO_SMALL;
2557                         goto fail_ldev;
2558                 }
2559
2560                 if (device->state.conn != C_CONNECTED && !rs.resize_force) {
2561                         retcode = ERR_MD_LAYOUT_CONNECTED;
2562                         goto fail_ldev;
2563                 }
2564
2565                 change_al_layout = true;
2566         }
2567
2568         if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev))
2569                 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
2570
2571         if (new_disk_conf) {
2572                 mutex_lock(&device->resource->conf_update);
2573                 old_disk_conf = device->ldev->disk_conf;
2574                 *new_disk_conf = *old_disk_conf;
2575                 new_disk_conf->disk_size = (sector_t)rs.resize_size;
2576                 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
2577                 mutex_unlock(&device->resource->conf_update);
2578                 synchronize_rcu();
2579                 kfree(old_disk_conf);
2580         }
2581
2582         ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
2583         dd = drbd_determine_dev_size(device, ddsf, change_al_layout ? &rs : NULL);
2584         drbd_md_sync(device);
2585         put_ldev(device);
2586         if (dd == DS_ERROR) {
2587                 retcode = ERR_NOMEM_BITMAP;
2588                 goto fail;
2589         } else if (dd == DS_ERROR_SPACE_MD) {
2590                 retcode = ERR_MD_LAYOUT_NO_FIT;
2591                 goto fail;
2592         } else if (dd == DS_ERROR_SHRINK) {
2593                 retcode = ERR_IMPLICIT_SHRINK;
2594                 goto fail;
2595         }
2596
2597         if (device->state.conn == C_CONNECTED) {
2598                 if (dd == DS_GREW)
2599                         set_bit(RESIZE_PENDING, &device->flags);
2600
2601                 drbd_send_uuids(first_peer_device(device));
2602                 drbd_send_sizes(first_peer_device(device), 1, ddsf);
2603         }
2604
2605  fail:
2606         mutex_unlock(&adm_ctx.resource->adm_mutex);
2607  finish:
2608         drbd_adm_finish(&adm_ctx, info, retcode);
2609         return 0;
2610
2611  fail_ldev:
2612         put_ldev(device);
2613         goto fail;
2614 }
2615
2616 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
2617 {
2618         struct drbd_config_context adm_ctx;
2619         enum drbd_ret_code retcode;
2620         struct res_opts res_opts;
2621         int err;
2622
2623         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
2624         if (!adm_ctx.reply_skb)
2625                 return retcode;
2626         if (retcode != NO_ERROR)
2627                 goto fail;
2628
2629         res_opts = adm_ctx.resource->res_opts;
2630         if (should_set_defaults(info))
2631                 set_res_opts_defaults(&res_opts);
2632
2633         err = res_opts_from_attrs(&res_opts, info);
2634         if (err && err != -ENOMSG) {
2635                 retcode = ERR_MANDATORY_TAG;
2636                 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2637                 goto fail;
2638         }
2639
2640         mutex_lock(&adm_ctx.resource->adm_mutex);
2641         err = set_resource_options(adm_ctx.resource, &res_opts);
2642         if (err) {
2643                 retcode = ERR_INVALID_REQUEST;
2644                 if (err == -ENOMEM)
2645                         retcode = ERR_NOMEM;
2646         }
2647         mutex_unlock(&adm_ctx.resource->adm_mutex);
2648
2649 fail:
2650         drbd_adm_finish(&adm_ctx, info, retcode);
2651         return 0;
2652 }
2653
2654 int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
2655 {
2656         struct drbd_config_context adm_ctx;
2657         struct drbd_device *device;
2658         int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2659
2660         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2661         if (!adm_ctx.reply_skb)
2662                 return retcode;
2663         if (retcode != NO_ERROR)
2664                 goto out;
2665
2666         device = adm_ctx.device;
2667         if (!get_ldev(device)) {
2668                 retcode = ERR_NO_DISK;
2669                 goto out;
2670         }
2671
2672         mutex_lock(&adm_ctx.resource->adm_mutex);
2673
2674         /* If there is still bitmap IO pending, probably because of a previous
2675          * resync just being finished, wait for it before requesting a new resync.
2676          * Also wait for it's after_state_ch(). */
2677         drbd_suspend_io(device);
2678         wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2679         drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
2680
2681         /* If we happen to be C_STANDALONE R_SECONDARY, just change to
2682          * D_INCONSISTENT, and set all bits in the bitmap.  Otherwise,
2683          * try to start a resync handshake as sync target for full sync.
2684          */
2685         if (device->state.conn == C_STANDALONE && device->state.role == R_SECONDARY) {
2686                 retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT));
2687                 if (retcode >= SS_SUCCESS) {
2688                         if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
2689                                 "set_n_write from invalidate", BM_LOCKED_MASK))
2690                                 retcode = ERR_IO_MD_DISK;
2691                 }
2692         } else
2693                 retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
2694         drbd_resume_io(device);
2695         mutex_unlock(&adm_ctx.resource->adm_mutex);
2696         put_ldev(device);
2697 out:
2698         drbd_adm_finish(&adm_ctx, info, retcode);
2699         return 0;
2700 }
2701
2702 static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
2703                 union drbd_state mask, union drbd_state val)
2704 {
2705         struct drbd_config_context adm_ctx;
2706         enum drbd_ret_code retcode;
2707
2708         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2709         if (!adm_ctx.reply_skb)
2710                 return retcode;
2711         if (retcode != NO_ERROR)
2712                 goto out;
2713
2714         mutex_lock(&adm_ctx.resource->adm_mutex);
2715         retcode = drbd_request_state(adm_ctx.device, mask, val);
2716         mutex_unlock(&adm_ctx.resource->adm_mutex);
2717 out:
2718         drbd_adm_finish(&adm_ctx, info, retcode);
2719         return 0;
2720 }
2721
2722 static int drbd_bmio_set_susp_al(struct drbd_device *device) __must_hold(local)
2723 {
2724         int rv;
2725
2726         rv = drbd_bmio_set_n_write(device);
2727         drbd_suspend_al(device);
2728         return rv;
2729 }
2730
2731 int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
2732 {
2733         struct drbd_config_context adm_ctx;
2734         int retcode; /* drbd_ret_code, drbd_state_rv */
2735         struct drbd_device *device;
2736
2737         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2738         if (!adm_ctx.reply_skb)
2739                 return retcode;
2740         if (retcode != NO_ERROR)
2741                 goto out;
2742
2743         device = adm_ctx.device;
2744         if (!get_ldev(device)) {
2745                 retcode = ERR_NO_DISK;
2746                 goto out;
2747         }
2748
2749         mutex_lock(&adm_ctx.resource->adm_mutex);
2750
2751         /* If there is still bitmap IO pending, probably because of a previous
2752          * resync just being finished, wait for it before requesting a new resync.
2753          * Also wait for it's after_state_ch(). */
2754         drbd_suspend_io(device);
2755         wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2756         drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
2757
2758         /* If we happen to be C_STANDALONE R_PRIMARY, just set all bits
2759          * in the bitmap.  Otherwise, try to start a resync handshake
2760          * as sync source for full sync.
2761          */
2762         if (device->state.conn == C_STANDALONE && device->state.role == R_PRIMARY) {
2763                 /* The peer will get a resync upon connect anyways. Just make that
2764                    into a full resync. */
2765                 retcode = drbd_request_state(device, NS(pdsk, D_INCONSISTENT));
2766                 if (retcode >= SS_SUCCESS) {
2767                         if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al,
2768                                 "set_n_write from invalidate_peer",
2769                                 BM_LOCKED_SET_ALLOWED))
2770                                 retcode = ERR_IO_MD_DISK;
2771                 }
2772         } else
2773                 retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
2774         drbd_resume_io(device);
2775         mutex_unlock(&adm_ctx.resource->adm_mutex);
2776         put_ldev(device);
2777 out:
2778         drbd_adm_finish(&adm_ctx, info, retcode);
2779         return 0;
2780 }
2781
2782 int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
2783 {
2784         struct drbd_config_context adm_ctx;
2785         enum drbd_ret_code retcode;
2786
2787         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2788         if (!adm_ctx.reply_skb)
2789                 return retcode;
2790         if (retcode != NO_ERROR)
2791                 goto out;
2792
2793         mutex_lock(&adm_ctx.resource->adm_mutex);
2794         if (drbd_request_state(adm_ctx.device, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
2795                 retcode = ERR_PAUSE_IS_SET;
2796         mutex_unlock(&adm_ctx.resource->adm_mutex);
2797 out:
2798         drbd_adm_finish(&adm_ctx, info, retcode);
2799         return 0;
2800 }
2801
2802 int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
2803 {
2804         struct drbd_config_context adm_ctx;
2805         union drbd_dev_state s;
2806         enum drbd_ret_code retcode;
2807
2808         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2809         if (!adm_ctx.reply_skb)
2810                 return retcode;
2811         if (retcode != NO_ERROR)
2812                 goto out;
2813
2814         mutex_lock(&adm_ctx.resource->adm_mutex);
2815         if (drbd_request_state(adm_ctx.device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
2816                 s = adm_ctx.device->state;
2817                 if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
2818                         retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
2819                                   s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
2820                 } else {
2821                         retcode = ERR_PAUSE_IS_CLEAR;
2822                 }
2823         }
2824         mutex_unlock(&adm_ctx.resource->adm_mutex);
2825 out:
2826         drbd_adm_finish(&adm_ctx, info, retcode);
2827         return 0;
2828 }
2829
2830 int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
2831 {
2832         return drbd_adm_simple_request_state(skb, info, NS(susp, 1));
2833 }
2834
2835 int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
2836 {
2837         struct drbd_config_context adm_ctx;
2838         struct drbd_device *device;
2839         int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2840
2841         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2842         if (!adm_ctx.reply_skb)
2843                 return retcode;
2844         if (retcode != NO_ERROR)
2845                 goto out;
2846
2847         mutex_lock(&adm_ctx.resource->adm_mutex);
2848         device = adm_ctx.device;
2849         if (test_bit(NEW_CUR_UUID, &device->flags)) {
2850                 drbd_uuid_new_current(device);
2851                 clear_bit(NEW_CUR_UUID, &device->flags);
2852         }
2853         drbd_suspend_io(device);
2854         retcode = drbd_request_state(device, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
2855         if (retcode == SS_SUCCESS) {
2856                 if (device->state.conn < C_CONNECTED)
2857                         tl_clear(first_peer_device(device)->connection);
2858                 if (device->state.disk == D_DISKLESS || device->state.disk == D_FAILED)
2859                         tl_restart(first_peer_device(device)->connection, FAIL_FROZEN_DISK_IO);
2860         }
2861         drbd_resume_io(device);
2862         mutex_unlock(&adm_ctx.resource->adm_mutex);
2863 out:
2864         drbd_adm_finish(&adm_ctx, info, retcode);
2865         return 0;
2866 }
2867
2868 int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
2869 {
2870         return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
2871 }
2872
2873 static int nla_put_drbd_cfg_context(struct sk_buff *skb,
2874                                     struct drbd_resource *resource,
2875                                     struct drbd_connection *connection,
2876                                     struct drbd_device *device)
2877 {
2878         struct nlattr *nla;
2879         nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
2880         if (!nla)
2881                 goto nla_put_failure;
2882         if (device &&
2883             nla_put_u32(skb, T_ctx_volume, device->vnr))
2884                 goto nla_put_failure;
2885         if (nla_put_string(skb, T_ctx_resource_name, resource->name))
2886                 goto nla_put_failure;
2887         if (connection) {
2888                 if (connection->my_addr_len &&
2889                     nla_put(skb, T_ctx_my_addr, connection->my_addr_len, &connection->my_addr))
2890                         goto nla_put_failure;
2891                 if (connection->peer_addr_len &&
2892                     nla_put(skb, T_ctx_peer_addr, connection->peer_addr_len, &connection->peer_addr))
2893                         goto nla_put_failure;
2894         }
2895         nla_nest_end(skb, nla);
2896         return 0;
2897
2898 nla_put_failure:
2899         if (nla)
2900                 nla_nest_cancel(skb, nla);
2901         return -EMSGSIZE;
2902 }
2903
2904 /*
2905  * Return the connection of @resource if @resource has exactly one connection.
2906  */
2907 static struct drbd_connection *the_only_connection(struct drbd_resource *resource)
2908 {
2909         struct list_head *connections = &resource->connections;
2910
2911         if (list_empty(connections) || connections->next->next != connections)
2912                 return NULL;
2913         return list_first_entry(&resource->connections, struct drbd_connection, connections);
2914 }
2915
2916 int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
2917                 const struct sib_info *sib)
2918 {
2919         struct drbd_resource *resource = device->resource;
2920         struct state_info *si = NULL; /* for sizeof(si->member); */
2921         struct nlattr *nla;
2922         int got_ldev;
2923         int err = 0;
2924         int exclude_sensitive;
2925
2926         /* If sib != NULL, this is drbd_bcast_event, which anyone can listen
2927          * to.  So we better exclude_sensitive information.
2928          *
2929          * If sib == NULL, this is drbd_adm_get_status, executed synchronously
2930          * in the context of the requesting user process. Exclude sensitive
2931          * information, unless current has superuser.
2932          *
2933          * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
2934          * relies on the current implementation of netlink_dump(), which
2935          * executes the dump callback successively from netlink_recvmsg(),
2936          * always in the context of the receiving process */
2937         exclude_sensitive = sib || !capable(CAP_SYS_ADMIN);
2938
2939         got_ldev = get_ldev(device);
2940
2941         /* We need to add connection name and volume number information still.
2942          * Minor number is in drbd_genlmsghdr. */
2943         if (nla_put_drbd_cfg_context(skb, resource, the_only_connection(resource), device))
2944                 goto nla_put_failure;
2945
2946         if (res_opts_to_skb(skb, &device->resource->res_opts, exclude_sensitive))
2947                 goto nla_put_failure;
2948
2949         rcu_read_lock();
2950         if (got_ldev) {
2951                 struct disk_conf *disk_conf;
2952
2953                 disk_conf = rcu_dereference(device->ldev->disk_conf);
2954                 err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
2955         }
2956         if (!err) {
2957                 struct net_conf *nc;
2958
2959                 nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
2960                 if (nc)
2961                         err = net_conf_to_skb(skb, nc, exclude_sensitive);
2962         }
2963         rcu_read_unlock();
2964         if (err)
2965                 goto nla_put_failure;
2966
2967         nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
2968         if (!nla)
2969                 goto nla_put_failure;
2970         if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
2971             nla_put_u32(skb, T_current_state, device->state.i) ||
2972             nla_put_u64(skb, T_ed_uuid, device->ed_uuid) ||
2973             nla_put_u64(skb, T_capacity, drbd_get_capacity(device->this_bdev)) ||
2974             nla_put_u64(skb, T_send_cnt, device->send_cnt) ||
2975             nla_put_u64(skb, T_recv_cnt, device->recv_cnt) ||
2976             nla_put_u64(skb, T_read_cnt, device->read_cnt) ||
2977             nla_put_u64(skb, T_writ_cnt, device->writ_cnt) ||
2978             nla_put_u64(skb, T_al_writ_cnt, device->al_writ_cnt) ||
2979             nla_put_u64(skb, T_bm_writ_cnt, device->bm_writ_cnt) ||
2980             nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&device->ap_bio_cnt)) ||
2981             nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&device->ap_pending_cnt)) ||
2982             nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&device->rs_pending_cnt)))
2983                 goto nla_put_failure;
2984
2985         if (got_ldev) {
2986                 int err;
2987
2988                 spin_lock_irq(&device->ldev->md.uuid_lock);
2989                 err = nla_put(skb, T_uuids, sizeof(si->uuids), device->ldev->md.uuid);
2990                 spin_unlock_irq(&device->ldev->md.uuid_lock);
2991
2992                 if (err)
2993                         goto nla_put_failure;
2994
2995                 if (nla_put_u32(skb, T_disk_flags, device->ldev->md.flags) ||
2996                     nla_put_u64(skb, T_bits_total, drbd_bm_bits(device)) ||
2997                     nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(device)))
2998                         goto nla_put_failure;
2999                 if (C_SYNC_SOURCE <= device->state.conn &&
3000                     C_PAUSED_SYNC_T >= device->state.conn) {
3001                         if (nla_put_u64(skb, T_bits_rs_total, device->rs_total) ||
3002                             nla_put_u64(skb, T_bits_rs_failed, device->rs_failed))
3003                                 goto nla_put_failure;
3004                 }
3005         }
3006
3007         if (sib) {
3008                 switch(sib->sib_reason) {
3009                 case SIB_SYNC_PROGRESS:
3010                 case SIB_GET_STATUS_REPLY:
3011                         break;
3012                 case SIB_STATE_CHANGE:
3013                         if (nla_put_u32(skb, T_prev_state, sib->os.i) ||
3014                             nla_put_u32(skb, T_new_state, sib->ns.i))
3015                                 goto nla_put_failure;
3016                         break;
3017                 case SIB_HELPER_POST:
3018                         if (nla_put_u32(skb, T_helper_exit_code,
3019                                         sib->helper_exit_code))
3020                                 goto nla_put_failure;
3021                         /* fall through */
3022                 case SIB_HELPER_PRE:
3023                         if (nla_put_string(skb, T_helper, sib->helper_name))
3024                                 goto nla_put_failure;
3025                         break;
3026                 }
3027         }
3028         nla_nest_end(skb, nla);
3029
3030         if (0)
3031 nla_put_failure:
3032                 err = -EMSGSIZE;
3033         if (got_ldev)
3034                 put_ldev(device);
3035         return err;
3036 }
3037
3038 int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
3039 {
3040         struct drbd_config_context adm_ctx;
3041         enum drbd_ret_code retcode;
3042         int err;
3043
3044         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3045         if (!adm_ctx.reply_skb)
3046                 return retcode;
3047         if (retcode != NO_ERROR)
3048                 goto out;
3049
3050         err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.device, NULL);
3051         if (err) {
3052                 nlmsg_free(adm_ctx.reply_skb);
3053                 return err;
3054         }
3055 out:
3056         drbd_adm_finish(&adm_ctx, info, retcode);
3057         return 0;
3058 }
3059
3060 static int get_one_status(struct sk_buff *skb, struct netlink_callback *cb)
3061 {
3062         struct drbd_device *device;
3063         struct drbd_genlmsghdr *dh;
3064         struct drbd_resource *pos = (struct drbd_resource *)cb->args[0];
3065         struct drbd_resource *resource = NULL;
3066         struct drbd_resource *tmp;
3067         unsigned volume = cb->args[1];
3068
3069         /* Open coded, deferred, iteration:
3070          * for_each_resource_safe(resource, tmp, &drbd_resources) {
3071          *      connection = "first connection of resource or undefined";
3072          *      idr_for_each_entry(&resource->devices, device, i) {
3073          *        ...
3074          *      }
3075          * }
3076          * where resource is cb->args[0];
3077          * and i is cb->args[1];
3078          *
3079          * cb->args[2] indicates if we shall loop over all resources,
3080          * or just dump all volumes of a single resource.
3081          *
3082          * This may miss entries inserted after this dump started,
3083          * or entries deleted before they are reached.
3084          *
3085          * We need to make sure the device won't disappear while
3086          * we are looking at it, and revalidate our iterators
3087          * on each iteration.
3088          */
3089
3090         /* synchronize with conn_create()/drbd_destroy_connection() */
3091         rcu_read_lock();
3092         /* revalidate iterator position */
3093         for_each_resource_rcu(tmp, &drbd_resources) {
3094                 if (pos == NULL) {
3095                         /* first iteration */
3096                         pos = tmp;
3097                         resource = pos;
3098                         break;
3099                 }
3100                 if (tmp == pos) {
3101                         resource = pos;
3102                         break;
3103                 }
3104         }
3105         if (resource) {
3106 next_resource:
3107                 device = idr_get_next(&resource->devices, &volume);
3108                 if (!device) {
3109                         /* No more volumes to dump on this resource.
3110                          * Advance resource iterator. */
3111                         pos = list_entry_rcu(resource->resources.next,
3112                                              struct drbd_resource, resources);
3113                         /* Did we dump any volume of this resource yet? */
3114                         if (volume != 0) {
3115                                 /* If we reached the end of the list,
3116                                  * or only a single resource dump was requested,
3117                                  * we are done. */
3118                                 if (&pos->resources == &drbd_resources || cb->args[2])
3119                                         goto out;
3120                                 volume = 0;
3121                                 resource = pos;
3122                                 goto next_resource;
3123                         }
3124                 }
3125
3126                 dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3127                                 cb->nlh->nlmsg_seq, &drbd_genl_family,
3128                                 NLM_F_MULTI, DRBD_ADM_GET_STATUS);
3129                 if (!dh)
3130                         goto out;
3131
3132                 if (!device) {
3133                         /* This is a connection without a single volume.
3134                          * Suprisingly enough, it may have a network
3135                          * configuration. */
3136                         struct drbd_connection *connection;
3137
3138                         dh->minor = -1U;
3139                         dh->ret_code = NO_ERROR;
3140                         connection = the_only_connection(resource);
3141                         if (nla_put_drbd_cfg_context(skb, resource, connection, NULL))
3142                                 goto cancel;
3143                         if (connection) {
3144                                 struct net_conf *nc;
3145
3146                                 nc = rcu_dereference(connection->net_conf);
3147                                 if (nc && net_conf_to_skb(skb, nc, 1) != 0)
3148                                         goto cancel;
3149                         }
3150                         goto done;
3151                 }
3152
3153                 D_ASSERT(device, device->vnr == volume);
3154                 D_ASSERT(device, device->resource == resource);
3155
3156                 dh->minor = device_to_minor(device);
3157                 dh->ret_code = NO_ERROR;
3158
3159                 if (nla_put_status_info(skb, device, NULL)) {
3160 cancel:
3161                         genlmsg_cancel(skb, dh);
3162                         goto out;
3163                 }
3164 done:
3165                 genlmsg_end(skb, dh);
3166         }
3167
3168 out:
3169         rcu_read_unlock();
3170         /* where to start the next iteration */
3171         cb->args[0] = (long)pos;
3172         cb->args[1] = (pos == resource) ? volume + 1 : 0;
3173
3174         /* No more resources/volumes/minors found results in an empty skb.
3175          * Which will terminate the dump. */
3176         return skb->len;
3177 }
3178
3179 /*
3180  * Request status of all resources, or of all volumes within a single resource.
3181  *
3182  * This is a dump, as the answer may not fit in a single reply skb otherwise.
3183  * Which means we cannot use the family->attrbuf or other such members, because
3184  * dump is NOT protected by the genl_lock().  During dump, we only have access
3185  * to the incoming skb, and need to opencode "parsing" of the nlattr payload.
3186  *
3187  * Once things are setup properly, we call into get_one_status().
3188  */
3189 int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
3190 {
3191         const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
3192         struct nlattr *nla;
3193         const char *resource_name;
3194         struct drbd_resource *resource;
3195         int maxtype;
3196
3197         /* Is this a followup call? */
3198         if (cb->args[0]) {
3199                 /* ... of a single resource dump,
3200                  * and the resource iterator has been advanced already? */
3201                 if (cb->args[2] && cb->args[2] != cb->args[0])
3202                         return 0; /* DONE. */
3203                 goto dump;
3204         }
3205
3206         /* First call (from netlink_dump_start).  We need to figure out
3207          * which resource(s) the user wants us to dump. */
3208         nla = nla_find(nlmsg_attrdata(cb->nlh, hdrlen),
3209                         nlmsg_attrlen(cb->nlh, hdrlen),
3210                         DRBD_NLA_CFG_CONTEXT);
3211
3212         /* No explicit context given.  Dump all. */
3213         if (!nla)
3214                 goto dump;
3215         maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
3216         nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
3217         if (IS_ERR(nla))
3218                 return PTR_ERR(nla);
3219         /* context given, but no name present? */
3220         if (!nla)
3221                 return -EINVAL;
3222         resource_name = nla_data(nla);
3223         if (!*resource_name)
3224                 return -ENODEV;
3225         resource = drbd_find_resource(resource_name);
3226         if (!resource)
3227                 return -ENODEV;
3228
3229         kref_put(&resource->kref, drbd_destroy_resource); /* get_one_status() revalidates the resource */
3230
3231         /* prime iterators, and set "filter" mode mark:
3232          * only dump this connection. */
3233         cb->args[0] = (long)resource;
3234         /* cb->args[1] = 0; passed in this way. */
3235         cb->args[2] = (long)resource;
3236
3237 dump:
3238         return get_one_status(skb, cb);
3239 }
3240
3241 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
3242 {
3243         struct drbd_config_context adm_ctx;
3244         enum drbd_ret_code retcode;
3245         struct timeout_parms tp;
3246         int err;
3247
3248         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3249         if (!adm_ctx.reply_skb)
3250                 return retcode;
3251         if (retcode != NO_ERROR)
3252                 goto out;
3253
3254         tp.timeout_type =
3255                 adm_ctx.device->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
3256                 test_bit(USE_DEGR_WFC_T, &adm_ctx.device->flags) ? UT_DEGRADED :
3257                 UT_DEFAULT;
3258
3259         err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
3260         if (err) {
3261                 nlmsg_free(adm_ctx.reply_skb);
3262                 return err;
3263         }
3264 out:
3265         drbd_adm_finish(&adm_ctx, info, retcode);
3266         return 0;
3267 }
3268
3269 int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
3270 {
3271         struct drbd_config_context adm_ctx;
3272         struct drbd_device *device;
3273         enum drbd_ret_code retcode;
3274         struct start_ov_parms parms;
3275
3276         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3277         if (!adm_ctx.reply_skb)
3278                 return retcode;
3279         if (retcode != NO_ERROR)
3280                 goto out;
3281
3282         device = adm_ctx.device;
3283
3284         /* resume from last known position, if possible */
3285         parms.ov_start_sector = device->ov_start_sector;
3286         parms.ov_stop_sector = ULLONG_MAX;
3287         if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
3288                 int err = start_ov_parms_from_attrs(&parms, info);
3289                 if (err) {
3290                         retcode = ERR_MANDATORY_TAG;
3291                         drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3292                         goto out;
3293                 }
3294         }
3295         mutex_lock(&adm_ctx.resource->adm_mutex);
3296
3297         /* w_make_ov_request expects position to be aligned */
3298         device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
3299         device->ov_stop_sector = parms.ov_stop_sector;
3300
3301         /* If there is still bitmap IO pending, e.g. previous resync or verify
3302          * just being finished, wait for it before requesting a new resync. */
3303         drbd_suspend_io(device);
3304         wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
3305         retcode = drbd_request_state(device, NS(conn, C_VERIFY_S));
3306         drbd_resume_io(device);
3307
3308         mutex_unlock(&adm_ctx.resource->adm_mutex);
3309 out:
3310         drbd_adm_finish(&adm_ctx, info, retcode);
3311         return 0;
3312 }
3313
3314
3315 int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
3316 {
3317         struct drbd_config_context adm_ctx;
3318         struct drbd_device *device;
3319         enum drbd_ret_code retcode;
3320         int skip_initial_sync = 0;
3321         int err;
3322         struct new_c_uuid_parms args;
3323
3324         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3325         if (!adm_ctx.reply_skb)
3326                 return retcode;
3327         if (retcode != NO_ERROR)
3328                 goto out_nolock;
3329
3330         device = adm_ctx.device;
3331         memset(&args, 0, sizeof(args));
3332         if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
3333                 err = new_c_uuid_parms_from_attrs(&args, info);
3334                 if (err) {
3335                         retcode = ERR_MANDATORY_TAG;
3336                         drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3337                         goto out_nolock;
3338                 }
3339         }
3340
3341         mutex_lock(&adm_ctx.resource->adm_mutex);
3342         mutex_lock(device->state_mutex); /* Protects us against serialized state changes. */
3343
3344         if (!get_ldev(device)) {
3345                 retcode = ERR_NO_DISK;
3346                 goto out;
3347         }
3348
3349         /* this is "skip initial sync", assume to be clean */
3350         if (device->state.conn == C_CONNECTED &&
3351             first_peer_device(device)->connection->agreed_pro_version >= 90 &&
3352             device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
3353                 drbd_info(device, "Preparing to skip initial sync\n");
3354                 skip_initial_sync = 1;
3355         } else if (device->state.conn != C_STANDALONE) {
3356                 retcode = ERR_CONNECTED;
3357                 goto out_dec;
3358         }
3359
3360         drbd_uuid_set(device, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
3361         drbd_uuid_new_current(device); /* New current, previous to UI_BITMAP */
3362
3363         if (args.clear_bm) {
3364                 err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
3365                         "clear_n_write from new_c_uuid", BM_LOCKED_MASK);
3366                 if (err) {
3367                         drbd_err(device, "Writing bitmap failed with %d\n", err);
3368                         retcode = ERR_IO_MD_DISK;
3369                 }
3370                 if (skip_initial_sync) {
3371                         drbd_send_uuids_skip_initial_sync(first_peer_device(device));
3372                         _drbd_uuid_set(device, UI_BITMAP, 0);
3373                         drbd_print_uuids(device, "cleared bitmap UUID");
3374                         spin_lock_irq(&device->resource->req_lock);
3375                         _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3376                                         CS_VERBOSE, NULL);
3377                         spin_unlock_irq(&device->resource->req_lock);
3378                 }
3379         }
3380
3381         drbd_md_sync(device);
3382 out_dec:
3383         put_ldev(device);
3384 out:
3385         mutex_unlock(device->state_mutex);
3386         mutex_unlock(&adm_ctx.resource->adm_mutex);
3387 out_nolock:
3388         drbd_adm_finish(&adm_ctx, info, retcode);
3389         return 0;
3390 }
3391
3392 static enum drbd_ret_code
3393 drbd_check_resource_name(struct drbd_config_context *adm_ctx)
3394 {
3395         const char *name = adm_ctx->resource_name;
3396         if (!name || !name[0]) {
3397                 drbd_msg_put_info(adm_ctx->reply_skb, "resource name missing");
3398                 return ERR_MANDATORY_TAG;
3399         }
3400         /* if we want to use these in sysfs/configfs/debugfs some day,
3401          * we must not allow slashes */
3402         if (strchr(name, '/')) {
3403                 drbd_msg_put_info(adm_ctx->reply_skb, "invalid resource name");
3404                 return ERR_INVALID_REQUEST;
3405         }
3406         return NO_ERROR;
3407 }
3408
3409 int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
3410 {
3411         struct drbd_config_context adm_ctx;
3412         enum drbd_ret_code retcode;
3413         struct res_opts res_opts;
3414         int err;
3415
3416         retcode = drbd_adm_prepare(&adm_ctx, skb, info, 0);
3417         if (!adm_ctx.reply_skb)
3418                 return retcode;
3419         if (retcode != NO_ERROR)
3420                 goto out;
3421
3422         set_res_opts_defaults(&res_opts);
3423         err = res_opts_from_attrs(&res_opts, info);
3424         if (err && err != -ENOMSG) {
3425                 retcode = ERR_MANDATORY_TAG;
3426                 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3427                 goto out;
3428         }
3429
3430         retcode = drbd_check_resource_name(&adm_ctx);
3431         if (retcode != NO_ERROR)
3432                 goto out;
3433
3434         if (adm_ctx.resource) {
3435                 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
3436                         retcode = ERR_INVALID_REQUEST;
3437                         drbd_msg_put_info(adm_ctx.reply_skb, "resource exists");
3438                 }
3439                 /* else: still NO_ERROR */
3440                 goto out;
3441         }
3442
3443         /* not yet safe for genl_family.parallel_ops */
3444         if (!conn_create(adm_ctx.resource_name, &res_opts))
3445                 retcode = ERR_NOMEM;
3446 out:
3447         drbd_adm_finish(&adm_ctx, info, retcode);
3448         return 0;
3449 }
3450
3451 int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
3452 {
3453         struct drbd_config_context adm_ctx;
3454         struct drbd_genlmsghdr *dh = info->userhdr;
3455         enum drbd_ret_code retcode;
3456
3457         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
3458         if (!adm_ctx.reply_skb)
3459                 return retcode;
3460         if (retcode != NO_ERROR)
3461                 goto out;
3462
3463         if (dh->minor > MINORMASK) {
3464                 drbd_msg_put_info(adm_ctx.reply_skb, "requested minor out of range");
3465                 retcode = ERR_INVALID_REQUEST;
3466                 goto out;
3467         }
3468         if (adm_ctx.volume > DRBD_VOLUME_MAX) {
3469                 drbd_msg_put_info(adm_ctx.reply_skb, "requested volume id out of range");
3470                 retcode = ERR_INVALID_REQUEST;
3471                 goto out;
3472         }
3473
3474         /* drbd_adm_prepare made sure already
3475          * that first_peer_device(device)->connection and device->vnr match the request. */
3476         if (adm_ctx.device) {
3477                 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
3478                         retcode = ERR_MINOR_EXISTS;
3479                 /* else: still NO_ERROR */
3480                 goto out;
3481         }
3482
3483         mutex_lock(&adm_ctx.resource->adm_mutex);
3484         retcode = drbd_create_device(&adm_ctx, dh->minor);
3485         mutex_unlock(&adm_ctx.resource->adm_mutex);
3486 out:
3487         drbd_adm_finish(&adm_ctx, info, retcode);
3488         return 0;
3489 }
3490
3491 static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
3492 {
3493         if (device->state.disk == D_DISKLESS &&
3494             /* no need to be device->state.conn == C_STANDALONE &&
3495              * we may want to delete a minor from a live replication group.
3496              */
3497             device->state.role == R_SECONDARY) {
3498                 _drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS),
3499                                     CS_VERBOSE + CS_WAIT_COMPLETE);
3500                 drbd_delete_device(device);
3501                 return NO_ERROR;
3502         } else
3503                 return ERR_MINOR_CONFIGURED;
3504 }
3505
3506 int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info)
3507 {
3508         struct drbd_config_context adm_ctx;
3509         enum drbd_ret_code retcode;
3510
3511         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3512         if (!adm_ctx.reply_skb)
3513                 return retcode;
3514         if (retcode != NO_ERROR)
3515                 goto out;
3516
3517         mutex_lock(&adm_ctx.resource->adm_mutex);
3518         retcode = adm_del_minor(adm_ctx.device);
3519         mutex_unlock(&adm_ctx.resource->adm_mutex);
3520 out:
3521         drbd_adm_finish(&adm_ctx, info, retcode);
3522         return 0;
3523 }
3524
3525 int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
3526 {
3527         struct drbd_config_context adm_ctx;
3528         struct drbd_resource *resource;
3529         struct drbd_connection *connection;
3530         struct drbd_device *device;
3531         int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
3532         unsigned i;
3533
3534         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
3535         if (!adm_ctx.reply_skb)
3536                 return retcode;
3537         if (retcode != NO_ERROR)
3538                 goto finish;
3539
3540         resource = adm_ctx.resource;
3541         mutex_lock(&resource->adm_mutex);
3542         /* demote */
3543         for_each_connection(connection, resource) {
3544                 struct drbd_peer_device *peer_device;
3545
3546                 idr_for_each_entry(&connection->peer_devices, peer_device, i) {
3547                         retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0);
3548                         if (retcode < SS_SUCCESS) {
3549                                 drbd_msg_put_info(adm_ctx.reply_skb, "failed to demote");
3550                                 goto out;
3551                         }
3552                 }
3553
3554                 retcode = conn_try_disconnect(connection, 0);
3555                 if (retcode < SS_SUCCESS) {
3556                         drbd_msg_put_info(adm_ctx.reply_skb, "failed to disconnect");
3557                         goto out;
3558                 }
3559         }
3560
3561         /* detach */
3562         idr_for_each_entry(&resource->devices, device, i) {
3563                 retcode = adm_detach(device, 0);
3564                 if (retcode < SS_SUCCESS || retcode > NO_ERROR) {
3565                         drbd_msg_put_info(adm_ctx.reply_skb, "failed to detach");
3566                         goto out;
3567                 }
3568         }
3569
3570         /* If we reach this, all volumes (of this connection) are Secondary,
3571          * Disconnected, Diskless, aka Unconfigured. Make sure all threads have
3572          * actually stopped, state handling only does drbd_thread_stop_nowait(). */
3573         for_each_connection(connection, resource)
3574                 drbd_thread_stop(&connection->worker);
3575
3576         /* Now, nothing can fail anymore */
3577
3578         /* delete volumes */
3579         idr_for_each_entry(&resource->devices, device, i) {
3580                 retcode = adm_del_minor(device);
3581                 if (retcode != NO_ERROR) {
3582                         /* "can not happen" */
3583                         drbd_msg_put_info(adm_ctx.reply_skb, "failed to delete volume");
3584                         goto out;
3585                 }
3586         }
3587
3588         list_del_rcu(&resource->resources);
3589         synchronize_rcu();
3590         drbd_free_resource(resource);
3591         retcode = NO_ERROR;
3592 out:
3593         mutex_unlock(&resource->adm_mutex);
3594 finish:
3595         drbd_adm_finish(&adm_ctx, info, retcode);
3596         return 0;
3597 }
3598
3599 int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
3600 {
3601         struct drbd_config_context adm_ctx;
3602         struct drbd_resource *resource;
3603         struct drbd_connection *connection;
3604         enum drbd_ret_code retcode;
3605
3606         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
3607         if (!adm_ctx.reply_skb)
3608                 return retcode;
3609         if (retcode != NO_ERROR)
3610                 goto finish;
3611
3612         resource = adm_ctx.resource;
3613         mutex_lock(&resource->adm_mutex);
3614         for_each_connection(connection, resource) {
3615                 if (connection->cstate > C_STANDALONE) {
3616                         retcode = ERR_NET_CONFIGURED;
3617                         goto out;
3618                 }
3619         }
3620         if (!idr_is_empty(&resource->devices)) {
3621                 retcode = ERR_RES_IN_USE;
3622                 goto out;
3623         }
3624
3625         list_del_rcu(&resource->resources);
3626         for_each_connection(connection, resource)
3627                 drbd_thread_stop(&connection->worker);
3628         synchronize_rcu();
3629         drbd_free_resource(resource);
3630         retcode = NO_ERROR;
3631 out:
3632         mutex_unlock(&resource->adm_mutex);
3633 finish:
3634         drbd_adm_finish(&adm_ctx, info, retcode);
3635         return 0;
3636 }
3637
3638 void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
3639 {
3640         static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
3641         struct sk_buff *msg;
3642         struct drbd_genlmsghdr *d_out;
3643         unsigned seq;
3644         int err = -ENOMEM;
3645
3646         seq = atomic_inc_return(&drbd_genl_seq);
3647         msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
3648         if (!msg)
3649                 goto failed;
3650
3651         err = -EMSGSIZE;
3652         d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT);
3653         if (!d_out) /* cannot happen, but anyways. */
3654                 goto nla_put_failure;
3655         d_out->minor = device_to_minor(device);
3656         d_out->ret_code = NO_ERROR;
3657
3658         if (nla_put_status_info(msg, device, sib))
3659                 goto nla_put_failure;
3660         genlmsg_end(msg, d_out);
3661         err = drbd_genl_multicast_events(msg, 0);
3662         /* msg has been consumed or freed in netlink_broadcast() */
3663         if (err && err != -ESRCH)
3664                 goto failed;
3665
3666         return;
3667
3668 nla_put_failure:
3669         nlmsg_free(msg);
3670 failed:
3671         drbd_err(device, "Error %d while broadcasting event. "
3672                         "Event seq:%u sib_reason:%u\n",
3673                         err, seq, sib->sib_reason);
3674 }