/*
   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */
#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/blkpg.h>
#include <linux/cpumask.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
#include "drbd_state_change.h"
#include <asm/unaligned.h>
#include <linux/drbd_limits.h>
#include <linux/kthread.h>

#include <net/genetlink.h>
// int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
// int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
int drbd_adm_dump_resources(struct sk_buff *skb, struct netlink_callback *cb);
int drbd_adm_dump_devices(struct sk_buff *skb, struct netlink_callback *cb);
int drbd_adm_dump_devices_done(struct netlink_callback *cb);
int drbd_adm_dump_connections(struct sk_buff *skb, struct netlink_callback *cb);
int drbd_adm_dump_connections_done(struct netlink_callback *cb);
int drbd_adm_dump_peer_devices(struct sk_buff *skb, struct netlink_callback *cb);
int drbd_adm_dump_peer_devices_done(struct netlink_callback *cb);
int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb);

#include <linux/drbd_genl_api.h>

#include <linux/genl_magic_func.h>
static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
static atomic_t notify_genl_seq = ATOMIC_INIT(2); /* two. */

DEFINE_MUTEX(notification_mutex);

/* used blkdev_get_by_path, to claim our meta data device(s) */
static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
{
	genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
	if (genlmsg_reply(skb, info))
		pr_err("error sending genl reply\n");
}
/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
 * reason it could fail was no space in skb, and there are 4k available. */
static int drbd_msg_put_info(struct sk_buff *skb, const char *info)
{
	struct nlattr *nla;
	int err = -EMSGSIZE;

	if (!info || !info[0])
		return 0;

	nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
	if (!nla)
		return err;

	err = nla_put_string(skb, T_info_text, info);
	if (err) {
		nla_nest_cancel(skb, nla);
		return err;
	} else
		nla_nest_end(skb, nla);
	return 0;
}
/* This would be a good candidate for a "pre_doit" hook,
 * and per-family private info->pointers.
 * But we need to stay compatible with older kernels.
 * If it returns successfully, adm_ctx members are valid.
 *
 * At this point, we still rely on the global genl_lock().
 * If we want to avoid that, and allow "genl_family.parallel_ops", we may need
 * to add additional synchronization against object destruction/modification.
 */
#define DRBD_ADM_NEED_MINOR	1
#define DRBD_ADM_NEED_RESOURCE	2
#define DRBD_ADM_NEED_CONNECTION 4
static int drbd_adm_prepare(struct drbd_config_context *adm_ctx,
	struct sk_buff *skb, struct genl_info *info, unsigned flags)
{
	struct drbd_genlmsghdr *d_in = info->userhdr;
	const u8 cmd = info->genlhdr->cmd;
	int err;

	memset(adm_ctx, 0, sizeof(*adm_ctx));

	/* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
	if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
		return -EPERM;

	adm_ctx->reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!adm_ctx->reply_skb) {
		err = -ENOMEM;
		goto fail;
	}

	adm_ctx->reply_dh = genlmsg_put_reply(adm_ctx->reply_skb,
					info, &drbd_genl_family, 0, cmd);
	/* put of a few bytes into a fresh skb of >= 4k will always succeed.
	 * but anyways */
	if (!adm_ctx->reply_dh) {
		err = -ENOMEM;
		goto fail;
	}

	adm_ctx->reply_dh->minor = d_in->minor;
	adm_ctx->reply_dh->ret_code = NO_ERROR;

	adm_ctx->volume = VOLUME_UNSPECIFIED;
	if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
		struct nlattr *nla;
		/* parse and validate only */
		err = drbd_cfg_context_from_attrs(NULL, info);
		if (err)
			goto fail;

		/* It was present, and valid,
		 * copy it over to the reply skb. */
		err = nla_put_nohdr(adm_ctx->reply_skb,
				info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
				info->attrs[DRBD_NLA_CFG_CONTEXT]);
		if (err)
			goto fail;

		/* and assign stuff to the adm_ctx */
		nla = nested_attr_tb[__nla_type(T_ctx_volume)];
		if (nla)
			adm_ctx->volume = nla_get_u32(nla);
		nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
		if (nla)
			adm_ctx->resource_name = nla_data(nla);
		adm_ctx->my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
		adm_ctx->peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
		if ((adm_ctx->my_addr &&
		     nla_len(adm_ctx->my_addr) > sizeof(adm_ctx->connection->my_addr)) ||
		    (adm_ctx->peer_addr &&
		     nla_len(adm_ctx->peer_addr) > sizeof(adm_ctx->connection->peer_addr))) {
			err = -EINVAL;
			goto fail;
		}
	}

	adm_ctx->minor = d_in->minor;
	adm_ctx->device = minor_to_device(d_in->minor);

	/* We are protected by the global genl_lock().
	 * But we may explicitly drop it/retake it in drbd_adm_set_role(),
	 * so make sure this object stays around. */
	if (adm_ctx->device)
		kref_get(&adm_ctx->device->kref);

	if (adm_ctx->resource_name) {
		adm_ctx->resource = drbd_find_resource(adm_ctx->resource_name);
	}

	if (!adm_ctx->device && (flags & DRBD_ADM_NEED_MINOR)) {
		drbd_msg_put_info(adm_ctx->reply_skb, "unknown minor");
		return ERR_MINOR_INVALID;
	}
	if (!adm_ctx->resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
		drbd_msg_put_info(adm_ctx->reply_skb, "unknown resource");
		if (adm_ctx->resource_name)
			return ERR_RES_NOT_KNOWN;
		return ERR_INVALID_REQUEST;
	}

	if (flags & DRBD_ADM_NEED_CONNECTION) {
		if (adm_ctx->resource) {
			drbd_msg_put_info(adm_ctx->reply_skb, "no resource name expected");
			return ERR_INVALID_REQUEST;
		}
		if (adm_ctx->device) {
			drbd_msg_put_info(adm_ctx->reply_skb, "no minor number expected");
			return ERR_INVALID_REQUEST;
		}
		if (adm_ctx->my_addr && adm_ctx->peer_addr)
			adm_ctx->connection = conn_get_by_addrs(nla_data(adm_ctx->my_addr),
							nla_len(adm_ctx->my_addr),
							nla_data(adm_ctx->peer_addr),
							nla_len(adm_ctx->peer_addr));
		if (!adm_ctx->connection) {
			drbd_msg_put_info(adm_ctx->reply_skb, "unknown connection");
			return ERR_INVALID_REQUEST;
		}
	}

	/* some more paranoia, if the request was over-determined */
	if (adm_ctx->device && adm_ctx->resource &&
	    adm_ctx->device->resource != adm_ctx->resource) {
		pr_warning("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
				adm_ctx->minor, adm_ctx->resource->name,
				adm_ctx->device->resource->name);
		drbd_msg_put_info(adm_ctx->reply_skb, "minor exists in different resource");
		return ERR_INVALID_REQUEST;
	}
	if (adm_ctx->device &&
	    adm_ctx->volume != VOLUME_UNSPECIFIED &&
	    adm_ctx->volume != adm_ctx->device->vnr) {
		pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
				adm_ctx->minor, adm_ctx->volume,
				adm_ctx->device->vnr,
				adm_ctx->device->resource->name);
		drbd_msg_put_info(adm_ctx->reply_skb, "minor exists as different volume");
		return ERR_INVALID_REQUEST;
	}

	/* still, provide adm_ctx->resource always, if possible. */
	if (!adm_ctx->resource) {
		adm_ctx->resource = adm_ctx->device ? adm_ctx->device->resource
			: adm_ctx->connection ? adm_ctx->connection->resource : NULL;
		if (adm_ctx->resource)
			kref_get(&adm_ctx->resource->kref);
	}

	return NO_ERROR;

fail:
	nlmsg_free(adm_ctx->reply_skb);
	adm_ctx->reply_skb = NULL;
	return err;
}
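
/*
 * Illustration only, not part of the original file: every .doit handler in
 * this file pairs drbd_adm_prepare() with drbd_adm_finish(), roughly like
 * the following sketch for a handler that only needs a minor:
 *
 *	struct drbd_config_context adm_ctx;
 *	enum drbd_ret_code retcode;
 *
 *	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
 *	if (!adm_ctx.reply_skb)
 *		return retcode;		// no reply skb, nothing to finish
 *	if (retcode != NO_ERROR)
 *		goto out;
 *	// ... do the actual work on adm_ctx.device ...
 * out:
 *	drbd_adm_finish(&adm_ctx, info, retcode);
 *	return 0;
 *
 * drbd_adm_set_role() below is a real instance of this pattern.
 */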
static int drbd_adm_finish(struct drbd_config_context *adm_ctx,
	struct genl_info *info, int retcode)
{
	if (adm_ctx->device) {
		kref_put(&adm_ctx->device->kref, drbd_destroy_device);
		adm_ctx->device = NULL;
	}
	if (adm_ctx->connection) {
		kref_put(&adm_ctx->connection->kref, &drbd_destroy_connection);
		adm_ctx->connection = NULL;
	}
	if (adm_ctx->resource) {
		kref_put(&adm_ctx->resource->kref, drbd_destroy_resource);
		adm_ctx->resource = NULL;
	}

	if (!adm_ctx->reply_skb)
		return -ENOMEM;

	adm_ctx->reply_dh->ret_code = retcode;
	drbd_adm_send_reply(adm_ctx->reply_skb, info);
	return 0;
}
static void setup_khelper_env(struct drbd_connection *connection, char **envp)
{
	char *afs;

	/* FIXME: A future version will not allow this case. */
	if (connection->my_addr_len == 0 || connection->peer_addr_len == 0)
		return;

	switch (((struct sockaddr *)&connection->peer_addr)->sa_family) {
	case AF_INET6:
		afs = "ipv6";
		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
			 &((struct sockaddr_in6 *)&connection->peer_addr)->sin6_addr);
		break;
	case AF_INET:
		afs = "ipv4";
		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
			 &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
		break;
	default:
		afs = "ssocks";
		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
			 &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
	}
	snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
}
int drbd_khelper(struct drbd_device *device, char *cmd)
{
	char *envp[] = { "HOME=/",
			"TERM=linux",
			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
			(char[20]) { }, /* address family */
			(char[60]) { }, /* address */
			NULL };
	char mb[14];
	char *argv[] = {usermode_helper, cmd, mb, NULL };
	struct drbd_connection *connection = first_peer_device(device)->connection;
	struct sib_info sib;
	int ret;

	if (current == connection->worker.task)
		set_bit(CALLBACK_PENDING, &connection->flags);

	snprintf(mb, 14, "minor-%d", device_to_minor(device));
	setup_khelper_env(connection, envp);

	/* The helper may take some time.
	 * write out any unsynced meta data changes now */
	drbd_md_sync(device);

	drbd_info(device, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
	sib.sib_reason = SIB_HELPER_PRE;
	sib.helper_name = cmd;
	drbd_bcast_event(device, &sib);
	notify_helper(NOTIFY_CALL, device, connection, cmd, 0);
	ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
	if (ret)
		drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
				usermode_helper, cmd, mb,
				(ret >> 8) & 0xff, ret);
	else
		drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n",
				usermode_helper, cmd, mb,
				(ret >> 8) & 0xff, ret);
	sib.sib_reason = SIB_HELPER_POST;
	sib.helper_exit_code = ret;
	drbd_bcast_event(device, &sib);
	notify_helper(NOTIFY_RESPONSE, device, connection, cmd, ret);

	if (current == connection->worker.task)
		clear_bit(CALLBACK_PENDING, &connection->flags);

	if (ret < 0) /* Ignore any ERRNOs we got. */
		ret = 0;

	return ret;
}
enum drbd_peer_state conn_khelper(struct drbd_connection *connection, char *cmd)
{
	char *envp[] = { "HOME=/",
			"TERM=linux",
			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
			(char[20]) { }, /* address family */
			(char[60]) { }, /* address */
			NULL };
	char *resource_name = connection->resource->name;
	char *argv[] = {usermode_helper, cmd, resource_name, NULL };
	int ret;

	setup_khelper_env(connection, envp);
	conn_md_sync(connection);

	drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name);
	/* TODO: conn_bcast_event() ?? */
	notify_helper(NOTIFY_CALL, NULL, connection, cmd, 0);

	ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
	if (ret)
		drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
			  usermode_helper, cmd, resource_name,
			  (ret >> 8) & 0xff, ret);
	else
		drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
			  usermode_helper, cmd, resource_name,
			  (ret >> 8) & 0xff, ret);
	/* TODO: conn_bcast_event() ?? */
	notify_helper(NOTIFY_RESPONSE, NULL, connection, cmd, ret);

	if (ret < 0) /* Ignore any ERRNOs we got. */
		ret = 0;

	return ret;
}
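
/*
 * Example, illustrative and not from the original file: assuming the
 * default usermode_helper (typically "/sbin/drbdadm") and a resource
 * named "r0", conn_khelper(connection, "fence-peer") execs roughly
 *
 *	/sbin/drbdadm fence-peer r0
 *
 * with DRBD_PEER_AF and DRBD_PEER_ADDRESS in the environment as filled
 * in by setup_khelper_env() above.  The helper's exit status comes back
 * in the usual wait() encoding, which is why callers such as
 * conn_try_outdate_peer() unpack it with (r >> 8) & 0xff.
 */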
static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connection)
{
	enum drbd_fencing_p fp = FP_NOT_AVAIL;
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		if (get_ldev_if_state(device, D_CONSISTENT)) {
			struct disk_conf *disk_conf =
				rcu_dereference(peer_device->device->ldev->disk_conf);
			fp = max_t(enum drbd_fencing_p, fp, disk_conf->fencing);
			put_ldev(device);
		}
	}
	rcu_read_unlock();

	return fp;
}
static bool resource_is_suspended(struct drbd_resource *resource)
{
	return resource->susp || resource->susp_fen || resource->susp_nod;
}
bool conn_try_outdate_peer(struct drbd_connection *connection)
{
	struct drbd_resource * const resource = connection->resource;
	unsigned int connect_cnt;
	union drbd_state mask = { };
	union drbd_state val = { };
	enum drbd_fencing_p fp;
	char *ex_to_string;
	int r;

	spin_lock_irq(&resource->req_lock);
	if (connection->cstate >= C_WF_REPORT_PARAMS) {
		drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n");
		spin_unlock_irq(&resource->req_lock);
		return false;
	}

	connect_cnt = connection->connect_cnt;
	spin_unlock_irq(&resource->req_lock);

	fp = highest_fencing_policy(connection);
	switch (fp) {
	case FP_NOT_AVAIL:
		drbd_warn(connection, "Not fencing peer, I'm not even Consistent myself.\n");
		spin_lock_irq(&resource->req_lock);
		if (connection->cstate < C_WF_REPORT_PARAMS) {
			_conn_request_state(connection,
					    (union drbd_state) { { .susp_fen = 1 } },
					    (union drbd_state) { { .susp_fen = 0 } },
					    CS_VERBOSE | CS_HARD | CS_DC_SUSP);
			/* We are no longer suspended due to the fencing policy.
			 * We may still be suspended due to the on-no-data-accessible policy.
			 * If that was OND_IO_ERROR, fail pending requests. */
			if (!resource_is_suspended(resource))
				_tl_restart(connection, CONNECTION_LOST_WHILE_PENDING);
		}
		/* Else: in case we raced with a connection handshake,
		 * let the handshake figure out if we maybe can RESEND,
		 * and do not resume/fail pending requests here.
		 * Worst case is we stay suspended for now, which may be
		 * resolved by either re-establishing the replication link, or
		 * the next link failure, or eventually the administrator.  */
		spin_unlock_irq(&resource->req_lock);
		return false;

	case FP_DONT_CARE:
		return true;
	default: ;
	}

	r = conn_khelper(connection, "fence-peer");

	switch ((r>>8) & 0xff) {
	case P_INCONSISTENT: /* peer is inconsistent */
		ex_to_string = "peer is inconsistent or worse";
		mask.pdsk = D_MASK;
		val.pdsk = D_INCONSISTENT;
		break;
	case P_OUTDATED: /* peer got outdated, or was already outdated */
		ex_to_string = "peer was fenced";
		mask.pdsk = D_MASK;
		val.pdsk = D_OUTDATED;
		break;
	case P_DOWN: /* peer was down */
		if (conn_highest_disk(connection) == D_UP_TO_DATE) {
			/* we will(have) create(d) a new UUID anyways... */
			ex_to_string = "peer is unreachable, assumed to be dead";
			mask.pdsk = D_MASK;
			val.pdsk = D_OUTDATED;
		} else {
			ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
		}
		break;
	case P_PRIMARY: /* Peer is primary, voluntarily outdate myself.
		 * This is useful when an unconnected R_SECONDARY is asked to
		 * become R_PRIMARY, but finds the other peer being active. */
		ex_to_string = "peer is active";
		drbd_warn(connection, "Peer is primary, outdating myself.\n");
		mask.disk = D_MASK;
		val.disk = D_OUTDATED;
		break;
	case P_FENCING:
		/* THINK: do we need to handle this
		 * like case 4, or more like case 5? */
		if (fp != FP_STONITH)
			drbd_err(connection, "fence-peer() = 7 && fencing != Stonith !!!\n");
		ex_to_string = "peer was stonithed";
		mask.pdsk = D_MASK;
		val.pdsk = D_OUTDATED;
		break;
	default:
		/* The script is broken ... */
		drbd_err(connection, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
		return false; /* Eventually leave IO frozen */
	}

	drbd_info(connection, "fence-peer helper returned %d (%s)\n",
		  (r>>8) & 0xff, ex_to_string);

	/* Not using
	   conn_request_state(connection, mask, val, CS_VERBOSE);
	   here, because we might have been able to re-establish the connection
	   in the meantime. */
	spin_lock_irq(&resource->req_lock);
	if (connection->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &connection->flags)) {
		if (connection->connect_cnt != connect_cnt)
			/* In case the connection was established and dropped
			   while the fence-peer handler was running, ignore it */
			drbd_info(connection, "Ignoring fence-peer exit code\n");
		else
			_conn_request_state(connection, mask, val, CS_VERBOSE);
	}
	spin_unlock_irq(&resource->req_lock);

	return conn_highest_pdsk(connection) <= D_OUTDATED;
}
static int _try_outdate_peer_async(void *data)
{
	struct drbd_connection *connection = (struct drbd_connection *)data;

	conn_try_outdate_peer(connection);

	kref_put(&connection->kref, drbd_destroy_connection);
	return 0;
}
void conn_try_outdate_peer_async(struct drbd_connection *connection)
{
	struct task_struct *opa;

	kref_get(&connection->kref);
	/* We may just have force_sig()'ed this thread
	 * to get it out of some blocking network function.
	 * Clear signals; otherwise kthread_run(), which internally uses
	 * wait_on_completion_killable(), will mistake our pending signal
	 * for a new fatal signal and fail. */
	flush_signals(current);
	opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h");
	if (IS_ERR(opa)) {
		drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n");
		kref_put(&connection->kref, drbd_destroy_connection);
	}
}
enum drbd_state_rv
drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int force)
{
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
	const int max_tries = 4;
	enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
	struct net_conf *nc;
	int try = 0;
	int forced = 0;
	union drbd_state mask, val;

	if (new_role == R_PRIMARY) {
		struct drbd_connection *connection;

		/* Detect dead peers as soon as possible.  */

		rcu_read_lock();
		for_each_connection(connection, device->resource)
			request_ping(connection);
		rcu_read_unlock();
	}

	mutex_lock(device->state_mutex);

	mask.i = 0; mask.role = R_MASK;
	val.i  = 0; val.role  = new_role;

	while (try++ < max_tries) {
		rv = _drbd_request_state_holding_state_mutex(device, mask, val, CS_WAIT_COMPLETE);

		/* in case we first succeeded to outdate,
		 * but now suddenly could establish a connection */
		if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
			val.pdsk = 0;
			mask.pdsk = 0;
			continue;
		}

		if (rv == SS_NO_UP_TO_DATE_DISK && force &&
		    (device->state.disk < D_UP_TO_DATE &&
		     device->state.disk >= D_INCONSISTENT)) {
			mask.disk = D_MASK;
			val.disk  = D_UP_TO_DATE;
			forced = 1;
			continue;
		}

		if (rv == SS_NO_UP_TO_DATE_DISK &&
		    device->state.disk == D_CONSISTENT && mask.pdsk == 0) {
			D_ASSERT(device, device->state.pdsk == D_UNKNOWN);

			if (conn_try_outdate_peer(connection)) {
				val.disk = D_UP_TO_DATE;
				mask.disk = D_MASK;
			}
			continue;
		}

		if (rv == SS_NOTHING_TO_DO)
			goto out;
		if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
			if (!conn_try_outdate_peer(connection) && force) {
				drbd_warn(device, "Forced into split brain situation!\n");
				mask.pdsk = D_MASK;
				val.pdsk  = D_OUTDATED;
			}
			continue;
		}
		if (rv == SS_TWO_PRIMARIES) {
			/* Maybe the peer is detected as dead very soon...
			   retry at most once more in this case. */
			int timeo;
			rcu_read_lock();
			nc = rcu_dereference(connection->net_conf);
			timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
			rcu_read_unlock();
			schedule_timeout_interruptible(timeo);
			if (try < max_tries)
				try = max_tries - 1;
			continue;
		}
		if (rv < SS_SUCCESS) {
			rv = _drbd_request_state(device, mask, val,
						CS_VERBOSE + CS_WAIT_COMPLETE);
			if (rv < SS_SUCCESS)
				goto out;
		}
		break;
	}

	if (rv < SS_SUCCESS)
		goto out;

	if (forced)
		drbd_warn(device, "Forced to consider local data as UpToDate!\n");

	/* Wait until nothing is on the fly :) */
	wait_event(device->misc_wait, atomic_read(&device->ap_pending_cnt) == 0);

	/* FIXME also wait for all pending P_BARRIER_ACK? */

	if (new_role == R_SECONDARY) {
		if (get_ldev(device)) {
			device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
			put_ldev(device);
		}
	} else {
		mutex_lock(&device->resource->conf_update);
		nc = connection->net_conf;
		if (nc)
			nc->discard_my_data = 0; /* without copy; single bit op is atomic */
		mutex_unlock(&device->resource->conf_update);

		if (get_ldev(device)) {
			if (((device->state.conn < C_CONNECTED ||
			       device->state.pdsk <= D_FAILED)
			      && device->ldev->md.uuid[UI_BITMAP] == 0) || forced)
				drbd_uuid_new_current(device);

			device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
			put_ldev(device);
		}
	}

	/* writeout of activity log covered areas of the bitmap
	 * to stable storage done in after state change already */

	if (device->state.conn >= C_WF_REPORT_PARAMS) {
		/* if this was forced, we should consider sync */
		if (forced)
			drbd_send_uuids(peer_device);
		drbd_send_current_state(peer_device);
	}

	drbd_md_sync(device);
	set_disk_ro(device->vdisk, new_role == R_SECONDARY);
	kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
out:
	mutex_unlock(device->state_mutex);
	return rv;
}
static const char *from_attrs_err_to_txt(int err)
{
	return	err == -ENOMSG ? "required attribute missing" :
		err == -EOPNOTSUPP ? "unknown mandatory attribute" :
		err == -EEXIST ? "can not change invariant setting" :
		"invalid attribute value";
}
int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	struct set_role_parms parms;
	int err;
	enum drbd_ret_code retcode;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	memset(&parms, 0, sizeof(parms));
	if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
		err = set_role_parms_from_attrs(&parms, info);
		if (err) {
			retcode = ERR_MANDATORY_TAG;
			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
			goto out;
		}
	}
	genl_unlock();
	mutex_lock(&adm_ctx.resource->adm_mutex);

	if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
		retcode = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
	else
		retcode = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);

	mutex_unlock(&adm_ctx.resource->adm_mutex);
	genl_lock();
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
/* Initializes the md.*_offset members, so we are able to find
 * the on disk meta data.
 *
 * We currently have two possible layouts:
 * external:
 *   |----------- md_size_sect ------------------|
 *   [ 4k superblock ][ activity log ][  Bitmap  ]
 *   | al_offset == 8 |
 *   | bm_offset = al_offset + X      |
 *  ==> bitmap sectors = md_size_sect - bm_offset
 *
 * internal:
 *            |----------- md_size_sect ------------------|
 * [data.....][  Bitmap  ][ activity log ][ 4k superblock ]
 *                        | al_offset < 0 |
 *            | bm_offset = al_offset - Y |
 *  ==> bitmap sectors = Y = al_offset - bm_offset
 *
 *  Activity log size used to be fixed 32kB,
 *  but is about to become configurable.
 */
static void drbd_md_set_sector_offsets(struct drbd_device *device,
				       struct drbd_backing_dev *bdev)
{
	sector_t md_size_sect = 0;
	unsigned int al_size_sect = bdev->md.al_size_4k * 8;

	bdev->md.md_offset = drbd_md_ss(bdev);

	switch (bdev->md.meta_dev_idx) {
	default:
		/* v07 style fixed size indexed meta data */
		bdev->md.md_size_sect = MD_128MB_SECT;
		bdev->md.al_offset = MD_4kB_SECT;
		bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
		break;
	case DRBD_MD_INDEX_FLEX_EXT:
		/* just occupy the full device; unit: sectors */
		bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
		bdev->md.al_offset = MD_4kB_SECT;
		bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
		break;
	case DRBD_MD_INDEX_INTERNAL:
	case DRBD_MD_INDEX_FLEX_INT:
		/* al size is still fixed */
		bdev->md.al_offset = -al_size_sect;
		/* we need (slightly less than) ~ this much bitmap sectors: */
		md_size_sect = drbd_get_capacity(bdev->backing_bdev);
		md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
		md_size_sect = BM_SECT_TO_EXT(md_size_sect);
		md_size_sect = ALIGN(md_size_sect, 8);

		/* plus the "drbd meta data super block",
		 * and the activity log; */
		md_size_sect += MD_4kB_SECT + al_size_sect;

		bdev->md.md_size_sect = md_size_sect;
		/* bitmap offset is adjusted by 'super' block size */
		bdev->md.bm_offset = -md_size_sect + MD_4kB_SECT;
		break;
	}
}
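
/*
 * Worked example, an illustration that is not part of the original file,
 * assuming the usual 16 MiB bitmap extents (BM_SECT_PER_EXT == 32768):
 * internal meta data on a 1 TiB (2^31 sectors) backing device with the
 * default 32 kB activity log (al_size_4k == 8, so al_size_sect == 64):
 *
 *	bitmap: one bit per 4 kiB, so one 512 B bitmap sector covers 16 MiB;
 *	        2^31 / 32768 = 65536 bitmap sectors (32 MiB of bitmap)
 *	md_size_sect = 65536 + 8 (4k superblock) + 64 (AL) = 65608
 *	al_offset    = -64
 *	bm_offset    = -65608 + 8 = -65600
 *
 * i.e. roughly the last 32 MiB of the device are reserved for meta data.
 */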
/* input size is expected to be in KB */
char *ppsize(char *buf, unsigned long long size)
{
	/* Needs 9 bytes at max including trailing NUL:
	 * -1ULL ==> "16384 EB" */
	static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
	int base = 0;
	while (size >= 10000 && base < sizeof(units)-1) {
		/* shift + round */
		size = (size >> 10) + !!(size & (1<<9));
		base++;
	}
	sprintf(buf, "%u %cB", (unsigned)size, units[base]);

	return buf;
}
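
/*
 * Example (illustration only): ppsize(buf, 1048576), i.e. 1 GiB worth of
 * KB, shifts once (1048576 >> 10 == 1024, rounding on bit 9) and yields
 * "1024 MB".  The extreme input -1ULL saturates after five shifts at
 * "16384 EB", 8 characters plus NUL, which is why 9 bytes of buffer are
 * always enough.
 */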
/* there is still a theoretical deadlock when called from receiver
 * on a D_INCONSISTENT R_PRIMARY:
 * remote READ does inc_ap_bio, receiver would need to receive answer
 * packet from remote to dec_ap_bio again.
 * receiver receive_sizes(), comes here,
 * waits for ap_bio_cnt == 0. -> deadlock.
 * but this cannot happen, actually, because:
 * R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
 * (not connected, or bad/no disk on peer):
 * see drbd_fail_request_early, ap_bio_cnt is zero.
 * R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
 * peer may not initiate a resize.
 */
/* Note these are not to be confused with
 * drbd_adm_suspend_io/drbd_adm_resume_io,
 * which are (sub) state changes triggered by admin (drbdsetup),
 * and can be long lived.
 * This changes a device->flag, is triggered by drbd internals,
 * and should be short-lived. */
/* It needs to be a counter, since multiple threads might
   independently suspend and resume IO. */
void drbd_suspend_io(struct drbd_device *device)
{
	atomic_inc(&device->suspend_cnt);
	if (drbd_suspended(device))
		return;
	wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));
}

void drbd_resume_io(struct drbd_device *device)
{
	if (atomic_dec_and_test(&device->suspend_cnt))
		wake_up(&device->misc_wait);
}
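
/*
 * Typical pairing (illustration only): callers bracket meta data layout
 * changes like
 *
 *	drbd_suspend_io(device);
 *	// ... change on-disk md offsets, rewrite bitmap/AL ...
 *	drbd_resume_io(device);
 *
 * The counter makes nested use safe; IO resumes only once the last
 * suspender calls drbd_resume_io().  drbd_determine_dev_size() below is
 * the canonical user of this pattern.
 */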
/**
 * drbd_determine_dev_size() - Sets the right device size obeying all constraints
 * @device:	DRBD device.
 *
 * Returns 0 on success, negative return values indicate errors.
 * You should call drbd_md_sync() after calling this function.
 */
enum determine_dev_size
drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
{
	struct md_offsets_and_sizes {
		u64 last_agreed_sect;
		u64 md_offset;
		s32 al_offset;
		s32 bm_offset;
		u32 md_size_sect;

		u32 al_stripes;
		u32 al_stripe_size_4k;
	} prev;
	sector_t u_size, size;
	struct drbd_md *md = &device->ldev->md;
	char ppb[10];
	void *buffer;

	int md_moved, la_size_changed;
	enum determine_dev_size rv = DS_UNCHANGED;

	/* We may change the on-disk offsets of our meta data below.  Lock out
	 * anything that may cause meta data IO, to avoid acting on incomplete
	 * layout changes or scribbling over meta data that is in the process
	 * of being moved.
	 *
	 * Move is not exactly correct, btw, currently we have all our meta
	 * data in core memory, to "move" it we just write it all out, there
	 * are no reads. */
	drbd_suspend_io(device);
	buffer = drbd_md_get_buffer(device, __func__); /* Lock meta-data IO */
	if (!buffer) {
		drbd_resume_io(device);
		return DS_ERROR;
	}

	/* remember current offset and sizes */
	prev.last_agreed_sect = md->la_size_sect;
	prev.md_offset = md->md_offset;
	prev.al_offset = md->al_offset;
	prev.bm_offset = md->bm_offset;
	prev.md_size_sect = md->md_size_sect;
	prev.al_stripes = md->al_stripes;
	prev.al_stripe_size_4k = md->al_stripe_size_4k;

	if (rs) {
		/* rs is non NULL if we should change the AL layout only */
		md->al_stripes = rs->al_stripes;
		md->al_stripe_size_4k = rs->al_stripe_size / 4;
		md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
	}

	drbd_md_set_sector_offsets(device, device->ldev);

	rcu_read_lock();
	u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
	rcu_read_unlock();
	size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED);

	if (size < prev.last_agreed_sect) {
		if (rs && u_size == 0) {
			/* Remove "rs &&" later. This check should always be active, but
			   right now the receiver expects the permissive behavior */
			drbd_warn(device, "Implicit shrink not allowed. "
				 "Use --size=%llus for explicit shrink.\n",
				 (unsigned long long)size);
			rv = DS_ERROR_SHRINK;
		}
		if (u_size > size)
			rv = DS_ERROR_SPACE_MD;
		if (rv != DS_UNCHANGED)
			goto err_out;
	}

	if (drbd_get_capacity(device->this_bdev) != size ||
	    drbd_bm_capacity(device) != size) {
		int err;
		err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC));
		if (unlikely(err)) {
			/* currently there is only one error: ENOMEM! */
			size = drbd_bm_capacity(device);
			if (size == 0) {
				drbd_err(device, "OUT OF MEMORY! "
				    "Could not allocate bitmap!\n");
			} else {
				drbd_err(device, "BM resizing failed. "
				    "Leaving size unchanged\n");
			}
			rv = DS_ERROR;
		}
		/* racy, see comments above. */
		drbd_set_my_capacity(device, size);
		md->la_size_sect = size;
		drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
		     (unsigned long long)size>>1);
	}
	if (rv <= DS_ERROR)
		goto err_out;

	la_size_changed = (prev.last_agreed_sect != md->la_size_sect);

	md_moved = prev.md_offset    != md->md_offset
		|| prev.md_size_sect != md->md_size_sect;

	if (la_size_changed || md_moved || rs) {
		u32 prev_flags;

		/* We do some synchronous IO below, which may take some time.
		 * Clear the timer, to avoid scary "timer expired!" messages,
		 * "Superblock" is written out at least twice below, anyways. */
		del_timer(&device->md_sync_timer);

		/* We won't change the "al-extents" setting, we just may need
		 * to move the on-disk location of the activity log ringbuffer.
		 * Lock for transaction is good enough, it may well be "dirty"
		 * or even "starving". */
		wait_event(device->al_wait, lc_try_lock_for_transaction(device->act_log));

		/* mark current on-disk bitmap and activity log as unreliable */
		prev_flags = md->flags;
		md->flags |= MDF_FULL_SYNC | MDF_AL_DISABLED;
		drbd_md_write(device, buffer);

		drbd_al_initialize(device, buffer);

		drbd_info(device, "Writing the whole bitmap, %s\n",
			 la_size_changed && md_moved ? "size changed and md moved" :
			 la_size_changed ? "size changed" : "md moved");
		/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
		drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
			       "size changed", BM_LOCKED_MASK);

		/* on-disk bitmap and activity log is authoritative again
		 * (unless there was an IO error meanwhile...) */
		md->flags = prev_flags;
		drbd_md_write(device, buffer);

		if (rs)
			drbd_info(device, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
				  md->al_stripes, md->al_stripe_size_4k * 4);
	}

	if (size > prev.last_agreed_sect)
		rv = prev.last_agreed_sect ? DS_GREW : DS_GREW_FROM_ZERO;
	if (size < prev.last_agreed_sect)
		rv = DS_SHRUNK;

	if (0) {
	err_out:
		/* restore previous offset and sizes */
		md->la_size_sect = prev.last_agreed_sect;
		md->md_offset = prev.md_offset;
		md->al_offset = prev.al_offset;
		md->bm_offset = prev.bm_offset;
		md->md_size_sect = prev.md_size_sect;
		md->al_stripes = prev.al_stripes;
		md->al_stripe_size_4k = prev.al_stripe_size_4k;
		md->al_size_4k = (u64)prev.al_stripes * prev.al_stripe_size_4k;
	}
	lc_unlock(device->act_log);
	wake_up(&device->al_wait);
	drbd_md_put_buffer(device);
	drbd_resume_io(device);

	return rv;
}
sector_t
drbd_new_dev_size(struct drbd_device *device, struct drbd_backing_dev *bdev,
		  sector_t u_size, int assume_peer_has_space)
{
	sector_t p_size = device->p_size;   /* partner's disk size. */
	sector_t la_size_sect = bdev->md.la_size_sect; /* last agreed size. */
	sector_t m_size; /* my size */
	sector_t size = 0;

	m_size = drbd_get_max_capacity(bdev);

	if (device->state.conn < C_CONNECTED && assume_peer_has_space) {
		drbd_warn(device, "Resize while not connected was forced by the user!\n");
		p_size = m_size;
	}

	if (p_size && m_size) {
		size = min_t(sector_t, p_size, m_size);
	} else {
		if (la_size_sect) {
			size = la_size_sect;
			if (m_size && m_size < size)
				size = m_size;
			if (p_size && p_size < size)
				size = p_size;
		} else {
			if (m_size)
				size = m_size;
			if (p_size)
				size = p_size;
		}
	}

	if (size == 0)
		drbd_err(device, "Both nodes diskless!\n");

	if (u_size) {
		if (u_size > size)
			drbd_err(device, "Requested disk size is too big (%lu > %lu)\n",
			    (unsigned long)u_size>>1, (unsigned long)size>>1);
		else
			size = u_size;
	}

	return size;
}
/**
 * drbd_check_al_size() - Ensures that the AL is of the right size
 * @device:	DRBD device.
 *
 * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
 * failed, and 0 on success. You should call drbd_md_sync() after you called
 * this function.
 */
static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
{
	struct lru_cache *n, *t;
	struct lc_element *e;
	unsigned int in_use;
	int i;

	if (device->act_log &&
	    device->act_log->nr_elements == dc->al_extents)
		return 0;

	in_use = 0;
	t = device->act_log;
	n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
		dc->al_extents, sizeof(struct lc_element), 0);

	if (n == NULL) {
		drbd_err(device, "Cannot allocate act_log lru!\n");
		return -ENOMEM;
	}
	spin_lock_irq(&device->al_lock);
	if (t) {
		for (i = 0; i < t->nr_elements; i++) {
			e = lc_element_by_index(t, i);
			if (e->refcnt)
				drbd_err(device, "refcnt(%d)==%d\n",
				    e->lc_number, e->refcnt);
			in_use += e->refcnt;
		}
	}
	if (!in_use)
		device->act_log = n;
	spin_unlock_irq(&device->al_lock);
	if (in_use) {
		drbd_err(device, "Activity log still in use!\n");
		lc_destroy(n);
		return -EBUSY;
	} else {
		lc_destroy(t);
	}
	drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elements */
	return 0;
}
static void blk_queue_discard_granularity(struct request_queue *q, unsigned int granularity)
{
	q->limits.discard_granularity = granularity;
}
static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection)
{
	/* when we introduced REQ_WRITE_SAME support, we also bumped
	 * our maximum supported batch bio size used for discards. */
	if (connection->agreed_features & DRBD_FF_WSAME)
		return DRBD_MAX_BBIO_SECTORS;
	/* before, with DRBD <= 8.4.6, we only allowed up to one AL_EXTENT_SIZE. */
	return AL_EXTENT_SIZE >> 9;
}
static void decide_on_discard_support(struct drbd_device *device,
			struct request_queue *q,
			struct request_queue *b,
			bool discard_zeroes_if_aligned)
{
	/* q = drbd device queue (device->rq_queue)
	 * b = backing device queue (device->ldev->backing_bdev->bd_disk->queue),
	 *     or NULL if diskless
	 */
	struct drbd_connection *connection = first_peer_device(device)->connection;
	bool can_do = b ? blk_queue_discard(b) : true;

	if (can_do && b && !b->limits.discard_zeroes_data && !discard_zeroes_if_aligned) {
		can_do = false;
		drbd_info(device, "discard_zeroes_data=0 and discard_zeroes_if_aligned=no: disabling discards\n");
	}
	if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_TRIM)) {
		can_do = false;
		drbd_info(connection, "peer DRBD too old, does not support TRIM: disabling discards\n");
	}
	if (can_do) {
		/* We don't care for the granularity, really.
		 * Stacking limits below should fix it for the local
		 * device.  Whether or not it is a suitable granularity
		 * on the remote device is not our problem, really. If
		 * you care, you need to use devices with similar
		 * topology on all peers. */
		blk_queue_discard_granularity(q, 512);
		q->limits.max_discard_sectors = drbd_max_discard_sectors(connection);
		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
	} else {
		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
		blk_queue_discard_granularity(q, 0);
		q->limits.max_discard_sectors = 0;
	}
}
static void fixup_discard_if_not_supported(struct request_queue *q)
{
	/* To avoid confusion, if this queue does not support discard, clear
	 * max_discard_sectors, which is what lsblk -D reports to the user.
	 * Older kernels got this wrong in "stack limits".
	 * */
	if (!blk_queue_discard(q)) {
		blk_queue_max_discard_sectors(q, 0);
		blk_queue_discard_granularity(q, 0);
	}
}
static void decide_on_write_same_support(struct drbd_device *device,
			struct request_queue *q,
			struct request_queue *b, struct o_qlim *o)
{
	struct drbd_peer_device *peer_device = first_peer_device(device);
	struct drbd_connection *connection = peer_device->connection;
	bool can_do = b ? b->limits.max_write_same_sectors : true;

	if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_WSAME)) {
		can_do = false;
		drbd_info(peer_device, "peer does not support WRITE_SAME\n");
	}

	if (o) {
		/* logical block size; queue_logical_block_size(NULL) is 512 */
		unsigned int peer_lbs = be32_to_cpu(o->logical_block_size);
		unsigned int me_lbs_b = queue_logical_block_size(b);
		unsigned int me_lbs = queue_logical_block_size(q);

		if (me_lbs_b != me_lbs) {
			drbd_warn(device,
				"logical block size of local backend does not match (drbd:%u, backend:%u); was this a late attach?\n",
				me_lbs, me_lbs_b);
			/* rather disable write same than trigger some BUG_ON later in the scsi layer. */
			can_do = false;
		}
		if (me_lbs_b != peer_lbs) {
			drbd_warn(peer_device, "logical block sizes do not match (me:%u, peer:%u); this may cause problems.\n",
				me_lbs, peer_lbs);
			if (can_do) {
				drbd_dbg(peer_device, "logical block size mismatch: WRITE_SAME disabled.\n");
				can_do = false;
			}
			me_lbs = max(me_lbs, me_lbs_b);
			/* We cannot change the logical block size of an in-use queue.
			 * We can only hope that access happens to be properly aligned.
			 * If not, the peer will likely produce an IO error, and detach. */
			if (peer_lbs > me_lbs) {
				if (device->state.role != R_PRIMARY) {
					blk_queue_logical_block_size(q, peer_lbs);
					drbd_warn(peer_device, "logical block size set to %u\n", peer_lbs);
				} else {
					drbd_warn(peer_device,
						"current Primary must NOT adjust logical block size (%u -> %u); hope for the best.\n",
						me_lbs, peer_lbs);
				}
			}
		}
		if (can_do && !o->write_same_capable) {
			/* If we introduce an open-coded write-same loop on the receiving side,
			 * the peer would present itself as "capable". */
			drbd_dbg(peer_device, "WRITE_SAME disabled (peer device not capable)\n");
			can_do = false;
		}
	}

	blk_queue_max_write_same_sectors(q, can_do ? DRBD_MAX_BBIO_SECTORS : 0);
}
static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev,
				   unsigned int max_bio_size, struct o_qlim *o)
{
	struct request_queue * const q = device->rq_queue;
	unsigned int max_hw_sectors = max_bio_size >> 9;
	unsigned int max_segments = 0;
	struct request_queue *b = NULL;
	struct disk_conf *dc;
	bool discard_zeroes_if_aligned = true;

	if (bdev) {
		b = bdev->backing_bdev->bd_disk->queue;

		max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
		rcu_read_lock();
		dc = rcu_dereference(device->ldev->disk_conf);
		max_segments = dc->max_bio_bvecs;
		discard_zeroes_if_aligned = dc->discard_zeroes_if_aligned;
		rcu_read_unlock();

		blk_set_stacking_limits(&q->limits);
	}

	blk_queue_max_hw_sectors(q, max_hw_sectors);
	/* This is the workaround for "bio would need to, but cannot, be split" */
	blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
	blk_queue_segment_boundary(q, PAGE_SIZE-1);
	decide_on_discard_support(device, q, b, discard_zeroes_if_aligned);
	decide_on_write_same_support(device, q, b, o);

	if (b) {
		blk_queue_stack_limits(q, b);

		if (q->backing_dev_info->ra_pages !=
		    b->backing_dev_info->ra_pages) {
			drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
				 q->backing_dev_info->ra_pages,
				 b->backing_dev_info->ra_pages);
			q->backing_dev_info->ra_pages =
						b->backing_dev_info->ra_pages;
		}
	}
	fixup_discard_if_not_supported(q);
}
void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o)
{
	unsigned int now, new, local, peer;

	now = queue_max_hw_sectors(device->rq_queue) << 9;
	local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */
	peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */

	if (bdev) {
		local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9;
		device->local_max_bio_size = local;
	}
	local = min(local, DRBD_MAX_BIO_SIZE);

	/* We may ignore peer limits if the peer is modern enough.
	   Because new from 8.3.8 onwards the peer can use multiple
	   BIOs for a single peer_request */
	if (device->state.conn >= C_WF_REPORT_PARAMS) {
		if (first_peer_device(device)->connection->agreed_pro_version < 94)
			peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
		/* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
		else if (first_peer_device(device)->connection->agreed_pro_version == 94)
			peer = DRBD_MAX_SIZE_H80_PACKET;
		else if (first_peer_device(device)->connection->agreed_pro_version < 100)
			peer = DRBD_MAX_BIO_SIZE_P95;  /* drbd 8.3.8 onwards, before 8.4.0 */
		else
			peer = DRBD_MAX_BIO_SIZE;

		/* We may later detach and re-attach on a disconnected Primary.
		 * Avoid this setting to jump back in that case.
		 * We want to store what we know the peer DRBD can handle,
		 * not what the peer IO backend can handle. */
		if (peer > device->peer_max_bio_size)
			device->peer_max_bio_size = peer;
	}
	new = min(local, peer);

	if (device->state.role == R_PRIMARY && new < now)
		drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now);

	if (new != now)
		drbd_info(device, "max BIO size = %u\n", new);

	drbd_setup_queue_param(device, bdev, new, o);
}
/* Starts the worker thread */
static void conn_reconfig_start(struct drbd_connection *connection)
{
	drbd_thread_start(&connection->worker);
	drbd_flush_workqueue(&connection->sender_work);
}

/* if still unconfigured, stops worker again. */
static void conn_reconfig_done(struct drbd_connection *connection)
{
	bool stop_threads;
	spin_lock_irq(&connection->resource->req_lock);
	stop_threads = conn_all_vols_unconf(connection) &&
		connection->cstate == C_STANDALONE;
	spin_unlock_irq(&connection->resource->req_lock);
	if (stop_threads) {
		/* ack_receiver thread and ack_sender workqueue are implicitly
		 * stopped by receiver in conn_disconnect() */
		drbd_thread_stop(&connection->receiver);
		drbd_thread_stop(&connection->worker);
	}
}
/* Make sure IO is suspended before calling this function(). */
static void drbd_suspend_al(struct drbd_device *device)
{
	int s = 0;

	if (!lc_try_lock(device->act_log)) {
		drbd_warn(device, "Failed to lock al in drbd_suspend_al()\n");
		return;
	}

	drbd_al_shrink(device);
	spin_lock_irq(&device->resource->req_lock);
	if (device->state.conn < C_CONNECTED)
		s = !test_and_set_bit(AL_SUSPENDED, &device->flags);
	spin_unlock_irq(&device->resource->req_lock);
	lc_unlock(device->act_log);

	if (s)
		drbd_info(device, "Suspended AL updates\n");
}
static bool should_set_defaults(struct genl_info *info)
{
	unsigned flags = ((struct drbd_genlmsghdr*)info->userhdr)->flags;
	return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS);
}
static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
{
	/* This is limited by 16 bit "slot" numbers,
	 * and by available on-disk context storage.
	 *
	 * Also (u16)~0 is special (denotes a "free" extent).
	 *
	 * One transaction occupies one 4kB on-disk block,
	 * we have n such blocks in the on disk ring buffer,
	 * the "current" transaction may fail (n-1),
	 * and there are 919 slot numbers of context information per transaction.
	 *
	 * 72 transaction blocks amounts to more than 2**16 context slots,
	 * so cap there first.
	 */
	const unsigned int max_al_nr = DRBD_AL_EXTENTS_MAX;
	const unsigned int sufficient_on_disk =
		(max_al_nr + AL_CONTEXT_PER_TRANSACTION -1)
		/AL_CONTEXT_PER_TRANSACTION;

	unsigned int al_size_4k = bdev->md.al_size_4k;

	if (al_size_4k > sufficient_on_disk)
		return max_al_nr;

	return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
}
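
/*
 * Worked arithmetic (illustration only, assuming DRBD_AL_EXTENTS_MAX is
 * 65534 and AL_CONTEXT_PER_TRANSACTION is the 919 mentioned above):
 * sufficient_on_disk = (65534 + 918) / 919 == 72 transaction blocks,
 * matching the "72 transaction blocks" remark in the comment.  A default
 * 32 kB activity log (al_size_4k == 8) therefore caps al-extents at
 * (8 - 1) * 919 == 6433.
 */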
static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b)
{
	return	a->disk_barrier != b->disk_barrier ||
		a->disk_flushes != b->disk_flushes ||
		a->disk_drain != b->disk_drain;
}
static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *disk_conf,
			       struct drbd_backing_dev *nbc)
{
	struct request_queue * const q = nbc->backing_bdev->bd_disk->queue;

	if (disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
		disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
	if (disk_conf->al_extents > drbd_al_extents_max(nbc))
		disk_conf->al_extents = drbd_al_extents_max(nbc);

	if (!blk_queue_discard(q)
	    || (!q->limits.discard_zeroes_data && !disk_conf->discard_zeroes_if_aligned)) {
		if (disk_conf->rs_discard_granularity) {
			disk_conf->rs_discard_granularity = 0; /* disable feature */
			drbd_info(device, "rs_discard_granularity feature disabled\n");
		}
	}

	if (disk_conf->rs_discard_granularity) {
		int orig_value = disk_conf->rs_discard_granularity;
		int remainder;

		if (q->limits.discard_granularity > disk_conf->rs_discard_granularity)
			disk_conf->rs_discard_granularity = q->limits.discard_granularity;

		remainder = disk_conf->rs_discard_granularity % q->limits.discard_granularity;
		disk_conf->rs_discard_granularity += remainder;

		if (disk_conf->rs_discard_granularity > q->limits.max_discard_sectors << 9)
			disk_conf->rs_discard_granularity = q->limits.max_discard_sectors << 9;

		if (disk_conf->rs_discard_granularity != orig_value)
			drbd_info(device, "rs_discard_granularity changed to %d\n",
				  disk_conf->rs_discard_granularity);
	}
}
int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	enum drbd_ret_code retcode;
	struct drbd_device *device;
	struct disk_conf *new_disk_conf, *old_disk_conf;
	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
	int err, fifo_size;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto finish;

	device = adm_ctx.device;
	mutex_lock(&adm_ctx.resource->adm_mutex);

	/* we also need a disk
	 * to change the options on */
	if (!get_ldev(device)) {
		retcode = ERR_NO_DISK;
		goto out;
	}

	new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
	if (!new_disk_conf) {
		retcode = ERR_NOMEM;
		goto fail;
	}

	mutex_lock(&device->resource->conf_update);
	old_disk_conf = device->ldev->disk_conf;
	*new_disk_conf = *old_disk_conf;
	if (should_set_defaults(info))
		set_disk_conf_defaults(new_disk_conf);

	err = disk_conf_from_attrs_for_change(new_disk_conf, info);
	if (err && err != -ENOMSG) {
		retcode = ERR_MANDATORY_TAG;
		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
		goto fail_unlock;
	}

	if (!expect(new_disk_conf->resync_rate >= 1))
		new_disk_conf->resync_rate = 1;

	sanitize_disk_conf(device, new_disk_conf, device->ldev);

	if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
		new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;

	fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
	if (fifo_size != device->rs_plan_s->size) {
		new_plan = fifo_alloc(fifo_size);
		if (!new_plan) {
			drbd_err(device, "kmalloc of fifo_buffer failed");
			retcode = ERR_NOMEM;
			goto fail_unlock;
		}
	}

	drbd_suspend_io(device);
	wait_event(device->al_wait, lc_try_lock(device->act_log));
	drbd_al_shrink(device);
	err = drbd_check_al_size(device, new_disk_conf);
	lc_unlock(device->act_log);
	wake_up(&device->al_wait);
	drbd_resume_io(device);

	if (err) {
		retcode = ERR_NOMEM;
		goto fail_unlock;
	}

	lock_all_resources();
	retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
	if (retcode == NO_ERROR) {
		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
		drbd_resync_after_changed(device);
	}
	unlock_all_resources();

	if (retcode != NO_ERROR)
		goto fail_unlock;

	if (new_plan) {
		old_plan = device->rs_plan_s;
		rcu_assign_pointer(device->rs_plan_s, new_plan);
	}

	mutex_unlock(&device->resource->conf_update);

	if (new_disk_conf->al_updates)
		device->ldev->md.flags &= ~MDF_AL_DISABLED;
	else
		device->ldev->md.flags |= MDF_AL_DISABLED;

	if (new_disk_conf->md_flushes)
		clear_bit(MD_NO_FUA, &device->flags);
	else
		set_bit(MD_NO_FUA, &device->flags);

	if (write_ordering_changed(old_disk_conf, new_disk_conf))
		drbd_bump_write_ordering(device->resource, NULL, WO_BDEV_FLUSH);

	if (old_disk_conf->discard_zeroes_if_aligned != new_disk_conf->discard_zeroes_if_aligned)
		drbd_reconsider_queue_parameters(device, device->ldev, NULL);

	drbd_md_sync(device);

	if (device->state.conn >= C_CONNECTED) {
		struct drbd_peer_device *peer_device;

		for_each_peer_device(peer_device, device)
			drbd_send_sync_param(peer_device);
	}

	synchronize_rcu();
	kfree(old_disk_conf);
	kfree(old_plan);
	mod_timer(&device->request_timer, jiffies + HZ);
	goto success;

fail_unlock:
	mutex_unlock(&device->resource->conf_update);
fail:
	kfree(new_disk_conf);
	kfree(new_plan);
success:
	put_ldev(device);
out:
	mutex_unlock(&adm_ctx.resource->adm_mutex);
finish:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
static struct block_device *open_backing_dev(struct drbd_device *device,
		const char *bdev_path, void *claim_ptr, bool do_bd_link)
{
	struct block_device *bdev;
	int err = 0;

	bdev = blkdev_get_by_path(bdev_path,
				  FMODE_READ | FMODE_WRITE | FMODE_EXCL, claim_ptr);
	if (IS_ERR(bdev)) {
		drbd_err(device, "open(\"%s\") failed with %ld\n",
				bdev_path, PTR_ERR(bdev));
		return bdev;
	}

	if (!do_bd_link)
		return bdev;

	err = bd_link_disk_holder(bdev, device->vdisk);
	if (err) {
		blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
		drbd_err(device, "bd_link_disk_holder(\"%s\", ...) failed with %d\n",
				bdev_path, err);
		bdev = ERR_PTR(err);
	}
	return bdev;
}
static int open_backing_devices(struct drbd_device *device,
		struct disk_conf *new_disk_conf,
		struct drbd_backing_dev *nbc)
{
	struct block_device *bdev;

	bdev = open_backing_dev(device, new_disk_conf->backing_dev, device, true);
	if (IS_ERR(bdev))
		return ERR_OPEN_DISK;
	nbc->backing_bdev = bdev;

	/*
	 * meta_dev_idx >= 0: external fixed size, possibly multiple
	 * drbd sharing one meta device.  TODO in that case, paranoia
	 * check that [md_bdev, meta_dev_idx] is not yet used by some
	 * other drbd minor!  (if you use drbd.conf + drbdadm, that
	 * should check it for you already; but if you don't, or
	 * someone fooled it, we need to double check here)
	 */
	bdev = open_backing_dev(device, new_disk_conf->meta_dev,
		/* claim ptr: device, if claimed exclusively; shared drbd_m_holder,
		 * if potentially shared with other drbd minors */
			(new_disk_conf->meta_dev_idx < 0) ? (void*)device : (void*)drbd_m_holder,
		/* avoid double bd_claim_by_disk() for the same (source,target) tuple,
		 * as would happen with internal metadata. */
			(new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_FLEX_INT &&
			 new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_INTERNAL));
	if (IS_ERR(bdev))
		return ERR_OPEN_MD_DISK;
	nbc->md_bdev = bdev;
	return NO_ERROR;
}
static void close_backing_dev(struct drbd_device *device, struct block_device *bdev,
	bool do_bd_unlink)
{
	if (!bdev)
		return;
	if (do_bd_unlink)
		bd_unlink_disk_holder(bdev, device->vdisk);
	blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
}

void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev)
{
	if (ldev == NULL)
		return;

	close_backing_dev(device, ldev->md_bdev, ldev->md_bdev != ldev->backing_bdev);
	close_backing_dev(device, ldev->backing_bdev, true);

	kfree(ldev->disk_conf);
	kfree(ldev);
}
int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	struct drbd_device *device;
	struct drbd_peer_device *peer_device;
	struct drbd_connection *connection;
	int err;
	enum drbd_ret_code retcode;
	enum determine_dev_size dd;
	sector_t max_possible_sectors;
	sector_t min_md_device_sectors;
	struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
	struct disk_conf *new_disk_conf = NULL;
	struct lru_cache *resync_lru = NULL;
	struct fifo_buffer *new_plan = NULL;
	union drbd_state ns, os;
	enum drbd_state_rv rv;
	struct net_conf *nc;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto finish;

	device = adm_ctx.device;
	mutex_lock(&adm_ctx.resource->adm_mutex);
	peer_device = first_peer_device(device);
	connection = peer_device->connection;
	conn_reconfig_start(connection);

	/* if you want to reconfigure, please tear down first */
	if (device->state.disk > D_DISKLESS) {
		retcode = ERR_DISK_CONFIGURED;
		goto fail;
	}
	/* It may just now have detached because of IO error.  Make sure
	 * drbd_ldev_destroy is done already, we may end up here very fast,
	 * e.g. if someone calls attach from the on-io-error handler,
	 * to realize a "hot spare" feature (not that I'd recommend that) */
	wait_event(device->misc_wait, !test_bit(GOING_DISKLESS, &device->flags));

	/* make sure there is no leftover from previous force-detach attempts */
	clear_bit(FORCE_DETACH, &device->flags);
	clear_bit(WAS_IO_ERROR, &device->flags);
	clear_bit(WAS_READ_ERROR, &device->flags);

	/* and no leftover from previously aborted resync or verify, either */
	device->rs_total = 0;
	device->rs_failed = 0;
	atomic_set(&device->rs_pending_cnt, 0);

	/* allocation not in the IO path, drbdsetup context */
	nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
	if (!nbc) {
		retcode = ERR_NOMEM;
		goto fail;
	}
	spin_lock_init(&nbc->md.uuid_lock);

	new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
	if (!new_disk_conf) {
		retcode = ERR_NOMEM;
		goto fail;
	}
	nbc->disk_conf = new_disk_conf;

	set_disk_conf_defaults(new_disk_conf);
	err = disk_conf_from_attrs(new_disk_conf, info);
	if (err) {
		retcode = ERR_MANDATORY_TAG;
		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
		goto fail;
	}

	if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
		new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;

	new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ);
	if (!new_plan) {
		retcode = ERR_NOMEM;
		goto fail;
	}

	if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
		retcode = ERR_MD_IDX_INVALID;
		goto fail;
	}

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (nc) {
		if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
			rcu_read_unlock();
			retcode = ERR_STONITH_AND_PROT_A;
			goto fail;
		}
	}
	rcu_read_unlock();

	retcode = open_backing_devices(device, new_disk_conf, nbc);
	if (retcode != NO_ERROR)
		goto fail;

	if ((nbc->backing_bdev == nbc->md_bdev) !=
	    (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
	     new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
		retcode = ERR_MD_IDX_INVALID;
		goto fail;
	}

	resync_lru = lc_create("resync", drbd_bm_ext_cache,
			1, 61, sizeof(struct bm_extent),
			offsetof(struct bm_extent, lce));
	if (!resync_lru) {
		retcode = ERR_NOMEM;
		goto fail;
	}

	/* Read our meta data super block early.
	 * This also sets other on-disk offsets. */
	retcode = drbd_md_read(device, nbc);
	if (retcode != NO_ERROR)
		goto fail;

	sanitize_disk_conf(device, new_disk_conf, nbc);

	if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
		drbd_err(device, "max capacity %llu smaller than disk size %llu\n",
			(unsigned long long) drbd_get_max_capacity(nbc),
			(unsigned long long) new_disk_conf->disk_size);
		retcode = ERR_DISK_TOO_SMALL;
		goto fail;
	}

	if (new_disk_conf->meta_dev_idx < 0) {
		max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
		/* at least one MB, otherwise it does not make sense */
		min_md_device_sectors = (2<<10);
	} else {
		max_possible_sectors = DRBD_MAX_SECTORS;
		min_md_device_sectors = MD_128MB_SECT * (new_disk_conf->meta_dev_idx + 1);
	}

	if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
		retcode = ERR_MD_DISK_TOO_SMALL;
		drbd_warn(device, "refusing attach: md-device too small, "
			"at least %llu sectors needed for this meta-disk type\n",
			(unsigned long long) min_md_device_sectors);
		goto fail;
	}

	/* Make sure the new disk is big enough
	 * (we may currently be R_PRIMARY with no local disk...) */
	if (drbd_get_max_capacity(nbc) <
	    drbd_get_capacity(device->this_bdev)) {
		retcode = ERR_DISK_TOO_SMALL;
		goto fail;
	}

	nbc->known_size = drbd_get_capacity(nbc->backing_bdev);

	if (nbc->known_size > max_possible_sectors) {
		drbd_warn(device, "==> truncating very big lower level device "
			"to currently maximum possible %llu sectors <==\n",
			(unsigned long long) max_possible_sectors);
		if (new_disk_conf->meta_dev_idx >= 0)
			drbd_warn(device, "==>> using internal or flexible "
				"meta data may help <<==\n");
	}

	drbd_suspend_io(device);
	/* also wait for the last barrier ack. */
	/* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171
	 * We need a way to either ignore barrier acks for barriers sent before a device
	 * was attached, or a way to wait for all pending barrier acks to come in.
	 * As barriers are counted per resource,
	 * we'd need to suspend io on all devices of a resource.
	 */
	wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device));
	/* and for any other previously queued work */
	drbd_flush_workqueue(&connection->sender_work);

	rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
	retcode = rv;  /* FIXME: Type mismatch. */
	drbd_resume_io(device);
	if (rv < SS_SUCCESS)
		goto fail;

	if (!get_ldev_if_state(device, D_ATTACHING))
		goto force_diskless;

	if (!device->bitmap) {
		if (drbd_bm_init(device)) {
			retcode = ERR_NOMEM;
			goto force_diskless_dec;
		}
	}

	if (device->state.conn < C_CONNECTED &&
	    device->state.role == R_PRIMARY && device->ed_uuid &&
	    (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
		drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
		    (unsigned long long)device->ed_uuid);
		retcode = ERR_DATA_NOT_CURRENT;
		goto force_diskless_dec;
	}

	/* Since we are diskless, fix the activity log first... */
	if (drbd_check_al_size(device, new_disk_conf)) {
		retcode = ERR_NOMEM;
		goto force_diskless_dec;
	}

	/* Prevent shrinking of consistent devices ! */
	if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
	    drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) {
		drbd_warn(device, "refusing to truncate a consistent device\n");
		retcode = ERR_DISK_TOO_SMALL;
		goto force_diskless_dec;
	}

	lock_all_resources();
	retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
	if (retcode != NO_ERROR) {
		unlock_all_resources();
		goto force_diskless_dec;
	}

	/* Reset the "barriers don't work" bits here, then force meta data to
	 * be written, to ensure we determine if barriers are supported. */
	if (new_disk_conf->md_flushes)
		clear_bit(MD_NO_FUA, &device->flags);
	else
		set_bit(MD_NO_FUA, &device->flags);

	/* Point of no return reached.
	 * Devices and memory are no longer released by error cleanup below.
	 * now device takes over responsibility, and the state engine should
	 * clean it up somewhere. */
	D_ASSERT(device, device->ldev == NULL);
	device->ldev = nbc;
	device->resync = resync_lru;
	device->rs_plan_s = new_plan;
	nbc = NULL;
	resync_lru = NULL;
	new_disk_conf = NULL;
	new_plan = NULL;

	drbd_resync_after_changed(device);
	drbd_bump_write_ordering(device->resource, device->ldev, WO_BDEV_FLUSH);
	unlock_all_resources();

	if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
		set_bit(CRASHED_PRIMARY, &device->flags);
	else
		clear_bit(CRASHED_PRIMARY, &device->flags);

	if (drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
	    !(device->state.role == R_PRIMARY && device->resource->susp_nod))
		set_bit(CRASHED_PRIMARY, &device->flags);

	device->send_cnt = 0;
	device->recv_cnt = 0;
	device->read_cnt = 0;
	device->writ_cnt = 0;

	drbd_reconsider_queue_parameters(device, device->ldev, NULL);

	/* If I am currently not R_PRIMARY,
	 * but meta data primary indicator is set,
	 * I just now recover from a hard crash,
	 * and have been R_PRIMARY before that crash.
	 *
	 * Now, if I had no connection before that crash
	 * (have been degraded R_PRIMARY), chances are that
	 * I won't find my peer now either.
	 *
	 * In that case, and _only_ in that case,
	 * we use the degr-wfc-timeout instead of the default,
	 * so we can automatically recover from a crash of a
	 * degraded but active "cluster" after a certain timeout.
	 */
	clear_bit(USE_DEGR_WFC_T, &device->flags);
	if (device->state.role != R_PRIMARY &&
	     drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
	    !drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND))
		set_bit(USE_DEGR_WFC_T, &device->flags);

	dd = drbd_determine_dev_size(device, 0, NULL);
	if (dd <= DS_ERROR) {
		retcode = ERR_NOMEM_BITMAP;
		goto force_diskless_dec;
	} else if (dd == DS_GREW)
		set_bit(RESYNC_AFTER_NEG, &device->flags);

	if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ||
	    (test_bit(CRASHED_PRIMARY, &device->flags) &&
	     drbd_md_test_flag(device->ldev, MDF_AL_DISABLED))) {
		drbd_info(device, "Assuming that all blocks are out of sync "
			"(aka FullSync)\n");
		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
			"set_n_write from attaching", BM_LOCKED_MASK)) {
			retcode = ERR_IO_MD_DISK;
			goto force_diskless_dec;
		}
	} else {
		if (drbd_bitmap_io(device, &drbd_bm_read,
			"read from attaching", BM_LOCKED_MASK)) {
			retcode = ERR_IO_MD_DISK;
			goto force_diskless_dec;
		}
	}

	if (_drbd_bm_total_weight(device) == drbd_bm_bits(device))
		drbd_suspend_al(device); /* IO is still suspended here... */

	spin_lock_irq(&device->resource->req_lock);
	os = drbd_read_state(device);
	ns = os;
	/* If MDF_CONSISTENT is not set go into inconsistent state,
	   otherwise investigate MDF_WasUpToDate...
	   If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state,
	   otherwise into D_CONSISTENT state.
	*/
	if (drbd_md_test_flag(device->ldev, MDF_CONSISTENT)) {
		if (drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE))
			ns.disk = D_CONSISTENT;
		else
			ns.disk = D_OUTDATED;
	} else {
		ns.disk = D_INCONSISTENT;
	}

	if (drbd_md_test_flag(device->ldev, MDF_PEER_OUT_DATED))
		ns.pdsk = D_OUTDATED;

	rcu_read_lock();
	if (ns.disk == D_CONSISTENT &&
	    (ns.pdsk == D_OUTDATED || rcu_dereference(device->ldev->disk_conf)->fencing == FP_DONT_CARE))
		ns.disk = D_UP_TO_DATE;

	/* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
	   MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
	   this point, because drbd_request_state() modifies these
	   flags. */

	if (rcu_dereference(device->ldev->disk_conf)->al_updates)
		device->ldev->md.flags &= ~MDF_AL_DISABLED;
	else
		device->ldev->md.flags |= MDF_AL_DISABLED;

	rcu_read_unlock();

	/* In case we are C_CONNECTED postpone any decision on the new disk
	   state after the negotiation phase. */
	if (device->state.conn == C_CONNECTED) {
		device->new_state_tmp.i = ns.i;
		ns.i = os.i;
		ns.disk = D_NEGOTIATING;

		/* We expect to receive up-to-date UUIDs soon.
		   To avoid a race in receive_state, free p_uuid while
		   holding req_lock. I.e. atomic with the state change */
		kfree(device->p_uuid);
		device->p_uuid = NULL;
	}

	rv = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
	spin_unlock_irq(&device->resource->req_lock);

	if (rv < SS_SUCCESS)
		goto force_diskless_dec;

	mod_timer(&device->request_timer, jiffies + HZ);

	if (device->state.role == R_PRIMARY)
		device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
	else
		device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;

	drbd_md_mark_dirty(device);
	drbd_md_sync(device);

	kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
	put_ldev(device);
	conn_reconfig_done(connection);
	mutex_unlock(&adm_ctx.resource->adm_mutex);
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;

 force_diskless_dec:
	put_ldev(device);
 force_diskless:
	drbd_force_state(device, NS(disk, D_DISKLESS));
	drbd_md_sync(device);
 fail:
	conn_reconfig_done(connection);
	if (nbc) {
		close_backing_dev(device, nbc->md_bdev, nbc->md_bdev != nbc->backing_bdev);
		close_backing_dev(device, nbc->backing_bdev, true);
		kfree(nbc);
	}
	kfree(new_disk_conf);
	lc_destroy(resync_lru);
	kfree(new_plan);
	mutex_unlock(&adm_ctx.resource->adm_mutex);
 finish:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
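
/* Attach in a nutshell: the disk goes D_DISKLESS -> D_ATTACHING; once the
 * meta data has been read and validated, the MDF_* flags decide between
 * D_INCONSISTENT, D_OUTDATED, D_CONSISTENT and D_UP_TO_DATE.  While
 * C_CONNECTED, the final decision is deferred to D_NEGOTIATING instead.
 * Any failure after D_ATTACHING forces the disk back to D_DISKLESS. */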
static int adm_detach(struct drbd_device *device, int force)
{
	enum drbd_state_rv retcode;
	void *buffer;
	int ret;

	if (force) {
		set_bit(FORCE_DETACH, &device->flags);
		drbd_force_state(device, NS(disk, D_FAILED));
		retcode = SS_SUCCESS;
		goto out;
	}

	drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
	buffer = drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */
	if (buffer) {
		retcode = drbd_request_state(device, NS(disk, D_FAILED));
		drbd_md_put_buffer(device);
	} else /* already <= D_FAILED */
		retcode = SS_NOTHING_TO_DO;
	/* D_FAILED will transition to DISKLESS. */
	drbd_resume_io(device);
	ret = wait_event_interruptible(device->misc_wait,
			device->state.disk != D_FAILED);
	if ((int)retcode == (int)SS_IS_DISKLESS)
		retcode = SS_NOTHING_TO_DO;
	if (ret)
		retcode = ERR_INTR;
out:
	return retcode;
}
/* Detaching the disk is a process in multiple stages.  First we need to lock
 * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
 * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
 * internal references as well.
 * Only then we have finally detached. */
int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	enum drbd_ret_code retcode;
	struct detach_parms parms = { };
	int err;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
		err = detach_parms_from_attrs(&parms, info);
		if (err) {
			retcode = ERR_MANDATORY_TAG;
			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
			goto out;
		}
	}

	mutex_lock(&adm_ctx.resource->adm_mutex);
	retcode = adm_detach(adm_ctx.device, parms.force_detach);
	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
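
/* For orientation only (exact spelling depends on the drbd-utils version):
 * userspace typically reaches this via something like
 *   drbdsetup detach <minor> [--force]
 * which arrives here as DRBD_NLA_DETACH_PARMS with force_detach set. */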
static bool conn_resync_running(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	bool rv = false;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		if (device->state.conn == C_SYNC_SOURCE ||
		    device->state.conn == C_SYNC_TARGET ||
		    device->state.conn == C_PAUSED_SYNC_S ||
		    device->state.conn == C_PAUSED_SYNC_T) {
			rv = true;
			break;
		}
	}
	rcu_read_unlock();

	return rv;
}

static bool conn_ov_running(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	bool rv = false;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		if (device->state.conn == C_VERIFY_S ||
		    device->state.conn == C_VERIFY_T) {
			rv = true;
			break;
		}
	}
	rcu_read_unlock();

	return rv;
}
static enum drbd_ret_code
_check_net_options(struct drbd_connection *connection, struct net_conf *old_net_conf, struct net_conf *new_net_conf)
{
	struct drbd_peer_device *peer_device;
	int i;

	if (old_net_conf && connection->cstate == C_WF_REPORT_PARAMS && connection->agreed_pro_version < 100) {
		if (new_net_conf->wire_protocol != old_net_conf->wire_protocol)
			return ERR_NEED_APV_100;

		if (new_net_conf->two_primaries != old_net_conf->two_primaries)
			return ERR_NEED_APV_100;

		if (strcmp(new_net_conf->integrity_alg, old_net_conf->integrity_alg))
			return ERR_NEED_APV_100;
	}

	if (!new_net_conf->two_primaries &&
	    conn_highest_role(connection) == R_PRIMARY &&
	    conn_highest_peer(connection) == R_PRIMARY)
		return ERR_NEED_ALLOW_TWO_PRI;

	if (new_net_conf->two_primaries &&
	    (new_net_conf->wire_protocol != DRBD_PROT_C))
		return ERR_NOT_PROTO_C;

	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
		struct drbd_device *device = peer_device->device;
		if (get_ldev(device)) {
			enum drbd_fencing_p fp = rcu_dereference(device->ldev->disk_conf)->fencing;
			put_ldev(device);
			if (new_net_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH)
				return ERR_STONITH_AND_PROT_A;
		}
		if (device->state.role == R_PRIMARY && new_net_conf->discard_my_data)
			return ERR_DISCARD_IMPOSSIBLE;
	}

	if (new_net_conf->on_congestion != OC_BLOCK && new_net_conf->wire_protocol != DRBD_PROT_A)
		return ERR_CONG_NOT_PROTO_A;

	return NO_ERROR;
}
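
/* Summary of the constraints checked above: while in C_WF_REPORT_PARAMS with
 * an agreed protocol version below 100, wire protocol, two-primaries and
 * integrity-alg must not change; dual primaries require allow-two-primaries
 * and protocol C; protocol A is incompatible with stonith fencing and with
 * discard-my-data on a current Primary; any on-congestion policy other than
 * "block" requires protocol A. */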
static enum drbd_ret_code
check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
{
	static enum drbd_ret_code rv;
	struct drbd_peer_device *peer_device;
	int i;

	rcu_read_lock();
	rv = _check_net_options(connection, rcu_dereference(connection->net_conf), new_net_conf);
	rcu_read_unlock();

	/* connection->peer_devices protected by genl_lock() here */
	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
		struct drbd_device *device = peer_device->device;
		if (!device->bitmap) {
			if (drbd_bm_init(device))
				return ERR_NOMEM;
		}
	}

	return rv;
}
struct crypto {
	struct crypto_ahash *verify_tfm;
	struct crypto_ahash *csums_tfm;
	struct crypto_shash *cram_hmac_tfm;
	struct crypto_ahash *integrity_tfm;
};

static int
alloc_shash(struct crypto_shash **tfm, char *tfm_name, int err_alg)
{
	if (!tfm_name[0])
		return NO_ERROR;

	*tfm = crypto_alloc_shash(tfm_name, 0, 0);
	if (IS_ERR(*tfm)) {
		*tfm = NULL;
		return err_alg;
	}

	return NO_ERROR;
}

static int
alloc_ahash(struct crypto_ahash **tfm, char *tfm_name, int err_alg)
{
	if (!tfm_name[0])
		return NO_ERROR;

	*tfm = crypto_alloc_ahash(tfm_name, 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(*tfm)) {
		*tfm = NULL;
		return err_alg;
	}

	return NO_ERROR;
}
static enum drbd_ret_code
alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf)
{
	char hmac_name[CRYPTO_MAX_ALG_NAME];
	enum drbd_ret_code rv;

	rv = alloc_ahash(&crypto->csums_tfm, new_net_conf->csums_alg,
			 ERR_CSUMS_ALG);
	if (rv != NO_ERROR)
		return rv;
	rv = alloc_ahash(&crypto->verify_tfm, new_net_conf->verify_alg,
			 ERR_VERIFY_ALG);
	if (rv != NO_ERROR)
		return rv;
	rv = alloc_ahash(&crypto->integrity_tfm, new_net_conf->integrity_alg,
			 ERR_INTEGRITY_ALG);
	if (rv != NO_ERROR)
		return rv;
	if (new_net_conf->cram_hmac_alg[0] != 0) {
		snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
			 new_net_conf->cram_hmac_alg);

		rv = alloc_shash(&crypto->cram_hmac_tfm, hmac_name,
				 ERR_AUTH_ALG);
	}

	return rv;
}
static void free_crypto(struct crypto *crypto)
{
	crypto_free_shash(crypto->cram_hmac_tfm);
	crypto_free_ahash(crypto->integrity_tfm);
	crypto_free_ahash(crypto->csums_tfm);
	crypto_free_ahash(crypto->verify_tfm);
}
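
/* Assumption documented here rather than relied on silently: the
 * crypto_free_*() helpers tolerate NULL tfm pointers (crypto_destroy_tfm()
 * returns early), so free_crypto() may be called on a partially populated
 * struct crypto, as the error paths below do. */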
int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	enum drbd_ret_code retcode;
	struct drbd_connection *connection;
	struct net_conf *old_net_conf, *new_net_conf = NULL;
	int err;
	int ovr; /* online verify running */
	int rsr; /* re-sync running */
	struct crypto crypto = { };

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto finish;

	connection = adm_ctx.connection;
	mutex_lock(&adm_ctx.resource->adm_mutex);

	new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
	if (!new_net_conf) {
		retcode = ERR_NOMEM;
		goto out;
	}

	conn_reconfig_start(connection);

	mutex_lock(&connection->data.mutex);
	mutex_lock(&connection->resource->conf_update);
	old_net_conf = connection->net_conf;

	if (!old_net_conf) {
		drbd_msg_put_info(adm_ctx.reply_skb, "net conf missing, try connect");
		retcode = ERR_INVALID_REQUEST;
		goto fail;
	}

	*new_net_conf = *old_net_conf;
	if (should_set_defaults(info))
		set_net_conf_defaults(new_net_conf);

	err = net_conf_from_attrs_for_change(new_net_conf, info);
	if (err && err != -ENOMSG) {
		retcode = ERR_MANDATORY_TAG;
		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
		goto fail;
	}

	retcode = check_net_options(connection, new_net_conf);
	if (retcode != NO_ERROR)
		goto fail;

	/* re-sync running */
	rsr = conn_resync_running(connection);
	if (rsr && strcmp(new_net_conf->csums_alg, old_net_conf->csums_alg)) {
		retcode = ERR_CSUMS_RESYNC_RUNNING;
		goto fail;
	}

	/* online verify running */
	ovr = conn_ov_running(connection);
	if (ovr && strcmp(new_net_conf->verify_alg, old_net_conf->verify_alg)) {
		retcode = ERR_VERIFY_RUNNING;
		goto fail;
	}

	retcode = alloc_crypto(&crypto, new_net_conf);
	if (retcode != NO_ERROR)
		goto fail;

	rcu_assign_pointer(connection->net_conf, new_net_conf);

	if (!rsr) {
		crypto_free_ahash(connection->csums_tfm);
		connection->csums_tfm = crypto.csums_tfm;
		crypto.csums_tfm = NULL;
	}
	if (!ovr) {
		crypto_free_ahash(connection->verify_tfm);
		connection->verify_tfm = crypto.verify_tfm;
		crypto.verify_tfm = NULL;
	}

	crypto_free_ahash(connection->integrity_tfm);
	connection->integrity_tfm = crypto.integrity_tfm;
	if (connection->cstate >= C_WF_REPORT_PARAMS && connection->agreed_pro_version >= 100)
		/* Do this without trying to take connection->data.mutex again. */
		__drbd_send_protocol(connection, P_PROTOCOL_UPDATE);

	crypto_free_shash(connection->cram_hmac_tfm);
	connection->cram_hmac_tfm = crypto.cram_hmac_tfm;

	mutex_unlock(&connection->resource->conf_update);
	mutex_unlock(&connection->data.mutex);
	synchronize_rcu();
	kfree(old_net_conf);

	if (connection->cstate >= C_WF_REPORT_PARAMS) {
		struct drbd_peer_device *peer_device;
		int vnr;

		idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
			drbd_send_sync_param(peer_device);
	}

	goto done;

 fail:
	mutex_unlock(&connection->resource->conf_update);
	mutex_unlock(&connection->data.mutex);
	free_crypto(&crypto);
	kfree(new_net_conf);
 done:
	conn_reconfig_done(connection);
 out:
	mutex_unlock(&adm_ctx.resource->adm_mutex);
 finish:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
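
/* The net_conf update above follows the usual RCU publish/retire pattern:
 * the new struct is published with rcu_assign_pointer() while holding
 * conf_update, and the old one is kfree()d only after synchronize_rcu(),
 * so lockless readers never see freed memory. */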
static void connection_to_info(struct connection_info *info,
			       struct drbd_connection *connection)
{
	info->conn_connection_state = connection->cstate;
	info->conn_role = conn_highest_peer(connection);
}

static void peer_device_to_info(struct peer_device_info *info,
				struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;

	info->peer_repl_state =
		max_t(enum drbd_conns, C_WF_REPORT_PARAMS, device->state.conn);
	info->peer_disk_state = device->state.pdsk;
	info->peer_resync_susp_user = device->state.user_isp;
	info->peer_resync_susp_peer = device->state.peer_isp;
	info->peer_resync_susp_dependency = device->state.aftr_isp;
}
int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
{
	struct connection_info connection_info;
	enum drbd_notification_type flags;
	unsigned int peer_devices = 0;
	struct drbd_config_context adm_ctx;
	struct drbd_peer_device *peer_device;
	struct net_conf *old_net_conf, *new_net_conf = NULL;
	struct crypto crypto = { };
	struct drbd_resource *resource;
	struct drbd_connection *connection;
	enum drbd_ret_code retcode;
	int i;
	int err;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);

	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;
	if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) {
		drbd_msg_put_info(adm_ctx.reply_skb, "connection endpoint(s) missing");
		retcode = ERR_INVALID_REQUEST;
		goto out;
	}

	/* No need for _rcu here. All reconfiguration is
	 * strictly serialized on genl_lock(). We are protected against
	 * concurrent reconfiguration/addition/deletion */
	for_each_resource(resource, &drbd_resources) {
		for_each_connection(connection, resource) {
			if (nla_len(adm_ctx.my_addr) == connection->my_addr_len &&
			    !memcmp(nla_data(adm_ctx.my_addr), &connection->my_addr,
				    connection->my_addr_len)) {
				retcode = ERR_LOCAL_ADDR;
				goto out;
			}

			if (nla_len(adm_ctx.peer_addr) == connection->peer_addr_len &&
			    !memcmp(nla_data(adm_ctx.peer_addr), &connection->peer_addr,
				    connection->peer_addr_len)) {
				retcode = ERR_PEER_ADDR;
				goto out;
			}
		}
	}

	mutex_lock(&adm_ctx.resource->adm_mutex);
	connection = first_connection(adm_ctx.resource);
	conn_reconfig_start(connection);

	if (connection->cstate > C_STANDALONE) {
		retcode = ERR_NET_CONFIGURED;
		goto fail;
	}

	/* allocation not in the IO path, drbdsetup / netlink process context */
	new_net_conf = kzalloc(sizeof(*new_net_conf), GFP_KERNEL);
	if (!new_net_conf) {
		retcode = ERR_NOMEM;
		goto fail;
	}

	set_net_conf_defaults(new_net_conf);

	err = net_conf_from_attrs(new_net_conf, info);
	if (err && err != -ENOMSG) {
		retcode = ERR_MANDATORY_TAG;
		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
		goto fail;
	}

	retcode = check_net_options(connection, new_net_conf);
	if (retcode != NO_ERROR)
		goto fail;

	retcode = alloc_crypto(&crypto, new_net_conf);
	if (retcode != NO_ERROR)
		goto fail;

	((char *)new_net_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;

	drbd_flush_workqueue(&connection->sender_work);

	mutex_lock(&adm_ctx.resource->conf_update);
	old_net_conf = connection->net_conf;
	if (old_net_conf) {
		retcode = ERR_NET_CONFIGURED;
		mutex_unlock(&adm_ctx.resource->conf_update);
		goto fail;
	}
	rcu_assign_pointer(connection->net_conf, new_net_conf);

	conn_free_crypto(connection);
	connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
	connection->integrity_tfm = crypto.integrity_tfm;
	connection->csums_tfm = crypto.csums_tfm;
	connection->verify_tfm = crypto.verify_tfm;

	connection->my_addr_len = nla_len(adm_ctx.my_addr);
	memcpy(&connection->my_addr, nla_data(adm_ctx.my_addr), connection->my_addr_len);
	connection->peer_addr_len = nla_len(adm_ctx.peer_addr);
	memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len);

	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
		peer_devices++;
	}

	connection_to_info(&connection_info, connection);
	flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
	mutex_lock(&notification_mutex);
	notify_connection_state(NULL, 0, connection, &connection_info, NOTIFY_CREATE | flags);
	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
		struct peer_device_info peer_device_info;

		peer_device_to_info(&peer_device_info, peer_device);
		flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
		notify_peer_device_state(NULL, 0, peer_device, &peer_device_info, NOTIFY_CREATE | flags);
	}
	mutex_unlock(&notification_mutex);
	mutex_unlock(&adm_ctx.resource->conf_update);

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
		struct drbd_device *device = peer_device->device;
		device->send_cnt = 0;
		device->recv_cnt = 0;
	}
	rcu_read_unlock();

	retcode = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);

	conn_reconfig_done(connection);
	mutex_unlock(&adm_ctx.resource->adm_mutex);
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;

fail:
	free_crypto(&crypto);
	kfree(new_net_conf);

	conn_reconfig_done(connection);
	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
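
/* The address scan above runs over all resources, not just the one being
 * configured: a (my_addr, peer_addr) endpoint pair must be globally unique.
 * This is safe without RCU because all reconfiguration is serialized on
 * genl_lock(). */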
static enum drbd_state_rv
conn_try_disconnect(struct drbd_connection *connection, bool force)
{
	enum drbd_state_rv rv;

	rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
			force ? CS_HARD : 0);

	switch (rv) {
	case SS_NOTHING_TO_DO:
		break;
	case SS_ALREADY_STANDALONE:
		return SS_SUCCESS;
	case SS_PRIMARY_NOP:
		/* Our state checking code wants to see the peer outdated. */
		rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), 0);

		if (rv == SS_OUTDATE_WO_CONN) /* lost connection before graceful disconnect succeeded */
			rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_VERBOSE);

		break;
	case SS_CW_FAILED_BY_PEER:
		/* The peer probably wants to see us outdated. */
		rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING,
							disk, D_OUTDATED), 0);
		if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
			rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
					CS_HARD);
		}
		break;
	default:;
		/* no special handling necessary */
	}

	if (rv >= SS_SUCCESS) {
		enum drbd_state_rv rv2;
		/* No one else can reconfigure the network while I am here.
		 * The state handling only uses drbd_thread_stop_nowait(),
		 * we want to really wait here until the receiver is no more.
		 */
		drbd_thread_stop(&connection->receiver);

		/* Race breaker.  This additional state change request may be
		 * necessary, if this was a forced disconnect during a receiver
		 * restart.  We may have "killed" the receiver thread just
		 * after drbd_receiver() returned.  Typically, we should be
		 * C_STANDALONE already, now, and this becomes a no-op.
		 */
		rv2 = conn_request_state(connection, NS(conn, C_STANDALONE),
				CS_VERBOSE | CS_HARD);
		if (rv2 < SS_SUCCESS)
			drbd_err(connection,
				 "unexpected rv2=%d in conn_try_disconnect()\n",
				 rv2);
		/* Unlike in DRBD 9, the state engine has generated
		 * NOTIFY_DESTROY events before clearing connection->net_conf. */
	}
	return rv;
}
int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	struct disconnect_parms parms;
	struct drbd_connection *connection;
	enum drbd_state_rv rv;
	enum drbd_ret_code retcode;
	int err;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto fail;

	connection = adm_ctx.connection;
	memset(&parms, 0, sizeof(parms));
	if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
		err = disconnect_parms_from_attrs(&parms, info);
		if (err) {
			retcode = ERR_MANDATORY_TAG;
			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
			goto fail;
		}
	}

	mutex_lock(&adm_ctx.resource->adm_mutex);
	rv = conn_try_disconnect(connection, parms.force_disconnect);
	if (rv < SS_SUCCESS)
		retcode = rv;  /* FIXME: Type mismatch. */
	else
		retcode = NO_ERROR;
	mutex_unlock(&adm_ctx.resource->adm_mutex);
 fail:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
void resync_after_online_grow(struct drbd_device *device)
{
	int iass; /* I am sync source */

	drbd_info(device, "Resync of new storage after online grow\n");
	if (device->state.role != device->state.peer)
		iass = (device->state.role == R_PRIMARY);
	else
		iass = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);

	if (iass)
		drbd_start_resync(device, C_SYNC_SOURCE);
	else
		_drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
}
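
/* Tie-break rationale: if the roles differ, the Primary has the
 * authoritative data and becomes sync source.  With equal roles, the
 * RESOLVE_CONFLICTS flag (agreed upon during the connection handshake)
 * provides an arbitrary but consistent decision on both nodes. */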
int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
	struct resize_parms rs;
	struct drbd_device *device;
	enum drbd_ret_code retcode;
	enum determine_dev_size dd;
	bool change_al_layout = false;
	enum dds_flags ddsf;
	sector_t u_size;
	int err;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto finish;

	mutex_lock(&adm_ctx.resource->adm_mutex);
	device = adm_ctx.device;
	if (!get_ldev(device)) {
		retcode = ERR_NO_DISK;
		goto fail;
	}

	memset(&rs, 0, sizeof(struct resize_parms));
	rs.al_stripes = device->ldev->md.al_stripes;
	rs.al_stripe_size = device->ldev->md.al_stripe_size_4k * 4;
	if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
		err = resize_parms_from_attrs(&rs, info);
		if (err) {
			retcode = ERR_MANDATORY_TAG;
			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
			goto fail_ldev;
		}
	}

	if (device->state.conn > C_CONNECTED) {
		retcode = ERR_RESIZE_RESYNC;
		goto fail_ldev;
	}

	if (device->state.role == R_SECONDARY &&
	    device->state.peer == R_SECONDARY) {
		retcode = ERR_NO_PRIMARY;
		goto fail_ldev;
	}

	if (rs.no_resync && first_peer_device(device)->connection->agreed_pro_version < 93) {
		retcode = ERR_NEED_APV_93;
		goto fail_ldev;
	}

	rcu_read_lock();
	u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
	rcu_read_unlock();
	if (u_size != (sector_t)rs.resize_size) {
		new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
		if (!new_disk_conf) {
			retcode = ERR_NOMEM;
			goto fail_ldev;
		}
	}

	if (device->ldev->md.al_stripes != rs.al_stripes ||
	    device->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
		u32 al_size_k = rs.al_stripes * rs.al_stripe_size;

		if (al_size_k > (16 * 1024 * 1024)) {
			retcode = ERR_MD_LAYOUT_TOO_BIG;
			goto fail_ldev;
		}

		if (al_size_k < MD_32kB_SECT/2) {
			retcode = ERR_MD_LAYOUT_TOO_SMALL;
			goto fail_ldev;
		}

		if (device->state.conn != C_CONNECTED && !rs.resize_force) {
			retcode = ERR_MD_LAYOUT_CONNECTED;
			goto fail_ldev;
		}

		change_al_layout = true;
	}

	if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev))
		device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);

	if (new_disk_conf) {
		mutex_lock(&device->resource->conf_update);
		old_disk_conf = device->ldev->disk_conf;
		*new_disk_conf = *old_disk_conf;
		new_disk_conf->disk_size = (sector_t)rs.resize_size;
		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
		mutex_unlock(&device->resource->conf_update);
		synchronize_rcu();
		kfree(old_disk_conf);
		new_disk_conf = NULL;
	}

	ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
	dd = drbd_determine_dev_size(device, ddsf, change_al_layout ? &rs : NULL);
	drbd_md_sync(device);
	put_ldev(device);
	if (dd == DS_ERROR) {
		retcode = ERR_NOMEM_BITMAP;
		goto fail;
	} else if (dd == DS_ERROR_SPACE_MD) {
		retcode = ERR_MD_LAYOUT_NO_FIT;
		goto fail;
	} else if (dd == DS_ERROR_SHRINK) {
		retcode = ERR_IMPLICIT_SHRINK;
		goto fail;
	}

	if (device->state.conn == C_CONNECTED) {
		if (dd == DS_GREW)
			set_bit(RESIZE_PENDING, &device->flags);

		drbd_send_uuids(first_peer_device(device));
		drbd_send_sizes(first_peer_device(device), 1, ddsf);
	}

 fail:
	mutex_unlock(&adm_ctx.resource->adm_mutex);
 finish:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;

 fail_ldev:
	put_ldev(device);
	kfree(new_disk_conf);
	goto fail;
}
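
/* A user-requested size change is applied in two steps: first the new
 * disk_conf is published under conf_update using RCU, then
 * drbd_determine_dev_size() performs the actual resize, including any
 * activity log re-layout requested via al_stripes/al_stripe_size. */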
int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	enum drbd_ret_code retcode;
	struct res_opts res_opts;
	int err;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto fail;

	res_opts = adm_ctx.resource->res_opts;
	if (should_set_defaults(info))
		set_res_opts_defaults(&res_opts);

	err = res_opts_from_attrs(&res_opts, info);
	if (err && err != -ENOMSG) {
		retcode = ERR_MANDATORY_TAG;
		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
		goto fail;
	}

	mutex_lock(&adm_ctx.resource->adm_mutex);
	err = set_resource_options(adm_ctx.resource, &res_opts);
	if (err) {
		retcode = ERR_INVALID_REQUEST;
		if (err == -ENOMEM)
			retcode = ERR_NOMEM;
	}
	mutex_unlock(&adm_ctx.resource->adm_mutex);

fail:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	struct drbd_device *device;
	int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	device = adm_ctx.device;
	if (!get_ldev(device)) {
		retcode = ERR_NO_DISK;
		goto out;
	}

	mutex_lock(&adm_ctx.resource->adm_mutex);

	/* If there is still bitmap IO pending, probably because of a previous
	 * resync just being finished, wait for it before requesting a new resync.
	 * Also wait for its after_state_ch(). */
	drbd_suspend_io(device);
	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
	drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);

	/* If we happen to be C_STANDALONE R_SECONDARY, just change to
	 * D_INCONSISTENT, and set all bits in the bitmap.  Otherwise,
	 * try to start a resync handshake as sync target for full sync.
	 */
	if (device->state.conn == C_STANDALONE && device->state.role == R_SECONDARY) {
		retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT));
		if (retcode >= SS_SUCCESS) {
			if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
				"set_n_write from invalidate", BM_LOCKED_MASK))
				retcode = ERR_IO_MD_DISK;
		}
	} else
		retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
	drbd_resume_io(device);
	mutex_unlock(&adm_ctx.resource->adm_mutex);
	put_ldev(device);
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
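
/* Two distinct invalidate paths: while C_STANDALONE and R_SECONDARY we can
 * simply go D_INCONSISTENT and set all bitmap bits locally; in all other
 * cases we request C_STARTING_SYNC_T, so a proper resync handshake makes
 * this node the sync target for a full sync. */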
static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
		union drbd_state mask, union drbd_state val)
{
	struct drbd_config_context adm_ctx;
	enum drbd_ret_code retcode;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	mutex_lock(&adm_ctx.resource->adm_mutex);
	retcode = drbd_request_state(adm_ctx.device, mask, val);
	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
static int drbd_bmio_set_susp_al(struct drbd_device *device) __must_hold(local)
{
	int rv;

	rv = drbd_bmio_set_n_write(device);
	drbd_suspend_al(device);
	return rv;
}
int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	int retcode; /* drbd_ret_code, drbd_state_rv */
	struct drbd_device *device;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	device = adm_ctx.device;
	if (!get_ldev(device)) {
		retcode = ERR_NO_DISK;
		goto out;
	}

	mutex_lock(&adm_ctx.resource->adm_mutex);

	/* If there is still bitmap IO pending, probably because of a previous
	 * resync just being finished, wait for it before requesting a new resync.
	 * Also wait for its after_state_ch(). */
	drbd_suspend_io(device);
	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
	drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);

	/* If we happen to be C_STANDALONE R_PRIMARY, just set all bits
	 * in the bitmap.  Otherwise, try to start a resync handshake
	 * as sync source for full sync.
	 */
	if (device->state.conn == C_STANDALONE && device->state.role == R_PRIMARY) {
		/* The peer will get a resync upon connect anyways. Just make that
		   into a full resync. */
		retcode = drbd_request_state(device, NS(pdsk, D_INCONSISTENT));
		if (retcode >= SS_SUCCESS) {
			if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al,
				"set_n_write from invalidate_peer",
				BM_LOCKED_SET_ALLOWED))
				retcode = ERR_IO_MD_DISK;
		}
	} else
		retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
	drbd_resume_io(device);
	mutex_unlock(&adm_ctx.resource->adm_mutex);
	put_ldev(device);
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
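
/* drbd_bmio_set_susp_al() additionally suspends the activity log: with
 * every block marked out of sync anyway, per-extent AL tracking buys
 * nothing until the resync has brought the device back in sync. */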
int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	enum drbd_ret_code retcode;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	mutex_lock(&adm_ctx.resource->adm_mutex);
	if (drbd_request_state(adm_ctx.device, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
		retcode = ERR_PAUSE_IS_SET;
	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	union drbd_dev_state s;
	enum drbd_ret_code retcode;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	mutex_lock(&adm_ctx.resource->adm_mutex);
	if (drbd_request_state(adm_ctx.device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
		s = adm_ctx.device->state;
		if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
			retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
				  s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
		} else {
			retcode = ERR_PAUSE_IS_CLEAR;
		}
	}
	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
{
	return drbd_adm_simple_request_state(skb, info, NS(susp, 1));
}
int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	struct drbd_device *device;
	int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	mutex_lock(&adm_ctx.resource->adm_mutex);
	device = adm_ctx.device;
	if (test_bit(NEW_CUR_UUID, &device->flags)) {
		if (get_ldev_if_state(device, D_ATTACHING)) {
			drbd_uuid_new_current(device);
			put_ldev(device);
		} else {
			/* This is effectively a multi-stage "forced down".
			 * The NEW_CUR_UUID bit is supposedly only set, if we
			 * lost the replication connection, and are configured
			 * to freeze IO and wait for some fence-peer handler.
			 * So we still don't have a replication connection.
			 * And now we don't have a local disk either.  After
			 * resume, we will fail all pending and new IO, because
			 * we don't have any data anymore.  Which means we will
			 * eventually be able to terminate all users of this
			 * device, and then take it down.  By bumping the
			 * "effective" data uuid, we make sure that you really
			 * need to tear down before you reconfigure; we will
			 * then refuse to re-connect or re-attach (because no
			 * matching real data uuid exists).
			 */
			u64 val;
			get_random_bytes(&val, sizeof(u64));
			drbd_set_ed_uuid(device, val);
			drbd_warn(device, "Resumed without access to data; please tear down before attempting to re-configure.\n");
		}
		clear_bit(NEW_CUR_UUID, &device->flags);
	}
	drbd_suspend_io(device);
	retcode = drbd_request_state(device, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
	if (retcode == SS_SUCCESS) {
		if (device->state.conn < C_CONNECTED)
			tl_clear(first_peer_device(device)->connection);
		if (device->state.disk == D_DISKLESS || device->state.disk == D_FAILED)
			tl_restart(first_peer_device(device)->connection, FAIL_FROZEN_DISK_IO);
	}
	drbd_resume_io(device);
	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
{
	return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
}
static int nla_put_drbd_cfg_context(struct sk_buff *skb,
				    struct drbd_resource *resource,
				    struct drbd_connection *connection,
				    struct drbd_device *device)
{
	struct nlattr *nla;
	nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
	if (!nla)
		goto nla_put_failure;
	if (device &&
	    nla_put_u32(skb, T_ctx_volume, device->vnr))
		goto nla_put_failure;
	if (nla_put_string(skb, T_ctx_resource_name, resource->name))
		goto nla_put_failure;
	if (connection) {
		if (connection->my_addr_len &&
		    nla_put(skb, T_ctx_my_addr, connection->my_addr_len, &connection->my_addr))
			goto nla_put_failure;
		if (connection->peer_addr_len &&
		    nla_put(skb, T_ctx_peer_addr, connection->peer_addr_len, &connection->peer_addr))
			goto nla_put_failure;
	}
	nla_nest_end(skb, nla);
	return 0;

nla_put_failure:
	if (nla)
		nla_nest_cancel(skb, nla);
	return -EMSGSIZE;
}
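
/* Every reply and dump message carries this DRBD_NLA_CFG_CONTEXT nest
 * (resource name, optionally volume number and connection endpoints), so
 * userspace can correlate the parts of a multipart netlink dump. */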
/*
 * The generic netlink dump callbacks are called outside the genl_lock(), so
 * they cannot use the simple attribute parsing code which uses global
 * attribute tables.
 */
static struct nlattr *find_cfg_context_attr(const struct nlmsghdr *nlh, int attr)
{
	const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
	const int maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
	struct nlattr *nla;

	nla = nla_find(nlmsg_attrdata(nlh, hdrlen), nlmsg_attrlen(nlh, hdrlen),
		       DRBD_NLA_CFG_CONTEXT);
	if (!nla)
		return NULL;
	return drbd_nla_find_nested(maxtype, nla, __nla_type(attr));
}
static void resource_to_info(struct resource_info *, struct drbd_resource *);

int drbd_adm_dump_resources(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct drbd_genlmsghdr *dh;
	struct drbd_resource *resource;
	struct resource_info resource_info;
	struct resource_statistics resource_statistics;
	int err;

	rcu_read_lock();
	if (cb->args[0]) {
		for_each_resource_rcu(resource, &drbd_resources)
			if (resource == (struct drbd_resource *)cb->args[0])
				goto found_resource;
		err = 0;  /* resource was probably deleted */
		goto out;
	}
	resource = list_entry(&drbd_resources,
			      struct drbd_resource, resources);

found_resource:
	list_for_each_entry_continue_rcu(resource, &drbd_resources, resources) {
		goto put_result;
	}
	err = 0;
	goto out;

put_result:
	dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
			cb->nlh->nlmsg_seq, &drbd_genl_family,
			NLM_F_MULTI, DRBD_ADM_GET_RESOURCES);
	err = -ENOMEM;
	if (!dh)
		goto out;
	dh->minor = -1U;
	dh->ret_code = NO_ERROR;
	err = nla_put_drbd_cfg_context(skb, resource, NULL, NULL);
	if (err)
		goto out;
	err = res_opts_to_skb(skb, &resource->res_opts, !capable(CAP_SYS_ADMIN));
	if (err)
		goto out;
	resource_to_info(&resource_info, resource);
	err = resource_info_to_skb(skb, &resource_info, !capable(CAP_SYS_ADMIN));
	if (err)
		goto out;
	resource_statistics.res_stat_write_ordering = resource->write_ordering;
	err = resource_statistics_to_skb(skb, &resource_statistics, !capable(CAP_SYS_ADMIN));
	if (err)
		goto out;
	cb->args[0] = (long)resource;
	genlmsg_end(skb, dh);
	err = 0;

out:
	rcu_read_unlock();
	if (err)
		return err;
	return skb->len;
}
*s
,
3327 struct drbd_device
*device
)
3329 memset(s
, 0, sizeof(*s
));
3330 s
->dev_upper_blocked
= !may_inc_ap_bio(device
);
3331 if (get_ldev(device
)) {
3332 struct drbd_md
*md
= &device
->ldev
->md
;
3333 u64
*history_uuids
= (u64
*)s
->history_uuids
;
3334 struct request_queue
*q
;
3337 spin_lock_irq(&md
->uuid_lock
);
3338 s
->dev_current_uuid
= md
->uuid
[UI_CURRENT
];
3339 BUILD_BUG_ON(sizeof(s
->history_uuids
) < UI_HISTORY_END
- UI_HISTORY_START
+ 1);
3340 for (n
= 0; n
< UI_HISTORY_END
- UI_HISTORY_START
+ 1; n
++)
3341 history_uuids
[n
] = md
->uuid
[UI_HISTORY_START
+ n
];
3342 for (; n
< HISTORY_UUIDS
; n
++)
3343 history_uuids
[n
] = 0;
3344 s
->history_uuids_len
= HISTORY_UUIDS
;
3345 spin_unlock_irq(&md
->uuid_lock
);
3347 s
->dev_disk_flags
= md
->flags
;
3348 q
= bdev_get_queue(device
->ldev
->backing_bdev
);
3349 s
->dev_lower_blocked
=
3350 bdi_congested(q
->backing_dev_info
,
3351 (1 << WB_async_congested
) |
3352 (1 << WB_sync_congested
));
3355 s
->dev_size
= drbd_get_capacity(device
->this_bdev
);
3356 s
->dev_read
= device
->read_cnt
;
3357 s
->dev_write
= device
->writ_cnt
;
3358 s
->dev_al_writes
= device
->al_writ_cnt
;
3359 s
->dev_bm_writes
= device
->bm_writ_cnt
;
3360 s
->dev_upper_pending
= atomic_read(&device
->ap_bio_cnt
);
3361 s
->dev_lower_pending
= atomic_read(&device
->local_cnt
);
3362 s
->dev_al_suspended
= test_bit(AL_SUSPENDED
, &device
->flags
);
3363 s
->dev_exposed_data_uuid
= device
->ed_uuid
;
static int put_resource_in_arg0(struct netlink_callback *cb, int holder_nr)
{
	if (cb->args[0]) {
		struct drbd_resource *resource =
			(struct drbd_resource *)cb->args[0];
		kref_put(&resource->kref, drbd_destroy_resource);
	}

	return 0;
}

int drbd_adm_dump_devices_done(struct netlink_callback *cb) {
	return put_resource_in_arg0(cb, 7);
}

static void device_to_info(struct device_info *, struct drbd_device *);
int drbd_adm_dump_devices(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct nlattr *resource_filter;
	struct drbd_resource *resource;
	struct drbd_device *uninitialized_var(device);
	int minor, err, retcode;
	struct drbd_genlmsghdr *dh;
	struct device_info device_info;
	struct device_statistics device_statistics;
	struct idr *idr_to_search;

	resource = (struct drbd_resource *)cb->args[0];
	if (!cb->args[0] && !cb->args[1]) {
		resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name);
		if (resource_filter) {
			retcode = ERR_RES_NOT_KNOWN;
			resource = drbd_find_resource(nla_data(resource_filter));
			if (!resource)
				goto put_result;
			cb->args[0] = (long)resource;
		}
	}

	rcu_read_lock();
	minor = cb->args[1];
	idr_to_search = resource ? &resource->devices : &drbd_devices;
	device = idr_get_next(idr_to_search, &minor);
	if (!device) {
		err = 0;
		goto out;
	}
	idr_for_each_entry_continue(idr_to_search, device, minor) {
		retcode = NO_ERROR;
		goto put_result;  /* only one iteration */
	}
	err = 0;
	goto out;  /* no more devices */

put_result:
	dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
			cb->nlh->nlmsg_seq, &drbd_genl_family,
			NLM_F_MULTI, DRBD_ADM_GET_DEVICES);
	err = -ENOMEM;
	if (!dh)
		goto out;
	dh->ret_code = retcode;
	dh->minor = -1U;
	if (retcode == NO_ERROR) {
		dh->minor = device->minor;
		err = nla_put_drbd_cfg_context(skb, device->resource, NULL, device);
		if (err)
			goto out;
		if (get_ldev(device)) {
			struct disk_conf *disk_conf =
				rcu_dereference(device->ldev->disk_conf);

			err = disk_conf_to_skb(skb, disk_conf, !capable(CAP_SYS_ADMIN));
			put_ldev(device);
			if (err)
				goto out;
		}
		device_to_info(&device_info, device);
		err = device_info_to_skb(skb, &device_info, !capable(CAP_SYS_ADMIN));
		if (err)
			goto out;

		device_to_statistics(&device_statistics, device);
		err = device_statistics_to_skb(skb, &device_statistics, !capable(CAP_SYS_ADMIN));
		if (err)
			goto out;
		cb->args[1] = minor + 1;
	}
	genlmsg_end(skb, dh);
	err = 0;

out:
	rcu_read_unlock();
	if (err)
		return err;
	return skb->len;
}
int drbd_adm_dump_connections_done(struct netlink_callback *cb)
{
	return put_resource_in_arg0(cb, 6);
}

enum { SINGLE_RESOURCE, ITERATE_RESOURCES };
int drbd_adm_dump_connections(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct nlattr *resource_filter;
	struct drbd_resource *resource = NULL, *next_resource;
	struct drbd_connection *uninitialized_var(connection);
	int err = 0, retcode;
	struct drbd_genlmsghdr *dh;
	struct connection_info connection_info;
	struct connection_statistics connection_statistics;

	rcu_read_lock();
	resource = (struct drbd_resource *)cb->args[0];
	if (!cb->args[0]) {
		resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name);
		if (resource_filter) {
			retcode = ERR_RES_NOT_KNOWN;
			resource = drbd_find_resource(nla_data(resource_filter));
			if (!resource)
				goto put_result;
			cb->args[0] = (long)resource;
			cb->args[1] = SINGLE_RESOURCE;
		}
	}
	if (!resource) {
		if (list_empty(&drbd_resources))
			goto out;
		resource = list_first_entry(&drbd_resources, struct drbd_resource, resources);
		kref_get(&resource->kref);
		cb->args[0] = (long)resource;
		cb->args[1] = ITERATE_RESOURCES;
	}

next_resource:
	rcu_read_unlock();
	mutex_lock(&resource->conf_update);
	rcu_read_lock();
	if (cb->args[2]) {
		for_each_connection_rcu(connection, resource)
			if (connection == (struct drbd_connection *)cb->args[2])
				goto found_connection;
		/* connection was probably deleted */
		goto no_more_connections;
	}
	connection = list_entry(&resource->connections, struct drbd_connection, connections);

found_connection:
	list_for_each_entry_continue_rcu(connection, &resource->connections, connections) {
		if (!has_net_conf(connection))
			continue;
		retcode = NO_ERROR;
		goto put_result;  /* only one iteration */
	}

no_more_connections:
	if (cb->args[1] == ITERATE_RESOURCES) {
		for_each_resource_rcu(next_resource, &drbd_resources) {
			if (next_resource == resource)
				goto found_resource;
		}
		/* resource was probably deleted */
	}
	goto out;

found_resource:
	list_for_each_entry_continue_rcu(next_resource, &drbd_resources, resources) {
		mutex_unlock(&resource->conf_update);
		kref_put(&resource->kref, drbd_destroy_resource);
		resource = next_resource;
		kref_get(&resource->kref);
		cb->args[0] = (long)resource;
		cb->args[2] = 0;
		goto next_resource;
	}
	goto out;  /* no more resources */

put_result:
	dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
			cb->nlh->nlmsg_seq, &drbd_genl_family,
			NLM_F_MULTI, DRBD_ADM_GET_CONNECTIONS);
	err = -ENOMEM;
	if (!dh)
		goto out;
	dh->ret_code = retcode;
	dh->minor = -1U;
	if (retcode == NO_ERROR) {
		struct net_conf *net_conf;

		err = nla_put_drbd_cfg_context(skb, resource, connection, NULL);
		if (err)
			goto out;
		net_conf = rcu_dereference(connection->net_conf);
		if (net_conf) {
			err = net_conf_to_skb(skb, net_conf, !capable(CAP_SYS_ADMIN));
			if (err)
				goto out;
		}
		connection_to_info(&connection_info, connection);
		err = connection_info_to_skb(skb, &connection_info, !capable(CAP_SYS_ADMIN));
		if (err)
			goto out;
		connection_statistics.conn_congested = test_bit(NET_CONGESTED, &connection->flags);
		err = connection_statistics_to_skb(skb, &connection_statistics, !capable(CAP_SYS_ADMIN));
		if (err)
			goto out;
		cb->args[2] = (long)connection;
	}
	genlmsg_end(skb, dh);
	err = 0;

out:
	rcu_read_unlock();
	if (resource)
		mutex_unlock(&resource->conf_update);
	if (err)
		return err;
	return skb->len;
}
enum mdf_peer_flag {
	MDF_PEER_CONNECTED =	1 << 0,
	MDF_PEER_OUTDATED =	1 << 1,
	MDF_PEER_FENCING =	1 << 2,
	MDF_PEER_FULL_SYNC =	1 << 3,
};

static void peer_device_to_statistics(struct peer_device_statistics *s,
				      struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;

	memset(s, 0, sizeof(*s));
	s->peer_dev_received = device->recv_cnt;
	s->peer_dev_sent = device->send_cnt;
	s->peer_dev_pending = atomic_read(&device->ap_pending_cnt) +
			      atomic_read(&device->rs_pending_cnt);
	s->peer_dev_unacked = atomic_read(&device->unacked_cnt);
	s->peer_dev_out_of_sync = drbd_bm_total_weight(device) << (BM_BLOCK_SHIFT - 9);
	s->peer_dev_resync_failed = device->rs_failed << (BM_BLOCK_SHIFT - 9);
	if (get_ldev(device)) {
		struct drbd_md *md = &device->ldev->md;

		spin_lock_irq(&md->uuid_lock);
		s->peer_dev_bitmap_uuid = md->uuid[UI_BITMAP];
		spin_unlock_irq(&md->uuid_lock);
		s->peer_dev_flags =
			(drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND) ?
				MDF_PEER_CONNECTED : 0) +
			(drbd_md_test_flag(device->ldev, MDF_CONSISTENT) &&
			 !drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE) ?
				MDF_PEER_OUTDATED : 0) +
			/* FIXME: MDF_PEER_FENCING? */
			(drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ?
				MDF_PEER_FULL_SYNC : 0);
		put_ldev(device);
	}
}
*cb
)
3631 return put_resource_in_arg0(cb
, 9);
3634 int drbd_adm_dump_peer_devices(struct sk_buff
*skb
, struct netlink_callback
*cb
)
3636 struct nlattr
*resource_filter
;
3637 struct drbd_resource
*resource
;
3638 struct drbd_device
*uninitialized_var(device
);
3639 struct drbd_peer_device
*peer_device
= NULL
;
3640 int minor
, err
, retcode
;
3641 struct drbd_genlmsghdr
*dh
;
3642 struct idr
*idr_to_search
;
3644 resource
= (struct drbd_resource
*)cb
->args
[0];
3645 if (!cb
->args
[0] && !cb
->args
[1]) {
3646 resource_filter
= find_cfg_context_attr(cb
->nlh
, T_ctx_resource_name
);
3647 if (resource_filter
) {
3648 retcode
= ERR_RES_NOT_KNOWN
;
3649 resource
= drbd_find_resource(nla_data(resource_filter
));
3653 cb
->args
[0] = (long)resource
;
3657 minor
= cb
->args
[1];
3658 idr_to_search
= resource
? &resource
->devices
: &drbd_devices
;
3659 device
= idr_find(idr_to_search
, minor
);
3664 device
= idr_get_next(idr_to_search
, &minor
);
3671 for_each_peer_device(peer_device
, device
)
3672 if (peer_device
== (struct drbd_peer_device
*)cb
->args
[2])
3673 goto found_peer_device
;
3674 /* peer device was probably deleted */
3677 /* Make peer_device point to the list head (not the first entry). */
3678 peer_device
= list_entry(&device
->peer_devices
, struct drbd_peer_device
, peer_devices
);
3681 list_for_each_entry_continue_rcu(peer_device
, &device
->peer_devices
, peer_devices
) {
3682 if (!has_net_conf(peer_device
->connection
))
3685 goto put_result
; /* only one iteration */
3690 dh
= genlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
,
3691 cb
->nlh
->nlmsg_seq
, &drbd_genl_family
,
3692 NLM_F_MULTI
, DRBD_ADM_GET_PEER_DEVICES
);
3696 dh
->ret_code
= retcode
;
3698 if (retcode
== NO_ERROR
) {
3699 struct peer_device_info peer_device_info
;
3700 struct peer_device_statistics peer_device_statistics
;
3703 err
= nla_put_drbd_cfg_context(skb
, device
->resource
, peer_device
->connection
, device
);
3706 peer_device_to_info(&peer_device_info
, peer_device
);
3707 err
= peer_device_info_to_skb(skb
, &peer_device_info
, !capable(CAP_SYS_ADMIN
));
3710 peer_device_to_statistics(&peer_device_statistics
, peer_device
);
3711 err
= peer_device_statistics_to_skb(skb
, &peer_device_statistics
, !capable(CAP_SYS_ADMIN
));
3714 cb
->args
[1] = minor
;
3715 cb
->args
[2] = (long)peer_device
;
3717 genlmsg_end(skb
, dh
);
/*
 * Return the connection of @resource if @resource has exactly one connection.
 */
static struct drbd_connection *the_only_connection(struct drbd_resource *resource)
{
	struct list_head *connections = &resource->connections;

	if (list_empty(connections) || connections->next->next != connections)
		return NULL;
	return list_first_entry(&resource->connections, struct drbd_connection, connections);
}
static int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
		const struct sib_info *sib)
{
	struct drbd_resource *resource = device->resource;
	struct state_info *si = NULL; /* for sizeof(si->member); */
	struct nlattr *nla;
	int got_ldev;
	int err = 0;
	int exclude_sensitive;

	/* If sib != NULL, this is drbd_bcast_event, which anyone can listen
	 * to.  So we better exclude_sensitive information.
	 *
	 * If sib == NULL, this is drbd_adm_get_status, executed synchronously
	 * in the context of the requesting user process.  Exclude sensitive
	 * information, unless current has superuser privileges.
	 *
	 * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
	 * relies on the current implementation of netlink_dump(), which
	 * executes the dump callback successively from netlink_recvmsg(),
	 * always in the context of the receiving process. */
	exclude_sensitive = sib || !capable(CAP_SYS_ADMIN);

	got_ldev = get_ldev(device);

	/* We need to add connection name and volume number information still.
	 * Minor number is in drbd_genlmsghdr. */
	if (nla_put_drbd_cfg_context(skb, resource, the_only_connection(resource), device))
		goto nla_put_failure;

	if (res_opts_to_skb(skb, &device->resource->res_opts, exclude_sensitive))
		goto nla_put_failure;

	rcu_read_lock();
	if (got_ldev) {
		struct disk_conf *disk_conf;

		disk_conf = rcu_dereference(device->ldev->disk_conf);
		err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
	}
	if (!err) {
		struct net_conf *nc;

		nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
		if (nc)
			err = net_conf_to_skb(skb, nc, exclude_sensitive);
	}
	rcu_read_unlock();
	if (err)
		goto nla_put_failure;

	nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
	if (!nla)
		goto nla_put_failure;
	if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
	    nla_put_u32(skb, T_current_state, device->state.i) ||
	    nla_put_u64_0pad(skb, T_ed_uuid, device->ed_uuid) ||
	    nla_put_u64_0pad(skb, T_capacity,
			     drbd_get_capacity(device->this_bdev)) ||
	    nla_put_u64_0pad(skb, T_send_cnt, device->send_cnt) ||
	    nla_put_u64_0pad(skb, T_recv_cnt, device->recv_cnt) ||
	    nla_put_u64_0pad(skb, T_read_cnt, device->read_cnt) ||
	    nla_put_u64_0pad(skb, T_writ_cnt, device->writ_cnt) ||
	    nla_put_u64_0pad(skb, T_al_writ_cnt, device->al_writ_cnt) ||
	    nla_put_u64_0pad(skb, T_bm_writ_cnt, device->bm_writ_cnt) ||
	    nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&device->ap_bio_cnt)) ||
	    nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&device->ap_pending_cnt)) ||
	    nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&device->rs_pending_cnt)))
		goto nla_put_failure;

	if (got_ldev) {
		int err;

		spin_lock_irq(&device->ldev->md.uuid_lock);
		err = nla_put(skb, T_uuids, sizeof(si->uuids), device->ldev->md.uuid);
		spin_unlock_irq(&device->ldev->md.uuid_lock);

		if (err)
			goto nla_put_failure;

		if (nla_put_u32(skb, T_disk_flags, device->ldev->md.flags) ||
		    nla_put_u64_0pad(skb, T_bits_total, drbd_bm_bits(device)) ||
		    nla_put_u64_0pad(skb, T_bits_oos,
				     drbd_bm_total_weight(device)))
			goto nla_put_failure;
		if (C_SYNC_SOURCE <= device->state.conn &&
		    C_PAUSED_SYNC_T >= device->state.conn) {
			if (nla_put_u64_0pad(skb, T_bits_rs_total,
					     device->rs_total) ||
			    nla_put_u64_0pad(skb, T_bits_rs_failed,
					     device->rs_failed))
				goto nla_put_failure;
		}
	}

	if (sib) {
		switch (sib->sib_reason) {
		case SIB_SYNC_PROGRESS:
		case SIB_GET_STATUS_REPLY:
			break;
		case SIB_STATE_CHANGE:
			if (nla_put_u32(skb, T_prev_state, sib->os.i) ||
			    nla_put_u32(skb, T_new_state, sib->ns.i))
				goto nla_put_failure;
			break;
		case SIB_HELPER_POST:
			if (nla_put_u32(skb, T_helper_exit_code,
					sib->helper_exit_code))
				goto nla_put_failure;
			/* fall through */
		case SIB_HELPER_PRE:
			if (nla_put_string(skb, T_helper, sib->helper_name))
				goto nla_put_failure;
			break;
		}
	}
	nla_nest_end(skb, nla);

	if (0)
nla_put_failure:
		err = -EMSGSIZE;
	if (got_ldev)
		put_ldev(device);
	return err;
}
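
/* Reply to a DRBD_ADM_GET_STATUS request with the status of a single minor. */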
int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	enum drbd_ret_code retcode;
	int err;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.device, NULL);
	if (err) {
		nlmsg_free(adm_ctx.reply_skb);
		return err;
	}
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
static int get_one_status(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct drbd_device *device;
	struct drbd_genlmsghdr *dh;
	struct drbd_resource *pos = (struct drbd_resource *)cb->args[0];
	struct drbd_resource *resource = NULL;
	struct drbd_resource *tmp;
	unsigned volume = cb->args[1];

	/* Open coded, deferred, iteration:
	 * for_each_resource_safe(resource, tmp, &drbd_resources) {
	 *	connection = "first connection of resource or undefined";
	 *	idr_for_each_entry(&resource->devices, device, i) {
	 *	  ...
	 *	}
	 * }
	 * where resource is cb->args[0];
	 * and i is cb->args[1];
	 *
	 * cb->args[2] indicates if we shall loop over all resources,
	 * or just dump all volumes of a single resource.
	 *
	 * This may miss entries inserted after this dump started,
	 * or entries deleted before they are reached.
	 *
	 * We need to make sure the device won't disappear while
	 * we are looking at it, and revalidate our iterators
	 * on each iteration.
	 */

	/* synchronize with conn_create()/drbd_destroy_connection() */
	rcu_read_lock();
	/* revalidate iterator position */
	for_each_resource_rcu(tmp, &drbd_resources) {
		if (pos == NULL) {
			/* first iteration */
			pos = tmp;
			resource = pos;
			break;
		}
		if (tmp == pos) {
			resource = pos;
			break;
		}
	}
	if (resource) {
next_resource:
		device = idr_get_next(&resource->devices, &volume);
		if (!device) {
			/* No more volumes to dump on this resource.
			 * Advance resource iterator. */
			pos = list_entry_rcu(resource->resources.next,
					     struct drbd_resource, resources);
			/* Did we dump any volume of this resource yet? */
			if (volume != 0) {
				/* If we reached the end of the list,
				 * or only a single resource dump was requested,
				 * we are done. */
				if (&pos->resources == &drbd_resources || cb->args[2])
					goto out;
				volume = 0;
				resource = pos;
				goto next_resource;
			}
		}

		dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
				cb->nlh->nlmsg_seq, &drbd_genl_family,
				NLM_F_MULTI, DRBD_ADM_GET_STATUS);
		if (!dh)
			goto out;

		if (!device) {
			/* This is a connection without a single volume.
			 * Surprisingly enough, it may have a network
			 * configuration. */
			struct drbd_connection *connection;

			dh->minor = -1U;
			dh->ret_code = NO_ERROR;
			connection = the_only_connection(resource);
			if (nla_put_drbd_cfg_context(skb, resource, connection, NULL))
				goto cancel;
			if (connection) {
				struct net_conf *nc;

				nc = rcu_dereference(connection->net_conf);
				if (nc && net_conf_to_skb(skb, nc, 1) != 0)
					goto cancel;
			}
			goto done;
		}

		D_ASSERT(device, device->vnr == volume);
		D_ASSERT(device, device->resource == resource);

		dh->minor = device_to_minor(device);
		dh->ret_code = NO_ERROR;

		if (nla_put_status_info(skb, device, NULL)) {
cancel:
			genlmsg_cancel(skb, dh);
			goto out;
		}
done:
		genlmsg_end(skb, dh);
	}

out:
	rcu_read_unlock();
	/* where to start the next iteration */
	cb->args[0] = (long)pos;
	cb->args[1] = (pos == resource) ? volume + 1 : 0;

	/* No more resources/volumes/minors found results in an empty skb.
	 * Which will terminate the dump. */
	return skb->len;
}
/*
 * Request status of all resources, or of all volumes within a single
 * resource.
 *
 * This is a dump, as the answer may not fit in a single reply skb otherwise.
 * Which means we cannot use the family->attrbuf or other such members, because
 * dump is NOT protected by the genl_lock().  During dump, we only have access
 * to the incoming skb, and need to opencode "parsing" of the nlattr payload.
 *
 * Once things are set up properly, we call into get_one_status().
 */
int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
{
	const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
	struct nlattr *nla;
	const char *resource_name;
	struct drbd_resource *resource;
	int maxtype;

	/* Is this a followup call? */
	if (cb->args[0]) {
		/* ... of a single resource dump,
		 * and the resource iterator has been advanced already? */
		if (cb->args[2] && cb->args[2] != cb->args[0])
			return 0; /* DONE. */
		goto dump;
	}

	/* First call (from netlink_dump_start).  We need to figure out
	 * which resource(s) the user wants us to dump. */
	nla = nla_find(nlmsg_attrdata(cb->nlh, hdrlen),
			nlmsg_attrlen(cb->nlh, hdrlen),
			DRBD_NLA_CFG_CONTEXT);

	/* No explicit context given.  Dump all. */
	if (!nla)
		goto dump;
	maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
	nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
	if (IS_ERR(nla))
		return PTR_ERR(nla);
	if (!nla)
		return -EINVAL; /* context given, but no name present? */
	resource_name = nla_data(nla);
	if (!*resource_name)
		return -ENODEV;
	resource = drbd_find_resource(resource_name);
	if (!resource)
		return -ENODEV;

	kref_put(&resource->kref, drbd_destroy_resource); /* get_one_status() revalidates the resource */

	/* prime iterators, and set "filter" mode mark:
	 * only dump this connection. */
	cb->args[0] = (long)resource;
	/* cb->args[1] = 0; passed in this way. */
	cb->args[2] = (long)resource;

dump:
	return get_one_status(skb, cb);
}
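
/* Dump iterator state, for reference (see get_one_status()):
 *   cb->args[0] - resource to continue with (cast from a pointer);
 *   cb->args[1] - volume number to continue with within that resource;
 *   cb->args[2] - if non-zero, dump only this single resource.
 */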
int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	enum drbd_ret_code retcode;
	struct timeout_parms tp;
	int err;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	tp.timeout_type =
		adm_ctx.device->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
		test_bit(USE_DEGR_WFC_T, &adm_ctx.device->flags) ? UT_DEGRADED :
		UT_DEFAULT;

	err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
	if (err) {
		nlmsg_free(adm_ctx.reply_skb);
		return err;
	}
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
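
/* Start online verify, resuming from the last known position unless the
 * request carries explicit start/stop sectors. */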
int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	struct drbd_device *device;
	enum drbd_ret_code retcode;
	struct start_ov_parms parms;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	device = adm_ctx.device;

	/* resume from last known position, if possible */
	parms.ov_start_sector = device->ov_start_sector;
	parms.ov_stop_sector = ULLONG_MAX;
	if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
		int err = start_ov_parms_from_attrs(&parms, info);
		if (err) {
			retcode = ERR_MANDATORY_TAG;
			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
			goto out;
		}
	}
	mutex_lock(&adm_ctx.resource->adm_mutex);

	/* w_make_ov_request expects position to be aligned */
	device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
	device->ov_stop_sector = parms.ov_stop_sector;

	/* If there is still bitmap IO pending, e.g. previous resync or verify
	 * just being finished, wait for it before requesting a new resync. */
	drbd_suspend_io(device);
	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
	retcode = drbd_request_state(device, NS(conn, C_VERIFY_S));
	drbd_resume_io(device);

	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
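
/* Generate a new current UUID.  With clear_bm set, also clear the bitmap;
 * on a freshly created, connected device this amounts to skipping the
 * initial sync. */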
int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	struct drbd_device *device;
	enum drbd_ret_code retcode;
	int skip_initial_sync = 0;
	int err;
	struct new_c_uuid_parms args;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out_nolock;

	device = adm_ctx.device;
	memset(&args, 0, sizeof(args));
	if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
		err = new_c_uuid_parms_from_attrs(&args, info);
		if (err) {
			retcode = ERR_MANDATORY_TAG;
			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
			goto out_nolock;
		}
	}

	mutex_lock(&adm_ctx.resource->adm_mutex);
	mutex_lock(device->state_mutex); /* Protects us against serialized state changes. */

	if (!get_ldev(device)) {
		retcode = ERR_NO_DISK;
		goto out;
	}

	/* this is "skip initial sync", assume to be clean */
	if (device->state.conn == C_CONNECTED &&
	    first_peer_device(device)->connection->agreed_pro_version >= 90 &&
	    device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
		drbd_info(device, "Preparing to skip initial sync\n");
		skip_initial_sync = 1;
	} else if (device->state.conn != C_STANDALONE) {
		retcode = ERR_CONNECTED;
		goto out_dec;
	}

	drbd_uuid_set(device, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
	drbd_uuid_new_current(device); /* New current, previous to UI_BITMAP */

	if (args.clear_bm) {
		err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
			"clear_n_write from new_c_uuid", BM_LOCKED_MASK);
		if (err) {
			drbd_err(device, "Writing bitmap failed with %d\n", err);
			retcode = ERR_IO_MD_DISK;
		}
		if (skip_initial_sync) {
			drbd_send_uuids_skip_initial_sync(first_peer_device(device));
			_drbd_uuid_set(device, UI_BITMAP, 0);
			drbd_print_uuids(device, "cleared bitmap UUID");
			spin_lock_irq(&device->resource->req_lock);
			_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
					CS_VERBOSE, NULL);
			spin_unlock_irq(&device->resource->req_lock);
		}
	}

	drbd_md_sync(device);
out_dec:
	put_ldev(device);
out:
	mutex_unlock(device->state_mutex);
	mutex_unlock(&adm_ctx.resource->adm_mutex);
out_nolock:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
static enum drbd_ret_code
drbd_check_resource_name(struct drbd_config_context *adm_ctx)
{
	const char *name = adm_ctx->resource_name;

	if (!name || !name[0]) {
		drbd_msg_put_info(adm_ctx->reply_skb, "resource name missing");
		return ERR_MANDATORY_TAG;
	}
	/* if we want to use these in sysfs/configfs/debugfs some day,
	 * we must not allow slashes */
	if (strchr(name, '/')) {
		drbd_msg_put_info(adm_ctx->reply_skb, "invalid resource name");
		return ERR_INVALID_REQUEST;
	}
	return NO_ERROR;
}
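
/* Collect the fields broadcast in a resource state notification. */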
static void resource_to_info(struct resource_info *info,
			     struct drbd_resource *resource)
{
	info->res_role = conn_highest_role(first_connection(resource));
	info->res_susp = resource->susp;
	info->res_susp_nod = resource->susp_nod;
	info->res_susp_fen = resource->susp_fen;
}
int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_connection *connection;
	struct drbd_config_context adm_ctx;
	enum drbd_ret_code retcode;
	struct res_opts res_opts;
	int err;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, 0);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	set_res_opts_defaults(&res_opts);
	err = res_opts_from_attrs(&res_opts, info);
	if (err && err != -ENOMSG) {
		retcode = ERR_MANDATORY_TAG;
		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
		goto out;
	}

	retcode = drbd_check_resource_name(&adm_ctx);
	if (retcode != NO_ERROR)
		goto out;

	if (adm_ctx.resource) {
		if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
			retcode = ERR_INVALID_REQUEST;
			drbd_msg_put_info(adm_ctx.reply_skb, "resource exists");
		}
		/* else: still NO_ERROR */
		goto out;
	}

	/* not yet safe for genl_family.parallel_ops */
	mutex_lock(&resources_mutex);
	connection = conn_create(adm_ctx.resource_name, &res_opts);
	mutex_unlock(&resources_mutex);

	if (connection) {
		struct resource_info resource_info;

		mutex_lock(&notification_mutex);
		resource_to_info(&resource_info, connection->resource);
		notify_resource_state(NULL, 0, connection->resource,
				      &resource_info, NOTIFY_CREATE);
		mutex_unlock(&notification_mutex);
	} else
		retcode = ERR_NOMEM;

out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
static void device_to_info(struct device_info *info,
			   struct drbd_device *device)
{
	info->dev_disk_state = device->state.disk;
}
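
/* Create a new minor (volume) within an existing resource, and broadcast
 * NOTIFY_CREATE events for the new device and each of its peer devices. */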
int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	struct drbd_genlmsghdr *dh = info->userhdr;
	enum drbd_ret_code retcode;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	if (dh->minor > MINORMASK) {
		drbd_msg_put_info(adm_ctx.reply_skb, "requested minor out of range");
		retcode = ERR_INVALID_REQUEST;
		goto out;
	}
	if (adm_ctx.volume > DRBD_VOLUME_MAX) {
		drbd_msg_put_info(adm_ctx.reply_skb, "requested volume id out of range");
		retcode = ERR_INVALID_REQUEST;
		goto out;
	}

	/* drbd_adm_prepare made sure already
	 * that first_peer_device(device)->connection and device->vnr match the request. */
	if (adm_ctx.device) {
		if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
			retcode = ERR_MINOR_OR_VOLUME_EXISTS;
		/* else: still NO_ERROR */
		goto out;
	}

	mutex_lock(&adm_ctx.resource->adm_mutex);
	retcode = drbd_create_device(&adm_ctx, dh->minor);
	if (retcode == NO_ERROR) {
		struct drbd_device *device;
		struct drbd_peer_device *peer_device;
		struct device_info info;
		unsigned int peer_devices = 0;
		enum drbd_notification_type flags;

		device = minor_to_device(dh->minor);
		for_each_peer_device(peer_device, device) {
			if (!has_net_conf(peer_device->connection))
				continue;
			peer_devices++;
		}

		device_to_info(&info, device);
		mutex_lock(&notification_mutex);
		flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
		notify_device_state(NULL, 0, device, &info, NOTIFY_CREATE | flags);
		for_each_peer_device(peer_device, device) {
			struct peer_device_info peer_device_info;

			if (!has_net_conf(peer_device->connection))
				continue;
			peer_device_to_info(&peer_device_info, peer_device);
			flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
			notify_peer_device_state(NULL, 0, peer_device, &peer_device_info,
						 NOTIFY_CREATE | flags);
		}
		mutex_unlock(&notification_mutex);
	}
	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
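
/* A minor may only be removed while it is diskless and Secondary; the
 * corresponding NOTIFY_DESTROY events are broadcast before it goes away. */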
static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
{
	struct drbd_peer_device *peer_device;

	if (device->state.disk == D_DISKLESS &&
	    /* no need to be device->state.conn == C_STANDALONE &&
	     * we may want to delete a minor from a live replication group.
	     */
	    device->state.role == R_SECONDARY) {
		struct drbd_connection *connection =
			first_connection(device->resource);

		_drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS),
				    CS_VERBOSE + CS_WAIT_COMPLETE);

		/* If the state engine hasn't stopped the sender thread yet, we
		 * need to flush the sender work queue before generating the
		 * DESTROY events here. */
		if (get_t_state(&connection->worker) == RUNNING)
			drbd_flush_workqueue(&connection->sender_work);

		mutex_lock(&notification_mutex);
		for_each_peer_device(peer_device, device) {
			if (!has_net_conf(peer_device->connection))
				continue;
			notify_peer_device_state(NULL, 0, peer_device, NULL,
						 NOTIFY_DESTROY | NOTIFY_CONTINUES);
		}
		notify_device_state(NULL, 0, device, NULL, NOTIFY_DESTROY);
		mutex_unlock(&notification_mutex);

		drbd_delete_device(device);
		return NO_ERROR;
	} else
		return ERR_MINOR_CONFIGURED;
}
int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	enum drbd_ret_code retcode;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	mutex_lock(&adm_ctx.resource->adm_mutex);
	retcode = adm_del_minor(adm_ctx.device);
	mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
static int adm_del_resource(struct drbd_resource *resource)
{
	struct drbd_connection *connection;

	for_each_connection(connection, resource) {
		if (connection->cstate > C_STANDALONE)
			return ERR_NET_CONFIGURED;
	}
	if (!idr_is_empty(&resource->devices))
		return ERR_RES_IN_USE;

	/* The state engine has stopped the sender thread, so we don't
	 * need to flush the sender work queue before generating the
	 * DESTROY event here. */
	mutex_lock(&notification_mutex);
	notify_resource_state(NULL, 0, resource, NULL, NOTIFY_DESTROY);
	mutex_unlock(&notification_mutex);

	mutex_lock(&resources_mutex);
	list_del_rcu(&resource->resources);
	mutex_unlock(&resources_mutex);
	/* Make sure all threads have actually stopped: state handling only
	 * does drbd_thread_stop_nowait(). */
	list_for_each_entry(connection, &resource->connections, connections)
		drbd_thread_stop(&connection->worker);
	synchronize_rcu();
	drbd_free_resource(resource);
	return NO_ERROR;
}
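
/* Take the whole resource down: demote to Secondary, disconnect all
 * connections, detach and delete all volumes, then delete the resource
 * itself. */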
int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	struct drbd_resource *resource;
	struct drbd_connection *connection;
	struct drbd_device *device;
	int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
	unsigned i;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto finish;

	resource = adm_ctx.resource;
	mutex_lock(&resource->adm_mutex);
	/* demote */
	for_each_connection(connection, resource) {
		struct drbd_peer_device *peer_device;

		idr_for_each_entry(&connection->peer_devices, peer_device, i) {
			retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0);
			if (retcode < SS_SUCCESS) {
				drbd_msg_put_info(adm_ctx.reply_skb, "failed to demote");
				goto out;
			}
		}

		retcode = conn_try_disconnect(connection, 0);
		if (retcode < SS_SUCCESS) {
			drbd_msg_put_info(adm_ctx.reply_skb, "failed to disconnect");
			goto out;
		}
	}

	/* detach */
	idr_for_each_entry(&resource->devices, device, i) {
		retcode = adm_detach(device, 0);
		if (retcode < SS_SUCCESS || retcode > NO_ERROR) {
			drbd_msg_put_info(adm_ctx.reply_skb, "failed to detach");
			goto out;
		}
	}

	/* delete volumes */
	idr_for_each_entry(&resource->devices, device, i) {
		retcode = adm_del_minor(device);
		if (retcode != NO_ERROR) {
			/* "can not happen" */
			drbd_msg_put_info(adm_ctx.reply_skb, "failed to delete volume");
			goto out;
		}
	}

	retcode = adm_del_resource(resource);
out:
	mutex_unlock(&resource->adm_mutex);
finish:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	struct drbd_resource *resource;
	enum drbd_ret_code retcode;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;
	resource = adm_ctx.resource;

	mutex_lock(&resource->adm_mutex);
	retcode = adm_del_resource(resource);
	mutex_unlock(&resource->adm_mutex);
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
{
	struct sk_buff *msg;
	struct drbd_genlmsghdr *d_out;
	unsigned seq;
	int err = -ENOMEM;

	seq = atomic_inc_return(&drbd_genl_seq);
	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
	if (!msg)
		goto failed;

	err = -EMSGSIZE;
	d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT);
	if (!d_out) /* cannot happen, but anyways. */
		goto nla_put_failure;
	d_out->minor = device_to_minor(device);
	d_out->ret_code = NO_ERROR;

	if (nla_put_status_info(msg, device, sib))
		goto nla_put_failure;
	genlmsg_end(msg, d_out);
	err = drbd_genl_multicast_events(msg, GFP_NOWAIT);
	/* msg has been consumed or freed in netlink_broadcast() */
	if (err && err != -ESRCH)
		goto failed;

	return;

nla_put_failure:
	nlmsg_free(msg);
failed:
	drbd_err(device, "Error %d while broadcasting event. "
			 "Event seq:%u sib_reason:%u\n",
			 err, seq, sib->sib_reason);
}
static int nla_put_notification_header(struct sk_buff *msg,
				       enum drbd_notification_type type)
{
	struct drbd_notification_header nh = {
		.nh_type = type,
	};

	return drbd_notification_header_to_skb(msg, &nh, true);
}
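
/* The notify_*_state() functions below either append an event to an
 * initial-state dump skb, or, when called with @skb == NULL, allocate a
 * new message and multicast it to the DRBD events listeners. */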
void notify_resource_state(struct sk_buff *skb,
			   unsigned int seq,
			   struct drbd_resource *resource,
			   struct resource_info *resource_info,
			   enum drbd_notification_type type)
{
	struct resource_statistics resource_statistics;
	struct drbd_genlmsghdr *dh;
	bool multicast = false;
	int err;

	if (!skb) {
		seq = atomic_inc_return(&notify_genl_seq);
		skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
		err = -ENOMEM;
		if (!skb)
			goto failed;
		multicast = true;
	}

	err = -EMSGSIZE;
	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_RESOURCE_STATE);
	if (!dh)
		goto nla_put_failure;
	dh->minor = -1U;
	dh->ret_code = NO_ERROR;
	if (nla_put_drbd_cfg_context(skb, resource, NULL, NULL) ||
	    nla_put_notification_header(skb, type) ||
	    ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
	     resource_info_to_skb(skb, resource_info, true)))
		goto nla_put_failure;
	resource_statistics.res_stat_write_ordering = resource->write_ordering;
	err = resource_statistics_to_skb(skb, &resource_statistics, !capable(CAP_SYS_ADMIN));
	if (err)
		goto nla_put_failure;
	genlmsg_end(skb, dh);
	if (multicast) {
		err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
		/* skb has been consumed or freed in netlink_broadcast() */
		if (err && err != -ESRCH)
			goto failed;
	}
	return;

nla_put_failure:
	nlmsg_free(skb);
failed:
	drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n",
		 err, seq);
}
void notify_device_state(struct sk_buff *skb,
			 unsigned int seq,
			 struct drbd_device *device,
			 struct device_info *device_info,
			 enum drbd_notification_type type)
{
	struct device_statistics device_statistics;
	struct drbd_genlmsghdr *dh;
	bool multicast = false;
	int err;

	if (!skb) {
		seq = atomic_inc_return(&notify_genl_seq);
		skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
		err = -ENOMEM;
		if (!skb)
			goto failed;
		multicast = true;
	}

	err = -EMSGSIZE;
	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_DEVICE_STATE);
	if (!dh)
		goto nla_put_failure;
	dh->minor = device->minor;
	dh->ret_code = NO_ERROR;
	if (nla_put_drbd_cfg_context(skb, device->resource, NULL, device) ||
	    nla_put_notification_header(skb, type) ||
	    ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
	     device_info_to_skb(skb, device_info, true)))
		goto nla_put_failure;
	device_to_statistics(&device_statistics, device);
	device_statistics_to_skb(skb, &device_statistics, !capable(CAP_SYS_ADMIN));
	genlmsg_end(skb, dh);
	if (multicast) {
		err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
		/* skb has been consumed or freed in netlink_broadcast() */
		if (err && err != -ESRCH)
			goto failed;
	}
	return;

nla_put_failure:
	nlmsg_free(skb);
failed:
	drbd_err(device, "Error %d while broadcasting event. Event seq:%u\n",
		 err, seq);
}
void notify_connection_state(struct sk_buff *skb,
			     unsigned int seq,
			     struct drbd_connection *connection,
			     struct connection_info *connection_info,
			     enum drbd_notification_type type)
{
	struct connection_statistics connection_statistics;
	struct drbd_genlmsghdr *dh;
	bool multicast = false;
	int err;

	if (!skb) {
		seq = atomic_inc_return(&notify_genl_seq);
		skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
		err = -ENOMEM;
		if (!skb)
			goto failed;
		multicast = true;
	}

	err = -EMSGSIZE;
	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_CONNECTION_STATE);
	if (!dh)
		goto nla_put_failure;
	dh->minor = -1U;
	dh->ret_code = NO_ERROR;
	if (nla_put_drbd_cfg_context(skb, connection->resource, connection, NULL) ||
	    nla_put_notification_header(skb, type) ||
	    ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
	     connection_info_to_skb(skb, connection_info, true)))
		goto nla_put_failure;
	connection_statistics.conn_congested = test_bit(NET_CONGESTED, &connection->flags);
	connection_statistics_to_skb(skb, &connection_statistics, !capable(CAP_SYS_ADMIN));
	genlmsg_end(skb, dh);
	if (multicast) {
		err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
		/* skb has been consumed or freed in netlink_broadcast() */
		if (err && err != -ESRCH)
			goto failed;
	}
	return;

nla_put_failure:
	nlmsg_free(skb);
failed:
	drbd_err(connection, "Error %d while broadcasting event. Event seq:%u\n",
		 err, seq);
}
void notify_peer_device_state(struct sk_buff *skb,
			      unsigned int seq,
			      struct drbd_peer_device *peer_device,
			      struct peer_device_info *peer_device_info,
			      enum drbd_notification_type type)
{
	struct peer_device_statistics peer_device_statistics;
	struct drbd_resource *resource = peer_device->device->resource;
	struct drbd_genlmsghdr *dh;
	bool multicast = false;
	int err;

	if (!skb) {
		seq = atomic_inc_return(&notify_genl_seq);
		skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
		err = -ENOMEM;
		if (!skb)
			goto failed;
		multicast = true;
	}

	err = -EMSGSIZE;
	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_PEER_DEVICE_STATE);
	if (!dh)
		goto nla_put_failure;
	dh->minor = -1U;
	dh->ret_code = NO_ERROR;
	if (nla_put_drbd_cfg_context(skb, resource, peer_device->connection, peer_device->device) ||
	    nla_put_notification_header(skb, type) ||
	    ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
	     peer_device_info_to_skb(skb, peer_device_info, true)))
		goto nla_put_failure;
	peer_device_to_statistics(&peer_device_statistics, peer_device);
	peer_device_statistics_to_skb(skb, &peer_device_statistics, !capable(CAP_SYS_ADMIN));
	genlmsg_end(skb, dh);
	if (multicast) {
		err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
		/* skb has been consumed or freed in netlink_broadcast() */
		if (err && err != -ESRCH)
			goto failed;
	}
	return;

nla_put_failure:
	nlmsg_free(skb);
failed:
	drbd_err(peer_device, "Error %d while broadcasting event. Event seq:%u\n",
		 err, seq);
}
void notify_helper(enum drbd_notification_type type,
		   struct drbd_device *device, struct drbd_connection *connection,
		   const char *name, int status)
{
	struct drbd_resource *resource = device ? device->resource : connection->resource;
	struct drbd_helper_info helper_info;
	unsigned int seq = atomic_inc_return(&notify_genl_seq);
	struct sk_buff *skb = NULL;
	struct drbd_genlmsghdr *dh;
	int err;

	strlcpy(helper_info.helper_name, name, sizeof(helper_info.helper_name));
	helper_info.helper_name_len = min(strlen(name), sizeof(helper_info.helper_name));
	helper_info.helper_status = status;

	err = -ENOMEM;
	skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
	if (!skb)
		goto fail;

	err = -EMSGSIZE;
	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_HELPER);
	if (!dh)
		goto fail;
	dh->minor = device ? device->minor : -1;
	dh->ret_code = NO_ERROR;
	mutex_lock(&notification_mutex);
	if (nla_put_drbd_cfg_context(skb, resource, connection, device) ||
	    nla_put_notification_header(skb, type) ||
	    drbd_helper_info_to_skb(skb, &helper_info, true))
		goto unlock_fail;
	genlmsg_end(skb, dh);
	err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
	skb = NULL;
	/* skb has been consumed or freed in netlink_broadcast() */
	if (err && err != -ESRCH)
		goto unlock_fail;
	mutex_unlock(&notification_mutex);
	return;

unlock_fail:
	mutex_unlock(&notification_mutex);
fail:
	nlmsg_free(skb);
	drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n",
		 err, seq);
}
static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq)
{
	struct drbd_genlmsghdr *dh;
	int err;

	err = -EMSGSIZE;
	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_INITIAL_STATE_DONE);
	if (!dh)
		goto nla_put_failure;
	dh->minor = -1U;
	dh->ret_code = NO_ERROR;
	if (nla_put_notification_header(skb, NOTIFY_EXISTS))
		goto nla_put_failure;
	genlmsg_end(skb, dh);
	return;

nla_put_failure:
	nlmsg_free(skb);
	pr_err("Error %d sending event. Event seq:%u\n", err, seq);
}
static void free_state_changes(struct list_head *list)
{
	while (!list_empty(list)) {
		struct drbd_state_change *state_change =
			list_first_entry(list, struct drbd_state_change, list);
		list_del(&state_change->list);
		forget_state_change(state_change);
	}
}
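
/* One notification for the resource itself, plus one per connection, one
 * per device, and one per (connection, device) pair, i.e. peer device. */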
static unsigned int notifications_for_state_change(struct drbd_state_change *state_change)
{
	return 1 +
	       state_change->n_connections +
	       state_change->n_devices +
	       state_change->n_devices * state_change->n_connections;
}
static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct drbd_state_change *state_change = (struct drbd_state_change *)cb->args[0];
	unsigned int seq = cb->args[2];
	unsigned int n;
	enum drbd_notification_type flags = 0;

	/* There is no need for taking notification_mutex here: it doesn't
	   matter if the initial state events mix with later state change
	   events; we can always tell the events apart by the NOTIFY_EXISTS
	   flag. */

	cb->args[5]--;
	if (cb->args[5] == 1) {
		notify_initial_state_done(skb, seq);
		goto out;
	}
	n = cb->args[4]++;
	if (cb->args[4] < cb->args[3])
		flags |= NOTIFY_CONTINUES;
	if (n < 1) {
		notify_resource_state_change(skb, seq, state_change->resource,
					     NOTIFY_EXISTS | flags);
		goto next;
	}
	n--;
	if (n < state_change->n_connections) {
		notify_connection_state_change(skb, seq, &state_change->connections[n],
					       NOTIFY_EXISTS | flags);
		goto next;
	}
	n -= state_change->n_connections;
	if (n < state_change->n_devices) {
		notify_device_state_change(skb, seq, &state_change->devices[n],
					   NOTIFY_EXISTS | flags);
		goto next;
	}
	n -= state_change->n_devices;
	if (n < state_change->n_devices * state_change->n_connections) {
		notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n],
						NOTIFY_EXISTS | flags);
		goto next;
	}

next:
	if (cb->args[4] == cb->args[3]) {
		struct drbd_state_change *next_state_change =
			list_entry(state_change->list.next,
				   struct drbd_state_change, list);
		cb->args[0] = (long)next_state_change;
		cb->args[3] = notifications_for_state_change(next_state_change);
		cb->args[4] = 0;
	}
out:
	return skb->len;
}
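
/* Dump the current state of all resources as a series of NOTIFY_EXISTS
 * events, terminated by a DRBD_INITIAL_STATE_DONE message.  The snapshot
 * is taken on the first dump call; followup calls replay it through
 * get_initial_state(). */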
int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct drbd_resource *resource;
	LIST_HEAD(head);

	if (cb->args[5] >= 1) {
		if (cb->args[5] > 1)
			return get_initial_state(skb, cb);
		if (cb->args[0]) {
			struct drbd_state_change *state_change =
				(struct drbd_state_change *)cb->args[0];

			/* connect list to head */
			list_add(&head, &state_change->list);
			free_state_changes(&head);
		}
		return 0;
	}

	cb->args[5] = 2;  /* number of iterations */
	mutex_lock(&resources_mutex);
	for_each_resource(resource, &drbd_resources) {
		struct drbd_state_change *state_change;

		state_change = remember_old_state(resource, GFP_KERNEL);
		if (!state_change) {
			if (!list_empty(&head))
				free_state_changes(&head);
			mutex_unlock(&resources_mutex);
			return -ENOMEM;
		}
		copy_old_to_new_state_change(state_change);
		list_add_tail(&state_change->list, &head);
		cb->args[5] += notifications_for_state_change(state_change);
	}
	mutex_unlock(&resources_mutex);

	if (!list_empty(&head)) {
		struct drbd_state_change *state_change =
			list_entry(head.next, struct drbd_state_change, list);
		cb->args[0] = (long)state_change;
		cb->args[3] = notifications_for_state_change(state_change);
		list_del(&head);  /* detach list from head */
	}

	cb->args[2] = cb->nlh->nlmsg_seq;
	return get_initial_state(skb, cb);
}