drivers/md/dm-log-userspace-base.c

   1 /*
   2  * Copyright (C) 2006-2009 Red Hat, Inc.
   3  *
   4  * This file is released under the LGPL.
   5  */
   6
   7 #include <linux/bio.h>
   8 #include <linux/dm-dirty-log.h>
   9 #include <linux/device-mapper.h>
  10 #include <linux/dm-log-userspace.h>
  11
  12 #include "dm-log-userspace-transfer.h"
  13
  14 struct flush_entry {
  15         int type;
  16         region_t region;
  17         struct list_head list;
  18 };
  19
  20 struct log_c {
  21         struct dm_target *ti;
  22         uint32_t region_size;
  23         region_t region_count;
  24         char uuid[DM_UUID_LEN];
  25
  26         char *usr_argv_str;
  27         uint32_t usr_argc;
  28
  29         /*
  30          * in_sync_hint gets set when doing is_remote_recovering.  It
  31          * represents the first region that needs recovery.  IOW, the
  32          * first zero bit of sync_bits.  This can be useful for to limit
  33          * traffic for calls like is_remote_recovering and get_resync_work,
  34          * but be take care in its use for anything else.
  35          */
  36         uint64_t in_sync_hint;
  37
  38         spinlock_t flush_lock;
  39         struct list_head flush_list;  /* only for clear and mark requests */
  40 };
  41
/*
 * Pool of flush_entry objects shared by every log instance; created in
 * userspace_dirty_log_init() and destroyed in userspace_dirty_log_exit().
 */
static mempool_t *flush_entry_pool;
  43
  44 static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data)
  45 {
  46         return kmalloc(sizeof(struct flush_entry), gfp_mask);
  47 }
  48
  49 static void flush_entry_free(void *element, void *pool_data)
  50 {
  51         kfree(element);
  52 }
  53
  54 static int userspace_do_request(struct log_c *lc, const char *uuid,
  55                                 int request_type, char *data, size_t data_size,
  56                                 char *rdata, size_t *rdata_size)
  57 {
  58         int r;
  59
  60         /*
  61          * If the server isn't there, -ESRCH is returned,
  62          * and we must keep trying until the server is
  63          * restored.
  64          */
  65 retry:
  66         r = dm_consult_userspace(uuid, request_type, data,
  67                                  data_size, rdata, rdata_size);
  68
  69         if (r != -ESRCH)
  70                 return r;
  71
  72         DMERR(" Userspace log server not found.");
  73         while (1) {
  74                 set_current_state(TASK_INTERRUPTIBLE);
  75                 schedule_timeout(2*HZ);
  76                 DMWARN("Attempting to contact userspace log server...");
  77                 r = dm_consult_userspace(uuid, DM_ULOG_CTR, lc->usr_argv_str,
  78                                          strlen(lc->usr_argv_str) + 1,
  79                                          NULL, NULL);
  80                 if (!r)
  81                         break;
  82         }
  83         DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete");
  84         r = dm_consult_userspace(uuid, DM_ULOG_RESUME, NULL,
  85                                  0, NULL, NULL);
  86         if (!r)
  87                 goto retry;
  88
  89         DMERR("Error trying to resume userspace log: %d", r);
  90
  91         return -ESRCH;
  92 }
  93
  94 static int build_constructor_string(struct dm_target *ti,
  95                                     unsigned argc, char **argv,
  96                                     char **ctr_str)
  97 {
  98         int i, str_size;
  99         char *str = NULL;
 100
 101         *ctr_str = NULL;
 102
 103         for (i = 0, str_size = 0; i < argc; i++)
 104                 str_size += strlen(argv[i]) + 1; /* +1 for space between args */
 105
 106         str_size += 20; /* Max number of chars in a printed u64 number */
 107
 108         str = kzalloc(str_size, GFP_KERNEL);
 109         if (!str) {
 110                 DMWARN("Unable to allocate memory for constructor string");
 111                 return -ENOMEM;
 112         }
 113
 114         for (i = 0, str_size = 0; i < argc; i++)
 115                 str_size += sprintf(str + str_size, "%s ", argv[i]);
 116         str_size += sprintf(str + str_size, "%llu",
 117                             (unsigned long long)ti->len);
 118
 119         *ctr_str = str;
 120         return str_size;
 121 }
 122
 123 /*
 124  * userspace_ctr
 125  *
 126  * argv contains:
 127  *      <UUID> <other args>
 128  * Where 'other args' is the userspace implementation specific log
 129  * arguments.  An example might be:
 130  *      <UUID> clustered_disk <arg count> <log dev> <region_size> [[no]sync]
 131  *
 132  * So, this module will strip off the <UUID> for identification purposes
 133  * when communicating with userspace about a log; but will pass on everything
 134  * else.
 135  */
 136 static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
 137                          unsigned argc, char **argv)
 138 {
 139         int r = 0;
 140         int str_size;
 141         char *ctr_str = NULL;
 142         struct log_c *lc = NULL;
 143         uint64_t rdata;
 144         size_t rdata_size = sizeof(rdata);
 145
 146         if (argc < 3) {
 147                 DMWARN("Too few arguments to userspace dirty log");
 148                 return -EINVAL;
 149         }
 150
 151         lc = kmalloc(sizeof(*lc), GFP_KERNEL);
 152         if (!lc) {
 153                 DMWARN("Unable to allocate userspace log context.");
 154                 return -ENOMEM;
 155         }
 156
 157         lc->ti = ti;
 158
 159         if (strlen(argv[0]) > (DM_UUID_LEN - 1)) {
 160                 DMWARN("UUID argument too long.");
 161                 kfree(lc);
 162                 return -EINVAL;
 163         }
 164
 165         strncpy(lc->uuid, argv[0], DM_UUID_LEN);
 166         spin_lock_init(&lc->flush_lock);
 167         INIT_LIST_HEAD(&lc->flush_list);
 168
 169         str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str);
 170         if (str_size < 0) {
 171                 kfree(lc);
 172                 return str_size;
 173         }
 174
 175         /* Send table string */
 176         r = dm_consult_userspace(lc->uuid, DM_ULOG_CTR,
 177                                  ctr_str, str_size, NULL, NULL);
 178
 179         if (r == -ESRCH) {
 180                 DMERR("Userspace log server not found");
 181                 goto out;
 182         }
 183
 184         /* Since the region size does not change, get it now */
 185         rdata_size = sizeof(rdata);
 186         r = dm_consult_userspace(lc->uuid, DM_ULOG_GET_REGION_SIZE,
 187                                  NULL, 0, (char *)&rdata, &rdata_size);
 188
 189         if (r) {
 190                 DMERR("Failed to get region size of dirty log");
 191                 goto out;
 192         }
 193
 194         lc->region_size = (uint32_t)rdata;
 195         lc->region_count = dm_sector_div_up(ti->len, lc->region_size);
 196
 197 out:
 198         if (r) {
 199                 kfree(lc);
 200                 kfree(ctr_str);
 201         } else {
 202                 lc->usr_argv_str = ctr_str;
 203                 lc->usr_argc = argc;
 204                 log->context = lc;
 205         }
 206
 207         return r;
 208 }
 209
 210 static void userspace_dtr(struct dm_dirty_log *log)
 211 {
 212         int r;
 213         struct log_c *lc = log->context;
 214
 215         r = dm_consult_userspace(lc->uuid, DM_ULOG_DTR,
 216                                  NULL, 0,
 217                                  NULL, NULL);
 218
 219         kfree(lc->usr_argv_str);
 220         kfree(lc);
 221
 222         return;
 223 }
 224
 225 static int userspace_presuspend(struct dm_dirty_log *log)
 226 {
 227         int r;
 228         struct log_c *lc = log->context;
 229
 230         r = dm_consult_userspace(lc->uuid, DM_ULOG_PRESUSPEND,
 231                                  NULL, 0,
 232                                  NULL, NULL);
 233
 234         return r;
 235 }
 236
 237 static int userspace_postsuspend(struct dm_dirty_log *log)
 238 {
 239         int r;
 240         struct log_c *lc = log->context;
 241
 242         r = dm_consult_userspace(lc->uuid, DM_ULOG_POSTSUSPEND,
 243                                  NULL, 0,
 244                                  NULL, NULL);
 245
 246         return r;
 247 }
 248
 249 static int userspace_resume(struct dm_dirty_log *log)
 250 {
 251         int r;
 252         struct log_c *lc = log->context;
 253
 254         lc->in_sync_hint = 0;
 255         r = dm_consult_userspace(lc->uuid, DM_ULOG_RESUME,
 256                                  NULL, 0,
 257                                  NULL, NULL);
 258
 259         return r;
 260 }
 261
 262 static uint32_t userspace_get_region_size(struct dm_dirty_log *log)
 263 {
 264         struct log_c *lc = log->context;
 265
 266         return lc->region_size;
 267 }
 268
 269 /*
 270  * userspace_is_clean
 271  *
 272  * Check whether a region is clean.  If there is any sort of
 273  * failure when consulting the server, we return not clean.
 274  *
 275  * Returns: 1 if clean, 0 otherwise
 276  */
 277 static int userspace_is_clean(struct dm_dirty_log *log, region_t region)
 278 {
 279         int r;
 280         uint64_t region64 = (uint64_t)region;
 281         int64_t is_clean;
 282         size_t rdata_size;
 283         struct log_c *lc = log->context;
 284
 285         rdata_size = sizeof(is_clean);
 286         r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_CLEAN,
 287                                  (char *)&region64, sizeof(region64),
 288                                  (char *)&is_clean, &rdata_size);
 289
 290         return (r) ? 0 : (int)is_clean;
 291 }
 292
 293 /*
 294  * userspace_in_sync
 295  *
 296  * Check if the region is in-sync.  If there is any sort
 297  * of failure when consulting the server, we assume that
 298  * the region is not in sync.
 299  *
 300  * If 'can_block' is not set, return -EWOULDBLOCK immediately
 301  *
 302  * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK
 303  */
 304 static int userspace_in_sync(struct dm_dirty_log *log, region_t region,
 305                              int can_block)
 306 {
 307         int r;
 308         uint64_t region64 = region;
 309         int64_t in_sync;
 310         size_t rdata_size;
 311         struct log_c *lc = log->context;
 312
 313         /*
 314          * We can never respond directly - even if in_sync_hint is
 315          * set.  This is because another machine could see a device
 316          * failure and mark the region out-of-sync.  If we don't go
 317          * to userspace to ask, we might think the region is in-sync
 318          * and allow a read to pick up data that is stale.  (This is
 319          * very unlikely if a device actually fails; but it is very
 320          * likely if a connection to one device from one machine fails.)
 321          *
 322          * There still might be a problem if the mirror caches the region
 323          * state as in-sync... but then this call would not be made.  So,
 324          * that is a mirror problem.
 325          */
 326         if (!can_block)
 327                 return -EWOULDBLOCK;
 328
 329         rdata_size = sizeof(in_sync);
 330         r = userspace_do_request(lc, lc->uuid, DM_ULOG_IN_SYNC,
 331                                  (char *)&region64, sizeof(region64),
 332                                  (char *)&in_sync, &rdata_size);
 333         return (r) ? 0 : (int)in_sync;
 334 }
 335
 336 /*
 337  * userspace_flush
 338  *
 339  * This function is ok to block.
 340  * The flush happens in two stages.  First, it sends all
 341  * clear/mark requests that are on the list.  Then it
 342  * tells the server to commit them.  This gives the
 343  * server a chance to optimise the commit, instead of
 344  * doing it for every request.
 345  *
 346  * Additionally, we could implement another thread that
 347  * sends the requests up to the server - reducing the
 348  * load on flush.  Then the flush would have less in
 349  * the list and be responsible for the finishing commit.
 350  *
 351  * Returns: 0 on success, < 0 on failure
 352  */
 353 static int userspace_flush(struct dm_dirty_log *log)
 354 {
 355         int r = 0;
 356         unsigned long flags;
 357         struct log_c *lc = log->context;
 358         LIST_HEAD(flush_list);
 359         struct flush_entry *fe, *tmp_fe;
 360
 361         spin_lock_irqsave(&lc->flush_lock, flags);
 362         list_splice_init(&lc->flush_list, &flush_list);
 363         spin_unlock_irqrestore(&lc->flush_lock, flags);
 364
 365         if (list_empty(&flush_list))
 366                 return 0;
 367
 368         /*
 369          * FIXME: Count up requests, group request types,
 370          * allocate memory to stick all requests in and
 371          * send to server in one go.  Failing the allocation,
 372          * do it one by one.
 373          */
 374
 375         list_for_each_entry(fe, &flush_list, list) {
 376                 r = userspace_do_request(lc, lc->uuid, fe->type,
 377                                          (char *)&fe->region,
 378                                          sizeof(fe->region),
 379                                          NULL, NULL);
 380                 if (r)
 381                         goto fail;
 382         }
 383
 384         r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
 385                                  NULL, 0, NULL, NULL);
 386
 387 fail:
 388         /*
 389          * We can safely remove these entries, even if failure.
 390          * Calling code will receive an error and will know that
 391          * the log facility has failed.
 392          */
 393         list_for_each_entry_safe(fe, tmp_fe, &flush_list, list) {
 394                 list_del(&fe->list);
 395                 mempool_free(fe, flush_entry_pool);
 396         }
 397
 398         if (r)
 399                 dm_table_event(lc->ti->table);
 400
 401         return r;
 402 }
 403
 404 /*
 405  * userspace_mark_region
 406  *
 407  * This function should avoid blocking unless absolutely required.
 408  * (Memory allocation is valid for blocking.)
 409  */
 410 static void userspace_mark_region(struct dm_dirty_log *log, region_t region)
 411 {
 412         unsigned long flags;
 413         struct log_c *lc = log->context;
 414         struct flush_entry *fe;
 415
 416         /* Wait for an allocation, but _never_ fail */
 417         fe = mempool_alloc(flush_entry_pool, GFP_NOIO);
 418         BUG_ON(!fe);
 419
 420         spin_lock_irqsave(&lc->flush_lock, flags);
 421         fe->type = DM_ULOG_MARK_REGION;
 422         fe->region = region;
 423         list_add(&fe->list, &lc->flush_list);
 424         spin_unlock_irqrestore(&lc->flush_lock, flags);
 425
 426         return;
 427 }
 428
 429 /*
 430  * userspace_clear_region
 431  *
 432  * This function must not block.
 433  * So, the alloc can't block.  In the worst case, it is ok to
 434  * fail.  It would simply mean we can't clear the region.
 435  * Does nothing to current sync context, but does mean
 436  * the region will be re-sync'ed on a reload of the mirror
 437  * even though it is in-sync.
 438  */
 439 static void userspace_clear_region(struct dm_dirty_log *log, region_t region)
 440 {
 441         unsigned long flags;
 442         struct log_c *lc = log->context;
 443         struct flush_entry *fe;
 444
 445         /*
 446          * If we fail to allocate, we skip the clearing of
 447          * the region.  This doesn't hurt us in any way, except
 448          * to cause the region to be resync'ed when the
 449          * device is activated next time.
 450          */
 451         fe = mempool_alloc(flush_entry_pool, GFP_ATOMIC);
 452         if (!fe) {
 453                 DMERR("Failed to allocate memory to clear region.");
 454                 return;
 455         }
 456
 457         spin_lock_irqsave(&lc->flush_lock, flags);
 458         fe->type = DM_ULOG_CLEAR_REGION;
 459         fe->region = region;
 460         list_add(&fe->list, &lc->flush_list);
 461         spin_unlock_irqrestore(&lc->flush_lock, flags);
 462
 463         return;
 464 }
 465
 466 /*
 467  * userspace_get_resync_work
 468  *
 469  * Get a region that needs recovery.  It is valid to return
 470  * an error for this function.
 471  *
 472  * Returns: 1 if region filled, 0 if no work, <0 on error
 473  */
 474 static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region)
 475 {
 476         int r;
 477         size_t rdata_size;
 478         struct log_c *lc = log->context;
 479         struct {
 480                 int64_t i; /* 64-bit for mix arch compatibility */
 481                 region_t r;
 482         } pkg;
 483
 484         if (lc->in_sync_hint >= lc->region_count)
 485                 return 0;
 486
 487         rdata_size = sizeof(pkg);
 488         r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK,
 489                                  NULL, 0,
 490                                  (char *)&pkg, &rdata_size);
 491
 492         *region = pkg.r;
 493         return (r) ? r : (int)pkg.i;
 494 }
 495
 496 /*
 497  * userspace_set_region_sync
 498  *
 499  * Set the sync status of a given region.  This function
 500  * must not fail.
 501  */
 502 static void userspace_set_region_sync(struct dm_dirty_log *log,
 503                                       region_t region, int in_sync)
 504 {
 505         int r;
 506         struct log_c *lc = log->context;
 507         struct {
 508                 region_t r;
 509                 int64_t i;
 510         } pkg;
 511
 512         pkg.r = region;
 513         pkg.i = (int64_t)in_sync;
 514
 515         r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC,
 516                                  (char *)&pkg, sizeof(pkg),
 517                                  NULL, NULL);
 518
 519         /*
 520          * It would be nice to be able to report failures.
 521          * However, it is easy emough to detect and resolve.
 522          */
 523         return;
 524 }
 525
 526 /*
 527  * userspace_get_sync_count
 528  *
 529  * If there is any sort of failure when consulting the server,
 530  * we assume that the sync count is zero.
 531  *
 532  * Returns: sync count on success, 0 on failure
 533  */
 534 static region_t userspace_get_sync_count(struct dm_dirty_log *log)
 535 {
 536         int r;
 537         size_t rdata_size;
 538         uint64_t sync_count;
 539         struct log_c *lc = log->context;
 540
 541         rdata_size = sizeof(sync_count);
 542         r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT,
 543                                  NULL, 0,
 544                                  (char *)&sync_count, &rdata_size);
 545
 546         if (r)
 547                 return 0;
 548
 549         if (sync_count >= lc->region_count)
 550                 lc->in_sync_hint = lc->region_count;
 551
 552         return (region_t)sync_count;
 553 }
 554
 555 /*
 556  * userspace_status
 557  *
 558  * Returns: amount of space consumed
 559  */
 560 static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
 561                             char *result, unsigned maxlen)
 562 {
 563         int r = 0;
 564         size_t sz = (size_t)maxlen;
 565         struct log_c *lc = log->context;
 566
 567         switch (status_type) {
 568         case STATUSTYPE_INFO:
 569                 r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO,
 570                                          NULL, 0,
 571                                          result, &sz);
 572
 573                 if (r) {
 574                         sz = 0;
 575                         DMEMIT("%s 1 COM_FAILURE", log->type->name);
 576                 }
 577                 break;
 578         case STATUSTYPE_TABLE:
 579                 sz = 0;
 580                 DMEMIT("%s %u %s %s", log->type->name, lc->usr_argc + 1,
 581                        lc->uuid, lc->usr_argv_str);
 582                 break;
 583         }
 584         return (r) ? 0 : (int)sz;
 585 }
 586
 587 /*
 588  * userspace_is_remote_recovering
 589  *
 590  * Returns: 1 if region recovering, 0 otherwise
 591  */
 592 static int userspace_is_remote_recovering(struct dm_dirty_log *log,
 593                                           region_t region)
 594 {
 595         int r;
 596         uint64_t region64 = region;
 597         struct log_c *lc = log->context;
 598         static unsigned long long limit;
 599         struct {
 600                 int64_t is_recovering;
 601                 uint64_t in_sync_hint;
 602         } pkg;
 603         size_t rdata_size = sizeof(pkg);
 604
 605         /*
 606          * Once the mirror has been reported to be in-sync,
 607          * it will never again ask for recovery work.  So,
 608          * we can safely say there is not a remote machine
 609          * recovering if the device is in-sync.  (in_sync_hint
 610          * must be reset at resume time.)
 611          */
 612         if (region < lc->in_sync_hint)
 613                 return 0;
 614         else if (jiffies < limit)
 615                 return 1;
 616
 617         limit = jiffies + (HZ / 4);
 618         r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_REMOTE_RECOVERING,
 619                                  (char *)&region64, sizeof(region64),
 620                                  (char *)&pkg, &rdata_size);
 621         if (r)
 622                 return 1;
 623
 624         lc->in_sync_hint = pkg.in_sync_hint;
 625
 626         return (int)pkg.is_recovering;
 627 }
 628
 629 static struct dm_dirty_log_type _userspace_type = {
 630         .name = "userspace",
 631         .module = THIS_MODULE,
 632         .ctr = userspace_ctr,
 633         .dtr = userspace_dtr,
 634         .presuspend = userspace_presuspend,
 635         .postsuspend = userspace_postsuspend,
 636         .resume = userspace_resume,
 637         .get_region_size = userspace_get_region_size,
 638         .is_clean = userspace_is_clean,
 639         .in_sync = userspace_in_sync,
 640         .flush = userspace_flush,
 641         .mark_region = userspace_mark_region,
 642         .clear_region = userspace_clear_region,
 643         .get_resync_work = userspace_get_resync_work,
 644         .set_region_sync = userspace_set_region_sync,
 645         .get_sync_count = userspace_get_sync_count,
 646         .status = userspace_status,
 647         .is_remote_recovering = userspace_is_remote_recovering,
 648 };
 649
 650 static int __init userspace_dirty_log_init(void)
 651 {
 652         int r = 0;
 653
 654         flush_entry_pool = mempool_create(100, flush_entry_alloc,
 655                                           flush_entry_free, NULL);
 656
 657         if (!flush_entry_pool) {
 658                 DMWARN("Unable to create flush_entry_pool:  No memory.");
 659                 return -ENOMEM;
 660         }
 661
 662         r = dm_ulog_tfr_init();
 663         if (r) {
 664                 DMWARN("Unable to initialize userspace log communications");
 665                 mempool_destroy(flush_entry_pool);
 666                 return r;
 667         }
 668
 669         r = dm_dirty_log_type_register(&_userspace_type);
 670         if (r) {
 671                 DMWARN("Couldn't register userspace dirty log type");
 672                 dm_ulog_tfr_exit();
 673                 mempool_destroy(flush_entry_pool);
 674                 return r;
 675         }
 676
 677         DMINFO("version 1.0.0 loaded");
 678         return 0;
 679 }
 680
 681 static void __exit userspace_dirty_log_exit(void)
 682 {
 683         dm_dirty_log_type_unregister(&_userspace_type);
 684         dm_ulog_tfr_exit();
 685         mempool_destroy(flush_entry_pool);
 686
 687         DMINFO("version 1.0.0 unloaded");
 688         return;
 689 }
 690
/* Module entry and exit points. */
module_init(userspace_dirty_log_init);
module_exit(userspace_dirty_log_exit);

/*
 * Module metadata.
 * NOTE(review): the file header says LGPL while the license string
 * below is "GPL" - confirm this is intended.
 */
MODULE_DESCRIPTION(DM_NAME " userspace dirty log link");
MODULE_AUTHOR("Jonathan Brassow <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");