module/spl/spl-generic.c

   1 /*****************************************************************************\
   2  *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
   3  *  Copyright (C) 2007 The Regents of the University of California.
   4  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
   5  *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
   6  *  UCRL-CODE-235197
   7  *
   8  *  This file is part of the SPL, Solaris Porting Layer.
   9  *  For details, see <http://zfsonlinux.org/>.
  10  *
  11  *  The SPL is free software; you can redistribute it and/or modify it
  12  *  under the terms of the GNU General Public License as published by the
  13  *  Free Software Foundation; either version 2 of the License, or (at your
  14  *  option) any later version.
  15  *
  16  *  The SPL is distributed in the hope that it will be useful, but WITHOUT
  17  *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  18  *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  19  *  for more details.
  20  *
  21  *  You should have received a copy of the GNU General Public License along
  22  *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
  23  *****************************************************************************
  24  *  Solaris Porting Layer (SPL) Generic Implementation.
  25 \*****************************************************************************/
  26
  27 #include <sys/sysmacros.h>
  28 #include <sys/systeminfo.h>
  29 #include <sys/vmsystm.h>
  30 #include <sys/kobj.h>
  31 #include <sys/kmem.h>
  32 #include <sys/kmem_cache.h>
  33 #include <sys/vmem.h>
  34 #include <sys/mutex.h>
  35 #include <sys/rwlock.h>
  36 #include <sys/taskq.h>
  37 #include <sys/tsd.h>
  38 #include <sys/zmod.h>
  39 #include <sys/debug.h>
  40 #include <sys/proc.h>
  41 #include <sys/kstat.h>
  42 #include <sys/file.h>
  43 #include <linux/ctype.h>
  44 #include <linux/kmod.h>
  45 #include <linux/math64_compat.h>
  46 #include <linux/proc_compat.h>
  47
  48 char spl_version[32] = "SPL v" SPL_META_VERSION "-" SPL_META_RELEASE;
  49 EXPORT_SYMBOL(spl_version);
  50
  51 unsigned long spl_hostid = 0;
  52 EXPORT_SYMBOL(spl_hostid);
  53 module_param(spl_hostid, ulong, 0644);
  54 MODULE_PARM_DESC(spl_hostid, "The system hostid.");
  55
  56 proc_t p0;
  57 EXPORT_SYMBOL(p0);
  58
  59 #if BITS_PER_LONG == 32
  60 /*
  61  * Support 64/64 => 64 division on a 32-bit platform.  While the kernel
  62  * provides a div64_u64() function for this we do not use it because the
  63  * implementation is flawed.  There are cases which return incorrect
  64  * results as late as linux-2.6.35.  Until this is fixed upstream the
  65  * spl must provide its own implementation.
  66  *
  67  * This implementation is a slightly modified version of the algorithm
  68  * proposed by the book 'Hacker's Delight'.  The original source can be
  69  * found here and is available for use without restriction.
  70  *
  71  * http://www.hackersdelight.org/HDcode/newCode/divDouble.c
  72  */
  73
  74 /*
  75  * Calculate number of leading of zeros for a 64-bit value.
  76  */
  77 static int
  78 nlz64(uint64_t x) {
  79         register int n = 0;
  80
  81         if (x == 0)
  82                 return 64;
  83
  84         if (x <= 0x00000000FFFFFFFFULL) {n = n + 32; x = x << 32;}
  85         if (x <= 0x0000FFFFFFFFFFFFULL) {n = n + 16; x = x << 16;}
  86         if (x <= 0x00FFFFFFFFFFFFFFULL) {n = n +  8; x = x <<  8;}
  87         if (x <= 0x0FFFFFFFFFFFFFFFULL) {n = n +  4; x = x <<  4;}
  88         if (x <= 0x3FFFFFFFFFFFFFFFULL) {n = n +  2; x = x <<  2;}
  89         if (x <= 0x7FFFFFFFFFFFFFFFULL) {n = n +  1;}
  90
  91         return n;
  92 }
  93
  94 /*
  95  * Newer kernels have a div_u64() function but we define our own
  96  * to simplify portibility between kernel versions.
  97  */
  98 static inline uint64_t
  99 __div_u64(uint64_t u, uint32_t v)
 100 {
 101         (void) do_div(u, v);
 102         return u;
 103 }
 104
 105 /*
 106  * Implementation of 64-bit unsigned division for 32-bit machines.
 107  *
 108  * First the procedure takes care of the case in which the divisor is a
 109  * 32-bit quantity. There are two subcases: (1) If the left half of the
 110  * dividend is less than the divisor, one execution of do_div() is all that
 111  * is required (overflow is not possible). (2) Otherwise it does two
 112  * divisions, using the grade school method.
 113  */
 114 uint64_t
 115 __udivdi3(uint64_t u, uint64_t v)
 116 {
 117         uint64_t u0, u1, v1, q0, q1, k;
 118         int n;
 119
 120         if (v >> 32 == 0) {                     // If v < 2**32:
 121                 if (u >> 32 < v) {              // If u/v cannot overflow,
 122                         return __div_u64(u, v); // just do one division.
 123                 } else {                        // If u/v would overflow:
 124                         u1 = u >> 32;           // Break u into two halves.
 125                         u0 = u & 0xFFFFFFFF;
 126                         q1 = __div_u64(u1, v);  // First quotient digit.
 127                         k  = u1 - q1 * v;       // First remainder, < v.
 128                         u0 += (k << 32);
 129                         q0 = __div_u64(u0, v);  // Seconds quotient digit.
 130                         return (q1 << 32) + q0;
 131                 }
 132         } else {                                // If v >= 2**32:
 133                 n = nlz64(v);                   // 0 <= n <= 31.
 134                 v1 = (v << n) >> 32;            // Normalize divisor, MSB is 1.
 135                 u1 = u >> 1;                    // To ensure no overflow.
 136                 q1 = __div_u64(u1, v1);         // Get quotient from
 137                 q0 = (q1 << n) >> 31;           // Undo normalization and
 138                                                 // division of u by 2.
 139                 if (q0 != 0)                    // Make q0 correct or
 140                         q0 = q0 - 1;            // too small by 1.
 141                 if ((u - q0 * v) >= v)
 142                         q0 = q0 + 1;            // Now q0 is correct.
 143
 144                 return q0;
 145         }
 146 }
 147 EXPORT_SYMBOL(__udivdi3);
 148
 149 /*
 150  * Implementation of 64-bit signed division for 32-bit machines.
 151  */
 152 int64_t
 153 __divdi3(int64_t u, int64_t v)
 154 {
 155         int64_t q, t;
 156         q = __udivdi3(abs64(u), abs64(v));
 157         t = (u ^ v) >> 63;      // If u, v have different
 158         return (q ^ t) - t;     // signs, negate q.
 159 }
 160 EXPORT_SYMBOL(__divdi3);
 161
 162 /*
 163  * Implementation of 64-bit unsigned modulo for 32-bit machines.
 164  */
 165 uint64_t
 166 __umoddi3(uint64_t dividend, uint64_t divisor)
 167 {
 168         return (dividend - (divisor * __udivdi3(dividend, divisor)));
 169 }
 170 EXPORT_SYMBOL(__umoddi3);
 171
 172 #if defined(__arm) || defined(__arm__)
 173 /*
 174  * Implementation of 64-bit (un)signed division for 32-bit arm machines.
 175  *
 176  * Run-time ABI for the ARM Architecture (page 20).  A pair of (unsigned)
 177  * long longs is returned in {{r0, r1}, {r2,r3}}, the quotient in {r0, r1},
 178  * and the remainder in {r2, r3}.  The return type is specifically left
 179  * set to 'void' to ensure the compiler does not overwrite these registers
 180  * during the return.  All results are in registers as per ABI
 181  */
 182 void
 183 __aeabi_uldivmod(uint64_t u, uint64_t v)
 184 {
 185         uint64_t res;
 186         uint64_t mod;
 187
 188         res = __udivdi3(u, v);
 189         mod = __umoddi3(u, v);
 190         {
 191                 register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF);
 192                 register uint32_t r1 asm("r1") = (res >> 32);
 193                 register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF);
 194                 register uint32_t r3 asm("r3") = (mod >> 32);
 195
 196                 asm volatile(""
 197                     : "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3)  /* output */
 198                     : "r"(r0), "r"(r1), "r"(r2), "r"(r3));   /* input */
 199
 200                 return; /* r0; */
 201         }
 202 }
 203 EXPORT_SYMBOL(__aeabi_uldivmod);
 204
 205 void
 206 __aeabi_ldivmod(int64_t u, int64_t v)
 207 {
 208         int64_t res;
 209         uint64_t mod;
 210
 211         res =  __divdi3(u, v);
 212         mod = __umoddi3(u, v);
 213         {
 214                 register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF);
 215                 register uint32_t r1 asm("r1") = (res >> 32);
 216                 register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF);
 217                 register uint32_t r3 asm("r3") = (mod >> 32);
 218
 219                 asm volatile(""
 220                     : "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3)  /* output */
 221                     : "r"(r0), "r"(r1), "r"(r2), "r"(r3));   /* input */
 222
 223                 return; /* r0; */
 224         }
 225 }
 226 EXPORT_SYMBOL(__aeabi_ldivmod);
 227 #endif /* __arm || __arm__ */
 228 #endif /* BITS_PER_LONG */
 229
 230 /* NOTE: The strtoxx behavior is solely based on my reading of the Solaris
 231  * ddi_strtol(9F) man page.  I have not verified the behavior of these
 232  * functions against their Solaris counterparts.  It is possible that I
 233  * may have misinterpreted the man page or the man page is incorrect.
 234  */
 235 int ddi_strtoul(const char *, char **, int, unsigned long *);
 236 int ddi_strtol(const char *, char **, int, long *);
 237 int ddi_strtoull(const char *, char **, int, unsigned long long *);
 238 int ddi_strtoll(const char *, char **, int, long long *);
 239
 240 #define define_ddi_strtoux(type, valtype)                               \
 241 int ddi_strtou##type(const char *str, char **endptr,                    \
 242                      int base, valtype *result)                         \
 243 {                                                                       \
 244         valtype last_value, value = 0;                                  \
 245         char *ptr = (char *)str;                                        \
 246         int flag = 1, digit;                                            \
 247                                                                         \
 248         if (strlen(ptr) == 0)                                           \
 249                 return EINVAL;                                          \
 250                                                                         \
 251         /* Auto-detect base based on prefix */                          \
 252         if (!base) {                                                    \
 253                 if (str[0] == '0') {                                    \
 254                         if (tolower(str[1])=='x' && isxdigit(str[2])) { \
 255                                 base = 16; /* hex */                    \
 256                                 ptr += 2;                               \
 257                         } else if (str[1] >= '0' && str[1] < 8) {       \
 258                                 base = 8; /* octal */                   \
 259                                 ptr += 1;                               \
 260                         } else {                                        \
 261                                 return EINVAL;                          \
 262                         }                                               \
 263                 } else {                                                \
 264                         base = 10; /* decimal */                        \
 265                 }                                                       \
 266         }                                                               \
 267                                                                         \
 268         while (1) {                                                     \
 269                 if (isdigit(*ptr))                                      \
 270                         digit = *ptr - '0';                             \
 271                 else if (isalpha(*ptr))                                 \
 272                         digit = tolower(*ptr) - 'a' + 10;               \
 273                 else                                                    \
 274                         break;                                          \
 275                                                                         \
 276                 if (digit >= base)                                      \
 277                         break;                                          \
 278                                                                         \
 279                 last_value = value;                                     \
 280                 value = value * base + digit;                           \
 281                 if (last_value > value) /* Overflow */                  \
 282                         return ERANGE;                                  \
 283                                                                         \
 284                 flag = 1;                                               \
 285                 ptr++;                                                  \
 286         }                                                               \
 287                                                                         \
 288         if (flag)                                                       \
 289                 *result = value;                                        \
 290                                                                         \
 291         if (endptr)                                                     \
 292                 *endptr = (char *)(flag ? ptr : str);                   \
 293                                                                         \
 294         return 0;                                                       \
 295 }                                                                       \
 296
 297 #define define_ddi_strtox(type, valtype)                                \
 298 int ddi_strto##type(const char *str, char **endptr,                     \
 299                        int base, valtype *result)                       \
 300 {                                                                       \
 301         int rc;                                                         \
 302                                                                         \
 303         if (*str == '-') {                                              \
 304                 rc = ddi_strtou##type(str + 1, endptr, base, result);   \
 305                 if (!rc) {                                              \
 306                         if (*endptr == str + 1)                         \
 307                                 *endptr = (char *)str;                  \
 308                         else                                            \
 309                                 *result = -*result;                     \
 310                 }                                                       \
 311         } else {                                                        \
 312                 rc = ddi_strtou##type(str, endptr, base, result);       \
 313         }                                                               \
 314                                                                         \
 315         return rc;                                                      \
 316 }
 317
 318 define_ddi_strtoux(l, unsigned long)
 319 define_ddi_strtox(l, long)
 320 define_ddi_strtoux(ll, unsigned long long)
 321 define_ddi_strtox(ll, long long)
 322
 323 EXPORT_SYMBOL(ddi_strtoul);
 324 EXPORT_SYMBOL(ddi_strtol);
 325 EXPORT_SYMBOL(ddi_strtoll);
 326 EXPORT_SYMBOL(ddi_strtoull);
 327
 328 int
 329 ddi_copyin(const void *from, void *to, size_t len, int flags)
 330 {
 331         /* Fake ioctl() issued by kernel, 'from' is a kernel address */
 332         if (flags & FKIOCTL) {
 333                 memcpy(to, from, len);
 334                 return 0;
 335         }
 336
 337         return copyin(from, to, len);
 338 }
 339 EXPORT_SYMBOL(ddi_copyin);
 340
 341 int
 342 ddi_copyout(const void *from, void *to, size_t len, int flags)
 343 {
 344         /* Fake ioctl() issued by kernel, 'from' is a kernel address */
 345         if (flags & FKIOCTL) {
 346                 memcpy(to, from, len);
 347                 return 0;
 348         }
 349
 350         return copyout(from, to, len);
 351 }
 352 EXPORT_SYMBOL(ddi_copyout);
 353
 354 /*
 355  * Read the unique system identifier from the /etc/hostid file.
 356  *
 357  * The behavior of /usr/bin/hostid on Linux systems with the
 358  * regular eglibc and coreutils is:
 359  *
 360  *   1. Generate the value if the /etc/hostid file does not exist
 361  *      or if the /etc/hostid file is less than four bytes in size.
 362  *
 363  *   2. If the /etc/hostid file is at least 4 bytes, then return
 364  *      the first four bytes [0..3] in native endian order.
 365  *
 366  *   3. Always ignore bytes [4..] if they exist in the file.
 367  *
 368  * Only the first four bytes are significant, even on systems that
 369  * have a 64-bit word size.
 370  *
 371  * See:
 372  *
 373  *   eglibc: sysdeps/unix/sysv/linux/gethostid.c
 374  *   coreutils: src/hostid.c
 375  *
 376  * Notes:
 377  *
 378  * The /etc/hostid file on Solaris is a text file that often reads:
 379  *
 380  *   # DO NOT EDIT
 381  *   "0123456789"
 382  *
 383  * Directly copying this file to Linux results in a constant
 384  * hostid of 4f442023 because the default comment constitutes
 385  * the first four bytes of the file.
 386  *
 387  */
 388
 389 char *spl_hostid_path = HW_HOSTID_PATH;
 390 module_param(spl_hostid_path, charp, 0444);
 391 MODULE_PARM_DESC(spl_hostid_path, "The system hostid file (/etc/hostid)");
 392
 393 static int
 394 hostid_read(void)
 395 {
 396         int result;
 397         uint64_t size;
 398         struct _buf *file;
 399         uint32_t hostid = 0;
 400
 401         file = kobj_open_file(spl_hostid_path);
 402
 403         if (file == (struct _buf *)-1)
 404                 return -1;
 405
 406         result = kobj_get_filesize(file, &size);
 407
 408         if (result != 0) {
 409                 printk(KERN_WARNING
 410                        "SPL: kobj_get_filesize returned %i on %s\n",
 411                        result, spl_hostid_path);
 412                 kobj_close_file(file);
 413                 return -2;
 414         }
 415
 416         if (size < sizeof(HW_HOSTID_MASK)) {
 417                 printk(KERN_WARNING
 418                        "SPL: Ignoring the %s file because it is %llu bytes; "
 419                        "expecting %lu bytes instead.\n", spl_hostid_path,
 420                        size, (unsigned long)sizeof(HW_HOSTID_MASK));
 421                 kobj_close_file(file);
 422                 return -3;
 423         }
 424
 425         /* Read directly into the variable like eglibc does. */
 426         /* Short reads are okay; native behavior is preserved. */
 427         result = kobj_read_file(file, (char *)&hostid, sizeof(hostid), 0);
 428
 429         if (result < 0) {
 430                 printk(KERN_WARNING
 431                        "SPL: kobj_read_file returned %i on %s\n",
 432                        result, spl_hostid_path);
 433                 kobj_close_file(file);
 434                 return -4;
 435         }
 436
 437         /* Mask down to 32 bits like coreutils does. */
 438         spl_hostid = hostid & HW_HOSTID_MASK;
 439         kobj_close_file(file);
 440         return 0;
 441 }
 442
 443 uint32_t
 444 zone_get_hostid(void *zone)
 445 {
 446         static int first = 1;
 447
 448         /* Only the global zone is supported */
 449         ASSERT(zone == NULL);
 450
 451         if (first) {
 452                 first = 0;
 453
 454                 spl_hostid &= HW_HOSTID_MASK;
 455                 /*
 456                  * Get the hostid if it was not passed as a module parameter.
 457                  * Try reading the /etc/hostid file directly.
 458                  */
 459                 if (spl_hostid == 0 && hostid_read())
 460                         spl_hostid = 0;
 461
 462
 463                 printk(KERN_NOTICE "SPL: using hostid 0x%08x\n",
 464                         (unsigned int) spl_hostid);
 465         }
 466
 467         return spl_hostid;
 468 }
 469 EXPORT_SYMBOL(zone_get_hostid);
 470
 471 static int
 472 spl_kvmem_init(void)
 473 {
 474         int rc = 0;
 475
 476         rc = spl_kmem_init();
 477         if (rc)
 478                 goto out1;
 479
 480         rc = spl_vmem_init();
 481         if (rc)
 482                 goto out2;
 483
 484         rc = spl_kmem_cache_init();
 485         if (rc)
 486                 goto out3;
 487
 488         return (rc);
 489 out3:
 490         spl_vmem_fini();
 491 out2:
 492         spl_kmem_fini();
 493 out1:
 494         return (rc);
 495 }
 496
 497 static void
 498 spl_kvmem_fini(void)
 499 {
 500         spl_kmem_cache_fini();
 501         spl_vmem_fini();
 502         spl_kmem_fini();
 503 }
 504
 505 static int __init
 506 spl_init(void)
 507 {
 508         int rc = 0;
 509
 510         bzero(&p0, sizeof (proc_t));
 511
 512         if ((rc = spl_kvmem_init()))
 513                 goto out1;
 514
 515         if ((rc = spl_mutex_init()))
 516                 goto out2;
 517
 518         if ((rc = spl_rw_init()))
 519                 goto out3;
 520
 521         if ((rc = spl_taskq_init()))
 522                 goto out4;
 523
 524         if ((rc = spl_vn_init()))
 525                 goto out5;
 526
 527         if ((rc = spl_proc_init()))
 528                 goto out6;
 529
 530         if ((rc = spl_kstat_init()))
 531                 goto out7;
 532
 533         if ((rc = spl_tsd_init()))
 534                 goto out8;
 535
 536         if ((rc = spl_zlib_init()))
 537                 goto out9;
 538
 539         printk(KERN_NOTICE "SPL: Loaded module v%s-%s%s\n", SPL_META_VERSION,
 540                SPL_META_RELEASE, SPL_DEBUG_STR);
 541         return (rc);
 542
 543 out9:
 544         spl_tsd_fini();
 545 out8:
 546         spl_kstat_fini();
 547 out7:
 548         spl_proc_fini();
 549 out6:
 550         spl_vn_fini();
 551 out5:
 552         spl_taskq_fini();
 553 out4:
 554         spl_rw_fini();
 555 out3:
 556         spl_mutex_fini();
 557 out2:
 558         spl_kvmem_fini();
 559 out1:
 560         printk(KERN_NOTICE "SPL: Failed to Load Solaris Porting Layer "
 561                "v%s-%s%s, rc = %d\n", SPL_META_VERSION, SPL_META_RELEASE,
 562                SPL_DEBUG_STR, rc);
 563
 564         return (rc);
 565 }
 566
 567 static void __exit
 568 spl_fini(void)
 569 {
 570         printk(KERN_NOTICE "SPL: Unloaded module v%s-%s%s\n",
 571                SPL_META_VERSION, SPL_META_RELEASE, SPL_DEBUG_STR);
 572         spl_zlib_fini();
 573         spl_tsd_fini();
 574         spl_kstat_fini();
 575         spl_proc_fini();
 576         spl_vn_fini();
 577         spl_taskq_fini();
 578         spl_rw_fini();
 579         spl_mutex_fini();
 580         spl_kvmem_fini();
 581 }
 582
 583 module_init(spl_init);
 584 module_exit(spl_fini);
 585
 586 MODULE_DESCRIPTION("Solaris Porting Layer");
 587 MODULE_AUTHOR(SPL_META_AUTHOR);
 588 MODULE_LICENSE(SPL_META_LICENSE);
 589 MODULE_VERSION(SPL_META_VERSION "-" SPL_META_RELEASE);