module/spl/spl-generic.c

   1 /*****************************************************************************\
   2  *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
   3  *  Copyright (C) 2007 The Regents of the University of California.
   4  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
   5  *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
   6  *  UCRL-CODE-235197
   7  *
   8  *  This file is part of the SPL, Solaris Porting Layer.
   9  *  For details, see <http://zfsonlinux.org/>.
  10  *
  11  *  The SPL is free software; you can redistribute it and/or modify it
  12  *  under the terms of the GNU General Public License as published by the
  13  *  Free Software Foundation; either version 2 of the License, or (at your
  14  *  option) any later version.
  15  *
  16  *  The SPL is distributed in the hope that it will be useful, but WITHOUT
  17  *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  18  *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  19  *  for more details.
  20  *
  21  *  You should have received a copy of the GNU General Public License along
  22  *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
  23  *****************************************************************************
  24  *  Solaris Porting Layer (SPL) Generic Implementation.
  25 \*****************************************************************************/
  26
  27 #include <sys/sysmacros.h>
  28 #include <sys/systeminfo.h>
  29 #include <sys/vmsystm.h>
  30 #include <sys/kobj.h>
  31 #include <sys/kmem.h>
  32 #include <sys/kmem_cache.h>
  33 #include <sys/vmem.h>
  34 #include <sys/mutex.h>
  35 #include <sys/rwlock.h>
  36 #include <sys/taskq.h>
  37 #include <sys/tsd.h>
  38 #include <sys/zmod.h>
  39 #include <sys/debug.h>
  40 #include <sys/proc.h>
  41 #include <sys/kstat.h>
  42 #include <sys/file.h>
  43 #include <linux/ctype.h>
  44 #include <linux/kmod.h>
  45 #include <linux/math64_compat.h>
  46 #include <linux/proc_compat.h>
  47
  /*
   * Version banner built at compile time from SPL_META_VERSION and
   * SPL_META_RELEASE; made available to other modules with EXPORT_SYMBOL.
   */
  48 char spl_version[32] = "SPL v" SPL_META_VERSION "-" SPL_META_RELEASE;
  49 EXPORT_SYMBOL(spl_version);
  50
  /*
   * System hostid.  Starts at 0 and can be supplied through the
   * 'spl_hostid' module parameter (type ulong, mode 0644).
   * zone_get_hostid() masks it with HW_HOSTID_MASK and, when it is still
   * 0, tries to load it from the hostid file instead.
   */
  51 unsigned long spl_hostid = 0;
  52 EXPORT_SYMBOL(spl_hostid);
  53 module_param(spl_hostid, ulong, 0644);
  54 MODULE_PARM_DESC(spl_hostid, "The system hostid.");
  55
  /*
   * Zero-initialized, exported proc_t.
   * NOTE(review): presumably the stand-in for the Solaris 'p0' kernel
   * process that ported code refers to -- confirm against its users.
   */
  56 proc_t p0 = { 0 };
  57 EXPORT_SYMBOL(p0);
  58
  59 #if BITS_PER_LONG == 32
  60 /*
  61  * Support 64/64 => 64 division on a 32-bit platform.  While the kernel
  62  * provides a div64_u64() function for this we do not use it because the
  63  * implementation is flawed.  There are cases which return incorrect
  64  * results as late as linux-2.6.35.  Until this is fixed upstream the
  65  * spl must provide its own implementation.
  66  *
  67  * This implementation is a slightly modified version of the algorithm
  68  * proposed by the book 'Hacker's Delight'.  The original source can be
  69  * found here and is available for use without restriction.
  70  *
  71  * http://www.hackersdelight.org/HDcode/newCode/divDouble.c
  72  */
  73
  /*
   * Count the leading zero bits of a 64-bit value.  A value of zero has
   * 64 leading zeros.
   */
  static int
  nlz64(uint64_t x)
  {
          int zeros = 0;
          int step;

          if (x == 0)
                  return (64);

          /*
           * Binary search for the highest set bit: whenever the top 'step'
           * bits are all clear, count them and shift them out of the way.
           */
          for (step = 32; step > 0; step >>= 1) {
                  if ((x >> (64 - step)) == 0) {
                          zeros += step;
                          x <<= step;
                  }
          }

          return (zeros);
  }
  93
  /*
   * 64-bit by 32-bit unsigned division returning the quotient.  Newer
   * kernels ship a div_u64() of their own, but carrying a private wrapper
   * around do_div() simplifies portability between kernel versions.
   */
  static inline uint64_t
  __div_u64(uint64_t u, uint32_t v)
  {
          /* do_div() leaves the quotient in 'u'; its result is not needed. */
          do_div(u, v);

          return (u);
  }
 104
 /*
  * 64-bit unsigned division for 32-bit machines.
  *
  * A divisor that fits in 32 bits is handled first, in two subcases:
  * (1) when the upper half of the dividend is smaller than the divisor a
  * single do_div() is enough because the quotient cannot overflow, and
  * (2) otherwise two divisions are chained using the grade school method.
  *
  * A wider divisor is normalized so that its most significant bit is set,
  * an estimate of the quotient is taken from the halved dividend and the
  * top 32 bits of the normalized divisor, and that estimate is corrected
  * afterwards.
  */
 uint64_t
 __udivdi3(uint64_t u, uint64_t v)
 {
         uint64_t hi, lo, rem, q_hi, q_lo, v_top, q;
         int shift;

         if ((v >> 32) != 0) {
                 /* Divisor is 2**32 or larger. */
                 shift = nlz64(v);               /* 0 <= shift <= 31 */
                 v_top = (v << shift) >> 32;     /* Normalized, MSB is 1 */
                 q = __div_u64(u >> 1, v_top);   /* u halved: no overflow */
                 q = (q << shift) >> 31;         /* Undo both adjustments */

                 if (q != 0)
                         q = q - 1;              /* Exact or 1 too small */
                 if ((u - q * v) >= v)
                         q = q + 1;              /* Now exact */

                 return (q);
         }

         /* Divisor is below 2**32 and the quotient fits in one division. */
         if ((u >> 32) < v)
                 return (__div_u64(u, v));

         /* Divisor is below 2**32 but one division would overflow. */
         hi = u >> 32;                           /* Split the dividend */
         lo = u & 0xFFFFFFFF;
         q_hi = __div_u64(hi, v);                /* First quotient digit */
         rem = hi - q_hi * v;                    /* First remainder, < v */
         lo += (rem << 32);
         q_lo = __div_u64(lo, v);                /* Second quotient digit */

         return ((q_hi << 32) + q_lo);
 }
 147 EXPORT_SYMBOL(__udivdi3);
 148
 /*
  * 64-bit signed division for 32-bit machines: divide the magnitudes with
  * __udivdi3() and then restore the sign of the quotient.
  */
 int64_t
 __divdi3(int64_t u, int64_t v)
 {
         int64_t quot, sign;

         quot = __udivdi3(abs64(u), abs64(v));

         /* The shifted XOR selects negation when u and v differ in sign. */
         sign = (u ^ v) >> 63;

         return ((quot ^ sign) - sign);
 }
 160 EXPORT_SYMBOL(__divdi3);
 161
 /*
  * 64-bit unsigned modulo for 32-bit machines, derived from the quotient
  * that __udivdi3() produces: remainder = dividend - divisor * quotient.
  */
 uint64_t
 __umoddi3(uint64_t dividend, uint64_t divisor)
 {
         uint64_t quotient;

         quotient = __udivdi3(dividend, divisor);

         return (dividend - (divisor * quotient));
 }
 170 EXPORT_SYMBOL(__umoddi3);
 171
 172 #if defined(__arm) || defined(__arm__)
 173 /*
 174  * Implementation of 64-bit (un)signed division for 32-bit arm machines.
 175  *
 176  * Run-time ABI for the ARM Architecture (page 20).  A pair of (unsigned)
 177  * long longs is returned in {{r0, r1}, {r2,r3}}, the quotient in {r0, r1},
 178  * and the remainder in {r2, r3}.  The return type is specifically left
 179  * set to 'void' to ensure the compiler does not overwrite these registers
 180  * during the return.  All results are in registers as per ABI
 181  */
 182 void
 183 __aeabi_uldivmod(uint64_t u, uint64_t v)
 184 {
 185         uint64_t res;
 186         uint64_t mod;
 187
 188         res = __udivdi3(u, v);
 189         mod = __umoddi3(u, v);
 190         {
 191                 register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF);
 192                 register uint32_t r1 asm("r1") = (res >> 32);
 193                 register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF);
 194                 register uint32_t r3 asm("r3") = (mod >> 32);
 195
 196                 asm volatile(""
 197                     : "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3)  /* output */
 198                     : "r"(r0), "r"(r1), "r"(r2), "r"(r3));   /* input */
 199
 200                 return; /* r0; */
 201         }
 202 }
 203 EXPORT_SYMBOL(__aeabi_uldivmod);
 204
 205 void
 206 __aeabi_ldivmod(int64_t u, int64_t v)
 207 {
 208         int64_t res;
 209         uint64_t mod;
 210
 211         res =  __divdi3(u, v);
 212         mod = __umoddi3(u, v);
 213         {
 214                 register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF);
 215                 register uint32_t r1 asm("r1") = (res >> 32);
 216                 register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF);
 217                 register uint32_t r3 asm("r3") = (mod >> 32);
 218
 219                 asm volatile(""
 220                     : "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3)  /* output */
 221                     : "r"(r0), "r"(r1), "r"(r2), "r"(r3));   /* input */
 222
 223                 return; /* r0; */
 224         }
 225 }
 226 EXPORT_SYMBOL(__aeabi_ldivmod);
 227 #endif /* __arm || __arm__ */
 228 #endif /* BITS_PER_LONG */
 229
 /*
  * ddi_strtoul(), ddi_strtol(), ddi_strtoull() and ddi_strtoll().
  *
  * NOTE: The strtoxx behavior is solely based on my reading of the Solaris
  * ddi_strtol(9F) man page.  I have not verified the behavior of these
  * functions against their Solaris counterparts.  It is possible that I
  * may have misinterpreted the man page or the man page is incorrect.
  *
  * All four functions share one contract:
  *
  *   str    - NUL-terminated text to convert.  Leading whitespace is not
  *            skipped and only the signed variants accept a leading '-'.
  *   endptr - when non-NULL, receives the address of the first character
  *            that was not converted, or 'str' when nothing was converted.
  *   base   - 2..36, or 0 to auto-detect: "0x"/"0X" followed by a hex
  *            digit selects 16, any other leading '0' selects 8, and
  *            everything else selects 10.
  *   result - receives the converted value on success.
  *
  * Returns 0 on success, EINVAL for an empty string or an unsupported
  * base, and ERANGE when the value does not fit in the result type.  On
  * failure neither *result nor *endptr is written.
  */
 int ddi_strtoul(const char *, char **, int, unsigned long *);
 int ddi_strtol(const char *, char **, int, long *);
 int ddi_strtoull(const char *, char **, int, unsigned long long *);
 int ddi_strtoll(const char *, char **, int, long long *);

 /*
  * Generates the unsigned parser ddi_strtou<type>() for 'valtype'.
  */
 #define define_ddi_strtoux(type, valtype)                               \
 int ddi_strtou##type(const char *str, char **endptr,                    \
                      int base, valtype *result)                         \
 {                                                                       \
         const valtype max = (valtype)~(valtype)0;                       \
         valtype value = 0, cutoff;                                      \
         const char *ptr = str;                                          \
         int consumed = 0, digit, cutlim;                                \
                                                                         \
         if (*str == '\0')                                               \
                 return EINVAL;                                          \
                                                                         \
         if (base != 0 && (base < 2 || base > 36))                       \
                 return EINVAL;                                          \
                                                                         \
         /* Auto-detect base based on prefix */                          \
         if (base == 0) {                                                \
                 if (str[0] != '0') {                                    \
                         base = 10; /* decimal */                        \
                 } else if (tolower((unsigned char)str[1]) == 'x' &&     \
                     isxdigit((unsigned char)str[2])) {                  \
                         base = 16; /* hex */                            \
                         ptr += 2;                                       \
                 } else {                                                \
                         /* The leading '0' is parsed as a digit */      \
                         base = 8; /* octal */                           \
                 }                                                       \
         }                                                               \
                                                                         \
         /*                                                              \
          * Largest value that can still take one more digit, and the    \
          * largest digit allowed once that value has been reached.      \
          */                                                             \
         cutoff = max / (valtype)base;                                   \
         cutlim = (int)(max - cutoff * (valtype)base);                   \
                                                                         \
         for (;; ptr++) {                                                \
                 const unsigned char c = (unsigned char)*ptr;            \
                                                                         \
                 if (isdigit(c))                                         \
                         digit = c - '0';                                \
                 else if (isalpha(c))                                    \
                         digit = tolower(c) - 'a' + 10;                  \
                 else                                                    \
                         break;                                          \
                                                                         \
                 if (digit >= base)                                      \
                         break;                                          \
                                                                         \
                 if (value > cutoff ||                                   \
                     (value == cutoff && digit > cutlim))                \
                         return ERANGE; /* Overflow */                   \
                                                                         \
                 value = value * (valtype)base + (valtype)digit;         \
                 consumed = 1;                                           \
         }                                                               \
                                                                         \
         *result = value;                                                \
                                                                         \
         if (endptr)                                                     \
                 *endptr = (char *)(consumed ? ptr : str);               \
                                                                         \
         return 0;                                                       \
 }

 /*
  * Generates the signed parser ddi_strto<type>() for 'valtype' on top of
  * the matching unsigned parser.
  */
 #define define_ddi_strtox(type, valtype)                                \
 int ddi_strto##type(const char *str, char **endptr,                     \
                     int base, valtype *result)                          \
 {                                                                       \
         /* Largest positive value of the signed result type */          \
         const unsigned valtype smax =                                   \
             (unsigned valtype)~(unsigned valtype)0 >> 1;                \
         const int neg = (*str == '-');                                  \
         const char *digits = str + neg;                                 \
         unsigned valtype mag = 0;                                       \
         char *end = (char *)digits;                                     \
         int rc;                                                         \
                                                                         \
         rc = ddi_strtou##type(digits, &end, base, &mag);                \
         if (rc)                                                         \
                 return rc;                                              \
                                                                         \
         /* The magnitude may reach smax, or smax + 1 when negative */   \
         if (mag > smax + (unsigned valtype)neg)                         \
                 return ERANGE;                                          \
                                                                         \
         if (!neg)                                                       \
                 *result = (valtype)mag;                                 \
         else if (mag > smax)                                            \
                 *result = -(valtype)smax - 1; /* most negative value */ \
         else                                                            \
                 *result = -(valtype)mag;                                \
                                                                         \
         /* A '-' that is not followed by digits is not consumed */      \
         if (endptr)                                                     \
                 *endptr = (end == digits) ? (char *)str : end;          \
                                                                         \
         return 0;                                                       \
 }

 define_ddi_strtoux(l, unsigned long)
 define_ddi_strtox(l, long)
 define_ddi_strtoux(ll, unsigned long long)
 define_ddi_strtox(ll, long long)
 322
 323 EXPORT_SYMBOL(ddi_strtoul);
 324 EXPORT_SYMBOL(ddi_strtol);
 325 EXPORT_SYMBOL(ddi_strtoll);
 326 EXPORT_SYMBOL(ddi_strtoull);
 327
 328 int
 329 ddi_copyin(const void *from, void *to, size_t len, int flags)
 330 {
 331         /* Fake ioctl() issued by kernel, 'from' is a kernel address */
 332         if (flags & FKIOCTL) {
 333                 memcpy(to, from, len);
 334                 return 0;
 335         }
 336
 337         return copyin(from, to, len);
 338 }
 339 EXPORT_SYMBOL(ddi_copyin);
 340
 341 int
 342 ddi_copyout(const void *from, void *to, size_t len, int flags)
 343 {
 344         /* Fake ioctl() issued by kernel, 'from' is a kernel address */
 345         if (flags & FKIOCTL) {
 346                 memcpy(to, from, len);
 347                 return 0;
 348         }
 349
 350         return copyout(from, to, len);
 351 }
 352 EXPORT_SYMBOL(ddi_copyout);
 353
 354 #ifndef HAVE_PUT_TASK_STRUCT
 355 /*
 356  * This is only a stub function which should never be used.  The SPL should
 357  * never be putting away the last reference on a task structure so this will
 358  * not be called.  However, we still need to define it so the module does not
 359  * have undefined symbol at load time.  That all said if this impossible
 360  * thing does somehow happen PANIC immediately so we know about it.
 361  */
 362 void
 363 __put_task_struct(struct task_struct *t)
 364 {
 365         PANIC("Unexpectly put last reference on task %d\n", (int)t->pid);
 366 }
 367 EXPORT_SYMBOL(__put_task_struct);
 368 #endif /* HAVE_PUT_TASK_STRUCT */
 369
 370 /*
 371  * Read the unique system identifier from the /etc/hostid file.
 372  *
 373  * The behavior of /usr/bin/hostid on Linux systems with the
 374  * regular eglibc and coreutils is:
 375  *
 376  *   1. Generate the value if the /etc/hostid file does not exist
 377  *      or if the /etc/hostid file is less than four bytes in size.
 378  *
 379  *   2. If the /etc/hostid file is at least 4 bytes, then return
 380  *      the first four bytes [0..3] in native endian order.
 381  *
 382  *   3. Always ignore bytes [4..] if they exist in the file.
 383  *
 384  * Only the first four bytes are significant, even on systems that
 385  * have a 64-bit word size.
 386  *
 387  * See:
 388  *
 389  *   eglibc: sysdeps/unix/sysv/linux/gethostid.c
 390  *   coreutils: src/hostid.c
 391  *
 392  * Notes:
 393  *
 394  * The /etc/hostid file on Solaris is a text file that often reads:
 395  *
 396  *   # DO NOT EDIT
 397  *   "0123456789"
 398  *
 399  * Directly copying this file to Linux results in a constant
 400  * hostid of 4f442023 because the default comment constitutes
 401  * the first four bytes of the file.
 402  *
 403  */
 404
 /*
  * Path of the hostid file opened by hostid_read().  Defaults to
  * HW_HOSTID_PATH and is exposed as the 'spl_hostid_path' module
  * parameter (type charp, mode 0444).
  */
 405 char *spl_hostid_path = HW_HOSTID_PATH;
 406 module_param(spl_hostid_path, charp, 0444);
 407 MODULE_PARM_DESC(spl_hostid_path, "The system hostid file (/etc/hostid)");
 408
 409 static int
 410 hostid_read(void)
 411 {
 412         int result;
 413         uint64_t size;
 414         struct _buf *file;
 415         uint32_t hostid = 0;
 416
 417         file = kobj_open_file(spl_hostid_path);
 418
 419         if (file == (struct _buf *)-1)
 420                 return -1;
 421
 422         result = kobj_get_filesize(file, &size);
 423
 424         if (result != 0) {
 425                 printk(KERN_WARNING
 426                        "SPL: kobj_get_filesize returned %i on %s\n",
 427                        result, spl_hostid_path);
 428                 kobj_close_file(file);
 429                 return -2;
 430         }
 431
 432         if (size < sizeof(HW_HOSTID_MASK)) {
 433                 printk(KERN_WARNING
 434                        "SPL: Ignoring the %s file because it is %llu bytes; "
 435                        "expecting %lu bytes instead.\n", spl_hostid_path,
 436                        size, (unsigned long)sizeof(HW_HOSTID_MASK));
 437                 kobj_close_file(file);
 438                 return -3;
 439         }
 440
 441         /* Read directly into the variable like eglibc does. */
 442         /* Short reads are okay; native behavior is preserved. */
 443         result = kobj_read_file(file, (char *)&hostid, sizeof(hostid), 0);
 444
 445         if (result < 0) {
 446                 printk(KERN_WARNING
 447                        "SPL: kobj_read_file returned %i on %s\n",
 448                        result, spl_hostid_path);
 449                 kobj_close_file(file);
 450                 return -4;
 451         }
 452
 453         /* Mask down to 32 bits like coreutils does. */
 454         spl_hostid = hostid & HW_HOSTID_MASK;
 455         kobj_close_file(file);
 456         return 0;
 457 }
 458
 459 uint32_t
 460 zone_get_hostid(void *zone)
 461 {
 462         static int first = 1;
 463
 464         /* Only the global zone is supported */
 465         ASSERT(zone == NULL);
 466
 467         if (first) {
 468                 first = 0;
 469
 470                 spl_hostid &= HW_HOSTID_MASK;
 471                 /*
 472                  * Get the hostid if it was not passed as a module parameter.
 473                  * Try reading the /etc/hostid file directly.
 474                  */
 475                 if (spl_hostid == 0 && hostid_read())
 476                         spl_hostid = 0;
 477
 478
 479                 printk(KERN_NOTICE "SPL: using hostid 0x%08x\n",
 480                         (unsigned int) spl_hostid);
 481         }
 482
 483         return spl_hostid;
 484 }
 485 EXPORT_SYMBOL(zone_get_hostid);
 486
 /*
  * Initialize the kmem layer and then the vmem layer.  If the vmem layer
  * fails, the kmem layer is torn down again.  Returns 0 on success or the
  * error code of the step that failed.
  */
 static int
 spl_kvmem_init(void)
 {
         int rc;

         rc = spl_kmem_init();
         if (rc == 0) {
                 rc = spl_vmem_init();
                 if (rc != 0)
                         spl_kmem_fini();
         }

         return (rc);
 }
 504
 /*
  * Tear down the vmem layer and then the kmem layer, i.e. the reverse of
  * the order used by spl_kvmem_init().
  */
 static void
 spl_kvmem_fini(void)
 {
         spl_vmem_fini();        /* initialized last, released first */
         spl_kmem_fini();
 }
 511
 512 static int __init
 513 spl_init(void)
 514 {
 515         int rc = 0;
 516
 517         if ((rc = spl_kvmem_init()))
 518                 goto out1;
 519
 520         if ((rc = spl_mutex_init()))
 521                 goto out2;
 522
 523         if ((rc = spl_rw_init()))
 524                 goto out3;
 525
 526         if ((rc = spl_tsd_init()))
 527                 goto out4;
 528
 529         if ((rc = spl_taskq_init()))
 530                 goto out5;
 531
 532         if ((rc = spl_kmem_cache_init()))
 533                 goto out6;
 534
 535         if ((rc = spl_vn_init()))
 536                 goto out7;
 537
 538         if ((rc = spl_proc_init()))
 539                 goto out8;
 540
 541         if ((rc = spl_kstat_init()))
 542                 goto out9;
 543
 544         if ((rc = spl_zlib_init()))
 545                 goto out10;
 546
 547         printk(KERN_NOTICE "SPL: Loaded module v%s-%s%s\n", SPL_META_VERSION,
 548                SPL_META_RELEASE, SPL_DEBUG_STR);
 549         return (rc);
 550
 551 out10:
 552         spl_kstat_fini();
 553 out9:
 554         spl_proc_fini();
 555 out8:
 556         spl_vn_fini();
 557 out7:
 558         spl_kmem_cache_fini();
 559 out6:
 560         spl_taskq_fini();
 561 out5:
 562         spl_tsd_fini();
 563 out4:
 564         spl_rw_fini();
 565 out3:
 566         spl_mutex_fini();
 567 out2:
 568         spl_kvmem_fini();
 569 out1:
 570         printk(KERN_NOTICE "SPL: Failed to Load Solaris Porting Layer "
 571                "v%s-%s%s, rc = %d\n", SPL_META_VERSION, SPL_META_RELEASE,
 572                SPL_DEBUG_STR, rc);
 573
 574         return (rc);
 575 }
 576
 577 static void __exit
 578 spl_fini(void)
 579 {
 580         printk(KERN_NOTICE "SPL: Unloaded module v%s-%s%s\n",
 581                SPL_META_VERSION, SPL_META_RELEASE, SPL_DEBUG_STR);
 582         spl_zlib_fini();
 583         spl_kstat_fini();
 584         spl_proc_fini();
 585         spl_vn_fini();
 586         spl_kmem_cache_fini();
 587         spl_taskq_fini();
 588         spl_tsd_fini();
 589         spl_rw_fini();
 590         spl_mutex_fini();
 591         spl_kvmem_fini();
 592 }
 593
 /* Register spl_init() and spl_fini() as the module's init and exit hooks. */
 594 module_init(spl_init);
 595 module_exit(spl_fini);
 596
 /*
  * Module metadata.  Author, license and version strings are taken from
  * the SPL_META_* macros.
  */
 597 MODULE_DESCRIPTION("Solaris Porting Layer");
 598 MODULE_AUTHOR(SPL_META_AUTHOR);
 599 MODULE_LICENSE(SPL_META_LICENSE);
 600 MODULE_VERSION(SPL_META_VERSION "-" SPL_META_RELEASE);