]> git.proxmox.com Git - mirror_spl.git/blob - module/spl/spl-generic.c
Remove shrink_{i,d}node_cache() wrappers
[mirror_spl.git] / module / spl / spl-generic.c
1 /*****************************************************************************\
2 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
3 * Copyright (C) 2007 The Regents of the University of California.
4 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
5 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
6 * UCRL-CODE-235197
7 *
8 * This file is part of the SPL, Solaris Porting Layer.
9 * For details, see <http://zfsonlinux.org/>.
10 *
11 * The SPL is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the
13 * Free Software Foundation; either version 2 of the License, or (at your
14 * option) any later version.
15 *
16 * The SPL is distributed in the hope that it will be useful, but WITHOUT
17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 * for more details.
20 *
21 * You should have received a copy of the GNU General Public License along
22 * with the SPL. If not, see <http://www.gnu.org/licenses/>.
23 *****************************************************************************
24 * Solaris Porting Layer (SPL) Generic Implementation.
25 \*****************************************************************************/
26
27 #include <sys/sysmacros.h>
28 #include <sys/systeminfo.h>
29 #include <sys/vmsystm.h>
30 #include <sys/kobj.h>
31 #include <sys/kmem.h>
32 #include <sys/mutex.h>
33 #include <sys/rwlock.h>
34 #include <sys/taskq.h>
35 #include <sys/tsd.h>
36 #include <sys/zmod.h>
37 #include <sys/debug.h>
38 #include <sys/proc.h>
39 #include <sys/kstat.h>
40 #include <sys/file.h>
41 #include <linux/kmod.h>
42 #include <linux/proc_compat.h>
43 #include <spl-debug.h>
44
45 #ifdef SS_DEBUG_SUBSYS
46 #undef SS_DEBUG_SUBSYS
47 #endif
48
49 #define SS_DEBUG_SUBSYS SS_GENERIC
50
51 char spl_version[32] = "SPL v" SPL_META_VERSION "-" SPL_META_RELEASE;
52 EXPORT_SYMBOL(spl_version);
53
54 unsigned long spl_hostid = 0;
55 EXPORT_SYMBOL(spl_hostid);
56 module_param(spl_hostid, ulong, 0644);
57 MODULE_PARM_DESC(spl_hostid, "The system hostid.");
58
59 proc_t p0 = { 0 };
60 EXPORT_SYMBOL(p0);
61
62 #ifndef HAVE_KALLSYMS_LOOKUP_NAME
63 DECLARE_WAIT_QUEUE_HEAD(spl_kallsyms_lookup_name_waitq);
64 kallsyms_lookup_name_t spl_kallsyms_lookup_name_fn = SYMBOL_POISON;
65 #endif
66
67 #if BITS_PER_LONG == 32
68 /*
69 * Support 64/64 => 64 division on a 32-bit platform. While the kernel
70 * provides a div64_u64() function for this we do not use it because the
71 * implementation is flawed. There are cases which return incorrect
72 * results as late as linux-2.6.35. Until this is fixed upstream the
73 * spl must provide its own implementation.
74 *
75 * This implementation is a slightly modified version of the algorithm
76 * proposed by the book 'Hacker's Delight'. The original source can be
77 * found here and is available for use without restriction.
78 *
79 * http://www.hackersdelight.org/HDcode/newCode/divDouble.c
80 */
81
82 /*
83 * Calculate the number of leading zeros in a 64-bit value.
84 */
static int
nlz64(uint64_t x) {
	int width, zeros = 0;

	/* No bit is set at all: every one of the 64 positions is a zero. */
	if (x == 0)
		return 64;

	/*
	 * Binary search for the highest set bit.  At each step test whether
	 * the top 'width' bits are all clear; if so count them and shift
	 * them out so the next (narrower) test again looks at the top bits.
	 */
	for (width = 32; width > 0; width >>= 1) {
		if ((x >> (64 - width)) == 0) {
			zeros += width;
			x <<= width;
		}
	}

	return zeros;
}
101
102 /*
103 * Newer kernels have a div_u64() function but we define our own
104 * to simplify portability between kernel versions.
105 */
static inline uint64_t
__div_u64(uint64_t dividend, uint32_t divisor)
{
	uint64_t quotient = dividend;

	/*
	 * do_div() updates its first argument in place; the value it
	 * returns is intentionally discarded here.
	 */
	(void) do_div(quotient, divisor);

	return quotient;
}
112
113 /*
114 * Implementation of 64-bit unsigned division for 32-bit machines.
115 *
116 * First the procedure takes care of the case in which the divisor is a
117 * 32-bit quantity. There are two subcases: (1) If the left half of the
118 * dividend is less than the divisor, one execution of do_div() is all that
119 * is required (overflow is not possible). (2) Otherwise it does two
120 * divisions, using the grade school method.
121 */
uint64_t
__udivdi3(uint64_t u, uint64_t v)
{
	uint64_t hi, lo, rem, q_hi, q_lo;
	uint64_t v_norm, q_est, q;
	int shift;

	/* Divisor does not fit in 32 bits (v >= 2**32). */
	if ((v >> 32) != 0) {
		shift = nlz64(v);			/* 0 <= shift <= 31 */
		v_norm = (v << shift) >> 32;		/* normalized, MSB set */

		/* Halve the dividend first so the division cannot overflow. */
		q_est = __div_u64(u >> 1, v_norm);

		/* Undo the normalization and the halving of u. */
		q = (q_est << shift) >> 31;

		/* The estimate is now either exact or one too small... */
		if (q != 0)
			q = q - 1;

		/* ...so a single correction step yields the true quotient. */
		if ((u - q * v) >= v)
			q = q + 1;

		return q;
	}

	/* 32-bit divisor and the quotient fits: one division is enough. */
	if ((u >> 32) < v)
		return __div_u64(u, v);

	/* 32-bit divisor, wide quotient: grade school long division. */
	hi = u >> 32;				/* upper half of the dividend */
	lo = u & 0xFFFFFFFF;			/* lower half of the dividend */
	q_hi = __div_u64(hi, v);		/* first quotient digit */
	rem = hi - q_hi * v;			/* first remainder, < v */
	lo += (rem << 32);
	q_lo = __div_u64(lo, v);		/* second quotient digit */

	return (q_hi << 32) + q_lo;
}
EXPORT_SYMBOL(__udivdi3);
156
157 /*
158 * Implementation of 64-bit signed division for 32-bit machines.
159 */
int64_t
__divdi3(int64_t u, int64_t v)
{
	/* Divide the magnitudes using the unsigned implementation. */
	int64_t magnitude = __udivdi3(abs64(u), abs64(v));

	/*
	 * Shifting the XOR of the operands right by 63 is used as a sign
	 * mask: when u and v have different signs the result is negated
	 * via (magnitude ^ mask) - mask, otherwise it is left unchanged.
	 */
	int64_t sign_mask = (u ^ v) >> 63;

	return (magnitude ^ sign_mask) - sign_mask;
}
EXPORT_SYMBOL(__divdi3);
169
170 /*
171 * Implementation of 64-bit unsigned modulo for 32-bit machines.
172 */
uint64_t
__umoddi3(uint64_t dividend, uint64_t divisor)
{
	/* remainder = dividend - divisor * floor(dividend / divisor) */
	uint64_t quotient = __udivdi3(dividend, divisor);

	return (dividend - (divisor * quotient));
}
EXPORT_SYMBOL(__umoddi3);
179
180 #if defined(__arm) || defined(__arm__)
181 /*
182 * Implementation of 64-bit (un)signed division for 32-bit arm machines.
183 *
184 * Run-time ABI for the ARM Architecture (page 20). A pair of (unsigned)
185 * long longs is returned in {{r0, r1}, {r2,r3}}, the quotient in {r0, r1},
186 * and the remainder in {r2, r3}. The return type is specifically left
187 * set to 'void' to ensure the compiler does not overwrite these registers
188 * during the return. All results are in registers as per ABI
189 */
190 void
191 __aeabi_uldivmod(uint64_t u, uint64_t v)
192 {
193 uint64_t res;
194 uint64_t mod;
195
196 res = __udivdi3(u, v);
197 mod = __umoddi3(u, v);
198 {
199 register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF);
200 register uint32_t r1 asm("r1") = (res >> 32);
201 register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF);
202 register uint32_t r3 asm("r3") = (mod >> 32);
203
204 asm volatile(""
205 : "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3) /* output */
206 : "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */
207
208 return; /* r0; */
209 }
210 }
211 EXPORT_SYMBOL(__aeabi_uldivmod);
212
213 void
214 __aeabi_ldivmod(int64_t u, int64_t v)
215 {
216 int64_t res;
217 uint64_t mod;
218
219 res = __divdi3(u, v);
220 mod = __umoddi3(u, v);
221 {
222 register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF);
223 register uint32_t r1 asm("r1") = (res >> 32);
224 register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF);
225 register uint32_t r3 asm("r3") = (mod >> 32);
226
227 asm volatile(""
228 : "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3) /* output */
229 : "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */
230
231 return; /* r0; */
232 }
233 }
234 EXPORT_SYMBOL(__aeabi_ldivmod);
235 #endif /* __arm || __arm__ */
236 #endif /* BITS_PER_LONG */
237
238 /* NOTE: The strtoxx behavior is solely based on my reading of the Solaris
239 * ddi_strtol(9F) man page. I have not verified the behavior of these
240 * functions against their Solaris counterparts. It is possible that I
241 * may have misinterpreted the man page or the man page is incorrect.
242 */
int ddi_strtoul(const char *, char **, int, unsigned long *);
int ddi_strtol(const char *, char **, int, long *);
int ddi_strtoull(const char *, char **, int, unsigned long long *);
int ddi_strtoll(const char *, char **, int, long long *);

/*
 * Generate ddi_strtou<type>(): parse an unsigned integer from 'str'.
 *
 *   str     - NUL terminated input; an empty string yields EINVAL.
 *   endptr  - optional; on success receives the first unparsed character.
 *   base    - 0 to auto-detect ("0x" + hex digit => 16, leading "0" => 8,
 *             otherwise 10), or an explicit base in the range 2..36.
 *   result  - on success receives the parsed value (0 when no digit was
 *             consumed, in which case *endptr equals the start of input).
 *
 * Returns 0 on success, EINVAL for an empty string or unsupported base,
 * and ERANGE when the value does not fit in 'valtype'.  On error neither
 * *result nor *endptr is written.  'valtype' must be an unsigned type.
 */
#define	define_ddi_strtoux(type, valtype)				\
int ddi_strtou##type(const char *str, char **endptr,			\
    int base, valtype *result)						\
{									\
	const valtype max_value = (valtype)~(valtype)0;			\
	valtype value = 0;						\
	const char *ptr = str;						\
	int digit;							\
									\
	if (*ptr == '\0')						\
		return EINVAL;						\
									\
	if (base < 0 || base == 1 || base > 36)				\
		return EINVAL;						\
									\
	/* Auto-detect base based on prefix */				\
	if (base == 0) {						\
		if (str[0] != '0') {					\
			base = 10;	/* decimal */			\
		} else if (tolower((unsigned char)str[1]) == 'x' &&	\
		    isxdigit((unsigned char)str[2])) {			\
			base = 16;	/* hex */			\
			ptr += 2;					\
		} else {						\
			/* The leading '0' is itself an octal digit, */	\
			/* so a lone "0" still parses as zero.       */	\
			base = 8;	/* octal */			\
		}							\
	}								\
									\
	for (;; ptr++) {						\
		unsigned char c = (unsigned char)*ptr;			\
									\
		if (isdigit(c))						\
			digit = c - '0';				\
		else if (isalpha(c))					\
			digit = tolower(c) - 'a' + 10;			\
		else							\
			break;						\
									\
		if (digit >= base)					\
			break;						\
									\
		/* Reject before multiplying so a wrap is never missed */ \
		if (value > (max_value - (valtype)digit) / (valtype)base) \
			return ERANGE;					\
									\
		value = value * (valtype)base + (valtype)digit;		\
	}								\
									\
	*result = value;						\
									\
	if (endptr)							\
		*endptr = (char *)ptr;					\
									\
	return 0;							\
}									\

/*
 * Generate ddi_strto<type>(): signed counterpart of ddi_strtou<type>().
 *
 * An optional leading '-' is accepted; the magnitude is parsed by the
 * unsigned variant and then negated.  If '-' is not followed by any digit
 * *endptr is reset to 'str' and *result is 0.  'endptr' may be NULL.
 * 'valtype' must be spelled so that 'unsigned valtype' is a valid type
 * (e.g. long, long long).  Error returns are those of the unsigned parser.
 */
#define	define_ddi_strtox(type, valtype)				\
int ddi_strto##type(const char *str, char **endptr,			\
    int base, valtype *result)						\
{									\
	unsigned valtype magnitude = 0;					\
	char *end = NULL;						\
	int negative = (*str == '-');					\
	int rc;								\
									\
	rc = ddi_strtou##type(str + negative, &end, base, &magnitude);	\
	if (rc)								\
		return rc;						\
									\
	if (negative) {							\
		if (end == str + 1)					\
			end = (char *)str;	/* nothing after '-' */	\
		else							\
			magnitude = -magnitude;	/* unsigned negate */	\
	}								\
									\
	*result = (valtype)magnitude;					\
									\
	if (endptr)							\
		*endptr = end;						\
									\
	return 0;							\
}

define_ddi_strtoux(l, unsigned long)
define_ddi_strtox(l, long)
define_ddi_strtoux(ll, unsigned long long)
define_ddi_strtox(ll, long long)
330
/* Export the string conversion helpers, listed in definition order. */
EXPORT_SYMBOL(ddi_strtoul);
EXPORT_SYMBOL(ddi_strtol);
EXPORT_SYMBOL(ddi_strtoull);
EXPORT_SYMBOL(ddi_strtoll);
335
336 int
337 ddi_copyin(const void *from, void *to, size_t len, int flags)
338 {
339 /* Fake ioctl() issued by kernel, 'from' is a kernel address */
340 if (flags & FKIOCTL) {
341 memcpy(to, from, len);
342 return 0;
343 }
344
345 return copyin(from, to, len);
346 }
347 EXPORT_SYMBOL(ddi_copyin);
348
349 int
350 ddi_copyout(const void *from, void *to, size_t len, int flags)
351 {
352 /* Fake ioctl() issued by kernel, 'from' is a kernel address */
353 if (flags & FKIOCTL) {
354 memcpy(to, from, len);
355 return 0;
356 }
357
358 return copyout(from, to, len);
359 }
360 EXPORT_SYMBOL(ddi_copyout);
361
362 #ifndef HAVE_PUT_TASK_STRUCT
363 /*
364 * This is only a stub function which should never be used. The SPL should
365 * never be putting away the last reference on a task structure so this will
366 * not be called. However, we still need to define it so the module does not
367 * have undefined symbol at load time. That all said if this impossible
368 * thing does somehow happen PANIC immediately so we know about it.
369 */
370 void
371 __put_task_struct(struct task_struct *t)
372 {
373 PANIC("Unexpectly put last reference on task %d\n", (int)t->pid);
374 }
375 EXPORT_SYMBOL(__put_task_struct);
376 #endif /* HAVE_PUT_TASK_STRUCT */
377
378 /*
379 * Read the unique system identifier from the /etc/hostid file.
380 *
381 * The behavior of /usr/bin/hostid on Linux systems with the
382 * regular eglibc and coreutils is:
383 *
384 * 1. Generate the value if the /etc/hostid file does not exist
385 * or if the /etc/hostid file is less than four bytes in size.
386 *
387 * 2. If the /etc/hostid file is at least 4 bytes, then return
388 * the first four bytes [0..3] in native endian order.
389 *
390 * 3. Always ignore bytes [4..] if they exist in the file.
391 *
392 * Only the first four bytes are significant, even on systems that
393 * have a 64-bit word size.
394 *
395 * See:
396 *
397 * eglibc: sysdeps/unix/sysv/linux/gethostid.c
398 * coreutils: src/hostid.c
399 *
400 * Notes:
401 *
402 * The /etc/hostid file on Solaris is a text file that often reads:
403 *
404 * # DO NOT EDIT
405 * "0123456789"
406 *
407 * Directly copying this file to Linux results in a constant
408 * hostid of 4f442023 because the default comment constitutes
409 * the first four bytes of the file.
410 *
411 */
412
413 char *spl_hostid_path = HW_HOSTID_PATH;
414 module_param(spl_hostid_path, charp, 0444);
415 MODULE_PARM_DESC(spl_hostid_path, "The system hostid file (/etc/hostid)");
416
417 static int
418 hostid_read(void)
419 {
420 int result;
421 uint64_t size;
422 struct _buf *file;
423 uint32_t hostid = 0;
424
425 file = kobj_open_file(spl_hostid_path);
426
427 if (file == (struct _buf *)-1)
428 return -1;
429
430 result = kobj_get_filesize(file, &size);
431
432 if (result != 0) {
433 printk(KERN_WARNING
434 "SPL: kobj_get_filesize returned %i on %s\n",
435 result, spl_hostid_path);
436 kobj_close_file(file);
437 return -2;
438 }
439
440 if (size < sizeof(HW_HOSTID_MASK)) {
441 printk(KERN_WARNING
442 "SPL: Ignoring the %s file because it is %llu bytes; "
443 "expecting %lu bytes instead.\n", spl_hostid_path,
444 size, (unsigned long)sizeof(HW_HOSTID_MASK));
445 kobj_close_file(file);
446 return -3;
447 }
448
449 /* Read directly into the variable like eglibc does. */
450 /* Short reads are okay; native behavior is preserved. */
451 result = kobj_read_file(file, (char *)&hostid, sizeof(hostid), 0);
452
453 if (result < 0) {
454 printk(KERN_WARNING
455 "SPL: kobj_read_file returned %i on %s\n",
456 result, spl_hostid_path);
457 kobj_close_file(file);
458 return -4;
459 }
460
461 /* Mask down to 32 bits like coreutils does. */
462 spl_hostid = hostid & HW_HOSTID_MASK;
463 kobj_close_file(file);
464 return 0;
465 }
466
467 uint32_t
468 zone_get_hostid(void *zone)
469 {
470 static int first = 1;
471
472 /* Only the global zone is supported */
473 ASSERT(zone == NULL);
474
475 if (first) {
476 first = 0;
477
478 /*
479 * Get the hostid if it was not passed as a module parameter.
480 * Try reading the /etc/hostid file directly.
481 */
482 if (hostid_read())
483 spl_hostid = 0;
484
485 printk(KERN_NOTICE "SPL: using hostid 0x%08x\n",
486 (unsigned int) spl_hostid);
487 }
488
489 return spl_hostid;
490 }
491 EXPORT_SYMBOL(zone_get_hostid);
492
493 /*
494 * The kallsyms_lookup_name() kernel function is not an exported symbol in
495 * Linux 2.6.19 through 2.6.32 inclusive.
496 *
497 * This function replaces the functionality by performing an upcall to user
498 * space where /proc/kallsyms is consulted for the requested address.
499 *
500 */
501 #define GET_KALLSYMS_ADDR_CMD \
502 "exec 0</dev/null " \
503 " 1>/proc/sys/kernel/spl/kallsyms_lookup_name " \
504 " 2>/dev/null; " \
505 "awk '{ if ( $3 == \"kallsyms_lookup_name\" ) { print $1 } }' " \
506 " /proc/kallsyms "
507
508 static int
509 set_kallsyms_lookup_name(void)
510 {
511 #ifndef HAVE_KALLSYMS_LOOKUP_NAME
512 char *argv[] = { "/bin/sh",
513 "-c",
514 GET_KALLSYMS_ADDR_CMD,
515 NULL };
516 char *envp[] = { "HOME=/",
517 "TERM=linux",
518 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
519 NULL };
520 int rc;
521
522 rc = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
523
524 /*
525 * Due to I/O buffering the helper may return successfully before
526 * the proc handler has a chance to execute. To catch this case
527 * wait up to 1 second to verify spl_kallsyms_lookup_name_fn was
528 * updated to a non SYMBOL_POISON value.
529 */
530 if (rc == 0) {
531 rc = wait_event_timeout(spl_kallsyms_lookup_name_waitq,
532 spl_kallsyms_lookup_name_fn != SYMBOL_POISON, HZ);
533 if (rc == 0)
534 rc = -ETIMEDOUT;
535 else if (spl_kallsyms_lookup_name_fn == SYMBOL_POISON)
536 rc = -EFAULT;
537 else
538 rc = 0;
539 }
540
541 if (rc)
542 printk("SPL: Failed user helper '%s %s %s', rc = %d\n",
543 argv[0], argv[1], argv[2], rc);
544
545 return (rc);
546 #else
547 return (0);
548 #endif /* HAVE_KALLSYMS_LOOKUP_NAME */
549 }
550
551 static int
552 __init spl_init(void)
553 {
554 int rc = 0;
555
556 if ((rc = spl_debug_init()))
557 return rc;
558
559 if ((rc = spl_kmem_init()))
560 SGOTO(out1, rc);
561
562 if ((rc = spl_mutex_init()))
563 SGOTO(out2, rc);
564
565 if ((rc = spl_rw_init()))
566 SGOTO(out3, rc);
567
568 if ((rc = spl_taskq_init()))
569 SGOTO(out4, rc);
570
571 if ((rc = spl_vn_init()))
572 SGOTO(out5, rc);
573
574 if ((rc = spl_proc_init()))
575 SGOTO(out6, rc);
576
577 if ((rc = spl_kstat_init()))
578 SGOTO(out7, rc);
579
580 if ((rc = spl_tsd_init()))
581 SGOTO(out8, rc);
582
583 if ((rc = spl_zlib_init()))
584 SGOTO(out9, rc);
585
586 if ((rc = set_kallsyms_lookup_name()))
587 SGOTO(out10, rc = -EADDRNOTAVAIL);
588
589 printk(KERN_NOTICE "SPL: Loaded module v%s-%s%s\n", SPL_META_VERSION,
590 SPL_META_RELEASE, SPL_DEBUG_STR);
591 SRETURN(rc);
592 out10:
593 spl_zlib_fini();
594 out9:
595 spl_tsd_fini();
596 out8:
597 spl_kstat_fini();
598 out7:
599 spl_proc_fini();
600 out6:
601 spl_vn_fini();
602 out5:
603 spl_taskq_fini();
604 out4:
605 spl_rw_fini();
606 out3:
607 spl_mutex_fini();
608 out2:
609 spl_kmem_fini();
610 out1:
611 spl_debug_fini();
612
613 printk(KERN_NOTICE "SPL: Failed to Load Solaris Porting Layer "
614 "v%s-%s%s, rc = %d\n", SPL_META_VERSION, SPL_META_RELEASE,
615 SPL_DEBUG_STR, rc);
616 return rc;
617 }
618
619 static void
620 spl_fini(void)
621 {
622 SENTRY;
623
624 printk(KERN_NOTICE "SPL: Unloaded module v%s-%s%s\n",
625 SPL_META_VERSION, SPL_META_RELEASE, SPL_DEBUG_STR);
626 spl_zlib_fini();
627 spl_tsd_fini();
628 spl_kstat_fini();
629 spl_proc_fini();
630 spl_vn_fini();
631 spl_taskq_fini();
632 spl_rw_fini();
633 spl_mutex_fini();
634 spl_kmem_fini();
635 spl_debug_fini();
636 }
637
/*
 * Called when a dependent module is loaded.
 *
 * On a Solaris system the pwd is '/' at module load time, whereas on
 * Linux it is whatever directory the caller of insmod/modprobe was in.
 * Set it to '/' here; a failure is only logged as a warning.
 */
void
spl_setup(void)
{
	int rc = vn_set_pwd("/");

	if (rc != 0)
		printk("SPL: Warning unable to set pwd to '/': %d\n", rc);
}
EXPORT_SYMBOL(spl_setup);
654
/* Called when a dependent module is unloaded; currently a no-op. */
void
spl_cleanup(void)
{
	/* Nothing to undo. */
}
EXPORT_SYMBOL(spl_cleanup);
661
662 module_init(spl_init);
663 module_exit(spl_fini);
664
665 MODULE_DESCRIPTION("Solaris Porting Layer");
666 MODULE_AUTHOR(SPL_META_AUTHOR);
667 MODULE_LICENSE(SPL_META_LICENSE);
668 MODULE_VERSION(SPL_META_VERSION "-" SPL_META_RELEASE);