]> git.proxmox.com Git - mirror_spl-debian.git/blame - module/spl/spl-generic.c
Refresh autogen.sh products
[mirror_spl-debian.git] / module / spl / spl-generic.c
CommitLineData
716154c5
BB
1/*****************************************************************************\
2 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
3 * Copyright (C) 2007 The Regents of the University of California.
4 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
5 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
715f6251 6 * UCRL-CODE-235197
7 *
716154c5
BB
8 * This file is part of the SPL, Solaris Porting Layer.
9 * For details, see <http://github.com/behlendorf/spl/>.
10 *
11 * The SPL is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the
13 * Free Software Foundation; either version 2 of the License, or (at your
14 * option) any later version.
715f6251 15 *
716154c5 16 * The SPL is distributed in the hope that it will be useful, but WITHOUT
715f6251 17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 * for more details.
20 *
21 * You should have received a copy of the GNU General Public License along
716154c5
BB
22 * with the SPL. If not, see <http://www.gnu.org/licenses/>.
23 *****************************************************************************
24 * Solaris Porting Layer (SPL) Generic Implementation.
25\*****************************************************************************/
715f6251 26
14c5326c 27#include <sys/sysmacros.h>
99639e4a 28#include <sys/systeminfo.h>
af828292 29#include <sys/vmsystm.h>
30#include <sys/vnode.h>
c19c06f3 31#include <sys/kmem.h>
9ab1ac14 32#include <sys/mutex.h>
d28db80f 33#include <sys/rwlock.h>
e9cb2b4f 34#include <sys/taskq.h>
8d0f1ee9 35#include <sys/debug.h>
57d1b188 36#include <sys/proc.h>
04a479f7 37#include <sys/kstat.h>
691d2bd7 38#include <sys/utsname.h>
d3126abe 39#include <sys/file.h>
f23e92fa 40#include <linux/kmod.h>
ae4c36ad 41#include <linux/proc_compat.h>
55abb092 42#include <spl-debug.h>
f1b59d26 43
b17edc10
BB
44#ifdef SS_DEBUG_SUBSYS
45#undef SS_DEBUG_SUBSYS
57d1b188 46#endif
8d0f1ee9 47
b17edc10 48#define SS_DEBUG_SUBSYS SS_GENERIC
f23e92fa 49
0cbaeb11 50char spl_version[16] = "SPL v" SPL_META_VERSION;
1a73940d 51EXPORT_SYMBOL(spl_version);
3561541c 52
937879f1 53long spl_hostid = 0;
f23e92fa 54EXPORT_SYMBOL(spl_hostid);
8d0f1ee9 55
99639e4a 56char hw_serial[HW_HOSTID_LEN] = "<none>";
937879f1 57EXPORT_SYMBOL(hw_serial);
f1b59d26 58
ae4c36ad 59proc_t p0 = { 0 };
f1b59d26 60EXPORT_SYMBOL(p0);
70eadc19 61
d1ff2312 62#ifndef HAVE_KALLSYMS_LOOKUP_NAME
96dded38 63kallsyms_lookup_name_t spl_kallsyms_lookup_name_fn = SYMBOL_POISON;
d1ff2312
BB
64#endif
65
77b1fe8f 66int
67highbit(unsigned long i)
68{
69 register int h = 1;
b17edc10 70 SENTRY;
77b1fe8f 71
72 if (i == 0)
b17edc10 73 SRETURN(0);
77b1fe8f 74#if BITS_PER_LONG == 64
75 if (i & 0xffffffff00000000ul) {
76 h += 32; i >>= 32;
77 }
78#endif
79 if (i & 0xffff0000) {
80 h += 16; i >>= 16;
81 }
82 if (i & 0xff00) {
83 h += 8; i >>= 8;
84 }
85 if (i & 0xf0) {
86 h += 4; i >>= 4;
87 }
88 if (i & 0xc) {
89 h += 2; i >>= 2;
90 }
91 if (i & 0x2) {
92 h += 1;
93 }
b17edc10 94 SRETURN(h);
77b1fe8f 95}
96EXPORT_SYMBOL(highbit);
97
a4bfd8ea 98#if BITS_PER_LONG == 32
b61a6e8b 99/*
a4bfd8ea
BB
100 * Support 64/64 => 64 division on a 32-bit platform. While the kernel
101 * provides a div64_u64() function for this we do not use it because the
102 * implementation is flawed. There are cases which return incorrect
103 * results as late as linux-2.6.35. Until this is fixed upstream the
104 * spl must provide its own implementation.
105 *
106 * This implementation is a slightly modified version of the algorithm
107 * proposed by the book 'Hacker's Delight'. The original source can be
108 * found here and is available for use without restriction.
109 *
110 * http://www.hackersdelight.org/HDcode/newCode/divDouble.c
111 */
112
/*
 * Count the number of leading zero bits in a 64-bit value.
 * Returns 64 when the value is zero.
 */
static int
nlz64(uint64_t x)
{
	int zeros = 0;
	int width;

	if (x == 0)
		return 64;

	/*
	 * Binary search: if the top 'width' bits are all clear, account
	 * for them and shift them out, then repeat with half the width.
	 */
	for (width = 32; width >= 1; width >>= 1) {
		if ((x >> (64 - width)) == 0) {
			zeros += width;
			x <<= width;
		}
	}

	return zeros;
}
132
/*
 * Divide a 64-bit value by a 32-bit value and return the quotient.
 * Newer kernels have a div_u64() function but we define our own
 * to simplify portability between kernel versions.
 */
static inline uint64_t
__div_u64(uint64_t u, uint32_t v)
{
	uint64_t quotient = u;

	/* do_div() updates its first argument in place; its own value is ignored. */
	(void) do_div(quotient, v);

	return quotient;
}
143
/*
 * Implementation of 64-bit unsigned division for 32-bit machines.
 *
 * Divisor of 2**32 or more: normalize the divisor so its top bit is set,
 * divide a halved dividend by the upper 32 bits of the normalized divisor,
 * undo the scaling, and then correct the estimate, which is either exact
 * or off by one.
 *
 * Divisor below 2**32: there are two subcases.  (1) If the upper half of
 * the dividend is less than the divisor, a single do_div() is all that is
 * required (overflow is not possible).  (2) Otherwise two divisions are
 * performed, using the grade school method.
 */
uint64_t
__udivdi3(uint64_t u, uint64_t v)
{
	uint64_t u_lo, u_hi, v_norm, q_lo, q_hi, rem;
	int shift;

	if ((v >> 32) != 0) {
		/* v >= 2**32 */
		shift = nlz64(v);			/* 0 <= shift <= 31 */
		v_norm = (v << shift) >> 32;		/* normalized, MSB is 1 */
		u_hi = u >> 1;				/* ensures no overflow */
		q_hi = __div_u64(u_hi, v_norm);		/* quotient estimate */
		q_lo = (q_hi << shift) >> 31;		/* undo normalization and halving */

		if (q_lo != 0)				/* make the estimate correct */
			q_lo--;				/* or too small by 1 */
		if ((u - q_lo * v) >= v)
			q_lo++;				/* now it is correct */

		return q_lo;
	}

	/* v < 2**32 from here on */
	if ((u >> 32) < v)
		return __div_u64(u, v);			/* one division suffices */

	u_hi = u >> 32;					/* split u into two halves */
	u_lo = u & 0xFFFFFFFF;
	q_hi = __div_u64(u_hi, v);			/* first quotient digit */
	rem = u_hi - q_hi * v;				/* first remainder, < v */
	u_lo += (rem << 32);
	q_lo = __div_u64(u_lo, v);			/* second quotient digit */

	return (q_hi << 32) + q_lo;
}
EXPORT_SYMBOL(__udivdi3);
187
/*
 * Implementation of 64-bit signed division for 32-bit machines.
 *
 * The magnitudes are divided as unsigned values and the sign is applied
 * afterwards: 'sign_mask' is the sign of (u ^ v) shifted across the whole
 * word, and (q ^ mask) - mask negates q when the mask is all ones and
 * leaves it untouched when the mask is zero.
 */
int64_t
__divdi3(int64_t u, int64_t v)
{
	int64_t magnitude, sign_mask;

	sign_mask = (u ^ v) >> 63;
	magnitude = __udivdi3(abs64(u), abs64(v));

	return (magnitude ^ sign_mask) - sign_mask;
}
EXPORT_SYMBOL(__divdi3);
200
/*
 * Implementation of 64-bit unsigned modulo for 32-bit machines.
 * The remainder is the dividend minus the divisor times the quotient.
 */
uint64_t
__umoddi3(uint64_t dividend, uint64_t divisor)
{
	uint64_t quotient = __udivdi3(dividend, divisor);

	return dividend - (divisor * quotient);
}
EXPORT_SYMBOL(__umoddi3);
a4bfd8ea 210
96dded38 211#endif /* BITS_PER_LONG */
b61a6e8b 212
b871b8cd
BB
213/* NOTE: The strtoxx behavior is solely based on my reading of the Solaris
214 * ddi_strtol(9F) man page. I have not verified the behavior of these
215 * functions against their Solaris counterparts. It is possible that I
96dded38 216 * may have misinterpreted the man page or the man page is incorrect.
b871b8cd 217 */
2ee63a54
BB
218int ddi_strtoul(const char *, char **, int, unsigned long *);
219int ddi_strtol(const char *, char **, int, long *);
220int ddi_strtoull(const char *, char **, int, unsigned long long *);
221int ddi_strtoll(const char *, char **, int, long long *);
222
223#define define_ddi_strtoux(type, valtype) \
224int ddi_strtou##type(const char *str, char **endptr, \
b871b8cd 225 int base, valtype *result) \
2ee63a54 226{ \
b871b8cd
BB
227 valtype last_value, value = 0; \
228 char *ptr = (char *)str; \
229 int flag = 1, digit; \
230 \
231 if (strlen(ptr) == 0) \
232 return EINVAL; \
233 \
234 /* Auto-detect base based on prefix */ \
235 if (!base) { \
236 if (str[0] == '0') { \
237 if (tolower(str[1])=='x' && isxdigit(str[2])) { \
238 base = 16; /* hex */ \
239 ptr += 2; \
240 } else if (str[1] >= '0' && str[1] < 8) { \
241 base = 8; /* octal */ \
242 ptr += 1; \
243 } else { \
244 return EINVAL; \
245 } \
246 } else { \
247 base = 10; /* decimal */ \
248 } \
249 } \
250 \
251 while (1) { \
252 if (isdigit(*ptr)) \
253 digit = *ptr - '0'; \
254 else if (isalpha(*ptr)) \
255 digit = tolower(*ptr) - 'a' + 10; \
256 else \
257 break; \
258 \
259 if (digit >= base) \
260 break; \
2ee63a54 261 \
b871b8cd
BB
262 last_value = value; \
263 value = value * base + digit; \
264 if (last_value > value) /* Overflow */ \
265 return ERANGE; \
2ee63a54 266 \
b871b8cd
BB
267 flag = 1; \
268 ptr++; \
2ee63a54
BB
269 } \
270 \
b871b8cd
BB
271 if (flag) \
272 *result = value; \
273 \
274 if (endptr) \
275 *endptr = (char *)(flag ? ptr : str); \
276 \
277 return 0; \
2ee63a54
BB
278} \
279
280#define define_ddi_strtox(type, valtype) \
281int ddi_strto##type(const char *str, char **endptr, \
282 int base, valtype *result) \
b871b8cd
BB
283{ \
284 int rc; \
2ee63a54
BB
285 \
286 if (*str == '-') { \
b871b8cd
BB
287 rc = ddi_strtou##type(str + 1, endptr, base, result); \
288 if (!rc) { \
289 if (*endptr == str + 1) \
290 *endptr = (char *)str; \
291 else \
292 *result = -*result; \
293 } \
2ee63a54 294 } else { \
b871b8cd 295 rc = ddi_strtou##type(str, endptr, base, result); \
2ee63a54
BB
296 } \
297 \
b871b8cd
BB
298 return rc; \
299}
2ee63a54
BB
300
301define_ddi_strtoux(l, unsigned long)
302define_ddi_strtox(l, long)
303define_ddi_strtoux(ll, unsigned long long)
304define_ddi_strtox(ll, long long)
305
2f5d55aa 306EXPORT_SYMBOL(ddi_strtoul);
2ee63a54
BB
307EXPORT_SYMBOL(ddi_strtol);
308EXPORT_SYMBOL(ddi_strtoll);
309EXPORT_SYMBOL(ddi_strtoull);
2f5d55aa 310
d3126abe
BB
311int
312ddi_copyin(const void *from, void *to, size_t len, int flags)
313{
314 /* Fake ioctl() issued by kernel, 'from' is a kernel address */
315 if (flags & FKIOCTL) {
316 memcpy(to, from, len);
317 return 0;
318 }
319
320 return copyin(from, to, len);
321}
322EXPORT_SYMBOL(ddi_copyin);
323
324int
325ddi_copyout(const void *from, void *to, size_t len, int flags)
326{
327 /* Fake ioctl() issued by kernel, 'from' is a kernel address */
328 if (flags & FKIOCTL) {
329 memcpy(to, from, len);
330 return 0;
331 }
332
333 return copyout(from, to, len);
334}
335EXPORT_SYMBOL(ddi_copyout);
336
e811949a
BB
337#ifndef HAVE_PUT_TASK_STRUCT
338/*
339 * This is only a stub function which should never be used. The SPL should
340 * never be putting away the last reference on a task structure so this will
341 * not be called. However, we still need to define it so the module does not
342 * have undefined symbol at load time. That all said if this impossible
55abb092 343 * thing does somehow happen PANIC immediately so we know about it.
e811949a
BB
344 */
345void
346__put_task_struct(struct task_struct *t)
347{
55abb092 348 PANIC("Unexpectly put last reference on task %d\n", (int)t->pid);
e811949a
BB
349}
350EXPORT_SYMBOL(__put_task_struct);
351#endif /* HAVE_PUT_TASK_STRUCT */
352
691d2bd7 353struct new_utsname *__utsname(void)
354{
3d061e9d 355#ifdef HAVE_INIT_UTSNAME
691d2bd7 356 return init_utsname();
3d061e9d 357#else
358 return &system_utsname;
359#endif
691d2bd7 360}
361EXPORT_SYMBOL(__utsname);
362
/*
 * Ask user space to publish the host id.
 *
 * Doing address resolution in the kernel is tricky and just not a good
 * idea in general.  So to set the proper 'hw_serial' the usermodehelper
 * support is used to have '/bin/sh' run '/usr/bin/hostid' and redirect
 * its output to /proc/sys/kernel/spl/hostid for us to use.  It's a
 * horrific solution but it will do for now.
 *
 * Returns 0 on success, otherwise the non-zero value reported by
 * call_usermodehelper() (after logging it).
 */
static int
set_hostid(void)
{
	char shell[] = "/bin/sh";
	char *helper_argv[] = {
		shell,
		"-c",
		"/usr/bin/hostid >/proc/sys/kernel/spl/hostid",
		NULL
	};
	char *helper_envp[] = {
		"HOME=/",
		"TERM=linux",
		"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
		NULL
	};
	int rc;

	rc = call_usermodehelper(shell, helper_argv, helper_envp, 1);
	if (rc == 0)
		return 0;

	printk("SPL: Failed user helper '%s %s %s', rc = %d\n",
	    helper_argv[0], helper_argv[1], helper_argv[2], rc);

	return rc;
}
8d0f1ee9 390
99639e4a
BB
391uint32_t
392zone_get_hostid(void *zone)
393{
394 unsigned long hostid;
395
396 /* Only the global zone is supported */
397 ASSERT(zone == NULL);
398
399 if (ddi_strtoul(hw_serial, NULL, HW_HOSTID_LEN-1, &hostid) != 0)
400 return HW_INVALID_HOSTID;
401
402 return (uint32_t)hostid;
403}
404EXPORT_SYMBOL(zone_get_hostid);
405
#ifndef HAVE_KALLSYMS_LOOKUP_NAME
/*
 * Because kallsyms_lookup_name() is no longer exported in the
 * mainline kernel we are forced to resort to somewhat drastic
 * measures.  This function replaces the functionality by performing
 * an upcall to user space where /proc/kallsyms is consulted for
 * the requested address.
 */
#define GET_KALLSYMS_ADDR_CMD \
	"gawk '{ if ( $3 == \"kallsyms_lookup_name\") { print $1 } }' " \
	"/proc/kallsyms >/proc/sys/kernel/spl/kallsyms_lookup_name"

/*
 * Run GET_KALLSYMS_ADDR_CMD through '/bin/sh -c'.  Returns 0 on success,
 * otherwise the non-zero value reported by call_usermodehelper() (after
 * logging it).
 */
static int
set_kallsyms_lookup_name(void)
{
	char shell[] = "/bin/sh";
	char *helper_argv[] = {
		shell,
		"-c",
		GET_KALLSYMS_ADDR_CMD,
		NULL
	};
	char *helper_envp[] = {
		"HOME=/",
		"TERM=linux",
		"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
		NULL
	};
	int rc;

	rc = call_usermodehelper(shell, helper_argv, helper_envp, 1);
	if (rc == 0)
		return 0;

	printk("SPL: Failed user helper '%s %s %s', rc = %d\n",
	    helper_argv[0], helper_argv[1], helper_argv[2], rc);

	return rc;
}
#endif
440
51a727e9
BB
441static int
442__init spl_init(void)
57d1b188 443{
444 int rc = 0;
f23e92fa 445
57d1b188 446 if ((rc = debug_init()))
18c9eadf 447 return rc;
f23e92fa 448
2fb9b26a 449 if ((rc = spl_kmem_init()))
b17edc10 450 SGOTO(out1, rc);
8d0f1ee9 451
9ab1ac14 452 if ((rc = spl_mutex_init()))
b17edc10 453 SGOTO(out2, rc);
9ab1ac14 454
d28db80f 455 if ((rc = spl_rw_init()))
b17edc10 456 SGOTO(out3, rc);
8d0f1ee9 457
d28db80f 458 if ((rc = spl_taskq_init()))
b17edc10 459 SGOTO(out4, rc);
af828292 460
d28db80f 461 if ((rc = vn_init()))
b17edc10 462 SGOTO(out5, rc);
04a479f7 463
d28db80f 464 if ((rc = proc_init()))
b17edc10 465 SGOTO(out6, rc);
e9cb2b4f 466
d28db80f 467 if ((rc = kstat_init()))
b17edc10 468 SGOTO(out7, rc);
d28db80f 469
57d1b188 470 if ((rc = set_hostid()))
b17edc10 471 SGOTO(out8, rc = -EADDRNOTAVAIL);
f23e92fa 472
96dded38 473#ifndef HAVE_KALLSYMS_LOOKUP_NAME
d1ff2312 474 if ((rc = set_kallsyms_lookup_name()))
b17edc10 475 SGOTO(out8, rc = -EADDRNOTAVAIL);
96dded38
BB
476#endif /* HAVE_KALLSYMS_LOOKUP_NAME */
477
478 if ((rc = spl_kmem_init_kallsyms_lookup()))
b17edc10 479 SGOTO(out8, rc);
d1ff2312 480
81672c01
RC
481 printk(KERN_NOTICE "SPL: Loaded Solaris Porting Layer v%s%s\n",
482 SPL_META_VERSION, SPL_DEBUG_STR);
b17edc10 483 SRETURN(rc);
d28db80f 484out8:
04a479f7 485 kstat_fini();
d28db80f 486out7:
57d1b188 487 proc_fini();
d28db80f 488out6:
57d1b188 489 vn_fini();
d28db80f 490out5:
e9cb2b4f 491 spl_taskq_fini();
d28db80f
BB
492out4:
493 spl_rw_fini();
9ab1ac14 494out3:
495 spl_mutex_fini();
8d0f1ee9 496out2:
2fb9b26a 497 spl_kmem_fini();
d28db80f 498out1:
57d1b188 499 debug_fini();
8d0f1ee9 500
81672c01
RC
501 printk(KERN_NOTICE "SPL: Failed to Load Solaris Porting Layer v%s%s"
502 ", rc = %d\n", SPL_META_VERSION, SPL_DEBUG_STR, rc);
18c9eadf 503 return rc;
70eadc19 504}
505
51a727e9
BB
506static void
507spl_fini(void)
70eadc19 508{
b17edc10 509 SENTRY;
57d1b188 510
81672c01
RC
511 printk(KERN_NOTICE "SPL: Unloaded Solaris Porting Layer v%s%s\n",
512 SPL_META_VERSION, SPL_DEBUG_STR);
04a479f7 513 kstat_fini();
57d1b188 514 proc_fini();
af828292 515 vn_fini();
e9cb2b4f 516 spl_taskq_fini();
d28db80f 517 spl_rw_fini();
2fb9b26a 518 spl_mutex_fini();
519 spl_kmem_fini();
57d1b188 520 debug_fini();
70eadc19 521}
522
51a727e9
BB
/*
 * Called when a dependent module is loaded.
 *
 * At module load time the pwd is set to '/' on a Solaris system.  On a
 * Linux system it will be whatever directory the caller was in when
 * executing insmod/modprobe, so it is set to '/' here.  A failure is
 * only logged as a warning.
 */
void
spl_setup(void)
{
	int rc = vn_set_pwd("/");

	if (rc != 0)
		printk("SPL: Warning unable to set pwd to '/': %d\n", rc);
}
EXPORT_SYMBOL(spl_setup);
539
/*
 * Called when a dependent module is unloaded.
 * There is currently nothing to undo, so the body is empty.
 */
void
spl_cleanup(void)
{
	return;
}
EXPORT_SYMBOL(spl_cleanup);
546
70eadc19 547module_init(spl_init);
548module_exit(spl_fini);
549
550MODULE_AUTHOR("Lawrence Livermore National Labs");
551MODULE_DESCRIPTION("Solaris Porting Layer");
552MODULE_LICENSE("GPL");