]>
Commit | Line | Data |
---|---|---|
4b393c50 | 1 | /* |
716154c5 BB |
2 | * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. |
3 | * Copyright (C) 2007 The Regents of the University of California. | |
4 | * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). | |
5 | * Written by Brian Behlendorf <behlendorf1@llnl.gov>. | |
715f6251 | 6 | * UCRL-CODE-235197 |
7 | * | |
716154c5 | 8 | * This file is part of the SPL, Solaris Porting Layer. |
3d6af2dd | 9 | * For details, see <http://zfsonlinux.org/>. |
716154c5 BB |
10 | * |
11 | * The SPL is free software; you can redistribute it and/or modify it | |
12 | * under the terms of the GNU General Public License as published by the | |
13 | * Free Software Foundation; either version 2 of the License, or (at your | |
14 | * option) any later version. | |
715f6251 | 15 | * |
716154c5 | 16 | * The SPL is distributed in the hope that it will be useful, but WITHOUT |
715f6251 | 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
19 | * for more details. | |
20 | * | |
21 | * You should have received a copy of the GNU General Public License along | |
716154c5 | 22 | * with the SPL. If not, see <http://www.gnu.org/licenses/>. |
5461eefe | 23 | * |
716154c5 | 24 | * Solaris Porting Layer (SPL) Generic Implementation. |
4b393c50 | 25 | */ |
715f6251 | 26 | |
14c5326c | 27 | #include <sys/sysmacros.h> |
99639e4a | 28 | #include <sys/systeminfo.h> |
af828292 | 29 | #include <sys/vmsystm.h> |
0d54dcb5 | 30 | #include <sys/kobj.h> |
c19c06f3 | 31 | #include <sys/kmem.h> |
e5b9b344 BB |
32 | #include <sys/kmem_cache.h> |
33 | #include <sys/vmem.h> | |
9ab1ac14 | 34 | #include <sys/mutex.h> |
d28db80f | 35 | #include <sys/rwlock.h> |
e9cb2b4f | 36 | #include <sys/taskq.h> |
9fe45dc1 | 37 | #include <sys/tsd.h> |
5c1967eb | 38 | #include <sys/zmod.h> |
8d0f1ee9 | 39 | #include <sys/debug.h> |
57d1b188 | 40 | #include <sys/proc.h> |
04a479f7 | 41 | #include <sys/kstat.h> |
d3126abe | 42 | #include <sys/file.h> |
e5b9b344 | 43 | #include <linux/ctype.h> |
0b43696e RY |
44 | #include <sys/disp.h> |
45 | #include <sys/random.h> | |
f23e92fa | 46 | #include <linux/kmod.h> |
52479ecf | 47 | #include <linux/math64_compat.h> |
ae4c36ad | 48 | #include <linux/proc_compat.h> |
f23e92fa | 49 | |
0835057e | 50 | char spl_version[32] = "SPL v" SPL_META_VERSION "-" SPL_META_RELEASE; |
1a73940d | 51 | EXPORT_SYMBOL(spl_version); |
3561541c | 52 | |
3673d032 | 53 | /* BEGIN CSTYLED */ |
acf0ade3 | 54 | unsigned long spl_hostid = 0; |
f23e92fa | 55 | EXPORT_SYMBOL(spl_hostid); |
fa6f7d8f DH |
56 | module_param(spl_hostid, ulong, 0644); |
57 | MODULE_PARM_DESC(spl_hostid, "The system hostid."); | |
3673d032 | 58 | /* END CSTYLED */ |
8d0f1ee9 | 59 | |
341dfdb3 | 60 | proc_t p0; |
f1b59d26 | 61 | EXPORT_SYMBOL(p0); |
70eadc19 | 62 | |
0b43696e RY |
63 | /* |
64 | * Xorshift Pseudo Random Number Generator based on work by Sebastiano Vigna | |
65 | * | |
66 | * "Further scramblings of Marsaglia's xorshift generators" | |
67 | * http://vigna.di.unimi.it/ftp/papers/xorshiftplus.pdf | |
68 | * | |
69 | * random_get_pseudo_bytes() is an API function on Illumos whose sole purpose | |
70 | * is to provide bytes containing random numbers. It is mapped to /dev/urandom | |
71 | * on Illumos, which uses a "FIPS 186-2 algorithm". No user of the SPL's | |
72 | * random_get_pseudo_bytes() needs bytes that are of cryptographic quality, so | |
73 | * we can implement it using a fast PRNG that we seed using Linux' actual | |
74 | * equivalent to random_get_pseudo_bytes(). We do this by providing each CPU | |
75 | * with an independent seed so that all calls to random_get_pseudo_bytes() are | |
76 | * free of atomic instructions. | |
77 | * | |
78 | * A consequence of using a fast PRNG is that using random_get_pseudo_bytes() | |
79 | * to generate words larger than 128 bits will paradoxically be limited to | |
80 | * `2^128 - 1` possibilities. This is because we have a sequence of `2^128 - 1` | |
81 | * 128-bit words and selecting the first will implicitly select the second. If | |
82 | * a caller finds this behavior undesirable, random_get_bytes() should be used | |
83 | * instead. | |
84 | * | |
85 | * XXX: Linux interrupt handlers that trigger within the critical section | |
86 | * formed by `s[1] = xp[1];` and `xp[0] = s[0];` and call this function will | |
87 | * see the same numbers. Nothing in the code currently calls this in an | |
88 | * interrupt handler, so this is considered to be okay. If that becomes a | |
89 | * problem, we could create a set of per-cpu variables for interrupt handlers | |
90 | * and use them when in_interrupt() from linux/preempt_mask.h evaluates to | |
91 | * true. | |
92 | */ | |
93 | static DEFINE_PER_CPU(uint64_t[2], spl_pseudo_entropy); | |
94 | ||
95 | /* | |
96 | * spl_rand_next()/spl_rand_jump() are copied from the following CC-0 licensed | |
97 | * file: | |
98 | * | |
99 | * http://xorshift.di.unimi.it/xorshift128plus.c | |
100 | */ | |
101 | ||
/*
 * Advance the two-word xorshift128+ state `s` by one step.
 *
 * On return s[0] holds the previous s[1] and s[1] holds the newly scrambled
 * word.  The value returned is the sum of the new s[1] and the previous s[1].
 */
static inline uint64_t
spl_rand_next(uint64_t *s)
{
	const uint64_t prev_hi = s[1];
	uint64_t x = s[0];

	x ^= x << 23;					/* a */
	x ^= prev_hi ^ (x >> 18) ^ (prev_hi >> 5);	/* b, c */

	s[0] = prev_hi;
	s[1] = x;

	return (x + prev_hi);
}
112 | ||
/*
 * Jump the generator state `s` forward using the JUMP polynomial below.
 *
 * For every set bit of the polynomial the current state is XOR-accumulated,
 * and the generator is stepped once per bit regardless.  The accumulated
 * words replace the state at the end.  spl_random_init() relies on this to
 * space the per-CPU sequences 2^64 steps apart.
 */
static inline void
spl_rand_jump(uint64_t *s)
{
	static const uint64_t JUMP[] =
	    { 0x8a5cd789635d2dff, 0x121fd2155c472f96 };

	uint64_t acc0 = 0;
	uint64_t acc1 = 0;
	int word, bit;

	for (word = 0; word < sizeof (JUMP) / sizeof (*JUMP); word++) {
		const uint64_t poly = JUMP[word];

		for (bit = 0; bit < 64; bit++) {
			if ((poly >> bit) & 1ULL) {
				acc0 ^= s[0];
				acc1 ^= s[1];
			}
			(void) spl_rand_next(s);
		}
	}

	s[0] = acc0;
	s[1] = acc1;
}
134 | ||
135 | int | |
136 | random_get_pseudo_bytes(uint8_t *ptr, size_t len) | |
137 | { | |
138 | uint64_t *xp, s[2]; | |
139 | ||
140 | ASSERT(ptr); | |
141 | ||
142 | xp = get_cpu_var(spl_pseudo_entropy); | |
143 | ||
144 | s[0] = xp[0]; | |
145 | s[1] = xp[1]; | |
146 | ||
147 | while (len) { | |
148 | union { | |
149 | uint64_t ui64; | |
150 | uint8_t byte[sizeof (uint64_t)]; | |
151 | }entropy; | |
152 | int i = MIN(len, sizeof (uint64_t)); | |
153 | ||
154 | len -= i; | |
155 | entropy.ui64 = spl_rand_next(s); | |
156 | ||
157 | while (i--) | |
158 | *ptr++ = entropy.byte[i]; | |
159 | } | |
160 | ||
161 | xp[0] = s[0]; | |
162 | xp[1] = s[1]; | |
163 | ||
164 | put_cpu_var(spl_pseudo_entropy); | |
165 | ||
166 | return (0); | |
167 | } | |
168 | ||
169 | ||
170 | EXPORT_SYMBOL(random_get_pseudo_bytes); | |
171 | ||
a4bfd8ea | 172 | #if BITS_PER_LONG == 32 |
b61a6e8b | 173 | /* |
a4bfd8ea BB |
174 | * Support 64/64 => 64 division on a 32-bit platform. While the kernel |
175 | * provides a div64_u64() function for this we do not use it because the | |
176 | * implementation is flawed. There are cases which return incorrect | |
177 | * results as late as linux-2.6.35. Until this is fixed upstream the | |
178 | * spl must provide its own implementation. | |
179 | * | |
180 | * This implementation is a slightly modified version of the algorithm | |
181 | * proposed by the book 'Hacker's Delight'. The original source can be | |
182 | * found here and is available for use without restriction. | |
183 | * | |
184 | * http://www.hackersdelight.org/HDcode/newCode/divDouble.c | |
185 | */ | |
186 | ||
187 | /* | |
188 | * Calculate the number of leading zeros for a 64-bit value. | |
189 | */ | |
/*
 * Count the leading zero bits of `x`; returns 64 when x is zero.
 *
 * This is a binary search: at each step the top `width` bits are tested,
 * and if they are all clear they are counted and shifted out.
 */
static int
nlz64(uint64_t x)
{
	int zeros = 0;
	int width;

	if (x == 0)
		return (64);

	for (width = 32; width >= 1; width >>= 1) {
		if ((x >> (64 - width)) == 0) {
			zeros += width;
			x <<= width;
		}
	}

	return (zeros);
}
207 | ||
208 | /* | |
209 | * Newer kernels have a div_u64() function but we define our own | |
210 | * to simplify portability between kernel versions. | |
211 | */ | |
/*
 * Divide the 64-bit value `u` by the 32-bit value `v` using do_div() and
 * return the quotient; the remainder reported by do_div() is discarded.
 */
static inline uint64_t
__div_u64(uint64_t u, uint32_t v)
{
	uint64_t quotient = u;

	(void) do_div(quotient, v);

	return (quotient);
}
218 | ||
219 | /* | |
220 | * Implementation of 64-bit unsigned division for 32-bit machines. | |
221 | * | |
222 | * First the procedure takes care of the case in which the divisor is a | |
223 | * 32-bit quantity. There are two subcases: (1) If the left half of the | |
224 | * dividend is less than the divisor, one execution of do_div() is all that | |
225 | * is required (overflow is not possible). (2) Otherwise it does two | |
226 | * divisions, using the grade school method. | |
b61a6e8b | 227 | */ |
/*
 * 64-bit unsigned division u / v built from 64/32 divisions.
 *
 * Three cases are handled:
 *   - v needs more than 32 bits: estimate the quotient from the normalized
 *     top half of v, then correct the estimate by at most one.
 *   - v fits in 32 bits and the high half of u is below v: one division.
 *   - v fits in 32 bits otherwise: two divisions, grade-school style.
 */
uint64_t
__udivdi3(uint64_t u, uint64_t v)
{
	uint64_t u_hi, u_lo, v_norm, q_hi, q_lo, rem, q;
	int shift;

	if ((v >> 32) != 0) {
		/* v >= 2**32, so 0 <= shift <= 31. */
		shift = nlz64(v);
		/* Normalize the divisor so its MSB is 1; keep the top half. */
		v_norm = (v << shift) >> 32;
		/* Halve u first so the 64/32 division cannot overflow. */
		q = __div_u64(u >> 1, v_norm);
		/* Undo both the normalization and the halving of u. */
		q = (q << shift) >> 31;

		/* Make the estimate correct or too small by one ... */
		if (q != 0)
			q = q - 1;
		/* ... then bump it if a whole divisor still fits. */
		if ((u - q * v) >= v)
			q = q + 1;

		return (q);
	}

	/* v < 2**32 and the quotient fits in 32 bits: single division. */
	if ((u >> 32) < v)
		return (__div_u64(u, v));

	/* v < 2**32 but the quotient needs two 32-bit digits. */
	u_hi = u >> 32;
	u_lo = u & 0xFFFFFFFF;
	q_hi = __div_u64(u_hi, v);		/* First quotient digit. */
	rem = u_hi - q_hi * v;			/* First remainder, < v. */
	u_lo += (rem << 32);
	q_lo = __div_u64(u_lo, v);		/* Second quotient digit. */

	return ((q_hi << 32) + q_lo);
}
EXPORT_SYMBOL(__udivdi3);
262 | ||
263 | /* | |
a4bfd8ea BB |
264 | * Implementation of 64-bit signed division for 32-bit machines. |
265 | */ | |
/*
 * 64-bit signed division: divide the magnitudes with __udivdi3() and negate
 * the quotient when the operands have different signs.
 */
int64_t
__divdi3(int64_t u, int64_t v)
{
	/*
	 * The sign bits differ exactly when (u ^ v) is negative.  The shift
	 * is expected to smear that bit across the word (arithmetic shift),
	 * giving all ones for "negate" and zero otherwise.
	 */
	const int64_t sign = (u ^ v) >> 63;
	int64_t magnitude;

	magnitude = __udivdi3(abs64(u), abs64(v));

	/* (m ^ -1) - (-1) == -m, while (m ^ 0) - 0 == m. */
	return ((magnitude ^ sign) - sign);
}
EXPORT_SYMBOL(__divdi3);
275 | ||
276 | /* | |
277 | * Implementation of 64-bit unsigned modulo for 32-bit machines. | |
550f1705 | 278 | */ |
/*
 * 64-bit unsigned remainder, derived from the quotient computed by
 * __udivdi3(): dividend - divisor * (dividend / divisor).
 */
uint64_t
__umoddi3(uint64_t dividend, uint64_t divisor)
{
	const uint64_t quotient = __udivdi3(dividend, divisor);

	return (dividend - (divisor * quotient));
}
EXPORT_SYMBOL(__umoddi3);
a4bfd8ea | 285 | |
6ecfd2b5 CC |
286 | /* |
287 | * Implementation of 64-bit unsigned division/modulo for 32-bit machines. | |
288 | */ | |
/*
 * Return n / d and, when `r` is non-NULL, store n % d through it.
 */
uint64_t
__udivmoddi4(uint64_t n, uint64_t d, uint64_t *r)
{
	const uint64_t quotient = __udivdi3(n, d);

	if (r != NULL)
		*r = n - (d * quotient);

	return (quotient);
}
EXPORT_SYMBOL(__udivmoddi4);
298 | ||
299 | /* | |
300 | * Implementation of 64-bit signed division/modulo for 32-bit machines. | |
301 | */ | |
302 | int64_t | |
303 | __divmoddi4(int64_t n, int64_t d, int64_t *r) | |
304 | { | |
305 | int64_t q, rr; | |
306 | boolean_t nn = B_FALSE; | |
307 | boolean_t nd = B_FALSE; | |
308 | if (n < 0) { | |
309 | nn = B_TRUE; | |
310 | n = -n; | |
311 | } | |
312 | if (d < 0) { | |
313 | nd = B_TRUE; | |
314 | d = -d; | |
315 | } | |
316 | ||
317 | q = __udivmoddi4(n, d, (uint64_t *)&rr); | |
318 | ||
319 | if (nn != nd) | |
320 | q = -q; | |
321 | if (nn) | |
322 | rr = -rr; | |
323 | if (r) | |
324 | *r = rr; | |
325 | return (q); | |
326 | } | |
327 | EXPORT_SYMBOL(__divmoddi4); | |
328 | ||
ef6f91ce JL |
329 | #if defined(__arm) || defined(__arm__) |
330 | /* | |
93b0dc92 JL |
331 | * Implementation of 64-bit (un)signed division for 32-bit arm machines. |
332 | * | |
333 | * Run-time ABI for the ARM Architecture (page 20). A pair of (unsigned) | |
334 | * long longs is returned in {{r0, r1}, {r2,r3}}, the quotient in {r0, r1}, | |
335 | * and the remainder in {r2, r3}. The return type is specifically left | |
336 | * set to 'void' to ensure the compiler does not overwrite these registers | |
337 | * during the return. All results are in registers as per ABI | |
ef6f91ce | 338 | */ |
93b0dc92 | 339 | void |
ef6f91ce JL |
340 | __aeabi_uldivmod(uint64_t u, uint64_t v) |
341 | { | |
93b0dc92 JL |
342 | uint64_t res; |
343 | uint64_t mod; | |
344 | ||
345 | res = __udivdi3(u, v); | |
346 | mod = __umoddi3(u, v); | |
347 | { | |
348 | register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF); | |
349 | register uint32_t r1 asm("r1") = (res >> 32); | |
350 | register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF); | |
351 | register uint32_t r3 asm("r3") = (mod >> 32); | |
352 | ||
5461eefe | 353 | /* BEGIN CSTYLED */ |
93b0dc92 JL |
354 | asm volatile("" |
355 | : "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3) /* output */ | |
356 | : "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */ | |
5461eefe | 357 | /* END CSTYLED */ |
93b0dc92 JL |
358 | |
359 | return; /* r0; */ | |
360 | } | |
ef6f91ce JL |
361 | } |
362 | EXPORT_SYMBOL(__aeabi_uldivmod); | |
363 | ||
93b0dc92 | 364 | void |
ef6f91ce JL |
365 | __aeabi_ldivmod(int64_t u, int64_t v) |
366 | { | |
93b0dc92 JL |
367 | int64_t res; |
368 | uint64_t mod; | |
369 | ||
370 | res = __divdi3(u, v); | |
371 | mod = __umoddi3(u, v); | |
372 | { | |
373 | register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF); | |
374 | register uint32_t r1 asm("r1") = (res >> 32); | |
375 | register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF); | |
376 | register uint32_t r3 asm("r3") = (mod >> 32); | |
377 | ||
5461eefe | 378 | /* BEGIN CSTYLED */ |
93b0dc92 JL |
379 | asm volatile("" |
380 | : "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3) /* output */ | |
381 | : "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */ | |
5461eefe | 382 | /* END CSTYLED */ |
93b0dc92 JL |
383 | |
384 | return; /* r0; */ | |
385 | } | |
ef6f91ce JL |
386 | } |
387 | EXPORT_SYMBOL(__aeabi_ldivmod); | |
388 | #endif /* __arm || __arm__ */ | |
96dded38 | 389 | #endif /* BITS_PER_LONG */ |
b61a6e8b | 390 | |
5461eefe BB |
391 | /* |
392 | * NOTE: The strtoxx behavior is solely based on my reading of the Solaris | |
b871b8cd BB |
393 | * ddi_strtol(9F) man page. I have not verified the behavior of these |
394 | * functions against their Solaris counterparts. It is possible that I | |
96dded38 | 395 | * may have misinterpreted the man page or the man page is incorrect. |
b871b8cd | 396 | */ |
int ddi_strtoul(const char *, char **, int, unsigned long *);
int ddi_strtol(const char *, char **, int, long *);
int ddi_strtoull(const char *, char **, int, unsigned long long *);
int ddi_strtoll(const char *, char **, int, long long *);

/*
 * Generate ddi_strtou<type>(): parse an unsigned integer of type `valtype`
 * from `str`.
 *
 * base:    numeric base; 0 selects it from the prefix ("0x"/"0X" followed
 *          by a hex digit means 16, any other leading '0' means 8,
 *          anything else means 10).
 * endptr:  optional; receives a pointer to the first unparsed character.
 * result:  receives the parsed value on success.
 *
 * Returns 0 on success, EINVAL for an empty string and ERANGE when the
 * value does not fit in `valtype`.  Nothing is stored on failure.
 *
 * A leading '0' under base 0 is parsed as an ordinary octal digit rather
 * than skipped, so "0" yields 0 and "09" yields 0 with *endptr at '9'.
 */
#define	define_ddi_strtoux(type, valtype)				\
int ddi_strtou##type(const char *str, char **endptr,			\
    int base, valtype *result)						\
{									\
	const valtype max_value = (valtype)-1;				\
	valtype value = 0;						\
	char *ptr = (char *)str;					\
	int digit;							\
									\
	if (strlen(ptr) == 0)						\
		return (EINVAL);					\
									\
	/* Auto-detect base based on prefix */				\
	if (!base) {							\
		if (str[0] != '0') {					\
			base = 10; /* decimal */			\
		} else if (tolower((unsigned char)str[1]) == 'x' &&	\
		    isxdigit((unsigned char)str[2])) {			\
			base = 16; /* hex */				\
			ptr += 2;					\
		} else {						\
			base = 8; /* octal */				\
		}							\
	}								\
									\
	while (1) {							\
		unsigned char c = (unsigned char)*ptr;			\
									\
		if (isdigit(c))						\
			digit = c - '0';				\
		else if (isalpha(c))					\
			digit = tolower(c) - 'a' + 10;			\
		else							\
			break;						\
									\
		if (digit >= base)					\
			break;						\
									\
		/* Refuse before value * base + digit can wrap */	\
		if (value > (max_value - digit) / base)			\
			return (ERANGE);				\
									\
		value = value * base + digit;				\
		ptr++;							\
	}								\
									\
	*result = value;						\
									\
	if (endptr)							\
		*endptr = ptr;						\
									\
	return (0);							\
}

/*
 * Generate ddi_strto<type>(): signed counterpart of ddi_strtou<type>().
 *
 * An optional leading '-' is consumed here and the remainder is handed to
 * the unsigned parser.  `valtype` must be spelled so that
 * `unsigned valtype` is a valid type (e.g. long, long long).  `endptr` may
 * be NULL.  If the '-' is not followed by any digit, *endptr is reset to
 * `str` and the result is 0.  Errors from the unsigned parser are returned
 * unchanged and leave *result and *endptr untouched.
 */
#define	define_ddi_strtox(type, valtype)				\
int ddi_strto##type(const char *str, char **endptr,			\
    int base, valtype *result)						\
{									\
	unsigned valtype magnitude = 0;					\
	char *end = (char *)str;					\
	int negative = (*str == '-');					\
	int rc;								\
									\
	rc = ddi_strtou##type(negative ? str + 1 : str, &end, base,	\
	    &magnitude);						\
	if (rc)								\
		return (rc);						\
									\
	if (negative) {							\
		if (end == str + 1)					\
			end = (char *)str;				\
		else							\
			magnitude = 0 - magnitude;			\
	}								\
									\
	*result = (valtype)magnitude;					\
									\
	if (endptr)							\
		*endptr = end;						\
									\
	return (0);							\
}

define_ddi_strtoux(l, unsigned long)
define_ddi_strtox(l, long)
define_ddi_strtoux(ll, unsigned long long)
define_ddi_strtox(ll, long long)
484 | ||
/* Export the four ddi_strto*() conversion routines. */
EXPORT_SYMBOL(ddi_strtoul);
EXPORT_SYMBOL(ddi_strtol);
EXPORT_SYMBOL(ddi_strtoull);
EXPORT_SYMBOL(ddi_strtoll);
2f5d55aa | 489 | |
d3126abe BB |
490 | int |
491 | ddi_copyin(const void *from, void *to, size_t len, int flags) | |
492 | { | |
493 | /* Fake ioctl() issued by kernel, 'from' is a kernel address */ | |
494 | if (flags & FKIOCTL) { | |
495 | memcpy(to, from, len); | |
5461eefe | 496 | return (0); |
d3126abe BB |
497 | } |
498 | ||
5461eefe | 499 | return (copyin(from, to, len)); |
d3126abe BB |
500 | } |
501 | EXPORT_SYMBOL(ddi_copyin); | |
502 | ||
503 | int | |
504 | ddi_copyout(const void *from, void *to, size_t len, int flags) | |
505 | { | |
506 | /* Fake ioctl() issued by kernel, 'from' is a kernel address */ | |
507 | if (flags & FKIOCTL) { | |
508 | memcpy(to, from, len); | |
5461eefe | 509 | return (0); |
d3126abe BB |
510 | } |
511 | ||
5461eefe | 512 | return (copyout(from, to, len)); |
d3126abe BB |
513 | } |
514 | EXPORT_SYMBOL(ddi_copyout); | |
515 | ||
0d54dcb5 DH |
516 | /* |
517 | * Read the unique system identifier from the /etc/hostid file. | |
518 | * | |
519 | * The behavior of /usr/bin/hostid on Linux systems with the | |
520 | * regular eglibc and coreutils is: | |
521 | * | |
522 | * 1. Generate the value if the /etc/hostid file does not exist | |
523 | * or if the /etc/hostid file is less than four bytes in size. | |
524 | * | |
525 | * 2. If the /etc/hostid file is at least 4 bytes, then return | |
526 | * the first four bytes [0..3] in native endian order. | |
527 | * | |
528 | * 3. Always ignore bytes [4..] if they exist in the file. | |
529 | * | |
530 | * Only the first four bytes are significant, even on systems that | |
531 | * have a 64-bit word size. | |
532 | * | |
533 | * See: | |
534 | * | |
535 | * eglibc: sysdeps/unix/sysv/linux/gethostid.c | |
536 | * coreutils: src/hostid.c | |
537 | * | |
538 | * Notes: | |
539 | * | |
540 | * The /etc/hostid file on Solaris is a text file that often reads: | |
541 | * | |
542 | * # DO NOT EDIT | |
543 | * "0123456789" | |
544 | * | |
545 | * Directly copying this file to Linux results in a constant | |
546 | * hostid of 4f442023 because the default comment constitutes | |
547 | * the first four bytes of the file. | |
548 | * | |
549 | */ | |
550 | ||
551 | char *spl_hostid_path = HW_HOSTID_PATH; | |
552 | module_param(spl_hostid_path, charp, 0444); | |
553 | MODULE_PARM_DESC(spl_hostid_path, "The system hostid file (/etc/hostid)"); | |
554 | ||
555 | static int | |
c93d9dff | 556 | hostid_read(uint32_t *hostid) |
0d54dcb5 | 557 | { |
0d54dcb5 DH |
558 | uint64_t size; |
559 | struct _buf *file; | |
c93d9dff BB |
560 | uint32_t value = 0; |
561 | int error; | |
0d54dcb5 DH |
562 | |
563 | file = kobj_open_file(spl_hostid_path); | |
6b3b569d | 564 | if (file == (struct _buf *)-1) |
c93d9dff | 565 | return (ENOENT); |
0d54dcb5 | 566 | |
c93d9dff BB |
567 | error = kobj_get_filesize(file, &size); |
568 | if (error) { | |
0d54dcb5 | 569 | kobj_close_file(file); |
c93d9dff | 570 | return (error); |
0d54dcb5 DH |
571 | } |
572 | ||
5461eefe | 573 | if (size < sizeof (HW_HOSTID_MASK)) { |
0d54dcb5 | 574 | kobj_close_file(file); |
c93d9dff | 575 | return (EINVAL); |
0d54dcb5 DH |
576 | } |
577 | ||
c93d9dff BB |
578 | /* |
579 | * Read directly into the variable like eglibc does. | |
580 | * Short reads are okay; native behavior is preserved. | |
581 | */ | |
5461eefe | 582 | error = kobj_read_file(file, (char *)&value, sizeof (value), 0); |
c93d9dff | 583 | if (error < 0) { |
0d54dcb5 | 584 | kobj_close_file(file); |
c93d9dff | 585 | return (EIO); |
0d54dcb5 DH |
586 | } |
587 | ||
588 | /* Mask down to 32 bits like coreutils does. */ | |
c93d9dff | 589 | *hostid = (value & HW_HOSTID_MASK); |
0d54dcb5 | 590 | kobj_close_file(file); |
c93d9dff | 591 | |
5461eefe | 592 | return (0); |
0d54dcb5 DH |
593 | } |
594 | ||
c93d9dff BB |
595 | /* |
596 | * Return the system hostid. Preferentially use the spl_hostid module option | |
597 | * when set, otherwise use the value in the /etc/hostid file. | |
598 | */ | |
99639e4a BB |
599 | uint32_t |
600 | zone_get_hostid(void *zone) | |
601 | { | |
c93d9dff | 602 | uint32_t hostid; |
99639e4a | 603 | |
c93d9dff | 604 | ASSERT3P(zone, ==, NULL); |
a9f2397e | 605 | |
c93d9dff BB |
606 | if (spl_hostid != 0) |
607 | return ((uint32_t)(spl_hostid & HW_HOSTID_MASK)); | |
a9f2397e | 608 | |
c93d9dff BB |
609 | if (hostid_read(&hostid) == 0) |
610 | return (hostid); | |
086476f9 | 611 | |
c93d9dff | 612 | return (0); |
99639e4a BB |
613 | } |
614 | EXPORT_SYMBOL(zone_get_hostid); | |
615 | ||
/*
 * Bring up the kmem layer followed by the vmem layer.
 *
 * Returns 0 on success or the first non-zero error.  If the vmem step
 * fails, the kmem layer is torn down again before returning.
 */
static int
spl_kvmem_init(void)
{
	int rc;

	rc = spl_kmem_init();
	if (rc == 0) {
		rc = spl_vmem_init();
		if (rc != 0)
			spl_kmem_fini();
	}

	return (rc);
}
633 | ||
0b43696e RY |
634 | /* |
635 | * We initialize the random number generator with 128 bits of entropy from the | |
636 | * system random number generator. In the improbable case that we have a zero | |
637 | * seed, we fallback to the system jiffies, unless it is also zero, in which | |
638 | * situation we use a preprogrammed seed. We step forward by 2^64 iterations to | |
639 | * initialize each of the per-cpu seeds so that the sequences generated on each | |
640 | * CPU are guaranteed to never overlap in practice. | |
641 | */ | |
642 | static void __init | |
643 | spl_random_init(void) | |
644 | { | |
645 | uint64_t s[2]; | |
646 | int i; | |
647 | ||
648 | get_random_bytes(s, sizeof (s)); | |
649 | ||
650 | if (s[0] == 0 && s[1] == 0) { | |
651 | if (jiffies != 0) { | |
652 | s[0] = jiffies; | |
653 | s[1] = ~0 - jiffies; | |
654 | } else { | |
655 | (void) memcpy(s, "improbable seed", sizeof (s)); | |
656 | } | |
657 | printk("SPL: get_random_bytes() returned 0 " | |
658 | "when generating random seed. Setting initial seed to " | |
659 | "0x%016llx%016llx.", cpu_to_be64(s[0]), cpu_to_be64(s[1])); | |
660 | } | |
661 | ||
0d267566 | 662 | for_each_possible_cpu(i) { |
0b43696e RY |
663 | uint64_t *wordp = per_cpu(spl_pseudo_entropy, i); |
664 | ||
665 | spl_rand_jump(s); | |
666 | ||
667 | wordp[0] = s[0]; | |
668 | wordp[1] = s[1]; | |
669 | } | |
670 | } | |
671 | ||
/*
 * Tear down the vmem and kmem layers, in the reverse of the order used by
 * spl_kvmem_init().
 */
static void
spl_kvmem_fini(void)
{
	spl_vmem_fini();
	spl_kmem_fini();
}
678 | ||
c1bc8e61 BB |
679 | static int __init |
680 | spl_init(void) | |
57d1b188 | 681 | { |
682 | int rc = 0; | |
f23e92fa | 683 | |
341dfdb3 | 684 | bzero(&p0, sizeof (proc_t)); |
0b43696e RY |
685 | spl_random_init(); |
686 | ||
e5b9b344 | 687 | if ((rc = spl_kvmem_init())) |
8d9a23e8 | 688 | goto out1; |
8d0f1ee9 | 689 | |
9ab1ac14 | 690 | if ((rc = spl_mutex_init())) |
8d9a23e8 | 691 | goto out2; |
9ab1ac14 | 692 | |
d28db80f | 693 | if ((rc = spl_rw_init())) |
8d9a23e8 | 694 | goto out3; |
8d0f1ee9 | 695 | |
16522ac2 | 696 | if ((rc = spl_tsd_init())) |
8d9a23e8 | 697 | goto out4; |
af828292 | 698 | |
16522ac2 | 699 | if ((rc = spl_taskq_init())) |
8d9a23e8 | 700 | goto out5; |
04a479f7 | 701 | |
16522ac2 | 702 | if ((rc = spl_kmem_cache_init())) |
8d9a23e8 | 703 | goto out6; |
e9cb2b4f | 704 | |
16522ac2 | 705 | if ((rc = spl_vn_init())) |
8d9a23e8 | 706 | goto out7; |
d28db80f | 707 | |
16522ac2 | 708 | if ((rc = spl_proc_init())) |
8d9a23e8 | 709 | goto out8; |
9fe45dc1 | 710 | |
16522ac2 | 711 | if ((rc = spl_kstat_init())) |
8d9a23e8 | 712 | goto out9; |
5c1967eb | 713 | |
16522ac2 CC |
714 | if ((rc = spl_zlib_init())) |
715 | goto out10; | |
716 | ||
a9f2397e | 717 | printk(KERN_NOTICE "SPL: Loaded module v%s-%s%s\n", SPL_META_VERSION, |
5461eefe | 718 | SPL_META_RELEASE, SPL_DEBUG_STR); |
8d9a23e8 | 719 | return (rc); |
44778f41 | 720 | |
16522ac2 CC |
721 | out10: |
722 | spl_kstat_fini(); | |
9fe45dc1 | 723 | out9: |
16522ac2 | 724 | spl_proc_fini(); |
d28db80f | 725 | out8: |
16522ac2 | 726 | spl_vn_fini(); |
d28db80f | 727 | out7: |
16522ac2 | 728 | spl_kmem_cache_fini(); |
d28db80f | 729 | out6: |
e9cb2b4f | 730 | spl_taskq_fini(); |
16522ac2 CC |
731 | out5: |
732 | spl_tsd_fini(); | |
d28db80f BB |
733 | out4: |
734 | spl_rw_fini(); | |
9ab1ac14 | 735 | out3: |
736 | spl_mutex_fini(); | |
8d0f1ee9 | 737 | out2: |
e5b9b344 | 738 | spl_kvmem_fini(); |
d28db80f | 739 | out1: |
0835057e | 740 | printk(KERN_NOTICE "SPL: Failed to Load Solaris Porting Layer " |
5461eefe BB |
741 | "v%s-%s%s, rc = %d\n", SPL_META_VERSION, SPL_META_RELEASE, |
742 | SPL_DEBUG_STR, rc); | |
8d9a23e8 | 743 | |
c1bc8e61 | 744 | return (rc); |
70eadc19 | 745 | } |
746 | ||
c1bc8e61 | 747 | static void __exit |
51a727e9 | 748 | spl_fini(void) |
70eadc19 | 749 | { |
0835057e | 750 | printk(KERN_NOTICE "SPL: Unloaded module v%s-%s%s\n", |
5461eefe | 751 | SPL_META_VERSION, SPL_META_RELEASE, SPL_DEBUG_STR); |
1114ae6a | 752 | spl_zlib_fini(); |
1114ae6a BB |
753 | spl_kstat_fini(); |
754 | spl_proc_fini(); | |
12ff95ff | 755 | spl_vn_fini(); |
16522ac2 | 756 | spl_kmem_cache_fini(); |
e9cb2b4f | 757 | spl_taskq_fini(); |
16522ac2 | 758 | spl_tsd_fini(); |
d28db80f | 759 | spl_rw_fini(); |
2fb9b26a | 760 | spl_mutex_fini(); |
e5b9b344 | 761 | spl_kvmem_fini(); |
70eadc19 | 762 | } |
763 | ||
764 | module_init(spl_init); | |
765 | module_exit(spl_fini); | |
766 | ||
70eadc19 | 767 | MODULE_DESCRIPTION("Solaris Porting Layer"); |
62032954 BB |
768 | MODULE_AUTHOR(SPL_META_AUTHOR); |
769 | MODULE_LICENSE(SPL_META_LICENSE); | |
921a35ad | 770 | MODULE_VERSION(SPL_META_VERSION "-" SPL_META_RELEASE); |