]>
Commit | Line | Data |
---|---|---|
4b393c50 | 1 | /* |
716154c5 BB |
2 | * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. |
3 | * Copyright (C) 2007 The Regents of the University of California. | |
4 | * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). | |
5 | * Written by Brian Behlendorf <behlendorf1@llnl.gov>. | |
715f6251 BB |
6 | * UCRL-CODE-235197 |
7 | * | |
716154c5 | 8 | * This file is part of the SPL, Solaris Porting Layer. |
3d6af2dd | 9 | * For details, see <http://zfsonlinux.org/>. |
716154c5 BB |
10 | * |
11 | * The SPL is free software; you can redistribute it and/or modify it | |
12 | * under the terms of the GNU General Public License as published by the | |
13 | * Free Software Foundation; either version 2 of the License, or (at your | |
14 | * option) any later version. | |
715f6251 | 15 | * |
716154c5 | 16 | * The SPL is distributed in the hope that it will be useful, but WITHOUT |
715f6251 BB |
17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
19 | * for more details. | |
20 | * | |
21 | * You should have received a copy of the GNU General Public License along | |
716154c5 | 22 | * with the SPL. If not, see <http://www.gnu.org/licenses/>. |
5461eefe | 23 | * |
716154c5 | 24 | * Solaris Porting Layer (SPL) Generic Implementation. |
4b393c50 | 25 | */ |
715f6251 | 26 | |
14c5326c | 27 | #include <sys/sysmacros.h> |
99639e4a | 28 | #include <sys/systeminfo.h> |
af828292 | 29 | #include <sys/vmsystm.h> |
0d54dcb5 | 30 | #include <sys/kobj.h> |
c19c06f3 | 31 | #include <sys/kmem.h> |
e5b9b344 BB |
32 | #include <sys/kmem_cache.h> |
33 | #include <sys/vmem.h> | |
9ab1ac14 | 34 | #include <sys/mutex.h> |
d28db80f | 35 | #include <sys/rwlock.h> |
e9cb2b4f | 36 | #include <sys/taskq.h> |
9fe45dc1 | 37 | #include <sys/tsd.h> |
5c1967eb | 38 | #include <sys/zmod.h> |
8d0f1ee9 | 39 | #include <sys/debug.h> |
57d1b188 | 40 | #include <sys/proc.h> |
04a479f7 | 41 | #include <sys/kstat.h> |
d3126abe | 42 | #include <sys/file.h> |
e5b9b344 | 43 | #include <linux/ctype.h> |
0b43696e RY |
44 | #include <sys/disp.h> |
45 | #include <sys/random.h> | |
a9125891 | 46 | #include <sys/strings.h> |
f23e92fa | 47 | #include <linux/kmod.h> |
4cbde2ec | 48 | #include "zfs_gitrev.h" |
f23e92fa | 49 | |
4cbde2ec | 50 | char spl_gitrev[64] = ZFS_META_GITREV; |
3561541c | 51 | |
3673d032 | 52 | /* BEGIN CSTYLED */ |
acf0ade3 | 53 | unsigned long spl_hostid = 0; |
f23e92fa | 54 | EXPORT_SYMBOL(spl_hostid); |
93ce2b4c | 55 | /* BEGIN CSTYLED */ |
fa6f7d8f DH |
56 | module_param(spl_hostid, ulong, 0644); |
57 | MODULE_PARM_DESC(spl_hostid, "The system hostid."); | |
3673d032 | 58 | /* END CSTYLED */ |
8d0f1ee9 | 59 | |
341dfdb3 | 60 | proc_t p0; |
f1b59d26 | 61 | EXPORT_SYMBOL(p0); |
70eadc19 | 62 | |
0b43696e RY |
63 | /* |
64 | * Xorshift Pseudo Random Number Generator based on work by Sebastiano Vigna | |
65 | * | |
66 | * "Further scramblings of Marsaglia's xorshift generators" | |
67 | * http://vigna.di.unimi.it/ftp/papers/xorshiftplus.pdf | |
68 | * | |
69 | * random_get_pseudo_bytes() is an API function on Illumos whose sole purpose | |
70 | * is to provide bytes containing random numbers. It is mapped to /dev/urandom | |
71 | * on Illumos, which uses a "FIPS 186-2 algorithm". No user of the SPL's | |
72 | * random_get_pseudo_bytes() needs bytes that are of cryptographic quality, so | |
73 | * we can implement it using a fast PRNG that we seed using Linux' actual | |
74 | * equivalent to random_get_pseudo_bytes(). We do this by providing each CPU | |
75 | * with an independent seed so that all calls to random_get_pseudo_bytes() are | |
76 | * free of atomic instructions. | |
77 | * | |
78 | * A consequence of using a fast PRNG is that using random_get_pseudo_bytes() | |
79 | * to generate words larger than 128 bits will paradoxically be limited to | |
80 | * `2^128 - 1` possibilities. This is because we have a sequence of `2^128 - 1` | |
81 | * 128-bit words and selecting the first will implicitly select the second. If | |
82 | * a caller finds this behavior undesireable, random_get_bytes() should be used | |
83 | * instead. | |
84 | * | |
85 | * XXX: Linux interrupt handlers that trigger within the critical section | |
86 | * formed by `s[1] = xp[1];` and `xp[0] = s[0];` and call this function will | |
87 | * see the same numbers. Nothing in the code currently calls this in an | |
88 | * interrupt handler, so this is considered to be okay. If that becomes a | |
89 | * problem, we could create a set of per-cpu variables for interrupt handlers | |
90 | * and use them when in_interrupt() from linux/preempt_mask.h evaluates to | |
91 | * true. | |
92 | */ | |
93 | static DEFINE_PER_CPU(uint64_t[2], spl_pseudo_entropy); | |
94 | ||
95 | /* | |
96 | * spl_rand_next()/spl_rand_jump() are copied from the following CC-0 licensed | |
97 | * file: | |
98 | * | |
99 | * http://xorshift.di.unimi.it/xorshift128plus.c | |
100 | */ | |
101 | ||
102 | static inline uint64_t | |
3673d032 BB |
103 | spl_rand_next(uint64_t *s) |
104 | { | |
0b43696e RY |
105 | uint64_t s1 = s[0]; |
106 | const uint64_t s0 = s[1]; | |
107 | s[0] = s0; | |
108 | s1 ^= s1 << 23; // a | |
109 | s[1] = s1 ^ s0 ^ (s1 >> 18) ^ (s0 >> 5); // b, c | |
110 | return (s[1] + s0); | |
111 | } | |
112 | ||
113 | static inline void | |
3673d032 BB |
114 | spl_rand_jump(uint64_t *s) |
115 | { | |
5461eefe BB |
116 | static const uint64_t JUMP[] = |
117 | { 0x8a5cd789635d2dff, 0x121fd2155c472f96 }; | |
0b43696e RY |
118 | |
119 | uint64_t s0 = 0; | |
120 | uint64_t s1 = 0; | |
121 | int i, b; | |
5461eefe BB |
122 | for (i = 0; i < sizeof (JUMP) / sizeof (*JUMP); i++) |
123 | for (b = 0; b < 64; b++) { | |
0b43696e RY |
124 | if (JUMP[i] & 1ULL << b) { |
125 | s0 ^= s[0]; | |
126 | s1 ^= s[1]; | |
127 | } | |
128 | (void) spl_rand_next(s); | |
129 | } | |
130 | ||
131 | s[0] = s0; | |
132 | s[1] = s1; | |
133 | } | |
134 | ||
135 | int | |
136 | random_get_pseudo_bytes(uint8_t *ptr, size_t len) | |
137 | { | |
138 | uint64_t *xp, s[2]; | |
139 | ||
140 | ASSERT(ptr); | |
141 | ||
142 | xp = get_cpu_var(spl_pseudo_entropy); | |
143 | ||
144 | s[0] = xp[0]; | |
145 | s[1] = xp[1]; | |
146 | ||
147 | while (len) { | |
148 | union { | |
149 | uint64_t ui64; | |
150 | uint8_t byte[sizeof (uint64_t)]; | |
151 | }entropy; | |
152 | int i = MIN(len, sizeof (uint64_t)); | |
153 | ||
154 | len -= i; | |
155 | entropy.ui64 = spl_rand_next(s); | |
156 | ||
157 | while (i--) | |
158 | *ptr++ = entropy.byte[i]; | |
159 | } | |
160 | ||
161 | xp[0] = s[0]; | |
162 | xp[1] = s[1]; | |
163 | ||
164 | put_cpu_var(spl_pseudo_entropy); | |
165 | ||
166 | return (0); | |
167 | } | |
168 | ||
169 | ||
170 | EXPORT_SYMBOL(random_get_pseudo_bytes); | |
171 | ||
a4bfd8ea | 172 | #if BITS_PER_LONG == 32 |
b61a6e8b | 173 | /* |
a4bfd8ea BB |
174 | * Support 64/64 => 64 division on a 32-bit platform. While the kernel |
175 | * provides a div64_u64() function for this we do not use it because the | |
176 | * implementation is flawed. There are cases which return incorrect | |
177 | * results as late as linux-2.6.35. Until this is fixed upstream the | |
178 | * spl must provide its own implementation. | |
179 | * | |
180 | * This implementation is a slightly modified version of the algorithm | |
181 | * proposed by the book 'Hacker's Delight'. The original source can be | |
182 | * found here and is available for use without restriction. | |
183 | * | |
184 | * http://www.hackersdelight.org/HDcode/newCode/divDouble.c | |
185 | */ | |
186 | ||
187 | /* | |
188 | * Calculate number of leading of zeros for a 64-bit value. | |
189 | */ | |
190 | static int | |
3673d032 BB |
191 | nlz64(uint64_t x) |
192 | { | |
a4bfd8ea BB |
193 | register int n = 0; |
194 | ||
195 | if (x == 0) | |
5461eefe | 196 | return (64); |
a4bfd8ea | 197 | |
5461eefe BB |
198 | if (x <= 0x00000000FFFFFFFFULL) { n = n + 32; x = x << 32; } |
199 | if (x <= 0x0000FFFFFFFFFFFFULL) { n = n + 16; x = x << 16; } | |
200 | if (x <= 0x00FFFFFFFFFFFFFFULL) { n = n + 8; x = x << 8; } | |
201 | if (x <= 0x0FFFFFFFFFFFFFFFULL) { n = n + 4; x = x << 4; } | |
202 | if (x <= 0x3FFFFFFFFFFFFFFFULL) { n = n + 2; x = x << 2; } | |
203 | if (x <= 0x7FFFFFFFFFFFFFFFULL) { n = n + 1; } | |
a4bfd8ea | 204 | |
5461eefe | 205 | return (n); |
a4bfd8ea BB |
206 | } |
207 | ||
208 | /* | |
209 | * Newer kernels have a div_u64() function but we define our own | |
210 | * to simplify portibility between kernel versions. | |
211 | */ | |
212 | static inline uint64_t | |
213 | __div_u64(uint64_t u, uint32_t v) | |
214 | { | |
215 | (void) do_div(u, v); | |
5461eefe | 216 | return (u); |
a4bfd8ea BB |
217 | } |
218 | ||
219 | /* | |
220 | * Implementation of 64-bit unsigned division for 32-bit machines. | |
221 | * | |
222 | * First the procedure takes care of the case in which the divisor is a | |
223 | * 32-bit quantity. There are two subcases: (1) If the left half of the | |
224 | * dividend is less than the divisor, one execution of do_div() is all that | |
225 | * is required (overflow is not possible). (2) Otherwise it does two | |
226 | * divisions, using the grade school method. | |
b61a6e8b | 227 | */ |
1b4ad25e | 228 | uint64_t |
a4bfd8ea | 229 | __udivdi3(uint64_t u, uint64_t v) |
b61a6e8b | 230 | { |
a4bfd8ea BB |
231 | uint64_t u0, u1, v1, q0, q1, k; |
232 | int n; | |
233 | ||
234 | if (v >> 32 == 0) { // If v < 2**32: | |
235 | if (u >> 32 < v) { // If u/v cannot overflow, | |
5461eefe | 236 | return (__div_u64(u, v)); // just do one division. |
a4bfd8ea BB |
237 | } else { // If u/v would overflow: |
238 | u1 = u >> 32; // Break u into two halves. | |
239 | u0 = u & 0xFFFFFFFF; | |
240 | q1 = __div_u64(u1, v); // First quotient digit. | |
241 | k = u1 - q1 * v; // First remainder, < v. | |
242 | u0 += (k << 32); | |
243 | q0 = __div_u64(u0, v); // Seconds quotient digit. | |
5461eefe | 244 | return ((q1 << 32) + q0); |
a4bfd8ea BB |
245 | } |
246 | } else { // If v >= 2**32: | |
247 | n = nlz64(v); // 0 <= n <= 31. | |
248 | v1 = (v << n) >> 32; // Normalize divisor, MSB is 1. | |
249 | u1 = u >> 1; // To ensure no overflow. | |
250 | q1 = __div_u64(u1, v1); // Get quotient from | |
251 | q0 = (q1 << n) >> 31; // Undo normalization and | |
252 | // division of u by 2. | |
253 | if (q0 != 0) // Make q0 correct or | |
254 | q0 = q0 - 1; // too small by 1. | |
255 | if ((u - q0 * v) >= v) | |
256 | q0 = q0 + 1; // Now q0 is correct. | |
ef6f91ce | 257 | |
5461eefe | 258 | return (q0); |
a4bfd8ea | 259 | } |
550f1705 BB |
260 | } |
261 | EXPORT_SYMBOL(__udivdi3); | |
262 | ||
a9125891 BB |
263 | /* BEGIN CSTYLED */ |
264 | #ifndef abs64 | |
265 | #define abs64(x) ({ uint64_t t = (x) >> 63; ((x) ^ t) - t; }) | |
266 | #endif | |
267 | /* END CSTYLED */ | |
268 | ||
550f1705 | 269 | /* |
a4bfd8ea BB |
270 | * Implementation of 64-bit signed division for 32-bit machines. |
271 | */ | |
272 | int64_t | |
273 | __divdi3(int64_t u, int64_t v) | |
274 | { | |
275 | int64_t q, t; | |
276 | q = __udivdi3(abs64(u), abs64(v)); | |
277 | t = (u ^ v) >> 63; // If u, v have different | |
5461eefe | 278 | return ((q ^ t) - t); // signs, negate q. |
a4bfd8ea BB |
279 | } |
280 | EXPORT_SYMBOL(__divdi3); | |
281 | ||
282 | /* | |
283 | * Implementation of 64-bit unsigned modulo for 32-bit machines. | |
550f1705 | 284 | */ |
1b4ad25e AZ |
285 | uint64_t |
286 | __umoddi3(uint64_t dividend, uint64_t divisor) | |
550f1705 | 287 | { |
1b4ad25e | 288 | return (dividend - (divisor * __udivdi3(dividend, divisor))); |
b61a6e8b | 289 | } |
550f1705 | 290 | EXPORT_SYMBOL(__umoddi3); |
a4bfd8ea | 291 | |
6ecfd2b5 CC |
292 | /* |
293 | * Implementation of 64-bit unsigned division/modulo for 32-bit machines. | |
294 | */ | |
295 | uint64_t | |
296 | __udivmoddi4(uint64_t n, uint64_t d, uint64_t *r) | |
297 | { | |
298 | uint64_t q = __udivdi3(n, d); | |
299 | if (r) | |
300 | *r = n - d * q; | |
301 | return (q); | |
302 | } | |
303 | EXPORT_SYMBOL(__udivmoddi4); | |
304 | ||
305 | /* | |
306 | * Implementation of 64-bit signed division/modulo for 32-bit machines. | |
307 | */ | |
308 | int64_t | |
309 | __divmoddi4(int64_t n, int64_t d, int64_t *r) | |
310 | { | |
311 | int64_t q, rr; | |
312 | boolean_t nn = B_FALSE; | |
313 | boolean_t nd = B_FALSE; | |
314 | if (n < 0) { | |
315 | nn = B_TRUE; | |
316 | n = -n; | |
317 | } | |
318 | if (d < 0) { | |
319 | nd = B_TRUE; | |
320 | d = -d; | |
321 | } | |
322 | ||
323 | q = __udivmoddi4(n, d, (uint64_t *)&rr); | |
324 | ||
325 | if (nn != nd) | |
326 | q = -q; | |
327 | if (nn) | |
328 | rr = -rr; | |
329 | if (r) | |
330 | *r = rr; | |
331 | return (q); | |
332 | } | |
333 | EXPORT_SYMBOL(__divmoddi4); | |
334 | ||
ef6f91ce JL |
335 | #if defined(__arm) || defined(__arm__) |
336 | /* | |
93b0dc92 JL |
337 | * Implementation of 64-bit (un)signed division for 32-bit arm machines. |
338 | * | |
339 | * Run-time ABI for the ARM Architecture (page 20). A pair of (unsigned) | |
340 | * long longs is returned in {{r0, r1}, {r2,r3}}, the quotient in {r0, r1}, | |
341 | * and the remainder in {r2, r3}. The return type is specifically left | |
342 | * set to 'void' to ensure the compiler does not overwrite these registers | |
343 | * during the return. All results are in registers as per ABI | |
ef6f91ce | 344 | */ |
93b0dc92 | 345 | void |
ef6f91ce JL |
346 | __aeabi_uldivmod(uint64_t u, uint64_t v) |
347 | { | |
93b0dc92 JL |
348 | uint64_t res; |
349 | uint64_t mod; | |
350 | ||
351 | res = __udivdi3(u, v); | |
352 | mod = __umoddi3(u, v); | |
353 | { | |
354 | register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF); | |
355 | register uint32_t r1 asm("r1") = (res >> 32); | |
356 | register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF); | |
357 | register uint32_t r3 asm("r3") = (mod >> 32); | |
358 | ||
5461eefe | 359 | /* BEGIN CSTYLED */ |
93b0dc92 JL |
360 | asm volatile("" |
361 | : "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3) /* output */ | |
362 | : "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */ | |
5461eefe | 363 | /* END CSTYLED */ |
93b0dc92 JL |
364 | |
365 | return; /* r0; */ | |
366 | } | |
ef6f91ce JL |
367 | } |
368 | EXPORT_SYMBOL(__aeabi_uldivmod); | |
369 | ||
93b0dc92 | 370 | void |
ef6f91ce JL |
371 | __aeabi_ldivmod(int64_t u, int64_t v) |
372 | { | |
93b0dc92 JL |
373 | int64_t res; |
374 | uint64_t mod; | |
375 | ||
376 | res = __divdi3(u, v); | |
377 | mod = __umoddi3(u, v); | |
378 | { | |
379 | register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF); | |
380 | register uint32_t r1 asm("r1") = (res >> 32); | |
381 | register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF); | |
382 | register uint32_t r3 asm("r3") = (mod >> 32); | |
383 | ||
5461eefe | 384 | /* BEGIN CSTYLED */ |
93b0dc92 JL |
385 | asm volatile("" |
386 | : "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3) /* output */ | |
387 | : "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */ | |
5461eefe | 388 | /* END CSTYLED */ |
93b0dc92 JL |
389 | |
390 | return; /* r0; */ | |
391 | } | |
ef6f91ce JL |
392 | } |
393 | EXPORT_SYMBOL(__aeabi_ldivmod); | |
394 | #endif /* __arm || __arm__ */ | |
96dded38 | 395 | #endif /* BITS_PER_LONG */ |
b61a6e8b | 396 | |
5461eefe BB |
397 | /* |
398 | * NOTE: The strtoxx behavior is solely based on my reading of the Solaris | |
b871b8cd BB |
399 | * ddi_strtol(9F) man page. I have not verified the behavior of these |
400 | * functions against their Solaris counterparts. It is possible that I | |
96dded38 | 401 | * may have misinterpreted the man page or the man page is incorrect. |
b871b8cd | 402 | */ |
2ee63a54 BB |
403 | int ddi_strtoul(const char *, char **, int, unsigned long *); |
404 | int ddi_strtol(const char *, char **, int, long *); | |
405 | int ddi_strtoull(const char *, char **, int, unsigned long long *); | |
406 | int ddi_strtoll(const char *, char **, int, long long *); | |
407 | ||
5461eefe | 408 | #define define_ddi_strtoux(type, valtype) \ |
2ee63a54 | 409 | int ddi_strtou##type(const char *str, char **endptr, \ |
5461eefe | 410 | int base, valtype *result) \ |
2ee63a54 | 411 | { \ |
b871b8cd BB |
412 | valtype last_value, value = 0; \ |
413 | char *ptr = (char *)str; \ | |
414 | int flag = 1, digit; \ | |
415 | \ | |
416 | if (strlen(ptr) == 0) \ | |
5461eefe | 417 | return (EINVAL); \ |
b871b8cd BB |
418 | \ |
419 | /* Auto-detect base based on prefix */ \ | |
420 | if (!base) { \ | |
421 | if (str[0] == '0') { \ | |
5461eefe | 422 | if (tolower(str[1]) == 'x' && isxdigit(str[2])) { \ |
b871b8cd BB |
423 | base = 16; /* hex */ \ |
424 | ptr += 2; \ | |
425 | } else if (str[1] >= '0' && str[1] < 8) { \ | |
426 | base = 8; /* octal */ \ | |
427 | ptr += 1; \ | |
428 | } else { \ | |
5461eefe | 429 | return (EINVAL); \ |
b871b8cd BB |
430 | } \ |
431 | } else { \ | |
432 | base = 10; /* decimal */ \ | |
433 | } \ | |
434 | } \ | |
435 | \ | |
436 | while (1) { \ | |
437 | if (isdigit(*ptr)) \ | |
438 | digit = *ptr - '0'; \ | |
439 | else if (isalpha(*ptr)) \ | |
440 | digit = tolower(*ptr) - 'a' + 10; \ | |
441 | else \ | |
442 | break; \ | |
443 | \ | |
444 | if (digit >= base) \ | |
445 | break; \ | |
2ee63a54 | 446 | \ |
b871b8cd BB |
447 | last_value = value; \ |
448 | value = value * base + digit; \ | |
449 | if (last_value > value) /* Overflow */ \ | |
5461eefe | 450 | return (ERANGE); \ |
2ee63a54 | 451 | \ |
b871b8cd BB |
452 | flag = 1; \ |
453 | ptr++; \ | |
2ee63a54 BB |
454 | } \ |
455 | \ | |
b871b8cd BB |
456 | if (flag) \ |
457 | *result = value; \ | |
458 | \ | |
459 | if (endptr) \ | |
460 | *endptr = (char *)(flag ? ptr : str); \ | |
461 | \ | |
5461eefe | 462 | return (0); \ |
2ee63a54 BB |
463 | } \ |
464 | ||
5461eefe | 465 | #define define_ddi_strtox(type, valtype) \ |
2ee63a54 | 466 | int ddi_strto##type(const char *str, char **endptr, \ |
5461eefe | 467 | int base, valtype *result) \ |
b871b8cd BB |
468 | { \ |
469 | int rc; \ | |
2ee63a54 BB |
470 | \ |
471 | if (*str == '-') { \ | |
b871b8cd BB |
472 | rc = ddi_strtou##type(str + 1, endptr, base, result); \ |
473 | if (!rc) { \ | |
474 | if (*endptr == str + 1) \ | |
475 | *endptr = (char *)str; \ | |
476 | else \ | |
477 | *result = -*result; \ | |
478 | } \ | |
2ee63a54 | 479 | } else { \ |
b871b8cd | 480 | rc = ddi_strtou##type(str, endptr, base, result); \ |
2ee63a54 BB |
481 | } \ |
482 | \ | |
5461eefe | 483 | return (rc); \ |
b871b8cd | 484 | } |
2ee63a54 BB |
485 | |
486 | define_ddi_strtoux(l, unsigned long) | |
487 | define_ddi_strtox(l, long) | |
488 | define_ddi_strtoux(ll, unsigned long long) | |
489 | define_ddi_strtox(ll, long long) | |
490 | ||
2f5d55aa | 491 | EXPORT_SYMBOL(ddi_strtoul); |
2ee63a54 BB |
492 | EXPORT_SYMBOL(ddi_strtol); |
493 | EXPORT_SYMBOL(ddi_strtoll); | |
494 | EXPORT_SYMBOL(ddi_strtoull); | |
2f5d55aa | 495 | |
d3126abe BB |
496 | int |
497 | ddi_copyin(const void *from, void *to, size_t len, int flags) | |
498 | { | |
499 | /* Fake ioctl() issued by kernel, 'from' is a kernel address */ | |
500 | if (flags & FKIOCTL) { | |
501 | memcpy(to, from, len); | |
5461eefe | 502 | return (0); |
d3126abe BB |
503 | } |
504 | ||
5461eefe | 505 | return (copyin(from, to, len)); |
d3126abe BB |
506 | } |
507 | EXPORT_SYMBOL(ddi_copyin); | |
508 | ||
509 | int | |
510 | ddi_copyout(const void *from, void *to, size_t len, int flags) | |
511 | { | |
512 | /* Fake ioctl() issued by kernel, 'from' is a kernel address */ | |
513 | if (flags & FKIOCTL) { | |
514 | memcpy(to, from, len); | |
5461eefe | 515 | return (0); |
d3126abe BB |
516 | } |
517 | ||
5461eefe | 518 | return (copyout(from, to, len)); |
d3126abe BB |
519 | } |
520 | EXPORT_SYMBOL(ddi_copyout); | |
521 | ||
0d54dcb5 DH |
522 | /* |
523 | * Read the unique system identifier from the /etc/hostid file. | |
524 | * | |
525 | * The behavior of /usr/bin/hostid on Linux systems with the | |
526 | * regular eglibc and coreutils is: | |
527 | * | |
528 | * 1. Generate the value if the /etc/hostid file does not exist | |
529 | * or if the /etc/hostid file is less than four bytes in size. | |
530 | * | |
531 | * 2. If the /etc/hostid file is at least 4 bytes, then return | |
532 | * the first four bytes [0..3] in native endian order. | |
533 | * | |
534 | * 3. Always ignore bytes [4..] if they exist in the file. | |
535 | * | |
536 | * Only the first four bytes are significant, even on systems that | |
537 | * have a 64-bit word size. | |
538 | * | |
539 | * See: | |
540 | * | |
541 | * eglibc: sysdeps/unix/sysv/linux/gethostid.c | |
542 | * coreutils: src/hostid.c | |
543 | * | |
544 | * Notes: | |
545 | * | |
546 | * The /etc/hostid file on Solaris is a text file that often reads: | |
547 | * | |
548 | * # DO NOT EDIT | |
549 | * "0123456789" | |
550 | * | |
551 | * Directly copying this file to Linux results in a constant | |
552 | * hostid of 4f442023 because the default comment constitutes | |
553 | * the first four bytes of the file. | |
554 | * | |
555 | */ | |
556 | ||
557 | char *spl_hostid_path = HW_HOSTID_PATH; | |
558 | module_param(spl_hostid_path, charp, 0444); | |
559 | MODULE_PARM_DESC(spl_hostid_path, "The system hostid file (/etc/hostid)"); | |
560 | ||
561 | static int | |
c93d9dff | 562 | hostid_read(uint32_t *hostid) |
0d54dcb5 | 563 | { |
0d54dcb5 DH |
564 | uint64_t size; |
565 | struct _buf *file; | |
c93d9dff BB |
566 | uint32_t value = 0; |
567 | int error; | |
0d54dcb5 DH |
568 | |
569 | file = kobj_open_file(spl_hostid_path); | |
6b3b569d | 570 | if (file == (struct _buf *)-1) |
c93d9dff | 571 | return (ENOENT); |
0d54dcb5 | 572 | |
c93d9dff BB |
573 | error = kobj_get_filesize(file, &size); |
574 | if (error) { | |
0d54dcb5 | 575 | kobj_close_file(file); |
c93d9dff | 576 | return (error); |
0d54dcb5 DH |
577 | } |
578 | ||
5461eefe | 579 | if (size < sizeof (HW_HOSTID_MASK)) { |
0d54dcb5 | 580 | kobj_close_file(file); |
c93d9dff | 581 | return (EINVAL); |
0d54dcb5 DH |
582 | } |
583 | ||
c93d9dff BB |
584 | /* |
585 | * Read directly into the variable like eglibc does. | |
586 | * Short reads are okay; native behavior is preserved. | |
587 | */ | |
5461eefe | 588 | error = kobj_read_file(file, (char *)&value, sizeof (value), 0); |
c93d9dff | 589 | if (error < 0) { |
0d54dcb5 | 590 | kobj_close_file(file); |
c93d9dff | 591 | return (EIO); |
0d54dcb5 DH |
592 | } |
593 | ||
594 | /* Mask down to 32 bits like coreutils does. */ | |
c93d9dff | 595 | *hostid = (value & HW_HOSTID_MASK); |
0d54dcb5 | 596 | kobj_close_file(file); |
c93d9dff | 597 | |
5461eefe | 598 | return (0); |
0d54dcb5 DH |
599 | } |
600 | ||
c93d9dff BB |
601 | /* |
602 | * Return the system hostid. Preferentially use the spl_hostid module option | |
603 | * when set, otherwise use the value in the /etc/hostid file. | |
604 | */ | |
99639e4a BB |
605 | uint32_t |
606 | zone_get_hostid(void *zone) | |
607 | { | |
c93d9dff | 608 | uint32_t hostid; |
99639e4a | 609 | |
c93d9dff | 610 | ASSERT3P(zone, ==, NULL); |
a9f2397e | 611 | |
c93d9dff BB |
612 | if (spl_hostid != 0) |
613 | return ((uint32_t)(spl_hostid & HW_HOSTID_MASK)); | |
a9f2397e | 614 | |
c93d9dff BB |
615 | if (hostid_read(&hostid) == 0) |
616 | return (hostid); | |
086476f9 | 617 | |
c93d9dff | 618 | return (0); |
99639e4a BB |
619 | } |
620 | EXPORT_SYMBOL(zone_get_hostid); | |
621 | ||
e5b9b344 BB |
622 | static int |
623 | spl_kvmem_init(void) | |
624 | { | |
625 | int rc = 0; | |
626 | ||
627 | rc = spl_kmem_init(); | |
628 | if (rc) | |
16522ac2 | 629 | return (rc); |
e5b9b344 BB |
630 | |
631 | rc = spl_vmem_init(); | |
16522ac2 CC |
632 | if (rc) { |
633 | spl_kmem_fini(); | |
634 | return (rc); | |
635 | } | |
e5b9b344 | 636 | |
e5b9b344 BB |
637 | return (rc); |
638 | } | |
639 | ||
0b43696e RY |
640 | /* |
641 | * We initialize the random number generator with 128 bits of entropy from the | |
642 | * system random number generator. In the improbable case that we have a zero | |
643 | * seed, we fallback to the system jiffies, unless it is also zero, in which | |
644 | * situation we use a preprogrammed seed. We step forward by 2^64 iterations to | |
645 | * initialize each of the per-cpu seeds so that the sequences generated on each | |
646 | * CPU are guaranteed to never overlap in practice. | |
647 | */ | |
648 | static void __init | |
649 | spl_random_init(void) | |
650 | { | |
651 | uint64_t s[2]; | |
652 | int i; | |
653 | ||
654 | get_random_bytes(s, sizeof (s)); | |
655 | ||
656 | if (s[0] == 0 && s[1] == 0) { | |
657 | if (jiffies != 0) { | |
658 | s[0] = jiffies; | |
659 | s[1] = ~0 - jiffies; | |
660 | } else { | |
661 | (void) memcpy(s, "improbable seed", sizeof (s)); | |
662 | } | |
663 | printk("SPL: get_random_bytes() returned 0 " | |
664 | "when generating random seed. Setting initial seed to " | |
b689de85 TK |
665 | "0x%016llx%016llx.\n", cpu_to_be64(s[0]), |
666 | cpu_to_be64(s[1])); | |
0b43696e RY |
667 | } |
668 | ||
0d267566 | 669 | for_each_possible_cpu(i) { |
0b43696e RY |
670 | uint64_t *wordp = per_cpu(spl_pseudo_entropy, i); |
671 | ||
672 | spl_rand_jump(s); | |
673 | ||
674 | wordp[0] = s[0]; | |
675 | wordp[1] = s[1]; | |
676 | } | |
677 | } | |
678 | ||
e5b9b344 BB |
679 | static void |
680 | spl_kvmem_fini(void) | |
681 | { | |
e5b9b344 BB |
682 | spl_vmem_fini(); |
683 | spl_kmem_fini(); | |
684 | } | |
685 | ||
c1bc8e61 BB |
686 | static int __init |
687 | spl_init(void) | |
57d1b188 BB |
688 | { |
689 | int rc = 0; | |
f23e92fa | 690 | |
341dfdb3 | 691 | bzero(&p0, sizeof (proc_t)); |
0b43696e RY |
692 | spl_random_init(); |
693 | ||
e5b9b344 | 694 | if ((rc = spl_kvmem_init())) |
8d9a23e8 | 695 | goto out1; |
8d0f1ee9 | 696 | |
9ab1ac14 | 697 | if ((rc = spl_mutex_init())) |
8d9a23e8 | 698 | goto out2; |
9ab1ac14 | 699 | |
d28db80f | 700 | if ((rc = spl_rw_init())) |
8d9a23e8 | 701 | goto out3; |
8d0f1ee9 | 702 | |
16522ac2 | 703 | if ((rc = spl_tsd_init())) |
8d9a23e8 | 704 | goto out4; |
af828292 | 705 | |
16522ac2 | 706 | if ((rc = spl_taskq_init())) |
8d9a23e8 | 707 | goto out5; |
04a479f7 | 708 | |
16522ac2 | 709 | if ((rc = spl_kmem_cache_init())) |
8d9a23e8 | 710 | goto out6; |
e9cb2b4f | 711 | |
16522ac2 | 712 | if ((rc = spl_vn_init())) |
8d9a23e8 | 713 | goto out7; |
d28db80f | 714 | |
16522ac2 | 715 | if ((rc = spl_proc_init())) |
8d9a23e8 | 716 | goto out8; |
9fe45dc1 | 717 | |
16522ac2 | 718 | if ((rc = spl_kstat_init())) |
8d9a23e8 | 719 | goto out9; |
5c1967eb | 720 | |
16522ac2 CC |
721 | if ((rc = spl_zlib_init())) |
722 | goto out10; | |
723 | ||
8d9a23e8 | 724 | return (rc); |
44778f41 | 725 | |
16522ac2 CC |
726 | out10: |
727 | spl_kstat_fini(); | |
9fe45dc1 | 728 | out9: |
16522ac2 | 729 | spl_proc_fini(); |
d28db80f | 730 | out8: |
16522ac2 | 731 | spl_vn_fini(); |
d28db80f | 732 | out7: |
16522ac2 | 733 | spl_kmem_cache_fini(); |
d28db80f | 734 | out6: |
e9cb2b4f | 735 | spl_taskq_fini(); |
16522ac2 CC |
736 | out5: |
737 | spl_tsd_fini(); | |
d28db80f BB |
738 | out4: |
739 | spl_rw_fini(); | |
9ab1ac14 BB |
740 | out3: |
741 | spl_mutex_fini(); | |
8d0f1ee9 | 742 | out2: |
e5b9b344 | 743 | spl_kvmem_fini(); |
d28db80f | 744 | out1: |
c1bc8e61 | 745 | return (rc); |
70eadc19 BB |
746 | } |
747 | ||
c1bc8e61 | 748 | static void __exit |
51a727e9 | 749 | spl_fini(void) |
70eadc19 | 750 | { |
1114ae6a | 751 | spl_zlib_fini(); |
1114ae6a BB |
752 | spl_kstat_fini(); |
753 | spl_proc_fini(); | |
12ff95ff | 754 | spl_vn_fini(); |
16522ac2 | 755 | spl_kmem_cache_fini(); |
e9cb2b4f | 756 | spl_taskq_fini(); |
16522ac2 | 757 | spl_tsd_fini(); |
d28db80f | 758 | spl_rw_fini(); |
2fb9b26a | 759 | spl_mutex_fini(); |
e5b9b344 | 760 | spl_kvmem_fini(); |
70eadc19 BB |
761 | } |
762 | ||
763 | module_init(spl_init); | |
764 | module_exit(spl_fini); | |
765 | ||
70eadc19 | 766 | MODULE_DESCRIPTION("Solaris Porting Layer"); |
93ce2b4c BB |
767 | MODULE_AUTHOR(ZFS_META_AUTHOR); |
768 | MODULE_LICENSE("GPL"); | |
769 | MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE); |