]>
Commit | Line | Data |
---|---|---|
716154c5 BB |
1 | /*****************************************************************************\ |
2 | * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. | |
3 | * Copyright (C) 2007 The Regents of the University of California. | |
4 | * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). | |
5 | * Written by Brian Behlendorf <behlendorf1@llnl.gov>. | |
715f6251 | 6 | * UCRL-CODE-235197 |
7 | * | |
716154c5 | 8 | * This file is part of the SPL, Solaris Porting Layer. |
3d6af2dd | 9 | * For details, see <http://zfsonlinux.org/>. |
716154c5 BB |
10 | * |
11 | * The SPL is free software; you can redistribute it and/or modify it | |
12 | * under the terms of the GNU General Public License as published by the | |
13 | * Free Software Foundation; either version 2 of the License, or (at your | |
14 | * option) any later version. | |
715f6251 | 15 | * |
716154c5 | 16 | * The SPL is distributed in the hope that it will be useful, but WITHOUT |
715f6251 | 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
19 | * for more details. | |
20 | * | |
21 | * You should have received a copy of the GNU General Public License along | |
716154c5 BB |
22 | * with the SPL. If not, see <http://www.gnu.org/licenses/>. |
23 | ***************************************************************************** | |
24 | * Solaris Porting Layer (SPL) Generic Implementation. | |
25 | \*****************************************************************************/ | |
715f6251 | 26 | |
14c5326c | 27 | #include <sys/sysmacros.h> |
99639e4a | 28 | #include <sys/systeminfo.h> |
af828292 | 29 | #include <sys/vmsystm.h> |
0d54dcb5 | 30 | #include <sys/kobj.h> |
c19c06f3 | 31 | #include <sys/kmem.h> |
10946b02 AX |
32 | #include <sys/kmem_cache.h> |
33 | #include <sys/vmem.h> | |
9ab1ac14 | 34 | #include <sys/mutex.h> |
d28db80f | 35 | #include <sys/rwlock.h> |
e9cb2b4f | 36 | #include <sys/taskq.h> |
9fe45dc1 | 37 | #include <sys/tsd.h> |
5c1967eb | 38 | #include <sys/zmod.h> |
8d0f1ee9 | 39 | #include <sys/debug.h> |
57d1b188 | 40 | #include <sys/proc.h> |
04a479f7 | 41 | #include <sys/kstat.h> |
d3126abe | 42 | #include <sys/file.h> |
10946b02 | 43 | #include <linux/ctype.h> |
ec06701b AX |
44 | #include <sys/disp.h> |
45 | #include <sys/random.h> | |
f23e92fa | 46 | #include <linux/kmod.h> |
10946b02 | 47 | #include <linux/math64_compat.h> |
ae4c36ad | 48 | #include <linux/proc_compat.h> |
f23e92fa | 49 | |
0835057e | 50 | char spl_version[32] = "SPL v" SPL_META_VERSION "-" SPL_META_RELEASE; |
1a73940d | 51 | EXPORT_SYMBOL(spl_version); |
3561541c | 52 | |
9e4fb5c2 | 53 | unsigned long spl_hostid = 0; |
f23e92fa | 54 | EXPORT_SYMBOL(spl_hostid); |
fa6f7d8f DH |
55 | module_param(spl_hostid, ulong, 0644); |
56 | MODULE_PARM_DESC(spl_hostid, "The system hostid."); | |
8d0f1ee9 | 57 | |
ac9cc135 | 58 | proc_t p0; |
f1b59d26 | 59 | EXPORT_SYMBOL(p0); |
70eadc19 | 60 | |
ec06701b AX |
61 | /* |
62 | * Xorshift Pseudo Random Number Generator based on work by Sebastiano Vigna | |
63 | * | |
64 | * "Further scramblings of Marsaglia's xorshift generators" | |
65 | * http://vigna.di.unimi.it/ftp/papers/xorshiftplus.pdf | |
66 | * | |
67 | * random_get_pseudo_bytes() is an API function on Illumos whose sole purpose | |
68 | * is to provide bytes containing random numbers. It is mapped to /dev/urandom | |
69 | * on Illumos, which uses a "FIPS 186-2 algorithm". No user of the SPL's | |
70 | * random_get_pseudo_bytes() needs bytes that are of cryptographic quality, so | |
71 | * we can implement it using a fast PRNG that we seed using Linux' actual | |
72 | * equivalent to random_get_pseudo_bytes(). We do this by providing each CPU | |
73 | * with an independent seed so that all calls to random_get_pseudo_bytes() are | |
74 | * free of atomic instructions. | |
75 | * | |
76 | * A consequence of using a fast PRNG is that using random_get_pseudo_bytes() | |
77 | * to generate words larger than 128 bits will paradoxically be limited to | |
78 | * `2^128 - 1` possibilities. This is because we have a sequence of `2^128 - 1` | |
79 | * 128-bit words and selecting the first will implicitly select the second. If | |
80 | * a caller finds this behavior undesirable, random_get_bytes() should be used | |
81 | * instead. | |
82 | * | |
83 | * XXX: Linux interrupt handlers that trigger within the critical section | |
84 | * formed by `s[1] = xp[1];` and `xp[0] = s[0];` and call this function will | |
85 | * see the same numbers. Nothing in the code currently calls this in an | |
86 | * interrupt handler, so this is considered to be okay. If that becomes a | |
87 | * problem, we could create a set of per-cpu variables for interrupt handlers | |
88 | * and use them when in_interrupt() from linux/preempt_mask.h evaluates to | |
89 | * true. | |
90 | */ | |
91 | static DEFINE_PER_CPU(uint64_t[2], spl_pseudo_entropy); | |
92 | ||
93 | /* | |
94 | * spl_rand_next()/spl_rand_jump() are copied from the following CC-0 licensed | |
95 | * file: | |
96 | * | |
97 | * http://xorshift.di.unimi.it/xorshift128plus.c | |
98 | */ | |
99 | ||
/*
 * Advance the 128-bit generator state held in s[0] and s[1] by one step.
 *
 * s: two-element state array, updated in place.
 *
 * Returns the next 64-bit output word (new s[1] plus the previous s[1]).
 */
static inline uint64_t
spl_rand_next(uint64_t *s)
{
	const uint64_t prev = s[1];
	uint64_t mix = s[0];

	mix ^= mix << 23;				/* a */
	mix ^= prev ^ (mix >> 18) ^ (prev >> 5);	/* b, c */

	s[0] = prev;
	s[1] = mix;

	return (mix + prev);
}
109 | ||
/*
 * Jump the generator state in s[0..1] far ahead in its sequence.
 *
 * For every set bit of the jump constants the current state is XOR-ed into
 * an accumulator, and the state is stepped once per bit (128 steps total).
 * The accumulated words then replace the state.
 */
static inline void
spl_rand_jump(uint64_t *s)
{
	static const uint64_t jump_poly[] = {
		0x8a5cd789635d2dffULL, 0x121fd2155c472f96ULL
	};
	const size_t nwords = sizeof (jump_poly) / sizeof (jump_poly[0]);
	uint64_t acc0 = 0;
	uint64_t acc1 = 0;
	size_t word;
	int bit;

	for (word = 0; word < nwords; word++) {
		for (bit = 0; bit < 64; bit++) {
			if ((jump_poly[word] >> bit) & 1ULL) {
				acc0 ^= s[0];
				acc1 ^= s[1];
			}
			(void) spl_rand_next(s);
		}
	}

	s[0] = acc0;
	s[1] = acc1;
}
129 | ||
130 | int | |
131 | random_get_pseudo_bytes(uint8_t *ptr, size_t len) | |
132 | { | |
133 | uint64_t *xp, s[2]; | |
134 | ||
135 | ASSERT(ptr); | |
136 | ||
137 | xp = get_cpu_var(spl_pseudo_entropy); | |
138 | ||
139 | s[0] = xp[0]; | |
140 | s[1] = xp[1]; | |
141 | ||
142 | while (len) { | |
143 | union { | |
144 | uint64_t ui64; | |
145 | uint8_t byte[sizeof (uint64_t)]; | |
146 | }entropy; | |
147 | int i = MIN(len, sizeof (uint64_t)); | |
148 | ||
149 | len -= i; | |
150 | entropy.ui64 = spl_rand_next(s); | |
151 | ||
152 | while (i--) | |
153 | *ptr++ = entropy.byte[i]; | |
154 | } | |
155 | ||
156 | xp[0] = s[0]; | |
157 | xp[1] = s[1]; | |
158 | ||
159 | put_cpu_var(spl_pseudo_entropy); | |
160 | ||
161 | return (0); | |
162 | } | |
163 | ||
164 | ||
165 | EXPORT_SYMBOL(random_get_pseudo_bytes); | |
166 | ||
a4bfd8ea | 167 | #if BITS_PER_LONG == 32 |
b61a6e8b | 168 | /* |
a4bfd8ea BB |
169 | * Support 64/64 => 64 division on a 32-bit platform. While the kernel |
170 | * provides a div64_u64() function for this we do not use it because the | |
171 | * implementation is flawed. There are cases which return incorrect | |
172 | * results as late as linux-2.6.35. Until this is fixed upstream the | |
173 | * spl must provide its own implementation. | |
174 | * | |
175 | * This implementation is a slightly modified version of the algorithm | |
176 | * proposed by the book 'Hacker's Delight'. The original source can be | |
177 | * found here and is available for use without restriction. | |
178 | * | |
179 | * http://www.hackersdelight.org/HDcode/newCode/divDouble.c | |
180 | */ | |
181 | ||
182 | /* | |
183 | * Calculate the number of leading zeros for a 64-bit value. | |
184 | */ | |
/*
 * Count the leading zero bits of x.  Returns 64 when x is 0.
 */
static int
nlz64(uint64_t x)
{
	int zeros = 0;
	int width;

	if (x == 0)
		return (64);

	/*
	 * Binary search over window widths 32, 16, 8, 4, 2, 1: whenever the
	 * top 'width' bits are all clear, count them and shift them out.
	 */
	for (width = 32; width > 0; width >>= 1) {
		if ((x >> (64 - width)) == 0) {
			zeros += width;
			x <<= width;
		}
	}

	return (zeros);
}
201 | ||
202 | /* | |
203 | * Newer kernels have a div_u64() function but we define our own | |
204 | * to simplify portability between kernel versions. | |
205 | */ | |
/*
 * Divide the 64-bit value u by the 32-bit value v and return the quotient.
 */
static inline uint64_t
__div_u64(uint64_t u, uint32_t v)
{
	uint64_t quotient = u;

	/* do_div() updates its first argument; its result is not needed. */
	(void) do_div(quotient, v);

	return (quotient);
}
212 | ||
213 | /* | |
214 | * Implementation of 64-bit unsigned division for 32-bit machines. | |
215 | * | |
216 | * First the procedure takes care of the case in which the divisor is a | |
217 | * 32-bit quantity. There are two subcases: (1) If the left half of the | |
218 | * dividend is less than the divisor, one execution of do_div() is all that | |
219 | * is required (overflow is not possible). (2) Otherwise it does two | |
220 | * divisions, using the grade school method. | |
b61a6e8b | 221 | */ |
/*
 * 64-bit unsigned division u / v built from 64/32 divisions.
 *
 * Returns the quotient.  The caller must not pass v == 0.
 */
uint64_t
__udivdi3(uint64_t u, uint64_t v)
{
	uint64_t lo, hi, norm, q_lo, q_hi, rem;
	int shift;

	if ((v >> 32) == 0) {
		/* Divisor fits in 32 bits. */
		if ((u >> 32) < v) {
			/* Quotient cannot overflow: one division is enough. */
			return (__div_u64(u, v));
		}

		/* Grade-school long division on the two 32-bit halves. */
		hi = u >> 32;
		lo = u & 0xFFFFFFFF;
		q_hi = __div_u64(hi, v);	/* First quotient digit. */
		rem = hi - q_hi * v;		/* First remainder, < v. */
		lo += (rem << 32);
		q_lo = __div_u64(lo, v);	/* Second quotient digit. */

		return ((q_hi << 32) + q_lo);
	}

	/* Divisor needs more than 32 bits, so 0 <= shift <= 31. */
	shift = nlz64(v);
	norm = (v << shift) >> 32;	/* Normalized divisor, MSB is 1. */
	hi = u >> 1;			/* Halve u to ensure no overflow. */
	q_hi = __div_u64(hi, norm);
	q_lo = (q_hi << shift) >> 31;	/* Undo normalization and halving. */

	if (q_lo != 0)
		q_lo = q_lo - 1;	/* Now correct or too small by 1. */
	if ((u - q_lo * v) >= v)
		q_lo = q_lo + 1;	/* Now correct. */

	return (q_lo);
}
EXPORT_SYMBOL(__udivdi3);
256 | ||
257 | /* | |
a4bfd8ea BB |
258 | * Implementation of 64-bit signed division for 32-bit machines. |
259 | */ | |
/*
 * 64-bit signed division u / v: divide the magnitudes with __udivdi3() and
 * negate the quotient when the operands have different signs.
 */
int64_t
__divdi3(int64_t u, int64_t v)
{
	/* All ones when the signs differ, zero otherwise. */
	const int64_t sign = (u ^ v) >> 63;
	const int64_t magnitude = __udivdi3(abs64(u), abs64(v));

	/* (m ^ -1) - (-1) == -m, and (m ^ 0) - 0 == m. */
	return ((magnitude ^ sign) - sign);
}
EXPORT_SYMBOL(__divdi3);
269 | ||
270 | /* | |
271 | * Implementation of 64-bit unsigned modulo for 32-bit machines. | |
550f1705 | 272 | */ |
1b4ad25e AZ |
/*
 * 64-bit unsigned remainder: dividend minus divisor times the quotient
 * returned by __udivdi3().
 */
uint64_t
__umoddi3(uint64_t dividend, uint64_t divisor)
{
	const uint64_t quotient = __udivdi3(dividend, divisor);

	return (dividend - (divisor * quotient));
}
EXPORT_SYMBOL(__umoddi3);
a4bfd8ea | 279 | |
ec06701b AX |
280 | /* |
281 | * Implementation of 64-bit unsigned division/modulo for 32-bit machines. | |
282 | */ | |
/*
 * Combined 64-bit unsigned divide and remainder.
 *
 * Returns n / d.  When r is non-NULL the remainder is stored in *r.
 */
uint64_t
__udivmoddi4(uint64_t n, uint64_t d, uint64_t *r)
{
	const uint64_t quotient = __udivdi3(n, d);

	if (r != NULL)
		*r = n - d * quotient;

	return (quotient);
}
EXPORT_SYMBOL(__udivmoddi4);
292 | ||
293 | /* | |
294 | * Implementation of 64-bit signed division/modulo for 32-bit machines. | |
295 | */ | |
296 | int64_t | |
297 | __divmoddi4(int64_t n, int64_t d, int64_t *r) | |
298 | { | |
299 | int64_t q, rr; | |
300 | boolean_t nn = B_FALSE; | |
301 | boolean_t nd = B_FALSE; | |
302 | if (n < 0) { | |
303 | nn = B_TRUE; | |
304 | n = -n; | |
305 | } | |
306 | if (d < 0) { | |
307 | nd = B_TRUE; | |
308 | d = -d; | |
309 | } | |
310 | ||
311 | q = __udivmoddi4(n, d, (uint64_t *)&rr); | |
312 | ||
313 | if (nn != nd) | |
314 | q = -q; | |
315 | if (nn) | |
316 | rr = -rr; | |
317 | if (r) | |
318 | *r = rr; | |
319 | return (q); | |
320 | } | |
321 | EXPORT_SYMBOL(__divmoddi4); | |
322 | ||
ef6f91ce JL |
323 | #if defined(__arm) || defined(__arm__) |
324 | /* | |
93b0dc92 JL |
325 | * Implementation of 64-bit (un)signed division for 32-bit arm machines. |
326 | * | |
327 | * Run-time ABI for the ARM Architecture (page 20). A pair of (unsigned) | |
328 | * long longs is returned in {{r0, r1}, {r2,r3}}, the quotient in {r0, r1}, | |
329 | * and the remainder in {r2, r3}. The return type is specifically left | |
330 | * set to 'void' to ensure the compiler does not overwrite these registers | |
331 | * during the return. All results are in registers as per ABI | |
ef6f91ce | 332 | */ |
93b0dc92 | 333 | void |
ef6f91ce JL |
334 | __aeabi_uldivmod(uint64_t u, uint64_t v) |
335 | { | |
93b0dc92 JL |
336 | uint64_t res; |
337 | uint64_t mod; | |
338 | ||
339 | res = __udivdi3(u, v); | |
340 | mod = __umoddi3(u, v); | |
341 | { | |
342 | register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF); | |
343 | register uint32_t r1 asm("r1") = (res >> 32); | |
344 | register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF); | |
345 | register uint32_t r3 asm("r3") = (mod >> 32); | |
346 | ||
347 | asm volatile("" | |
348 | : "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3) /* output */ | |
349 | : "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */ | |
350 | ||
351 | return; /* r0; */ | |
352 | } | |
ef6f91ce JL |
353 | } |
354 | EXPORT_SYMBOL(__aeabi_uldivmod); | |
355 | ||
93b0dc92 | 356 | void |
ef6f91ce JL |
357 | __aeabi_ldivmod(int64_t u, int64_t v) |
358 | { | |
93b0dc92 JL |
359 | int64_t res; |
360 | uint64_t mod; | |
361 | ||
362 | res = __divdi3(u, v); | |
363 | mod = __umoddi3(u, v); | |
364 | { | |
365 | register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF); | |
366 | register uint32_t r1 asm("r1") = (res >> 32); | |
367 | register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF); | |
368 | register uint32_t r3 asm("r3") = (mod >> 32); | |
369 | ||
370 | asm volatile("" | |
371 | : "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3) /* output */ | |
372 | : "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */ | |
373 | ||
374 | return; /* r0; */ | |
375 | } | |
ef6f91ce JL |
376 | } |
377 | EXPORT_SYMBOL(__aeabi_ldivmod); | |
378 | #endif /* __arm || __arm__ */ | |
96dded38 | 379 | #endif /* BITS_PER_LONG */ |
b61a6e8b | 380 | |
b871b8cd BB |
381 | /* NOTE: The strtoxx behavior is solely based on my reading of the Solaris |
382 | * ddi_strtol(9F) man page. I have not verified the behavior of these | |
383 | * functions against their Solaris counterparts. It is possible that I | |
96dded38 | 384 | * may have misinterpreted the man page or the man page is incorrect. |
b871b8cd | 385 | */ |
2ee63a54 BB |
int ddi_strtoul(const char *, char **, int, unsigned long *);
int ddi_strtol(const char *, char **, int, long *);
int ddi_strtoull(const char *, char **, int, unsigned long long *);
int ddi_strtoll(const char *, char **, int, long long *);

/*
 * Generate ddi_strtou<type>(): parse an unsigned integer from 'str'.
 *
 * str:    NUL-terminated input; an empty string yields EINVAL.
 * endptr: when non-NULL, receives a pointer to the first unparsed character.
 * base:   numeric base, or 0 to auto-detect: "0x"/"0X" followed by a hex
 *         digit selects 16, '0' followed by an octal digit selects 8, any
 *         other leading '0' yields EINVAL, everything else is decimal.
 * result: receives the parsed value (0 when no digit was consumed).
 *
 * Returns 0 on success, EINVAL as described above, or ERANGE when the value
 * does not fit in 'valtype'.  *result and *endptr are untouched on error.
 */
#define	define_ddi_strtoux(type, valtype)				\
int ddi_strtou##type(const char *str, char **endptr,			\
    int base, valtype *result)						\
{									\
	const valtype max_value = (valtype)-1;				\
	valtype value = 0;						\
	char *ptr = (char *)str;					\
	int digit;							\
									\
	if (*ptr == '\0')						\
		return (EINVAL);					\
									\
	/* Auto-detect base based on prefix */				\
	if (!base) {							\
		if (str[0] == '0') {					\
			if (tolower((unsigned char)str[1]) == 'x' &&	\
			    isxdigit((unsigned char)str[2])) {		\
				base = 16; /* hex */			\
				ptr += 2;				\
			} else if (str[1] >= '0' && str[1] < '8') {	\
				base = 8; /* octal */			\
				ptr += 1;				\
			} else {					\
				return (EINVAL);			\
			}						\
		} else {						\
			base = 10; /* decimal */			\
		}							\
	}								\
									\
	for (;; ptr++) {						\
		const unsigned char c = (unsigned char)*ptr;		\
									\
		if (isdigit(c))						\
			digit = c - '0';				\
		else if (isalpha(c))					\
			digit = tolower(c) - 'a' + 10;			\
		else							\
			break;						\
									\
		if (digit >= base)					\
			break;						\
									\
		/* Exact test: would value * base + digit overflow? */	\
		if (value > (max_value - (valtype)digit) / (valtype)base) \
			return (ERANGE);				\
									\
		value = value * (valtype)base + (valtype)digit;		\
	}								\
									\
	*result = value;						\
									\
	if (endptr)							\
		*endptr = ptr;						\
									\
	return (0);							\
}

/*
 * Generate ddi_strto<type>(): like ddi_strtou<type>() but accepts one
 * leading '-'.  When the '-' is not followed by any digit, *endptr is set
 * back to 'str'.  'endptr' may be NULL.
 */
#define	define_ddi_strtox(type, valtype)				\
int ddi_strto##type(const char *str, char **endptr,			\
    int base, valtype *result)						\
{									\
	const int negative = (*str == '-');				\
	char *end = NULL;						\
	int rc;								\
									\
	/*								\
	 * The unsigned parser stores through the same object; signed	\
	 * and unsigned variants of a type may alias, hence the void	\
	 * pointer conversion.						\
	 */								\
	rc = ddi_strtou##type(str + negative, &end, base,		\
	    (void *)result);						\
	if (rc)								\
		return (rc);						\
									\
	if (negative) {							\
		if (end == str + 1)					\
			end = (char *)str;				\
		else							\
			*result = -*result;				\
	}								\
									\
	if (endptr)							\
		*endptr = end;						\
									\
	return (0);							\
}

define_ddi_strtoux(l, unsigned long)
define_ddi_strtox(l, long)
define_ddi_strtoux(ll, unsigned long long)
define_ddi_strtox(ll, long long)
473 | ||
2f5d55aa | 474 | EXPORT_SYMBOL(ddi_strtoul); |
2ee63a54 BB |
475 | EXPORT_SYMBOL(ddi_strtol); |
476 | EXPORT_SYMBOL(ddi_strtoll); | |
477 | EXPORT_SYMBOL(ddi_strtoull); | |
2f5d55aa | 478 | |
d3126abe BB |
479 | int |
480 | ddi_copyin(const void *from, void *to, size_t len, int flags) | |
481 | { | |
482 | /* Fake ioctl() issued by kernel, 'from' is a kernel address */ | |
483 | if (flags & FKIOCTL) { | |
484 | memcpy(to, from, len); | |
485 | return 0; | |
486 | } | |
487 | ||
488 | return copyin(from, to, len); | |
489 | } | |
490 | EXPORT_SYMBOL(ddi_copyin); | |
491 | ||
492 | int | |
493 | ddi_copyout(const void *from, void *to, size_t len, int flags) | |
494 | { | |
495 | /* Fake ioctl() issued by kernel, 'from' is a kernel address */ | |
496 | if (flags & FKIOCTL) { | |
497 | memcpy(to, from, len); | |
498 | return 0; | |
499 | } | |
500 | ||
501 | return copyout(from, to, len); | |
502 | } | |
503 | EXPORT_SYMBOL(ddi_copyout); | |
504 | ||
0d54dcb5 DH |
505 | /* |
506 | * Read the unique system identifier from the /etc/hostid file. | |
507 | * | |
508 | * The behavior of /usr/bin/hostid on Linux systems with the | |
509 | * regular eglibc and coreutils is: | |
510 | * | |
511 | * 1. Generate the value if the /etc/hostid file does not exist | |
512 | * or if the /etc/hostid file is less than four bytes in size. | |
513 | * | |
514 | * 2. If the /etc/hostid file is at least 4 bytes, then return | |
515 | * the first four bytes [0..3] in native endian order. | |
516 | * | |
517 | * 3. Always ignore bytes [4..] if they exist in the file. | |
518 | * | |
519 | * Only the first four bytes are significant, even on systems that | |
520 | * have a 64-bit word size. | |
521 | * | |
522 | * See: | |
523 | * | |
524 | * eglibc: sysdeps/unix/sysv/linux/gethostid.c | |
525 | * coreutils: src/hostid.c | |
526 | * | |
527 | * Notes: | |
528 | * | |
529 | * The /etc/hostid file on Solaris is a text file that often reads: | |
530 | * | |
531 | * # DO NOT EDIT | |
532 | * "0123456789" | |
533 | * | |
534 | * Directly copying this file to Linux results in a constant | |
535 | * hostid of 4f442023 because the default comment constitutes | |
536 | * the first four bytes of the file. | |
537 | * | |
538 | */ | |
539 | ||
540 | char *spl_hostid_path = HW_HOSTID_PATH; | |
541 | module_param(spl_hostid_path, charp, 0444); | |
542 | MODULE_PARM_DESC(spl_hostid_path, "The system hostid file (/etc/hostid)"); | |
543 | ||
544 | static int | |
ec06701b | 545 | hostid_read(uint32_t *hostid) |
0d54dcb5 | 546 | { |
0d54dcb5 DH |
547 | uint64_t size; |
548 | struct _buf *file; | |
ec06701b AX |
549 | uint32_t value = 0; |
550 | int error; | |
0d54dcb5 DH |
551 | |
552 | file = kobj_open_file(spl_hostid_path); | |
6b3b569d | 553 | if (file == (struct _buf *)-1) |
ec06701b | 554 | return (ENOENT); |
0d54dcb5 | 555 | |
ec06701b AX |
556 | error = kobj_get_filesize(file, &size); |
557 | if (error) { | |
0d54dcb5 | 558 | kobj_close_file(file); |
ec06701b | 559 | return (error); |
0d54dcb5 DH |
560 | } |
561 | ||
562 | if (size < sizeof(HW_HOSTID_MASK)) { | |
0d54dcb5 | 563 | kobj_close_file(file); |
ec06701b | 564 | return (EINVAL); |
0d54dcb5 DH |
565 | } |
566 | ||
ec06701b AX |
567 | /* |
568 | * Read directly into the variable like eglibc does. | |
569 | * Short reads are okay; native behavior is preserved. | |
570 | */ | |
571 | error = kobj_read_file(file, (char *)&value, sizeof(value), 0); | |
572 | if (error < 0) { | |
0d54dcb5 | 573 | kobj_close_file(file); |
ec06701b | 574 | return (EIO); |
0d54dcb5 DH |
575 | } |
576 | ||
577 | /* Mask down to 32 bits like coreutils does. */ | |
ec06701b | 578 | *hostid = (value & HW_HOSTID_MASK); |
0d54dcb5 | 579 | kobj_close_file(file); |
ec06701b | 580 | |
0d54dcb5 DH |
581 | return 0; |
582 | } | |
583 | ||
ec06701b AX |
584 | /* |
585 | * Return the system hostid. Preferentially use the spl_hostid module option | |
586 | * when set, otherwise use the value in the /etc/hostid file. | |
587 | */ | |
99639e4a BB |
588 | uint32_t |
589 | zone_get_hostid(void *zone) | |
590 | { | |
ec06701b | 591 | uint32_t hostid; |
99639e4a | 592 | |
ec06701b | 593 | ASSERT3P(zone, ==, NULL); |
a9f2397e | 594 | |
ec06701b AX |
595 | if (spl_hostid != 0) |
596 | return ((uint32_t)(spl_hostid & HW_HOSTID_MASK)); | |
a9f2397e | 597 | |
ec06701b AX |
598 | if (hostid_read(&hostid) == 0) |
599 | return (hostid); | |
10946b02 | 600 | |
ec06701b | 601 | return (0); |
99639e4a BB |
602 | } |
603 | EXPORT_SYMBOL(zone_get_hostid); | |
604 | ||
/*
 * Initialize the kmem layer and then the vmem layer.  If the vmem step
 * fails, the kmem layer is torn down again.  Returns 0 on success or the
 * error code of the step that failed.
 */
static int
spl_kvmem_init(void)
{
	int rc;

	rc = spl_kmem_init();
	if (rc == 0) {
		rc = spl_vmem_init();
		if (rc != 0)
			spl_kmem_fini();
	}

	return (rc);
}
622 | ||
623 | /* | |
624 | * We initialize the random number generator with 128 bits of entropy from the | |
625 | * system random number generator. In the improbable case that we have a zero | |
626 | * seed, we fallback to the system jiffies, unless it is also zero, in which | |
627 | * situation we use a preprogrammed seed. We step forward by 2^64 iterations to | |
628 | * initialize each of the per-cpu seeds so that the sequences generated on each | |
629 | * CPU are guaranteed to never overlap in practice. | |
630 | */ | |
631 | static void __init | |
632 | spl_random_init(void) | |
633 | { | |
634 | uint64_t s[2]; | |
635 | int i; | |
636 | ||
637 | get_random_bytes(s, sizeof (s)); | |
638 | ||
639 | if (s[0] == 0 && s[1] == 0) { | |
640 | if (jiffies != 0) { | |
641 | s[0] = jiffies; | |
642 | s[1] = ~0 - jiffies; | |
643 | } else { | |
644 | (void) memcpy(s, "improbable seed", sizeof (s)); | |
645 | } | |
646 | printk("SPL: get_random_bytes() returned 0 " | |
647 | "when generating random seed. Setting initial seed to " | |
648 | "0x%016llx%016llx.", cpu_to_be64(s[0]), cpu_to_be64(s[1])); | |
649 | } | |
650 | ||
651 | for_each_possible_cpu(i) { | |
652 | uint64_t *wordp = per_cpu(spl_pseudo_entropy, i); | |
653 | ||
654 | spl_rand_jump(s); | |
655 | ||
656 | wordp[0] = s[0]; | |
657 | wordp[1] = s[1]; | |
658 | } | |
10946b02 | 659 | } |
d1ff2312 | 660 | |
10946b02 AX |
/*
 * Tear down the vmem layer and then the kmem layer, the reverse of the
 * order used by spl_kvmem_init().
 */
static void
spl_kvmem_fini(void)
{
	spl_vmem_fini();
	spl_kmem_fini();
}
d1ff2312 | 667 | |
10946b02 AX |
668 | static int __init |
669 | spl_init(void) | |
57d1b188 | 670 | { |
671 | int rc = 0; | |
f23e92fa | 672 | |
ac9cc135 | 673 | bzero(&p0, sizeof (proc_t)); |
ec06701b | 674 | spl_random_init(); |
ac9cc135 | 675 | |
10946b02 AX |
676 | if ((rc = spl_kvmem_init())) |
677 | goto out1; | |
8d0f1ee9 | 678 | |
9ab1ac14 | 679 | if ((rc = spl_mutex_init())) |
10946b02 | 680 | goto out2; |
9ab1ac14 | 681 | |
d28db80f | 682 | if ((rc = spl_rw_init())) |
10946b02 | 683 | goto out3; |
8d0f1ee9 | 684 | |
ec06701b | 685 | if ((rc = spl_tsd_init())) |
10946b02 | 686 | goto out4; |
af828292 | 687 | |
ec06701b | 688 | if ((rc = spl_taskq_init())) |
10946b02 | 689 | goto out5; |
04a479f7 | 690 | |
ec06701b | 691 | if ((rc = spl_kmem_cache_init())) |
10946b02 | 692 | goto out6; |
e9cb2b4f | 693 | |
ec06701b | 694 | if ((rc = spl_vn_init())) |
10946b02 | 695 | goto out7; |
d28db80f | 696 | |
ec06701b | 697 | if ((rc = spl_proc_init())) |
10946b02 | 698 | goto out8; |
9fe45dc1 | 699 | |
ec06701b | 700 | if ((rc = spl_kstat_init())) |
10946b02 | 701 | goto out9; |
12ff95ff | 702 | |
ec06701b AX |
703 | if ((rc = spl_zlib_init())) |
704 | goto out10; | |
705 | ||
a9f2397e ED |
706 | printk(KERN_NOTICE "SPL: Loaded module v%s-%s%s\n", SPL_META_VERSION, |
707 | SPL_META_RELEASE, SPL_DEBUG_STR); | |
10946b02 AX |
708 | return (rc); |
709 | ||
ec06701b AX |
710 | out10: |
711 | spl_kstat_fini(); | |
9fe45dc1 | 712 | out9: |
ec06701b | 713 | spl_proc_fini(); |
d28db80f | 714 | out8: |
ec06701b | 715 | spl_vn_fini(); |
d28db80f | 716 | out7: |
ec06701b | 717 | spl_kmem_cache_fini(); |
d28db80f | 718 | out6: |
e9cb2b4f | 719 | spl_taskq_fini(); |
ec06701b AX |
720 | out5: |
721 | spl_tsd_fini(); | |
d28db80f BB |
722 | out4: |
723 | spl_rw_fini(); | |
9ab1ac14 | 724 | out3: |
725 | spl_mutex_fini(); | |
8d0f1ee9 | 726 | out2: |
10946b02 | 727 | spl_kvmem_fini(); |
d28db80f | 728 | out1: |
0835057e BB |
729 | printk(KERN_NOTICE "SPL: Failed to Load Solaris Porting Layer " |
730 | "v%s-%s%s, rc = %d\n", SPL_META_VERSION, SPL_META_RELEASE, | |
731 | SPL_DEBUG_STR, rc); | |
10946b02 AX |
732 | |
733 | return (rc); | |
70eadc19 | 734 | } |
735 | ||
10946b02 | 736 | static void __exit |
51a727e9 | 737 | spl_fini(void) |
70eadc19 | 738 | { |
0835057e BB |
739 | printk(KERN_NOTICE "SPL: Unloaded module v%s-%s%s\n", |
740 | SPL_META_VERSION, SPL_META_RELEASE, SPL_DEBUG_STR); | |
1114ae6a | 741 | spl_zlib_fini(); |
1114ae6a BB |
742 | spl_kstat_fini(); |
743 | spl_proc_fini(); | |
12ff95ff | 744 | spl_vn_fini(); |
ec06701b | 745 | spl_kmem_cache_fini(); |
e9cb2b4f | 746 | spl_taskq_fini(); |
ec06701b | 747 | spl_tsd_fini(); |
d28db80f | 748 | spl_rw_fini(); |
2fb9b26a | 749 | spl_mutex_fini(); |
10946b02 | 750 | spl_kvmem_fini(); |
51a727e9 | 751 | } |
51a727e9 | 752 | |
70eadc19 | 753 | module_init(spl_init); |
754 | module_exit(spl_fini); | |
755 | ||
70eadc19 | 756 | MODULE_DESCRIPTION("Solaris Porting Layer"); |
10946b02 AX |
757 | MODULE_AUTHOR(SPL_META_AUTHOR); |
758 | MODULE_LICENSE(SPL_META_LICENSE); | |
33a20369 | 759 | MODULE_VERSION(SPL_META_VERSION "-" SPL_META_RELEASE); |