]>
Commit | Line | Data |
---|---|---|
716154c5 BB |
1 | /*****************************************************************************\ |
2 | * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. | |
3 | * Copyright (C) 2007 The Regents of the University of California. | |
4 | * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). | |
5 | * Written by Brian Behlendorf <behlendorf1@llnl.gov>. | |
715f6251 BB |
6 | * UCRL-CODE-235197 |
7 | * | |
716154c5 | 8 | * This file is part of the SPL, Solaris Porting Layer. |
3d6af2dd | 9 | * For details, see <http://zfsonlinux.org/>. |
716154c5 BB |
10 | * |
11 | * The SPL is free software; you can redistribute it and/or modify it | |
12 | * under the terms of the GNU General Public License as published by the | |
13 | * Free Software Foundation; either version 2 of the License, or (at your | |
14 | * option) any later version. | |
715f6251 | 15 | * |
716154c5 | 16 | * The SPL is distributed in the hope that it will be useful, but WITHOUT |
715f6251 BB |
17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
19 | * for more details. | |
20 | * | |
21 | * You should have received a copy of the GNU General Public License along | |
716154c5 BB |
22 | * with the SPL. If not, see <http://www.gnu.org/licenses/>. |
23 | ***************************************************************************** | |
24 | * Solaris Porting Layer (SPL) Generic Implementation. | |
25 | \*****************************************************************************/ | |
715f6251 | 26 | |
#include <sys/sysmacros.h>
#include <sys/systeminfo.h>
#include <sys/vmsystm.h>
#include <sys/kobj.h>
#include <sys/kmem.h>
#include <sys/kmem_cache.h>
#include <sys/vmem.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/taskq.h>
#include <sys/tsd.h>
#include <sys/zmod.h>
#include <sys/debug.h>
#include <sys/proc.h>
#include <sys/kstat.h>
#include <sys/file.h>
#include <sys/disp.h>
#include <sys/random.h>
#include <linux/ctype.h>
#include <linux/cpumask.h>
#include <linux/kmod.h>
#include <linux/math64_compat.h>
#include <linux/proc_compat.h>
f23e92fa | 49 | |
0835057e | 50 | char spl_version[32] = "SPL v" SPL_META_VERSION "-" SPL_META_RELEASE; |
1a73940d | 51 | EXPORT_SYMBOL(spl_version); |
3561541c | 52 | |
acf0ade3 | 53 | unsigned long spl_hostid = 0; |
f23e92fa | 54 | EXPORT_SYMBOL(spl_hostid); |
fa6f7d8f DH |
55 | module_param(spl_hostid, ulong, 0644); |
56 | MODULE_PARM_DESC(spl_hostid, "The system hostid."); | |
8d0f1ee9 | 57 | |
ae4c36ad | 58 | proc_t p0 = { 0 }; |
f1b59d26 | 59 | EXPORT_SYMBOL(p0); |
70eadc19 | 60 | |
0b43696e RY |
61 | /* |
62 | * Xorshift Pseudo Random Number Generator based on work by Sebastiano Vigna | |
63 | * | |
64 | * "Further scramblings of Marsaglia's xorshift generators" | |
65 | * http://vigna.di.unimi.it/ftp/papers/xorshiftplus.pdf | |
66 | * | |
67 | * random_get_pseudo_bytes() is an API function on Illumos whose sole purpose | |
68 | * is to provide bytes containing random numbers. It is mapped to /dev/urandom | |
69 | * on Illumos, which uses a "FIPS 186-2 algorithm". No user of the SPL's | |
70 | * random_get_pseudo_bytes() needs bytes that are of cryptographic quality, so | |
71 | * we can implement it using a fast PRNG that we seed using Linux' actual | |
72 | * equivalent to random_get_pseudo_bytes(). We do this by providing each CPU | |
73 | * with an independent seed so that all calls to random_get_pseudo_bytes() are | |
74 | * free of atomic instructions. | |
75 | * | |
76 | * A consequence of using a fast PRNG is that using random_get_pseudo_bytes() | |
77 | * to generate words larger than 128 bits will paradoxically be limited to | |
78 | * `2^128 - 1` possibilities. This is because we have a sequence of `2^128 - 1` | |
79 | * 128-bit words and selecting the first will implicitly select the second. If | |
80 | * a caller finds this behavior undesirable, random_get_bytes() should be used | |
81 | * instead. | |
82 | * | |
83 | * XXX: Linux interrupt handlers that trigger within the critical section | |
84 | * formed by `s[1] = xp[1];` and `xp[0] = s[0];` and call this function will | |
85 | * see the same numbers. Nothing in the code currently calls this in an | |
86 | * interrupt handler, so this is considered to be okay. If that becomes a | |
87 | * problem, we could create a set of per-cpu variables for interrupt handlers | |
88 | * and use them when in_interrupt() from linux/preempt_mask.h evaluates to | |
89 | * true. | |
90 | */ | |
91 | static DEFINE_PER_CPU(uint64_t[2], spl_pseudo_entropy); | |
92 | ||
93 | /* | |
94 | * spl_rand_next()/spl_rand_jump() are copied from the following CC-0 licensed | |
95 | * file: | |
96 | * | |
97 | * http://xorshift.di.unimi.it/xorshift128plus.c | |
98 | */ | |
99 | ||
/*
 * Advance the two-word xorshift128+ state `s` by one step and return the
 * next 64-bit output.  Both words of `s` are overwritten.
 */
static inline uint64_t
spl_rand_next(uint64_t *s)
{
	const uint64_t hi = s[1];
	uint64_t lo = s[0];
	uint64_t mixed;

	lo ^= lo << 23;					/* a */
	mixed = lo ^ hi ^ (lo >> 18) ^ (hi >> 5);	/* b, c */

	s[0] = hi;
	s[1] = mixed;

	return (mixed + hi);
}
109 | ||
/*
 * Jump the generator state `s` far ahead in its sequence.
 *
 * For every set bit of the JUMP polynomial the current state is XOR-ed
 * into an accumulator, and the generator is stepped once per bit (128
 * steps in total).  The accumulated value then replaces the state.  The
 * caller in this file relies on this to space out the per-CPU sequences.
 */
static inline void
spl_rand_jump(uint64_t *s)
{
	static const uint64_t JUMP[] = { 0x8a5cd789635d2dff, 0x121fd2155c472f96 };
	uint64_t acc0 = 0, acc1 = 0;
	int word, bit;

	for (word = 0; word < sizeof JUMP / sizeof *JUMP; word++) {
		for (bit = 0; bit < 64; bit++) {
			if ((JUMP[word] >> bit) & 1ULL) {
				acc0 ^= s[0];
				acc1 ^= s[1];
			}
			(void) spl_rand_next(s);
		}
	}

	s[0] = acc0;
	s[1] = acc1;
}
129 | ||
130 | int | |
131 | random_get_pseudo_bytes(uint8_t *ptr, size_t len) | |
132 | { | |
133 | uint64_t *xp, s[2]; | |
134 | ||
135 | ASSERT(ptr); | |
136 | ||
137 | xp = get_cpu_var(spl_pseudo_entropy); | |
138 | ||
139 | s[0] = xp[0]; | |
140 | s[1] = xp[1]; | |
141 | ||
142 | while (len) { | |
143 | union { | |
144 | uint64_t ui64; | |
145 | uint8_t byte[sizeof (uint64_t)]; | |
146 | }entropy; | |
147 | int i = MIN(len, sizeof (uint64_t)); | |
148 | ||
149 | len -= i; | |
150 | entropy.ui64 = spl_rand_next(s); | |
151 | ||
152 | while (i--) | |
153 | *ptr++ = entropy.byte[i]; | |
154 | } | |
155 | ||
156 | xp[0] = s[0]; | |
157 | xp[1] = s[1]; | |
158 | ||
159 | put_cpu_var(spl_pseudo_entropy); | |
160 | ||
161 | return (0); | |
162 | } | |
163 | ||
164 | ||
165 | EXPORT_SYMBOL(random_get_pseudo_bytes); | |
166 | ||
a4bfd8ea | 167 | #if BITS_PER_LONG == 32 |
b61a6e8b | 168 | /* |
a4bfd8ea BB |
169 | * Support 64/64 => 64 division on a 32-bit platform. While the kernel |
170 | * provides a div64_u64() function for this we do not use it because the | |
171 | * implementation is flawed. There are cases which return incorrect | |
172 | * results as late as linux-2.6.35. Until this is fixed upstream the | |
173 | * spl must provide its own implementation. | |
174 | * | |
175 | * This implementation is a slightly modified version of the algorithm | |
176 | * proposed by the book 'Hacker's Delight'. The original source can be | |
177 | * found here and is available for use without restriction. | |
178 | * | |
179 | * http://www.hackersdelight.org/HDcode/newCode/divDouble.c | |
180 | */ | |
181 | ||
/*
 * Count the leading zero bits of a 64-bit value.  Returns 64 for x == 0.
 */
static int
nlz64(uint64_t x)
{
	int zeros = 0;
	int width;

	if (x == 0)
		return (64);

	/*
	 * Binary search from the top: if the upper `width` bits are all
	 * clear, count them and shift them out, then halve the width.
	 */
	for (width = 32; width != 0; width >>= 1) {
		if ((x >> (64 - width)) == 0) {
			zeros += width;
			x <<= width;
		}
	}

	return (zeros);
}
201 | ||
/*
 * 64-bit by 32-bit unsigned division returning the quotient.
 *
 * Newer kernels ship a div_u64() of their own; a private wrapper around
 * do_div() is used instead to simplify portability between kernel
 * versions.  do_div() updates its first argument in place; the remainder
 * it returns is discarded.
 */
static inline uint64_t
__div_u64(uint64_t u, uint32_t v)
{
	uint64_t quotient = u;

	(void) do_div(quotient, v);

	return (quotient);
}
212 | ||
/*
 * 64-bit unsigned division for 32-bit machines.
 *
 * Three cases are distinguished:
 *
 *  - The divisor needs more than 32 bits.  The divisor is normalized
 *    (shifted so its top bit is set), a single 64/32 division produces an
 *    estimate, and the estimate is corrected by at most one.
 *  - The divisor fits in 32 bits and the upper half of the dividend is
 *    smaller than the divisor.  One 64/32 division suffices because the
 *    quotient cannot overflow.
 *  - Otherwise two 64/32 divisions are done, grade-school style, one per
 *    32-bit half of the dividend.
 */
uint64_t
__udivdi3(uint64_t u, uint64_t v)
{
	uint64_t u0, u1, v1, q0, q1, k;
	int n;

	if ((v >> 32) != 0) {
		/* v >= 2**32 */
		n = nlz64(v);			/* 0 <= n <= 31 */
		v1 = (v << n) >> 32;		/* normalized divisor */
		u1 = u >> 1;			/* guarantees no overflow */
		q1 = __div_u64(u1, v1);
		q0 = (q1 << n) >> 31;		/* undo normalization and u/2 */

		/* Make the estimate exact or one too small ... */
		if (q0 != 0)
			q0--;
		/* ... and then bump it if a full divisor still fits. */
		if ((u - q0 * v) >= v)
			q0++;

		return (q0);
	}

	/* v < 2**32 and the quotient fits in a single division. */
	if ((u >> 32) < v)
		return (__div_u64(u, v));

	/* v < 2**32 but u/v would overflow: split u into two halves. */
	u1 = u >> 32;
	u0 = u & 0xFFFFFFFF;
	q1 = __div_u64(u1, v);			/* first quotient digit */
	k = u1 - q1 * v;			/* first remainder, < v */
	u0 += (k << 32);
	q0 = __div_u64(u0, v);			/* second quotient digit */

	return ((q1 << 32) + q0);
}
255 | EXPORT_SYMBOL(__udivdi3); | |
256 | ||
/*
 * 64-bit signed division for 32-bit machines.
 *
 * The magnitudes are divided with __udivdi3() and the sign is applied
 * afterwards: `sign` is all ones when u and v have different signs and
 * zero otherwise, so (q ^ sign) - sign negates q exactly when needed.
 */
int64_t
__divdi3(int64_t u, int64_t v)
{
	int64_t magnitude, sign;

	sign = (u ^ v) >> 63;
	magnitude = __udivdi3(abs64(u), abs64(v));

	return ((magnitude ^ sign) - sign);
}
268 | EXPORT_SYMBOL(__divdi3); | |
269 | ||
/*
 * 64-bit unsigned modulo for 32-bit machines, derived from the quotient:
 * remainder = dividend - divisor * (dividend / divisor).
 */
uint64_t
__umoddi3(uint64_t dividend, uint64_t divisor)
{
	uint64_t quotient = __udivdi3(dividend, divisor);

	return (dividend - (divisor * quotient));
}
550f1705 | 278 | EXPORT_SYMBOL(__umoddi3); |
a4bfd8ea | 279 | |
ef6f91ce JL |
280 | #if defined(__arm) || defined(__arm__) |
281 | /* | |
93b0dc92 JL |
282 | * Implementation of 64-bit (un)signed division for 32-bit arm machines. |
283 | * | |
284 | * Run-time ABI for the ARM Architecture (page 20). A pair of (unsigned) | |
285 | * long longs is returned in {{r0, r1}, {r2,r3}}, the quotient in {r0, r1}, | |
286 | * and the remainder in {r2, r3}. The return type is specifically left | |
287 | * set to 'void' to ensure the compiler does not overwrite these registers | |
288 | * during the return. All results are in registers as per ABI | |
ef6f91ce | 289 | */ |
93b0dc92 | 290 | void |
ef6f91ce JL |
291 | __aeabi_uldivmod(uint64_t u, uint64_t v) |
292 | { | |
93b0dc92 JL |
293 | uint64_t res; |
294 | uint64_t mod; | |
295 | ||
296 | res = __udivdi3(u, v); | |
297 | mod = __umoddi3(u, v); | |
298 | { | |
299 | register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF); | |
300 | register uint32_t r1 asm("r1") = (res >> 32); | |
301 | register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF); | |
302 | register uint32_t r3 asm("r3") = (mod >> 32); | |
303 | ||
304 | asm volatile("" | |
305 | : "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3) /* output */ | |
306 | : "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */ | |
307 | ||
308 | return; /* r0; */ | |
309 | } | |
ef6f91ce JL |
310 | } |
311 | EXPORT_SYMBOL(__aeabi_uldivmod); | |
312 | ||
93b0dc92 | 313 | void |
ef6f91ce JL |
314 | __aeabi_ldivmod(int64_t u, int64_t v) |
315 | { | |
93b0dc92 JL |
316 | int64_t res; |
317 | uint64_t mod; | |
318 | ||
319 | res = __divdi3(u, v); | |
320 | mod = __umoddi3(u, v); | |
321 | { | |
322 | register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF); | |
323 | register uint32_t r1 asm("r1") = (res >> 32); | |
324 | register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF); | |
325 | register uint32_t r3 asm("r3") = (mod >> 32); | |
326 | ||
327 | asm volatile("" | |
328 | : "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3) /* output */ | |
329 | : "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */ | |
330 | ||
331 | return; /* r0; */ | |
332 | } | |
ef6f91ce JL |
333 | } |
334 | EXPORT_SYMBOL(__aeabi_ldivmod); | |
335 | #endif /* __arm || __arm__ */ | |
96dded38 | 336 | #endif /* BITS_PER_LONG */ |
b61a6e8b | 337 | |
b871b8cd BB |
338 | /* NOTE: The strtoxx behavior is solely based on my reading of the Solaris |
339 | * ddi_strtol(9F) man page. I have not verified the behavior of these | |
340 | * functions against their Solaris counterparts. It is possible that I | |
96dded38 | 341 | * may have misinterpreted the man page or the man page is incorrect. |
b871b8cd | 342 | */ |
2ee63a54 BB |
/*
 * Solaris ddi_strtol(9F)-style string to integer conversions.
 *
 * All four functions share one contract:
 *
 *   str     NUL-terminated text to convert.
 *   endptr  Optional (may be NULL).  On success receives a pointer to the
 *           first character that was not consumed; it equals `str` when
 *           no digits could be parsed.
 *   base    Radix.  0 selects it from the prefix: "0x"/"0X" followed by a
 *           hex digit means 16, any other leading '0' means 8, anything
 *           else means 10.
 *   result  Receives the converted value on success (0 when no digits
 *           were parsed).
 *
 * Returns 0 on success, EINVAL for an empty string and ERANGE when the
 * value does not fit the result type.  On error neither *result nor
 * *endptr is modified.
 */
int ddi_strtoul(const char *, char **, int, unsigned long *);
int ddi_strtol(const char *, char **, int, long *);
int ddi_strtoull(const char *, char **, int, unsigned long long *);
int ddi_strtoll(const char *, char **, int, long long *);

#define	define_ddi_strtoux(type, valtype)				\
int ddi_strtou##type(const char *str, char **endptr,			\
    int base, valtype *result)						\
{									\
	const valtype max_value = (valtype)-1;				\
	valtype value = 0;						\
	char *ptr = (char *)str;					\
	int digit;							\
									\
	if (strlen(ptr) == 0)						\
		return (EINVAL);					\
									\
	/* Auto-detect base based on prefix */				\
	if (!base) {							\
		if (str[0] != '0') {					\
			base = 10; /* decimal */			\
		} else if (tolower((unsigned char)str[1]) == 'x' &&	\
		    isxdigit((unsigned char)str[2])) {			\
			base = 16; /* hex */				\
			ptr += 2;					\
		} else {						\
			/* The leading '0' is itself an octal digit. */	\
			base = 8; /* octal */				\
		}							\
	}								\
									\
	while (1) {							\
		if (isdigit((unsigned char)*ptr))			\
			digit = *ptr - '0';				\
		else if (isalpha((unsigned char)*ptr))			\
			digit = tolower((unsigned char)*ptr) - 'a' + 10; \
		else							\
			break;						\
									\
		if (digit >= base)					\
			break;						\
									\
		/* Reject before multiplying so wrap-around is caught */ \
		if (value > (max_value - (valtype)digit) / (valtype)base) \
			return (ERANGE);				\
									\
		value = value * base + digit;				\
		ptr++;							\
	}								\
									\
	*result = value;						\
									\
	if (endptr)							\
		*endptr = ptr;						\
									\
	return (0);							\
}

#define	define_ddi_strtox(type, valtype)				\
int ddi_strto##type(const char *str, char **endptr,			\
    int base, valtype *result)						\
{									\
	const unsigned valtype limit = ((unsigned valtype)-1) >> 1;	\
	const int negative = (*str == '-');				\
	unsigned valtype magnitude = 0;					\
	char *end = NULL;						\
	int rc;								\
									\
	/* Parse the magnitude with the unsigned variant. */		\
	rc = ddi_strtou##type(str + negative, &end, base, &magnitude);	\
	if (rc)								\
		return (rc);						\
									\
	/* The negative range is one larger than the positive one. */	\
	if (magnitude > limit + negative)				\
		return (ERANGE);					\
									\
	if (negative) {							\
		/* A sign with no digits after it consumed nothing. */	\
		if (end == str + 1)					\
			end = (char *)str;				\
		*result = magnitude ?					\
		    -(valtype)(magnitude - 1) - 1 : 0;			\
	} else {							\
		*result = (valtype)magnitude;				\
	}								\
									\
	if (endptr)							\
		*endptr = end;						\
									\
	return (0);							\
}

define_ddi_strtoux(l, unsigned long)
define_ddi_strtox(l, long)
define_ddi_strtoux(ll, unsigned long long)
define_ddi_strtox(ll, long long)
430 | ||
2f5d55aa | 431 | EXPORT_SYMBOL(ddi_strtoul); |
2ee63a54 BB |
432 | EXPORT_SYMBOL(ddi_strtol); |
433 | EXPORT_SYMBOL(ddi_strtoll); | |
434 | EXPORT_SYMBOL(ddi_strtoull); | |
2f5d55aa | 435 | |
d3126abe BB |
436 | int |
437 | ddi_copyin(const void *from, void *to, size_t len, int flags) | |
438 | { | |
439 | /* Fake ioctl() issued by kernel, 'from' is a kernel address */ | |
440 | if (flags & FKIOCTL) { | |
441 | memcpy(to, from, len); | |
442 | return 0; | |
443 | } | |
444 | ||
445 | return copyin(from, to, len); | |
446 | } | |
447 | EXPORT_SYMBOL(ddi_copyin); | |
448 | ||
449 | int | |
450 | ddi_copyout(const void *from, void *to, size_t len, int flags) | |
451 | { | |
452 | /* Fake ioctl() issued by kernel, 'from' is a kernel address */ | |
453 | if (flags & FKIOCTL) { | |
454 | memcpy(to, from, len); | |
455 | return 0; | |
456 | } | |
457 | ||
458 | return copyout(from, to, len); | |
459 | } | |
460 | EXPORT_SYMBOL(ddi_copyout); | |
461 | ||
e811949a BB |
462 | #ifndef HAVE_PUT_TASK_STRUCT |
463 | /* | |
464 | * This is only a stub function which should never be used. The SPL should | |
465 | * never be putting away the last reference on a task structure so this will | |
466 | * not be called. However, we still need to define it so the module does not | |
467 | * have undefined symbol at load time. That all said if this impossible | |
55abb092 | 468 | * thing does somehow happen PANIC immediately so we know about it. |
e811949a BB |
469 | */ |
470 | void | |
471 | __put_task_struct(struct task_struct *t) | |
472 | { | |
55abb092 | 473 | PANIC("Unexpectly put last reference on task %d\n", (int)t->pid); |
e811949a BB |
474 | } |
475 | EXPORT_SYMBOL(__put_task_struct); | |
476 | #endif /* HAVE_PUT_TASK_STRUCT */ | |
477 | ||
0d54dcb5 DH |
478 | /* |
479 | * Read the unique system identifier from the /etc/hostid file. | |
480 | * | |
481 | * The behavior of /usr/bin/hostid on Linux systems with the | |
482 | * regular eglibc and coreutils is: | |
483 | * | |
484 | * 1. Generate the value if the /etc/hostid file does not exist | |
485 | * or if the /etc/hostid file is less than four bytes in size. | |
486 | * | |
487 | * 2. If the /etc/hostid file is at least 4 bytes, then return | |
488 | * the first four bytes [0..3] in native endian order. | |
489 | * | |
490 | * 3. Always ignore bytes [4..] if they exist in the file. | |
491 | * | |
492 | * Only the first four bytes are significant, even on systems that | |
493 | * have a 64-bit word size. | |
494 | * | |
495 | * See: | |
496 | * | |
497 | * eglibc: sysdeps/unix/sysv/linux/gethostid.c | |
498 | * coreutils: src/hostid.c | |
499 | * | |
500 | * Notes: | |
501 | * | |
502 | * The /etc/hostid file on Solaris is a text file that often reads: | |
503 | * | |
504 | * # DO NOT EDIT | |
505 | * "0123456789" | |
506 | * | |
507 | * Directly copying this file to Linux results in a constant | |
508 | * hostid of 4f442023 because the default comment constitutes | |
509 | * the first four bytes of the file. | |
510 | * | |
511 | */ | |
512 | ||
513 | char *spl_hostid_path = HW_HOSTID_PATH; | |
514 | module_param(spl_hostid_path, charp, 0444); | |
515 | MODULE_PARM_DESC(spl_hostid_path, "The system hostid file (/etc/hostid)"); | |
516 | ||
517 | static int | |
518 | hostid_read(void) | |
519 | { | |
520 | int result; | |
521 | uint64_t size; | |
522 | struct _buf *file; | |
acf0ade3 | 523 | uint32_t hostid = 0; |
0d54dcb5 DH |
524 | |
525 | file = kobj_open_file(spl_hostid_path); | |
526 | ||
6b3b569d | 527 | if (file == (struct _buf *)-1) |
0d54dcb5 | 528 | return -1; |
0d54dcb5 DH |
529 | |
530 | result = kobj_get_filesize(file, &size); | |
531 | ||
532 | if (result != 0) { | |
533 | printk(KERN_WARNING | |
534 | "SPL: kobj_get_filesize returned %i on %s\n", | |
535 | result, spl_hostid_path); | |
536 | kobj_close_file(file); | |
537 | return -2; | |
538 | } | |
539 | ||
540 | if (size < sizeof(HW_HOSTID_MASK)) { | |
541 | printk(KERN_WARNING | |
542 | "SPL: Ignoring the %s file because it is %llu bytes; " | |
e8267acd BB |
543 | "expecting %lu bytes instead.\n", spl_hostid_path, |
544 | size, (unsigned long)sizeof(HW_HOSTID_MASK)); | |
0d54dcb5 DH |
545 | kobj_close_file(file); |
546 | return -3; | |
547 | } | |
548 | ||
549 | /* Read directly into the variable like eglibc does. */ | |
550 | /* Short reads are okay; native behavior is preserved. */ | |
551 | result = kobj_read_file(file, (char *)&hostid, sizeof(hostid), 0); | |
552 | ||
553 | if (result < 0) { | |
554 | printk(KERN_WARNING | |
555 | "SPL: kobj_read_file returned %i on %s\n", | |
556 | result, spl_hostid_path); | |
557 | kobj_close_file(file); | |
558 | return -4; | |
559 | } | |
560 | ||
561 | /* Mask down to 32 bits like coreutils does. */ | |
562 | spl_hostid = hostid & HW_HOSTID_MASK; | |
563 | kobj_close_file(file); | |
564 | return 0; | |
565 | } | |
566 | ||
99639e4a BB |
567 | uint32_t |
568 | zone_get_hostid(void *zone) | |
569 | { | |
a9f2397e | 570 | static int first = 1; |
99639e4a BB |
571 | |
572 | /* Only the global zone is supported */ | |
573 | ASSERT(zone == NULL); | |
574 | ||
a9f2397e ED |
575 | if (first) { |
576 | first = 0; | |
577 | ||
086476f9 | 578 | spl_hostid &= HW_HOSTID_MASK; |
a9f2397e ED |
579 | /* |
580 | * Get the hostid if it was not passed as a module parameter. | |
acf0ade3 | 581 | * Try reading the /etc/hostid file directly. |
a9f2397e | 582 | */ |
086476f9 | 583 | if (spl_hostid == 0 && hostid_read()) |
acf0ade3 | 584 | spl_hostid = 0; |
a9f2397e | 585 | |
086476f9 | 586 | |
a9f2397e ED |
587 | printk(KERN_NOTICE "SPL: using hostid 0x%08x\n", |
588 | (unsigned int) spl_hostid); | |
589 | } | |
590 | ||
acf0ade3 | 591 | return spl_hostid; |
99639e4a BB |
592 | } |
593 | EXPORT_SYMBOL(zone_get_hostid); | |
594 | ||
e5b9b344 BB |
/*
 * Bring up the kmem layer and then the vmem layer.  If vmem fails, kmem
 * is torn down again so nothing is left half-initialized.  Returns 0 on
 * success or the error of the step that failed.
 */
static int
spl_kvmem_init(void)
{
	int error;

	error = spl_kmem_init();
	if (error != 0)
		return (error);

	error = spl_vmem_init();
	if (error != 0)
		spl_kmem_fini();

	return (error);
}
612 | ||
0b43696e RY |
613 | /* |
614 | * We initialize the random number generator with 128 bits of entropy from the | |
615 | * system random number generator. In the improbable case that we have a zero | |
616 | * seed, we fallback to the system jiffies, unless it is also zero, in which | |
617 | * situation we use a preprogrammed seed. We step forward by 2^64 iterations to | |
618 | * initialize each of the per-cpu seeds so that the sequences generated on each | |
619 | * CPU are guaranteed to never overlap in practice. | |
620 | */ | |
621 | static void __init | |
622 | spl_random_init(void) | |
623 | { | |
624 | uint64_t s[2]; | |
625 | int i; | |
626 | ||
627 | get_random_bytes(s, sizeof (s)); | |
628 | ||
629 | if (s[0] == 0 && s[1] == 0) { | |
630 | if (jiffies != 0) { | |
631 | s[0] = jiffies; | |
632 | s[1] = ~0 - jiffies; | |
633 | } else { | |
634 | (void) memcpy(s, "improbable seed", sizeof (s)); | |
635 | } | |
636 | printk("SPL: get_random_bytes() returned 0 " | |
637 | "when generating random seed. Setting initial seed to " | |
638 | "0x%016llx%016llx.", cpu_to_be64(s[0]), cpu_to_be64(s[1])); | |
639 | } | |
640 | ||
641 | for (i = 0; i < NR_CPUS; i++) { | |
642 | uint64_t *wordp = per_cpu(spl_pseudo_entropy, i); | |
643 | ||
644 | spl_rand_jump(s); | |
645 | ||
646 | wordp[0] = s[0]; | |
647 | wordp[1] = s[1]; | |
648 | } | |
649 | } | |
650 | ||
e5b9b344 BB |
/*
 * Tear down the memory layers in the reverse order of spl_kvmem_init():
 * vmem first, then kmem.
 */
static void
spl_kvmem_fini(void)
{
	spl_vmem_fini();
	spl_kmem_fini();
}
657 | ||
c1bc8e61 BB |
658 | static int __init |
659 | spl_init(void) | |
57d1b188 BB |
660 | { |
661 | int rc = 0; | |
f23e92fa | 662 | |
0b43696e RY |
663 | spl_random_init(); |
664 | ||
e5b9b344 | 665 | if ((rc = spl_kvmem_init())) |
8d9a23e8 | 666 | goto out1; |
8d0f1ee9 | 667 | |
9ab1ac14 | 668 | if ((rc = spl_mutex_init())) |
8d9a23e8 | 669 | goto out2; |
9ab1ac14 | 670 | |
d28db80f | 671 | if ((rc = spl_rw_init())) |
8d9a23e8 | 672 | goto out3; |
8d0f1ee9 | 673 | |
16522ac2 | 674 | if ((rc = spl_tsd_init())) |
8d9a23e8 | 675 | goto out4; |
af828292 | 676 | |
16522ac2 | 677 | if ((rc = spl_taskq_init())) |
8d9a23e8 | 678 | goto out5; |
04a479f7 | 679 | |
16522ac2 | 680 | if ((rc = spl_kmem_cache_init())) |
8d9a23e8 | 681 | goto out6; |
e9cb2b4f | 682 | |
16522ac2 | 683 | if ((rc = spl_vn_init())) |
8d9a23e8 | 684 | goto out7; |
d28db80f | 685 | |
16522ac2 | 686 | if ((rc = spl_proc_init())) |
8d9a23e8 | 687 | goto out8; |
9fe45dc1 | 688 | |
16522ac2 | 689 | if ((rc = spl_kstat_init())) |
8d9a23e8 | 690 | goto out9; |
5c1967eb | 691 | |
16522ac2 CC |
692 | if ((rc = spl_zlib_init())) |
693 | goto out10; | |
694 | ||
a9f2397e ED |
695 | printk(KERN_NOTICE "SPL: Loaded module v%s-%s%s\n", SPL_META_VERSION, |
696 | SPL_META_RELEASE, SPL_DEBUG_STR); | |
8d9a23e8 | 697 | return (rc); |
44778f41 | 698 | |
16522ac2 CC |
699 | out10: |
700 | spl_kstat_fini(); | |
9fe45dc1 | 701 | out9: |
16522ac2 | 702 | spl_proc_fini(); |
d28db80f | 703 | out8: |
16522ac2 | 704 | spl_vn_fini(); |
d28db80f | 705 | out7: |
16522ac2 | 706 | spl_kmem_cache_fini(); |
d28db80f | 707 | out6: |
e9cb2b4f | 708 | spl_taskq_fini(); |
16522ac2 CC |
709 | out5: |
710 | spl_tsd_fini(); | |
d28db80f BB |
711 | out4: |
712 | spl_rw_fini(); | |
9ab1ac14 BB |
713 | out3: |
714 | spl_mutex_fini(); | |
8d0f1ee9 | 715 | out2: |
e5b9b344 | 716 | spl_kvmem_fini(); |
d28db80f | 717 | out1: |
0835057e BB |
718 | printk(KERN_NOTICE "SPL: Failed to Load Solaris Porting Layer " |
719 | "v%s-%s%s, rc = %d\n", SPL_META_VERSION, SPL_META_RELEASE, | |
720 | SPL_DEBUG_STR, rc); | |
8d9a23e8 | 721 | |
c1bc8e61 | 722 | return (rc); |
70eadc19 BB |
723 | } |
724 | ||
c1bc8e61 | 725 | static void __exit |
51a727e9 | 726 | spl_fini(void) |
70eadc19 | 727 | { |
0835057e BB |
728 | printk(KERN_NOTICE "SPL: Unloaded module v%s-%s%s\n", |
729 | SPL_META_VERSION, SPL_META_RELEASE, SPL_DEBUG_STR); | |
1114ae6a | 730 | spl_zlib_fini(); |
1114ae6a BB |
731 | spl_kstat_fini(); |
732 | spl_proc_fini(); | |
12ff95ff | 733 | spl_vn_fini(); |
16522ac2 | 734 | spl_kmem_cache_fini(); |
e9cb2b4f | 735 | spl_taskq_fini(); |
16522ac2 | 736 | spl_tsd_fini(); |
d28db80f | 737 | spl_rw_fini(); |
2fb9b26a | 738 | spl_mutex_fini(); |
e5b9b344 | 739 | spl_kvmem_fini(); |
70eadc19 BB |
740 | } |
741 | ||
742 | module_init(spl_init); | |
743 | module_exit(spl_fini); | |
744 | ||
70eadc19 | 745 | MODULE_DESCRIPTION("Solaris Porting Layer"); |
62032954 BB |
746 | MODULE_AUTHOR(SPL_META_AUTHOR); |
747 | MODULE_LICENSE(SPL_META_LICENSE); | |
921a35ad | 748 | MODULE_VERSION(SPL_META_VERSION "-" SPL_META_RELEASE); |