]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
572e2857 | 22 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |
34dc7c2f BB |
23 | */ |
24 | ||
34dc7c2f BB |
25 | #include <assert.h> |
26 | #include <fcntl.h> | |
27 | #include <poll.h> | |
28 | #include <stdio.h> | |
29 | #include <stdlib.h> | |
30 | #include <string.h> | |
31 | #include <zlib.h> | |
1e33ac1e | 32 | #include <sys/signal.h> |
34dc7c2f BB |
33 | #include <sys/spa.h> |
34 | #include <sys/stat.h> | |
35 | #include <sys/processor.h> | |
36 | #include <sys/zfs_context.h> | |
34dc7c2f | 37 | #include <sys/utsname.h> |
d164b209 | 38 | #include <sys/systeminfo.h> |
34dc7c2f BB |
39 | |
40 | /* | |
41 | * Emulation of kernel services in userland. | |
42 | */ | |
43 | ||
428870ff | 44 | int aok; |
34dc7c2f BB |
45 | uint64_t physmem; |
46 | vnode_t *rootdir = (vnode_t *)0xabcd1234; | |
d164b209 | 47 | char hw_serial[HW_HOSTID_LEN]; |
34dc7c2f BB |
48 | |
49 | struct utsname utsname = { | |
50 | "userland", "libzpool", "1", "1", "na" | |
51 | }; | |
52 | ||
428870ff BB |
53 | /* this only exists to have its address taken */ |
54 | struct proc p0; | |
55 | ||
34dc7c2f BB |
56 | /* |
57 | * ========================================================================= | |
58 | * threads | |
59 | * ========================================================================= | |
60 | */ | |
1e33ac1e BB |
61 | |
/* Broadcast by zk_thread_exit(); thread_fini() waits on it. */
pthread_cond_t kthread_cond = PTHREAD_COND_INITIALIZER;

/* Held while kthread_nr is changed by running threads. */
pthread_mutex_t kthread_lock = PTHREAD_MUTEX_INITIALIZER;

/* Thread-specific slot that stores each thread's kthread_t pointer. */
pthread_key_t kthread_key;

/* Count of registered kthreads, the primary thread included. */
int kthread_nr = 0;
66 | ||
67 | static void | |
68 | thread_init(void) | |
69 | { | |
70 | kthread_t *kt; | |
71 | ||
72 | VERIFY3S(pthread_key_create(&kthread_key, NULL), ==, 0); | |
73 | ||
74 | /* Create entry for primary kthread */ | |
75 | kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL); | |
76 | kt->t_tid = pthread_self(); | |
77 | kt->t_func = NULL; | |
78 | ||
79 | VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0); | |
80 | ||
81 | /* Only the main thread should be running at the moment */ | |
82 | ASSERT3S(kthread_nr, ==, 0); | |
83 | kthread_nr = 1; | |
84 | } | |
85 | ||
86 | static void | |
87 | thread_fini(void) | |
88 | { | |
89 | kthread_t *kt = curthread; | |
90 | ||
91 | ASSERT(pthread_equal(kt->t_tid, pthread_self())); | |
92 | ASSERT3P(kt->t_func, ==, NULL); | |
93 | ||
94 | umem_free(kt, sizeof(kthread_t)); | |
95 | ||
96 | /* Wait for all threads to exit via thread_exit() */ | |
97 | VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0); | |
98 | ||
99 | kthread_nr--; /* Main thread is exiting */ | |
100 | ||
101 | while (kthread_nr > 0) | |
102 | VERIFY3S(pthread_cond_wait(&kthread_cond, &kthread_lock), ==, | |
103 | 0); | |
104 | ||
105 | ASSERT3S(kthread_nr, ==, 0); | |
106 | VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0); | |
107 | ||
108 | VERIFY3S(pthread_key_delete(kthread_key), ==, 0); | |
109 | } | |
110 | ||
34dc7c2f | 111 | kthread_t * |
1e33ac1e BB |
112 | zk_thread_current(void) |
113 | { | |
114 | kthread_t *kt = pthread_getspecific(kthread_key); | |
115 | ||
116 | ASSERT3P(kt, !=, NULL); | |
117 | ||
118 | return kt; | |
119 | } | |
120 | ||
121 | void * | |
122 | zk_thread_helper(void *arg) | |
34dc7c2f | 123 | { |
1e33ac1e BB |
124 | kthread_t *kt = (kthread_t *) arg; |
125 | ||
126 | VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0); | |
34dc7c2f | 127 | |
1e33ac1e BB |
128 | VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0); |
129 | kthread_nr++; | |
130 | VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0); | |
34dc7c2f | 131 | |
1e33ac1e BB |
132 | kt->t_tid = pthread_self(); |
133 | ((thread_func_arg_t) kt->t_func)(kt->t_arg); | |
134 | ||
135 | /* Unreachable, thread must exit with thread_exit() */ | |
136 | abort(); | |
137 | ||
138 | return NULL; | |
139 | } | |
140 | ||
141 | kthread_t * | |
142 | zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg, | |
143 | size_t len, proc_t *pp, int state, pri_t pri) | |
144 | { | |
145 | kthread_t *kt; | |
146 | pthread_attr_t attr; | |
147 | size_t stack; | |
148 | ||
149 | ASSERT3S(state & ~TS_RUN, ==, 0); | |
150 | ||
151 | kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL); | |
152 | kt->t_func = func; | |
153 | kt->t_arg = arg; | |
154 | ||
155 | /* | |
156 | * The Solaris kernel stack size is 24k for x86/x86_64. | |
157 | * The Linux kernel stack size is 8k for x86/x86_64. | |
158 | * | |
159 | * We reduce the default stack size in userspace, to ensure | |
160 | * we observe stack overruns in user space as well as in | |
161 | * kernel space. PTHREAD_STACK_MIN is the minimum stack | |
162 | * required for a NULL procedure in user space and is added | |
163 | * in to the stack requirements. | |
164 | * | |
165 | * Some buggy NPTL threading implementations include the | |
166 | * guard area within the stack size allocations. In | |
167 | * this case we allocate an extra page to account for the | |
168 | * guard area since we only have two pages of usable stack | |
169 | * on Linux. | |
170 | */ | |
171 | ||
172 | stack = PTHREAD_STACK_MIN + MAX(stksize, STACK_SIZE) + | |
173 | EXTRA_GUARD_BYTES; | |
174 | ||
175 | VERIFY3S(pthread_attr_init(&attr), ==, 0); | |
176 | VERIFY3S(pthread_attr_setstacksize(&attr, stack), ==, 0); | |
177 | VERIFY3S(pthread_attr_setguardsize(&attr, PAGESIZE), ==, 0); | |
178 | ||
179 | VERIFY3S(pthread_create(&kt->t_tid, &attr, &zk_thread_helper, kt), | |
180 | ==, 0); | |
181 | ||
182 | VERIFY3S(pthread_attr_destroy(&attr), ==, 0); | |
183 | ||
184 | return kt; | |
185 | } | |
186 | ||
187 | void | |
188 | zk_thread_exit(void) | |
189 | { | |
190 | kthread_t *kt = curthread; | |
191 | ||
192 | ASSERT(pthread_equal(kt->t_tid, pthread_self())); | |
193 | ||
194 | umem_free(kt, sizeof(kthread_t)); | |
195 | ||
196 | pthread_mutex_lock(&kthread_lock); | |
197 | kthread_nr--; | |
198 | pthread_mutex_unlock(&kthread_lock); | |
199 | ||
200 | pthread_cond_broadcast(&kthread_cond); | |
201 | pthread_exit((void *)TS_MAGIC); | |
202 | } | |
203 | ||
204 | void | |
205 | zk_thread_join(kt_did_t tid) | |
206 | { | |
207 | void *ret; | |
208 | ||
209 | pthread_join((pthread_t)tid, &ret); | |
210 | VERIFY3P(ret, ==, (void *)TS_MAGIC); | |
34dc7c2f BB |
211 | } |
212 | ||
213 | /* | |
214 | * ========================================================================= | |
215 | * kstats | |
216 | * ========================================================================= | |
217 | */ | |
218 | /*ARGSUSED*/ | |
219 | kstat_t * | |
220 | kstat_create(char *module, int instance, char *name, char *class, | |
221 | uchar_t type, ulong_t ndata, uchar_t ks_flag) | |
222 | { | |
223 | return (NULL); | |
224 | } | |
225 | ||
226 | /*ARGSUSED*/ | |
227 | void | |
228 | kstat_install(kstat_t *ksp) | |
229 | {} | |
230 | ||
231 | /*ARGSUSED*/ | |
232 | void | |
233 | kstat_delete(kstat_t *ksp) | |
234 | {} | |
235 | ||
236 | /* | |
237 | * ========================================================================= | |
238 | * mutexes | |
239 | * ========================================================================= | |
240 | */ | |
1e33ac1e | 241 | |
34dc7c2f | 242 | void |
1e33ac1e | 243 | mutex_init(kmutex_t *mp, char *name, int type, void *cookie) |
34dc7c2f | 244 | { |
1e33ac1e BB |
245 | ASSERT3S(type, ==, MUTEX_DEFAULT); |
246 | ASSERT3P(cookie, ==, NULL); | |
247 | mp->m_owner = MTX_INIT; | |
248 | mp->m_magic = MTX_MAGIC; | |
249 | VERIFY3S(pthread_mutex_init(&mp->m_lock, NULL), ==, 0); | |
34dc7c2f BB |
250 | } |
251 | ||
252 | void | |
1e33ac1e | 253 | mutex_destroy(kmutex_t *mp) |
34dc7c2f | 254 | { |
1e33ac1e BB |
255 | ASSERT3U(mp->m_magic, ==, MTX_MAGIC); |
256 | ASSERT3P(mp->m_owner, ==, MTX_INIT); | |
257 | VERIFY3S(pthread_mutex_destroy(&(mp)->m_lock), ==, 0); | |
258 | mp->m_owner = MTX_DEST; | |
259 | mp->m_magic = 0; | |
34dc7c2f BB |
260 | } |
261 | ||
262 | void | |
263 | mutex_enter(kmutex_t *mp) | |
264 | { | |
1e33ac1e BB |
265 | ASSERT3U(mp->m_magic, ==, MTX_MAGIC); |
266 | ASSERT3P(mp->m_owner, !=, MTX_DEST); | |
267 | ASSERT3P(mp->m_owner, !=, curthread); | |
268 | VERIFY3S(pthread_mutex_lock(&mp->m_lock), ==, 0); | |
269 | ASSERT3P(mp->m_owner, ==, MTX_INIT); | |
34dc7c2f BB |
270 | mp->m_owner = curthread; |
271 | } | |
272 | ||
273 | int | |
274 | mutex_tryenter(kmutex_t *mp) | |
275 | { | |
1e33ac1e BB |
276 | ASSERT3U(mp->m_magic, ==, MTX_MAGIC); |
277 | ASSERT3P(mp->m_owner, !=, MTX_DEST); | |
278 | if (0 == pthread_mutex_trylock(&mp->m_lock)) { | |
279 | ASSERT3P(mp->m_owner, ==, MTX_INIT); | |
34dc7c2f BB |
280 | mp->m_owner = curthread; |
281 | return (1); | |
282 | } else { | |
283 | return (0); | |
284 | } | |
285 | } | |
286 | ||
287 | void | |
288 | mutex_exit(kmutex_t *mp) | |
289 | { | |
1e33ac1e BB |
290 | ASSERT3U(mp->m_magic, ==, MTX_MAGIC); |
291 | ASSERT3P(mutex_owner(mp), ==, curthread); | |
292 | mp->m_owner = MTX_INIT; | |
293 | VERIFY3S(pthread_mutex_unlock(&mp->m_lock), ==, 0); | |
34dc7c2f BB |
294 | } |
295 | ||
296 | void * | |
297 | mutex_owner(kmutex_t *mp) | |
298 | { | |
1e33ac1e | 299 | ASSERT3U(mp->m_magic, ==, MTX_MAGIC); |
34dc7c2f BB |
300 | return (mp->m_owner); |
301 | } | |
302 | ||
1e33ac1e BB |
303 | int |
304 | mutex_held(kmutex_t *mp) | |
305 | { | |
306 | return (mp->m_owner == curthread); | |
307 | } | |
308 | ||
34dc7c2f BB |
309 | /* |
310 | * ========================================================================= | |
311 | * rwlocks | |
312 | * ========================================================================= | |
313 | */ | |
1e33ac1e | 314 | |
34dc7c2f BB |
315 | void |
316 | rw_init(krwlock_t *rwlp, char *name, int type, void *arg) | |
317 | { | |
1e33ac1e BB |
318 | ASSERT3S(type, ==, RW_DEFAULT); |
319 | ASSERT3P(arg, ==, NULL); | |
320 | VERIFY3S(pthread_rwlock_init(&rwlp->rw_lock, NULL), ==, 0); | |
321 | rwlp->rw_owner = RW_INIT; | |
322 | rwlp->rw_wr_owner = RW_INIT; | |
323 | rwlp->rw_readers = 0; | |
324 | rwlp->rw_magic = RW_MAGIC; | |
34dc7c2f BB |
325 | } |
326 | ||
327 | void | |
328 | rw_destroy(krwlock_t *rwlp) | |
329 | { | |
1e33ac1e BB |
330 | ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC); |
331 | ||
332 | VERIFY3S(pthread_rwlock_destroy(&rwlp->rw_lock), ==, 0); | |
333 | rwlp->rw_magic = 0; | |
34dc7c2f BB |
334 | } |
335 | ||
336 | void | |
337 | rw_enter(krwlock_t *rwlp, krw_t rw) | |
338 | { | |
1e33ac1e BB |
339 | ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC); |
340 | ASSERT3P(rwlp->rw_owner, !=, curthread); | |
341 | ASSERT3P(rwlp->rw_wr_owner, !=, curthread); | |
34dc7c2f | 342 | |
1e33ac1e BB |
343 | if (rw == RW_READER) { |
344 | VERIFY3S(pthread_rwlock_rdlock(&rwlp->rw_lock), ==, 0); | |
345 | ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT); | |
346 | ||
347 | atomic_inc_uint(&rwlp->rw_readers); | |
348 | } else { | |
349 | VERIFY3S(pthread_rwlock_wrlock(&rwlp->rw_lock), ==, 0); | |
350 | ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT); | |
351 | ASSERT3U(rwlp->rw_readers, ==, 0); | |
352 | ||
353 | rwlp->rw_wr_owner = curthread; | |
354 | } | |
34dc7c2f BB |
355 | |
356 | rwlp->rw_owner = curthread; | |
357 | } | |
358 | ||
359 | void | |
360 | rw_exit(krwlock_t *rwlp) | |
361 | { | |
1e33ac1e BB |
362 | ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC); |
363 | ASSERT(RW_LOCK_HELD(rwlp)); | |
364 | ||
365 | if (RW_READ_HELD(rwlp)) | |
366 | atomic_dec_uint(&rwlp->rw_readers); | |
367 | else | |
368 | rwlp->rw_wr_owner = RW_INIT; | |
34dc7c2f | 369 | |
1e33ac1e BB |
370 | rwlp->rw_owner = RW_INIT; |
371 | VERIFY3S(pthread_rwlock_unlock(&rwlp->rw_lock), ==, 0); | |
34dc7c2f BB |
372 | } |
373 | ||
374 | int | |
375 | rw_tryenter(krwlock_t *rwlp, krw_t rw) | |
376 | { | |
377 | int rv; | |
378 | ||
1e33ac1e | 379 | ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC); |
34dc7c2f BB |
380 | |
381 | if (rw == RW_READER) | |
1e33ac1e | 382 | rv = pthread_rwlock_tryrdlock(&rwlp->rw_lock); |
34dc7c2f | 383 | else |
1e33ac1e | 384 | rv = pthread_rwlock_trywrlock(&rwlp->rw_lock); |
34dc7c2f BB |
385 | |
386 | if (rv == 0) { | |
1e33ac1e BB |
387 | ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT); |
388 | ||
389 | if (rw == RW_READER) | |
390 | atomic_inc_uint(&rwlp->rw_readers); | |
391 | else { | |
392 | ASSERT3U(rwlp->rw_readers, ==, 0); | |
393 | rwlp->rw_wr_owner = curthread; | |
394 | } | |
395 | ||
34dc7c2f BB |
396 | rwlp->rw_owner = curthread; |
397 | return (1); | |
398 | } | |
399 | ||
1e33ac1e BB |
400 | VERIFY3S(rv, ==, EBUSY); |
401 | ||
34dc7c2f BB |
402 | return (0); |
403 | } | |
404 | ||
34dc7c2f BB |
405 | int |
406 | rw_tryupgrade(krwlock_t *rwlp) | |
407 | { | |
1e33ac1e | 408 | ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC); |
34dc7c2f BB |
409 | |
410 | return (0); | |
411 | } | |
412 | ||
413 | /* | |
414 | * ========================================================================= | |
415 | * condition variables | |
416 | * ========================================================================= | |
417 | */ | |
1e33ac1e | 418 | |
34dc7c2f BB |
419 | void |
420 | cv_init(kcondvar_t *cv, char *name, int type, void *arg) | |
421 | { | |
1e33ac1e BB |
422 | ASSERT3S(type, ==, CV_DEFAULT); |
423 | cv->cv_magic = CV_MAGIC; | |
424 | VERIFY3S(pthread_cond_init(&cv->cv, NULL), ==, 0); | |
34dc7c2f BB |
425 | } |
426 | ||
427 | void | |
428 | cv_destroy(kcondvar_t *cv) | |
429 | { | |
1e33ac1e BB |
430 | ASSERT3U(cv->cv_magic, ==, CV_MAGIC); |
431 | VERIFY3S(pthread_cond_destroy(&cv->cv), ==, 0); | |
432 | cv->cv_magic = 0; | |
34dc7c2f BB |
433 | } |
434 | ||
435 | void | |
436 | cv_wait(kcondvar_t *cv, kmutex_t *mp) | |
437 | { | |
1e33ac1e BB |
438 | ASSERT3U(cv->cv_magic, ==, CV_MAGIC); |
439 | ASSERT3P(mutex_owner(mp), ==, curthread); | |
440 | mp->m_owner = MTX_INIT; | |
441 | int ret = pthread_cond_wait(&cv->cv, &mp->m_lock); | |
442 | if (ret != 0) | |
443 | VERIFY3S(ret, ==, EINTR); | |
34dc7c2f BB |
444 | mp->m_owner = curthread; |
445 | } | |
446 | ||
447 | clock_t | |
448 | cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) | |
449 | { | |
450 | int error; | |
1e33ac1e | 451 | struct timeval tv; |
34dc7c2f BB |
452 | timestruc_t ts; |
453 | clock_t delta; | |
454 | ||
1e33ac1e BB |
455 | ASSERT3U(cv->cv_magic, ==, CV_MAGIC); |
456 | ||
34dc7c2f | 457 | top: |
428870ff | 458 | delta = abstime - ddi_get_lbolt(); |
34dc7c2f BB |
459 | if (delta <= 0) |
460 | return (-1); | |
461 | ||
1e33ac1e BB |
462 | VERIFY(gettimeofday(&tv, NULL) == 0); |
463 | ||
464 | ts.tv_sec = tv.tv_sec + delta / hz; | |
465 | ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz); | |
466 | if (ts.tv_nsec >= NANOSEC) { | |
467 | ts.tv_sec++; | |
468 | ts.tv_nsec -= NANOSEC; | |
469 | } | |
34dc7c2f | 470 | |
1e33ac1e BB |
471 | ASSERT3P(mutex_owner(mp), ==, curthread); |
472 | mp->m_owner = MTX_INIT; | |
473 | error = pthread_cond_timedwait(&cv->cv, &mp->m_lock, &ts); | |
34dc7c2f BB |
474 | mp->m_owner = curthread; |
475 | ||
1e33ac1e | 476 | if (error == ETIMEDOUT) |
34dc7c2f BB |
477 | return (-1); |
478 | ||
479 | if (error == EINTR) | |
480 | goto top; | |
481 | ||
1e33ac1e | 482 | VERIFY3S(error, ==, 0); |
34dc7c2f BB |
483 | |
484 | return (1); | |
485 | } | |
486 | ||
487 | void | |
488 | cv_signal(kcondvar_t *cv) | |
489 | { | |
1e33ac1e BB |
490 | ASSERT3U(cv->cv_magic, ==, CV_MAGIC); |
491 | VERIFY3S(pthread_cond_signal(&cv->cv), ==, 0); | |
34dc7c2f BB |
492 | } |
493 | ||
494 | void | |
495 | cv_broadcast(kcondvar_t *cv) | |
496 | { | |
1e33ac1e BB |
497 | ASSERT3U(cv->cv_magic, ==, CV_MAGIC); |
498 | VERIFY3S(pthread_cond_broadcast(&cv->cv), ==, 0); | |
34dc7c2f BB |
499 | } |
500 | ||
501 | /* | |
502 | * ========================================================================= | |
503 | * vnode operations | |
504 | * ========================================================================= | |
505 | */ | |
506 | /* | |
507 | * Note: for the xxxat() versions of these functions, we assume that the | |
508 | * starting vp is always rootdir (which is true for spa_directory.c, the only | |
509 | * ZFS consumer of these interfaces). We assert this is true, and then emulate | |
510 | * them by adding '/' in front of the path. | |
511 | */ | |
512 | ||
513 | /*ARGSUSED*/ | |
514 | int | |
515 | vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) | |
516 | { | |
517 | int fd; | |
518 | vnode_t *vp; | |
519 | int old_umask; | |
5ae4e2c2 | 520 | char *realpath; |
34dc7c2f | 521 | struct stat64 st; |
4d58b69d | 522 | int err; |
34dc7c2f | 523 | |
5ae4e2c2 BB |
524 | realpath = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); |
525 | ||
34dc7c2f BB |
526 | /* |
527 | * If we're accessing a real disk from userland, we need to use | |
528 | * the character interface to avoid caching. This is particularly | |
529 | * important if we're trying to look at a real in-kernel storage | |
530 | * pool from userland, e.g. via zdb, because otherwise we won't | |
531 | * see the changes occurring under the segmap cache. | |
532 | * On the other hand, the stupid character device returns zero | |
533 | * for its size. So -- gag -- we open the block device to get | |
534 | * its size, and remember it for subsequent VOP_GETATTR(). | |
535 | */ | |
536 | if (strncmp(path, "/dev/", 5) == 0) { | |
537 | char *dsk; | |
538 | fd = open64(path, O_RDONLY); | |
5ae4e2c2 BB |
539 | if (fd == -1) { |
540 | err = errno; | |
541 | free(realpath); | |
542 | return (err); | |
543 | } | |
34dc7c2f | 544 | if (fstat64(fd, &st) == -1) { |
5ae4e2c2 | 545 | err = errno; |
34dc7c2f | 546 | close(fd); |
5ae4e2c2 BB |
547 | free(realpath); |
548 | return (err); | |
34dc7c2f BB |
549 | } |
550 | close(fd); | |
551 | (void) sprintf(realpath, "%s", path); | |
552 | dsk = strstr(path, "/dsk/"); | |
553 | if (dsk != NULL) | |
554 | (void) sprintf(realpath + (dsk - path) + 1, "r%s", | |
555 | dsk + 1); | |
556 | } else { | |
557 | (void) sprintf(realpath, "%s", path); | |
5ae4e2c2 BB |
558 | if (!(flags & FCREAT) && stat64(realpath, &st) == -1) { |
559 | err = errno; | |
560 | free(realpath); | |
561 | return (err); | |
562 | } | |
34dc7c2f BB |
563 | } |
564 | ||
565 | if (flags & FCREAT) | |
566 | old_umask = umask(0); | |
567 | ||
568 | /* | |
569 | * The construct 'flags - FREAD' conveniently maps combinations of | |
570 | * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR. | |
571 | */ | |
572 | fd = open64(realpath, flags - FREAD, mode); | |
5ae4e2c2 | 573 | free(realpath); |
34dc7c2f BB |
574 | |
575 | if (flags & FCREAT) | |
576 | (void) umask(old_umask); | |
577 | ||
578 | if (fd == -1) | |
579 | return (errno); | |
580 | ||
581 | if (fstat64(fd, &st) == -1) { | |
4d58b69d | 582 | err = errno; |
34dc7c2f | 583 | close(fd); |
4d58b69d | 584 | return (err); |
34dc7c2f BB |
585 | } |
586 | ||
587 | (void) fcntl(fd, F_SETFD, FD_CLOEXEC); | |
588 | ||
589 | *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL); | |
590 | ||
591 | vp->v_fd = fd; | |
592 | vp->v_size = st.st_size; | |
593 | vp->v_path = spa_strdup(path); | |
594 | ||
595 | return (0); | |
596 | } | |
597 | ||
598 | /*ARGSUSED*/ | |
599 | int | |
600 | vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, | |
601 | int x3, vnode_t *startvp, int fd) | |
602 | { | |
603 | char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL); | |
604 | int ret; | |
605 | ||
606 | ASSERT(startvp == rootdir); | |
607 | (void) sprintf(realpath, "/%s", path); | |
608 | ||
609 | /* fd ignored for now, need if want to simulate nbmand support */ | |
610 | ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3); | |
611 | ||
612 | umem_free(realpath, strlen(path) + 2); | |
613 | ||
614 | return (ret); | |
615 | } | |
616 | ||
617 | /*ARGSUSED*/ | |
618 | int | |
619 | vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, | |
620 | int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp) | |
621 | { | |
4d58b69d | 622 | ssize_t rc, done = 0, split; |
34dc7c2f BB |
623 | |
624 | if (uio == UIO_READ) { | |
4d58b69d | 625 | rc = pread64(vp->v_fd, addr, len, offset); |
34dc7c2f BB |
626 | } else { |
627 | /* | |
628 | * To simulate partial disk writes, we split writes into two | |
629 | * system calls so that the process can be killed in between. | |
630 | */ | |
631 | split = (len > 0 ? rand() % len : 0); | |
4d58b69d RC |
632 | rc = pwrite64(vp->v_fd, addr, split, offset); |
633 | if (rc != -1) { | |
634 | done = rc; | |
635 | rc = pwrite64(vp->v_fd, (char *)addr + split, | |
636 | len - split, offset + split); | |
637 | } | |
34dc7c2f BB |
638 | } |
639 | ||
4d58b69d | 640 | if (rc == -1) |
34dc7c2f | 641 | return (errno); |
4d58b69d RC |
642 | |
643 | done += rc; | |
644 | ||
34dc7c2f | 645 | if (residp) |
4d58b69d RC |
646 | *residp = len - done; |
647 | else if (done != len) | |
34dc7c2f BB |
648 | return (EIO); |
649 | return (0); | |
650 | } | |
651 | ||
652 | void | |
653 | vn_close(vnode_t *vp) | |
654 | { | |
655 | close(vp->v_fd); | |
656 | spa_strfree(vp->v_path); | |
657 | umem_free(vp, sizeof (vnode_t)); | |
658 | } | |
659 | ||
428870ff BB |
660 | /* |
661 | * At a minimum we need to update the size since vdev_reopen() | |
662 | * will no longer call vn_openat(). | |
663 | */ | |
664 | int | |
665 | fop_getattr(vnode_t *vp, vattr_t *vap) | |
666 | { | |
667 | struct stat64 st; | |
668 | ||
669 | if (fstat64(vp->v_fd, &st) == -1) { | |
670 | close(vp->v_fd); | |
671 | return (errno); | |
672 | } | |
673 | ||
674 | vap->va_size = st.st_size; | |
675 | return (0); | |
676 | } | |
677 | ||
34dc7c2f BB |
678 | #ifdef ZFS_DEBUG |
679 | ||
680 | /* | |
681 | * ========================================================================= | |
682 | * Figure out which debugging statements to print | |
683 | * ========================================================================= | |
684 | */ | |
685 | ||
/*
 * Comma-separated list of debug selectors, taken by dprintf_setup() from
 * a "debug=" argument or the ZFS_DEBUG environment variable.
 */
static char *dprintf_string;

/* Non-zero once dprintf_setup() has found the "on" selector. */
static int dprintf_print_all;
688 | ||
689 | int | |
690 | dprintf_find_string(const char *string) | |
691 | { | |
692 | char *tmp_str = dprintf_string; | |
693 | int len = strlen(string); | |
694 | ||
695 | /* | |
696 | * Find out if this is a string we want to print. | |
697 | * String format: file1.c,function_name1,file2.c,file3.c | |
698 | */ | |
699 | ||
700 | while (tmp_str != NULL) { | |
701 | if (strncmp(tmp_str, string, len) == 0 && | |
702 | (tmp_str[len] == ',' || tmp_str[len] == '\0')) | |
703 | return (1); | |
704 | tmp_str = strchr(tmp_str, ','); | |
705 | if (tmp_str != NULL) | |
706 | tmp_str++; /* Get rid of , */ | |
707 | } | |
708 | return (0); | |
709 | } | |
710 | ||
711 | void | |
712 | dprintf_setup(int *argc, char **argv) | |
713 | { | |
714 | int i, j; | |
715 | ||
716 | /* | |
717 | * Debugging can be specified two ways: by setting the | |
718 | * environment variable ZFS_DEBUG, or by including a | |
719 | * "debug=..." argument on the command line. The command | |
720 | * line setting overrides the environment variable. | |
721 | */ | |
722 | ||
723 | for (i = 1; i < *argc; i++) { | |
724 | int len = strlen("debug="); | |
725 | /* First look for a command line argument */ | |
726 | if (strncmp("debug=", argv[i], len) == 0) { | |
727 | dprintf_string = argv[i] + len; | |
728 | /* Remove from args */ | |
729 | for (j = i; j < *argc; j++) | |
730 | argv[j] = argv[j+1]; | |
731 | argv[j] = NULL; | |
732 | (*argc)--; | |
733 | } | |
734 | } | |
735 | ||
736 | if (dprintf_string == NULL) { | |
737 | /* Look for ZFS_DEBUG environment variable */ | |
738 | dprintf_string = getenv("ZFS_DEBUG"); | |
739 | } | |
740 | ||
741 | /* | |
742 | * Are we just turning on all debugging? | |
743 | */ | |
744 | if (dprintf_find_string("on")) | |
745 | dprintf_print_all = 1; | |
746 | } | |
747 | ||
748 | /* | |
749 | * ========================================================================= | |
750 | * debug printfs | |
751 | * ========================================================================= | |
752 | */ | |
753 | void | |
754 | __dprintf(const char *file, const char *func, int line, const char *fmt, ...) | |
755 | { | |
756 | const char *newfile; | |
757 | va_list adx; | |
758 | ||
759 | /* | |
760 | * Get rid of annoying "../common/" prefix to filename. | |
761 | */ | |
762 | newfile = strrchr(file, '/'); | |
763 | if (newfile != NULL) { | |
764 | newfile = newfile + 1; /* Get rid of leading / */ | |
765 | } else { | |
766 | newfile = file; | |
767 | } | |
768 | ||
769 | if (dprintf_print_all || | |
770 | dprintf_find_string(newfile) || | |
771 | dprintf_find_string(func)) { | |
772 | /* Print out just the function name if requested */ | |
773 | flockfile(stdout); | |
774 | if (dprintf_find_string("pid")) | |
775 | (void) printf("%d ", getpid()); | |
776 | if (dprintf_find_string("tid")) | |
1e33ac1e | 777 | (void) printf("%u ", (uint_t) pthread_self()); |
34dc7c2f BB |
778 | if (dprintf_find_string("cpu")) |
779 | (void) printf("%u ", getcpuid()); | |
780 | if (dprintf_find_string("time")) | |
781 | (void) printf("%llu ", gethrtime()); | |
782 | if (dprintf_find_string("long")) | |
783 | (void) printf("%s, line %d: ", newfile, line); | |
784 | (void) printf("%s: ", func); | |
785 | va_start(adx, fmt); | |
786 | (void) vprintf(fmt, adx); | |
787 | va_end(adx); | |
788 | funlockfile(stdout); | |
789 | } | |
790 | } | |
791 | ||
792 | #endif /* ZFS_DEBUG */ | |
793 | ||
794 | /* | |
795 | * ========================================================================= | |
796 | * cmn_err() and panic() | |
797 | * ========================================================================= | |
798 | */ | |
799 | static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" }; | |
800 | static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" }; | |
801 | ||
/*
 * Print "error: " followed by the formatted message and a newline to
 * stderr, then abort the process.  Does not return.
 */
void
vpanic(const char *fmt, va_list adx)
{
	(void) fputs("error: ", stderr);
	(void) vfprintf(stderr, fmt, adx);
	(void) fputc('\n', stderr);

	abort();	/* think of it as a "user-level crash dump" */
}
811 | ||
/* Variadic front end for vpanic(); does not return. */
void
panic(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vpanic(fmt, args);
	va_end(args);
}
821 | ||
822 | void | |
823 | vcmn_err(int ce, const char *fmt, va_list adx) | |
824 | { | |
825 | if (ce == CE_PANIC) | |
826 | vpanic(fmt, adx); | |
827 | if (ce != CE_NOTE) { /* suppress noise in userland stress testing */ | |
828 | (void) fprintf(stderr, "%s", ce_prefix[ce]); | |
829 | (void) vfprintf(stderr, fmt, adx); | |
830 | (void) fprintf(stderr, "%s", ce_suffix[ce]); | |
831 | } | |
832 | } | |
833 | ||
/* Variadic front end for vcmn_err(). */
/*PRINTFLIKE2*/
void
cmn_err(int ce, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vcmn_err(ce, fmt, args);
	va_end(args);
}
844 | ||
845 | /* | |
846 | * ========================================================================= | |
847 | * kobj interfaces | |
848 | * ========================================================================= | |
849 | */ | |
850 | struct _buf * | |
851 | kobj_open_file(char *name) | |
852 | { | |
853 | struct _buf *file; | |
854 | vnode_t *vp; | |
855 | ||
856 | /* set vp as the _fd field of the file */ | |
857 | if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir, | |
858 | -1) != 0) | |
859 | return ((void *)-1UL); | |
860 | ||
861 | file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL); | |
862 | file->_fd = (intptr_t)vp; | |
863 | return (file); | |
864 | } | |
865 | ||
866 | int | |
867 | kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off) | |
868 | { | |
869 | ssize_t resid; | |
870 | ||
871 | vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off, | |
872 | UIO_SYSSPACE, 0, 0, 0, &resid); | |
873 | ||
874 | return (size - resid); | |
875 | } | |
876 | ||
877 | void | |
878 | kobj_close_file(struct _buf *file) | |
879 | { | |
880 | vn_close((vnode_t *)file->_fd); | |
881 | umem_free(file, sizeof (struct _buf)); | |
882 | } | |
883 | ||
884 | int | |
885 | kobj_get_filesize(struct _buf *file, uint64_t *size) | |
886 | { | |
887 | struct stat64 st; | |
888 | vnode_t *vp = (vnode_t *)file->_fd; | |
889 | ||
890 | if (fstat64(vp->v_fd, &st) == -1) { | |
891 | vn_close(vp); | |
892 | return (errno); | |
893 | } | |
894 | *size = st.st_size; | |
895 | return (0); | |
896 | } | |
897 | ||
898 | /* | |
899 | * ========================================================================= | |
900 | * misc routines | |
901 | * ========================================================================= | |
902 | */ | |
903 | ||
904 | void | |
905 | delay(clock_t ticks) | |
906 | { | |
907 | poll(0, 0, ticks * (1000 / hz)); | |
908 | } | |
909 | ||
/*
 * Find highest one bit set.
 *	Returns bit number + 1 of highest bit that is set, otherwise returns 0.
 * High order bit is 31 (or 63 in _LP64 kernel).
 */
int
highbit(ulong_t i)
{
	/* Binary search: test the upper half, and if set, shift it down */
	static const struct {
		ulong_t	mask;
		int	shift;
	} steps[] = {
		{ 0xffff0000, 16 },
		{ 0xff00, 8 },
		{ 0xf0, 4 },
		{ 0xc, 2 },
		{ 0x2, 1 }
	};
	int h = 1;
	unsigned int s;

	if (i == 0)
		return (0);
#ifdef _LP64
	if (i & 0xffffffff00000000ul) {
		h += 32;
		i >>= 32;
	}
#endif
	for (s = 0; s < sizeof (steps) / sizeof (steps[0]); s++) {
		if (i & steps[s].mask) {
			h += steps[s].shift;
			i >>= steps[s].shift;
		}
	}
	return (h);
}
944 | ||
/*
 * Descriptors for /dev/random and /dev/urandom; opened by kernel_init()
 * and reset to -1 by kernel_fini().
 */
static int random_fd = -1;
static int urandom_fd = -1;
946 | ||
947 | static int | |
948 | random_get_bytes_common(uint8_t *ptr, size_t len, int fd) | |
949 | { | |
950 | size_t resid = len; | |
951 | ssize_t bytes; | |
952 | ||
953 | ASSERT(fd != -1); | |
954 | ||
955 | while (resid != 0) { | |
956 | bytes = read(fd, ptr, resid); | |
957 | ASSERT3S(bytes, >=, 0); | |
958 | ptr += bytes; | |
959 | resid -= bytes; | |
960 | } | |
961 | ||
962 | return (0); | |
963 | } | |
964 | ||
965 | int | |
966 | random_get_bytes(uint8_t *ptr, size_t len) | |
967 | { | |
968 | return (random_get_bytes_common(ptr, len, random_fd)); | |
969 | } | |
970 | ||
971 | int | |
972 | random_get_pseudo_bytes(uint8_t *ptr, size_t len) | |
973 | { | |
974 | return (random_get_bytes_common(ptr, len, urandom_fd)); | |
975 | } | |
976 | ||
/*
 * Convert the string 'hw_serial' to an unsigned long in the given base.
 *
 * The converted value is always stored in *result.  If 'nptr' is not
 * NULL, *nptr is set to the first character that was not consumed.
 *
 * Returns 0 on success, or the errno value set by strtoul() (e.g. ERANGE
 * on overflow).  errno is cleared before the conversion so that a stale
 * value left by an earlier call is never reported as a failure.
 */
int
ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
{
	char *end;

	errno = 0;
	*result = strtoul(hw_serial, &end, base);
	if (nptr != NULL)
		*nptr = end;

	return (errno);
}
987 | ||
428870ff BB |
/*
 * Convert the string 'str' to an unsigned long long in the given base.
 *
 * The converted value is always stored in *result.  If 'nptr' is not
 * NULL, *nptr is set to the first character that was not consumed.
 *
 * Returns 0 on success, or the errno value set by strtoull() (e.g.
 * ERANGE on overflow).  errno is cleared before the conversion so that a
 * stale value left by an earlier call is never reported as a failure.
 */
int
ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
{
	char *end;

	errno = 0;
	*result = strtoull(str, &end, base);
	if (nptr != NULL)
		*nptr = end;

	return (errno);
}
998 | ||
34dc7c2f BB |
999 | /* |
1000 | * ========================================================================= | |
1001 | * kernel emulation setup & teardown | |
1002 | * ========================================================================= | |
1003 | */ | |
/*
 * Allocation-failure callback handed to umem_nofail_callback() by
 * kernel_init(): write a message to stderr and abort().
 */
static int
umem_out_of_memory(void)
{
	static const char msg[] = "out of memory -- generating core dump\n";

	(void) fputs(msg, stderr);
	abort();

	/* Not reached; present only because the function returns int. */
	return (0);
}
1013 | ||
1014 | void | |
1015 | kernel_init(int mode) | |
1016 | { | |
1017 | umem_nofail_callback(umem_out_of_memory); | |
1018 | ||
1019 | physmem = sysconf(_SC_PHYS_PAGES); | |
1020 | ||
1021 | dprintf("physmem = %llu pages (%.2f GB)\n", physmem, | |
1022 | (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30)); | |
1023 | ||
428870ff BB |
1024 | (void) snprintf(hw_serial, sizeof (hw_serial), "%ld", |
1025 | (mode & FWRITE) ? gethostid() : 0); | |
34dc7c2f BB |
1026 | |
1027 | VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1); | |
1028 | VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1); | |
1029 | ||
1e33ac1e | 1030 | thread_init(); |
b128c09f BB |
1031 | system_taskq_init(); |
1032 | ||
34dc7c2f BB |
1033 | spa_init(mode); |
1034 | } | |
1035 | ||
1036 | void | |
1037 | kernel_fini(void) | |
1038 | { | |
1039 | spa_fini(); | |
1040 | ||
428870ff | 1041 | system_taskq_fini(); |
1e33ac1e | 1042 | thread_fini(); |
428870ff | 1043 | |
34dc7c2f BB |
1044 | close(random_fd); |
1045 | close(urandom_fd); | |
1046 | ||
1047 | random_fd = -1; | |
1048 | urandom_fd = -1; | |
1049 | } | |
1050 | ||
34dc7c2f BB |
1051 | uid_t |
1052 | crgetuid(cred_t *cr) | |
1053 | { | |
1054 | return (0); | |
1055 | } | |
1056 | ||
1057 | gid_t | |
1058 | crgetgid(cred_t *cr) | |
1059 | { | |
1060 | return (0); | |
1061 | } | |
1062 | ||
1063 | int | |
1064 | crgetngroups(cred_t *cr) | |
1065 | { | |
1066 | return (0); | |
1067 | } | |
1068 | ||
1069 | gid_t * | |
1070 | crgetgroups(cred_t *cr) | |
1071 | { | |
1072 | return (NULL); | |
1073 | } | |
1074 | ||
1075 | int | |
1076 | zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr) | |
1077 | { | |
1078 | return (0); | |
1079 | } | |
1080 | ||
1081 | int | |
1082 | zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr) | |
1083 | { | |
1084 | return (0); | |
1085 | } | |
1086 | ||
1087 | int | |
1088 | zfs_secpolicy_destroy_perms(const char *name, cred_t *cr) | |
1089 | { | |
1090 | return (0); | |
1091 | } | |
1092 | ||
1093 | ksiddomain_t * | |
1094 | ksid_lookupdomain(const char *dom) | |
1095 | { | |
1096 | ksiddomain_t *kd; | |
1097 | ||
1098 | kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL); | |
1099 | kd->kd_name = spa_strdup(dom); | |
1100 | return (kd); | |
1101 | } | |
1102 | ||
1103 | void | |
1104 | ksiddomain_rele(ksiddomain_t *ksid) | |
1105 | { | |
1106 | spa_strfree(ksid->kd_name); | |
1107 | umem_free(ksid, sizeof (ksiddomain_t)); | |
1108 | } | |
428870ff | 1109 | |
/*
 * Format fmt with the arguments in adx into a buffer allocated by
 * vasprintf() and return that buffer.  The formatting is done on a copy of
 * adx, so the caller's va_list is not consumed here.  A vasprintf() failure
 * trips VERIFY.
 */
char *
kmem_vasprintf(const char *fmt, va_list adx)
{
	va_list adx_copy;
	char *buf;

	buf = NULL;

	va_copy(adx_copy, adx);
	VERIFY(vasprintf(&buf, fmt, adx_copy) != -1);
	va_end(adx_copy);

	return (buf);
}
1122 | ||
/*
 * printf-style variant of kmem_vasprintf(): format fmt and its variadic
 * arguments into a buffer allocated by vasprintf() and return that buffer.
 * A vasprintf() failure trips VERIFY.
 */
char *
kmem_asprintf(const char *fmt, ...)
{
	va_list adx;
	char *buf;

	buf = NULL;

	va_start(adx, fmt);
	VERIFY(vasprintf(&buf, fmt, adx) != -1);
	va_end(adx);

	return (buf);
}
572e2857 BB |
1135 | |
1136 | /* ARGSUSED */ | |
1137 | int | |
1138 | zfs_onexit_fd_hold(int fd, minor_t *minorp) | |
1139 | { | |
1140 | *minorp = 0; | |
1141 | return (0); | |
1142 | } | |
1143 | ||
/*
 * Userland stub: does nothing.
 */
/* ARGSUSED */
void
zfs_onexit_fd_rele(int fd)
{
	(void) fd;
}
1149 | ||
1150 | /* ARGSUSED */ | |
1151 | int | |
1152 | zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data, | |
1153 | uint64_t *action_handle) | |
1154 | { | |
1155 | return (0); | |
1156 | } | |
1157 | ||
1158 | /* ARGSUSED */ | |
1159 | int | |
1160 | zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire) | |
1161 | { | |
1162 | return (0); | |
1163 | } | |
1164 | ||
1165 | /* ARGSUSED */ | |
1166 | int | |
1167 | zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data) | |
1168 | { | |
1169 | return (0); | |
1170 | } |