4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2016 Actifio, Inc. All rights reserved.
34 #include <sys/signal.h>
37 #include <sys/processor.h>
38 #include <sys/zfs_context.h>
39 #include <sys/rrwlock.h>
40 #include <sys/utsname.h>
42 #include <sys/systeminfo.h>
43 #include <zfs_fletcher.h>
44 #include <sys/crypto/icp.h>
47 * Emulation of kernel services in userland.
52 vnode_t
*rootdir
= (vnode_t
*)0xabcd1234;
53 char hw_serial
[HW_HOSTID_LEN
];
54 struct utsname hw_utsname
;
55 vmem_t
*zio_arena
= NULL
;
57 /* If set, all blocks read will be copied to the specified directory. */
58 char *vn_dumpdir
= NULL
;
60 /* this only exists to have its address taken */
64 * =========================================================================
66 * =========================================================================
69 pthread_cond_t kthread_cond
= PTHREAD_COND_INITIALIZER
;
70 pthread_mutex_t kthread_lock
= PTHREAD_MUTEX_INITIALIZER
;
71 pthread_key_t kthread_key
;
79 VERIFY3S(pthread_key_create(&kthread_key
, NULL
), ==, 0);
81 /* Create entry for primary kthread */
82 kt
= umem_zalloc(sizeof (kthread_t
), UMEM_NOFAIL
);
83 kt
->t_tid
= pthread_self();
86 VERIFY3S(pthread_setspecific(kthread_key
, kt
), ==, 0);
88 /* Only the main thread should be running at the moment */
89 ASSERT3S(kthread_nr
, ==, 0);
96 kthread_t
*kt
= curthread
;
98 ASSERT(pthread_equal(kt
->t_tid
, pthread_self()));
99 ASSERT3P(kt
->t_func
, ==, NULL
);
101 umem_free(kt
, sizeof (kthread_t
));
103 /* Wait for all threads to exit via thread_exit() */
104 VERIFY3S(pthread_mutex_lock(&kthread_lock
), ==, 0);
106 kthread_nr
--; /* Main thread is exiting */
108 while (kthread_nr
> 0)
109 VERIFY0(pthread_cond_wait(&kthread_cond
, &kthread_lock
));
111 ASSERT3S(kthread_nr
, ==, 0);
112 VERIFY3S(pthread_mutex_unlock(&kthread_lock
), ==, 0);
114 VERIFY3S(pthread_key_delete(kthread_key
), ==, 0);
118 zk_thread_current(void)
120 kthread_t
*kt
= pthread_getspecific(kthread_key
);
122 ASSERT3P(kt
, !=, NULL
);
128 zk_thread_helper(void *arg
)
130 kthread_t
*kt
= (kthread_t
*)arg
;
132 VERIFY3S(pthread_setspecific(kthread_key
, kt
), ==, 0);
134 VERIFY3S(pthread_mutex_lock(&kthread_lock
), ==, 0);
136 VERIFY3S(pthread_mutex_unlock(&kthread_lock
), ==, 0);
137 (void) setpriority(PRIO_PROCESS
, 0, kt
->t_pri
);
139 kt
->t_tid
= pthread_self();
140 ((thread_func_arg_t
)kt
->t_func
)(kt
->t_arg
);
142 /* Unreachable, thread must exit with thread_exit() */
149 zk_thread_create(caddr_t stk
, size_t stksize
, thread_func_t func
, void *arg
,
150 uint64_t len
, proc_t
*pp
, int state
, pri_t pri
, int detachstate
)
156 ASSERT0(state
& ~TS_RUN
);
159 kt
= umem_zalloc(sizeof (kthread_t
), UMEM_NOFAIL
);
164 VERIFY0(pthread_attr_init(&attr
));
165 VERIFY0(pthread_attr_setdetachstate(&attr
, detachstate
));
168 * We allow the default stack size in user space to be specified by
169 * setting the ZFS_STACK_SIZE environment variable. This allows us
170 * the convenience of observing and debugging stack overruns in
171 * user space. Explicitly specified stack sizes will be honored.
172 * The usage of ZFS_STACK_SIZE is discussed further in the
173 * ENVIRONMENT VARIABLES sections of the ztest(1) man page.
176 stkstr
= getenv("ZFS_STACK_SIZE");
179 stksize
= TS_STACK_MAX
;
181 stksize
= MAX(atoi(stkstr
), TS_STACK_MIN
);
184 VERIFY3S(stksize
, >, 0);
185 stksize
= P2ROUNDUP(MAX(stksize
, TS_STACK_MIN
), PAGESIZE
);
187 * If this ever fails, it may be because the stack size is not a
188 * multiple of system page size.
190 VERIFY0(pthread_attr_setstacksize(&attr
, stksize
));
191 VERIFY0(pthread_attr_setguardsize(&attr
, PAGESIZE
));
193 VERIFY0(pthread_create(&kt
->t_tid
, &attr
, &zk_thread_helper
, kt
));
194 VERIFY0(pthread_attr_destroy(&attr
));
202 kthread_t
*kt
= curthread
;
204 ASSERT(pthread_equal(kt
->t_tid
, pthread_self()));
206 umem_free(kt
, sizeof (kthread_t
));
208 VERIFY0(pthread_mutex_lock(&kthread_lock
));
210 VERIFY0(pthread_mutex_unlock(&kthread_lock
));
212 VERIFY0(pthread_cond_broadcast(&kthread_cond
));
213 pthread_exit((void *)TS_MAGIC
);
217 zk_thread_join(kt_did_t tid
)
221 pthread_join((pthread_t
)tid
, &ret
);
222 VERIFY3P(ret
, ==, (void *)TS_MAGIC
);
226 * =========================================================================
228 * =========================================================================
232 kstat_create(const char *module
, int instance
, const char *name
,
233 const char *class, uchar_t type
, ulong_t ndata
, uchar_t ks_flag
)
240 kstat_install(kstat_t
*ksp
)
245 kstat_delete(kstat_t
*ksp
)
250 kstat_waitq_enter(kstat_io_t
*kiop
)
255 kstat_waitq_exit(kstat_io_t
*kiop
)
260 kstat_runq_enter(kstat_io_t
*kiop
)
265 kstat_runq_exit(kstat_io_t
*kiop
)
270 kstat_waitq_to_runq(kstat_io_t
*kiop
)
275 kstat_runq_back_to_waitq(kstat_io_t
*kiop
)
279 kstat_set_raw_ops(kstat_t
*ksp
,
280 int (*headers
)(char *buf
, size_t size
),
281 int (*data
)(char *buf
, size_t size
, void *data
),
282 void *(*addr
)(kstat_t
*ksp
, loff_t index
))
286 * =========================================================================
288 * =========================================================================
292 mutex_init(kmutex_t
*mp
, char *name
, int type
, void *cookie
)
294 ASSERT3S(type
, ==, MUTEX_DEFAULT
);
295 ASSERT3P(cookie
, ==, NULL
);
296 mp
->m_owner
= MTX_INIT
;
297 mp
->m_magic
= MTX_MAGIC
;
298 VERIFY3S(pthread_mutex_init(&mp
->m_lock
, NULL
), ==, 0);
302 mutex_destroy(kmutex_t
*mp
)
304 ASSERT3U(mp
->m_magic
, ==, MTX_MAGIC
);
305 ASSERT3P(mp
->m_owner
, ==, MTX_INIT
);
306 ASSERT0(pthread_mutex_destroy(&(mp
)->m_lock
));
307 mp
->m_owner
= MTX_DEST
;
312 mutex_enter(kmutex_t
*mp
)
314 ASSERT3U(mp
->m_magic
, ==, MTX_MAGIC
);
315 ASSERT3P(mp
->m_owner
, !=, MTX_DEST
);
316 ASSERT3P(mp
->m_owner
, !=, curthread
);
317 VERIFY3S(pthread_mutex_lock(&mp
->m_lock
), ==, 0);
318 ASSERT3P(mp
->m_owner
, ==, MTX_INIT
);
319 mp
->m_owner
= curthread
;
323 mutex_tryenter(kmutex_t
*mp
)
326 ASSERT3U(mp
->m_magic
, ==, MTX_MAGIC
);
327 ASSERT3P(mp
->m_owner
, !=, MTX_DEST
);
328 if (0 == (err
= pthread_mutex_trylock(&mp
->m_lock
))) {
329 ASSERT3P(mp
->m_owner
, ==, MTX_INIT
);
330 mp
->m_owner
= curthread
;
333 VERIFY3S(err
, ==, EBUSY
);
339 mutex_exit(kmutex_t
*mp
)
341 ASSERT3U(mp
->m_magic
, ==, MTX_MAGIC
);
342 ASSERT3P(mutex_owner(mp
), ==, curthread
);
343 mp
->m_owner
= MTX_INIT
;
344 VERIFY3S(pthread_mutex_unlock(&mp
->m_lock
), ==, 0);
348 mutex_owner(kmutex_t
*mp
)
350 ASSERT3U(mp
->m_magic
, ==, MTX_MAGIC
);
351 return (mp
->m_owner
);
355 mutex_held(kmutex_t
*mp
)
357 return (mp
->m_owner
== curthread
);
361 * =========================================================================
363 * =========================================================================
367 rw_init(krwlock_t
*rwlp
, char *name
, int type
, void *arg
)
369 ASSERT3S(type
, ==, RW_DEFAULT
);
370 ASSERT3P(arg
, ==, NULL
);
371 VERIFY3S(pthread_rwlock_init(&rwlp
->rw_lock
, NULL
), ==, 0);
372 rwlp
->rw_owner
= RW_INIT
;
373 rwlp
->rw_wr_owner
= RW_INIT
;
374 rwlp
->rw_readers
= 0;
375 rwlp
->rw_magic
= RW_MAGIC
;
379 rw_destroy(krwlock_t
*rwlp
)
381 ASSERT3U(rwlp
->rw_magic
, ==, RW_MAGIC
);
382 ASSERT(rwlp
->rw_readers
== 0 && rwlp
->rw_wr_owner
== RW_INIT
);
383 VERIFY3S(pthread_rwlock_destroy(&rwlp
->rw_lock
), ==, 0);
388 rw_enter(krwlock_t
*rwlp
, krw_t rw
)
390 ASSERT3U(rwlp
->rw_magic
, ==, RW_MAGIC
);
391 ASSERT3P(rwlp
->rw_owner
, !=, curthread
);
392 ASSERT3P(rwlp
->rw_wr_owner
, !=, curthread
);
394 if (rw
== RW_READER
) {
395 VERIFY3S(pthread_rwlock_rdlock(&rwlp
->rw_lock
), ==, 0);
396 ASSERT3P(rwlp
->rw_wr_owner
, ==, RW_INIT
);
398 atomic_inc_uint(&rwlp
->rw_readers
);
400 VERIFY3S(pthread_rwlock_wrlock(&rwlp
->rw_lock
), ==, 0);
401 ASSERT3P(rwlp
->rw_wr_owner
, ==, RW_INIT
);
402 ASSERT3U(rwlp
->rw_readers
, ==, 0);
404 rwlp
->rw_wr_owner
= curthread
;
407 rwlp
->rw_owner
= curthread
;
411 rw_exit(krwlock_t
*rwlp
)
413 ASSERT3U(rwlp
->rw_magic
, ==, RW_MAGIC
);
414 ASSERT(RW_LOCK_HELD(rwlp
));
416 if (RW_READ_HELD(rwlp
))
417 atomic_dec_uint(&rwlp
->rw_readers
);
419 rwlp
->rw_wr_owner
= RW_INIT
;
421 rwlp
->rw_owner
= RW_INIT
;
422 VERIFY3S(pthread_rwlock_unlock(&rwlp
->rw_lock
), ==, 0);
426 rw_tryenter(krwlock_t
*rwlp
, krw_t rw
)
430 ASSERT3U(rwlp
->rw_magic
, ==, RW_MAGIC
);
433 rv
= pthread_rwlock_tryrdlock(&rwlp
->rw_lock
);
435 rv
= pthread_rwlock_trywrlock(&rwlp
->rw_lock
);
438 ASSERT3P(rwlp
->rw_wr_owner
, ==, RW_INIT
);
441 atomic_inc_uint(&rwlp
->rw_readers
);
443 ASSERT3U(rwlp
->rw_readers
, ==, 0);
444 rwlp
->rw_wr_owner
= curthread
;
447 rwlp
->rw_owner
= curthread
;
451 VERIFY3S(rv
, ==, EBUSY
);
457 rw_tryupgrade(krwlock_t
*rwlp
)
459 ASSERT3U(rwlp
->rw_magic
, ==, RW_MAGIC
);
465 * =========================================================================
466 * condition variables
467 * =========================================================================
471 cv_init(kcondvar_t
*cv
, char *name
, int type
, void *arg
)
473 ASSERT3S(type
, ==, CV_DEFAULT
);
474 cv
->cv_magic
= CV_MAGIC
;
475 VERIFY0(pthread_cond_init(&cv
->cv
, NULL
));
479 cv_destroy(kcondvar_t
*cv
)
481 ASSERT3U(cv
->cv_magic
, ==, CV_MAGIC
);
482 VERIFY0(pthread_cond_destroy(&cv
->cv
));
487 cv_wait(kcondvar_t
*cv
, kmutex_t
*mp
)
489 ASSERT3U(cv
->cv_magic
, ==, CV_MAGIC
);
490 ASSERT3P(mutex_owner(mp
), ==, curthread
);
491 mp
->m_owner
= MTX_INIT
;
492 VERIFY0(pthread_cond_wait(&cv
->cv
, &mp
->m_lock
));
493 mp
->m_owner
= curthread
;
497 cv_timedwait(kcondvar_t
*cv
, kmutex_t
*mp
, clock_t abstime
)
504 ASSERT3U(cv
->cv_magic
, ==, CV_MAGIC
);
506 delta
= abstime
- ddi_get_lbolt();
510 VERIFY(gettimeofday(&tv
, NULL
) == 0);
512 ts
.tv_sec
= tv
.tv_sec
+ delta
/ hz
;
513 ts
.tv_nsec
= tv
.tv_usec
* NSEC_PER_USEC
+ (delta
% hz
) * (NANOSEC
/ hz
);
514 if (ts
.tv_nsec
>= NANOSEC
) {
516 ts
.tv_nsec
-= NANOSEC
;
519 ASSERT3P(mutex_owner(mp
), ==, curthread
);
520 mp
->m_owner
= MTX_INIT
;
521 error
= pthread_cond_timedwait(&cv
->cv
, &mp
->m_lock
, &ts
);
522 mp
->m_owner
= curthread
;
524 if (error
== ETIMEDOUT
)
534 cv_timedwait_hires(kcondvar_t
*cv
, kmutex_t
*mp
, hrtime_t tim
, hrtime_t res
,
542 ASSERT(flag
== 0 || flag
== CALLOUT_FLAG_ABSOLUTE
);
545 if (flag
& CALLOUT_FLAG_ABSOLUTE
)
546 delta
-= gethrtime();
551 VERIFY(gettimeofday(&tv
, NULL
) == 0);
553 ts
.tv_sec
= tv
.tv_sec
+ delta
/ NANOSEC
;
554 ts
.tv_nsec
= tv
.tv_usec
* NSEC_PER_USEC
+ (delta
% NANOSEC
);
555 if (ts
.tv_nsec
>= NANOSEC
) {
557 ts
.tv_nsec
-= NANOSEC
;
560 ASSERT(mutex_owner(mp
) == curthread
);
561 mp
->m_owner
= MTX_INIT
;
562 error
= pthread_cond_timedwait(&cv
->cv
, &mp
->m_lock
, &ts
);
563 mp
->m_owner
= curthread
;
565 if (error
== ETIMEDOUT
)
574 cv_signal(kcondvar_t
*cv
)
576 ASSERT3U(cv
->cv_magic
, ==, CV_MAGIC
);
577 VERIFY0(pthread_cond_signal(&cv
->cv
));
581 cv_broadcast(kcondvar_t
*cv
)
583 ASSERT3U(cv
->cv_magic
, ==, CV_MAGIC
);
584 VERIFY0(pthread_cond_broadcast(&cv
->cv
));
588 * =========================================================================
590 * =========================================================================
593 * Note: for the xxxat() versions of these functions, we assume that the
594 * starting vp is always rootdir (which is true for spa_directory.c, the only
595 * ZFS consumer of these interfaces). We assert this is true, and then emulate
596 * them by adding '/' in front of the path.
601 vn_open(char *path
, int x1
, int flags
, int mode
, vnode_t
**vpp
, int x2
, int x3
)
611 realpath
= umem_alloc(MAXPATHLEN
, UMEM_NOFAIL
);
614 * If we're accessing a real disk from userland, we need to use
615 * the character interface to avoid caching. This is particularly
616 * important if we're trying to look at a real in-kernel storage
617 * pool from userland, e.g. via zdb, because otherwise we won't
618 * see the changes occurring under the segmap cache.
619 * On the other hand, the stupid character device returns zero
620 * for its size. So -- gag -- we open the block device to get
621 * its size, and remember it for subsequent VOP_GETATTR().
623 #if defined(__sun__) || defined(__sun)
624 if (strncmp(path
, "/dev/", 5) == 0) {
629 fd
= open64(path
, O_RDONLY
);
635 if (fstat64(fd
, &st
) == -1) {
642 (void) sprintf(realpath
, "%s", path
);
643 dsk
= strstr(path
, "/dsk/");
645 (void) sprintf(realpath
+ (dsk
- path
) + 1, "r%s",
648 (void) sprintf(realpath
, "%s", path
);
649 if (!(flags
& FCREAT
) && stat64(realpath
, &st
) == -1) {
656 if (!(flags
& FCREAT
) && S_ISBLK(st
.st_mode
)) {
660 /* We shouldn't be writing to block devices in userspace */
661 VERIFY(!(flags
& FWRITE
));
665 old_umask
= umask(0);
668 * The construct 'flags - FREAD' conveniently maps combinations of
669 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
671 fd
= open64(realpath
, flags
- FREAD
, mode
);
679 (void) umask(old_umask
);
681 if (vn_dumpdir
!= NULL
) {
682 char *dumppath
= umem_zalloc(MAXPATHLEN
, UMEM_NOFAIL
);
683 (void) snprintf(dumppath
, MAXPATHLEN
,
684 "%s/%s", vn_dumpdir
, basename(realpath
));
685 dump_fd
= open64(dumppath
, O_CREAT
| O_WRONLY
, 0666);
686 umem_free(dumppath
, MAXPATHLEN
);
699 if (fstat64_blk(fd
, &st
) == -1) {
707 (void) fcntl(fd
, F_SETFD
, FD_CLOEXEC
);
709 *vpp
= vp
= umem_zalloc(sizeof (vnode_t
), UMEM_NOFAIL
);
712 vp
->v_size
= st
.st_size
;
713 vp
->v_path
= spa_strdup(path
);
714 vp
->v_dump_fd
= dump_fd
;
721 vn_openat(char *path
, int x1
, int flags
, int mode
, vnode_t
**vpp
, int x2
,
722 int x3
, vnode_t
*startvp
, int fd
)
724 char *realpath
= umem_alloc(strlen(path
) + 2, UMEM_NOFAIL
);
727 ASSERT(startvp
== rootdir
);
728 (void) sprintf(realpath
, "/%s", path
);
730 /* fd ignored for now, need if want to simulate nbmand support */
731 ret
= vn_open(realpath
, x1
, flags
, mode
, vpp
, x2
, x3
);
733 umem_free(realpath
, strlen(path
) + 2);
740 vn_rdwr(int uio
, vnode_t
*vp
, void *addr
, ssize_t len
, offset_t offset
,
741 int x1
, int x2
, rlim64_t x3
, void *x4
, ssize_t
*residp
)
743 ssize_t rc
, done
= 0, split
;
745 if (uio
== UIO_READ
) {
746 rc
= pread64(vp
->v_fd
, addr
, len
, offset
);
747 if (vp
->v_dump_fd
!= -1 && rc
!= -1) {
749 status
= pwrite64(vp
->v_dump_fd
, addr
, rc
, offset
);
750 ASSERT(status
!= -1);
754 * To simulate partial disk writes, we split writes into two
755 * system calls so that the process can be killed in between.
757 int sectors
= len
>> SPA_MINBLOCKSHIFT
;
758 split
= (sectors
> 0 ? rand() % sectors
: 0) <<
760 rc
= pwrite64(vp
->v_fd
, addr
, split
, offset
);
763 rc
= pwrite64(vp
->v_fd
, (char *)addr
+ split
,
764 len
- split
, offset
+ split
);
769 if (rc
== -1 && errno
== EINVAL
) {
771 * Under Linux, this most likely means an alignment issue
772 * (memory or disk) due to O_DIRECT, so we abort() in order to
773 * catch the offender.
784 *residp
= len
- done
;
785 else if (done
!= len
)
791 vn_close(vnode_t
*vp
)
794 if (vp
->v_dump_fd
!= -1)
795 close(vp
->v_dump_fd
);
796 spa_strfree(vp
->v_path
);
797 umem_free(vp
, sizeof (vnode_t
));
801 * At a minimum we need to update the size since vdev_reopen()
802 * will no longer call vn_openat().
805 fop_getattr(vnode_t
*vp
, vattr_t
*vap
)
810 if (fstat64_blk(vp
->v_fd
, &st
) == -1) {
816 vap
->va_size
= st
.st_size
;
821 * =========================================================================
822 * Figure out which debugging statements to print
823 * =========================================================================
826 static char *dprintf_string
;
827 static int dprintf_print_all
;
830 dprintf_find_string(const char *string
)
832 char *tmp_str
= dprintf_string
;
833 int len
= strlen(string
);
836 * Find out if this is a string we want to print.
837 * String format: file1.c,function_name1,file2.c,file3.c
840 while (tmp_str
!= NULL
) {
841 if (strncmp(tmp_str
, string
, len
) == 0 &&
842 (tmp_str
[len
] == ',' || tmp_str
[len
] == '\0'))
844 tmp_str
= strchr(tmp_str
, ',');
846 tmp_str
++; /* Get rid of , */
852 dprintf_setup(int *argc
, char **argv
)
857 * Debugging can be specified two ways: by setting the
858 * environment variable ZFS_DEBUG, or by including a
859 * "debug=..." argument on the command line. The command
860 * line setting overrides the environment variable.
863 for (i
= 1; i
< *argc
; i
++) {
864 int len
= strlen("debug=");
865 /* First look for a command line argument */
866 if (strncmp("debug=", argv
[i
], len
) == 0) {
867 dprintf_string
= argv
[i
] + len
;
868 /* Remove from args */
869 for (j
= i
; j
< *argc
; j
++)
876 if (dprintf_string
== NULL
) {
877 /* Look for ZFS_DEBUG environment variable */
878 dprintf_string
= getenv("ZFS_DEBUG");
882 * Are we just turning on all debugging?
884 if (dprintf_find_string("on"))
885 dprintf_print_all
= 1;
887 if (dprintf_string
!= NULL
)
888 zfs_flags
|= ZFS_DEBUG_DPRINTF
;
892 * =========================================================================
894 * =========================================================================
897 __dprintf(const char *file
, const char *func
, int line
, const char *fmt
, ...)
903 * Get rid of annoying "../common/" prefix to filename.
905 newfile
= strrchr(file
, '/');
906 if (newfile
!= NULL
) {
907 newfile
= newfile
+ 1; /* Get rid of leading / */
912 if (dprintf_print_all
||
913 dprintf_find_string(newfile
) ||
914 dprintf_find_string(func
)) {
915 /* Print out just the function name if requested */
917 if (dprintf_find_string("pid"))
918 (void) printf("%d ", getpid());
919 if (dprintf_find_string("tid"))
920 (void) printf("%u ", (uint_t
)pthread_self());
921 if (dprintf_find_string("cpu"))
922 (void) printf("%u ", getcpuid());
923 if (dprintf_find_string("time"))
924 (void) printf("%llu ", gethrtime());
925 if (dprintf_find_string("long"))
926 (void) printf("%s, line %d: ", newfile
, line
);
927 (void) printf("%s: ", func
);
929 (void) vprintf(fmt
, adx
);
936 * =========================================================================
937 * cmn_err() and panic()
938 * =========================================================================
940 static char ce_prefix
[CE_IGNORE
][10] = { "", "NOTICE: ", "WARNING: ", "" };
941 static char ce_suffix
[CE_IGNORE
][2] = { "", "\n", "\n", "" };
944 vpanic(const char *fmt
, va_list adx
)
946 (void) fprintf(stderr
, "error: ");
947 (void) vfprintf(stderr
, fmt
, adx
);
948 (void) fprintf(stderr
, "\n");
950 abort(); /* think of it as a "user-level crash dump" */
954 panic(const char *fmt
, ...)
964 vcmn_err(int ce
, const char *fmt
, va_list adx
)
968 if (ce
!= CE_NOTE
) { /* suppress noise in userland stress testing */
969 (void) fprintf(stderr
, "%s", ce_prefix
[ce
]);
970 (void) vfprintf(stderr
, fmt
, adx
);
971 (void) fprintf(stderr
, "%s", ce_suffix
[ce
]);
977 cmn_err(int ce
, const char *fmt
, ...)
982 vcmn_err(ce
, fmt
, adx
);
987 * =========================================================================
989 * =========================================================================
992 kobj_open_file(char *name
)
997 /* set vp as the _fd field of the file */
998 if (vn_openat(name
, UIO_SYSSPACE
, FREAD
, 0, &vp
, 0, 0, rootdir
,
1000 return ((void *)-1UL);
1002 file
= umem_zalloc(sizeof (struct _buf
), UMEM_NOFAIL
);
1003 file
->_fd
= (intptr_t)vp
;
1008 kobj_read_file(struct _buf
*file
, char *buf
, unsigned size
, unsigned off
)
1012 if (vn_rdwr(UIO_READ
, (vnode_t
*)file
->_fd
, buf
, size
, (offset_t
)off
,
1013 UIO_SYSSPACE
, 0, 0, 0, &resid
) != 0)
1016 return (size
- resid
);
1020 kobj_close_file(struct _buf
*file
)
1022 vn_close((vnode_t
*)file
->_fd
);
1023 umem_free(file
, sizeof (struct _buf
));
1027 kobj_get_filesize(struct _buf
*file
, uint64_t *size
)
1030 vnode_t
*vp
= (vnode_t
*)file
->_fd
;
1032 if (fstat64(vp
->v_fd
, &st
) == -1) {
1041 * =========================================================================
1043 * =========================================================================
1047 delay(clock_t ticks
)
1049 (void) poll(0, 0, ticks
* (1000 / hz
));
1053 * Find highest one bit set.
1054 * Returns bit number + 1 of highest bit that is set, otherwise returns 0.
1055 * High order bit is 31 (or 63 in _LP64 kernel).
1058 highbit64(uint64_t i
)
1064 if (i
& 0xffffffff00000000ULL
) {
1067 if (i
& 0xffff0000) {
1086 * Find lowest one bit set.
1087 * Returns bit number + 1 of lowest bit that is set, otherwise returns 0.
1088 * This is basically a reimplementation of ffsll(), which is GNU specific.
1091 lowbit64(uint64_t i
)
1093 register int h
= 64;
1097 if (i
& 0x00000000ffffffffULL
)
1129 * Find highest one bit set.
1130 * Returns bit number + 1 of highest bit that is set, otherwise returns 0.
1131 * High order bit is 31 (or 63 in _LP64 kernel).
1141 if (i
& 0xffffffff00000000ul
) {
1145 if (i
& 0xffff0000) {
1164 * Find lowest one bit set.
1165 * Returns bit number + 1 of lowest bit that is set, otherwise returns 0.
1166 * Low order bit is 0.
1177 if (!(i
& 0xffffffff)) {
1181 if (!(i
& 0xffff)) {
1199 static int random_fd
= -1, urandom_fd
= -1;
1204 VERIFY((random_fd
= open("/dev/random", O_RDONLY
)) != -1);
1205 VERIFY((urandom_fd
= open("/dev/urandom", O_RDONLY
)) != -1);
1219 random_get_bytes_common(uint8_t *ptr
, size_t len
, int fd
)
1226 while (resid
!= 0) {
1227 bytes
= read(fd
, ptr
, resid
);
1228 ASSERT3S(bytes
, >=, 0);
1237 random_get_bytes(uint8_t *ptr
, size_t len
)
1239 return (random_get_bytes_common(ptr
, len
, random_fd
));
1243 random_get_pseudo_bytes(uint8_t *ptr
, size_t len
)
1245 return (random_get_bytes_common(ptr
, len
, urandom_fd
));
1249 ddi_strtoul(const char *hw_serial
, char **nptr
, int base
, unsigned long *result
)
1253 *result
= strtoul(hw_serial
, &end
, base
);
1260 ddi_strtoull(const char *str
, char **nptr
, int base
, u_longlong_t
*result
)
1264 *result
= strtoull(str
, &end
, base
);
1273 return (&hw_utsname
);
1277 * =========================================================================
1278 * kernel emulation setup & teardown
1279 * =========================================================================
1282 umem_out_of_memory(void)
1284 char errmsg
[] = "out of memory -- generating core dump\n";
1286 (void) fprintf(stderr
, "%s", errmsg
);
1291 static unsigned long
1292 get_spl_hostid(void)
1295 unsigned long hostid
;
1297 f
= fopen("/sys/module/spl/parameters/spl_hostid", "r");
1300 if (fscanf(f
, "%lu", &hostid
) != 1)
1303 return (hostid
& 0xffffffff);
1307 get_system_hostid(void)
1309 unsigned long system_hostid
= get_spl_hostid();
1310 if (system_hostid
== 0)
1311 system_hostid
= gethostid() & 0xffffffff;
1312 return (system_hostid
);
1316 kernel_init(int mode
)
1318 extern uint_t rrw_tsd_key
;
1320 umem_nofail_callback(umem_out_of_memory
);
1322 physmem
= sysconf(_SC_PHYS_PAGES
);
1324 dprintf("physmem = %llu pages (%.2f GB)\n", physmem
,
1325 (double)physmem
* sysconf(_SC_PAGE_SIZE
) / (1ULL << 30));
1327 (void) snprintf(hw_serial
, sizeof (hw_serial
), "%ld",
1328 (mode
& FWRITE
) ? get_system_hostid() : 0);
1332 VERIFY0(uname(&hw_utsname
));
1335 system_taskq_init();
1342 tsd_create(&rrw_tsd_key
, rrw_tsd_destroy
);
1352 system_taskq_fini();
1359 crgetuid(cred_t
*cr
)
1365 crgetruid(cred_t
*cr
)
1371 crgetgid(cred_t
*cr
)
1377 crgetngroups(cred_t
*cr
)
1383 crgetgroups(cred_t
*cr
)
1389 zfs_secpolicy_snapshot_perms(const char *name
, cred_t
*cr
)
1395 zfs_secpolicy_rename_perms(const char *from
, const char *to
, cred_t
*cr
)
1401 zfs_secpolicy_destroy_perms(const char *name
, cred_t
*cr
)
1407 secpolicy_zfs(const cred_t
*cr
)
1413 ksid_lookupdomain(const char *dom
)
1417 kd
= umem_zalloc(sizeof (ksiddomain_t
), UMEM_NOFAIL
);
1418 kd
->kd_name
= spa_strdup(dom
);
1423 ksiddomain_rele(ksiddomain_t
*ksid
)
1425 spa_strfree(ksid
->kd_name
);
1426 umem_free(ksid
, sizeof (ksiddomain_t
));
1430 kmem_vasprintf(const char *fmt
, va_list adx
)
1435 va_copy(adx_copy
, adx
);
1436 VERIFY(vasprintf(&buf
, fmt
, adx_copy
) != -1);
1443 kmem_asprintf(const char *fmt
, ...)
1449 VERIFY(vasprintf(&buf
, fmt
, adx
) != -1);
1457 zfs_onexit_fd_hold(int fd
, minor_t
*minorp
)
1465 zfs_onexit_fd_rele(int fd
)
1471 zfs_onexit_add_cb(minor_t minor
, void (*func
)(void *), void *data
,
1472 uint64_t *action_handle
)
1479 zfs_onexit_del_cb(minor_t minor
, uint64_t action_handle
, boolean_t fire
)
1486 zfs_onexit_cb_data(minor_t minor
, uint64_t action_handle
, void **data
)
1492 spl_fstrans_mark(void)
1494 return ((fstrans_cookie_t
)0);
1498 spl_fstrans_unmark(fstrans_cookie_t cookie
)
1503 __spl_pf_fstrans_check(void)
1508 void *zvol_tag
= "zvol_tag";
1511 zvol_create_minors(spa_t
*spa
, const char *name
, boolean_t async
)
1516 zvol_remove_minor(spa_t
*spa
, const char *name
, boolean_t async
)
1521 zvol_remove_minors(spa_t
*spa
, const char *name
, boolean_t async
)
1526 zvol_rename_minors(spa_t
*spa
, const char *oldname
, const char *newname
,