]> git.proxmox.com Git - mirror_zfs.git/blame - lib/libzpool/kernel.c
Simplify threads, mutexs, cvs and rwlocks
[mirror_zfs.git] / lib / libzpool / kernel.c
CommitLineData
34dc7c2f
BB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
572e2857 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
a0bd735a 23 * Copyright (c) 2016 Actifio, Inc. All rights reserved.
34dc7c2f
BB
24 */
25
34dc7c2f
BB
26#include <assert.h>
27#include <fcntl.h>
28#include <poll.h>
29#include <stdio.h>
30#include <stdlib.h>
31#include <string.h>
32#include <zlib.h>
9867e8be 33#include <libgen.h>
1e33ac1e 34#include <sys/signal.h>
34dc7c2f
BB
35#include <sys/spa.h>
36#include <sys/stat.h>
37#include <sys/processor.h>
38#include <sys/zfs_context.h>
13fe0198 39#include <sys/rrwlock.h>
34dc7c2f 40#include <sys/utsname.h>
d603ed6c 41#include <sys/time.h>
d164b209 42#include <sys/systeminfo.h>
1eeb4562 43#include <zfs_fletcher.h>
0b04990a 44#include <sys/crypto/icp.h>
34dc7c2f
BB
45
46/*
47 * Emulation of kernel services in userland.
48 */
49
428870ff 50int aok;
34dc7c2f
BB
51uint64_t physmem;
52vnode_t *rootdir = (vnode_t *)0xabcd1234;
d164b209 53char hw_serial[HW_HOSTID_LEN];
f0e324f2 54struct utsname hw_utsname;
ca67b33a 55vmem_t *zio_arena = NULL;
34dc7c2f 56
9867e8be
MA
57/* If set, all blocks read will be copied to the specified directory. */
58char *vn_dumpdir = NULL;
59
428870ff
BB
60/* this only exists to have its address taken */
61struct proc p0;
62
34dc7c2f
BB
63/*
64 * =========================================================================
65 * threads
66 * =========================================================================
c25b8f99
BB
67 *
68 * TS_STACK_MIN is dictated by the minimum allowed pthread stack size. While
69 * TS_STACK_MAX is somewhat arbitrary, it was selected to be large enough for
70 * the expected stack depth while small enough to avoid exhausting address
71 * space with high thread counts.
34dc7c2f 72 */
c25b8f99
BB
73#define TS_STACK_MIN MAX(PTHREAD_STACK_MIN, 32768)
74#define TS_STACK_MAX (256 * 1024)
1e33ac1e 75
c25b8f99 76/*ARGSUSED*/
1e33ac1e 77kthread_t *
c25b8f99 78zk_thread_create(void (*func)(void *), void *arg, size_t stksize, int state)
1e33ac1e 79{
1e33ac1e 80 pthread_attr_t attr;
c25b8f99 81 pthread_t tid;
aa0ac7ca 82 char *stkstr;
c25b8f99 83 int detachstate = PTHREAD_CREATE_DETACHED;
1e33ac1e 84
c25b8f99 85 VERIFY0(pthread_attr_init(&attr));
1e33ac1e 86
c25b8f99
BB
87 if (state & TS_JOINABLE)
88 detachstate = PTHREAD_CREATE_JOINABLE;
1e33ac1e 89
aa0ac7ca
BB
90 VERIFY0(pthread_attr_setdetachstate(&attr, detachstate));
91
1e33ac1e 92 /*
aa0ac7ca
BB
93 * We allow the default stack size in user space to be specified by
94 * setting the ZFS_STACK_SIZE environment variable. This allows us
95 * the convenience of observing and debugging stack overruns in
96 * user space. Explicitly specified stack sizes will be honored.
97 * The usage of ZFS_STACK_SIZE is discussed further in the
98 * ENVIRONMENT VARIABLES sections of the ztest(1) man page.
1e33ac1e 99 */
aa0ac7ca
BB
100 if (stksize == 0) {
101 stkstr = getenv("ZFS_STACK_SIZE");
102
103 if (stkstr == NULL)
104 stksize = TS_STACK_MAX;
105 else
106 stksize = MAX(atoi(stkstr), TS_STACK_MIN);
107 }
108
109 VERIFY3S(stksize, >, 0);
110 stksize = P2ROUNDUP(MAX(stksize, TS_STACK_MIN), PAGESIZE);
c25b8f99 111
206971d2
DR
112 /*
113 * If this ever fails, it may be because the stack size is not a
114 * multiple of system page size.
115 */
aa0ac7ca
BB
116 VERIFY0(pthread_attr_setstacksize(&attr, stksize));
117 VERIFY0(pthread_attr_setguardsize(&attr, PAGESIZE));
118
c25b8f99 119 VERIFY0(pthread_create(&tid, &attr, (void *(*)(void *))func, arg));
aa0ac7ca 120 VERIFY0(pthread_attr_destroy(&attr));
1e33ac1e 121
c25b8f99 122 return ((void *)(uintptr_t)tid);
34dc7c2f
BB
123}
124
125/*
126 * =========================================================================
127 * kstats
128 * =========================================================================
129 */
130/*ARGSUSED*/
131kstat_t *
330847ff
MA
132kstat_create(const char *module, int instance, const char *name,
133 const char *class, uchar_t type, ulong_t ndata, uchar_t ks_flag)
34dc7c2f
BB
134{
135 return (NULL);
136}
137
138/*ARGSUSED*/
139void
140kstat_install(kstat_t *ksp)
141{}
142
143/*ARGSUSED*/
144void
145kstat_delete(kstat_t *ksp)
146{}
147
1421c891 148/*ARGSUSED*/
330847ff
MA
149void
150kstat_waitq_enter(kstat_io_t *kiop)
151{}
152
153/*ARGSUSED*/
154void
155kstat_waitq_exit(kstat_io_t *kiop)
156{}
157
158/*ARGSUSED*/
159void
160kstat_runq_enter(kstat_io_t *kiop)
161{}
162
163/*ARGSUSED*/
164void
165kstat_runq_exit(kstat_io_t *kiop)
166{}
167
168/*ARGSUSED*/
169void
170kstat_waitq_to_runq(kstat_io_t *kiop)
171{}
172
173/*ARGSUSED*/
174void
175kstat_runq_back_to_waitq(kstat_io_t *kiop)
176{}
177
1421c891
PS
178void
179kstat_set_raw_ops(kstat_t *ksp,
180 int (*headers)(char *buf, size_t size),
181 int (*data)(char *buf, size_t size, void *data),
182 void *(*addr)(kstat_t *ksp, loff_t index))
183{}
184
34dc7c2f
BB
185/*
186 * =========================================================================
187 * mutexes
188 * =========================================================================
189 */
1e33ac1e 190
34dc7c2f 191void
1e33ac1e 192mutex_init(kmutex_t *mp, char *name, int type, void *cookie)
34dc7c2f 193{
c25b8f99
BB
194 VERIFY0(pthread_mutex_init(&mp->m_lock, NULL));
195 memset(&mp->m_owner, 0, sizeof (pthread_t));
34dc7c2f
BB
196}
197
198void
1e33ac1e 199mutex_destroy(kmutex_t *mp)
34dc7c2f 200{
c25b8f99 201 VERIFY0(pthread_mutex_destroy(&mp->m_lock));
34dc7c2f
BB
202}
203
204void
205mutex_enter(kmutex_t *mp)
206{
c25b8f99
BB
207 VERIFY0(pthread_mutex_lock(&mp->m_lock));
208 mp->m_owner = pthread_self();
34dc7c2f
BB
209}
210
211int
212mutex_tryenter(kmutex_t *mp)
213{
c25b8f99
BB
214 int error;
215
216 error = pthread_mutex_trylock(&mp->m_lock);
217 if (error == 0) {
218 mp->m_owner = pthread_self();
34dc7c2f
BB
219 return (1);
220 } else {
c25b8f99 221 VERIFY3S(error, ==, EBUSY);
34dc7c2f
BB
222 return (0);
223 }
224}
225
226void
227mutex_exit(kmutex_t *mp)
228{
c25b8f99
BB
229 memset(&mp->m_owner, 0, sizeof (pthread_t));
230 VERIFY0(pthread_mutex_unlock(&mp->m_lock));
1e33ac1e
BB
231}
232
34dc7c2f
BB
233/*
234 * =========================================================================
235 * rwlocks
236 * =========================================================================
237 */
1e33ac1e 238
34dc7c2f
BB
239void
240rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
241{
c25b8f99 242 VERIFY0(pthread_rwlock_init(&rwlp->rw_lock, NULL));
1e33ac1e 243 rwlp->rw_readers = 0;
c25b8f99 244 rwlp->rw_owner = 0;
34dc7c2f
BB
245}
246
247void
248rw_destroy(krwlock_t *rwlp)
249{
c25b8f99 250 VERIFY0(pthread_rwlock_destroy(&rwlp->rw_lock));
34dc7c2f
BB
251}
252
253void
254rw_enter(krwlock_t *rwlp, krw_t rw)
255{
1e33ac1e 256 if (rw == RW_READER) {
c25b8f99 257 VERIFY0(pthread_rwlock_rdlock(&rwlp->rw_lock));
1e33ac1e
BB
258 atomic_inc_uint(&rwlp->rw_readers);
259 } else {
c25b8f99
BB
260 VERIFY0(pthread_rwlock_wrlock(&rwlp->rw_lock));
261 rwlp->rw_owner = pthread_self();
1e33ac1e 262 }
34dc7c2f
BB
263}
264
265void
266rw_exit(krwlock_t *rwlp)
267{
1e33ac1e
BB
268 if (RW_READ_HELD(rwlp))
269 atomic_dec_uint(&rwlp->rw_readers);
270 else
c25b8f99 271 rwlp->rw_owner = 0;
34dc7c2f 272
c25b8f99 273 VERIFY0(pthread_rwlock_unlock(&rwlp->rw_lock));
34dc7c2f
BB
274}
275
276int
277rw_tryenter(krwlock_t *rwlp, krw_t rw)
278{
c25b8f99 279 int error;
34dc7c2f
BB
280
281 if (rw == RW_READER)
c25b8f99 282 error = pthread_rwlock_tryrdlock(&rwlp->rw_lock);
34dc7c2f 283 else
c25b8f99 284 error = pthread_rwlock_trywrlock(&rwlp->rw_lock);
1e33ac1e 285
c25b8f99 286 if (error == 0) {
1e33ac1e
BB
287 if (rw == RW_READER)
288 atomic_inc_uint(&rwlp->rw_readers);
c25b8f99
BB
289 else
290 rwlp->rw_owner = pthread_self();
1e33ac1e 291
34dc7c2f
BB
292 return (1);
293 }
294
c25b8f99 295 VERIFY3S(error, ==, EBUSY);
1e33ac1e 296
34dc7c2f
BB
297 return (0);
298}
299
34dc7c2f
BB
300int
301rw_tryupgrade(krwlock_t *rwlp)
302{
34dc7c2f
BB
303 return (0);
304}
305
306/*
307 * =========================================================================
308 * condition variables
309 * =========================================================================
310 */
1e33ac1e 311
34dc7c2f
BB
312void
313cv_init(kcondvar_t *cv, char *name, int type, void *arg)
314{
c25b8f99 315 VERIFY0(pthread_cond_init(cv, NULL));
34dc7c2f
BB
316}
317
318void
319cv_destroy(kcondvar_t *cv)
320{
c25b8f99 321 VERIFY0(pthread_cond_destroy(cv));
34dc7c2f
BB
322}
323
324void
325cv_wait(kcondvar_t *cv, kmutex_t *mp)
326{
c25b8f99
BB
327 memset(&mp->m_owner, 0, sizeof (pthread_t));
328 VERIFY0(pthread_cond_wait(cv, &mp->m_lock));
329 mp->m_owner = pthread_self();
34dc7c2f
BB
330}
331
332clock_t
333cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
334{
335 int error;
1e33ac1e 336 struct timeval tv;
34dc7c2f
BB
337 timestruc_t ts;
338 clock_t delta;
339
428870ff 340 delta = abstime - ddi_get_lbolt();
34dc7c2f
BB
341 if (delta <= 0)
342 return (-1);
343
1e33ac1e
BB
344 VERIFY(gettimeofday(&tv, NULL) == 0);
345
346 ts.tv_sec = tv.tv_sec + delta / hz;
67925abb 347 ts.tv_nsec = tv.tv_usec * NSEC_PER_USEC + (delta % hz) * (NANOSEC / hz);
1e33ac1e
BB
348 if (ts.tv_nsec >= NANOSEC) {
349 ts.tv_sec++;
350 ts.tv_nsec -= NANOSEC;
351 }
34dc7c2f 352
c25b8f99
BB
353 memset(&mp->m_owner, 0, sizeof (pthread_t));
354 error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
355 mp->m_owner = pthread_self();
34dc7c2f 356
1e33ac1e 357 if (error == ETIMEDOUT)
34dc7c2f
BB
358 return (-1);
359
206971d2 360 VERIFY0(error);
34dc7c2f
BB
361
362 return (1);
363}
364
63fd3c6c
AL
365/*ARGSUSED*/
366clock_t
367cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res,
368 int flag)
369{
370 int error;
67925abb 371 struct timeval tv;
63fd3c6c
AL
372 timestruc_t ts;
373 hrtime_t delta;
374
206971d2
DR
375 ASSERT(flag == 0 || flag == CALLOUT_FLAG_ABSOLUTE);
376
377 delta = tim;
378 if (flag & CALLOUT_FLAG_ABSOLUTE)
379 delta -= gethrtime();
63fd3c6c 380
63fd3c6c
AL
381 if (delta <= 0)
382 return (-1);
383
c25b8f99 384 VERIFY0(gettimeofday(&tv, NULL));
67925abb
BB
385
386 ts.tv_sec = tv.tv_sec + delta / NANOSEC;
387 ts.tv_nsec = tv.tv_usec * NSEC_PER_USEC + (delta % NANOSEC);
388 if (ts.tv_nsec >= NANOSEC) {
389 ts.tv_sec++;
390 ts.tv_nsec -= NANOSEC;
391 }
63fd3c6c 392
c25b8f99
BB
393 memset(&mp->m_owner, 0, sizeof (pthread_t));
394 error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
395 mp->m_owner = pthread_self();
63fd3c6c 396
206971d2 397 if (error == ETIMEDOUT)
63fd3c6c
AL
398 return (-1);
399
206971d2 400 VERIFY0(error);
63fd3c6c
AL
401
402 return (1);
403}
404
34dc7c2f
BB
405void
406cv_signal(kcondvar_t *cv)
407{
c25b8f99 408 VERIFY0(pthread_cond_signal(cv));
34dc7c2f
BB
409}
410
411void
412cv_broadcast(kcondvar_t *cv)
413{
c25b8f99 414 VERIFY0(pthread_cond_broadcast(cv));
34dc7c2f
BB
415}
416
417/*
418 * =========================================================================
419 * vnode operations
420 * =========================================================================
421 */
422/*
423 * Note: for the xxxat() versions of these functions, we assume that the
424 * starting vp is always rootdir (which is true for spa_directory.c, the only
425 * ZFS consumer of these interfaces). We assert this is true, and then emulate
426 * them by adding '/' in front of the path.
427 */
428
429/*ARGSUSED*/
430int
431vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
432{
e2c292bb 433 int fd = -1;
434 int dump_fd = -1;
34dc7c2f 435 vnode_t *vp;
a4914d38 436 int old_umask = 0;
5ae4e2c2 437 char *realpath;
34dc7c2f 438 struct stat64 st;
4d58b69d 439 int err;
34dc7c2f 440
5ae4e2c2
BB
441 realpath = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
442
34dc7c2f
BB
443 /*
444 * If we're accessing a real disk from userland, we need to use
445 * the character interface to avoid caching. This is particularly
446 * important if we're trying to look at a real in-kernel storage
447 * pool from userland, e.g. via zdb, because otherwise we won't
448 * see the changes occurring under the segmap cache.
449 * On the other hand, the stupid character device returns zero
450 * for its size. So -- gag -- we open the block device to get
451 * its size, and remember it for subsequent VOP_GETATTR().
452 */
d603ed6c 453#if defined(__sun__) || defined(__sun)
34dc7c2f 454 if (strncmp(path, "/dev/", 5) == 0) {
d603ed6c
BB
455#else
456 if (0) {
457#endif
34dc7c2f
BB
458 char *dsk;
459 fd = open64(path, O_RDONLY);
5ae4e2c2
BB
460 if (fd == -1) {
461 err = errno;
462 free(realpath);
463 return (err);
464 }
34dc7c2f 465 if (fstat64(fd, &st) == -1) {
5ae4e2c2 466 err = errno;
34dc7c2f 467 close(fd);
5ae4e2c2
BB
468 free(realpath);
469 return (err);
34dc7c2f
BB
470 }
471 close(fd);
472 (void) sprintf(realpath, "%s", path);
473 dsk = strstr(path, "/dsk/");
474 if (dsk != NULL)
475 (void) sprintf(realpath + (dsk - path) + 1, "r%s",
476 dsk + 1);
477 } else {
478 (void) sprintf(realpath, "%s", path);
5ae4e2c2
BB
479 if (!(flags & FCREAT) && stat64(realpath, &st) == -1) {
480 err = errno;
481 free(realpath);
482 return (err);
483 }
34dc7c2f
BB
484 }
485
d603ed6c
BB
486 if (!(flags & FCREAT) && S_ISBLK(st.st_mode)) {
487#ifdef __linux__
488 flags |= O_DIRECT;
489#endif
ada82581
BB
490 /* We shouldn't be writing to block devices in userspace */
491 VERIFY(!(flags & FWRITE));
d603ed6c
BB
492 }
493
34dc7c2f
BB
494 if (flags & FCREAT)
495 old_umask = umask(0);
496
497 /*
498 * The construct 'flags - FREAD' conveniently maps combinations of
499 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
500 */
501 fd = open64(realpath, flags - FREAD, mode);
470f12d6
G
502 if (fd == -1) {
503 err = errno;
504 free(realpath);
505 return (err);
506 }
34dc7c2f
BB
507
508 if (flags & FCREAT)
509 (void) umask(old_umask);
510
9867e8be
MA
511 if (vn_dumpdir != NULL) {
512 char *dumppath = umem_zalloc(MAXPATHLEN, UMEM_NOFAIL);
513 (void) snprintf(dumppath, MAXPATHLEN,
514 "%s/%s", vn_dumpdir, basename(realpath));
515 dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
516 umem_free(dumppath, MAXPATHLEN);
517 if (dump_fd == -1) {
518 err = errno;
519 free(realpath);
520 close(fd);
521 return (err);
522 }
523 } else {
524 dump_fd = -1;
525 }
526
527 free(realpath);
528
8d4e8140 529 if (fstat64_blk(fd, &st) == -1) {
4d58b69d 530 err = errno;
34dc7c2f 531 close(fd);
e2c292bb 532 if (dump_fd != -1)
533 close(dump_fd);
4d58b69d 534 return (err);
34dc7c2f
BB
535 }
536
537 (void) fcntl(fd, F_SETFD, FD_CLOEXEC);
538
539 *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
540
541 vp->v_fd = fd;
542 vp->v_size = st.st_size;
543 vp->v_path = spa_strdup(path);
9867e8be 544 vp->v_dump_fd = dump_fd;
34dc7c2f
BB
545
546 return (0);
547}
548
549/*ARGSUSED*/
550int
551vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
552 int x3, vnode_t *startvp, int fd)
553{
554 char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
555 int ret;
556
557 ASSERT(startvp == rootdir);
558 (void) sprintf(realpath, "/%s", path);
559
560 /* fd ignored for now, need if want to simulate nbmand support */
561 ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
562
563 umem_free(realpath, strlen(path) + 2);
564
565 return (ret);
566}
567
568/*ARGSUSED*/
569int
570vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
e9aa730c 571 int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
34dc7c2f 572{
4d58b69d 573 ssize_t rc, done = 0, split;
34dc7c2f
BB
574
575 if (uio == UIO_READ) {
4d58b69d 576 rc = pread64(vp->v_fd, addr, len, offset);
470f12d6 577 if (vp->v_dump_fd != -1 && rc != -1) {
928c58dd
BB
578 int status;
579 status = pwrite64(vp->v_dump_fd, addr, rc, offset);
9867e8be
MA
580 ASSERT(status != -1);
581 }
34dc7c2f
BB
582 } else {
583 /*
584 * To simulate partial disk writes, we split writes into two
585 * system calls so that the process can be killed in between.
586 */
9ae529ec
CS
587 int sectors = len >> SPA_MINBLOCKSHIFT;
588 split = (sectors > 0 ? rand() % sectors : 0) <<
589 SPA_MINBLOCKSHIFT;
4d58b69d
RC
590 rc = pwrite64(vp->v_fd, addr, split, offset);
591 if (rc != -1) {
592 done = rc;
593 rc = pwrite64(vp->v_fd, (char *)addr + split,
594 len - split, offset + split);
595 }
34dc7c2f
BB
596 }
597
d603ed6c
BB
598#ifdef __linux__
599 if (rc == -1 && errno == EINVAL) {
600 /*
601 * Under Linux, this most likely means an alignment issue
602 * (memory or disk) due to O_DIRECT, so we abort() in order to
603 * catch the offender.
604 */
d1d7e268 605 abort();
d603ed6c
BB
606 }
607#endif
4d58b69d 608 if (rc == -1)
34dc7c2f 609 return (errno);
4d58b69d
RC
610
611 done += rc;
612
34dc7c2f 613 if (residp)
4d58b69d
RC
614 *residp = len - done;
615 else if (done != len)
34dc7c2f
BB
616 return (EIO);
617 return (0);
618}
619
620void
621vn_close(vnode_t *vp)
622{
623 close(vp->v_fd);
9867e8be
MA
624 if (vp->v_dump_fd != -1)
625 close(vp->v_dump_fd);
34dc7c2f
BB
626 spa_strfree(vp->v_path);
627 umem_free(vp, sizeof (vnode_t));
628}
629
428870ff
BB
630/*
631 * At a minimum we need to update the size since vdev_reopen()
632 * will no longer call vn_openat().
633 */
634int
635fop_getattr(vnode_t *vp, vattr_t *vap)
636{
637 struct stat64 st;
8d4e8140 638 int err;
428870ff 639
8d4e8140
RC
640 if (fstat64_blk(vp->v_fd, &st) == -1) {
641 err = errno;
428870ff 642 close(vp->v_fd);
8d4e8140 643 return (err);
428870ff
BB
644 }
645
646 vap->va_size = st.st_size;
647 return (0);
648}
649
34dc7c2f
BB
650/*
651 * =========================================================================
652 * Figure out which debugging statements to print
653 * =========================================================================
654 */
655
656static char *dprintf_string;
657static int dprintf_print_all;
658
659int
660dprintf_find_string(const char *string)
661{
662 char *tmp_str = dprintf_string;
663 int len = strlen(string);
664
665 /*
666 * Find out if this is a string we want to print.
667 * String format: file1.c,function_name1,file2.c,file3.c
668 */
669
670 while (tmp_str != NULL) {
671 if (strncmp(tmp_str, string, len) == 0 &&
672 (tmp_str[len] == ',' || tmp_str[len] == '\0'))
673 return (1);
674 tmp_str = strchr(tmp_str, ',');
675 if (tmp_str != NULL)
676 tmp_str++; /* Get rid of , */
677 }
678 return (0);
679}
680
681void
682dprintf_setup(int *argc, char **argv)
683{
684 int i, j;
685
686 /*
687 * Debugging can be specified two ways: by setting the
688 * environment variable ZFS_DEBUG, or by including a
689 * "debug=..." argument on the command line. The command
690 * line setting overrides the environment variable.
691 */
692
693 for (i = 1; i < *argc; i++) {
694 int len = strlen("debug=");
695 /* First look for a command line argument */
696 if (strncmp("debug=", argv[i], len) == 0) {
697 dprintf_string = argv[i] + len;
698 /* Remove from args */
699 for (j = i; j < *argc; j++)
700 argv[j] = argv[j+1];
701 argv[j] = NULL;
702 (*argc)--;
703 }
704 }
705
706 if (dprintf_string == NULL) {
707 /* Look for ZFS_DEBUG environment variable */
708 dprintf_string = getenv("ZFS_DEBUG");
709 }
710
711 /*
712 * Are we just turning on all debugging?
713 */
714 if (dprintf_find_string("on"))
715 dprintf_print_all = 1;
308a451f
MA
716
717 if (dprintf_string != NULL)
718 zfs_flags |= ZFS_DEBUG_DPRINTF;
34dc7c2f
BB
719}
720
721/*
722 * =========================================================================
723 * debug printfs
724 * =========================================================================
725 */
726void
727__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
728{
729 const char *newfile;
730 va_list adx;
731
732 /*
733 * Get rid of annoying "../common/" prefix to filename.
734 */
735 newfile = strrchr(file, '/');
736 if (newfile != NULL) {
737 newfile = newfile + 1; /* Get rid of leading / */
738 } else {
739 newfile = file;
740 }
741
742 if (dprintf_print_all ||
743 dprintf_find_string(newfile) ||
744 dprintf_find_string(func)) {
745 /* Print out just the function name if requested */
746 flockfile(stdout);
747 if (dprintf_find_string("pid"))
748 (void) printf("%d ", getpid());
749 if (dprintf_find_string("tid"))
02730c33 750 (void) printf("%u ", (uint_t)pthread_self());
34dc7c2f
BB
751 if (dprintf_find_string("cpu"))
752 (void) printf("%u ", getcpuid());
753 if (dprintf_find_string("time"))
754 (void) printf("%llu ", gethrtime());
755 if (dprintf_find_string("long"))
756 (void) printf("%s, line %d: ", newfile, line);
757 (void) printf("%s: ", func);
758 va_start(adx, fmt);
759 (void) vprintf(fmt, adx);
760 va_end(adx);
761 funlockfile(stdout);
762 }
763}
764
34dc7c2f
BB
765/*
766 * =========================================================================
767 * cmn_err() and panic()
768 * =========================================================================
769 */
770static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
771static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
772
/*
 * Print "error: " followed by the formatted message and a newline to
 * stderr, then abort().  Does not return.
 */
void
vpanic(const char *fmt, va_list adx)
{
	(void) fprintf(stderr, "error: ");
	(void) vfprintf(stderr, fmt, adx);
	(void) fprintf(stderr, "\n");

	/* think of it as a "user-level crash dump" */
	abort();
}
782
/*
 * printf-style front end to vpanic().
 */
void
panic(const char *fmt, ...)
{
	va_list adx;

	va_start(adx, fmt);
	vpanic(fmt, adx);
	va_end(adx);
}
792
793void
794vcmn_err(int ce, const char *fmt, va_list adx)
795{
796 if (ce == CE_PANIC)
797 vpanic(fmt, adx);
798 if (ce != CE_NOTE) { /* suppress noise in userland stress testing */
799 (void) fprintf(stderr, "%s", ce_prefix[ce]);
800 (void) vfprintf(stderr, fmt, adx);
801 (void) fprintf(stderr, "%s", ce_suffix[ce]);
802 }
803}
804
/*
 * printf-style front end to vcmn_err().
 */
/*PRINTFLIKE2*/
void
cmn_err(int ce, const char *fmt, ...)
{
	va_list adx;

	va_start(adx, fmt);
	vcmn_err(ce, fmt, adx);
	va_end(adx);
}
815
816/*
817 * =========================================================================
818 * kobj interfaces
819 * =========================================================================
820 */
821struct _buf *
822kobj_open_file(char *name)
823{
824 struct _buf *file;
825 vnode_t *vp;
826
827 /* set vp as the _fd field of the file */
828 if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir,
829 -1) != 0)
830 return ((void *)-1UL);
831
832 file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
833 file->_fd = (intptr_t)vp;
834 return (file);
835}
836
837int
838kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
839{
689f093e 840 ssize_t resid = 0;
34dc7c2f 841
957dc932
RY
842 if (vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
843 UIO_SYSSPACE, 0, 0, 0, &resid) != 0)
844 return (-1);
34dc7c2f
BB
845
846 return (size - resid);
847}
848
849void
850kobj_close_file(struct _buf *file)
851{
852 vn_close((vnode_t *)file->_fd);
853 umem_free(file, sizeof (struct _buf));
854}
855
856int
857kobj_get_filesize(struct _buf *file, uint64_t *size)
858{
859 struct stat64 st;
860 vnode_t *vp = (vnode_t *)file->_fd;
861
862 if (fstat64(vp->v_fd, &st) == -1) {
863 vn_close(vp);
864 return (errno);
865 }
866 *size = st.st_size;
867 return (0);
868}
869
870/*
871 * =========================================================================
872 * misc routines
873 * =========================================================================
874 */
875
876void
877delay(clock_t ticks)
878{
af4db70f 879 (void) poll(0, 0, ticks * (1000 / hz));
34dc7c2f
BB
880}
881
/*
 * Find the highest set bit.
 * Returns the 1-based position of the most significant set bit in 'i',
 * or 0 when no bit is set.  __builtin_clzll() is provided by both GCC
 * and Clang; it is never called with 0.
 */
int
highbit64(uint64_t i)
{
	int width = NBBY * sizeof (uint64_t);

	return (i != 0 ? width - __builtin_clzll(i) : 0);
}
895
193a37cb
TH
/*
 * Find the lowest set bit.
 * Returns the 1-based position of the least significant set bit in 'i',
 * or 0 when no bit is set.  __builtin_ffsll() is provided by both GCC
 * and Clang.
 */
int
lowbit64(uint64_t i)
{
	return (i != 0 ? __builtin_ffsll(i) : 0);
}
193a37cb 909
34dc7c2f
BB
910static int random_fd = -1, urandom_fd = -1;
911
0b04990a
TC
912void
913random_init(void)
914{
915 VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
916 VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);
917}
918
919void
920random_fini(void)
921{
922 close(random_fd);
923 close(urandom_fd);
924
925 random_fd = -1;
926 urandom_fd = -1;
927}
928
34dc7c2f
BB
929static int
930random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
931{
932 size_t resid = len;
933 ssize_t bytes;
934
935 ASSERT(fd != -1);
936
937 while (resid != 0) {
938 bytes = read(fd, ptr, resid);
939 ASSERT3S(bytes, >=, 0);
940 ptr += bytes;
941 resid -= bytes;
942 }
943
944 return (0);
945}
946
947int
948random_get_bytes(uint8_t *ptr, size_t len)
949{
950 return (random_get_bytes_common(ptr, len, random_fd));
951}
952
953int
954random_get_pseudo_bytes(uint8_t *ptr, size_t len)
955{
956 return (random_get_bytes_common(ptr, len, urandom_fd));
957}
958
/*
 * Convert the string 'hw_serial' to an unsigned long in 'base' and store
 * it in *result.  If 'nptr' is not NULL, *nptr is set to the first
 * character that was not consumed.
 *
 * Returns 0 on success, or the errno set by strtoul() (for example
 * ERANGE on overflow).  errno is cleared before the conversion, so a
 * legitimate result of 0 is not reported as a stale earlier error.
 */
int
ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
{
	char *end;

	errno = 0;
	*result = strtoul(hw_serial, &end, base);
	if (nptr != NULL)
		*nptr = end;

	return (errno);
}
969
428870ff
BB
970int
971ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
972{
973 char *end;
974
975 *result = strtoull(str, &end, base);
976 if (*result == 0)
977 return (errno);
978 return (0);
979}
980
f0e324f2
BB
981utsname_t *
982utsname(void)
983{
984 return (&hw_utsname);
985}
986
34dc7c2f
BB
987/*
988 * =========================================================================
989 * kernel emulation setup & teardown
990 * =========================================================================
991 */
/*
 * Out-of-memory callback (installed by kernel_init()): report the
 * condition on stderr and abort().  The return statement is never
 * reached.
 */
static int
umem_out_of_memory(void)
{
	static const char errmsg[] = "out of memory -- generating core dump\n";

	(void) fputs(errmsg, stderr);
	abort();
	return (0);
}
1001
1002void
1003kernel_init(int mode)
1004{
13fe0198
MA
1005 extern uint_t rrw_tsd_key;
1006
34dc7c2f
BB
1007 umem_nofail_callback(umem_out_of_memory);
1008
1009 physmem = sysconf(_SC_PHYS_PAGES);
1010
1011 dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
1012 (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
1013
428870ff 1014 (void) snprintf(hw_serial, sizeof (hw_serial), "%ld",
53698a45 1015 (mode & FWRITE) ? get_system_hostid() : 0);
34dc7c2f 1016
0b04990a
TC
1017 random_init();
1018
f0e324f2 1019 VERIFY0(uname(&hw_utsname));
34dc7c2f 1020
b128c09f 1021 system_taskq_init();
0b04990a 1022 icp_init();
b128c09f 1023
34dc7c2f 1024 spa_init(mode);
13fe0198 1025
1eeb4562
JX
1026 fletcher_4_init();
1027
13fe0198 1028 tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
34dc7c2f
BB
1029}
1030
/*
 * Shut down the userland kernel emulation, calling each subsystem's
 * fini routine in the order written below.
 */
void
kernel_fini(void)
{
	fletcher_4_fini();
	spa_fini();

	icp_fini();
	system_taskq_fini();

	random_fini();
}
1042
34dc7c2f
BB
1043uid_t
1044crgetuid(cred_t *cr)
1045{
1046 return (0);
1047}
1048
6f1ffb06
MA
1049uid_t
1050crgetruid(cred_t *cr)
1051{
1052 return (0);
1053}
1054
34dc7c2f
BB
1055gid_t
1056crgetgid(cred_t *cr)
1057{
1058 return (0);
1059}
1060
1061int
1062crgetngroups(cred_t *cr)
1063{
1064 return (0);
1065}
1066
1067gid_t *
1068crgetgroups(cred_t *cr)
1069{
1070 return (NULL);
1071}
1072
1073int
1074zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
1075{
1076 return (0);
1077}
1078
1079int
1080zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
1081{
1082 return (0);
1083}
1084
1085int
1086zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
1087{
1088 return (0);
1089}
1090
f74b821a
BB
1091int
1092secpolicy_zfs(const cred_t *cr)
1093{
1094 return (0);
1095}
1096
34dc7c2f
BB
1097ksiddomain_t *
1098ksid_lookupdomain(const char *dom)
1099{
1100 ksiddomain_t *kd;
1101
1102 kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL);
1103 kd->kd_name = spa_strdup(dom);
1104 return (kd);
1105}
1106
1107void
1108ksiddomain_rele(ksiddomain_t *ksid)
1109{
1110 spa_strfree(ksid->kd_name);
1111 umem_free(ksid, sizeof (ksiddomain_t));
1112}
428870ff 1113
/*
 * Format 'fmt' with the arguments in 'adx' into a newly allocated
 * string and return it.  A vasprintf() failure is fatal.  A copy of the
 * va_list is used, so the caller's 'adx' is left untouched.
 */
char *
kmem_vasprintf(const char *fmt, va_list adx)
{
	va_list adx_copy;
	char *buf = NULL;

	va_copy(adx_copy, adx);
	VERIFY(vasprintf(&buf, fmt, adx_copy) != -1);
	va_end(adx_copy);

	return (buf);
}
1126
/*
 * printf-style formatting into a newly allocated string.  A vasprintf()
 * failure is fatal.
 */
char *
kmem_asprintf(const char *fmt, ...)
{
	va_list adx;
	char *buf = NULL;

	va_start(adx, fmt);
	VERIFY(vasprintf(&buf, fmt, adx) != -1);
	va_end(adx);

	return (buf);
}
572e2857
BB
1139
1140/* ARGSUSED */
1141int
1142zfs_onexit_fd_hold(int fd, minor_t *minorp)
1143{
1144 *minorp = 0;
1145 return (0);
1146}
1147
1148/* ARGSUSED */
1149void
1150zfs_onexit_fd_rele(int fd)
1151{
1152}
1153
1154/* ARGSUSED */
1155int
1156zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
1157 uint64_t *action_handle)
1158{
1159 return (0);
1160}
1161
1162/* ARGSUSED */
1163int
1164zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
1165{
1166 return (0);
1167}
1168
1169/* ARGSUSED */
1170int
1171zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
1172{
1173 return (0);
1174}
92119cc2
BB
1175
1176fstrans_cookie_t
1177spl_fstrans_mark(void)
1178{
02730c33 1179 return ((fstrans_cookie_t)0);
92119cc2
BB
1180}
1181
1182void
1183spl_fstrans_unmark(fstrans_cookie_t cookie)
1184{
1185}
1186
1187int
e624cd19 1188__spl_pf_fstrans_check(void)
92119cc2
BB
1189{
1190 return (0);
1191}
a0bd735a 1192
47dfff3b
MA
1193void *zvol_tag = "zvol_tag";
1194
a0bd735a
BP
1195void
1196zvol_create_minors(spa_t *spa, const char *name, boolean_t async)
1197{
1198}
1199
1200void
1201zvol_remove_minor(spa_t *spa, const char *name, boolean_t async)
1202{
1203}
1204
1205void
1206zvol_remove_minors(spa_t *spa, const char *name, boolean_t async)
1207{
1208}
1209
1210void
1211zvol_rename_minors(spa_t *spa, const char *oldname, const char *newname,
1212 boolean_t async)
1213{
1214}