]> git.proxmox.com Git - mirror_zfs.git/blame - lib/libzpool/kernel.c
Fix stack vdev_cache_read()
[mirror_zfs.git] / lib / libzpool / kernel.c
CommitLineData
34dc7c2f
BB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
572e2857 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
34dc7c2f
BB
23 */
24
34dc7c2f
BB
25#include <assert.h>
26#include <fcntl.h>
27#include <poll.h>
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31#include <zlib.h>
1e33ac1e 32#include <sys/signal.h>
34dc7c2f
BB
33#include <sys/spa.h>
34#include <sys/stat.h>
35#include <sys/processor.h>
36#include <sys/zfs_context.h>
34dc7c2f 37#include <sys/utsname.h>
d164b209 38#include <sys/systeminfo.h>
34dc7c2f
BB
39
40/*
41 * Emulation of kernel services in userland.
42 */
43
428870ff 44int aok;
34dc7c2f
BB
45uint64_t physmem;
46vnode_t *rootdir = (vnode_t *)0xabcd1234;
d164b209 47char hw_serial[HW_HOSTID_LEN];
34dc7c2f
BB
48
49struct utsname utsname = {
50 "userland", "libzpool", "1", "1", "na"
51};
52
428870ff
BB
53/* this only exists to have its address taken */
54struct proc p0;
55
34dc7c2f
BB
56/*
57 * =========================================================================
58 * threads
59 * =========================================================================
60 */
1e33ac1e
BB
61
62pthread_cond_t kthread_cond = PTHREAD_COND_INITIALIZER;
63pthread_mutex_t kthread_lock = PTHREAD_MUTEX_INITIALIZER;
64pthread_key_t kthread_key;
65int kthread_nr = 0;
66
67static void
68thread_init(void)
69{
70 kthread_t *kt;
71
72 VERIFY3S(pthread_key_create(&kthread_key, NULL), ==, 0);
73
74 /* Create entry for primary kthread */
75 kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL);
76 kt->t_tid = pthread_self();
77 kt->t_func = NULL;
78
79 VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0);
80
81 /* Only the main thread should be running at the moment */
82 ASSERT3S(kthread_nr, ==, 0);
83 kthread_nr = 1;
84}
85
86static void
87thread_fini(void)
88{
89 kthread_t *kt = curthread;
90
91 ASSERT(pthread_equal(kt->t_tid, pthread_self()));
92 ASSERT3P(kt->t_func, ==, NULL);
93
94 umem_free(kt, sizeof(kthread_t));
95
96 /* Wait for all threads to exit via thread_exit() */
97 VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0);
98
99 kthread_nr--; /* Main thread is exiting */
100
101 while (kthread_nr > 0)
102 VERIFY3S(pthread_cond_wait(&kthread_cond, &kthread_lock), ==,
103 0);
104
105 ASSERT3S(kthread_nr, ==, 0);
106 VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0);
107
108 VERIFY3S(pthread_key_delete(kthread_key), ==, 0);
109}
110
34dc7c2f 111kthread_t *
1e33ac1e
BB
112zk_thread_current(void)
113{
114 kthread_t *kt = pthread_getspecific(kthread_key);
115
116 ASSERT3P(kt, !=, NULL);
117
118 return kt;
119}
120
121void *
122zk_thread_helper(void *arg)
34dc7c2f 123{
1e33ac1e
BB
124 kthread_t *kt = (kthread_t *) arg;
125
126 VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0);
34dc7c2f 127
1e33ac1e
BB
128 VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0);
129 kthread_nr++;
130 VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0);
34dc7c2f 131
1e33ac1e
BB
132 kt->t_tid = pthread_self();
133 ((thread_func_arg_t) kt->t_func)(kt->t_arg);
134
135 /* Unreachable, thread must exit with thread_exit() */
136 abort();
137
138 return NULL;
139}
140
141kthread_t *
142zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg,
143 size_t len, proc_t *pp, int state, pri_t pri)
144{
145 kthread_t *kt;
146 pthread_attr_t attr;
147 size_t stack;
148
149 ASSERT3S(state & ~TS_RUN, ==, 0);
150
151 kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL);
152 kt->t_func = func;
153 kt->t_arg = arg;
154
155 /*
156 * The Solaris kernel stack size is 24k for x86/x86_64.
157 * The Linux kernel stack size is 8k for x86/x86_64.
158 *
159 * We reduce the default stack size in userspace, to ensure
160 * we observe stack overruns in user space as well as in
161 * kernel space. PTHREAD_STACK_MIN is the minimum stack
162 * required for a NULL procedure in user space and is added
163 * in to the stack requirements.
164 *
165 * Some buggy NPTL threading implementations include the
166 * guard area within the stack size allocations. In
167 * this case we allocate an extra page to account for the
168 * guard area since we only have two pages of usable stack
169 * on Linux.
170 */
171
172 stack = PTHREAD_STACK_MIN + MAX(stksize, STACK_SIZE) +
173 EXTRA_GUARD_BYTES;
174
175 VERIFY3S(pthread_attr_init(&attr), ==, 0);
176 VERIFY3S(pthread_attr_setstacksize(&attr, stack), ==, 0);
177 VERIFY3S(pthread_attr_setguardsize(&attr, PAGESIZE), ==, 0);
178
179 VERIFY3S(pthread_create(&kt->t_tid, &attr, &zk_thread_helper, kt),
180 ==, 0);
181
182 VERIFY3S(pthread_attr_destroy(&attr), ==, 0);
183
184 return kt;
185}
186
187void
188zk_thread_exit(void)
189{
190 kthread_t *kt = curthread;
191
192 ASSERT(pthread_equal(kt->t_tid, pthread_self()));
193
194 umem_free(kt, sizeof(kthread_t));
195
196 pthread_mutex_lock(&kthread_lock);
197 kthread_nr--;
198 pthread_mutex_unlock(&kthread_lock);
199
200 pthread_cond_broadcast(&kthread_cond);
201 pthread_exit((void *)TS_MAGIC);
202}
203
204void
205zk_thread_join(kt_did_t tid)
206{
207 void *ret;
208
209 pthread_join((pthread_t)tid, &ret);
210 VERIFY3P(ret, ==, (void *)TS_MAGIC);
34dc7c2f
BB
211}
212
213/*
214 * =========================================================================
215 * kstats
216 * =========================================================================
217 */
218/*ARGSUSED*/
219kstat_t *
220kstat_create(char *module, int instance, char *name, char *class,
221 uchar_t type, ulong_t ndata, uchar_t ks_flag)
222{
223 return (NULL);
224}
225
226/*ARGSUSED*/
227void
228kstat_install(kstat_t *ksp)
229{}
230
231/*ARGSUSED*/
232void
233kstat_delete(kstat_t *ksp)
234{}
235
236/*
237 * =========================================================================
238 * mutexes
239 * =========================================================================
240 */
1e33ac1e 241
34dc7c2f 242void
1e33ac1e 243mutex_init(kmutex_t *mp, char *name, int type, void *cookie)
34dc7c2f 244{
1e33ac1e
BB
245 ASSERT3S(type, ==, MUTEX_DEFAULT);
246 ASSERT3P(cookie, ==, NULL);
247 mp->m_owner = MTX_INIT;
248 mp->m_magic = MTX_MAGIC;
249 VERIFY3S(pthread_mutex_init(&mp->m_lock, NULL), ==, 0);
34dc7c2f
BB
250}
251
252void
1e33ac1e 253mutex_destroy(kmutex_t *mp)
34dc7c2f 254{
1e33ac1e
BB
255 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
256 ASSERT3P(mp->m_owner, ==, MTX_INIT);
257 VERIFY3S(pthread_mutex_destroy(&(mp)->m_lock), ==, 0);
258 mp->m_owner = MTX_DEST;
259 mp->m_magic = 0;
34dc7c2f
BB
260}
261
262void
263mutex_enter(kmutex_t *mp)
264{
1e33ac1e
BB
265 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
266 ASSERT3P(mp->m_owner, !=, MTX_DEST);
267 ASSERT3P(mp->m_owner, !=, curthread);
268 VERIFY3S(pthread_mutex_lock(&mp->m_lock), ==, 0);
269 ASSERT3P(mp->m_owner, ==, MTX_INIT);
34dc7c2f
BB
270 mp->m_owner = curthread;
271}
272
273int
274mutex_tryenter(kmutex_t *mp)
275{
1e33ac1e
BB
276 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
277 ASSERT3P(mp->m_owner, !=, MTX_DEST);
278 if (0 == pthread_mutex_trylock(&mp->m_lock)) {
279 ASSERT3P(mp->m_owner, ==, MTX_INIT);
34dc7c2f
BB
280 mp->m_owner = curthread;
281 return (1);
282 } else {
283 return (0);
284 }
285}
286
287void
288mutex_exit(kmutex_t *mp)
289{
1e33ac1e
BB
290 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
291 ASSERT3P(mutex_owner(mp), ==, curthread);
292 mp->m_owner = MTX_INIT;
293 VERIFY3S(pthread_mutex_unlock(&mp->m_lock), ==, 0);
34dc7c2f
BB
294}
295
296void *
297mutex_owner(kmutex_t *mp)
298{
1e33ac1e 299 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
34dc7c2f
BB
300 return (mp->m_owner);
301}
302
1e33ac1e
BB
303int
304mutex_held(kmutex_t *mp)
305{
306 return (mp->m_owner == curthread);
307}
308
34dc7c2f
BB
309/*
310 * =========================================================================
311 * rwlocks
312 * =========================================================================
313 */
1e33ac1e 314
34dc7c2f
BB
315void
316rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
317{
1e33ac1e
BB
318 ASSERT3S(type, ==, RW_DEFAULT);
319 ASSERT3P(arg, ==, NULL);
320 VERIFY3S(pthread_rwlock_init(&rwlp->rw_lock, NULL), ==, 0);
321 rwlp->rw_owner = RW_INIT;
322 rwlp->rw_wr_owner = RW_INIT;
323 rwlp->rw_readers = 0;
324 rwlp->rw_magic = RW_MAGIC;
34dc7c2f
BB
325}
326
327void
328rw_destroy(krwlock_t *rwlp)
329{
1e33ac1e
BB
330 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
331
332 VERIFY3S(pthread_rwlock_destroy(&rwlp->rw_lock), ==, 0);
333 rwlp->rw_magic = 0;
34dc7c2f
BB
334}
335
336void
337rw_enter(krwlock_t *rwlp, krw_t rw)
338{
1e33ac1e
BB
339 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
340 ASSERT3P(rwlp->rw_owner, !=, curthread);
341 ASSERT3P(rwlp->rw_wr_owner, !=, curthread);
34dc7c2f 342
1e33ac1e
BB
343 if (rw == RW_READER) {
344 VERIFY3S(pthread_rwlock_rdlock(&rwlp->rw_lock), ==, 0);
345 ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT);
346
347 atomic_inc_uint(&rwlp->rw_readers);
348 } else {
349 VERIFY3S(pthread_rwlock_wrlock(&rwlp->rw_lock), ==, 0);
350 ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT);
351 ASSERT3U(rwlp->rw_readers, ==, 0);
352
353 rwlp->rw_wr_owner = curthread;
354 }
34dc7c2f
BB
355
356 rwlp->rw_owner = curthread;
357}
358
359void
360rw_exit(krwlock_t *rwlp)
361{
1e33ac1e
BB
362 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
363 ASSERT(RW_LOCK_HELD(rwlp));
364
365 if (RW_READ_HELD(rwlp))
366 atomic_dec_uint(&rwlp->rw_readers);
367 else
368 rwlp->rw_wr_owner = RW_INIT;
34dc7c2f 369
1e33ac1e
BB
370 rwlp->rw_owner = RW_INIT;
371 VERIFY3S(pthread_rwlock_unlock(&rwlp->rw_lock), ==, 0);
34dc7c2f
BB
372}
373
374int
375rw_tryenter(krwlock_t *rwlp, krw_t rw)
376{
377 int rv;
378
1e33ac1e 379 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
34dc7c2f
BB
380
381 if (rw == RW_READER)
1e33ac1e 382 rv = pthread_rwlock_tryrdlock(&rwlp->rw_lock);
34dc7c2f 383 else
1e33ac1e 384 rv = pthread_rwlock_trywrlock(&rwlp->rw_lock);
34dc7c2f
BB
385
386 if (rv == 0) {
1e33ac1e
BB
387 ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT);
388
389 if (rw == RW_READER)
390 atomic_inc_uint(&rwlp->rw_readers);
391 else {
392 ASSERT3U(rwlp->rw_readers, ==, 0);
393 rwlp->rw_wr_owner = curthread;
394 }
395
34dc7c2f
BB
396 rwlp->rw_owner = curthread;
397 return (1);
398 }
399
1e33ac1e
BB
400 VERIFY3S(rv, ==, EBUSY);
401
34dc7c2f
BB
402 return (0);
403}
404
34dc7c2f
BB
405int
406rw_tryupgrade(krwlock_t *rwlp)
407{
1e33ac1e 408 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
34dc7c2f
BB
409
410 return (0);
411}
412
413/*
414 * =========================================================================
415 * condition variables
416 * =========================================================================
417 */
1e33ac1e 418
34dc7c2f
BB
419void
420cv_init(kcondvar_t *cv, char *name, int type, void *arg)
421{
1e33ac1e
BB
422 ASSERT3S(type, ==, CV_DEFAULT);
423 cv->cv_magic = CV_MAGIC;
424 VERIFY3S(pthread_cond_init(&cv->cv, NULL), ==, 0);
34dc7c2f
BB
425}
426
427void
428cv_destroy(kcondvar_t *cv)
429{
1e33ac1e
BB
430 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
431 VERIFY3S(pthread_cond_destroy(&cv->cv), ==, 0);
432 cv->cv_magic = 0;
34dc7c2f
BB
433}
434
435void
436cv_wait(kcondvar_t *cv, kmutex_t *mp)
437{
1e33ac1e
BB
438 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
439 ASSERT3P(mutex_owner(mp), ==, curthread);
440 mp->m_owner = MTX_INIT;
441 int ret = pthread_cond_wait(&cv->cv, &mp->m_lock);
442 if (ret != 0)
443 VERIFY3S(ret, ==, EINTR);
34dc7c2f
BB
444 mp->m_owner = curthread;
445}
446
447clock_t
448cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
449{
450 int error;
1e33ac1e 451 struct timeval tv;
34dc7c2f
BB
452 timestruc_t ts;
453 clock_t delta;
454
1e33ac1e
BB
455 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
456
34dc7c2f 457top:
428870ff 458 delta = abstime - ddi_get_lbolt();
34dc7c2f
BB
459 if (delta <= 0)
460 return (-1);
461
1e33ac1e
BB
462 VERIFY(gettimeofday(&tv, NULL) == 0);
463
464 ts.tv_sec = tv.tv_sec + delta / hz;
465 ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz);
466 if (ts.tv_nsec >= NANOSEC) {
467 ts.tv_sec++;
468 ts.tv_nsec -= NANOSEC;
469 }
34dc7c2f 470
1e33ac1e
BB
471 ASSERT3P(mutex_owner(mp), ==, curthread);
472 mp->m_owner = MTX_INIT;
473 error = pthread_cond_timedwait(&cv->cv, &mp->m_lock, &ts);
34dc7c2f
BB
474 mp->m_owner = curthread;
475
1e33ac1e 476 if (error == ETIMEDOUT)
34dc7c2f
BB
477 return (-1);
478
479 if (error == EINTR)
480 goto top;
481
1e33ac1e 482 VERIFY3S(error, ==, 0);
34dc7c2f
BB
483
484 return (1);
485}
486
487void
488cv_signal(kcondvar_t *cv)
489{
1e33ac1e
BB
490 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
491 VERIFY3S(pthread_cond_signal(&cv->cv), ==, 0);
34dc7c2f
BB
492}
493
494void
495cv_broadcast(kcondvar_t *cv)
496{
1e33ac1e
BB
497 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
498 VERIFY3S(pthread_cond_broadcast(&cv->cv), ==, 0);
34dc7c2f
BB
499}
500
501/*
502 * =========================================================================
503 * vnode operations
504 * =========================================================================
505 */
506/*
507 * Note: for the xxxat() versions of these functions, we assume that the
508 * starting vp is always rootdir (which is true for spa_directory.c, the only
509 * ZFS consumer of these interfaces). We assert this is true, and then emulate
510 * them by adding '/' in front of the path.
511 */
512
513/*ARGSUSED*/
514int
515vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
516{
517 int fd;
518 vnode_t *vp;
519 int old_umask;
520 char realpath[MAXPATHLEN];
521 struct stat64 st;
4d58b69d 522 int err;
34dc7c2f
BB
523
524 /*
525 * If we're accessing a real disk from userland, we need to use
526 * the character interface to avoid caching. This is particularly
527 * important if we're trying to look at a real in-kernel storage
528 * pool from userland, e.g. via zdb, because otherwise we won't
529 * see the changes occurring under the segmap cache.
530 * On the other hand, the stupid character device returns zero
531 * for its size. So -- gag -- we open the block device to get
532 * its size, and remember it for subsequent VOP_GETATTR().
533 */
534 if (strncmp(path, "/dev/", 5) == 0) {
535 char *dsk;
536 fd = open64(path, O_RDONLY);
537 if (fd == -1)
538 return (errno);
539 if (fstat64(fd, &st) == -1) {
540 close(fd);
541 return (errno);
542 }
543 close(fd);
544 (void) sprintf(realpath, "%s", path);
545 dsk = strstr(path, "/dsk/");
546 if (dsk != NULL)
547 (void) sprintf(realpath + (dsk - path) + 1, "r%s",
548 dsk + 1);
549 } else {
550 (void) sprintf(realpath, "%s", path);
551 if (!(flags & FCREAT) && stat64(realpath, &st) == -1)
552 return (errno);
553 }
554
555 if (flags & FCREAT)
556 old_umask = umask(0);
557
558 /*
559 * The construct 'flags - FREAD' conveniently maps combinations of
560 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
561 */
562 fd = open64(realpath, flags - FREAD, mode);
563
564 if (flags & FCREAT)
565 (void) umask(old_umask);
566
567 if (fd == -1)
568 return (errno);
569
570 if (fstat64(fd, &st) == -1) {
4d58b69d 571 err = errno;
34dc7c2f 572 close(fd);
4d58b69d 573 return (err);
34dc7c2f
BB
574 }
575
576 (void) fcntl(fd, F_SETFD, FD_CLOEXEC);
577
578 *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
579
580 vp->v_fd = fd;
581 vp->v_size = st.st_size;
582 vp->v_path = spa_strdup(path);
583
584 return (0);
585}
586
587/*ARGSUSED*/
588int
589vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
590 int x3, vnode_t *startvp, int fd)
591{
592 char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
593 int ret;
594
595 ASSERT(startvp == rootdir);
596 (void) sprintf(realpath, "/%s", path);
597
598 /* fd ignored for now, need if want to simulate nbmand support */
599 ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
600
601 umem_free(realpath, strlen(path) + 2);
602
603 return (ret);
604}
605
606/*ARGSUSED*/
607int
608vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
609 int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
610{
4d58b69d 611 ssize_t rc, done = 0, split;
34dc7c2f
BB
612
613 if (uio == UIO_READ) {
4d58b69d 614 rc = pread64(vp->v_fd, addr, len, offset);
34dc7c2f
BB
615 } else {
616 /*
617 * To simulate partial disk writes, we split writes into two
618 * system calls so that the process can be killed in between.
619 */
620 split = (len > 0 ? rand() % len : 0);
4d58b69d
RC
621 rc = pwrite64(vp->v_fd, addr, split, offset);
622 if (rc != -1) {
623 done = rc;
624 rc = pwrite64(vp->v_fd, (char *)addr + split,
625 len - split, offset + split);
626 }
34dc7c2f
BB
627 }
628
4d58b69d 629 if (rc == -1)
34dc7c2f 630 return (errno);
4d58b69d
RC
631
632 done += rc;
633
34dc7c2f 634 if (residp)
4d58b69d
RC
635 *residp = len - done;
636 else if (done != len)
34dc7c2f
BB
637 return (EIO);
638 return (0);
639}
640
641void
642vn_close(vnode_t *vp)
643{
644 close(vp->v_fd);
645 spa_strfree(vp->v_path);
646 umem_free(vp, sizeof (vnode_t));
647}
648
428870ff
BB
649/*
650 * At a minimum we need to update the size since vdev_reopen()
651 * will no longer call vn_openat().
652 */
653int
654fop_getattr(vnode_t *vp, vattr_t *vap)
655{
656 struct stat64 st;
657
658 if (fstat64(vp->v_fd, &st) == -1) {
659 close(vp->v_fd);
660 return (errno);
661 }
662
663 vap->va_size = st.st_size;
664 return (0);
665}
666
34dc7c2f
BB
667#ifdef ZFS_DEBUG
668
669/*
670 * =========================================================================
671 * Figure out which debugging statements to print
672 * =========================================================================
673 */
674
675static char *dprintf_string;
676static int dprintf_print_all;
677
678int
679dprintf_find_string(const char *string)
680{
681 char *tmp_str = dprintf_string;
682 int len = strlen(string);
683
684 /*
685 * Find out if this is a string we want to print.
686 * String format: file1.c,function_name1,file2.c,file3.c
687 */
688
689 while (tmp_str != NULL) {
690 if (strncmp(tmp_str, string, len) == 0 &&
691 (tmp_str[len] == ',' || tmp_str[len] == '\0'))
692 return (1);
693 tmp_str = strchr(tmp_str, ',');
694 if (tmp_str != NULL)
695 tmp_str++; /* Get rid of , */
696 }
697 return (0);
698}
699
700void
701dprintf_setup(int *argc, char **argv)
702{
703 int i, j;
704
705 /*
706 * Debugging can be specified two ways: by setting the
707 * environment variable ZFS_DEBUG, or by including a
708 * "debug=..." argument on the command line. The command
709 * line setting overrides the environment variable.
710 */
711
712 for (i = 1; i < *argc; i++) {
713 int len = strlen("debug=");
714 /* First look for a command line argument */
715 if (strncmp("debug=", argv[i], len) == 0) {
716 dprintf_string = argv[i] + len;
717 /* Remove from args */
718 for (j = i; j < *argc; j++)
719 argv[j] = argv[j+1];
720 argv[j] = NULL;
721 (*argc)--;
722 }
723 }
724
725 if (dprintf_string == NULL) {
726 /* Look for ZFS_DEBUG environment variable */
727 dprintf_string = getenv("ZFS_DEBUG");
728 }
729
730 /*
731 * Are we just turning on all debugging?
732 */
733 if (dprintf_find_string("on"))
734 dprintf_print_all = 1;
735}
736
737/*
738 * =========================================================================
739 * debug printfs
740 * =========================================================================
741 */
742void
743__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
744{
745 const char *newfile;
746 va_list adx;
747
748 /*
749 * Get rid of annoying "../common/" prefix to filename.
750 */
751 newfile = strrchr(file, '/');
752 if (newfile != NULL) {
753 newfile = newfile + 1; /* Get rid of leading / */
754 } else {
755 newfile = file;
756 }
757
758 if (dprintf_print_all ||
759 dprintf_find_string(newfile) ||
760 dprintf_find_string(func)) {
761 /* Print out just the function name if requested */
762 flockfile(stdout);
763 if (dprintf_find_string("pid"))
764 (void) printf("%d ", getpid());
765 if (dprintf_find_string("tid"))
1e33ac1e 766 (void) printf("%u ", (uint_t) pthread_self());
34dc7c2f
BB
767 if (dprintf_find_string("cpu"))
768 (void) printf("%u ", getcpuid());
769 if (dprintf_find_string("time"))
770 (void) printf("%llu ", gethrtime());
771 if (dprintf_find_string("long"))
772 (void) printf("%s, line %d: ", newfile, line);
773 (void) printf("%s: ", func);
774 va_start(adx, fmt);
775 (void) vprintf(fmt, adx);
776 va_end(adx);
777 funlockfile(stdout);
778 }
779}
780
781#endif /* ZFS_DEBUG */
782
783/*
784 * =========================================================================
785 * cmn_err() and panic()
786 * =========================================================================
787 */
788static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
789static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
790
/*
 * Print "error: " followed by the formatted message and a newline on
 * stderr, then abort the process.  Does not return.
 */
void
vpanic(const char *fmt, va_list adx)
{
	(void) fputs("error: ", stderr);
	(void) vfprintf(stderr, fmt, adx);
	(void) fputc('\n', stderr);

	abort();	/* think of it as a "user-level crash dump" */
}
800
/* Variadic front end for vpanic(): format the message and abort. */
void
panic(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vpanic(fmt, ap);
	va_end(ap);
}
810
811void
812vcmn_err(int ce, const char *fmt, va_list adx)
813{
814 if (ce == CE_PANIC)
815 vpanic(fmt, adx);
816 if (ce != CE_NOTE) { /* suppress noise in userland stress testing */
817 (void) fprintf(stderr, "%s", ce_prefix[ce]);
818 (void) vfprintf(stderr, fmt, adx);
819 (void) fprintf(stderr, "%s", ce_suffix[ce]);
820 }
821}
822
/* Variadic front end for vcmn_err(). */
/*PRINTFLIKE2*/
void
cmn_err(int ce, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vcmn_err(ce, fmt, ap);
	va_end(ap);
}
833
834/*
835 * =========================================================================
836 * kobj interfaces
837 * =========================================================================
838 */
839struct _buf *
840kobj_open_file(char *name)
841{
842 struct _buf *file;
843 vnode_t *vp;
844
845 /* set vp as the _fd field of the file */
846 if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir,
847 -1) != 0)
848 return ((void *)-1UL);
849
850 file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
851 file->_fd = (intptr_t)vp;
852 return (file);
853}
854
855int
856kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
857{
858 ssize_t resid;
859
860 vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
861 UIO_SYSSPACE, 0, 0, 0, &resid);
862
863 return (size - resid);
864}
865
866void
867kobj_close_file(struct _buf *file)
868{
869 vn_close((vnode_t *)file->_fd);
870 umem_free(file, sizeof (struct _buf));
871}
872
873int
874kobj_get_filesize(struct _buf *file, uint64_t *size)
875{
876 struct stat64 st;
877 vnode_t *vp = (vnode_t *)file->_fd;
878
879 if (fstat64(vp->v_fd, &st) == -1) {
880 vn_close(vp);
881 return (errno);
882 }
883 *size = st.st_size;
884 return (0);
885}
886
887/*
888 * =========================================================================
889 * misc routines
890 * =========================================================================
891 */
892
893void
894delay(clock_t ticks)
895{
896 poll(0, 0, ticks * (1000 / hz));
897}
898
/*
 * Find highest one bit set.
 * Returns bit number + 1 of highest bit that is set, otherwise returns 0.
 * High order bit is 31 (or 63 in _LP64 kernel).
 *
 * Implemented as a binary search: each step tests the upper half of the
 * remaining range and, when any bit is set there, shifts it down and
 * adds the half's width to the answer.
 */
int
highbit(ulong_t i)
{
	int bit = 1;

	if (i == 0)
		return (0);

#ifdef _LP64
	if ((i & 0xffffffff00000000ul) != 0) {
		i >>= 32;
		bit += 32;
	}
#endif
	if ((i & 0xffff0000) != 0) {
		i >>= 16;
		bit += 16;
	}
	if ((i & 0xff00) != 0) {
		i >>= 8;
		bit += 8;
	}
	if ((i & 0xf0) != 0) {
		i >>= 4;
		bit += 4;
	}
	if ((i & 0xc) != 0) {
		i >>= 2;
		bit += 2;
	}
	if ((i & 0x2) != 0)
		bit++;

	return (bit);
}
933
934static int random_fd = -1, urandom_fd = -1;
935
936static int
937random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
938{
939 size_t resid = len;
940 ssize_t bytes;
941
942 ASSERT(fd != -1);
943
944 while (resid != 0) {
945 bytes = read(fd, ptr, resid);
946 ASSERT3S(bytes, >=, 0);
947 ptr += bytes;
948 resid -= bytes;
949 }
950
951 return (0);
952}
953
954int
955random_get_bytes(uint8_t *ptr, size_t len)
956{
957 return (random_get_bytes_common(ptr, len, random_fd));
958}
959
960int
961random_get_pseudo_bytes(uint8_t *ptr, size_t len)
962{
963 return (random_get_bytes_common(ptr, len, urandom_fd));
964}
965
/*
 * Convert the string 'hw_serial' to an unsigned long in the given base.
 *
 * The converted value is stored in *result.  When 'nptr' is non-NULL it
 * receives a pointer to the first character that was not consumed.
 * Returns 0 on success, or the errno value set by strtoul() (for example
 * ERANGE on overflow).
 *
 * errno is cleared before the conversion so that a legitimate result of
 * zero is not reported as a failure because of a stale errno left behind
 * by an earlier, unrelated call.
 */
int
ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
{
	char *end;

	errno = 0;
	*result = strtoul(hw_serial, &end, base);

	if (nptr != NULL)
		*nptr = end;

	return (errno);
}
976
428870ff
BB
/*
 * Convert the string 'str' to an unsigned long long in the given base.
 *
 * The converted value is stored in *result.  When 'nptr' is non-NULL it
 * receives a pointer to the first character that was not consumed.
 * Returns 0 on success, or the errno value set by strtoull() (for example
 * ERANGE on overflow).
 *
 * errno is cleared before the conversion so that a legitimate result of
 * zero is not reported as a failure because of a stale errno left behind
 * by an earlier, unrelated call.
 */
int
ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
{
	char *end;

	errno = 0;
	*result = strtoull(str, &end, base);

	if (nptr != NULL)
		*nptr = end;

	return (errno);
}
987
34dc7c2f
BB
988/*
989 * =========================================================================
990 * kernel emulation setup & teardown
991 * =========================================================================
992 */
/*
 * Callback registered with umem_nofail_callback(): report the condition
 * on stderr and abort.
 */
static int
umem_out_of_memory(void)
{
	char errmsg[] = "out of memory -- generating core dump\n";

	(void) fputs(errmsg, stderr);
	abort();

	return (0);
}
1002
1003void
1004kernel_init(int mode)
1005{
1006 umem_nofail_callback(umem_out_of_memory);
1007
1008 physmem = sysconf(_SC_PHYS_PAGES);
1009
1010 dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
1011 (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
1012
428870ff
BB
1013 (void) snprintf(hw_serial, sizeof (hw_serial), "%ld",
1014 (mode & FWRITE) ? gethostid() : 0);
34dc7c2f
BB
1015
1016 VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
1017 VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);
1018
1e33ac1e 1019 thread_init();
b128c09f
BB
1020 system_taskq_init();
1021
34dc7c2f
BB
1022 spa_init(mode);
1023}
1024
1025void
1026kernel_fini(void)
1027{
1028 spa_fini();
1029
428870ff 1030 system_taskq_fini();
1e33ac1e 1031 thread_fini();
428870ff 1032
34dc7c2f
BB
1033 close(random_fd);
1034 close(urandom_fd);
1035
1036 random_fd = -1;
1037 urandom_fd = -1;
1038}
1039
34dc7c2f
BB
1040uid_t
1041crgetuid(cred_t *cr)
1042{
1043 return (0);
1044}
1045
1046gid_t
1047crgetgid(cred_t *cr)
1048{
1049 return (0);
1050}
1051
1052int
1053crgetngroups(cred_t *cr)
1054{
1055 return (0);
1056}
1057
1058gid_t *
1059crgetgroups(cred_t *cr)
1060{
1061 return (NULL);
1062}
1063
1064int
1065zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
1066{
1067 return (0);
1068}
1069
1070int
1071zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
1072{
1073 return (0);
1074}
1075
1076int
1077zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
1078{
1079 return (0);
1080}
1081
1082ksiddomain_t *
1083ksid_lookupdomain(const char *dom)
1084{
1085 ksiddomain_t *kd;
1086
1087 kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL);
1088 kd->kd_name = spa_strdup(dom);
1089 return (kd);
1090}
1091
1092void
1093ksiddomain_rele(ksiddomain_t *ksid)
1094{
1095 spa_strfree(ksid->kd_name);
1096 umem_free(ksid, sizeof (ksiddomain_t));
1097}
428870ff
BB
1098
1099/*
1100 * Do not change the length of the returned string; it must be freed
1101 * with strfree().
1102 */
1103char *
1104kmem_asprintf(const char *fmt, ...)
1105{
1106 int size;
1107 va_list adx;
1108 char *buf;
1109
1110 va_start(adx, fmt);
1111 size = vsnprintf(NULL, 0, fmt, adx) + 1;
1112 va_end(adx);
1113
1114 buf = kmem_alloc(size, KM_SLEEP);
1115
1116 va_start(adx, fmt);
1117 size = vsnprintf(buf, size, fmt, adx);
1118 va_end(adx);
1119
1120 return (buf);
1121}
572e2857
BB
1122
1123/* ARGSUSED */
1124int
1125zfs_onexit_fd_hold(int fd, minor_t *minorp)
1126{
1127 *minorp = 0;
1128 return (0);
1129}
1130
/* Stub: intentionally does nothing. */
/* ARGSUSED */
void
zfs_onexit_fd_rele(int fd)
{
	/* nothing to release */
}
1136
1137/* ARGSUSED */
1138int
1139zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
1140 uint64_t *action_handle)
1141{
1142 return (0);
1143}
1144
1145/* ARGSUSED */
1146int
1147zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
1148{
1149 return (0);
1150}
1151
1152/* ARGSUSED */
1153int
1154zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
1155{
1156 return (0);
1157}