]> git.proxmox.com Git - mirror_zfs-debian.git/blob - lib/libzpool/kernel.c
002276a90db7feb6f7909fd64fdd8b932e8e2e87
[mirror_zfs-debian.git] / lib / libzpool / kernel.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 #include <assert.h>
26 #include <fcntl.h>
27 #include <poll.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <zlib.h>
32 #include <sys/signal.h>
33 #include <sys/spa.h>
34 #include <sys/stat.h>
35 #include <sys/processor.h>
36 #include <sys/zfs_context.h>
37 #include <sys/utsname.h>
38 #include <sys/time.h>
39 #include <sys/systeminfo.h>
40
41 /*
42 * Emulation of kernel services in userland.
43 */
44
/* NOTE(review): presumably the "assertions OK" flag read by the assert macros -- confirm. */
int aok;
/* Physical memory size in pages; assigned in kernel_init() from sysconf(). */
uint64_t physmem;
/* Placeholder root vnode: only its address value is compared (see vn_openat()). */
vnode_t *rootdir = (vnode_t *)0xabcd1234;
/* Host id rendered as a decimal string; filled in by kernel_init(). */
char hw_serial[HW_HOSTID_LEN];

/* Fixed identification strings for the userland emulation. */
struct utsname utsname = {
	"userland", "libzpool", "1", "1", "na"
};

/* this only exists to have its address taken */
struct proc p0;
56
57 /*
58 * =========================================================================
59 * threads
60 * =========================================================================
61 */
62
/* Broadcast by zk_thread_exit(); thread_fini() waits on it for threads to drain. */
pthread_cond_t kthread_cond = PTHREAD_COND_INITIALIZER;
/* Guards updates of kthread_nr once helper threads exist. */
pthread_mutex_t kthread_lock = PTHREAD_MUTEX_INITIALIZER;
/* Thread-specific key holding each thread's kthread_t pointer. */
pthread_key_t kthread_key;
/* Number of emulated kernel threads currently alive (including the primary). */
int kthread_nr = 0;
67
68 static void
69 thread_init(void)
70 {
71 kthread_t *kt;
72
73 VERIFY3S(pthread_key_create(&kthread_key, NULL), ==, 0);
74
75 /* Create entry for primary kthread */
76 kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL);
77 kt->t_tid = pthread_self();
78 kt->t_func = NULL;
79
80 VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0);
81
82 /* Only the main thread should be running at the moment */
83 ASSERT3S(kthread_nr, ==, 0);
84 kthread_nr = 1;
85 }
86
87 static void
88 thread_fini(void)
89 {
90 kthread_t *kt = curthread;
91
92 ASSERT(pthread_equal(kt->t_tid, pthread_self()));
93 ASSERT3P(kt->t_func, ==, NULL);
94
95 umem_free(kt, sizeof(kthread_t));
96
97 /* Wait for all threads to exit via thread_exit() */
98 VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0);
99
100 kthread_nr--; /* Main thread is exiting */
101
102 while (kthread_nr > 0)
103 VERIFY3S(pthread_cond_wait(&kthread_cond, &kthread_lock), ==,
104 0);
105
106 ASSERT3S(kthread_nr, ==, 0);
107 VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0);
108
109 VERIFY3S(pthread_key_delete(kthread_key), ==, 0);
110 }
111
112 kthread_t *
113 zk_thread_current(void)
114 {
115 kthread_t *kt = pthread_getspecific(kthread_key);
116
117 ASSERT3P(kt, !=, NULL);
118
119 return kt;
120 }
121
122 void *
123 zk_thread_helper(void *arg)
124 {
125 kthread_t *kt = (kthread_t *) arg;
126
127 VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0);
128
129 VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0);
130 kthread_nr++;
131 VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0);
132
133 kt->t_tid = pthread_self();
134 ((thread_func_arg_t) kt->t_func)(kt->t_arg);
135
136 /* Unreachable, thread must exit with thread_exit() */
137 abort();
138
139 return NULL;
140 }
141
142 kthread_t *
143 zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg,
144 size_t len, proc_t *pp, int state, pri_t pri)
145 {
146 kthread_t *kt;
147 pthread_attr_t attr;
148 size_t stack;
149
150 ASSERT3S(state & ~TS_RUN, ==, 0);
151
152 kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL);
153 kt->t_func = func;
154 kt->t_arg = arg;
155
156 /*
157 * The Solaris kernel stack size is 24k for x86/x86_64.
158 * The Linux kernel stack size is 8k for x86/x86_64.
159 *
160 * We reduce the default stack size in userspace, to ensure
161 * we observe stack overruns in user space as well as in
162 * kernel space. PTHREAD_STACK_MIN is the minimum stack
163 * required for a NULL procedure in user space and is added
164 * in to the stack requirements.
165 *
166 * Some buggy NPTL threading implementations include the
167 * guard area within the stack size allocations. In
168 * this case we allocate an extra page to account for the
169 * guard area since we only have two pages of usable stack
170 * on Linux.
171 */
172
173 stack = PTHREAD_STACK_MIN + MAX(stksize, STACK_SIZE) +
174 EXTRA_GUARD_BYTES;
175
176 VERIFY3S(pthread_attr_init(&attr), ==, 0);
177 VERIFY3S(pthread_attr_setstacksize(&attr, stack), ==, 0);
178 VERIFY3S(pthread_attr_setguardsize(&attr, PAGESIZE), ==, 0);
179
180 VERIFY3S(pthread_create(&kt->t_tid, &attr, &zk_thread_helper, kt),
181 ==, 0);
182
183 VERIFY3S(pthread_attr_destroy(&attr), ==, 0);
184
185 return kt;
186 }
187
188 void
189 zk_thread_exit(void)
190 {
191 kthread_t *kt = curthread;
192
193 ASSERT(pthread_equal(kt->t_tid, pthread_self()));
194
195 umem_free(kt, sizeof(kthread_t));
196
197 pthread_mutex_lock(&kthread_lock);
198 kthread_nr--;
199 pthread_mutex_unlock(&kthread_lock);
200
201 pthread_cond_broadcast(&kthread_cond);
202 pthread_exit((void *)TS_MAGIC);
203 }
204
205 void
206 zk_thread_join(kt_did_t tid)
207 {
208 void *ret;
209
210 pthread_join((pthread_t)tid, &ret);
211 VERIFY3P(ret, ==, (void *)TS_MAGIC);
212 }
213
214 /*
215 * =========================================================================
216 * kstats
217 * =========================================================================
218 */
219 /*ARGSUSED*/
220 kstat_t *
221 kstat_create(char *module, int instance, char *name, char *class,
222 uchar_t type, ulong_t ndata, uchar_t ks_flag)
223 {
224 return (NULL);
225 }
226
227 /*ARGSUSED*/
228 void
229 kstat_install(kstat_t *ksp)
230 {}
231
232 /*ARGSUSED*/
233 void
234 kstat_delete(kstat_t *ksp)
235 {}
236
237 /*
238 * =========================================================================
239 * mutexes
240 * =========================================================================
241 */
242
243 void
244 mutex_init(kmutex_t *mp, char *name, int type, void *cookie)
245 {
246 ASSERT3S(type, ==, MUTEX_DEFAULT);
247 ASSERT3P(cookie, ==, NULL);
248 mp->m_owner = MTX_INIT;
249 mp->m_magic = MTX_MAGIC;
250 VERIFY3S(pthread_mutex_init(&mp->m_lock, NULL), ==, 0);
251 }
252
253 void
254 mutex_destroy(kmutex_t *mp)
255 {
256 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
257 ASSERT3P(mp->m_owner, ==, MTX_INIT);
258 VERIFY3S(pthread_mutex_destroy(&(mp)->m_lock), ==, 0);
259 mp->m_owner = MTX_DEST;
260 mp->m_magic = 0;
261 }
262
263 void
264 mutex_enter(kmutex_t *mp)
265 {
266 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
267 ASSERT3P(mp->m_owner, !=, MTX_DEST);
268 ASSERT3P(mp->m_owner, !=, curthread);
269 VERIFY3S(pthread_mutex_lock(&mp->m_lock), ==, 0);
270 ASSERT3P(mp->m_owner, ==, MTX_INIT);
271 mp->m_owner = curthread;
272 }
273
274 int
275 mutex_tryenter(kmutex_t *mp)
276 {
277 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
278 ASSERT3P(mp->m_owner, !=, MTX_DEST);
279 if (0 == pthread_mutex_trylock(&mp->m_lock)) {
280 ASSERT3P(mp->m_owner, ==, MTX_INIT);
281 mp->m_owner = curthread;
282 return (1);
283 } else {
284 return (0);
285 }
286 }
287
288 void
289 mutex_exit(kmutex_t *mp)
290 {
291 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
292 ASSERT3P(mutex_owner(mp), ==, curthread);
293 mp->m_owner = MTX_INIT;
294 VERIFY3S(pthread_mutex_unlock(&mp->m_lock), ==, 0);
295 }
296
297 void *
298 mutex_owner(kmutex_t *mp)
299 {
300 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
301 return (mp->m_owner);
302 }
303
304 int
305 mutex_held(kmutex_t *mp)
306 {
307 return (mp->m_owner == curthread);
308 }
309
310 /*
311 * =========================================================================
312 * rwlocks
313 * =========================================================================
314 */
315
316 void
317 rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
318 {
319 ASSERT3S(type, ==, RW_DEFAULT);
320 ASSERT3P(arg, ==, NULL);
321 VERIFY3S(pthread_rwlock_init(&rwlp->rw_lock, NULL), ==, 0);
322 rwlp->rw_owner = RW_INIT;
323 rwlp->rw_wr_owner = RW_INIT;
324 rwlp->rw_readers = 0;
325 rwlp->rw_magic = RW_MAGIC;
326 }
327
328 void
329 rw_destroy(krwlock_t *rwlp)
330 {
331 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
332
333 VERIFY3S(pthread_rwlock_destroy(&rwlp->rw_lock), ==, 0);
334 rwlp->rw_magic = 0;
335 }
336
337 void
338 rw_enter(krwlock_t *rwlp, krw_t rw)
339 {
340 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
341 ASSERT3P(rwlp->rw_owner, !=, curthread);
342 ASSERT3P(rwlp->rw_wr_owner, !=, curthread);
343
344 if (rw == RW_READER) {
345 VERIFY3S(pthread_rwlock_rdlock(&rwlp->rw_lock), ==, 0);
346 ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT);
347
348 atomic_inc_uint(&rwlp->rw_readers);
349 } else {
350 VERIFY3S(pthread_rwlock_wrlock(&rwlp->rw_lock), ==, 0);
351 ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT);
352 ASSERT3U(rwlp->rw_readers, ==, 0);
353
354 rwlp->rw_wr_owner = curthread;
355 }
356
357 rwlp->rw_owner = curthread;
358 }
359
360 void
361 rw_exit(krwlock_t *rwlp)
362 {
363 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
364 ASSERT(RW_LOCK_HELD(rwlp));
365
366 if (RW_READ_HELD(rwlp))
367 atomic_dec_uint(&rwlp->rw_readers);
368 else
369 rwlp->rw_wr_owner = RW_INIT;
370
371 rwlp->rw_owner = RW_INIT;
372 VERIFY3S(pthread_rwlock_unlock(&rwlp->rw_lock), ==, 0);
373 }
374
375 int
376 rw_tryenter(krwlock_t *rwlp, krw_t rw)
377 {
378 int rv;
379
380 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
381
382 if (rw == RW_READER)
383 rv = pthread_rwlock_tryrdlock(&rwlp->rw_lock);
384 else
385 rv = pthread_rwlock_trywrlock(&rwlp->rw_lock);
386
387 if (rv == 0) {
388 ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT);
389
390 if (rw == RW_READER)
391 atomic_inc_uint(&rwlp->rw_readers);
392 else {
393 ASSERT3U(rwlp->rw_readers, ==, 0);
394 rwlp->rw_wr_owner = curthread;
395 }
396
397 rwlp->rw_owner = curthread;
398 return (1);
399 }
400
401 VERIFY3S(rv, ==, EBUSY);
402
403 return (0);
404 }
405
406 int
407 rw_tryupgrade(krwlock_t *rwlp)
408 {
409 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
410
411 return (0);
412 }
413
414 /*
415 * =========================================================================
416 * condition variables
417 * =========================================================================
418 */
419
420 void
421 cv_init(kcondvar_t *cv, char *name, int type, void *arg)
422 {
423 ASSERT3S(type, ==, CV_DEFAULT);
424 cv->cv_magic = CV_MAGIC;
425 VERIFY3S(pthread_cond_init(&cv->cv, NULL), ==, 0);
426 }
427
428 void
429 cv_destroy(kcondvar_t *cv)
430 {
431 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
432 VERIFY3S(pthread_cond_destroy(&cv->cv), ==, 0);
433 cv->cv_magic = 0;
434 }
435
436 void
437 cv_wait(kcondvar_t *cv, kmutex_t *mp)
438 {
439 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
440 ASSERT3P(mutex_owner(mp), ==, curthread);
441 mp->m_owner = MTX_INIT;
442 int ret = pthread_cond_wait(&cv->cv, &mp->m_lock);
443 if (ret != 0)
444 VERIFY3S(ret, ==, EINTR);
445 mp->m_owner = curthread;
446 }
447
448 clock_t
449 cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
450 {
451 int error;
452 struct timeval tv;
453 timestruc_t ts;
454 clock_t delta;
455
456 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
457
458 top:
459 delta = abstime - ddi_get_lbolt();
460 if (delta <= 0)
461 return (-1);
462
463 VERIFY(gettimeofday(&tv, NULL) == 0);
464
465 ts.tv_sec = tv.tv_sec + delta / hz;
466 ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz);
467 if (ts.tv_nsec >= NANOSEC) {
468 ts.tv_sec++;
469 ts.tv_nsec -= NANOSEC;
470 }
471
472 ASSERT3P(mutex_owner(mp), ==, curthread);
473 mp->m_owner = MTX_INIT;
474 error = pthread_cond_timedwait(&cv->cv, &mp->m_lock, &ts);
475 mp->m_owner = curthread;
476
477 if (error == ETIMEDOUT)
478 return (-1);
479
480 if (error == EINTR)
481 goto top;
482
483 VERIFY3S(error, ==, 0);
484
485 return (1);
486 }
487
488 void
489 cv_signal(kcondvar_t *cv)
490 {
491 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
492 VERIFY3S(pthread_cond_signal(&cv->cv), ==, 0);
493 }
494
495 void
496 cv_broadcast(kcondvar_t *cv)
497 {
498 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
499 VERIFY3S(pthread_cond_broadcast(&cv->cv), ==, 0);
500 }
501
502 /*
503 * =========================================================================
504 * vnode operations
505 * =========================================================================
506 */
507 /*
508 * Note: for the xxxat() versions of these functions, we assume that the
509 * starting vp is always rootdir (which is true for spa_directory.c, the only
510 * ZFS consumer of these interfaces). We assert this is true, and then emulate
511 * them by adding '/' in front of the path.
512 */
513
514 /*ARGSUSED*/
515 int
516 vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
517 {
518 int fd;
519 vnode_t *vp;
520 int old_umask;
521 char *realpath;
522 struct stat64 st;
523 int err;
524
525 realpath = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
526
527 /*
528 * If we're accessing a real disk from userland, we need to use
529 * the character interface to avoid caching. This is particularly
530 * important if we're trying to look at a real in-kernel storage
531 * pool from userland, e.g. via zdb, because otherwise we won't
532 * see the changes occurring under the segmap cache.
533 * On the other hand, the stupid character device returns zero
534 * for its size. So -- gag -- we open the block device to get
535 * its size, and remember it for subsequent VOP_GETATTR().
536 */
537 #if defined(__sun__) || defined(__sun)
538 if (strncmp(path, "/dev/", 5) == 0) {
539 #else
540 if (0) {
541 #endif
542 char *dsk;
543 fd = open64(path, O_RDONLY);
544 if (fd == -1) {
545 err = errno;
546 free(realpath);
547 return (err);
548 }
549 if (fstat64(fd, &st) == -1) {
550 err = errno;
551 close(fd);
552 free(realpath);
553 return (err);
554 }
555 close(fd);
556 (void) sprintf(realpath, "%s", path);
557 dsk = strstr(path, "/dsk/");
558 if (dsk != NULL)
559 (void) sprintf(realpath + (dsk - path) + 1, "r%s",
560 dsk + 1);
561 } else {
562 (void) sprintf(realpath, "%s", path);
563 if (!(flags & FCREAT) && stat64(realpath, &st) == -1) {
564 err = errno;
565 free(realpath);
566 return (err);
567 }
568 }
569
570 if (!(flags & FCREAT) && S_ISBLK(st.st_mode)) {
571 #ifdef __linux__
572 flags |= O_DIRECT;
573 #endif
574 /* We shouldn't be writing to block devices in userspace */
575 VERIFY(!(flags & FWRITE));
576 }
577
578 if (flags & FCREAT)
579 old_umask = umask(0);
580
581 /*
582 * The construct 'flags - FREAD' conveniently maps combinations of
583 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
584 */
585 fd = open64(realpath, flags - FREAD, mode);
586 free(realpath);
587
588 if (flags & FCREAT)
589 (void) umask(old_umask);
590
591 if (fd == -1)
592 return (errno);
593
594 if (fstat64_blk(fd, &st) == -1) {
595 err = errno;
596 close(fd);
597 return (err);
598 }
599
600 (void) fcntl(fd, F_SETFD, FD_CLOEXEC);
601
602 *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
603
604 vp->v_fd = fd;
605 vp->v_size = st.st_size;
606 vp->v_path = spa_strdup(path);
607
608 return (0);
609 }
610
611 /*ARGSUSED*/
612 int
613 vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
614 int x3, vnode_t *startvp, int fd)
615 {
616 char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
617 int ret;
618
619 ASSERT(startvp == rootdir);
620 (void) sprintf(realpath, "/%s", path);
621
622 /* fd ignored for now, need if want to simulate nbmand support */
623 ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
624
625 umem_free(realpath, strlen(path) + 2);
626
627 return (ret);
628 }
629
630 /*ARGSUSED*/
631 int
632 vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
633 int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
634 {
635 ssize_t rc, done = 0, split;
636
637 if (uio == UIO_READ) {
638 rc = pread64(vp->v_fd, addr, len, offset);
639 } else {
640 /*
641 * To simulate partial disk writes, we split writes into two
642 * system calls so that the process can be killed in between.
643 */
644 split = (len > 0 ? rand() % len : 0);
645 rc = pwrite64(vp->v_fd, addr, split, offset);
646 if (rc != -1) {
647 done = rc;
648 rc = pwrite64(vp->v_fd, (char *)addr + split,
649 len - split, offset + split);
650 }
651 }
652
653 #ifdef __linux__
654 if (rc == -1 && errno == EINVAL) {
655 /*
656 * Under Linux, this most likely means an alignment issue
657 * (memory or disk) due to O_DIRECT, so we abort() in order to
658 * catch the offender.
659 */
660 abort();
661 }
662 #endif
663 if (rc == -1)
664 return (errno);
665
666 done += rc;
667
668 if (residp)
669 *residp = len - done;
670 else if (done != len)
671 return (EIO);
672 return (0);
673 }
674
675 void
676 vn_close(vnode_t *vp)
677 {
678 close(vp->v_fd);
679 spa_strfree(vp->v_path);
680 umem_free(vp, sizeof (vnode_t));
681 }
682
683 /*
684 * At a minimum we need to update the size since vdev_reopen()
685 * will no longer call vn_openat().
686 */
687 int
688 fop_getattr(vnode_t *vp, vattr_t *vap)
689 {
690 struct stat64 st;
691 int err;
692
693 if (fstat64_blk(vp->v_fd, &st) == -1) {
694 err = errno;
695 close(vp->v_fd);
696 return (err);
697 }
698
699 vap->va_size = st.st_size;
700 return (0);
701 }
702
703 #ifdef ZFS_DEBUG
704
705 /*
706 * =========================================================================
707 * Figure out which debugging statements to print
708 * =========================================================================
709 */
710
/* Comma-separated debug selector list; set by dprintf_setup(). */
static char *dprintf_string;
/* Non-zero when the selector list contains "on" (print everything). */
static int dprintf_print_all;
713
714 int
715 dprintf_find_string(const char *string)
716 {
717 char *tmp_str = dprintf_string;
718 int len = strlen(string);
719
720 /*
721 * Find out if this is a string we want to print.
722 * String format: file1.c,function_name1,file2.c,file3.c
723 */
724
725 while (tmp_str != NULL) {
726 if (strncmp(tmp_str, string, len) == 0 &&
727 (tmp_str[len] == ',' || tmp_str[len] == '\0'))
728 return (1);
729 tmp_str = strchr(tmp_str, ',');
730 if (tmp_str != NULL)
731 tmp_str++; /* Get rid of , */
732 }
733 return (0);
734 }
735
736 void
737 dprintf_setup(int *argc, char **argv)
738 {
739 int i, j;
740
741 /*
742 * Debugging can be specified two ways: by setting the
743 * environment variable ZFS_DEBUG, or by including a
744 * "debug=..." argument on the command line. The command
745 * line setting overrides the environment variable.
746 */
747
748 for (i = 1; i < *argc; i++) {
749 int len = strlen("debug=");
750 /* First look for a command line argument */
751 if (strncmp("debug=", argv[i], len) == 0) {
752 dprintf_string = argv[i] + len;
753 /* Remove from args */
754 for (j = i; j < *argc; j++)
755 argv[j] = argv[j+1];
756 argv[j] = NULL;
757 (*argc)--;
758 }
759 }
760
761 if (dprintf_string == NULL) {
762 /* Look for ZFS_DEBUG environment variable */
763 dprintf_string = getenv("ZFS_DEBUG");
764 }
765
766 /*
767 * Are we just turning on all debugging?
768 */
769 if (dprintf_find_string("on"))
770 dprintf_print_all = 1;
771 }
772
773 /*
774 * =========================================================================
775 * debug printfs
776 * =========================================================================
777 */
778 void
779 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
780 {
781 const char *newfile;
782 va_list adx;
783
784 /*
785 * Get rid of annoying "../common/" prefix to filename.
786 */
787 newfile = strrchr(file, '/');
788 if (newfile != NULL) {
789 newfile = newfile + 1; /* Get rid of leading / */
790 } else {
791 newfile = file;
792 }
793
794 if (dprintf_print_all ||
795 dprintf_find_string(newfile) ||
796 dprintf_find_string(func)) {
797 /* Print out just the function name if requested */
798 flockfile(stdout);
799 if (dprintf_find_string("pid"))
800 (void) printf("%d ", getpid());
801 if (dprintf_find_string("tid"))
802 (void) printf("%u ", (uint_t) pthread_self());
803 if (dprintf_find_string("cpu"))
804 (void) printf("%u ", getcpuid());
805 if (dprintf_find_string("time"))
806 (void) printf("%llu ", gethrtime());
807 if (dprintf_find_string("long"))
808 (void) printf("%s, line %d: ", newfile, line);
809 (void) printf("%s: ", func);
810 va_start(adx, fmt);
811 (void) vprintf(fmt, adx);
812 va_end(adx);
813 funlockfile(stdout);
814 }
815 }
816
817 #endif /* ZFS_DEBUG */
818
819 /*
820 * =========================================================================
821 * cmn_err() and panic()
822 * =========================================================================
823 */
/* Message prefix and suffix per cmn_err() level; both tables have CE_IGNORE entries. */
static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
826
/*
 * Print "error: <formatted message>\n" on stderr and abort the process.
 */
void
vpanic(const char *fmt, va_list adx)
{
	(void) fputs("error: ", stderr);
	(void) vfprintf(stderr, fmt, adx);
	(void) fputc('\n', stderr);

	abort();	/* think of it as a "user-level crash dump" */
}
836
/*
 * Variadic front end to vpanic().
 */
void
panic(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vpanic(fmt, args);
	va_end(args);
}
846
847 void
848 vcmn_err(int ce, const char *fmt, va_list adx)
849 {
850 if (ce == CE_PANIC)
851 vpanic(fmt, adx);
852 if (ce != CE_NOTE) { /* suppress noise in userland stress testing */
853 (void) fprintf(stderr, "%s", ce_prefix[ce]);
854 (void) vfprintf(stderr, fmt, adx);
855 (void) fprintf(stderr, "%s", ce_suffix[ce]);
856 }
857 }
858
/*
 * Variadic front end to vcmn_err().
 */
/*PRINTFLIKE2*/
void
cmn_err(int ce, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vcmn_err(ce, fmt, args);
	va_end(args);
}
869
870 /*
871 * =========================================================================
872 * kobj interfaces
873 * =========================================================================
874 */
875 struct _buf *
876 kobj_open_file(char *name)
877 {
878 struct _buf *file;
879 vnode_t *vp;
880
881 /* set vp as the _fd field of the file */
882 if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir,
883 -1) != 0)
884 return ((void *)-1UL);
885
886 file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
887 file->_fd = (intptr_t)vp;
888 return (file);
889 }
890
891 int
892 kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
893 {
894 ssize_t resid;
895
896 vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
897 UIO_SYSSPACE, 0, 0, 0, &resid);
898
899 return (size - resid);
900 }
901
902 void
903 kobj_close_file(struct _buf *file)
904 {
905 vn_close((vnode_t *)file->_fd);
906 umem_free(file, sizeof (struct _buf));
907 }
908
909 int
910 kobj_get_filesize(struct _buf *file, uint64_t *size)
911 {
912 struct stat64 st;
913 vnode_t *vp = (vnode_t *)file->_fd;
914
915 if (fstat64(vp->v_fd, &st) == -1) {
916 vn_close(vp);
917 return (errno);
918 }
919 *size = st.st_size;
920 return (0);
921 }
922
923 /*
924 * =========================================================================
925 * misc routines
926 * =========================================================================
927 */
928
929 void
930 delay(clock_t ticks)
931 {
932 poll(0, 0, ticks * (1000 / hz));
933 }
934
/*
 * Find highest one bit set.
 * Returns bit number + 1 of highest bit that is set, otherwise returns 0.
 * High order bit is 31 (or 63 in _LP64 kernel).
 */
int
highbit(ulong_t i)
{
	int pos = 1;
	int shift;

	if (i == 0)
		return (0);

	/*
	 * Binary search: test the upper half of the remaining width and,
	 * if anything is set there, move it down and account for the shift.
	 */
	for (shift = (int)(sizeof (i) * 8) / 2; shift != 0; shift /= 2) {
		if ((i >> shift) != 0) {
			pos += shift;
			i >>= shift;
		}
	}

	return (pos);
}
969
/* Descriptors for /dev/random and /dev/urandom; opened in kernel_init(), -1 when closed. */
static int random_fd = -1, urandom_fd = -1;
971
972 static int
973 random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
974 {
975 size_t resid = len;
976 ssize_t bytes;
977
978 ASSERT(fd != -1);
979
980 while (resid != 0) {
981 bytes = read(fd, ptr, resid);
982 ASSERT3S(bytes, >=, 0);
983 ptr += bytes;
984 resid -= bytes;
985 }
986
987 return (0);
988 }
989
990 int
991 random_get_bytes(uint8_t *ptr, size_t len)
992 {
993 return (random_get_bytes_common(ptr, len, random_fd));
994 }
995
996 int
997 random_get_pseudo_bytes(uint8_t *ptr, size_t len)
998 {
999 return (random_get_bytes_common(ptr, len, urandom_fd));
1000 }
1001
/*
 * Convert the string 'hw_serial' to an unsigned long in the given base.
 *
 * *result receives the converted value.  If 'nptr' is non-NULL, *nptr
 * receives a pointer to the first character not consumed.
 * Returns 0 on success, or the errno set by strtoul() (e.g. ERANGE on
 * overflow, EINVAL for an unsupported base).
 *
 * errno is cleared before the conversion so that a legitimately parsed 0 is
 * not reported as a failure because of a stale errno value.
 */
int
ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
{
	/* Pre-set in case strtoul() bails out without storing the end. */
	char *end = (char *)hw_serial;

	errno = 0;
	*result = strtoul(hw_serial, &end, base);

	if (nptr != NULL)
		*nptr = end;

	return (errno);
}
1012
/*
 * Convert the string 'str' to an unsigned long long in the given base.
 *
 * *result receives the converted value.  If 'nptr' is non-NULL, *nptr
 * receives a pointer to the first character not consumed.
 * Returns 0 on success, or the errno set by strtoull() (e.g. ERANGE on
 * overflow, EINVAL for an unsupported base).
 *
 * errno is cleared before the conversion so that a legitimately parsed 0 is
 * not reported as a failure because of a stale errno value.
 */
int
ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
{
	/* Pre-set in case strtoull() bails out without storing the end. */
	char *end = (char *)str;

	errno = 0;
	*result = strtoull(str, &end, base);

	if (nptr != NULL)
		*nptr = end;

	return (errno);
}
1023
1024 /*
1025 * =========================================================================
1026 * kernel emulation setup & teardown
1027 * =========================================================================
1028 */
/*
 * Allocation-failure callback (registered in kernel_init()): report the
 * condition on stderr and abort.
 */
static int
umem_out_of_memory(void)
{
	static const char errmsg[] = "out of memory -- generating core dump\n";

	(void) fputs(errmsg, stderr);
	abort();
	return (0);
}
1038
1039 void
1040 kernel_init(int mode)
1041 {
1042 umem_nofail_callback(umem_out_of_memory);
1043
1044 physmem = sysconf(_SC_PHYS_PAGES);
1045
1046 dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
1047 (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
1048
1049 (void) snprintf(hw_serial, sizeof (hw_serial), "%ld",
1050 (mode & FWRITE) ? gethostid() : 0);
1051
1052 VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
1053 VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);
1054
1055 thread_init();
1056 system_taskq_init();
1057
1058 spa_init(mode);
1059 }
1060
1061 void
1062 kernel_fini(void)
1063 {
1064 spa_fini();
1065
1066 system_taskq_fini();
1067 thread_fini();
1068
1069 close(random_fd);
1070 close(urandom_fd);
1071
1072 random_fd = -1;
1073 urandom_fd = -1;
1074 }
1075
1076 uid_t
1077 crgetuid(cred_t *cr)
1078 {
1079 return (0);
1080 }
1081
1082 gid_t
1083 crgetgid(cred_t *cr)
1084 {
1085 return (0);
1086 }
1087
1088 int
1089 crgetngroups(cred_t *cr)
1090 {
1091 return (0);
1092 }
1093
1094 gid_t *
1095 crgetgroups(cred_t *cr)
1096 {
1097 return (NULL);
1098 }
1099
1100 int
1101 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
1102 {
1103 return (0);
1104 }
1105
1106 int
1107 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
1108 {
1109 return (0);
1110 }
1111
1112 int
1113 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
1114 {
1115 return (0);
1116 }
1117
1118 ksiddomain_t *
1119 ksid_lookupdomain(const char *dom)
1120 {
1121 ksiddomain_t *kd;
1122
1123 kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL);
1124 kd->kd_name = spa_strdup(dom);
1125 return (kd);
1126 }
1127
1128 void
1129 ksiddomain_rele(ksiddomain_t *ksid)
1130 {
1131 spa_strfree(ksid->kd_name);
1132 umem_free(ksid, sizeof (ksiddomain_t));
1133 }
1134
/*
 * Format into a newly allocated string using a copy of 'adx', so the
 * caller's va_list is left untouched.  Formatting failure is fatal.
 */
char *
kmem_vasprintf(const char *fmt, va_list adx)
{
	va_list args;
	char *str = NULL;

	va_copy(args, adx);
	VERIFY(vasprintf(&str, fmt, args) != -1);
	va_end(args);

	return (str);
}
1147
/*
 * printf-style formatting into a newly allocated string.  Formatting
 * failure is fatal.
 */
char *
kmem_asprintf(const char *fmt, ...)
{
	va_list args;
	char *str = NULL;

	va_start(args, fmt);
	VERIFY(vasprintf(&str, fmt, args) != -1);
	va_end(args);

	return (str);
}
1160
1161 /* ARGSUSED */
1162 int
1163 zfs_onexit_fd_hold(int fd, minor_t *minorp)
1164 {
1165 *minorp = 0;
1166 return (0);
1167 }
1168
/*
 * onexit stub: does nothing.
 */
/* ARGSUSED */
void
zfs_onexit_fd_rele(int fd)
{
	/* intentionally empty */
}
1174
1175 /* ARGSUSED */
1176 int
1177 zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
1178 uint64_t *action_handle)
1179 {
1180 return (0);
1181 }
1182
1183 /* ARGSUSED */
1184 int
1185 zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
1186 {
1187 return (0);
1188 }
1189
1190 /* ARGSUSED */
1191 int
1192 zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
1193 {
1194 return (0);
1195 }