]> git.proxmox.com Git - mirror_zfs-debian.git/blob - lib/libzpool/kernel.c
Linux 3.3 compat, sops->show_options()
[mirror_zfs-debian.git] / lib / libzpool / kernel.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 #include <assert.h>
26 #include <fcntl.h>
27 #include <poll.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <zlib.h>
32 #include <sys/signal.h>
33 #include <sys/spa.h>
34 #include <sys/stat.h>
35 #include <sys/processor.h>
36 #include <sys/zfs_context.h>
37 #include <sys/utsname.h>
38 #include <sys/time.h>
39 #include <sys/systeminfo.h>
40
41 /*
42 * Emulation of kernel services in userland.
43 */
44
45 int aok;
46 uint64_t physmem;
47 vnode_t *rootdir = (vnode_t *)0xabcd1234;
48 char hw_serial[HW_HOSTID_LEN];
49
50 struct utsname utsname = {
51 "userland", "libzpool", "1", "1", "na"
52 };
53
54 /* this only exists to have its address taken */
55 struct proc p0;
56
57 /*
58 * =========================================================================
59 * threads
60 * =========================================================================
61 */
62
/* kthread_lock protects kthread_nr; kthread_cond is broadcast on thread exit. */
pthread_mutex_t kthread_lock = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t kthread_cond = PTHREAD_COND_INITIALIZER;
/* Thread-specific key holding each thread's kthread_t. */
pthread_key_t kthread_key;
/* Number of emulated kernel threads currently alive. */
int kthread_nr = 0;
67
68 static void
69 thread_init(void)
70 {
71 kthread_t *kt;
72
73 VERIFY3S(pthread_key_create(&kthread_key, NULL), ==, 0);
74
75 /* Create entry for primary kthread */
76 kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL);
77 kt->t_tid = pthread_self();
78 kt->t_func = NULL;
79
80 VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0);
81
82 /* Only the main thread should be running at the moment */
83 ASSERT3S(kthread_nr, ==, 0);
84 kthread_nr = 1;
85 }
86
87 static void
88 thread_fini(void)
89 {
90 kthread_t *kt = curthread;
91
92 ASSERT(pthread_equal(kt->t_tid, pthread_self()));
93 ASSERT3P(kt->t_func, ==, NULL);
94
95 umem_free(kt, sizeof(kthread_t));
96
97 /* Wait for all threads to exit via thread_exit() */
98 VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0);
99
100 kthread_nr--; /* Main thread is exiting */
101
102 while (kthread_nr > 0)
103 VERIFY3S(pthread_cond_wait(&kthread_cond, &kthread_lock), ==,
104 0);
105
106 ASSERT3S(kthread_nr, ==, 0);
107 VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0);
108
109 VERIFY3S(pthread_key_delete(kthread_key), ==, 0);
110 }
111
112 kthread_t *
113 zk_thread_current(void)
114 {
115 kthread_t *kt = pthread_getspecific(kthread_key);
116
117 ASSERT3P(kt, !=, NULL);
118
119 return kt;
120 }
121
122 void *
123 zk_thread_helper(void *arg)
124 {
125 kthread_t *kt = (kthread_t *) arg;
126
127 VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0);
128
129 VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0);
130 kthread_nr++;
131 VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0);
132
133 kt->t_tid = pthread_self();
134 ((thread_func_arg_t) kt->t_func)(kt->t_arg);
135
136 /* Unreachable, thread must exit with thread_exit() */
137 abort();
138
139 return NULL;
140 }
141
142 kthread_t *
143 zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg,
144 size_t len, proc_t *pp, int state, pri_t pri)
145 {
146 kthread_t *kt;
147 pthread_attr_t attr;
148 size_t stack;
149
150 ASSERT3S(state & ~TS_RUN, ==, 0);
151
152 kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL);
153 kt->t_func = func;
154 kt->t_arg = arg;
155
156 /*
157 * The Solaris kernel stack size is 24k for x86/x86_64.
158 * The Linux kernel stack size is 8k for x86/x86_64.
159 *
160 * We reduce the default stack size in userspace, to ensure
161 * we observe stack overruns in user space as well as in
162 * kernel space. PTHREAD_STACK_MIN is the minimum stack
163 * required for a NULL procedure in user space and is added
164 * in to the stack requirements.
165 *
166 * Some buggy NPTL threading implementations include the
167 * guard area within the stack size allocations. In
168 * this case we allocate an extra page to account for the
169 * guard area since we only have two pages of usable stack
170 * on Linux.
171 */
172
173 stack = PTHREAD_STACK_MIN + MAX(stksize, STACK_SIZE) +
174 EXTRA_GUARD_BYTES;
175
176 VERIFY3S(pthread_attr_init(&attr), ==, 0);
177 VERIFY3S(pthread_attr_setstacksize(&attr, stack), ==, 0);
178 VERIFY3S(pthread_attr_setguardsize(&attr, PAGESIZE), ==, 0);
179
180 VERIFY3S(pthread_create(&kt->t_tid, &attr, &zk_thread_helper, kt),
181 ==, 0);
182
183 VERIFY3S(pthread_attr_destroy(&attr), ==, 0);
184
185 return kt;
186 }
187
188 void
189 zk_thread_exit(void)
190 {
191 kthread_t *kt = curthread;
192
193 ASSERT(pthread_equal(kt->t_tid, pthread_self()));
194
195 umem_free(kt, sizeof(kthread_t));
196
197 pthread_mutex_lock(&kthread_lock);
198 kthread_nr--;
199 pthread_mutex_unlock(&kthread_lock);
200
201 pthread_cond_broadcast(&kthread_cond);
202 pthread_exit((void *)TS_MAGIC);
203 }
204
205 void
206 zk_thread_join(kt_did_t tid)
207 {
208 void *ret;
209
210 pthread_join((pthread_t)tid, &ret);
211 VERIFY3P(ret, ==, (void *)TS_MAGIC);
212 }
213
214 /*
215 * =========================================================================
216 * kstats
217 * =========================================================================
218 */
219 /*ARGSUSED*/
220 kstat_t *
221 kstat_create(char *module, int instance, char *name, char *class,
222 uchar_t type, ulong_t ndata, uchar_t ks_flag)
223 {
224 return (NULL);
225 }
226
227 /*ARGSUSED*/
228 void
229 kstat_install(kstat_t *ksp)
230 {}
231
232 /*ARGSUSED*/
233 void
234 kstat_delete(kstat_t *ksp)
235 {}
236
237 /*
238 * =========================================================================
239 * mutexes
240 * =========================================================================
241 */
242
243 void
244 mutex_init(kmutex_t *mp, char *name, int type, void *cookie)
245 {
246 ASSERT3S(type, ==, MUTEX_DEFAULT);
247 ASSERT3P(cookie, ==, NULL);
248 mp->m_owner = MTX_INIT;
249 mp->m_magic = MTX_MAGIC;
250 VERIFY3S(pthread_mutex_init(&mp->m_lock, NULL), ==, 0);
251 }
252
253 void
254 mutex_destroy(kmutex_t *mp)
255 {
256 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
257 ASSERT3P(mp->m_owner, ==, MTX_INIT);
258 VERIFY3S(pthread_mutex_destroy(&(mp)->m_lock), ==, 0);
259 mp->m_owner = MTX_DEST;
260 mp->m_magic = 0;
261 }
262
263 void
264 mutex_enter(kmutex_t *mp)
265 {
266 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
267 ASSERT3P(mp->m_owner, !=, MTX_DEST);
268 ASSERT3P(mp->m_owner, !=, curthread);
269 VERIFY3S(pthread_mutex_lock(&mp->m_lock), ==, 0);
270 ASSERT3P(mp->m_owner, ==, MTX_INIT);
271 mp->m_owner = curthread;
272 }
273
274 int
275 mutex_tryenter(kmutex_t *mp)
276 {
277 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
278 ASSERT3P(mp->m_owner, !=, MTX_DEST);
279 if (0 == pthread_mutex_trylock(&mp->m_lock)) {
280 ASSERT3P(mp->m_owner, ==, MTX_INIT);
281 mp->m_owner = curthread;
282 return (1);
283 } else {
284 return (0);
285 }
286 }
287
288 void
289 mutex_exit(kmutex_t *mp)
290 {
291 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
292 ASSERT3P(mutex_owner(mp), ==, curthread);
293 mp->m_owner = MTX_INIT;
294 VERIFY3S(pthread_mutex_unlock(&mp->m_lock), ==, 0);
295 }
296
297 void *
298 mutex_owner(kmutex_t *mp)
299 {
300 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
301 return (mp->m_owner);
302 }
303
304 int
305 mutex_held(kmutex_t *mp)
306 {
307 return (mp->m_owner == curthread);
308 }
309
310 /*
311 * =========================================================================
312 * rwlocks
313 * =========================================================================
314 */
315
316 void
317 rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
318 {
319 ASSERT3S(type, ==, RW_DEFAULT);
320 ASSERT3P(arg, ==, NULL);
321 VERIFY3S(pthread_rwlock_init(&rwlp->rw_lock, NULL), ==, 0);
322 rwlp->rw_owner = RW_INIT;
323 rwlp->rw_wr_owner = RW_INIT;
324 rwlp->rw_readers = 0;
325 rwlp->rw_magic = RW_MAGIC;
326 }
327
328 void
329 rw_destroy(krwlock_t *rwlp)
330 {
331 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
332
333 VERIFY3S(pthread_rwlock_destroy(&rwlp->rw_lock), ==, 0);
334 rwlp->rw_magic = 0;
335 }
336
337 void
338 rw_enter(krwlock_t *rwlp, krw_t rw)
339 {
340 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
341 ASSERT3P(rwlp->rw_owner, !=, curthread);
342 ASSERT3P(rwlp->rw_wr_owner, !=, curthread);
343
344 if (rw == RW_READER) {
345 VERIFY3S(pthread_rwlock_rdlock(&rwlp->rw_lock), ==, 0);
346 ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT);
347
348 atomic_inc_uint(&rwlp->rw_readers);
349 } else {
350 VERIFY3S(pthread_rwlock_wrlock(&rwlp->rw_lock), ==, 0);
351 ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT);
352 ASSERT3U(rwlp->rw_readers, ==, 0);
353
354 rwlp->rw_wr_owner = curthread;
355 }
356
357 rwlp->rw_owner = curthread;
358 }
359
360 void
361 rw_exit(krwlock_t *rwlp)
362 {
363 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
364 ASSERT(RW_LOCK_HELD(rwlp));
365
366 if (RW_READ_HELD(rwlp))
367 atomic_dec_uint(&rwlp->rw_readers);
368 else
369 rwlp->rw_wr_owner = RW_INIT;
370
371 rwlp->rw_owner = RW_INIT;
372 VERIFY3S(pthread_rwlock_unlock(&rwlp->rw_lock), ==, 0);
373 }
374
375 int
376 rw_tryenter(krwlock_t *rwlp, krw_t rw)
377 {
378 int rv;
379
380 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
381
382 if (rw == RW_READER)
383 rv = pthread_rwlock_tryrdlock(&rwlp->rw_lock);
384 else
385 rv = pthread_rwlock_trywrlock(&rwlp->rw_lock);
386
387 if (rv == 0) {
388 ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT);
389
390 if (rw == RW_READER)
391 atomic_inc_uint(&rwlp->rw_readers);
392 else {
393 ASSERT3U(rwlp->rw_readers, ==, 0);
394 rwlp->rw_wr_owner = curthread;
395 }
396
397 rwlp->rw_owner = curthread;
398 return (1);
399 }
400
401 VERIFY3S(rv, ==, EBUSY);
402
403 return (0);
404 }
405
406 int
407 rw_tryupgrade(krwlock_t *rwlp)
408 {
409 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
410
411 return (0);
412 }
413
414 /*
415 * =========================================================================
416 * condition variables
417 * =========================================================================
418 */
419
420 void
421 cv_init(kcondvar_t *cv, char *name, int type, void *arg)
422 {
423 ASSERT3S(type, ==, CV_DEFAULT);
424 cv->cv_magic = CV_MAGIC;
425 VERIFY3S(pthread_cond_init(&cv->cv, NULL), ==, 0);
426 }
427
428 void
429 cv_destroy(kcondvar_t *cv)
430 {
431 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
432 VERIFY3S(pthread_cond_destroy(&cv->cv), ==, 0);
433 cv->cv_magic = 0;
434 }
435
436 void
437 cv_wait(kcondvar_t *cv, kmutex_t *mp)
438 {
439 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
440 ASSERT3P(mutex_owner(mp), ==, curthread);
441 mp->m_owner = MTX_INIT;
442 int ret = pthread_cond_wait(&cv->cv, &mp->m_lock);
443 if (ret != 0)
444 VERIFY3S(ret, ==, EINTR);
445 mp->m_owner = curthread;
446 }
447
448 clock_t
449 cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
450 {
451 int error;
452 struct timeval tv;
453 timestruc_t ts;
454 clock_t delta;
455
456 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
457
458 top:
459 delta = abstime - ddi_get_lbolt();
460 if (delta <= 0)
461 return (-1);
462
463 VERIFY(gettimeofday(&tv, NULL) == 0);
464
465 ts.tv_sec = tv.tv_sec + delta / hz;
466 ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz);
467 if (ts.tv_nsec >= NANOSEC) {
468 ts.tv_sec++;
469 ts.tv_nsec -= NANOSEC;
470 }
471
472 ASSERT3P(mutex_owner(mp), ==, curthread);
473 mp->m_owner = MTX_INIT;
474 error = pthread_cond_timedwait(&cv->cv, &mp->m_lock, &ts);
475 mp->m_owner = curthread;
476
477 if (error == ETIMEDOUT)
478 return (-1);
479
480 if (error == EINTR)
481 goto top;
482
483 VERIFY3S(error, ==, 0);
484
485 return (1);
486 }
487
488 void
489 cv_signal(kcondvar_t *cv)
490 {
491 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
492 VERIFY3S(pthread_cond_signal(&cv->cv), ==, 0);
493 }
494
495 void
496 cv_broadcast(kcondvar_t *cv)
497 {
498 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
499 VERIFY3S(pthread_cond_broadcast(&cv->cv), ==, 0);
500 }
501
502 /*
503 * =========================================================================
504 * vnode operations
505 * =========================================================================
506 */
507 /*
508 * Note: for the xxxat() versions of these functions, we assume that the
509 * starting vp is always rootdir (which is true for spa_directory.c, the only
510 * ZFS consumer of these interfaces). We assert this is true, and then emulate
511 * them by adding '/' in front of the path.
512 */
513
514 /*ARGSUSED*/
515 int
516 vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
517 {
518 int fd;
519 vnode_t *vp;
520 int old_umask;
521 char *realpath;
522 struct stat64 st;
523 int err;
524
525 realpath = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
526
527 /*
528 * If we're accessing a real disk from userland, we need to use
529 * the character interface to avoid caching. This is particularly
530 * important if we're trying to look at a real in-kernel storage
531 * pool from userland, e.g. via zdb, because otherwise we won't
532 * see the changes occurring under the segmap cache.
533 * On the other hand, the stupid character device returns zero
534 * for its size. So -- gag -- we open the block device to get
535 * its size, and remember it for subsequent VOP_GETATTR().
536 */
537 #if defined(__sun__) || defined(__sun)
538 if (strncmp(path, "/dev/", 5) == 0) {
539 #else
540 if (0) {
541 #endif
542 char *dsk;
543 fd = open64(path, O_RDONLY);
544 if (fd == -1) {
545 err = errno;
546 free(realpath);
547 return (err);
548 }
549 if (fstat64(fd, &st) == -1) {
550 err = errno;
551 close(fd);
552 free(realpath);
553 return (err);
554 }
555 close(fd);
556 (void) sprintf(realpath, "%s", path);
557 dsk = strstr(path, "/dsk/");
558 if (dsk != NULL)
559 (void) sprintf(realpath + (dsk - path) + 1, "r%s",
560 dsk + 1);
561 } else {
562 (void) sprintf(realpath, "%s", path);
563 if (!(flags & FCREAT) && stat64(realpath, &st) == -1) {
564 err = errno;
565 free(realpath);
566 return (err);
567 }
568 }
569
570 if (!(flags & FCREAT) && S_ISBLK(st.st_mode)) {
571 #ifdef __linux__
572 flags |= O_DIRECT;
573 #endif
574 /* We shouldn't be writing to block devices in userspace */
575 VERIFY(!(flags & FWRITE));
576 }
577
578 if (flags & FCREAT)
579 old_umask = umask(0);
580
581 /*
582 * The construct 'flags - FREAD' conveniently maps combinations of
583 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
584 */
585 fd = open64(realpath, flags - FREAD, mode);
586 free(realpath);
587
588 if (flags & FCREAT)
589 (void) umask(old_umask);
590
591 if (fd == -1)
592 return (errno);
593
594 if (fstat64_blk(fd, &st) == -1) {
595 err = errno;
596 close(fd);
597 return (err);
598 }
599
600 (void) fcntl(fd, F_SETFD, FD_CLOEXEC);
601
602 *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
603
604 vp->v_fd = fd;
605 vp->v_size = st.st_size;
606 vp->v_path = spa_strdup(path);
607
608 return (0);
609 }
610
611 /*ARGSUSED*/
612 int
613 vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
614 int x3, vnode_t *startvp, int fd)
615 {
616 char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
617 int ret;
618
619 ASSERT(startvp == rootdir);
620 (void) sprintf(realpath, "/%s", path);
621
622 /* fd ignored for now, need if want to simulate nbmand support */
623 ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
624
625 umem_free(realpath, strlen(path) + 2);
626
627 return (ret);
628 }
629
630 /*ARGSUSED*/
631 int
632 vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
633 int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
634 {
635 ssize_t rc, done = 0, split;
636
637 if (uio == UIO_READ) {
638 rc = pread64(vp->v_fd, addr, len, offset);
639 } else {
640 /*
641 * To simulate partial disk writes, we split writes into two
642 * system calls so that the process can be killed in between.
643 */
644 split = (len > 0 ? rand() % len : 0);
645 rc = pwrite64(vp->v_fd, addr, split, offset);
646 if (rc != -1) {
647 done = rc;
648 rc = pwrite64(vp->v_fd, (char *)addr + split,
649 len - split, offset + split);
650 }
651 }
652
653 #ifdef __linux__
654 if (rc == -1 && errno == EINVAL) {
655 /*
656 * Under Linux, this most likely means an alignment issue
657 * (memory or disk) due to O_DIRECT, so we abort() in order to
658 * catch the offender.
659 */
660 abort();
661 }
662 #endif
663 if (rc == -1)
664 return (errno);
665
666 done += rc;
667
668 if (residp)
669 *residp = len - done;
670 else if (done != len)
671 return (EIO);
672 return (0);
673 }
674
675 void
676 vn_close(vnode_t *vp)
677 {
678 close(vp->v_fd);
679 spa_strfree(vp->v_path);
680 umem_free(vp, sizeof (vnode_t));
681 }
682
683 /*
684 * At a minimum we need to update the size since vdev_reopen()
685 * will no longer call vn_openat().
686 */
687 int
688 fop_getattr(vnode_t *vp, vattr_t *vap)
689 {
690 struct stat64 st;
691 int err;
692
693 if (fstat64_blk(vp->v_fd, &st) == -1) {
694 err = errno;
695 close(vp->v_fd);
696 return (err);
697 }
698
699 vap->va_size = st.st_size;
700 return (0);
701 }
702
703 /*
704 * =========================================================================
705 * Figure out which debugging statements to print
706 * =========================================================================
707 */
708
/* Comma-separated debug selectors and the "print everything" switch. */
static char *dprintf_string;
static int dprintf_print_all;

/*
 * Report whether 'string' appears as a complete comma-separated entry in
 * dprintf_string.
 * String format: file1.c,function_name1,file2.c,file3.c
 *
 * Returns 1 if found, 0 otherwise (including when no debug string is set).
 */
int
dprintf_find_string(const char *string)
{
	const char *entry;
	int len = strlen(string);

	for (entry = dprintf_string; entry != NULL; ) {
		/* Match only whole entries, not prefixes of longer ones. */
		if (strncmp(entry, string, len) == 0 &&
		    (entry[len] == ',' || entry[len] == '\0'))
			return (1);

		entry = strchr(entry, ',');
		if (entry != NULL)
			entry++;	/* Get rid of , */
	}

	return (0);
}
733
734 void
735 dprintf_setup(int *argc, char **argv)
736 {
737 int i, j;
738
739 /*
740 * Debugging can be specified two ways: by setting the
741 * environment variable ZFS_DEBUG, or by including a
742 * "debug=..." argument on the command line. The command
743 * line setting overrides the environment variable.
744 */
745
746 for (i = 1; i < *argc; i++) {
747 int len = strlen("debug=");
748 /* First look for a command line argument */
749 if (strncmp("debug=", argv[i], len) == 0) {
750 dprintf_string = argv[i] + len;
751 /* Remove from args */
752 for (j = i; j < *argc; j++)
753 argv[j] = argv[j+1];
754 argv[j] = NULL;
755 (*argc)--;
756 }
757 }
758
759 if (dprintf_string == NULL) {
760 /* Look for ZFS_DEBUG environment variable */
761 dprintf_string = getenv("ZFS_DEBUG");
762 }
763
764 /*
765 * Are we just turning on all debugging?
766 */
767 if (dprintf_find_string("on"))
768 dprintf_print_all = 1;
769 }
770
771 /*
772 * =========================================================================
773 * debug printfs
774 * =========================================================================
775 */
776 void
777 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
778 {
779 const char *newfile;
780 va_list adx;
781
782 /*
783 * Get rid of annoying "../common/" prefix to filename.
784 */
785 newfile = strrchr(file, '/');
786 if (newfile != NULL) {
787 newfile = newfile + 1; /* Get rid of leading / */
788 } else {
789 newfile = file;
790 }
791
792 if (dprintf_print_all ||
793 dprintf_find_string(newfile) ||
794 dprintf_find_string(func)) {
795 /* Print out just the function name if requested */
796 flockfile(stdout);
797 if (dprintf_find_string("pid"))
798 (void) printf("%d ", getpid());
799 if (dprintf_find_string("tid"))
800 (void) printf("%u ", (uint_t) pthread_self());
801 if (dprintf_find_string("cpu"))
802 (void) printf("%u ", getcpuid());
803 if (dprintf_find_string("time"))
804 (void) printf("%llu ", gethrtime());
805 if (dprintf_find_string("long"))
806 (void) printf("%s, line %d: ", newfile, line);
807 (void) printf("%s: ", func);
808 va_start(adx, fmt);
809 (void) vprintf(fmt, adx);
810 va_end(adx);
811 funlockfile(stdout);
812 }
813 }
814
815 /*
816 * =========================================================================
817 * cmn_err() and panic()
818 * =========================================================================
819 */
820 static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
821 static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
822
/*
 * Print "error: <formatted message>" and a newline to stderr, then
 * abort(). Does not return.
 */
void
vpanic(const char *fmt, va_list adx)
{
	(void) fputs("error: ", stderr);
	(void) vfprintf(stderr, fmt, adx);
	(void) fputc('\n', stderr);

	abort();	/* think of it as a "user-level crash dump" */
}
832
/*
 * printf-style front end to vpanic(); does not return.
 */
void
panic(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vpanic(fmt, args);
	va_end(args);
}
842
843 void
844 vcmn_err(int ce, const char *fmt, va_list adx)
845 {
846 if (ce == CE_PANIC)
847 vpanic(fmt, adx);
848 if (ce != CE_NOTE) { /* suppress noise in userland stress testing */
849 (void) fprintf(stderr, "%s", ce_prefix[ce]);
850 (void) vfprintf(stderr, fmt, adx);
851 (void) fprintf(stderr, "%s", ce_suffix[ce]);
852 }
853 }
854
/*
 * printf-style front end to vcmn_err().
 */
/*PRINTFLIKE2*/
void
cmn_err(int ce, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vcmn_err(ce, fmt, args);
	va_end(args);
}
865
866 /*
867 * =========================================================================
868 * kobj interfaces
869 * =========================================================================
870 */
871 struct _buf *
872 kobj_open_file(char *name)
873 {
874 struct _buf *file;
875 vnode_t *vp;
876
877 /* set vp as the _fd field of the file */
878 if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir,
879 -1) != 0)
880 return ((void *)-1UL);
881
882 file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
883 file->_fd = (intptr_t)vp;
884 return (file);
885 }
886
887 int
888 kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
889 {
890 ssize_t resid;
891
892 vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
893 UIO_SYSSPACE, 0, 0, 0, &resid);
894
895 return (size - resid);
896 }
897
898 void
899 kobj_close_file(struct _buf *file)
900 {
901 vn_close((vnode_t *)file->_fd);
902 umem_free(file, sizeof (struct _buf));
903 }
904
905 int
906 kobj_get_filesize(struct _buf *file, uint64_t *size)
907 {
908 struct stat64 st;
909 vnode_t *vp = (vnode_t *)file->_fd;
910
911 if (fstat64(vp->v_fd, &st) == -1) {
912 vn_close(vp);
913 return (errno);
914 }
915 *size = st.st_size;
916 return (0);
917 }
918
919 /*
920 * =========================================================================
921 * misc routines
922 * =========================================================================
923 */
924
925 void
926 delay(clock_t ticks)
927 {
928 poll(0, 0, ticks * (1000 / hz));
929 }
930
931 /*
932 * Find highest one bit set.
933 * Returns bit number + 1 of highest bit that is set, otherwise returns 0.
934 * High order bit is 31 (or 63 in _LP64 kernel).
935 */
936 int
937 highbit(ulong_t i)
938 {
939 register int h = 1;
940
941 if (i == 0)
942 return (0);
943 #ifdef _LP64
944 if (i & 0xffffffff00000000ul) {
945 h += 32; i >>= 32;
946 }
947 #endif
948 if (i & 0xffff0000) {
949 h += 16; i >>= 16;
950 }
951 if (i & 0xff00) {
952 h += 8; i >>= 8;
953 }
954 if (i & 0xf0) {
955 h += 4; i >>= 4;
956 }
957 if (i & 0xc) {
958 h += 2; i >>= 2;
959 }
960 if (i & 0x2) {
961 h += 1;
962 }
963 return (h);
964 }
965
966 static int random_fd = -1, urandom_fd = -1;
967
968 static int
969 random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
970 {
971 size_t resid = len;
972 ssize_t bytes;
973
974 ASSERT(fd != -1);
975
976 while (resid != 0) {
977 bytes = read(fd, ptr, resid);
978 ASSERT3S(bytes, >=, 0);
979 ptr += bytes;
980 resid -= bytes;
981 }
982
983 return (0);
984 }
985
986 int
987 random_get_bytes(uint8_t *ptr, size_t len)
988 {
989 return (random_get_bytes_common(ptr, len, random_fd));
990 }
991
992 int
993 random_get_pseudo_bytes(uint8_t *ptr, size_t len)
994 {
995 return (random_get_bytes_common(ptr, len, urandom_fd));
996 }
997
/*
 * Convert the string 'hw_serial' to an unsigned long in the given base.
 *
 * hw_serial  string to parse
 * nptr       if non-NULL, receives a pointer to the first unparsed char
 * base       numeric base, as for strtoul()
 * result     out: the converted value
 *
 * Returns 0 on success (a parsed value of 0 is a success), EINVAL if no
 * digits could be parsed, or the errno set by strtoul() (for example
 * ERANGE on overflow).
 */
int
ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
{
	/* Pre-set in case strtoul() rejects 'base' without storing 'end'. */
	char *end = (char *)hw_serial;

	/* strtoul() only reports errors through errno; clear stale values. */
	errno = 0;
	*result = strtoul(hw_serial, &end, base);

	if (nptr != NULL)
		*nptr = end;

	if (errno != 0)
		return (errno);
	if (end == hw_serial)
		return (EINVAL);	/* no number formed */

	return (0);
}
1008
1009 int
1010 ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
1011 {
1012 char *end;
1013
1014 *result = strtoull(str, &end, base);
1015 if (*result == 0)
1016 return (errno);
1017 return (0);
1018 }
1019
1020 /*
1021 * =========================================================================
1022 * kernel emulation setup & teardown
1023 * =========================================================================
1024 */
/*
 * Callback registered with umem_nofail_callback() in kernel_init():
 * report the condition on stderr and abort().
 */
static int
umem_out_of_memory(void)
{
	static const char errmsg[] =
	    "out of memory -- generating core dump\n";

	(void) fputs(errmsg, stderr);
	abort();

	return (0);
}
1034
1035 void
1036 kernel_init(int mode)
1037 {
1038 umem_nofail_callback(umem_out_of_memory);
1039
1040 physmem = sysconf(_SC_PHYS_PAGES);
1041
1042 dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
1043 (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
1044
1045 (void) snprintf(hw_serial, sizeof (hw_serial), "%ld",
1046 (mode & FWRITE) ? gethostid() : 0);
1047
1048 VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
1049 VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);
1050
1051 thread_init();
1052 system_taskq_init();
1053
1054 spa_init(mode);
1055 }
1056
1057 void
1058 kernel_fini(void)
1059 {
1060 spa_fini();
1061
1062 system_taskq_fini();
1063 thread_fini();
1064
1065 close(random_fd);
1066 close(urandom_fd);
1067
1068 random_fd = -1;
1069 urandom_fd = -1;
1070 }
1071
1072 uid_t
1073 crgetuid(cred_t *cr)
1074 {
1075 return (0);
1076 }
1077
1078 gid_t
1079 crgetgid(cred_t *cr)
1080 {
1081 return (0);
1082 }
1083
1084 int
1085 crgetngroups(cred_t *cr)
1086 {
1087 return (0);
1088 }
1089
1090 gid_t *
1091 crgetgroups(cred_t *cr)
1092 {
1093 return (NULL);
1094 }
1095
1096 int
1097 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
1098 {
1099 return (0);
1100 }
1101
1102 int
1103 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
1104 {
1105 return (0);
1106 }
1107
1108 int
1109 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
1110 {
1111 return (0);
1112 }
1113
1114 ksiddomain_t *
1115 ksid_lookupdomain(const char *dom)
1116 {
1117 ksiddomain_t *kd;
1118
1119 kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL);
1120 kd->kd_name = spa_strdup(dom);
1121 return (kd);
1122 }
1123
1124 void
1125 ksiddomain_rele(ksiddomain_t *ksid)
1126 {
1127 spa_strfree(ksid->kd_name);
1128 umem_free(ksid, sizeof (ksiddomain_t));
1129 }
1130
/*
 * Format into a buffer allocated by vasprintf() and return it. The
 * caller's va_list is left untouched because a copy is consumed instead.
 * Formatting failure is fatal.
 */
char *
kmem_vasprintf(const char *fmt, va_list adx)
{
	char *buf = NULL;
	va_list adx_copy;
	int rc;

	va_copy(adx_copy, adx);
	rc = vasprintf(&buf, fmt, adx_copy);
	va_end(adx_copy);

	VERIFY(rc != -1);

	return (buf);
}
1143
/*
 * printf-style formatting into a buffer allocated by vasprintf().
 * Formatting failure is fatal.
 */
char *
kmem_asprintf(const char *fmt, ...)
{
	char *buf = NULL;
	va_list args;
	int rc;

	va_start(args, fmt);
	rc = vasprintf(&buf, fmt, args);
	va_end(args);

	VERIFY(rc != -1);

	return (buf);
}
1156
1157 /* ARGSUSED */
1158 int
1159 zfs_onexit_fd_hold(int fd, minor_t *minorp)
1160 {
1161 *minorp = 0;
1162 return (0);
1163 }
1164
/*
 * Stub: does nothing.
 */
/* ARGSUSED */
void
zfs_onexit_fd_rele(int fd)
{
	(void) fd;
}
1170
1171 /* ARGSUSED */
1172 int
1173 zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
1174 uint64_t *action_handle)
1175 {
1176 return (0);
1177 }
1178
1179 /* ARGSUSED */
1180 int
1181 zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
1182 {
1183 return (0);
1184 }
1185
1186 /* ARGSUSED */
1187 int
1188 zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
1189 {
1190 return (0);
1191 }