]> git.proxmox.com Git - mirror_zfs-debian.git/blob - lib/libzpool/kernel.c
Add linux kernel memory support
[mirror_zfs-debian.git] / lib / libzpool / kernel.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 #include <assert.h>
26 #include <fcntl.h>
27 #include <poll.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <zlib.h>
32 #include <sys/signal.h>
33 #include <sys/spa.h>
34 #include <sys/stat.h>
35 #include <sys/processor.h>
36 #include <sys/zfs_context.h>
37 #include <sys/utsname.h>
38 #include <sys/systeminfo.h>
39
40 /*
41 * Emulation of kernel services in userland.
42 */
43
44 int aok;
45 uint64_t physmem;
46 vnode_t *rootdir = (vnode_t *)0xabcd1234;
47 char hw_serial[HW_HOSTID_LEN];
48
49 struct utsname utsname = {
50 "userland", "libzpool", "1", "1", "na"
51 };
52
53 /* this only exists to have its address taken */
54 struct proc p0;
55
56 /*
57 * =========================================================================
58 * threads
59 * =========================================================================
60 */
61
/* Protects kthread_nr; kthread_cond is broadcast whenever a thread exits. */
pthread_mutex_t kthread_lock = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t kthread_cond = PTHREAD_COND_INITIALIZER;

/* Thread-specific-data key holding each thread's kthread_t pointer. */
pthread_key_t kthread_key;

/* Count of live emulated kernel threads, including the primary thread. */
int kthread_nr = 0;
66
67 static void
68 thread_init(void)
69 {
70 kthread_t *kt;
71
72 VERIFY3S(pthread_key_create(&kthread_key, NULL), ==, 0);
73
74 /* Create entry for primary kthread */
75 kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL);
76 kt->t_tid = pthread_self();
77 kt->t_func = NULL;
78
79 VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0);
80
81 /* Only the main thread should be running at the moment */
82 ASSERT3S(kthread_nr, ==, 0);
83 kthread_nr = 1;
84 }
85
86 static void
87 thread_fini(void)
88 {
89 kthread_t *kt = curthread;
90
91 ASSERT(pthread_equal(kt->t_tid, pthread_self()));
92 ASSERT3P(kt->t_func, ==, NULL);
93
94 umem_free(kt, sizeof(kthread_t));
95
96 /* Wait for all threads to exit via thread_exit() */
97 VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0);
98
99 kthread_nr--; /* Main thread is exiting */
100
101 while (kthread_nr > 0)
102 VERIFY3S(pthread_cond_wait(&kthread_cond, &kthread_lock), ==,
103 0);
104
105 ASSERT3S(kthread_nr, ==, 0);
106 VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0);
107
108 VERIFY3S(pthread_key_delete(kthread_key), ==, 0);
109 }
110
111 kthread_t *
112 zk_thread_current(void)
113 {
114 kthread_t *kt = pthread_getspecific(kthread_key);
115
116 ASSERT3P(kt, !=, NULL);
117
118 return kt;
119 }
120
121 void *
122 zk_thread_helper(void *arg)
123 {
124 kthread_t *kt = (kthread_t *) arg;
125
126 VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0);
127
128 VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0);
129 kthread_nr++;
130 VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0);
131
132 kt->t_tid = pthread_self();
133 ((thread_func_arg_t) kt->t_func)(kt->t_arg);
134
135 /* Unreachable, thread must exit with thread_exit() */
136 abort();
137
138 return NULL;
139 }
140
141 kthread_t *
142 zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg,
143 size_t len, proc_t *pp, int state, pri_t pri)
144 {
145 kthread_t *kt;
146 pthread_attr_t attr;
147 size_t stack;
148
149 ASSERT3S(state & ~TS_RUN, ==, 0);
150
151 kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL);
152 kt->t_func = func;
153 kt->t_arg = arg;
154
155 /*
156 * The Solaris kernel stack size is 24k for x86/x86_64.
157 * The Linux kernel stack size is 8k for x86/x86_64.
158 *
159 * We reduce the default stack size in userspace, to ensure
160 * we observe stack overruns in user space as well as in
161 * kernel space. PTHREAD_STACK_MIN is the minimum stack
162 * required for a NULL procedure in user space and is added
163 * in to the stack requirements.
164 *
165 * Some buggy NPTL threading implementations include the
166 * guard area within the stack size allocations. In
167 * this case we allocate an extra page to account for the
168 * guard area since we only have two pages of usable stack
169 * on Linux.
170 */
171
172 stack = PTHREAD_STACK_MIN + MAX(stksize, STACK_SIZE) +
173 EXTRA_GUARD_BYTES;
174
175 VERIFY3S(pthread_attr_init(&attr), ==, 0);
176 VERIFY3S(pthread_attr_setstacksize(&attr, stack), ==, 0);
177 VERIFY3S(pthread_attr_setguardsize(&attr, PAGESIZE), ==, 0);
178
179 VERIFY3S(pthread_create(&kt->t_tid, &attr, &zk_thread_helper, kt),
180 ==, 0);
181
182 VERIFY3S(pthread_attr_destroy(&attr), ==, 0);
183
184 return kt;
185 }
186
187 void
188 zk_thread_exit(void)
189 {
190 kthread_t *kt = curthread;
191
192 ASSERT(pthread_equal(kt->t_tid, pthread_self()));
193
194 umem_free(kt, sizeof(kthread_t));
195
196 pthread_mutex_lock(&kthread_lock);
197 kthread_nr--;
198 pthread_mutex_unlock(&kthread_lock);
199
200 pthread_cond_broadcast(&kthread_cond);
201 pthread_exit((void *)TS_MAGIC);
202 }
203
204 void
205 zk_thread_join(kt_did_t tid)
206 {
207 void *ret;
208
209 pthread_join((pthread_t)tid, &ret);
210 VERIFY3P(ret, ==, (void *)TS_MAGIC);
211 }
212
213 /*
214 * =========================================================================
215 * kstats
216 * =========================================================================
217 */
218 /*ARGSUSED*/
219 kstat_t *
220 kstat_create(char *module, int instance, char *name, char *class,
221 uchar_t type, ulong_t ndata, uchar_t ks_flag)
222 {
223 return (NULL);
224 }
225
226 /*ARGSUSED*/
227 void
228 kstat_install(kstat_t *ksp)
229 {}
230
231 /*ARGSUSED*/
232 void
233 kstat_delete(kstat_t *ksp)
234 {}
235
236 /*
237 * =========================================================================
238 * mutexes
239 * =========================================================================
240 */
241
242 void
243 mutex_init(kmutex_t *mp, char *name, int type, void *cookie)
244 {
245 ASSERT3S(type, ==, MUTEX_DEFAULT);
246 ASSERT3P(cookie, ==, NULL);
247 mp->m_owner = MTX_INIT;
248 mp->m_magic = MTX_MAGIC;
249 VERIFY3S(pthread_mutex_init(&mp->m_lock, NULL), ==, 0);
250 }
251
252 void
253 mutex_destroy(kmutex_t *mp)
254 {
255 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
256 ASSERT3P(mp->m_owner, ==, MTX_INIT);
257 VERIFY3S(pthread_mutex_destroy(&(mp)->m_lock), ==, 0);
258 mp->m_owner = MTX_DEST;
259 mp->m_magic = 0;
260 }
261
262 void
263 mutex_enter(kmutex_t *mp)
264 {
265 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
266 ASSERT3P(mp->m_owner, !=, MTX_DEST);
267 ASSERT3P(mp->m_owner, !=, curthread);
268 VERIFY3S(pthread_mutex_lock(&mp->m_lock), ==, 0);
269 ASSERT3P(mp->m_owner, ==, MTX_INIT);
270 mp->m_owner = curthread;
271 }
272
273 int
274 mutex_tryenter(kmutex_t *mp)
275 {
276 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
277 ASSERT3P(mp->m_owner, !=, MTX_DEST);
278 if (0 == pthread_mutex_trylock(&mp->m_lock)) {
279 ASSERT3P(mp->m_owner, ==, MTX_INIT);
280 mp->m_owner = curthread;
281 return (1);
282 } else {
283 return (0);
284 }
285 }
286
287 void
288 mutex_exit(kmutex_t *mp)
289 {
290 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
291 ASSERT3P(mutex_owner(mp), ==, curthread);
292 mp->m_owner = MTX_INIT;
293 VERIFY3S(pthread_mutex_unlock(&mp->m_lock), ==, 0);
294 }
295
296 void *
297 mutex_owner(kmutex_t *mp)
298 {
299 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
300 return (mp->m_owner);
301 }
302
303 int
304 mutex_held(kmutex_t *mp)
305 {
306 return (mp->m_owner == curthread);
307 }
308
309 /*
310 * =========================================================================
311 * rwlocks
312 * =========================================================================
313 */
314
315 void
316 rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
317 {
318 ASSERT3S(type, ==, RW_DEFAULT);
319 ASSERT3P(arg, ==, NULL);
320 VERIFY3S(pthread_rwlock_init(&rwlp->rw_lock, NULL), ==, 0);
321 rwlp->rw_owner = RW_INIT;
322 rwlp->rw_wr_owner = RW_INIT;
323 rwlp->rw_readers = 0;
324 rwlp->rw_magic = RW_MAGIC;
325 }
326
327 void
328 rw_destroy(krwlock_t *rwlp)
329 {
330 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
331
332 VERIFY3S(pthread_rwlock_destroy(&rwlp->rw_lock), ==, 0);
333 rwlp->rw_magic = 0;
334 }
335
336 void
337 rw_enter(krwlock_t *rwlp, krw_t rw)
338 {
339 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
340 ASSERT3P(rwlp->rw_owner, !=, curthread);
341 ASSERT3P(rwlp->rw_wr_owner, !=, curthread);
342
343 if (rw == RW_READER) {
344 VERIFY3S(pthread_rwlock_rdlock(&rwlp->rw_lock), ==, 0);
345 ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT);
346
347 atomic_inc_uint(&rwlp->rw_readers);
348 } else {
349 VERIFY3S(pthread_rwlock_wrlock(&rwlp->rw_lock), ==, 0);
350 ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT);
351 ASSERT3U(rwlp->rw_readers, ==, 0);
352
353 rwlp->rw_wr_owner = curthread;
354 }
355
356 rwlp->rw_owner = curthread;
357 }
358
359 void
360 rw_exit(krwlock_t *rwlp)
361 {
362 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
363 ASSERT(RW_LOCK_HELD(rwlp));
364
365 if (RW_READ_HELD(rwlp))
366 atomic_dec_uint(&rwlp->rw_readers);
367 else
368 rwlp->rw_wr_owner = RW_INIT;
369
370 rwlp->rw_owner = RW_INIT;
371 VERIFY3S(pthread_rwlock_unlock(&rwlp->rw_lock), ==, 0);
372 }
373
374 int
375 rw_tryenter(krwlock_t *rwlp, krw_t rw)
376 {
377 int rv;
378
379 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
380
381 if (rw == RW_READER)
382 rv = pthread_rwlock_tryrdlock(&rwlp->rw_lock);
383 else
384 rv = pthread_rwlock_trywrlock(&rwlp->rw_lock);
385
386 if (rv == 0) {
387 ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT);
388
389 if (rw == RW_READER)
390 atomic_inc_uint(&rwlp->rw_readers);
391 else {
392 ASSERT3U(rwlp->rw_readers, ==, 0);
393 rwlp->rw_wr_owner = curthread;
394 }
395
396 rwlp->rw_owner = curthread;
397 return (1);
398 }
399
400 VERIFY3S(rv, ==, EBUSY);
401
402 return (0);
403 }
404
405 int
406 rw_tryupgrade(krwlock_t *rwlp)
407 {
408 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
409
410 return (0);
411 }
412
413 /*
414 * =========================================================================
415 * condition variables
416 * =========================================================================
417 */
418
419 void
420 cv_init(kcondvar_t *cv, char *name, int type, void *arg)
421 {
422 ASSERT3S(type, ==, CV_DEFAULT);
423 cv->cv_magic = CV_MAGIC;
424 VERIFY3S(pthread_cond_init(&cv->cv, NULL), ==, 0);
425 }
426
427 void
428 cv_destroy(kcondvar_t *cv)
429 {
430 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
431 VERIFY3S(pthread_cond_destroy(&cv->cv), ==, 0);
432 cv->cv_magic = 0;
433 }
434
435 void
436 cv_wait(kcondvar_t *cv, kmutex_t *mp)
437 {
438 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
439 ASSERT3P(mutex_owner(mp), ==, curthread);
440 mp->m_owner = MTX_INIT;
441 int ret = pthread_cond_wait(&cv->cv, &mp->m_lock);
442 if (ret != 0)
443 VERIFY3S(ret, ==, EINTR);
444 mp->m_owner = curthread;
445 }
446
447 clock_t
448 cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
449 {
450 int error;
451 struct timeval tv;
452 timestruc_t ts;
453 clock_t delta;
454
455 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
456
457 top:
458 delta = abstime - ddi_get_lbolt();
459 if (delta <= 0)
460 return (-1);
461
462 VERIFY(gettimeofday(&tv, NULL) == 0);
463
464 ts.tv_sec = tv.tv_sec + delta / hz;
465 ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz);
466 if (ts.tv_nsec >= NANOSEC) {
467 ts.tv_sec++;
468 ts.tv_nsec -= NANOSEC;
469 }
470
471 ASSERT3P(mutex_owner(mp), ==, curthread);
472 mp->m_owner = MTX_INIT;
473 error = pthread_cond_timedwait(&cv->cv, &mp->m_lock, &ts);
474 mp->m_owner = curthread;
475
476 if (error == ETIMEDOUT)
477 return (-1);
478
479 if (error == EINTR)
480 goto top;
481
482 VERIFY3S(error, ==, 0);
483
484 return (1);
485 }
486
487 void
488 cv_signal(kcondvar_t *cv)
489 {
490 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
491 VERIFY3S(pthread_cond_signal(&cv->cv), ==, 0);
492 }
493
494 void
495 cv_broadcast(kcondvar_t *cv)
496 {
497 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
498 VERIFY3S(pthread_cond_broadcast(&cv->cv), ==, 0);
499 }
500
501 /*
502 * =========================================================================
503 * vnode operations
504 * =========================================================================
505 */
506 /*
507 * Note: for the xxxat() versions of these functions, we assume that the
508 * starting vp is always rootdir (which is true for spa_directory.c, the only
509 * ZFS consumer of these interfaces). We assert this is true, and then emulate
510 * them by adding '/' in front of the path.
511 */
512
513 /*ARGSUSED*/
514 int
515 vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
516 {
517 int fd;
518 vnode_t *vp;
519 int old_umask;
520 char *realpath;
521 struct stat64 st;
522 int err;
523
524 realpath = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
525
526 /*
527 * If we're accessing a real disk from userland, we need to use
528 * the character interface to avoid caching. This is particularly
529 * important if we're trying to look at a real in-kernel storage
530 * pool from userland, e.g. via zdb, because otherwise we won't
531 * see the changes occurring under the segmap cache.
532 * On the other hand, the stupid character device returns zero
533 * for its size. So -- gag -- we open the block device to get
534 * its size, and remember it for subsequent VOP_GETATTR().
535 */
536 if (strncmp(path, "/dev/", 5) == 0) {
537 char *dsk;
538 fd = open64(path, O_RDONLY);
539 if (fd == -1) {
540 err = errno;
541 free(realpath);
542 return (err);
543 }
544 if (fstat64(fd, &st) == -1) {
545 err = errno;
546 close(fd);
547 free(realpath);
548 return (err);
549 }
550 close(fd);
551 (void) sprintf(realpath, "%s", path);
552 dsk = strstr(path, "/dsk/");
553 if (dsk != NULL)
554 (void) sprintf(realpath + (dsk - path) + 1, "r%s",
555 dsk + 1);
556 } else {
557 (void) sprintf(realpath, "%s", path);
558 if (!(flags & FCREAT) && stat64(realpath, &st) == -1) {
559 err = errno;
560 free(realpath);
561 return (err);
562 }
563 }
564
565 if (flags & FCREAT)
566 old_umask = umask(0);
567
568 /*
569 * The construct 'flags - FREAD' conveniently maps combinations of
570 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
571 */
572 fd = open64(realpath, flags - FREAD, mode);
573 free(realpath);
574
575 if (flags & FCREAT)
576 (void) umask(old_umask);
577
578 if (fd == -1)
579 return (errno);
580
581 if (fstat64(fd, &st) == -1) {
582 err = errno;
583 close(fd);
584 return (err);
585 }
586
587 (void) fcntl(fd, F_SETFD, FD_CLOEXEC);
588
589 *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
590
591 vp->v_fd = fd;
592 vp->v_size = st.st_size;
593 vp->v_path = spa_strdup(path);
594
595 return (0);
596 }
597
598 /*ARGSUSED*/
599 int
600 vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
601 int x3, vnode_t *startvp, int fd)
602 {
603 char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
604 int ret;
605
606 ASSERT(startvp == rootdir);
607 (void) sprintf(realpath, "/%s", path);
608
609 /* fd ignored for now, need if want to simulate nbmand support */
610 ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
611
612 umem_free(realpath, strlen(path) + 2);
613
614 return (ret);
615 }
616
617 /*ARGSUSED*/
618 int
619 vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
620 int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
621 {
622 ssize_t rc, done = 0, split;
623
624 if (uio == UIO_READ) {
625 rc = pread64(vp->v_fd, addr, len, offset);
626 } else {
627 /*
628 * To simulate partial disk writes, we split writes into two
629 * system calls so that the process can be killed in between.
630 */
631 split = (len > 0 ? rand() % len : 0);
632 rc = pwrite64(vp->v_fd, addr, split, offset);
633 if (rc != -1) {
634 done = rc;
635 rc = pwrite64(vp->v_fd, (char *)addr + split,
636 len - split, offset + split);
637 }
638 }
639
640 if (rc == -1)
641 return (errno);
642
643 done += rc;
644
645 if (residp)
646 *residp = len - done;
647 else if (done != len)
648 return (EIO);
649 return (0);
650 }
651
652 void
653 vn_close(vnode_t *vp)
654 {
655 close(vp->v_fd);
656 spa_strfree(vp->v_path);
657 umem_free(vp, sizeof (vnode_t));
658 }
659
660 /*
661 * At a minimum we need to update the size since vdev_reopen()
662 * will no longer call vn_openat().
663 */
664 int
665 fop_getattr(vnode_t *vp, vattr_t *vap)
666 {
667 struct stat64 st;
668
669 if (fstat64(vp->v_fd, &st) == -1) {
670 close(vp->v_fd);
671 return (errno);
672 }
673
674 vap->va_size = st.st_size;
675 return (0);
676 }
677
678 #ifdef ZFS_DEBUG
679
680 /*
681 * =========================================================================
682 * Figure out which debugging statements to print
683 * =========================================================================
684 */
685
/* Comma-separated list of debug selectors; NULL when none were given. */
static char *dprintf_string;
/* Nonzero once the selector "on" has been seen by dprintf_setup(). */
static int dprintf_print_all;

/*
 * Report whether 'string' appears as a complete item in dprintf_string.
 * List format: file1.c,function_name1,file2.c,file3.c
 * Returns 1 on an exact item match, 0 otherwise (including when no list
 * is set).
 */
int
dprintf_find_string(const char *string)
{
	int len = strlen(string);
	char *item;

	for (item = dprintf_string; item != NULL; ) {
		/* Match only whole items: next char is ',' or end of list. */
		if (strncmp(item, string, len) == 0) {
			char after = item[len];

			if (after == ',' || after == '\0')
				return (1);
		}

		/* Advance to the item following the next comma, if any. */
		item = strchr(item, ',');
		if (item != NULL)
			item++;
	}

	return (0);
}
710
711 void
712 dprintf_setup(int *argc, char **argv)
713 {
714 int i, j;
715
716 /*
717 * Debugging can be specified two ways: by setting the
718 * environment variable ZFS_DEBUG, or by including a
719 * "debug=..." argument on the command line. The command
720 * line setting overrides the environment variable.
721 */
722
723 for (i = 1; i < *argc; i++) {
724 int len = strlen("debug=");
725 /* First look for a command line argument */
726 if (strncmp("debug=", argv[i], len) == 0) {
727 dprintf_string = argv[i] + len;
728 /* Remove from args */
729 for (j = i; j < *argc; j++)
730 argv[j] = argv[j+1];
731 argv[j] = NULL;
732 (*argc)--;
733 }
734 }
735
736 if (dprintf_string == NULL) {
737 /* Look for ZFS_DEBUG environment variable */
738 dprintf_string = getenv("ZFS_DEBUG");
739 }
740
741 /*
742 * Are we just turning on all debugging?
743 */
744 if (dprintf_find_string("on"))
745 dprintf_print_all = 1;
746 }
747
748 /*
749 * =========================================================================
750 * debug printfs
751 * =========================================================================
752 */
753 void
754 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
755 {
756 const char *newfile;
757 va_list adx;
758
759 /*
760 * Get rid of annoying "../common/" prefix to filename.
761 */
762 newfile = strrchr(file, '/');
763 if (newfile != NULL) {
764 newfile = newfile + 1; /* Get rid of leading / */
765 } else {
766 newfile = file;
767 }
768
769 if (dprintf_print_all ||
770 dprintf_find_string(newfile) ||
771 dprintf_find_string(func)) {
772 /* Print out just the function name if requested */
773 flockfile(stdout);
774 if (dprintf_find_string("pid"))
775 (void) printf("%d ", getpid());
776 if (dprintf_find_string("tid"))
777 (void) printf("%u ", (uint_t) pthread_self());
778 if (dprintf_find_string("cpu"))
779 (void) printf("%u ", getcpuid());
780 if (dprintf_find_string("time"))
781 (void) printf("%llu ", gethrtime());
782 if (dprintf_find_string("long"))
783 (void) printf("%s, line %d: ", newfile, line);
784 (void) printf("%s: ", func);
785 va_start(adx, fmt);
786 (void) vprintf(fmt, adx);
787 va_end(adx);
788 funlockfile(stdout);
789 }
790 }
791
792 #endif /* ZFS_DEBUG */
793
794 /*
795 * =========================================================================
796 * cmn_err() and panic()
797 * =========================================================================
798 */
799 static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
800 static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
801
/*
 * Print "error: " followed by the formatted message and a newline on
 * stderr, then abort the process.  Does not return.
 */
void
vpanic(const char *fmt, va_list adx)
{
	(void) fputs("error: ", stderr);
	(void) vfprintf(stderr, fmt, adx);
	(void) fputs("\n", stderr);

	abort();	/* think of it as a "user-level crash dump" */
}
811
/* Variadic front end to vpanic(); the process is aborted there. */
void
panic(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vpanic(fmt, args);
	va_end(args);
}
821
822 void
823 vcmn_err(int ce, const char *fmt, va_list adx)
824 {
825 if (ce == CE_PANIC)
826 vpanic(fmt, adx);
827 if (ce != CE_NOTE) { /* suppress noise in userland stress testing */
828 (void) fprintf(stderr, "%s", ce_prefix[ce]);
829 (void) vfprintf(stderr, fmt, adx);
830 (void) fprintf(stderr, "%s", ce_suffix[ce]);
831 }
832 }
833
/* Variadic front end to vcmn_err(). */
/*PRINTFLIKE2*/
void
cmn_err(int ce, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vcmn_err(ce, fmt, args);
	va_end(args);
}
844
845 /*
846 * =========================================================================
847 * kobj interfaces
848 * =========================================================================
849 */
850 struct _buf *
851 kobj_open_file(char *name)
852 {
853 struct _buf *file;
854 vnode_t *vp;
855
856 /* set vp as the _fd field of the file */
857 if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir,
858 -1) != 0)
859 return ((void *)-1UL);
860
861 file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
862 file->_fd = (intptr_t)vp;
863 return (file);
864 }
865
866 int
867 kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
868 {
869 ssize_t resid;
870
871 vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
872 UIO_SYSSPACE, 0, 0, 0, &resid);
873
874 return (size - resid);
875 }
876
877 void
878 kobj_close_file(struct _buf *file)
879 {
880 vn_close((vnode_t *)file->_fd);
881 umem_free(file, sizeof (struct _buf));
882 }
883
884 int
885 kobj_get_filesize(struct _buf *file, uint64_t *size)
886 {
887 struct stat64 st;
888 vnode_t *vp = (vnode_t *)file->_fd;
889
890 if (fstat64(vp->v_fd, &st) == -1) {
891 vn_close(vp);
892 return (errno);
893 }
894 *size = st.st_size;
895 return (0);
896 }
897
898 /*
899 * =========================================================================
900 * misc routines
901 * =========================================================================
902 */
903
904 void
905 delay(clock_t ticks)
906 {
907 poll(0, 0, ticks * (1000 / hz));
908 }
909
/*
 * Find highest one bit set.
 * Returns bit number + 1 of highest bit that is set, otherwise returns 0.
 * High order bit is 31 (or 63 in _LP64 kernel).
 *
 * Implemented as a binary search: each step tests the upper half of the
 * remaining bit range and, if anything is set there, shifts it down.
 */
int
highbit(ulong_t i)
{
	int pos = 1;
	int width;

	if (i == 0)
		return (0);

#ifdef _LP64
	if (i & 0xffffffff00000000ul) {
		pos += 32;
		i >>= 32;
	}
#endif
	/* Masks tested in turn: 0xffff0000, 0xff00, 0xf0, 0xc, 0x2. */
	for (width = 16; width > 0; width /= 2) {
		ulong_t upper = ((1UL << width) - 1) << width;

		if (i & upper) {
			pos += width;
			i >>= width;
		}
	}

	return (pos);
}
944
945 static int random_fd = -1, urandom_fd = -1;
946
947 static int
948 random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
949 {
950 size_t resid = len;
951 ssize_t bytes;
952
953 ASSERT(fd != -1);
954
955 while (resid != 0) {
956 bytes = read(fd, ptr, resid);
957 ASSERT3S(bytes, >=, 0);
958 ptr += bytes;
959 resid -= bytes;
960 }
961
962 return (0);
963 }
964
965 int
966 random_get_bytes(uint8_t *ptr, size_t len)
967 {
968 return (random_get_bytes_common(ptr, len, random_fd));
969 }
970
971 int
972 random_get_pseudo_bytes(uint8_t *ptr, size_t len)
973 {
974 return (random_get_bytes_common(ptr, len, urandom_fd));
975 }
976
/*
 * Convert the string 'hw_serial' to an unsigned long in the given base.
 *
 * The converted value is always stored in '*result'.  If 'nptr' is
 * non-NULL it receives a pointer to the first character not consumed.
 * Returns 0 on success, EINVAL if no digits could be converted, or the
 * errno set by strtoul() (e.g. ERANGE on overflow).
 *
 * errno is cleared before the conversion so that a legitimate result of 0
 * is not reported as a failure because of a stale errno value, and so
 * that overflow is detected even though the result is nonzero.
 */
int
ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
{
	char *end;

	errno = 0;
	*result = strtoul(hw_serial, &end, base);

	if (nptr != NULL)
		*nptr = end;

	if (end == hw_serial)
		return (EINVAL);

	return (errno);
}
987
/*
 * Convert the string 'str' to an unsigned long long in the given base.
 *
 * The converted value is always stored in '*result'.  If 'nptr' is
 * non-NULL it receives a pointer to the first character not consumed.
 * Returns 0 on success, EINVAL if no digits could be converted, or the
 * errno set by strtoull() (e.g. ERANGE on overflow).
 *
 * errno is cleared before the conversion so that a legitimate result of 0
 * is not reported as a failure because of a stale errno value, and so
 * that overflow is detected even though the result is nonzero.
 */
int
ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
{
	char *end;

	errno = 0;
	*result = strtoull(str, &end, base);

	if (nptr != NULL)
		*nptr = end;

	if (end == str)
		return (EINVAL);

	return (errno);
}
998
999 /*
1000 * =========================================================================
1001 * kernel emulation setup & teardown
1002 * =========================================================================
1003 */
/*
 * Allocation-failure callback registered by kernel_init(): print a message
 * on stderr and abort.  The return statement is never reached.
 */
static int
umem_out_of_memory(void)
{
	(void) fputs("out of memory -- generating core dump\n", stderr);
	abort();

	return (0);
}
1013
1014 void
1015 kernel_init(int mode)
1016 {
1017 umem_nofail_callback(umem_out_of_memory);
1018
1019 physmem = sysconf(_SC_PHYS_PAGES);
1020
1021 dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
1022 (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
1023
1024 (void) snprintf(hw_serial, sizeof (hw_serial), "%ld",
1025 (mode & FWRITE) ? gethostid() : 0);
1026
1027 VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
1028 VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);
1029
1030 thread_init();
1031 system_taskq_init();
1032
1033 spa_init(mode);
1034 }
1035
1036 void
1037 kernel_fini(void)
1038 {
1039 spa_fini();
1040
1041 system_taskq_fini();
1042 thread_fini();
1043
1044 close(random_fd);
1045 close(urandom_fd);
1046
1047 random_fd = -1;
1048 urandom_fd = -1;
1049 }
1050
1051 uid_t
1052 crgetuid(cred_t *cr)
1053 {
1054 return (0);
1055 }
1056
1057 gid_t
1058 crgetgid(cred_t *cr)
1059 {
1060 return (0);
1061 }
1062
1063 int
1064 crgetngroups(cred_t *cr)
1065 {
1066 return (0);
1067 }
1068
1069 gid_t *
1070 crgetgroups(cred_t *cr)
1071 {
1072 return (NULL);
1073 }
1074
1075 int
1076 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
1077 {
1078 return (0);
1079 }
1080
1081 int
1082 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
1083 {
1084 return (0);
1085 }
1086
1087 int
1088 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
1089 {
1090 return (0);
1091 }
1092
1093 ksiddomain_t *
1094 ksid_lookupdomain(const char *dom)
1095 {
1096 ksiddomain_t *kd;
1097
1098 kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL);
1099 kd->kd_name = spa_strdup(dom);
1100 return (kd);
1101 }
1102
1103 void
1104 ksiddomain_rele(ksiddomain_t *ksid)
1105 {
1106 spa_strfree(ksid->kd_name);
1107 umem_free(ksid, sizeof (ksiddomain_t));
1108 }
1109
/*
 * Format into a buffer allocated by vasprintf() and return it.  Works on
 * a copy of 'adx', so the caller's va_list is not consumed.  A formatting
 * or allocation failure is fatal.
 */
char *
kmem_vasprintf(const char *fmt, va_list adx)
{
	va_list args;
	char *str = NULL;

	va_copy(args, adx);
	VERIFY(vasprintf(&str, fmt, args) != -1);
	va_end(args);

	return (str);
}
1122
/*
 * printf-style formatting into a buffer allocated by vasprintf(); the
 * buffer is returned.  A formatting or allocation failure is fatal.
 */
char *
kmem_asprintf(const char *fmt, ...)
{
	va_list args;
	char *str = NULL;

	va_start(args, fmt);
	VERIFY(vasprintf(&str, fmt, args) != -1);
	va_end(args);

	return (str);
}
1135
1136 /* ARGSUSED */
1137 int
1138 zfs_onexit_fd_hold(int fd, minor_t *minorp)
1139 {
1140 *minorp = 0;
1141 return (0);
1142 }
1143
/* Userland stub: nothing was held, so there is nothing to release. */
/* ARGSUSED */
void
zfs_onexit_fd_rele(int fd)
{
	/* intentionally empty */
}
1149
1150 /* ARGSUSED */
1151 int
1152 zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
1153 uint64_t *action_handle)
1154 {
1155 return (0);
1156 }
1157
1158 /* ARGSUSED */
1159 int
1160 zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
1161 {
1162 return (0);
1163 }
1164
1165 /* ARGSUSED */
1166 int
1167 zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
1168 {
1169 return (0);
1170 }