]> git.proxmox.com Git - mirror_zfs.git/blob - lib/libzpool/kernel.c
Add linux user disk support
[mirror_zfs.git] / lib / libzpool / kernel.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 #include <assert.h>
26 #include <fcntl.h>
27 #include <poll.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <zlib.h>
32 #include <sys/signal.h>
33 #include <sys/spa.h>
34 #include <sys/stat.h>
35 #include <sys/processor.h>
36 #include <sys/zfs_context.h>
37 #include <sys/utsname.h>
38 #include <sys/time.h>
39 #include <sys/mount.h> /* for BLKGETSIZE64 */
40 #include <sys/systeminfo.h>
41
42 /*
43 * Emulation of kernel services in userland.
44 */
45
/*
 * NOTE(review): not referenced anywhere in this file; presumably consulted
 * by the assertion machinery elsewhere in libzpool -- confirm.
 */
int aok;
/* Physical memory size in pages; set from sysconf() in kernel_init(). */
uint64_t physmem;
/*
 * Placeholder root vnode.  It is not dereferenced in this file; the value
 * only serves as a recognizable token (vn_openat() asserts that its
 * starting vnode equals it).
 */
vnode_t *rootdir = (vnode_t *)0xabcd1234;
/* Host id rendered as a decimal string by kernel_init(). */
char hw_serial[HW_HOSTID_LEN];

struct utsname utsname = {
	"userland", "libzpool", "1", "1", "na"
};

/* this only exists to have its address taken */
struct proc p0;
57
58 /*
59 * =========================================================================
60 * threads
61 * =========================================================================
62 */
63
/* Broadcast by zk_thread_exit(); thread_fini() waits on it. */
pthread_cond_t kthread_cond = PTHREAD_COND_INITIALIZER;
/* Held while kthread_nr is modified once threads are running. */
pthread_mutex_t kthread_lock = PTHREAD_MUTEX_INITIALIZER;
/* Thread-specific-data key under which each thread's kthread_t is stored. */
pthread_key_t kthread_key;
/* Count of emulated kernel threads currently accounted for. */
int kthread_nr = 0;
68
69 static void
70 thread_init(void)
71 {
72 kthread_t *kt;
73
74 VERIFY3S(pthread_key_create(&kthread_key, NULL), ==, 0);
75
76 /* Create entry for primary kthread */
77 kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL);
78 kt->t_tid = pthread_self();
79 kt->t_func = NULL;
80
81 VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0);
82
83 /* Only the main thread should be running at the moment */
84 ASSERT3S(kthread_nr, ==, 0);
85 kthread_nr = 1;
86 }
87
88 static void
89 thread_fini(void)
90 {
91 kthread_t *kt = curthread;
92
93 ASSERT(pthread_equal(kt->t_tid, pthread_self()));
94 ASSERT3P(kt->t_func, ==, NULL);
95
96 umem_free(kt, sizeof(kthread_t));
97
98 /* Wait for all threads to exit via thread_exit() */
99 VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0);
100
101 kthread_nr--; /* Main thread is exiting */
102
103 while (kthread_nr > 0)
104 VERIFY3S(pthread_cond_wait(&kthread_cond, &kthread_lock), ==,
105 0);
106
107 ASSERT3S(kthread_nr, ==, 0);
108 VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0);
109
110 VERIFY3S(pthread_key_delete(kthread_key), ==, 0);
111 }
112
113 kthread_t *
114 zk_thread_current(void)
115 {
116 kthread_t *kt = pthread_getspecific(kthread_key);
117
118 ASSERT3P(kt, !=, NULL);
119
120 return kt;
121 }
122
123 void *
124 zk_thread_helper(void *arg)
125 {
126 kthread_t *kt = (kthread_t *) arg;
127
128 VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0);
129
130 VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0);
131 kthread_nr++;
132 VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0);
133
134 kt->t_tid = pthread_self();
135 ((thread_func_arg_t) kt->t_func)(kt->t_arg);
136
137 /* Unreachable, thread must exit with thread_exit() */
138 abort();
139
140 return NULL;
141 }
142
143 kthread_t *
144 zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg,
145 size_t len, proc_t *pp, int state, pri_t pri)
146 {
147 kthread_t *kt;
148 pthread_attr_t attr;
149 size_t stack;
150
151 ASSERT3S(state & ~TS_RUN, ==, 0);
152
153 kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL);
154 kt->t_func = func;
155 kt->t_arg = arg;
156
157 /*
158 * The Solaris kernel stack size is 24k for x86/x86_64.
159 * The Linux kernel stack size is 8k for x86/x86_64.
160 *
161 * We reduce the default stack size in userspace, to ensure
162 * we observe stack overruns in user space as well as in
163 * kernel space. PTHREAD_STACK_MIN is the minimum stack
164 * required for a NULL procedure in user space and is added
165 * in to the stack requirements.
166 *
167 * Some buggy NPTL threading implementations include the
168 * guard area within the stack size allocations. In
169 * this case we allocate an extra page to account for the
170 * guard area since we only have two pages of usable stack
171 * on Linux.
172 */
173
174 stack = PTHREAD_STACK_MIN + MAX(stksize, STACK_SIZE) +
175 EXTRA_GUARD_BYTES;
176
177 VERIFY3S(pthread_attr_init(&attr), ==, 0);
178 VERIFY3S(pthread_attr_setstacksize(&attr, stack), ==, 0);
179 VERIFY3S(pthread_attr_setguardsize(&attr, PAGESIZE), ==, 0);
180
181 VERIFY3S(pthread_create(&kt->t_tid, &attr, &zk_thread_helper, kt),
182 ==, 0);
183
184 VERIFY3S(pthread_attr_destroy(&attr), ==, 0);
185
186 return kt;
187 }
188
189 void
190 zk_thread_exit(void)
191 {
192 kthread_t *kt = curthread;
193
194 ASSERT(pthread_equal(kt->t_tid, pthread_self()));
195
196 umem_free(kt, sizeof(kthread_t));
197
198 pthread_mutex_lock(&kthread_lock);
199 kthread_nr--;
200 pthread_mutex_unlock(&kthread_lock);
201
202 pthread_cond_broadcast(&kthread_cond);
203 pthread_exit((void *)TS_MAGIC);
204 }
205
206 void
207 zk_thread_join(kt_did_t tid)
208 {
209 void *ret;
210
211 pthread_join((pthread_t)tid, &ret);
212 VERIFY3P(ret, ==, (void *)TS_MAGIC);
213 }
214
215 /*
216 * =========================================================================
217 * kstats
218 * =========================================================================
219 */
220 /*ARGSUSED*/
221 kstat_t *
222 kstat_create(char *module, int instance, char *name, char *class,
223 uchar_t type, ulong_t ndata, uchar_t ks_flag)
224 {
225 return (NULL);
226 }
227
228 /*ARGSUSED*/
229 void
230 kstat_install(kstat_t *ksp)
231 {}
232
233 /*ARGSUSED*/
234 void
235 kstat_delete(kstat_t *ksp)
236 {}
237
238 /*
239 * =========================================================================
240 * mutexes
241 * =========================================================================
242 */
243
244 void
245 mutex_init(kmutex_t *mp, char *name, int type, void *cookie)
246 {
247 ASSERT3S(type, ==, MUTEX_DEFAULT);
248 ASSERT3P(cookie, ==, NULL);
249 mp->m_owner = MTX_INIT;
250 mp->m_magic = MTX_MAGIC;
251 VERIFY3S(pthread_mutex_init(&mp->m_lock, NULL), ==, 0);
252 }
253
254 void
255 mutex_destroy(kmutex_t *mp)
256 {
257 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
258 ASSERT3P(mp->m_owner, ==, MTX_INIT);
259 VERIFY3S(pthread_mutex_destroy(&(mp)->m_lock), ==, 0);
260 mp->m_owner = MTX_DEST;
261 mp->m_magic = 0;
262 }
263
264 void
265 mutex_enter(kmutex_t *mp)
266 {
267 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
268 ASSERT3P(mp->m_owner, !=, MTX_DEST);
269 ASSERT3P(mp->m_owner, !=, curthread);
270 VERIFY3S(pthread_mutex_lock(&mp->m_lock), ==, 0);
271 ASSERT3P(mp->m_owner, ==, MTX_INIT);
272 mp->m_owner = curthread;
273 }
274
275 int
276 mutex_tryenter(kmutex_t *mp)
277 {
278 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
279 ASSERT3P(mp->m_owner, !=, MTX_DEST);
280 if (0 == pthread_mutex_trylock(&mp->m_lock)) {
281 ASSERT3P(mp->m_owner, ==, MTX_INIT);
282 mp->m_owner = curthread;
283 return (1);
284 } else {
285 return (0);
286 }
287 }
288
289 void
290 mutex_exit(kmutex_t *mp)
291 {
292 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
293 ASSERT3P(mutex_owner(mp), ==, curthread);
294 mp->m_owner = MTX_INIT;
295 VERIFY3S(pthread_mutex_unlock(&mp->m_lock), ==, 0);
296 }
297
298 void *
299 mutex_owner(kmutex_t *mp)
300 {
301 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
302 return (mp->m_owner);
303 }
304
305 int
306 mutex_held(kmutex_t *mp)
307 {
308 return (mp->m_owner == curthread);
309 }
310
311 /*
312 * =========================================================================
313 * rwlocks
314 * =========================================================================
315 */
316
317 void
318 rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
319 {
320 ASSERT3S(type, ==, RW_DEFAULT);
321 ASSERT3P(arg, ==, NULL);
322 VERIFY3S(pthread_rwlock_init(&rwlp->rw_lock, NULL), ==, 0);
323 rwlp->rw_owner = RW_INIT;
324 rwlp->rw_wr_owner = RW_INIT;
325 rwlp->rw_readers = 0;
326 rwlp->rw_magic = RW_MAGIC;
327 }
328
329 void
330 rw_destroy(krwlock_t *rwlp)
331 {
332 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
333
334 VERIFY3S(pthread_rwlock_destroy(&rwlp->rw_lock), ==, 0);
335 rwlp->rw_magic = 0;
336 }
337
338 void
339 rw_enter(krwlock_t *rwlp, krw_t rw)
340 {
341 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
342 ASSERT3P(rwlp->rw_owner, !=, curthread);
343 ASSERT3P(rwlp->rw_wr_owner, !=, curthread);
344
345 if (rw == RW_READER) {
346 VERIFY3S(pthread_rwlock_rdlock(&rwlp->rw_lock), ==, 0);
347 ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT);
348
349 atomic_inc_uint(&rwlp->rw_readers);
350 } else {
351 VERIFY3S(pthread_rwlock_wrlock(&rwlp->rw_lock), ==, 0);
352 ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT);
353 ASSERT3U(rwlp->rw_readers, ==, 0);
354
355 rwlp->rw_wr_owner = curthread;
356 }
357
358 rwlp->rw_owner = curthread;
359 }
360
361 void
362 rw_exit(krwlock_t *rwlp)
363 {
364 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
365 ASSERT(RW_LOCK_HELD(rwlp));
366
367 if (RW_READ_HELD(rwlp))
368 atomic_dec_uint(&rwlp->rw_readers);
369 else
370 rwlp->rw_wr_owner = RW_INIT;
371
372 rwlp->rw_owner = RW_INIT;
373 VERIFY3S(pthread_rwlock_unlock(&rwlp->rw_lock), ==, 0);
374 }
375
376 int
377 rw_tryenter(krwlock_t *rwlp, krw_t rw)
378 {
379 int rv;
380
381 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
382
383 if (rw == RW_READER)
384 rv = pthread_rwlock_tryrdlock(&rwlp->rw_lock);
385 else
386 rv = pthread_rwlock_trywrlock(&rwlp->rw_lock);
387
388 if (rv == 0) {
389 ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT);
390
391 if (rw == RW_READER)
392 atomic_inc_uint(&rwlp->rw_readers);
393 else {
394 ASSERT3U(rwlp->rw_readers, ==, 0);
395 rwlp->rw_wr_owner = curthread;
396 }
397
398 rwlp->rw_owner = curthread;
399 return (1);
400 }
401
402 VERIFY3S(rv, ==, EBUSY);
403
404 return (0);
405 }
406
407 int
408 rw_tryupgrade(krwlock_t *rwlp)
409 {
410 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
411
412 return (0);
413 }
414
415 /*
416 * =========================================================================
417 * condition variables
418 * =========================================================================
419 */
420
421 void
422 cv_init(kcondvar_t *cv, char *name, int type, void *arg)
423 {
424 ASSERT3S(type, ==, CV_DEFAULT);
425 cv->cv_magic = CV_MAGIC;
426 VERIFY3S(pthread_cond_init(&cv->cv, NULL), ==, 0);
427 }
428
429 void
430 cv_destroy(kcondvar_t *cv)
431 {
432 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
433 VERIFY3S(pthread_cond_destroy(&cv->cv), ==, 0);
434 cv->cv_magic = 0;
435 }
436
437 void
438 cv_wait(kcondvar_t *cv, kmutex_t *mp)
439 {
440 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
441 ASSERT3P(mutex_owner(mp), ==, curthread);
442 mp->m_owner = MTX_INIT;
443 int ret = pthread_cond_wait(&cv->cv, &mp->m_lock);
444 if (ret != 0)
445 VERIFY3S(ret, ==, EINTR);
446 mp->m_owner = curthread;
447 }
448
449 clock_t
450 cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
451 {
452 int error;
453 struct timeval tv;
454 timestruc_t ts;
455 clock_t delta;
456
457 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
458
459 top:
460 delta = abstime - ddi_get_lbolt();
461 if (delta <= 0)
462 return (-1);
463
464 VERIFY(gettimeofday(&tv, NULL) == 0);
465
466 ts.tv_sec = tv.tv_sec + delta / hz;
467 ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz);
468 if (ts.tv_nsec >= NANOSEC) {
469 ts.tv_sec++;
470 ts.tv_nsec -= NANOSEC;
471 }
472
473 ASSERT3P(mutex_owner(mp), ==, curthread);
474 mp->m_owner = MTX_INIT;
475 error = pthread_cond_timedwait(&cv->cv, &mp->m_lock, &ts);
476 mp->m_owner = curthread;
477
478 if (error == ETIMEDOUT)
479 return (-1);
480
481 if (error == EINTR)
482 goto top;
483
484 VERIFY3S(error, ==, 0);
485
486 return (1);
487 }
488
489 void
490 cv_signal(kcondvar_t *cv)
491 {
492 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
493 VERIFY3S(pthread_cond_signal(&cv->cv), ==, 0);
494 }
495
496 void
497 cv_broadcast(kcondvar_t *cv)
498 {
499 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
500 VERIFY3S(pthread_cond_broadcast(&cv->cv), ==, 0);
501 }
502
503 /*
504 * =========================================================================
505 * vnode operations
506 * =========================================================================
507 */
508 /*
509 * Note: for the xxxat() versions of these functions, we assume that the
510 * starting vp is always rootdir (which is true for spa_directory.c, the only
511 * ZFS consumer of these interfaces). We assert this is true, and then emulate
512 * them by adding '/' in front of the path.
513 */
514
515 /*ARGSUSED*/
516 int
517 vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
518 {
519 int fd;
520 vnode_t *vp;
521 int old_umask;
522 char *realpath;
523 struct stat64 st;
524 int err;
525
526 realpath = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
527
528 /*
529 * If we're accessing a real disk from userland, we need to use
530 * the character interface to avoid caching. This is particularly
531 * important if we're trying to look at a real in-kernel storage
532 * pool from userland, e.g. via zdb, because otherwise we won't
533 * see the changes occurring under the segmap cache.
534 * On the other hand, the stupid character device returns zero
535 * for its size. So -- gag -- we open the block device to get
536 * its size, and remember it for subsequent VOP_GETATTR().
537 */
538 #if defined(__sun__) || defined(__sun)
539 if (strncmp(path, "/dev/", 5) == 0) {
540 #else
541 if (0) {
542 #endif
543 char *dsk;
544 fd = open64(path, O_RDONLY);
545 if (fd == -1) {
546 err = errno;
547 free(realpath);
548 return (err);
549 }
550 if (fstat64(fd, &st) == -1) {
551 err = errno;
552 close(fd);
553 free(realpath);
554 return (err);
555 }
556 close(fd);
557 (void) sprintf(realpath, "%s", path);
558 dsk = strstr(path, "/dsk/");
559 if (dsk != NULL)
560 (void) sprintf(realpath + (dsk - path) + 1, "r%s",
561 dsk + 1);
562 } else {
563 (void) sprintf(realpath, "%s", path);
564 if (!(flags & FCREAT) && stat64(realpath, &st) == -1) {
565 err = errno;
566 free(realpath);
567 return (err);
568 }
569 }
570
571 if (!(flags & FCREAT) && S_ISBLK(st.st_mode)) {
572 #ifdef __linux__
573 flags |= O_DIRECT;
574 #endif
575 /* We shouldn't be writing to block devices in userspace */
576 VERIFY(!(flags & FWRITE));
577 }
578
579 if (flags & FCREAT)
580 old_umask = umask(0);
581
582 /*
583 * The construct 'flags - FREAD' conveniently maps combinations of
584 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
585 */
586 fd = open64(realpath, flags - FREAD, mode);
587 free(realpath);
588
589 if (flags & FCREAT)
590 (void) umask(old_umask);
591
592 if (fd == -1)
593 return (errno);
594
595 if (fstat64(fd, &st) == -1) {
596 err = errno;
597 close(fd);
598 return (err);
599 }
600
601 #ifdef __linux__
602 /* In Linux, use an ioctl to get the size of a block device. */
603 if (S_ISBLK(st.st_mode)) {
604 if (ioctl(fd, BLKGETSIZE64, &st.st_size) != 0) {
605 err = errno;
606 close(fd);
607 return (err);
608 }
609 }
610 #endif
611 (void) fcntl(fd, F_SETFD, FD_CLOEXEC);
612
613 *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
614
615 vp->v_fd = fd;
616 vp->v_size = st.st_size;
617 vp->v_path = spa_strdup(path);
618
619 return (0);
620 }
621
622 /*ARGSUSED*/
623 int
624 vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
625 int x3, vnode_t *startvp, int fd)
626 {
627 char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
628 int ret;
629
630 ASSERT(startvp == rootdir);
631 (void) sprintf(realpath, "/%s", path);
632
633 /* fd ignored for now, need if want to simulate nbmand support */
634 ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
635
636 umem_free(realpath, strlen(path) + 2);
637
638 return (ret);
639 }
640
641 /*ARGSUSED*/
642 int
643 vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
644 int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
645 {
646 ssize_t rc, done = 0, split;
647
648 if (uio == UIO_READ) {
649 rc = pread64(vp->v_fd, addr, len, offset);
650 } else {
651 /*
652 * To simulate partial disk writes, we split writes into two
653 * system calls so that the process can be killed in between.
654 */
655 split = (len > 0 ? rand() % len : 0);
656 rc = pwrite64(vp->v_fd, addr, split, offset);
657 if (rc != -1) {
658 done = rc;
659 rc = pwrite64(vp->v_fd, (char *)addr + split,
660 len - split, offset + split);
661 }
662 }
663
664 #ifdef __linux__
665 if (rc == -1 && errno == EINVAL) {
666 /*
667 * Under Linux, this most likely means an alignment issue
668 * (memory or disk) due to O_DIRECT, so we abort() in order to
669 * catch the offender.
670 */
671 abort();
672 }
673 #endif
674 if (rc == -1)
675 return (errno);
676
677 done += rc;
678
679 if (residp)
680 *residp = len - done;
681 else if (done != len)
682 return (EIO);
683 return (0);
684 }
685
686 void
687 vn_close(vnode_t *vp)
688 {
689 close(vp->v_fd);
690 spa_strfree(vp->v_path);
691 umem_free(vp, sizeof (vnode_t));
692 }
693
694 /*
695 * At a minimum we need to update the size since vdev_reopen()
696 * will no longer call vn_openat().
697 */
698 int
699 fop_getattr(vnode_t *vp, vattr_t *vap)
700 {
701 struct stat64 st;
702
703 if (fstat64(vp->v_fd, &st) == -1) {
704 close(vp->v_fd);
705 return (errno);
706 }
707
708 vap->va_size = st.st_size;
709 return (0);
710 }
711
712 #ifdef ZFS_DEBUG
713
714 /*
715 * =========================================================================
716 * Figure out which debugging statements to print
717 * =========================================================================
718 */
719
/*
 * Comma-separated debug selector list, taken by dprintf_setup() from a
 * "debug=..." argument or the ZFS_DEBUG environment variable; NULL when
 * neither is given.
 */
static char *dprintf_string;
/* Set to 1 by dprintf_setup() when the selector list contains "on". */
static int dprintf_print_all;
722
723 int
724 dprintf_find_string(const char *string)
725 {
726 char *tmp_str = dprintf_string;
727 int len = strlen(string);
728
729 /*
730 * Find out if this is a string we want to print.
731 * String format: file1.c,function_name1,file2.c,file3.c
732 */
733
734 while (tmp_str != NULL) {
735 if (strncmp(tmp_str, string, len) == 0 &&
736 (tmp_str[len] == ',' || tmp_str[len] == '\0'))
737 return (1);
738 tmp_str = strchr(tmp_str, ',');
739 if (tmp_str != NULL)
740 tmp_str++; /* Get rid of , */
741 }
742 return (0);
743 }
744
745 void
746 dprintf_setup(int *argc, char **argv)
747 {
748 int i, j;
749
750 /*
751 * Debugging can be specified two ways: by setting the
752 * environment variable ZFS_DEBUG, or by including a
753 * "debug=..." argument on the command line. The command
754 * line setting overrides the environment variable.
755 */
756
757 for (i = 1; i < *argc; i++) {
758 int len = strlen("debug=");
759 /* First look for a command line argument */
760 if (strncmp("debug=", argv[i], len) == 0) {
761 dprintf_string = argv[i] + len;
762 /* Remove from args */
763 for (j = i; j < *argc; j++)
764 argv[j] = argv[j+1];
765 argv[j] = NULL;
766 (*argc)--;
767 }
768 }
769
770 if (dprintf_string == NULL) {
771 /* Look for ZFS_DEBUG environment variable */
772 dprintf_string = getenv("ZFS_DEBUG");
773 }
774
775 /*
776 * Are we just turning on all debugging?
777 */
778 if (dprintf_find_string("on"))
779 dprintf_print_all = 1;
780 }
781
782 /*
783 * =========================================================================
784 * debug printfs
785 * =========================================================================
786 */
787 void
788 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
789 {
790 const char *newfile;
791 va_list adx;
792
793 /*
794 * Get rid of annoying "../common/" prefix to filename.
795 */
796 newfile = strrchr(file, '/');
797 if (newfile != NULL) {
798 newfile = newfile + 1; /* Get rid of leading / */
799 } else {
800 newfile = file;
801 }
802
803 if (dprintf_print_all ||
804 dprintf_find_string(newfile) ||
805 dprintf_find_string(func)) {
806 /* Print out just the function name if requested */
807 flockfile(stdout);
808 if (dprintf_find_string("pid"))
809 (void) printf("%d ", getpid());
810 if (dprintf_find_string("tid"))
811 (void) printf("%u ", (uint_t) pthread_self());
812 if (dprintf_find_string("cpu"))
813 (void) printf("%u ", getcpuid());
814 if (dprintf_find_string("time"))
815 (void) printf("%llu ", gethrtime());
816 if (dprintf_find_string("long"))
817 (void) printf("%s, line %d: ", newfile, line);
818 (void) printf("%s: ", func);
819 va_start(adx, fmt);
820 (void) vprintf(fmt, adx);
821 va_end(adx);
822 funlockfile(stdout);
823 }
824 }
825
826 #endif /* ZFS_DEBUG */
827
828 /*
829 * =========================================================================
830 * cmn_err() and panic()
831 * =========================================================================
832 */
/*
 * Text printed before and after a cmn_err() message, indexed by the
 * severity level passed as 'ce'.  Both tables have CE_IGNORE rows, so
 * only levels below CE_IGNORE are valid indices.
 */
static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
835
/*
 * Print "error: " followed by the formatted message and a newline on
 * stderr, then abort the process.  Does not return.
 */
void
vpanic(const char *fmt, va_list adx)
{
	(void) fputs("error: ", stderr);
	(void) vfprintf(stderr, fmt, adx);
	(void) fputs("\n", stderr);

	abort();	/* think of it as a "user-level crash dump" */
}
845
/* Variadic front end to vpanic(): report the message and abort. */
void
panic(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vpanic(fmt, args);
	va_end(args);
}
855
856 void
857 vcmn_err(int ce, const char *fmt, va_list adx)
858 {
859 if (ce == CE_PANIC)
860 vpanic(fmt, adx);
861 if (ce != CE_NOTE) { /* suppress noise in userland stress testing */
862 (void) fprintf(stderr, "%s", ce_prefix[ce]);
863 (void) vfprintf(stderr, fmt, adx);
864 (void) fprintf(stderr, "%s", ce_suffix[ce]);
865 }
866 }
867
/* Variadic front end to vcmn_err(). */
/*PRINTFLIKE2*/
void
cmn_err(int ce, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vcmn_err(ce, fmt, args);
	va_end(args);
}
878
879 /*
880 * =========================================================================
881 * kobj interfaces
882 * =========================================================================
883 */
884 struct _buf *
885 kobj_open_file(char *name)
886 {
887 struct _buf *file;
888 vnode_t *vp;
889
890 /* set vp as the _fd field of the file */
891 if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir,
892 -1) != 0)
893 return ((void *)-1UL);
894
895 file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
896 file->_fd = (intptr_t)vp;
897 return (file);
898 }
899
900 int
901 kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
902 {
903 ssize_t resid;
904
905 vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
906 UIO_SYSSPACE, 0, 0, 0, &resid);
907
908 return (size - resid);
909 }
910
911 void
912 kobj_close_file(struct _buf *file)
913 {
914 vn_close((vnode_t *)file->_fd);
915 umem_free(file, sizeof (struct _buf));
916 }
917
918 int
919 kobj_get_filesize(struct _buf *file, uint64_t *size)
920 {
921 struct stat64 st;
922 vnode_t *vp = (vnode_t *)file->_fd;
923
924 if (fstat64(vp->v_fd, &st) == -1) {
925 vn_close(vp);
926 return (errno);
927 }
928 *size = st.st_size;
929 return (0);
930 }
931
932 /*
933 * =========================================================================
934 * misc routines
935 * =========================================================================
936 */
937
938 void
939 delay(clock_t ticks)
940 {
941 poll(0, 0, ticks * (1000 / hz));
942 }
943
944 /*
945 * Find highest one bit set.
946 * Returns bit number + 1 of highest bit that is set, otherwise returns 0.
947 * High order bit is 31 (or 63 in _LP64 kernel).
948 */
949 int
950 highbit(ulong_t i)
951 {
952 register int h = 1;
953
954 if (i == 0)
955 return (0);
956 #ifdef _LP64
957 if (i & 0xffffffff00000000ul) {
958 h += 32; i >>= 32;
959 }
960 #endif
961 if (i & 0xffff0000) {
962 h += 16; i >>= 16;
963 }
964 if (i & 0xff00) {
965 h += 8; i >>= 8;
966 }
967 if (i & 0xf0) {
968 h += 4; i >>= 4;
969 }
970 if (i & 0xc) {
971 h += 2; i >>= 2;
972 }
973 if (i & 0x2) {
974 h += 1;
975 }
976 return (h);
977 }
978
/*
 * Descriptors for /dev/random and /dev/urandom; opened in kernel_init(),
 * closed and reset to -1 in kernel_fini().
 */
static int random_fd = -1, urandom_fd = -1;
980
981 static int
982 random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
983 {
984 size_t resid = len;
985 ssize_t bytes;
986
987 ASSERT(fd != -1);
988
989 while (resid != 0) {
990 bytes = read(fd, ptr, resid);
991 ASSERT3S(bytes, >=, 0);
992 ptr += bytes;
993 resid -= bytes;
994 }
995
996 return (0);
997 }
998
999 int
1000 random_get_bytes(uint8_t *ptr, size_t len)
1001 {
1002 return (random_get_bytes_common(ptr, len, random_fd));
1003 }
1004
1005 int
1006 random_get_pseudo_bytes(uint8_t *ptr, size_t len)
1007 {
1008 return (random_get_bytes_common(ptr, len, urandom_fd));
1009 }
1010
/*
 * Convert the string 'hw_serial' to an unsigned long in the given 'base'.
 *
 *   nptr    if non-NULL, receives a pointer to the first character that
 *           was not consumed by the conversion
 *   result  receives the converted value
 *
 * Returns 0 on success, or the errno value set by strtoul() (for example
 * ERANGE on overflow).
 */
int
ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
{
	char *end;

	/*
	 * strtoul() reports failure only through errno, so clear it first;
	 * otherwise a stale value would be mistaken for an error whenever
	 * the string legitimately converts to 0.
	 */
	errno = 0;
	*result = strtoul(hw_serial, &end, base);
	if (nptr != NULL)
		*nptr = end;

	return (errno);
}
1021
1022 int
1023 ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
1024 {
1025 char *end;
1026
1027 *result = strtoull(str, &end, base);
1028 if (*result == 0)
1029 return (errno);
1030 return (0);
1031 }
1032
1033 /*
1034 * =========================================================================
1035 * kernel emulation setup & teardown
1036 * =========================================================================
1037 */
/*
 * Allocation-failure callback registered with umem_nofail_callback() in
 * kernel_init(): report the condition on stderr and abort.
 */
static int
umem_out_of_memory(void)
{
	char errmsg[] = "out of memory -- generating core dump\n";

	(void) fputs(errmsg, stderr);
	abort();

	return (0);
}
1047
1048 void
1049 kernel_init(int mode)
1050 {
1051 umem_nofail_callback(umem_out_of_memory);
1052
1053 physmem = sysconf(_SC_PHYS_PAGES);
1054
1055 dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
1056 (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
1057
1058 (void) snprintf(hw_serial, sizeof (hw_serial), "%ld",
1059 (mode & FWRITE) ? gethostid() : 0);
1060
1061 VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
1062 VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);
1063
1064 thread_init();
1065 system_taskq_init();
1066
1067 spa_init(mode);
1068 }
1069
1070 void
1071 kernel_fini(void)
1072 {
1073 spa_fini();
1074
1075 system_taskq_fini();
1076 thread_fini();
1077
1078 close(random_fd);
1079 close(urandom_fd);
1080
1081 random_fd = -1;
1082 urandom_fd = -1;
1083 }
1084
1085 uid_t
1086 crgetuid(cred_t *cr)
1087 {
1088 return (0);
1089 }
1090
1091 gid_t
1092 crgetgid(cred_t *cr)
1093 {
1094 return (0);
1095 }
1096
1097 int
1098 crgetngroups(cred_t *cr)
1099 {
1100 return (0);
1101 }
1102
1103 gid_t *
1104 crgetgroups(cred_t *cr)
1105 {
1106 return (NULL);
1107 }
1108
1109 int
1110 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
1111 {
1112 return (0);
1113 }
1114
1115 int
1116 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
1117 {
1118 return (0);
1119 }
1120
1121 int
1122 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
1123 {
1124 return (0);
1125 }
1126
1127 ksiddomain_t *
1128 ksid_lookupdomain(const char *dom)
1129 {
1130 ksiddomain_t *kd;
1131
1132 kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL);
1133 kd->kd_name = spa_strdup(dom);
1134 return (kd);
1135 }
1136
1137 void
1138 ksiddomain_rele(ksiddomain_t *ksid)
1139 {
1140 spa_strfree(ksid->kd_name);
1141 umem_free(ksid, sizeof (ksiddomain_t));
1142 }
1143
/*
 * Format into a newly allocated string using vasprintf().  The caller's
 * va_list is left untouched because the formatting runs on a copy.
 * Formatting failure is fatal.
 */
char *
kmem_vasprintf(const char *fmt, va_list adx)
{
	va_list args;
	char *str = NULL;

	va_copy(args, adx);
	VERIFY(vasprintf(&str, fmt, args) != -1);
	va_end(args);

	return (str);
}
1156
/*
 * Format into a newly allocated string using vasprintf().
 * Formatting failure is fatal.
 */
char *
kmem_asprintf(const char *fmt, ...)
{
	va_list args;
	char *str = NULL;

	va_start(args, fmt);
	VERIFY(vasprintf(&str, fmt, args) != -1);
	va_end(args);

	return (str);
}
1169
1170 /* ARGSUSED */
1171 int
1172 zfs_onexit_fd_hold(int fd, minor_t *minorp)
1173 {
1174 *minorp = 0;
1175 return (0);
1176 }
1177
/* Userland stub: nothing was held by zfs_onexit_fd_hold(), so no-op. */
/* ARGSUSED */
void
zfs_onexit_fd_rele(int fd)
{
	(void) fd;
}
1183
1184 /* ARGSUSED */
1185 int
1186 zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
1187 uint64_t *action_handle)
1188 {
1189 return (0);
1190 }
1191
1192 /* ARGSUSED */
1193 int
1194 zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
1195 {
1196 return (0);
1197 }
1198
1199 /* ARGSUSED */
1200 int
1201 zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
1202 {
1203 return (0);
1204 }