]> git.proxmox.com Git - mirror_zfs.git/blame - lib/libzpool/kernel.c
Add linux unused code tracking
[mirror_zfs.git] / lib / libzpool / kernel.c
CommitLineData
34dc7c2f
BB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
572e2857 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
34dc7c2f
BB
23 */
24
34dc7c2f
BB
25#include <assert.h>
26#include <fcntl.h>
27#include <poll.h>
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31#include <zlib.h>
1e33ac1e 32#include <sys/signal.h>
34dc7c2f
BB
33#include <sys/spa.h>
34#include <sys/stat.h>
35#include <sys/processor.h>
36#include <sys/zfs_context.h>
34dc7c2f 37#include <sys/utsname.h>
d164b209 38#include <sys/systeminfo.h>
34dc7c2f
BB
39
40/*
41 * Emulation of kernel services in userland.
42 */
43
428870ff 44int aok;
34dc7c2f
BB
45uint64_t physmem;
46vnode_t *rootdir = (vnode_t *)0xabcd1234;
d164b209 47char hw_serial[HW_HOSTID_LEN];
34dc7c2f
BB
48
49struct utsname utsname = {
50 "userland", "libzpool", "1", "1", "na"
51};
52
428870ff
BB
53/* this only exists to have its address taken */
54struct proc p0;
55
34dc7c2f
BB
56/*
57 * =========================================================================
58 * threads
59 * =========================================================================
60 */
1e33ac1e
BB
61
/*
 * Thread emulation bookkeeping.  kthread_key stores each thread's
 * kthread_t, kthread_nr counts registered threads, and kthread_cond is
 * broadcast by zk_thread_exit() so thread_fini() can wait for the count
 * to drain while holding kthread_lock.
 */
pthread_key_t kthread_key;
pthread_mutex_t kthread_lock = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t kthread_cond = PTHREAD_COND_INITIALIZER;
int kthread_nr = 0;
66
67static void
68thread_init(void)
69{
70 kthread_t *kt;
71
72 VERIFY3S(pthread_key_create(&kthread_key, NULL), ==, 0);
73
74 /* Create entry for primary kthread */
75 kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL);
76 kt->t_tid = pthread_self();
77 kt->t_func = NULL;
78
79 VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0);
80
81 /* Only the main thread should be running at the moment */
82 ASSERT3S(kthread_nr, ==, 0);
83 kthread_nr = 1;
84}
85
86static void
87thread_fini(void)
88{
89 kthread_t *kt = curthread;
90
91 ASSERT(pthread_equal(kt->t_tid, pthread_self()));
92 ASSERT3P(kt->t_func, ==, NULL);
93
94 umem_free(kt, sizeof(kthread_t));
95
96 /* Wait for all threads to exit via thread_exit() */
97 VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0);
98
99 kthread_nr--; /* Main thread is exiting */
100
101 while (kthread_nr > 0)
102 VERIFY3S(pthread_cond_wait(&kthread_cond, &kthread_lock), ==,
103 0);
104
105 ASSERT3S(kthread_nr, ==, 0);
106 VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0);
107
108 VERIFY3S(pthread_key_delete(kthread_key), ==, 0);
109}
110
34dc7c2f 111kthread_t *
1e33ac1e
BB
112zk_thread_current(void)
113{
114 kthread_t *kt = pthread_getspecific(kthread_key);
115
116 ASSERT3P(kt, !=, NULL);
117
118 return kt;
119}
120
121void *
122zk_thread_helper(void *arg)
34dc7c2f 123{
1e33ac1e
BB
124 kthread_t *kt = (kthread_t *) arg;
125
126 VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0);
34dc7c2f 127
1e33ac1e
BB
128 VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0);
129 kthread_nr++;
130 VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0);
34dc7c2f 131
1e33ac1e
BB
132 kt->t_tid = pthread_self();
133 ((thread_func_arg_t) kt->t_func)(kt->t_arg);
134
135 /* Unreachable, thread must exit with thread_exit() */
136 abort();
137
138 return NULL;
139}
140
141kthread_t *
142zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg,
143 size_t len, proc_t *pp, int state, pri_t pri)
144{
145 kthread_t *kt;
146 pthread_attr_t attr;
147 size_t stack;
148
149 ASSERT3S(state & ~TS_RUN, ==, 0);
150
151 kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL);
152 kt->t_func = func;
153 kt->t_arg = arg;
154
155 /*
156 * The Solaris kernel stack size is 24k for x86/x86_64.
157 * The Linux kernel stack size is 8k for x86/x86_64.
158 *
159 * We reduce the default stack size in userspace, to ensure
160 * we observe stack overruns in user space as well as in
161 * kernel space. PTHREAD_STACK_MIN is the minimum stack
162 * required for a NULL procedure in user space and is added
163 * in to the stack requirements.
164 *
165 * Some buggy NPTL threading implementations include the
166 * guard area within the stack size allocations. In
167 * this case we allocate an extra page to account for the
168 * guard area since we only have two pages of usable stack
169 * on Linux.
170 */
171
172 stack = PTHREAD_STACK_MIN + MAX(stksize, STACK_SIZE) +
173 EXTRA_GUARD_BYTES;
174
175 VERIFY3S(pthread_attr_init(&attr), ==, 0);
176 VERIFY3S(pthread_attr_setstacksize(&attr, stack), ==, 0);
177 VERIFY3S(pthread_attr_setguardsize(&attr, PAGESIZE), ==, 0);
178
179 VERIFY3S(pthread_create(&kt->t_tid, &attr, &zk_thread_helper, kt),
180 ==, 0);
181
182 VERIFY3S(pthread_attr_destroy(&attr), ==, 0);
183
184 return kt;
185}
186
187void
188zk_thread_exit(void)
189{
190 kthread_t *kt = curthread;
191
192 ASSERT(pthread_equal(kt->t_tid, pthread_self()));
193
194 umem_free(kt, sizeof(kthread_t));
195
196 pthread_mutex_lock(&kthread_lock);
197 kthread_nr--;
198 pthread_mutex_unlock(&kthread_lock);
199
200 pthread_cond_broadcast(&kthread_cond);
201 pthread_exit((void *)TS_MAGIC);
202}
203
204void
205zk_thread_join(kt_did_t tid)
206{
207 void *ret;
208
209 pthread_join((pthread_t)tid, &ret);
210 VERIFY3P(ret, ==, (void *)TS_MAGIC);
34dc7c2f
BB
211}
212
213/*
214 * =========================================================================
215 * kstats
216 * =========================================================================
217 */
218/*ARGSUSED*/
219kstat_t *
220kstat_create(char *module, int instance, char *name, char *class,
221 uchar_t type, ulong_t ndata, uchar_t ks_flag)
222{
223 return (NULL);
224}
225
226/*ARGSUSED*/
227void
228kstat_install(kstat_t *ksp)
229{}
230
231/*ARGSUSED*/
232void
233kstat_delete(kstat_t *ksp)
234{}
235
236/*
237 * =========================================================================
238 * mutexes
239 * =========================================================================
240 */
1e33ac1e 241
34dc7c2f 242void
1e33ac1e 243mutex_init(kmutex_t *mp, char *name, int type, void *cookie)
34dc7c2f 244{
1e33ac1e
BB
245 ASSERT3S(type, ==, MUTEX_DEFAULT);
246 ASSERT3P(cookie, ==, NULL);
247 mp->m_owner = MTX_INIT;
248 mp->m_magic = MTX_MAGIC;
249 VERIFY3S(pthread_mutex_init(&mp->m_lock, NULL), ==, 0);
34dc7c2f
BB
250}
251
252void
1e33ac1e 253mutex_destroy(kmutex_t *mp)
34dc7c2f 254{
1e33ac1e
BB
255 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
256 ASSERT3P(mp->m_owner, ==, MTX_INIT);
257 VERIFY3S(pthread_mutex_destroy(&(mp)->m_lock), ==, 0);
258 mp->m_owner = MTX_DEST;
259 mp->m_magic = 0;
34dc7c2f
BB
260}
261
262void
263mutex_enter(kmutex_t *mp)
264{
1e33ac1e
BB
265 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
266 ASSERT3P(mp->m_owner, !=, MTX_DEST);
267 ASSERT3P(mp->m_owner, !=, curthread);
268 VERIFY3S(pthread_mutex_lock(&mp->m_lock), ==, 0);
269 ASSERT3P(mp->m_owner, ==, MTX_INIT);
34dc7c2f
BB
270 mp->m_owner = curthread;
271}
272
273int
274mutex_tryenter(kmutex_t *mp)
275{
1e33ac1e
BB
276 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
277 ASSERT3P(mp->m_owner, !=, MTX_DEST);
278 if (0 == pthread_mutex_trylock(&mp->m_lock)) {
279 ASSERT3P(mp->m_owner, ==, MTX_INIT);
34dc7c2f
BB
280 mp->m_owner = curthread;
281 return (1);
282 } else {
283 return (0);
284 }
285}
286
287void
288mutex_exit(kmutex_t *mp)
289{
1e33ac1e
BB
290 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
291 ASSERT3P(mutex_owner(mp), ==, curthread);
292 mp->m_owner = MTX_INIT;
293 VERIFY3S(pthread_mutex_unlock(&mp->m_lock), ==, 0);
34dc7c2f
BB
294}
295
296void *
297mutex_owner(kmutex_t *mp)
298{
1e33ac1e 299 ASSERT3U(mp->m_magic, ==, MTX_MAGIC);
34dc7c2f
BB
300 return (mp->m_owner);
301}
302
1e33ac1e
BB
303int
304mutex_held(kmutex_t *mp)
305{
306 return (mp->m_owner == curthread);
307}
308
34dc7c2f
BB
309/*
310 * =========================================================================
311 * rwlocks
312 * =========================================================================
313 */
1e33ac1e 314
34dc7c2f
BB
315void
316rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
317{
1e33ac1e
BB
318 ASSERT3S(type, ==, RW_DEFAULT);
319 ASSERT3P(arg, ==, NULL);
320 VERIFY3S(pthread_rwlock_init(&rwlp->rw_lock, NULL), ==, 0);
321 rwlp->rw_owner = RW_INIT;
322 rwlp->rw_wr_owner = RW_INIT;
323 rwlp->rw_readers = 0;
324 rwlp->rw_magic = RW_MAGIC;
34dc7c2f
BB
325}
326
327void
328rw_destroy(krwlock_t *rwlp)
329{
1e33ac1e
BB
330 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
331
332 VERIFY3S(pthread_rwlock_destroy(&rwlp->rw_lock), ==, 0);
333 rwlp->rw_magic = 0;
34dc7c2f
BB
334}
335
336void
337rw_enter(krwlock_t *rwlp, krw_t rw)
338{
1e33ac1e
BB
339 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
340 ASSERT3P(rwlp->rw_owner, !=, curthread);
341 ASSERT3P(rwlp->rw_wr_owner, !=, curthread);
34dc7c2f 342
1e33ac1e
BB
343 if (rw == RW_READER) {
344 VERIFY3S(pthread_rwlock_rdlock(&rwlp->rw_lock), ==, 0);
345 ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT);
346
347 atomic_inc_uint(&rwlp->rw_readers);
348 } else {
349 VERIFY3S(pthread_rwlock_wrlock(&rwlp->rw_lock), ==, 0);
350 ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT);
351 ASSERT3U(rwlp->rw_readers, ==, 0);
352
353 rwlp->rw_wr_owner = curthread;
354 }
34dc7c2f
BB
355
356 rwlp->rw_owner = curthread;
357}
358
359void
360rw_exit(krwlock_t *rwlp)
361{
1e33ac1e
BB
362 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
363 ASSERT(RW_LOCK_HELD(rwlp));
364
365 if (RW_READ_HELD(rwlp))
366 atomic_dec_uint(&rwlp->rw_readers);
367 else
368 rwlp->rw_wr_owner = RW_INIT;
34dc7c2f 369
1e33ac1e
BB
370 rwlp->rw_owner = RW_INIT;
371 VERIFY3S(pthread_rwlock_unlock(&rwlp->rw_lock), ==, 0);
34dc7c2f
BB
372}
373
374int
375rw_tryenter(krwlock_t *rwlp, krw_t rw)
376{
377 int rv;
378
1e33ac1e 379 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
34dc7c2f
BB
380
381 if (rw == RW_READER)
1e33ac1e 382 rv = pthread_rwlock_tryrdlock(&rwlp->rw_lock);
34dc7c2f 383 else
1e33ac1e 384 rv = pthread_rwlock_trywrlock(&rwlp->rw_lock);
34dc7c2f
BB
385
386 if (rv == 0) {
1e33ac1e
BB
387 ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT);
388
389 if (rw == RW_READER)
390 atomic_inc_uint(&rwlp->rw_readers);
391 else {
392 ASSERT3U(rwlp->rw_readers, ==, 0);
393 rwlp->rw_wr_owner = curthread;
394 }
395
34dc7c2f
BB
396 rwlp->rw_owner = curthread;
397 return (1);
398 }
399
1e33ac1e
BB
400 VERIFY3S(rv, ==, EBUSY);
401
34dc7c2f
BB
402 return (0);
403}
404
34dc7c2f
BB
405int
406rw_tryupgrade(krwlock_t *rwlp)
407{
1e33ac1e 408 ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC);
34dc7c2f
BB
409
410 return (0);
411}
412
413/*
414 * =========================================================================
415 * condition variables
416 * =========================================================================
417 */
1e33ac1e 418
34dc7c2f
BB
419void
420cv_init(kcondvar_t *cv, char *name, int type, void *arg)
421{
1e33ac1e
BB
422 ASSERT3S(type, ==, CV_DEFAULT);
423 cv->cv_magic = CV_MAGIC;
424 VERIFY3S(pthread_cond_init(&cv->cv, NULL), ==, 0);
34dc7c2f
BB
425}
426
427void
428cv_destroy(kcondvar_t *cv)
429{
1e33ac1e
BB
430 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
431 VERIFY3S(pthread_cond_destroy(&cv->cv), ==, 0);
432 cv->cv_magic = 0;
34dc7c2f
BB
433}
434
435void
436cv_wait(kcondvar_t *cv, kmutex_t *mp)
437{
1e33ac1e
BB
438 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
439 ASSERT3P(mutex_owner(mp), ==, curthread);
440 mp->m_owner = MTX_INIT;
441 int ret = pthread_cond_wait(&cv->cv, &mp->m_lock);
442 if (ret != 0)
443 VERIFY3S(ret, ==, EINTR);
34dc7c2f
BB
444 mp->m_owner = curthread;
445}
446
447clock_t
448cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
449{
450 int error;
1e33ac1e 451 struct timeval tv;
34dc7c2f
BB
452 timestruc_t ts;
453 clock_t delta;
454
1e33ac1e
BB
455 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
456
34dc7c2f 457top:
428870ff 458 delta = abstime - ddi_get_lbolt();
34dc7c2f
BB
459 if (delta <= 0)
460 return (-1);
461
1e33ac1e
BB
462 VERIFY(gettimeofday(&tv, NULL) == 0);
463
464 ts.tv_sec = tv.tv_sec + delta / hz;
465 ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz);
466 if (ts.tv_nsec >= NANOSEC) {
467 ts.tv_sec++;
468 ts.tv_nsec -= NANOSEC;
469 }
34dc7c2f 470
1e33ac1e
BB
471 ASSERT3P(mutex_owner(mp), ==, curthread);
472 mp->m_owner = MTX_INIT;
473 error = pthread_cond_timedwait(&cv->cv, &mp->m_lock, &ts);
34dc7c2f
BB
474 mp->m_owner = curthread;
475
1e33ac1e 476 if (error == ETIMEDOUT)
34dc7c2f
BB
477 return (-1);
478
479 if (error == EINTR)
480 goto top;
481
1e33ac1e 482 VERIFY3S(error, ==, 0);
34dc7c2f
BB
483
484 return (1);
485}
486
487void
488cv_signal(kcondvar_t *cv)
489{
1e33ac1e
BB
490 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
491 VERIFY3S(pthread_cond_signal(&cv->cv), ==, 0);
34dc7c2f
BB
492}
493
494void
495cv_broadcast(kcondvar_t *cv)
496{
1e33ac1e
BB
497 ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
498 VERIFY3S(pthread_cond_broadcast(&cv->cv), ==, 0);
34dc7c2f
BB
499}
500
501/*
502 * =========================================================================
503 * vnode operations
504 * =========================================================================
505 */
506/*
507 * Note: for the xxxat() versions of these functions, we assume that the
508 * starting vp is always rootdir (which is true for spa_directory.c, the only
509 * ZFS consumer of these interfaces). We assert this is true, and then emulate
510 * them by adding '/' in front of the path.
511 */
512
513/*ARGSUSED*/
514int
515vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
516{
517 int fd;
518 vnode_t *vp;
519 int old_umask;
5ae4e2c2 520 char *realpath;
34dc7c2f 521 struct stat64 st;
4d58b69d 522 int err;
34dc7c2f 523
5ae4e2c2
BB
524 realpath = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
525
34dc7c2f
BB
526 /*
527 * If we're accessing a real disk from userland, we need to use
528 * the character interface to avoid caching. This is particularly
529 * important if we're trying to look at a real in-kernel storage
530 * pool from userland, e.g. via zdb, because otherwise we won't
531 * see the changes occurring under the segmap cache.
532 * On the other hand, the stupid character device returns zero
533 * for its size. So -- gag -- we open the block device to get
534 * its size, and remember it for subsequent VOP_GETATTR().
535 */
536 if (strncmp(path, "/dev/", 5) == 0) {
537 char *dsk;
538 fd = open64(path, O_RDONLY);
5ae4e2c2
BB
539 if (fd == -1) {
540 err = errno;
541 free(realpath);
542 return (err);
543 }
34dc7c2f 544 if (fstat64(fd, &st) == -1) {
5ae4e2c2 545 err = errno;
34dc7c2f 546 close(fd);
5ae4e2c2
BB
547 free(realpath);
548 return (err);
34dc7c2f
BB
549 }
550 close(fd);
551 (void) sprintf(realpath, "%s", path);
552 dsk = strstr(path, "/dsk/");
553 if (dsk != NULL)
554 (void) sprintf(realpath + (dsk - path) + 1, "r%s",
555 dsk + 1);
556 } else {
557 (void) sprintf(realpath, "%s", path);
5ae4e2c2
BB
558 if (!(flags & FCREAT) && stat64(realpath, &st) == -1) {
559 err = errno;
560 free(realpath);
561 return (err);
562 }
34dc7c2f
BB
563 }
564
565 if (flags & FCREAT)
566 old_umask = umask(0);
567
568 /*
569 * The construct 'flags - FREAD' conveniently maps combinations of
570 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
571 */
572 fd = open64(realpath, flags - FREAD, mode);
5ae4e2c2 573 free(realpath);
34dc7c2f
BB
574
575 if (flags & FCREAT)
576 (void) umask(old_umask);
577
578 if (fd == -1)
579 return (errno);
580
581 if (fstat64(fd, &st) == -1) {
4d58b69d 582 err = errno;
34dc7c2f 583 close(fd);
4d58b69d 584 return (err);
34dc7c2f
BB
585 }
586
587 (void) fcntl(fd, F_SETFD, FD_CLOEXEC);
588
589 *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
590
591 vp->v_fd = fd;
592 vp->v_size = st.st_size;
593 vp->v_path = spa_strdup(path);
594
595 return (0);
596}
597
598/*ARGSUSED*/
599int
600vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
601 int x3, vnode_t *startvp, int fd)
602{
603 char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
604 int ret;
605
606 ASSERT(startvp == rootdir);
607 (void) sprintf(realpath, "/%s", path);
608
609 /* fd ignored for now, need if want to simulate nbmand support */
610 ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
611
612 umem_free(realpath, strlen(path) + 2);
613
614 return (ret);
615}
616
617/*ARGSUSED*/
618int
619vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
620 int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
621{
4d58b69d 622 ssize_t rc, done = 0, split;
34dc7c2f
BB
623
624 if (uio == UIO_READ) {
4d58b69d 625 rc = pread64(vp->v_fd, addr, len, offset);
34dc7c2f
BB
626 } else {
627 /*
628 * To simulate partial disk writes, we split writes into two
629 * system calls so that the process can be killed in between.
630 */
631 split = (len > 0 ? rand() % len : 0);
4d58b69d
RC
632 rc = pwrite64(vp->v_fd, addr, split, offset);
633 if (rc != -1) {
634 done = rc;
635 rc = pwrite64(vp->v_fd, (char *)addr + split,
636 len - split, offset + split);
637 }
34dc7c2f
BB
638 }
639
4d58b69d 640 if (rc == -1)
34dc7c2f 641 return (errno);
4d58b69d
RC
642
643 done += rc;
644
34dc7c2f 645 if (residp)
4d58b69d
RC
646 *residp = len - done;
647 else if (done != len)
34dc7c2f
BB
648 return (EIO);
649 return (0);
650}
651
652void
653vn_close(vnode_t *vp)
654{
655 close(vp->v_fd);
656 spa_strfree(vp->v_path);
657 umem_free(vp, sizeof (vnode_t));
658}
659
428870ff
BB
660/*
661 * At a minimum we need to update the size since vdev_reopen()
662 * will no longer call vn_openat().
663 */
664int
665fop_getattr(vnode_t *vp, vattr_t *vap)
666{
667 struct stat64 st;
668
669 if (fstat64(vp->v_fd, &st) == -1) {
670 close(vp->v_fd);
671 return (errno);
672 }
673
674 vap->va_size = st.st_size;
675 return (0);
676}
677
34dc7c2f
BB
678#ifdef ZFS_DEBUG
679
680/*
681 * =========================================================================
682 * Figure out which debugging statements to print
683 * =========================================================================
684 */
685
/* Comma-separated list of enabled debug tags, or NULL when unset. */
static char *dprintf_string;
/* Set by dprintf_setup() when the tag "on" is present. */
static int dprintf_print_all;

/*
 * Return 1 if 'string' appears as a complete element of the
 * comma-separated list in dprintf_string, 0 otherwise (including when no
 * list has been configured).
 *
 * List format: file1.c,function_name1,file2.c,file3.c
 */
int
dprintf_find_string(const char *string)
{
	const char *elem;
	int len = strlen(string);

	for (elem = dprintf_string; elem != NULL; ) {
		/* Match only whole elements, not prefixes of longer ones. */
		if (strncmp(elem, string, len) == 0 &&
		    (elem[len] == ',' || elem[len] == '\0'))
			return (1);

		elem = strchr(elem, ',');
		if (elem != NULL)
			elem++;		/* step over the ',' */
	}

	return (0);
}
710
711void
712dprintf_setup(int *argc, char **argv)
713{
714 int i, j;
715
716 /*
717 * Debugging can be specified two ways: by setting the
718 * environment variable ZFS_DEBUG, or by including a
719 * "debug=..." argument on the command line. The command
720 * line setting overrides the environment variable.
721 */
722
723 for (i = 1; i < *argc; i++) {
724 int len = strlen("debug=");
725 /* First look for a command line argument */
726 if (strncmp("debug=", argv[i], len) == 0) {
727 dprintf_string = argv[i] + len;
728 /* Remove from args */
729 for (j = i; j < *argc; j++)
730 argv[j] = argv[j+1];
731 argv[j] = NULL;
732 (*argc)--;
733 }
734 }
735
736 if (dprintf_string == NULL) {
737 /* Look for ZFS_DEBUG environment variable */
738 dprintf_string = getenv("ZFS_DEBUG");
739 }
740
741 /*
742 * Are we just turning on all debugging?
743 */
744 if (dprintf_find_string("on"))
745 dprintf_print_all = 1;
746}
747
748/*
749 * =========================================================================
750 * debug printfs
751 * =========================================================================
752 */
753void
754__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
755{
756 const char *newfile;
757 va_list adx;
758
759 /*
760 * Get rid of annoying "../common/" prefix to filename.
761 */
762 newfile = strrchr(file, '/');
763 if (newfile != NULL) {
764 newfile = newfile + 1; /* Get rid of leading / */
765 } else {
766 newfile = file;
767 }
768
769 if (dprintf_print_all ||
770 dprintf_find_string(newfile) ||
771 dprintf_find_string(func)) {
772 /* Print out just the function name if requested */
773 flockfile(stdout);
774 if (dprintf_find_string("pid"))
775 (void) printf("%d ", getpid());
776 if (dprintf_find_string("tid"))
1e33ac1e 777 (void) printf("%u ", (uint_t) pthread_self());
34dc7c2f
BB
778 if (dprintf_find_string("cpu"))
779 (void) printf("%u ", getcpuid());
780 if (dprintf_find_string("time"))
781 (void) printf("%llu ", gethrtime());
782 if (dprintf_find_string("long"))
783 (void) printf("%s, line %d: ", newfile, line);
784 (void) printf("%s: ", func);
785 va_start(adx, fmt);
786 (void) vprintf(fmt, adx);
787 va_end(adx);
788 funlockfile(stdout);
789 }
790}
791
792#endif /* ZFS_DEBUG */
793
794/*
795 * =========================================================================
796 * cmn_err() and panic()
797 * =========================================================================
798 */
799static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
800static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
801
/*
 * Print "error: <formatted message>" followed by a newline on stderr and
 * abort the process.  Does not return.
 */
void
vpanic(const char *fmt, va_list adx)
{
	(void) fputs("error: ", stderr);
	(void) vfprintf(stderr, fmt, adx);
	(void) fputc('\n', stderr);

	/* think of it as a "user-level crash dump" */
	abort();
}
811
/* printf-style front end for vpanic(); aborts the process. */
void
panic(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vpanic(fmt, args);
	/* vpanic() aborts; kept so va_start() has a matching va_end(). */
	va_end(args);
}
821
822void
823vcmn_err(int ce, const char *fmt, va_list adx)
824{
825 if (ce == CE_PANIC)
826 vpanic(fmt, adx);
827 if (ce != CE_NOTE) { /* suppress noise in userland stress testing */
828 (void) fprintf(stderr, "%s", ce_prefix[ce]);
829 (void) vfprintf(stderr, fmt, adx);
830 (void) fprintf(stderr, "%s", ce_suffix[ce]);
831 }
832}
833
/* printf-style front end for vcmn_err(). */
/*PRINTFLIKE2*/
void
cmn_err(int ce, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vcmn_err(ce, fmt, args);
	va_end(args);
}
844
845/*
846 * =========================================================================
847 * kobj interfaces
848 * =========================================================================
849 */
850struct _buf *
851kobj_open_file(char *name)
852{
853 struct _buf *file;
854 vnode_t *vp;
855
856 /* set vp as the _fd field of the file */
857 if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir,
858 -1) != 0)
859 return ((void *)-1UL);
860
861 file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
862 file->_fd = (intptr_t)vp;
863 return (file);
864}
865
866int
867kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
868{
869 ssize_t resid;
870
871 vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
872 UIO_SYSSPACE, 0, 0, 0, &resid);
873
874 return (size - resid);
875}
876
877void
878kobj_close_file(struct _buf *file)
879{
880 vn_close((vnode_t *)file->_fd);
881 umem_free(file, sizeof (struct _buf));
882}
883
884int
885kobj_get_filesize(struct _buf *file, uint64_t *size)
886{
887 struct stat64 st;
888 vnode_t *vp = (vnode_t *)file->_fd;
889
890 if (fstat64(vp->v_fd, &st) == -1) {
891 vn_close(vp);
892 return (errno);
893 }
894 *size = st.st_size;
895 return (0);
896}
897
898/*
899 * =========================================================================
900 * misc routines
901 * =========================================================================
902 */
903
904void
905delay(clock_t ticks)
906{
907 poll(0, 0, ticks * (1000 / hz));
908}
909
910/*
911 * Find highest one bit set.
912 * Returns bit number + 1 of highest bit that is set, otherwise returns 0.
913 * High order bit is 31 (or 63 in _LP64 kernel).
914 */
int
highbit(ulong_t i)
{
	int h = 1;
	int shift;

	if (i == 0)
		return (0);

	/*
	 * Binary search for the top set bit: test the upper half of the
	 * remaining window and, if anything is set there, move it down and
	 * account for the shift.  The window starts at half the word size.
	 */
#ifdef _LP64
	shift = 32;
#else
	shift = 16;
#endif
	for (; shift != 0; shift >>= 1) {
		if ((i >> shift) != 0) {
			h += shift;
			i >>= shift;
		}
	}

	return (h);
}
944
/* Descriptors for /dev/random and /dev/urandom; -1 while not open. */
static int random_fd = -1;
static int urandom_fd = -1;
946
947static int
948random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
949{
950 size_t resid = len;
951 ssize_t bytes;
952
953 ASSERT(fd != -1);
954
955 while (resid != 0) {
956 bytes = read(fd, ptr, resid);
957 ASSERT3S(bytes, >=, 0);
958 ptr += bytes;
959 resid -= bytes;
960 }
961
962 return (0);
963}
964
965int
966random_get_bytes(uint8_t *ptr, size_t len)
967{
968 return (random_get_bytes_common(ptr, len, random_fd));
969}
970
971int
972random_get_pseudo_bytes(uint8_t *ptr, size_t len)
973{
974 return (random_get_bytes_common(ptr, len, urandom_fd));
975}
976
/*
 * Convert the string 'hw_serial' to an unsigned long in the given base.
 *
 * hw_serial  text to parse
 * nptr       if non-NULL, receives a pointer to the first unparsed char
 * base       numeric base passed to strtoul()
 * result     receives the converted value
 *
 * Returns 0 on success, EINVAL if no digits could be converted, or the
 * errno set by strtoul() (e.g. ERANGE on overflow).
 *
 * errno is cleared before the conversion so a legitimate value of 0 is
 * not reported as whatever error an earlier call left behind, and so
 * overflow is detected even though the result is non-zero.
 */
int
ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
{
	/* Pre-set in case strtoul() rejects 'base' without storing 'end'. */
	char *end = (char *)hw_serial;

	errno = 0;
	*result = strtoul(hw_serial, &end, base);

	if (nptr != NULL)
		*nptr = end;

	if (errno != 0)
		return (errno);
	if (end == hw_serial)
		return (EINVAL);

	return (0);
}
987
428870ff
BB
/*
 * Convert the string 'str' to an unsigned long long in the given base.
 *
 * str     text to parse
 * nptr    if non-NULL, receives a pointer to the first unparsed char
 * base    numeric base passed to strtoull()
 * result  receives the converted value
 *
 * Returns 0 on success, EINVAL if no digits could be converted, or the
 * errno set by strtoull() (e.g. ERANGE on overflow).
 *
 * errno is cleared before the conversion so a legitimate value of 0 is
 * not reported as whatever error an earlier call left behind, and so
 * overflow is detected even though the result is non-zero.
 */
int
ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
{
	/* Pre-set in case strtoull() rejects 'base' without storing 'end'. */
	char *end = (char *)str;

	errno = 0;
	*result = strtoull(str, &end, base);

	if (nptr != NULL)
		*nptr = end;

	if (errno != 0)
		return (errno);
	if (end == str)
		return (EINVAL);

	return (0);
}
998
34dc7c2f
BB
999/*
1000 * =========================================================================
1001 * kernel emulation setup & teardown
1002 * =========================================================================
1003 */
/*
 * Allocation-failure callback registered in kernel_init(): report the
 * condition on stderr and abort so a core dump is produced.
 */
static int
umem_out_of_memory(void)
{
	char errmsg[] = "out of memory -- generating core dump\n";

	(void) fputs(errmsg, stderr);
	abort();

	/* Not reached; present only to satisfy the return type. */
	return (0);
}
1013
1014void
1015kernel_init(int mode)
1016{
1017 umem_nofail_callback(umem_out_of_memory);
1018
1019 physmem = sysconf(_SC_PHYS_PAGES);
1020
1021 dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
1022 (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
1023
428870ff
BB
1024 (void) snprintf(hw_serial, sizeof (hw_serial), "%ld",
1025 (mode & FWRITE) ? gethostid() : 0);
34dc7c2f
BB
1026
1027 VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
1028 VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);
1029
1e33ac1e 1030 thread_init();
b128c09f
BB
1031 system_taskq_init();
1032
34dc7c2f
BB
1033 spa_init(mode);
1034}
1035
1036void
1037kernel_fini(void)
1038{
1039 spa_fini();
1040
428870ff 1041 system_taskq_fini();
1e33ac1e 1042 thread_fini();
428870ff 1043
34dc7c2f
BB
1044 close(random_fd);
1045 close(urandom_fd);
1046
1047 random_fd = -1;
1048 urandom_fd = -1;
1049}
1050
34dc7c2f
BB
1051uid_t
1052crgetuid(cred_t *cr)
1053{
1054 return (0);
1055}
1056
1057gid_t
1058crgetgid(cred_t *cr)
1059{
1060 return (0);
1061}
1062
1063int
1064crgetngroups(cred_t *cr)
1065{
1066 return (0);
1067}
1068
1069gid_t *
1070crgetgroups(cred_t *cr)
1071{
1072 return (NULL);
1073}
1074
1075int
1076zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
1077{
1078 return (0);
1079}
1080
1081int
1082zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
1083{
1084 return (0);
1085}
1086
1087int
1088zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
1089{
1090 return (0);
1091}
1092
1093ksiddomain_t *
1094ksid_lookupdomain(const char *dom)
1095{
1096 ksiddomain_t *kd;
1097
1098 kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL);
1099 kd->kd_name = spa_strdup(dom);
1100 return (kd);
1101}
1102
1103void
1104ksiddomain_rele(ksiddomain_t *ksid)
1105{
1106 spa_strfree(ksid->kd_name);
1107 umem_free(ksid, sizeof (ksiddomain_t));
1108}
428870ff 1109
/*
 * Format 'fmt' with the arguments in 'adx' into a buffer allocated by
 * vasprintf() and return it.  Formatting failure is fatal (VERIFY).
 * A copy of 'adx' is consumed, so the caller's list is left untouched.
 */
char *
kmem_vasprintf(const char *fmt, va_list adx)
{
	va_list adx_copy;
	char *buf = NULL;

	va_copy(adx_copy, adx);
	VERIFY(vasprintf(&buf, fmt, adx_copy) != -1);
	va_end(adx_copy);

	return (buf);
}
1122
/*
 * printf-style formatting into a buffer allocated by vasprintf(); the
 * buffer is returned.  Formatting failure is fatal (VERIFY).
 */
char *
kmem_asprintf(const char *fmt, ...)
{
	va_list adx;
	char *buf = NULL;

	va_start(adx, fmt);
	VERIFY(vasprintf(&buf, fmt, adx) != -1);
	va_end(adx);

	return (buf);
}
572e2857
BB
1135
1136/* ARGSUSED */
1137int
1138zfs_onexit_fd_hold(int fd, minor_t *minorp)
1139{
1140 *minorp = 0;
1141 return (0);
1142}
1143
/* Userland stub: nothing is held, so there is nothing to release. */
/* ARGSUSED */
void
zfs_onexit_fd_rele(int fd)
{
	/* intentionally empty */
}
1149
1150/* ARGSUSED */
1151int
1152zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
1153 uint64_t *action_handle)
1154{
1155 return (0);
1156}
1157
1158/* ARGSUSED */
1159int
1160zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
1161{
1162 return (0);
1163}
1164
1165/* ARGSUSED */
1166int
1167zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
1168{
1169 return (0);
1170}