4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
27 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
28 * Copyright (c) 2013 by Delphix. All rights reserved.
31 #ifndef _SYS_ZFS_CONTEXT_H
32 #define _SYS_ZFS_CONTEXT_H
37 #include <sys/types.h>
38 #include <sys/t_lock.h>
39 #include <sys/atomic.h>
40 #include <sys/sysmacros.h>
41 #include <sys/bitmap.h>
42 #include <sys/cmn_err.h>
44 #include <sys/taskq.h>
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/cpuvar.h>
52 #include <sys/debug.h>
53 #include <sys/random.h>
54 #include <sys/byteorder.h>
55 #include <sys/systm.h>
57 #include <sys/uio_impl.h>
58 #include <sys/dirent.h>
60 #include <vm/seg_kmem.h>
63 #include <sys/zfs_debug.h>
64 #include <sys/zfs_delay.h>
65 #include <sys/fm/fs/zfs.h>
66 #include <sys/sunddi.h>
67 #include <sys/ctype.h>
69 #include <linux/dcache_compat.h>
75 #define _SYS_CONDVAR_H
102 #include <sys/mman.h>
103 #include <sys/note.h>
104 #include <sys/types.h>
105 #include <sys/cred.h>
106 #include <sys/sysmacros.h>
107 #include <sys/bitmap.h>
108 #include <sys/resource.h>
109 #include <sys/byteorder.h>
110 #include <sys/list.h>
112 #include <sys/zfs_debug.h>
114 #include <sys/kstat.h>
115 #include <sys/u8_textprep.h>
116 #include <sys/fm/fs/zfs.h>
117 #include <sys/sunddi.h>
118 #include <sys/debug.h>
124 #define noinline __attribute__((noinline))
131 * Note that we are not using the debugging levels.
/*
 * Message levels.  NOTE(review): presumably the first (int) argument of
 * cmn_err()/vcmn_err(); confirm against the implementation.
 */
#define CE_CONT		0	/* continuation */
#define CE_NOTE		1	/* notice */
#define CE_WARN		2	/* warning */
#define CE_PANIC	3	/* panic */
#define CE_IGNORE	4	/* print nothing */
146 extern void dprintf_setup(int *argc
, char **argv
);
147 extern void __dprintf(const char *file
, const char *func
,
148 int line
, const char *fmt
, ...);
149 extern void cmn_err(int, const char *, ...);
150 extern void vcmn_err(int, const char *, __va_list
);
151 extern void panic(const char *, ...);
152 extern void vpanic(const char *, __va_list
);
154 #define fm_panic panic
/*
 * DTrace SDT probes have different signatures in userland than they do in
 * the kernel.  If they are being used in kernel code, re-define them out of
 * existence for their counterparts in libzpool.
 */
165 #endif /* DTRACE_PROBE */
166 #define DTRACE_PROBE(a) \
171 #endif /* DTRACE_PROBE1 */
172 #define DTRACE_PROBE1(a, b, c) \
173 ZFS_PROBE1(#a, (unsigned long)c)
177 #endif /* DTRACE_PROBE2 */
178 #define DTRACE_PROBE2(a, b, c, d, e) \
179 ZFS_PROBE2(#a, (unsigned long)c, (unsigned long)e)
183 #endif /* DTRACE_PROBE3 */
184 #define DTRACE_PROBE3(a, b, c, d, e, f, g) \
185 ZFS_PROBE3(#a, (unsigned long)c, (unsigned long)e, (unsigned long)g)
189 #endif /* DTRACE_PROBE4 */
190 #define DTRACE_PROBE4(a, b, c, d, e, f, g, h, i) \
191 ZFS_PROBE4(#a, (unsigned long)c, (unsigned long)e, (unsigned long)g, \
/*
 * We use the comma operator so that this macro can be used without much
 * additional code.  For example, "return (EINVAL);" becomes
 * "return (SET_ERROR(EINVAL));".  Note that the argument will be evaluated
 * twice, so it should not have side effects (e.g. something like:
 * "return (SET_ERROR(log_error(EINVAL, info)));" would log the error twice).
 */
201 #define SET_ERROR(err) (ZFS_SET_ERROR(err), err)
203 #define SET_ERROR(err) (err)
208 #define TS_MAGIC 0x72f158ab4261e538ull
209 #define TS_RUN 0x00000002
211 #define STACK_SIZE 8192 /* Linux x86 and amd64 */
213 #define STACK_SIZE 24576 /* Solaris */
216 /* in libzpool, p0 exists only to have its address taken */
217 typedef struct proc
{
218 uintptr_t this_is_never_used_dont_dereference_it
;
221 extern struct proc p0
;
222 #define curproc (&p0)
/* Thread entry points: take one opaque pointer and return nothing. */
typedef void (*thread_func_t)(void *);
typedef void (*thread_func_arg_t)(void *);

/* Thread id type; here it is just a pthread_t. */
typedef pthread_t kt_did_t;
228 #define kpreempt(x) ((void)0)
230 typedef struct kthread
{
232 thread_func_t t_func
;
/*
 * Kernel thread API spelled in terms of the zk_thread_*() functions.
 *
 * thread_create() does not forward its "pp" argument (NULL is passed
 * instead) and always requests PTHREAD_CREATE_DETACHED.
 * newproc() is not implemented: it evaluates to ENOSYS.
 */
#define curthread	zk_thread_current()
#define getcomm()	"unknown"
#define thread_exit	zk_thread_exit
#define thread_create(stk, stksize, func, arg, len, pp, state, pri)	\
	zk_thread_create(stk, stksize, (thread_func_t)func, arg,	\
	    len, NULL, state, pri, PTHREAD_CREATE_DETACHED)
#define thread_join(t)	zk_thread_join(t)
#define newproc(f, a, cid, pri, ctp, pid)	(ENOSYS)
245 extern kthread_t
*zk_thread_current(void);
246 extern void zk_thread_exit(void);
247 extern kthread_t
*zk_thread_create(caddr_t stk
, size_t stksize
,
248 thread_func_t func
, void *arg
, size_t len
,
249 proc_t
*pp
, int state
, pri_t pri
, int detachstate
);
250 extern void zk_thread_join(kt_did_t tid
);
/* Preemption control is a no-op here. */
#define kpreempt_disable()	((void)0)
#define kpreempt_enable()	((void)0)

/* Signal checks always evaluate to FALSE. */
#define issig(why)		(FALSE)
#define ISSIG(thr, why)		(FALSE)
/*
 * Mutex magic number and two pointer sentinels (null and all-ones).
 * NOTE(review): where the sentinels are stored is not visible in this
 * header; see the mutex implementation.
 */
#define MTX_MAGIC	0x9522f51362a6e326ull
#define MTX_INIT	((void *)NULL)
#define MTX_DEST	((void *)-1UL)
267 typedef struct kmutex
{
270 pthread_mutex_t m_lock
;
#define MUTEX_DEFAULT		0
/* A mutex counts as held when its recorded owner is the calling thread. */
#define MUTEX_HELD(m)		((m)->m_owner == curthread)
#define MUTEX_NOT_HELD(m)	(!MUTEX_HELD(m))
277 extern void mutex_init(kmutex_t
*mp
, char *name
, int type
, void *cookie
);
278 extern void mutex_destroy(kmutex_t
*mp
);
279 extern void mutex_enter(kmutex_t
*mp
);
280 extern void mutex_exit(kmutex_t
*mp
);
281 extern int mutex_tryenter(kmutex_t
*mp
);
282 extern void *mutex_owner(kmutex_t
*mp
);
283 extern int mutex_held(kmutex_t
*mp
);
/*
 * Reader/writer lock magic number and two pointer sentinels (null and
 * all-ones).  NOTE(review): their use is not visible in this header.
 */
#define RW_MAGIC	0x4d31fb123648e78aull
#define RW_INIT		((void *)NULL)
#define RW_DEST		((void *)-1UL)
292 typedef struct krwlock
{
296 pthread_rwlock_t rw_lock
;
304 #define RW_DEFAULT RW_READER
/*
 * Lock-state predicates.
 *
 * RW_READ_HELD  - at least one reader is recorded on the lock (this does
 *                 not say the *calling* thread is one of them).
 * RW_WRITE_HELD - the recorded write owner is the calling thread.
 * RW_LOCK_HELD  - either of the above.
 *
 * RW_LOCK_HELD used to be spelled out three times with identical text;
 * one definition is sufficient.
 */
#define RW_READ_HELD(x)		((x)->rw_readers > 0)
#define RW_WRITE_HELD(x)	((x)->rw_wr_owner == curthread)
#define RW_LOCK_HELD(x)		(RW_READ_HELD(x) || RW_WRITE_HELD(x))
316 extern void rw_init(krwlock_t
*rwlp
, char *name
, int type
, void *arg
);
317 extern void rw_destroy(krwlock_t
*rwlp
);
318 extern void rw_enter(krwlock_t
*rwlp
, krw_t rw
);
319 extern int rw_tryenter(krwlock_t
*rwlp
, krw_t rw
);
320 extern int rw_tryupgrade(krwlock_t
*rwlp
);
321 extern void rw_exit(krwlock_t
*rwlp
);
322 #define rw_downgrade(rwlp) do { } while (0)
324 extern uid_t
crgetuid(cred_t
*cr
);
325 extern uid_t
crgetruid(cred_t
*cr
);
326 extern gid_t
crgetgid(cred_t
*cr
);
327 extern int crgetngroups(cred_t
*cr
);
328 extern gid_t
*crgetgroups(cred_t
*cr
);
331 * Condition variables
333 #define CV_MAGIC 0xd31ea9a83b1b30c4ull
335 typedef struct kcondvar
{
342 extern void cv_init(kcondvar_t
*cv
, char *name
, int type
, void *arg
);
343 extern void cv_destroy(kcondvar_t
*cv
);
344 extern void cv_wait(kcondvar_t
*cv
, kmutex_t
*mp
);
345 extern clock_t cv_timedwait(kcondvar_t
*cv
, kmutex_t
*mp
, clock_t abstime
);
346 extern clock_t cv_timedwait_hires(kcondvar_t
*cvp
, kmutex_t
*mp
, hrtime_t tim
,
347 hrtime_t res
, int flag
);
348 extern void cv_signal(kcondvar_t
*cv
);
349 extern void cv_broadcast(kcondvar_t
*cv
);
350 #define cv_timedwait_interruptible(cv, mp, at) cv_timedwait(cv, mp, at)
351 #define cv_wait_interruptible(cv, mp) cv_wait(cv, mp)
352 #define cv_wait_io(cv, mp) cv_wait(cv, mp)
/*
 * Thread-specific data
 *
 * Direct mappings onto the pthread key API.  tsd_destroy() expands to
 * nothing, so keys made with tsd_create() are never deleted through it.
 *
 * This section used to appear twice, word for word; it is defined once.
 */
#define tsd_get(k)		pthread_getspecific(k)
#define tsd_set(k, v)		pthread_setspecific(k, v)
#define tsd_create(kp, d)	pthread_key_create(kp, d)
#define tsd_destroy(kp)		/* nothing */
371 * kstat creation, installation and deletion
373 extern kstat_t
*kstat_create(const char *, int,
374 const char *, const char *, uchar_t
, ulong_t
, uchar_t
);
375 extern void kstat_install(kstat_t
*);
376 extern void kstat_delete(kstat_t
*);
377 extern void kstat_waitq_enter(kstat_io_t
*);
378 extern void kstat_waitq_exit(kstat_io_t
*);
379 extern void kstat_runq_enter(kstat_io_t
*);
380 extern void kstat_runq_exit(kstat_io_t
*);
381 extern void kstat_waitq_to_runq(kstat_io_t
*);
382 extern void kstat_runq_back_to_waitq(kstat_io_t
*);
383 extern void kstat_set_raw_ops(kstat_t
*ksp
,
384 int (*headers
)(char *buf
, size_t size
),
385 int (*data
)(char *buf
, size_t size
, void *data
),
386 void *(*addr
)(kstat_t
*ksp
, loff_t index
));
/* Allocation flags, expressed through the umem flag values. */
#define KM_SLEEP		UMEM_NOFAIL
#define KM_PUSHPAGE		KM_SLEEP
#define KM_NOSLEEP		UMEM_DEFAULT
#define KM_NODEBUG		0x0
#define KMC_NODEBUG		UMC_NODEBUG

/* kmem_* forwards to umem_*; vmem_* forwards to kmem_*. */
#define kmem_alloc(size, flags)		umem_alloc(size, flags)
#define kmem_zalloc(size, flags)	umem_zalloc(size, flags)
#define kmem_free(buf, size)		umem_free(buf, size)
#define vmem_alloc(size, flags)		kmem_alloc(size, flags)
#define vmem_zalloc(size, flags)	kmem_zalloc(size, flags)
#define vmem_free(buf, size)		kmem_free(buf, size)
#define kmem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i)	\
	umem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i)
#define kmem_cache_destroy(cache)	umem_cache_destroy(cache)
#define kmem_cache_alloc(cache, flags)	umem_cache_alloc(cache, flags)
#define kmem_cache_free(cache, buf)	umem_cache_free(cache, buf)

/* The remaining hooks are stubbed out: constants or empty expansions. */
#define kmem_debugging()		0
#define kmem_cache_reap_now(cache)	/* nothing */
#define kmem_cache_set_move(cache, cb)	/* nothing */
#define POINTER_INVALIDATE(pp)		/* nothing */
#define POINTER_IS_VALID(p)		0
415 typedef umem_cache_t kmem_cache_t
;
417 typedef enum kmem_cbrc
{
/* Opaque task queue handle, dispatch id, and task callback signature. */
typedef struct taskq taskq_t;
typedef uintptr_t taskqid_t;
typedef void (task_func_t)(void *);
432 typedef struct taskq_ent
{
433 struct taskq_ent
*tqent_next
;
434 struct taskq_ent
*tqent_prev
;
435 task_func_t
*tqent_func
;
437 uintptr_t tqent_flags
;
/* Flag for a taskq entry. */
#define TQENT_FLAG_PREALLOC	0x1	/* taskq_dispatch_ent used */

/* Flags describing a task queue. */
#define TASKQ_PREPOPULATE	0x0001
#define TASKQ_CPR_SAFE		0x0002	/* Use CPR safe protocol */
#define TASKQ_DYNAMIC		0x0004	/* Use dynamic thread scheduling */
#define TASKQ_THREADS_CPU_PCT	0x0008	/* Scale # threads by # cpus */
#define TASKQ_DC_BATCH		0x0010	/* Mark threads as batch */

/* Dispatch flags; the first three reuse the KM_* allocation flags. */
#define TQ_SLEEP	KM_SLEEP	/* Can block for memory */
#define TQ_NOSLEEP	KM_NOSLEEP	/* cannot block for memory; may fail */
#define TQ_PUSHPAGE	KM_PUSHPAGE	/* Cannot perform I/O */
#define TQ_NOQUEUE	0x02		/* Do not enqueue if can't dispatch */
#define TQ_FRONT	0x08		/* Queue in front */
454 extern taskq_t
*system_taskq
;
456 extern taskq_t
*taskq_create(const char *, int, pri_t
, int, int, uint_t
);
457 #define taskq_create_proc(a, b, c, d, e, p, f) \
458 (taskq_create(a, b, c, d, e, f))
459 #define taskq_create_sysdc(a, b, d, e, p, dc, f) \
460 (taskq_create(a, b, maxclsyspri, d, e, f))
461 extern taskqid_t
taskq_dispatch(taskq_t
*, task_func_t
, void *, uint_t
);
462 extern taskqid_t
taskq_dispatch_delay(taskq_t
*, task_func_t
, void *, uint_t
,
464 extern void taskq_dispatch_ent(taskq_t
*, task_func_t
, void *, uint_t
,
466 extern int taskq_empty_ent(taskq_ent_t
*);
467 extern void taskq_init_ent(taskq_ent_t
*);
468 extern void taskq_destroy(taskq_t
*);
469 extern void taskq_wait(taskq_t
*);
470 extern void taskq_wait_id(taskq_t
*, taskqid_t
);
471 extern int taskq_member(taskq_t
*, kthread_t
*);
472 extern int taskq_cancel_id(taskq_t
*, taskqid_t
);
473 extern void system_taskq_init(void);
474 extern void system_taskq_fini(void);
/* Length of the xvattr attribute bitmaps, in 32-bit words, and its magic. */
#define XVA_MAPSIZE	3
#define XVA_MAGIC	0x78766174
482 typedef struct vnode
{
488 #define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */
490 typedef struct xoptattr
{
491 timestruc_t xoa_createtime
; /* Create time of file */
494 uint8_t xoa_readonly
;
496 uint8_t xoa_nounlink
;
497 uint8_t xoa_immutable
;
498 uint8_t xoa_appendonly
;
500 uint8_t xoa_settable
;
502 uint8_t xoa_av_quarantined
;
503 uint8_t xoa_av_modified
;
504 uint8_t xoa_av_scanstamp
[AV_SCANSTAMP_SZ
];
510 typedef struct vattr
{
511 uint_t va_mask
; /* bit-mask of attributes */
512 u_offset_t va_size
; /* file size in bytes */
516 typedef struct xvattr
{
517 vattr_t xva_vattr
; /* Embedded vattr structure */
518 uint32_t xva_magic
; /* Magic Number */
519 uint32_t xva_mapsize
; /* Size of attr bitmap (32-bit words) */
520 uint32_t *xva_rtnattrmapp
; /* Ptr to xva_rtnattrmap[] */
521 uint32_t xva_reqattrmap
[XVA_MAPSIZE
]; /* Requested attrs */
522 uint32_t xva_rtnattrmap
[XVA_MAPSIZE
]; /* Returned attrs */
523 xoptattr_t xva_xoptattrs
; /* Optional attributes */
526 typedef struct vsecattr
{
527 uint_t vsa_mask
; /* See below */
528 int vsa_aclcnt
; /* ACL entry count */
529 void *vsa_aclentp
; /* pointer to ACL entries */
530 int vsa_dfaclcnt
; /* default ACL entry count */
531 void *vsa_dfaclentp
; /* pointer to default ACL entries */
532 size_t vsa_aclentsz
; /* ACE size in bytes of vsa_aclentp */
/*
 * Attribute selector bits, one bit per attribute.
 * Bit 0x04000 is not assigned in this list.
 */
#define AT_TYPE		0x00001
#define AT_MODE		0x00002
#define AT_UID		0x00004
#define AT_GID		0x00008
#define AT_FSID		0x00010
#define AT_NODEID	0x00020
#define AT_NLINK	0x00040
#define AT_SIZE		0x00080
#define AT_ATIME	0x00100
#define AT_MTIME	0x00200
#define AT_CTIME	0x00400
#define AT_RDEV		0x00800
#define AT_BLKSIZE	0x01000
#define AT_NBLOCKS	0x02000
#define AT_SEQ		0x08000
#define AT_XVATTR	0x10000
554 extern int fop_getattr(vnode_t
*vp
, vattr_t
*vap
);
/*
 * Vnode operations reduced to what this environment provides.
 *
 * VOP_CLOSE / VN_RELE - both call vn_close(vp); other arguments are ignored.
 * VOP_PUTPAGE         - nothing to do, evaluates to 0.
 * VOP_GETATTR         - fop_getattr(vp, vap); flags, cred and context are
 *                       ignored.
 * VOP_FSYNC           - fsync() on the vnode's v_fd.
 *
 * VOP_GETATTR must expand to an expression with no trailing ';' so that it
 * can be used as "error = VOP_GETATTR(...)", inside a condition, or as the
 * body of an un-braced if/else.
 */
#define VOP_CLOSE(vp, f, c, o, cr, ct)		vn_close(vp)
#define VOP_PUTPAGE(vp, of, sz, fl, cr, ct)	0
#define VOP_GETATTR(vp, vap, fl, cr, ct)	fop_getattr((vp), (vap))
#define VOP_FSYNC(vp, f, cr, ct)		fsync((vp)->v_fd)
#define VN_RELE(vp)				vn_close(vp)
564 extern int vn_open(char *path
, int x1
, int oflags
, int mode
, vnode_t
**vpp
,
566 extern int vn_openat(char *path
, int x1
, int oflags
, int mode
, vnode_t
**vpp
,
567 int x2
, int x3
, vnode_t
*vp
, int fd
);
568 extern int vn_rdwr(int uio
, vnode_t
*vp
, void *addr
, ssize_t len
,
569 offset_t offset
, int x1
, int x2
, rlim64_t x3
, void *x4
, ssize_t
*residp
);
570 extern void vn_close(vnode_t
*vp
);
/* Path operations go straight to the C library; extra arguments are dropped. */
#define vn_remove(path, x1, x2)		remove(path)
#define vn_rename(from, to, seg)	rename((from), (to))
/* Every vnode is reported as writable. */
#define vn_is_readonly(vp)		B_FALSE
576 extern vnode_t
*rootdir
;
578 #include <sys/file.h> /* for FREAD, FWRITE, etc */
/*
 * The tick counter is gethrtime() shifted right by 23 bits; hz is the
 * tick frequency that goes with that shift.
 */
#define ddi_get_lbolt()		(gethrtime() >> 23)
#define ddi_get_lbolt64()	(gethrtime() >> 23)
#define hz	119	/* frequency when using gethrtime() >> 23 for lbolt */
587 extern void delay(clock_t ticks
);
/*
 * Time-to-tick conversions.  The sub-second forms divide by an integer
 * quotient (for example MILLISEC / hz), so results are truncated.
 */
#define SEC_TO_TICK(s)		((s) * hz)
#define MSEC_TO_TICK(ms)	((ms) / (MILLISEC / hz))
#define USEC_TO_TICK(us)	((us) / (MICROSEC / hz))
#define NSEC_TO_TICK(ns)	((ns) / (NANOSEC / hz))
594 #define gethrestime_sec() time(NULL)
595 #define gethrestime(t) \
597 (t)->tv_sec = gethrestime_sec();\
/* System-class priority bounds. */
#define minclsyspri	60
#define maxclsyspri	99

/* The thread id masked with (max_ncpus - 1). */
#define CPU_SEQID	(pthread_self() & (max_ncpus - 1))

/* Pages to bytes. */
#define ptob(x)		((x) * PAGESIZE)
613 extern uint64_t physmem
;
615 extern int highbit(ulong_t i
);
616 extern int random_get_bytes(uint8_t *ptr
, size_t len
);
617 extern int random_get_pseudo_bytes(uint8_t *ptr
, size_t len
);
619 extern void kernel_init(int);
620 extern void kernel_fini(void);
623 extern void nicenum(uint64_t num
, char *buf
);
624 extern void show_pool_stats(struct spa
*);
626 typedef struct callb_cpr
{
630 #define CALLB_CPR_INIT(cp, lockp, func, name) { \
631 (cp)->cc_lockp = lockp; \
634 #define CALLB_CPR_SAFE_BEGIN(cp) { \
635 ASSERT(MUTEX_HELD((cp)->cc_lockp)); \
638 #define CALLB_CPR_SAFE_END(cp, lockp) { \
639 ASSERT(MUTEX_HELD((cp)->cc_lockp)); \
642 #define CALLB_CPR_EXIT(cp) { \
643 ASSERT(MUTEX_HELD((cp)->cc_lockp)); \
644 mutex_exit((cp)->cc_lockp); \
/* Zone checks always succeed here. */
#define zone_dataset_visible(x, y)	(1)
#define INGLOBALZONE(z)			(1)
extern char *kmem_vasprintf(const char *fmt, va_list adx);
extern char *kmem_asprintf(const char *fmt, ...);
/*
 * Frees a string through kmem_free(), passing strlen(str) + 1 as the size.
 * The argument is evaluated twice.
 */
#define strfree(str)	kmem_free((str), strlen(str) + 1)
655 * Hostname information
657 extern char hw_serial
[]; /* for userland-emulated hostid access */
658 extern int ddi_strtoul(const char *str
, char **nptr
, int base
,
659 unsigned long *result
);
661 extern int ddi_strtoull(const char *str
, char **nptr
, int base
,
662 u_longlong_t
*result
);
664 /* ZFS Boot Related stuff. */
674 typedef struct ace_object
{
676 uint32_t a_access_mask
;
679 uint8_t a_obj_type
[16];
680 uint8_t a_inherit_obj_type
[16];
/* Type codes for object ACEs. */
#define ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE	0x05
#define ACE_ACCESS_DENIED_OBJECT_ACE_TYPE	0x06
#define ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE	0x07
#define ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE	0x08
/* Opens the named file and returns a struct _buf handle for it. */
extern struct _buf *kobj_open_file(char *name);
690 extern int kobj_read_file(struct _buf
*file
, char *buf
, unsigned size
,
692 extern void kobj_close_file(struct _buf
*file
);
693 extern int kobj_get_filesize(struct _buf
*file
, uint64_t *size
);
694 extern int zfs_secpolicy_snapshot_perms(const char *name
, cred_t
*cr
);
695 extern int zfs_secpolicy_rename_perms(const char *from
, const char *to
,
697 extern int zfs_secpolicy_destroy_perms(const char *name
, cred_t
*cr
);
698 extern zoneid_t
getzoneid(void);
701 typedef struct ksiddomain
{
707 ksiddomain_t
*ksid_lookupdomain(const char *);
708 void ksiddomain_rele(ksiddomain_t
*);
#define DDI_SLEEP	KM_SLEEP
/*
 * Forwards to sysevent_post_event() with the arguments reordered to
 * (_c, _d, _b, "libzpool", _e, _f); _a and _g are not used.
 */
#define ddi_log_sysevent(_a, _b, _c, _d, _e, _f, _g)	\
	sysevent_post_event(_c, _d, _b, "libzpool", _e, _f)
/*
 * Sleep until the gethrtime() clock reaches "wakeup" (nanoseconds).
 *
 * The remaining time is split into whole seconds and leftover nanoseconds
 * and handed to nanosleep().  The return value of nanosleep() is ignored,
 * so an early return is not retried.  If "wakeup" has already passed,
 * delta is negative and the timespec is passed through unchanged.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement and its locals stay private.  "wakeup" is parenthesized so an
 * argument such as "a ? b : c" or "x | y" is subtracted as a whole.
 */
#define zfs_sleep_until(wakeup)					\
	do {							\
		hrtime_t delta = (wakeup) - gethrtime();	\
		struct timespec ts;				\
		ts.tv_sec = delta / NANOSEC;			\
		ts.tv_nsec = delta % NANOSEC;			\
		(void) nanosleep(&ts, NULL);			\
	} while (0)
725 #endif /* _SYS_ZFS_CONTEXT_H */