]> git.proxmox.com Git - mirror_zfs.git/blame - lib/libspl/include/sys/kstat.h
FreeBSD: Add zfs_link_create() error handling
[mirror_zfs.git] / lib / libspl / include / sys / kstat.h
CommitLineData
a26baf28
BB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
1d3ba0bf 9 * or https://opensource.org/licenses/CDDL-1.0.
a26baf28
BB
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#ifndef _SYS_KSTAT_H
27#define _SYS_KSTAT_H
28
29
30
31/*
32 * Definition of general kernel statistics structures and /dev/kstat ioctls
33 */
34
35#include <sys/types.h>
36#include <sys/time.h>
37
38#ifdef __cplusplus
39extern "C" {
40#endif
41
/* Identifier type for a kstat (ks_kid) and for the kstat chain ID. */
typedef int	kid_t;		/* unique kstat id */
43
44/*
45 * Kernel statistics driver (/dev/kstat) ioctls
46 */
47
/*
 * ioctl command numbers for /dev/kstat: 'K' in the second byte, the
 * command index in the low byte.  Every expansion is fully parenthesized
 * so the constants keep their value when used inside a larger expression
 * (e.g. masked with & or compared with ==).
 */
#define	KSTAT_IOC_BASE		('K' << 8)

#define	KSTAT_IOC_CHAIN_ID	(KSTAT_IOC_BASE | 0x01)
#define	KSTAT_IOC_READ		(KSTAT_IOC_BASE | 0x02)
#define	KSTAT_IOC_WRITE		(KSTAT_IOC_BASE | 0x03)
53
54/*
55 * /dev/kstat ioctl usage (kd denotes /dev/kstat descriptor):
56 *
57 * kcid = ioctl(kd, KSTAT_IOC_CHAIN_ID, NULL);
58 * kcid = ioctl(kd, KSTAT_IOC_READ, kstat_t *);
59 * kcid = ioctl(kd, KSTAT_IOC_WRITE, kstat_t *);
60 */
61
/*
 * Size of the fixed string buffers in kstat structures (ks_module, ks_name,
 * ks_class, and the name fields of named and timer kstats).
 */
#define	KSTAT_STRLEN	255	/* 254 chars + NUL; must be 16 * n - 1 */
63
64/*
65 * The generic kstat header
66 */
67
68typedef struct kstat {
69 /*
70 * Fields relevant to both kernel and user
71 */
72 hrtime_t ks_crtime; /* creation time (from gethrtime()) */
73 struct kstat *ks_next; /* kstat chain linkage */
74 kid_t ks_kid; /* unique kstat ID */
75 char ks_module[KSTAT_STRLEN]; /* provider module name */
76 uchar_t ks_resv; /* reserved, currently just padding */
77 int ks_instance; /* provider module's instance */
78 char ks_name[KSTAT_STRLEN]; /* kstat name */
79 uchar_t ks_type; /* kstat data type */
80 char ks_class[KSTAT_STRLEN]; /* kstat class */
81 uchar_t ks_flags; /* kstat flags */
82 void *ks_data; /* kstat type-specific data */
83 uint_t ks_ndata; /* # of type-specific data records */
84 size_t ks_data_size; /* total size of kstat data section */
78595377 85 hrtime_t ks_snaptime; /* time of last data snapshot */
a26baf28
BB
86 /*
87 * Fields relevant to kernel only
88 */
89 int (*ks_update)(struct kstat *, int); /* dynamic update */
90 void *ks_private; /* arbitrary provider-private data */
91 int (*ks_snapshot)(struct kstat *, void *, int);
92 void *ks_lock; /* protects this kstat's data */
93} kstat_t;
94
a26baf28
BB
95/*
96 * kstat structure and locking strategy
97 *
98 * Each kstat consists of a header section (a kstat_t) and a data section.
99 * The system maintains a set of kstats, protected by kstat_chain_lock.
100 * kstat_chain_lock protects all additions to/deletions from this set,
101 * as well as all changes to kstat headers. kstat data sections are
102 * *optionally* protected by the per-kstat ks_lock. If ks_lock is non-NULL,
103 * kstat clients (e.g. /dev/kstat) will acquire this lock for all of their
104 * operations on that kstat. It is up to the kstat provider to decide whether
105 * guaranteeing consistent data to kstat clients is sufficiently important
106 * to justify the locking cost. Note, however, that most statistic updates
107 * already occur under one of the provider's mutexes, so if the provider sets
108 * ks_lock to point to that mutex, then kstat data locking is free.
109 *
110 * NOTE: variable-size kstats MUST employ kstat data locking, to prevent
111 * data-size races with kstat clients.
112 *
113 * NOTE: ks_lock is really of type (kmutex_t *); it is declared as (void *)
114 * in the kstat header so that users don't have to be exposed to all of the
115 * kernel's lock-related data structures.
116 */
117
118#if defined(_KERNEL)
119
/*
 * Acquire / release the optional per-kstat data lock.  When ks_lock is
 * NULL both macros do nothing.
 *
 * Each macro is wrapped in do { } while (0) so that it acts as exactly one
 * statement and can be used in an unbraced if/else.  The temporary has a
 * name unlikely to clash with identifiers appearing in the argument `k'.
 */
#define	KSTAT_ENTER(k)							\
	do {								\
		kmutex_t *kstat_lp_ = (k)->ks_lock;			\
		if (kstat_lp_)						\
			mutex_enter(kstat_lp_);				\
	} while (0)

#define	KSTAT_EXIT(k)							\
	do {								\
		kmutex_t *kstat_lp_ = (k)->ks_lock;			\
		if (kstat_lp_)						\
			mutex_exit(kstat_lp_);				\
	} while (0)

/* Call the kstat's ks_update routine; rw is KSTAT_READ or KSTAT_WRITE. */
#define	KSTAT_UPDATE(k, rw)		(*(k)->ks_update)((k), (rw))

/* Call the kstat's ks_snapshot routine on buffer `buf'. */
#define	KSTAT_SNAPSHOT(k, buf, rw)	(*(k)->ks_snapshot)((k), (buf), (rw))
129
130#endif /* defined(_KERNEL) */
131
132/*
133 * kstat time
134 *
135 * All times associated with kstats (e.g. creation time, snapshot time,
136 * kstat_timer_t and kstat_io_t timestamps, etc.) are 64-bit nanosecond values,
137 * as returned by gethrtime(). The accuracy of these timestamps is machine
138 * dependent, but the precision (units) is the same across all platforms.
139 */
140
141/*
142 * kstat identity (KID)
143 *
144 * Each kstat is assigned a unique KID (kstat ID) when it is added to the
145 * global kstat chain. The KID is used as a cookie by /dev/kstat to
146 * request information about the corresponding kstat. There is also
147 * an identity associated with the entire kstat chain, kstat_chain_id,
148 * which is bumped each time a kstat is added or deleted. /dev/kstat uses
149 * the chain ID to detect changes in the kstat chain (e.g., a new disk
150 * coming online) between ioctl()s.
151 */
152
153/*
154 * kstat module, kstat instance
155 *
156 * ks_module and ks_instance contain the name and instance of the module
157 * that created the kstat. In cases where there can only be one instance,
158 * ks_instance is 0. The kernel proper (/kernel/unix) uses "unix" as its
159 * module name.
160 */
161
162/*
163 * kstat name
164 *
165 * ks_name gives a meaningful name to a kstat. The full kstat namespace
166 * is module.instance.name, so the name only need be unique within a
167 * module. kstat_create() will fail if you try to create a kstat with
168 * an already-used (ks_module, ks_instance, ks_name) triplet. Spaces are
169 * allowed in kstat names, but strongly discouraged, since they hinder
170 * awk-style processing at user level.
171 */
172
173/*
174 * kstat type
175 *
176 * The kstat mechanism provides several flavors of kstat data, defined
177 * below. The "raw" kstat type is just treated as an array of bytes; you
178 * can use this to export any kind of data you want.
179 *
180 * Some kstat types allow multiple data structures per kstat, e.g.
181 * KSTAT_TYPE_NAMED; others do not. This is part of the spec for each
182 * kstat data type.
183 *
184 * User-level tools should *not* rely on the #define KSTAT_NUM_TYPES. To
185 * get this information, read out the standard system kstat "kstat_types".
186 */
187
/* Values for ks_type; the second comment gives the ks_ndata constraint. */
#define	KSTAT_TYPE_RAW		0	/* can be anything */
					/* ks_ndata >= 1 */
#define	KSTAT_TYPE_NAMED	1	/* name/value pair */
					/* ks_ndata >= 1 */
#define	KSTAT_TYPE_INTR		2	/* interrupt statistics */
					/* ks_ndata == 1 */
#define	KSTAT_TYPE_IO		3	/* I/O statistics */
					/* ks_ndata == 1 */
#define	KSTAT_TYPE_TIMER	4	/* event timer */
					/* ks_ndata >= 1 */

#define	KSTAT_NUM_TYPES		5	/* number of kstat types above */
200
201/*
202 * kstat class
203 *
204 * Each kstat can be characterized as belonging to some broad class
205 * of statistics, e.g. disk, tape, net, vm, streams, etc. This field
206 * can be used as a filter to extract related kstats. The following
207 * values are currently in use: disk, tape, net, controller, vm, kvm,
208 * hat, streams, kstat, and misc. (The kstat class encompasses things
209 * like kstat_types.)
210 */
211
212/*
213 * kstat flags
214 *
215 * Any of the following flags may be passed to kstat_create(). They are
216 * all zero by default.
217 *
218 * KSTAT_FLAG_VIRTUAL:
219 *
220 * Tells kstat_create() not to allocate memory for the
221 * kstat data section; instead, you will set the ks_data
222 * field to point to the data you wish to export. This
223 * provides a convenient way to export existing data
224 * structures.
225 *
226 * KSTAT_FLAG_VAR_SIZE:
227 *
228 * The size of the kstat you are creating will vary over time.
229 * For example, you may want to use the kstat mechanism to
230 * export a linked list. NOTE: The kstat framework does not
231 * manage the data section, so all variable-size kstats must be
232 * virtual kstats. Moreover, variable-size kstats MUST employ
233 * kstat data locking to prevent data-size races with kstat
234 * clients. See the section on "kstat snapshot" for details.
235 *
236 * KSTAT_FLAG_WRITABLE:
237 *
238 * Makes the kstat's data section writable by root.
239 * The ks_snapshot routine (see below) does not need to check for
240 * this; permission checking is handled in the kstat driver.
241 *
242 * KSTAT_FLAG_PERSISTENT:
243 *
244 * Indicates that this kstat is to be persistent over time.
245 * For persistent kstats, kstat_delete() simply marks the
246 * kstat as dormant; a subsequent kstat_create() reactivates
247 * the kstat. This feature is provided so that statistics
248 * are not lost across driver close/open (e.g., raw disk I/O
249 * on a disk with no mounted partitions.)
250 * NOTE: Persistent kstats cannot be virtual, since ks_data
251 * points to garbage as soon as the driver goes away.
252 *
253 * The following flags are maintained by the kstat framework:
254 *
255 * KSTAT_FLAG_DORMANT:
256 *
257 * For persistent kstats, indicates that the kstat is in the
258 * dormant state (e.g., the corresponding device is closed).
259 *
260 * KSTAT_FLAG_INVALID:
261 *
262 * This flag is set when a kstat is in a transitional state,
263 * e.g. between kstat_create() and kstat_install().
264 * kstat clients must not attempt to access the kstat's data
265 * if this flag is set.
266 */
267
/* Bit values for ks_flags; each flag occupies a distinct bit. */
#define	KSTAT_FLAG_VIRTUAL		0x01
#define	KSTAT_FLAG_VAR_SIZE		0x02
#define	KSTAT_FLAG_WRITABLE		0x04
#define	KSTAT_FLAG_PERSISTENT		0x08
#define	KSTAT_FLAG_DORMANT		0x10
#define	KSTAT_FLAG_INVALID		0x20
#define	KSTAT_FLAG_LONGSTRINGS		0x40
#define	KSTAT_FLAG_NO_HEADERS		0x80
276
277/*
278 * Dynamic update support
279 *
280 * The kstat mechanism allows for an optional ks_update function to update
281 * kstat data. This is useful for drivers where the underlying device
282 * keeps cheap hardware stats, but extraction is expensive. Instead of
283 * constantly keeping the kstat data section up to date, you can supply a
284 * ks_update function which updates the kstat's data section on demand.
285 * To take advantage of this feature, simply set the ks_update field before
286 * calling kstat_install().
287 *
288 * The ks_update function, if supplied, must have the following structure:
289 *
290 * int
291 * foo_kstat_update(kstat_t *ksp, int rw)
292 * {
293 * if (rw == KSTAT_WRITE) {
294 * ... update the native stats from ksp->ks_data;
295 * return EACCES if you don't support this
296 * } else {
297 * ... update ksp->ks_data from the native stats
298 * }
299 * }
300 *
301 * The ks_update return codes are: 0 for success, EACCES if you don't allow
302 * KSTAT_WRITE, and EIO for any other type of error.
303 *
304 * In general, the ks_update function may need to refer to provider-private
305 * data; for example, it may need a pointer to the provider's raw statistics.
306 * The ks_private field is available for this purpose. Its use is entirely
307 * at the provider's discretion.
308 *
309 * All variable-size kstats MUST supply a ks_update routine, which computes
310 * and sets ks_data_size (and ks_ndata if that is meaningful), since these
311 * are needed to perform kstat snapshots (see below).
312 *
313 * No kstat locking should be done inside the ks_update routine. The caller
314 * will already be holding the kstat's ks_lock (to ensure consistent data).
315 */
316
/* Direction argument (rw) passed to ks_update and ks_snapshot routines. */
#define	KSTAT_READ	0
#define	KSTAT_WRITE	1
319
320/*
321 * Kstat snapshot
322 *
323 * In order to get a consistent view of a kstat's data, clients must obey
324 * the kstat's locking strategy. However, these clients may need to perform
325 * operations on the data which could cause a fault (e.g. copyout()), or
326 * operations which are simply expensive. Doing so could cause deadlock
327 * (e.g. if you're holding a disk's kstat lock which is ultimately required
328 * to resolve a copyout() fault), performance degradation (since the providers'
329 * activity is serialized at the kstat lock), device timing problems, etc.
330 *
331 * To avoid these problems, kstat data is provided via snapshots. Taking
332 * a snapshot is a simple process: allocate a wired-down kernel buffer,
333 * acquire the kstat's data lock, copy the data into the buffer ("take the
334 * snapshot"), and release the lock. This ensures that the kstat's data lock
335 * will be held as briefly as possible, and that no faults will occur while
336 * the lock is held.
337 *
338 * Normally, the snapshot is taken by default_kstat_snapshot(), which
339 * timestamps the data (sets ks_snaptime), copies it, and does a little
340 * massaging to deal with incomplete transactions on i/o kstats. However,
341 * this routine only works for kstats with contiguous data (the typical case).
342 * If you create a kstat whose data is, say, a linked list, you must provide
343 * your own ks_snapshot routine. The routine you supply must have the
344 * following prototype (replace "foo" with something appropriate):
345 *
346 * int foo_kstat_snapshot(kstat_t *ksp, void *buf, int rw);
347 *
348 * The minimal snapshot routine -- one which copies contiguous data that
349 * doesn't need any massaging -- would be this:
350 *
351 * ksp->ks_snaptime = gethrtime();
352 * if (rw == KSTAT_WRITE)
861166b0 353 * memcpy(ksp->ks_data, buf, ksp->ks_data_size);
a26baf28 354 * else
861166b0 355 * memcpy(buf, ksp->ks_data, ksp->ks_data_size);
a26baf28
BB
356 * return (0);
357 *
358 * A more illuminating example is taking a snapshot of a linked list:
359 *
360 * ksp->ks_snaptime = gethrtime();
361 * if (rw == KSTAT_WRITE)
362 * return (EACCES); ... See below ...
363 * for (foo = first_foo; foo; foo = foo->next) {
861166b0 364 * memcpy(buf, foo, sizeof (struct foo));
a26baf28
BB
365 * buf = ((struct foo *) buf) + 1;
366 * }
367 * return (0);
368 *
369 * In the example above, we have decided that we don't want to allow
370 * KSTAT_WRITE access, so we return EACCES if this is attempted.
371 *
372 * The key points are:
373 *
374 * (1) ks_snaptime must be set (via gethrtime()) to timestamp the data.
375 * (2) Data gets copied from the kstat to the buffer on KSTAT_READ,
376 * and from the buffer to the kstat on KSTAT_WRITE.
377 * (3) ks_snapshot return values are: 0 for success, EACCES if you
378 * don't allow KSTAT_WRITE, and EIO for any other type of error.
379 *
380 * Named kstats (see section on "Named statistics" below) containing long
381 * strings (KSTAT_DATA_STRING) need special handling. The kstat driver
382 * assumes that all strings are copied into the buffer after the array of
383 * named kstats, and the pointers (KSTAT_NAMED_STR_PTR()) are updated to point
384 * into the copy within the buffer. The default snapshot routine does this,
385 * but overriding routines should contain at least the following:
386 *
 *	if (rw == KSTAT_READ) {
 *		kstat_named_t *knp = buf;
 *		char *end = (char *)(knp + ksp->ks_ndata);
 *		uint_t i;
 *
 *		... Do the regular copy ...
 *		memcpy(buf, ksp->ks_data, sizeof (kstat_named_t) * ksp->ks_ndata);
 *
 *		for (i = 0; i < ksp->ks_ndata; i++, knp++) {
 *			if (knp->data_type == KSTAT_DATA_STRING &&
 *			    KSTAT_NAMED_STR_PTR(knp) != NULL) {
 *				memcpy(end, KSTAT_NAMED_STR_PTR(knp),
 *				    KSTAT_NAMED_STR_BUFLEN(knp));
 *				KSTAT_NAMED_STR_PTR(knp) = end;
 *				end += KSTAT_NAMED_STR_BUFLEN(knp);
 *			}
 *		}
 *	}
404 */
405
406/*
407 * Named statistics.
408 *
409 * List of arbitrary name=value statistics.
410 */
411
412typedef struct kstat_named {
413 char name[KSTAT_STRLEN]; /* name of counter */
414 uchar_t data_type; /* data type */
415 union {
416 char c[16]; /* enough for 128-bit ints */
417 int32_t i32;
418 uint32_t ui32;
419 struct {
420 union {
421 char *ptr; /* NULL-term string */
422#if defined(_KERNEL) && defined(_MULTI_DATAMODEL)
423 caddr32_t ptr32;
424#endif
425 char __pad[8]; /* 64-bit padding */
426 } addr;
427 uint32_t len; /* # bytes for strlen + '\0' */
428 } str;
429/*
430 * The int64_t and uint64_t types are not valid for a maximally conformant
431 * 32-bit compilation environment (cc -Xc) using compilers prior to the
432 * introduction of C99 conforming compiler (reference ISO/IEC 9899:1990).
433 * In these cases, the visibility of i64 and ui64 is only permitted for
434 * 64-bit compilation environments or 32-bit non-maximally conformant
435 * C89 or C90 ANSI C compilation environments (cc -Xt and cc -Xa). In the
436 * C99 ANSI C compilation environment, the long long type is supported.
6b575417 437 * The _INT64_TYPE is defined by the implementation (see sys/inttypes.h).
a26baf28
BB
438 */
439#if defined(_INT64_TYPE)
440 int64_t i64;
441 uint64_t ui64;
442#endif
443 long l;
444 ulong_t ul;
445
446 /* These structure members are obsolete */
447
448 longlong_t ll;
449 u_longlong_t ull;
450 float f;
451 double d;
452 } value; /* value of counter */
453} kstat_named_t;
454
/* Values for the data_type field of a named kstat. */
#define	KSTAT_DATA_CHAR		0
#define	KSTAT_DATA_INT32	1
#define	KSTAT_DATA_UINT32	2
#define	KSTAT_DATA_INT64	3
#define	KSTAT_DATA_UINT64	4

/*
 * KSTAT_DATA_LONG / KSTAT_DATA_ULONG alias the fixed-width type matching
 * the data model, except in an LP64 kernel where they get their own values.
 */
#if !defined(_LP64)
#define	KSTAT_DATA_LONG		KSTAT_DATA_INT32
#define	KSTAT_DATA_ULONG	KSTAT_DATA_UINT32
#else
#if !defined(_KERNEL)
#define	KSTAT_DATA_LONG		KSTAT_DATA_INT64
#define	KSTAT_DATA_ULONG	KSTAT_DATA_UINT64
#else
#define	KSTAT_DATA_LONG		7	/* only visible to the kernel */
#define	KSTAT_DATA_ULONG	8	/* only visible to the kernel */
#endif	/* !_KERNEL */
#endif	/* !_LP64 */

/*
 * Statistics exporting named kstats with long strings (KSTAT_DATA_STRING)
 * may not make the assumption that ks_data_size is equal to (ks_ndata * sizeof
 * (kstat_named_t)).  ks_data_size in these cases is equal to the sum of the
 * amount of space required to store the strings (ie, the sum of
 * KSTAT_NAMED_STR_BUFLEN() for all KSTAT_DATA_STRING statistics) plus the
 * space required to store the kstat_named_t's.
 *
 * The default update routine will update ks_data_size automatically for
 * variable-length kstats containing long strings (using the default update
 * routine only makes sense if the string is the only thing that is changing
 * in size, and ks_ndata is constant).  Fixed-length kstats containing long
 * strings must explicitly change ks_data_size (after creation but before
 * initialization) to reflect the correct amount of space required for the
 * long strings and the kstat_named_t's.
 */
#define	KSTAT_DATA_STRING	9

/* These types are obsolete */

#define	KSTAT_DATA_LONGLONG	KSTAT_DATA_INT64
#define	KSTAT_DATA_ULONGLONG	KSTAT_DATA_UINT64
#define	KSTAT_DATA_FLOAT	5
#define	KSTAT_DATA_DOUBLE	6
498
/*
 * View the data section of the given kstat as an array of kstat_named_t.
 */
#define	KSTAT_NAMED_PTR(kptr)	((kstat_named_t *)(kptr)->ks_data)

/*
 * Retrieve the pointer of the string contained in the given named kstat.
 * The expansion is an lvalue, so it can also be assigned to.
 */
#define	KSTAT_NAMED_STR_PTR(knptr) ((knptr)->value.str.addr.ptr)

/*
 * Retrieve the length of the buffer required to store the string in the given
 * named kstat.
 */
#define	KSTAT_NAMED_STR_BUFLEN(knptr) ((knptr)->value.str.len)
511
512/*
513 * Interrupt statistics.
514 *
515 * An interrupt is a hard interrupt (sourced from the hardware device
516 * itself), a soft interrupt (induced by the system via the use of
517 * some system interrupt source), a watchdog interrupt (induced by
518 * a periodic timer call), spurious (an interrupt entry point was
519 * entered but there was no interrupt condition to service),
520 * or multiple service (an interrupt condition was detected and
521 * serviced just prior to returning from any of the other types).
522 *
523 * Measurement of the spurious class of interrupts is useful for
524 * autovectored devices in order to pinpoint any interrupt latency
525 * problems in a particular system configuration.
526 *
527 * Devices that have more than one interrupt of the same
528 * type should use multiple structures.
529 */
530
/* Indices into the intrs[] array of an interrupt kstat. */
#define	KSTAT_INTR_HARD			0
#define	KSTAT_INTR_SOFT			1
#define	KSTAT_INTR_WATCHDOG		2
#define	KSTAT_INTR_SPURIOUS		3
#define	KSTAT_INTR_MULTSVC		4

#define	KSTAT_NUM_INTRS			5	/* size of intrs[] */
538
539typedef struct kstat_intr {
540 uint_t intrs[KSTAT_NUM_INTRS]; /* interrupt counters */
541} kstat_intr_t;
542
543#define KSTAT_INTR_PTR(kptr) ((kstat_intr_t *)(kptr)->ks_data)
544
545/*
546 * I/O statistics.
547 */
548
549typedef struct kstat_io {
550
551 /*
552 * Basic counters.
553 *
554 * The counters should be updated at the end of service
555 * (e.g., just prior to calling biodone()).
556 */
557
558 u_longlong_t nread; /* number of bytes read */
559 u_longlong_t nwritten; /* number of bytes written */
560 uint_t reads; /* number of read operations */
561 uint_t writes; /* number of write operations */
562
563 /*
564 * Accumulated time and queue length statistics.
565 *
566 * Accumulated time statistics are kept as a running sum
567 * of "active" time. Queue length statistics are kept as a
568 * running sum of the product of queue length and elapsed time
569 * at that length -- i.e., a Riemann sum for queue length
570 * integrated against time. (You can also think of the active time
571 * as a Riemann sum, for the boolean function (queue_length > 0)
572 * integrated against time, or you can think of it as the
573 * Lebesgue measure of the set on which queue_length > 0.)
574 *
575 * ^
576 * | _________
577 * 8 | i4 |
578 * | | |
579 * Queue 6 | |
580 * Length | _________ | |
581 * 4 | i2 |_______| |
582 * | | i3 |
583 * 2_______| |
584 * | i1 |
585 * |_______________________________|
586 * Time-> t1 t2 t3 t4
587 *
588 * At each change of state (entry or exit from the queue),
589 * we add the elapsed time (since the previous state change)
590 * to the active time if the queue length was non-zero during
591 * that interval; and we add the product of the elapsed time
592 * times the queue length to the running length*time sum.
593 *
594 * This method is generalizable to measuring residency
595 * in any defined system: instead of queue lengths, think
596 * of "outstanding RPC calls to server X".
597 *
598 * A large number of I/O subsystems have at least two basic
599 * "lists" of transactions they manage: one for transactions
600 * that have been accepted for processing but for which processing
601 * has yet to begin, and one for transactions which are actively
602 * being processed (but not done). For this reason, two cumulative
603 * time statistics are defined here: wait (pre-service) time,
604 * and run (service) time.
605 *
606 * All times are 64-bit nanoseconds (hrtime_t), as returned by
607 * gethrtime().
608 *
609 * The units of cumulative busy time are accumulated nanoseconds.
610 * The units of cumulative length*time products are elapsed time
611 * times queue length.
612 *
613 * Updates to the fields below are performed implicitly by calls to
614 * these five functions:
615 *
616 * kstat_waitq_enter()
617 * kstat_waitq_exit()
618 * kstat_runq_enter()
619 * kstat_runq_exit()
620 *
621 * kstat_waitq_to_runq() (see below)
622 * kstat_runq_back_to_waitq() (see below)
623 *
624 * Since kstat_waitq_exit() is typically followed immediately
625 * by kstat_runq_enter(), there is a single kstat_waitq_to_runq()
626 * function which performs both operations. This is a performance
627 * win since only one timestamp is required.
628 *
629 * In some instances, it may be necessary to move a request from
630 * the run queue back to the wait queue, e.g. for write throttling.
631 * For these situations, call kstat_runq_back_to_waitq().
632 *
633 * These fields should never be updated by any other means.
634 */
635
636 hrtime_t wtime; /* cumulative wait (pre-service) time */
637 hrtime_t wlentime; /* cumulative wait length*time product */
638 hrtime_t wlastupdate; /* last time wait queue changed */
639 hrtime_t rtime; /* cumulative run (service) time */
640 hrtime_t rlentime; /* cumulative run length*time product */
641 hrtime_t rlastupdate; /* last time run queue changed */
642
643 uint_t wcnt; /* count of elements in wait state */
644 uint_t rcnt; /* count of elements in run state */
645
646} kstat_io_t;
647
648#define KSTAT_IO_PTR(kptr) ((kstat_io_t *)(kptr)->ks_data)
649
650/*
651 * Event timer statistics - cumulative elapsed time and number of events.
652 *
653 * Updates to these fields are performed implicitly by calls to
654 * kstat_timer_start() and kstat_timer_stop().
655 */
656
657typedef struct kstat_timer {
658 char name[KSTAT_STRLEN]; /* event name */
659 uchar_t resv; /* reserved */
660 u_longlong_t num_events; /* number of events */
661 hrtime_t elapsed_time; /* cumulative elapsed time */
662 hrtime_t min_time; /* shortest event duration */
663 hrtime_t max_time; /* longest event duration */
664 hrtime_t start_time; /* previous event start time */
665 hrtime_t stop_time; /* previous event stop time */
666} kstat_timer_t;
667
668#define KSTAT_TIMER_PTR(kptr) ((kstat_timer_t *)(kptr)->ks_data)
669
670#if defined(_KERNEL)
671
672#include <sys/t_lock.h>
673
674extern kid_t kstat_chain_id; /* bumped at each state change */
675extern void kstat_init(void); /* initialize kstat framework */
676
677/*
678 * Adding and deleting kstats.
679 *
680 * The typical sequence to add a kstat is:
681 *
682 * ksp = kstat_create(module, instance, name, class, type, ndata, flags);
683 * if (ksp) {
684 * ... provider initialization, if necessary
685 * kstat_install(ksp);
686 * }
687 *
688 * There are three logically distinct steps here:
689 *
690 * Step 1: System Initialization (kstat_create)
691 *
692 * kstat_create() performs system initialization. kstat_create()
693 * allocates memory for the entire kstat (header plus data), initializes
694 * all header fields, initializes the data section to all zeroes, assigns
695 * a unique KID, and puts the kstat onto the system's kstat chain.
696 * The returned kstat is marked invalid (KSTAT_FLAG_INVALID is set),
697 * because the provider (caller) has not yet had a chance to initialize
698 * the data section.
699 *
700 * By default, kstats are exported to all zones on the system. A kstat may be
701 * created via kstat_create_zone() to specify a zone to which the statistics
702 * should be exported. kstat_zone_add() may be used to specify additional
703 * zones to which the statistics are to be exported.
704 *
705 * Step 2: Provider Initialization
706 *
707 * The provider performs any necessary initialization of the data section,
708 * e.g. setting the name fields in a KSTAT_TYPE_NAMED. Virtual kstats set
709 * the ks_data field at this time. The provider may also set the ks_update,
710 * ks_snapshot, ks_private, and ks_lock fields if necessary.
711 *
712 * Step 3: Installation (kstat_install)
713 *
714 * Once the kstat is completely initialized, kstat_install() clears the
715 * INVALID flag, thus making the kstat accessible to the outside world.
716 * kstat_install() also clears the DORMANT flag for persistent kstats.
717 *
718 * Removing a kstat from the system
719 *
720 * kstat_delete(ksp) removes ksp from the kstat chain and frees all
721 * associated system resources. NOTE: When you call kstat_delete(),
722 * you must NOT be holding that kstat's ks_lock. Otherwise, you may
723 * deadlock with a kstat reader.
724 *
725 * Persistent kstats
726 *
727 * From the provider's point of view, persistence is transparent. The only
728 * difference between ephemeral (normal) kstats and persistent kstats
729 * is that you pass KSTAT_FLAG_PERSISTENT to kstat_create(). Magically,
730 * this has the effect of making your data visible even when you're
731 * not home. Persistence is important to tools like iostat, which want
732 * to get a meaningful picture of disk activity. Without persistence,
733 * raw disk i/o statistics could never accumulate: they would come and
734 * go with each open/close of the raw device.
735 *
736 * The magic of persistence works by slightly altering the behavior of
737 * kstat_create() and kstat_delete(). The first call to kstat_create()
738 * creates a new kstat, as usual. However, kstat_delete() does not
739 * actually delete the kstat: it performs one final update of the data
740 * (i.e., calls the ks_update routine), marks the kstat as dormant, and
741 * sets the ks_lock, ks_update, ks_private, and ks_snapshot fields back
742 * to their default values (since they might otherwise point to garbage,
743 * e.g. if the provider is going away). kstat clients can still access
744 * the dormant kstat just like a live kstat; they just continue to see
745 * the final data values as long as the kstat remains dormant.
746 * All subsequent kstat_create() calls simply find the already-existing,
747 * dormant kstat and return a pointer to it, without altering any fields.
748 * The provider then performs its usual initialization sequence, and
749 * calls kstat_install(). kstat_install() uses the old data values to
750 * initialize the native data (i.e., ks_update is called with KSTAT_WRITE),
751 * thus making it seem like you were never gone.
752 */
753
754extern kstat_t *kstat_create(const char *, int, const char *, const char *,
755 uchar_t, uint_t, uchar_t);
756extern kstat_t *kstat_create_zone(const char *, int, const char *,
757 const char *, uchar_t, uint_t, uchar_t, zoneid_t);
758extern void kstat_install(kstat_t *);
759extern void kstat_delete(kstat_t *);
760extern void kstat_named_setstr(kstat_named_t *knp, const char *src);
761extern void kstat_set_string(char *, const char *);
762extern void kstat_delete_byname(const char *, int, const char *);
763extern void kstat_delete_byname_zone(const char *, int, const char *, zoneid_t);
764extern void kstat_named_init(kstat_named_t *, const char *, uchar_t);
765extern void kstat_timer_init(kstat_timer_t *, const char *);
a26baf28
BB
766extern void kstat_timer_start(kstat_timer_t *);
767extern void kstat_timer_stop(kstat_timer_t *);
768
769extern void kstat_zone_add(kstat_t *, zoneid_t);
770extern void kstat_zone_remove(kstat_t *, zoneid_t);
771extern int kstat_zone_find(kstat_t *, zoneid_t);
772
773extern kstat_t *kstat_hold_bykid(kid_t kid, zoneid_t);
774extern kstat_t *kstat_hold_byname(const char *, int, const char *, zoneid_t);
775extern void kstat_rele(kstat_t *);
776
777#endif /* defined(_KERNEL) */
778
779#ifdef __cplusplus
780}
781#endif
782
783#endif /* _SYS_KSTAT_H */