1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). |
6 | * You may not use this file except in compliance with the License. | |
7 | * |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 |
22 | /*
23 | * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 | * Use is subject to license terms. |
25 | */ | |
26 |
27 | #pragma ident "%Z%%M% %I% %E% SMI"
28 |
29 | /*
30 | * based on usr/src/uts/common/os/kmem.c r1.64 from 2001/12/18 |
31 | * | |
32 | * The slab allocator, as described in the following two papers: | |
33 | * | |
34 | * Jeff Bonwick, | |
35 | * The Slab Allocator: An Object-Caching Kernel Memory Allocator. | |
36 | * Proceedings of the Summer 1994 Usenix Conference. | |
37 | * Available as /shared/sac/PSARC/1994/028/materials/kmem.pdf. | |
38 | * | |
39 | * Jeff Bonwick and Jonathan Adams, | |
40 | * Magazines and vmem: Extending the Slab Allocator to Many CPUs and | |
41 | * Arbitrary Resources. | |
42 | * Proceedings of the 2001 Usenix Conference. | |
43 | * Available as /shared/sac/PSARC/2000/550/materials/vmem.pdf. | |
44 | * | |
45 | * 1. Overview | |
46 | * ----------- | |
47 | * umem is very close to kmem in implementation. There are five major
48 | * areas of divergence: | |
49 | * | |
50 | * * Initialization | |
51 | * | |
52 | * * CPU handling | |
53 | * | |
54 | * * umem_update() | |
55 | * | |
56 | * KM_SLEEP vs. UMEM_NOFAIL
57 | * | |
58 | * lock ordering
59 | * |
60 | * 2. Initialization | |
61 | * ----------------- | |
62 | * kmem is initialized early on in boot, and knows that no one will call | |
63 | * into it before it is ready. umem does not have these luxuries. Instead, | |
64 | * initialization is divided into two phases: | |
65 | * | |
66 | * * library initialization, and | |
67 | * | |
68 | * * first use | |
69 | * | |
70 | * umem's full initialization happens at the time of the first allocation | |
71 | * request (via malloc() and friends, umem_alloc(), or umem_zalloc()), | |
72 | * or the first call to umem_cache_create(). | |
73 | * | |
74 | * umem_free(), and umem_cache_alloc() do not require special handling, | |
75 | * since the only way to get valid arguments for them is to successfully | |
76 | * call a function from the first group. | |
77 | * | |
78 | * 2.1. Library Initialization: umem_startup() | |
79 | * ------------------------------------------- | |
80 | * umem_startup() is libumem.so's .init section. It calls pthread_atfork() | |
81 | * to install the handlers necessary for umem's Fork1-Safety. Because of | |
82 | * race condition issues, all other pre-umem_init() initialization is done | |
83 | * statically (i.e. by the dynamic linker). | |
84 | * | |
85 | * For standalone use, umem_startup() returns everything to its initial | |
86 | * state. | |
87 | * | |
88 | * 2.2. First use: umem_init() | |
89 | * ------------------------------ | |
90 | * The first time any memory allocation function is used, we have to | |
91 | * create the backing caches and vmem arenas which are needed for it. | |
92 | * umem_init() is the central point for that task. When it completes, | |
93 | * umem_ready is either UMEM_READY (all set) or UMEM_READY_INIT_FAILED (unable | |
94 | * to initialize, probably due to lack of memory). | |
95 | * | |
96 | * There are four different paths from which umem_init() is called: | |
97 | * | |
98 | * * from umem_alloc() or umem_zalloc(), with 0 < size < UMEM_MAXBUF, | |
99 | * | |
100 | * * from umem_alloc() or umem_zalloc(), with size > UMEM_MAXBUF, | |
101 | * | |
102 | * * from umem_cache_create(), and | |
103 | * | |
104 | * * from memalign(), with align > UMEM_ALIGN. | |
105 | * | |
106 | * The last three just check if umem is initialized, and call umem_init() | |
107 | * if it is not. For performance reasons, the first case is more complicated. | |
108 | * | |
109 | * 2.2.1. umem_alloc()/umem_zalloc(), with 0 < size < UMEM_MAXBUF | |
110 | * ----------------------------------------------------------------- | |
111 | * In this case, umem_cache_alloc(&umem_null_cache, ...) is called. | |
112 | * There is special case code which causes any allocation on
113 | * &umem_null_cache to fail by returning (NULL), regardless of the | |
114 | * flags argument. | |
115 | * | |
116 | * So umem_cache_alloc() returns NULL, and umem_alloc()/umem_zalloc() call | |
117 | * umem_alloc_retry(). umem_alloc_retry() sees that the allocation | |
118 | * was against &umem_null_cache, and calls umem_init().
119 | * | |
120 | * If initialization is successful, umem_alloc_retry() returns 1, which | |
121 | * causes umem_alloc()/umem_zalloc() to start over, which causes it to load | |
122 | * the (now valid) cache pointer from umem_alloc_table. | |
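 *
 * As a rough sketch (illustrative only; _umem_alloc() later in this file
 * is the authoritative version), the fast path plus its retry looks like:
 *
 *	retry:
 *		cp = cache for this size, from umem_alloc_table;
 *		buf = _umem_cache_alloc(cp, umflag);
 *		if (buf == NULL && umem_alloc_retry(cp, umflag))
 *			goto retry;
 *		return (buf);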
123 | * | |
124 | * 2.2.2. Dealing with race conditions | |
125 | * ----------------------------------- | |
126 | * There are a couple race conditions resulting from the initialization | |
127 | * code that we have to guard against: | |
128 | * | |
129 | * * In umem_cache_create(), there is a special UMC_INTERNAL cflag | |
130 | * that is passed for caches created during initialization. It | |
131 | * is illegal for a user to try to create a UMC_INTERNAL cache. | |
132 | * This allows initialization to proceed, but any other | |
133 | * umem_cache_create()s will block by calling umem_init(). | |
134 | * | |
135 | * Since umem_null_cache has a 1-element cache_cpu, its cache_cpu_mask
136 | * is always zero. umem_cache_alloc uses cp->cache_cpu_mask to | |
137 | * mask the cpu number. This prevents a race between grabbing a | |
138 | * cache pointer out of umem_alloc_table and growing the cpu array. | |
139 | * | |
140 | * | |
141 | * 3. CPU handling | |
142 | * --------------- | |
143 | * kmem uses the CPU's sequence number to determine which "cpu cache" to | |
144 | * use for an allocation. Currently, there is no way to get the sequence | |
145 | * number in userspace. | |
146 | * | |
147 | * umem keeps track of cpu information in umem_cpus, an array of umem_max_ncpus | |
148 | * umem_cpu_t structures. CURCPU() is a "hint" function whose result we then
149 | * mask with either umem_cpu_mask or cp->cache_cpu_mask to find the actual
150 | * "cpu" id. The mechanics of this are all in the CPU(mask) macro.
151 | * | |
152 | * Currently, umem uses _lwp_self() as its hint. | |
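 *
 * For illustration (the CPUHINT()/CPU() macros below give the actual
 * definitions), selecting the per-"cpu" structure amounts to roughly:
 *
 *	umem_cpu_t *cpu = umem_cpus + (CPUHINT() & umem_cpu_mask);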
153 | * | |
154 | * | |
155 | * 4. The update thread | |
156 | * -------------------- | |
157 | * kmem uses a task queue, kmem_taskq, to do periodic maintenance on | |
158 | * every kmem cache. vmem has a periodic timeout for hash table resizing. | |
159 | * The kmem_taskq also provides a separate context for kmem_cache_reap()'s | |
160 | * to be done in, avoiding issues of the context of kmem_reap() callers. | |
161 | * | |
162 | * Instead, umem has the concept of "updates", which are asynchronous requests | |
163 | * for work attached to single caches. All caches with pending work are | |
164 | * on a doubly linked list rooted at the umem_null_cache. All update state | |
165 | * is protected by the umem_update_lock mutex, and the umem_update_cv is used | |
166 | * for notification between threads. | |
167 | * | |
168 | * 4.1. Cache states with regards to updates | |
169 | * ----------------------------------------- | |
170 | * A given cache is in one of three states: | |
171 | * | |
172 | * Inactive cache_uflags is zero, cache_u{next,prev} are NULL | |
173 | * | |
174 | * Work Requested cache_uflags is non-zero (but UMU_ACTIVE is not set), | |
175 | * cache_u{next,prev} link the cache onto the global | |
176 | * update list | |
177 | * | |
178 | * Active cache_uflags has UMU_ACTIVE set, cache_u{next,prev} | |
179 | * are NULL, and either umem_update_thr or | |
180 | * umem_st_update_thr are actively doing work on the | |
181 | * cache. | |
182 | * | |
183 | * An update can be added to any cache in any state -- if the cache is | |
184 | * Inactive, it transitions to being Work Requested. If the cache is | |
185 | * Active, the worker will notice the new update and act on it before | |
186 | * transitioning the cache to the Inactive state. | |
187 | * | |
188 | * If a cache is in the Active state, UMU_NOTIFY can be set, which asks | |
189 | * the worker to broadcast the umem_update_cv when it has finished. | |
190 | * | |
191 | * 4.2. Update interface | |
192 | * --------------------- | |
193 | * umem_add_update() adds an update to a particular cache. | |
194 | * umem_updateall() adds an update to all caches. | |
195 | * umem_remove_updates() returns a cache to the Inactive state. | |
196 | * | |
197 | * umem_process_updates() processes all caches in the Work Requested state.
198 | * | |
199 | * 4.3. Reaping | |
200 | * ------------ | |
201 | * When umem_reap() is called (at the time of heap growth), it schedules
202 | * UMU_REAP updates on every cache. It then checks to see if the update
203 | * thread exists (umem_update_thr != 0). If it does, it broadcasts
204 | * the umem_update_cv to wake the update thread up, and returns. | |
205 | * | |
206 | * If the update thread does not exist (umem_update_thr == 0), and the | |
207 | * program currently has multiple threads, umem_reap() attempts to create | |
208 | * a new update thread. | |
209 | * | |
210 | * If the process is not multithreaded, or the creation fails, umem_reap() | |
211 | * calls umem_st_update() to do an inline update. | |
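 *
 * In pseudocode (illustrative only; umem_reap() later in this file is
 * authoritative):
 *
 *	schedule a UMU_REAP update on every cache;
 *	if (umem_update_thr != 0)
 *		cond_broadcast(&umem_update_cv);
 *	else if (the process is multithreaded and a thread can be created)
 *		start the update thread;
 *	else
 *		umem_st_update();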
212 | * | |
213 | * 4.4. The update thread | |
214 | * ---------------------- | |
215 | * The update thread spends most of its time in cond_timedwait() on the | |
216 | * umem_update_cv. It wakes up under two conditions: | |
217 | * | |
218 | * * The timedwait times out, in which case it needs to run a global | |
219 | * update, or | |
220 | * | |
221 | * * someone cond_broadcast(3THR)s the umem_update_cv, in which case | |
222 | * it needs to check if there are any caches in the Work Requested | |
223 | * state. | |
224 | * | |
225 | * When it is time for another global update, umem calls umem_cache_update() | |
226 | * on every cache, then calls vmem_update(), which tunes the vmem structures. | |
227 | * umem_cache_update() can request further work using umem_add_update(). | |
228 | * | |
229 | * After any work from the global update completes, the update timer is | |
230 | * reset to umem_reap_interval seconds in the future. This makes the | |
231 | * updates self-throttling. | |
232 | * | |
233 | * Reaps are similarly self-throttling. After a UMU_REAP update has | |
234 | * been scheduled on all caches, umem_reap() sets a flag and wakes up the | |
235 | * update thread. The update thread notices the flag, and resets the | |
236 | * reap state. | |
237 | * | |
238 | * 4.5. Inline updates | |
239 | * ------------------- | |
240 | * If the update thread is not running, umem_st_update() is used instead. It | |
241 | * immediately does a global update (as above), then calls | |
242 | * umem_process_updates() to process both the reaps that umem_reap() added and | |
243 | * any work generated by the global update. Afterwards, it resets the reap | |
244 | * state. | |
245 | * | |
246 | * While umem_st_update() is running, umem_st_update_thr holds the thread
247 | * id of the thread performing the update. | |
248 | * | |
249 | * 4.6. Updates and fork1() | |
250 | * ------------------------ | |
251 | * umem has fork1() pre- and post-handlers which lock up (and release) every | |
252 | * mutex in every cache. They also lock up the umem_update_lock. Since | |
253 | * fork1() only copies over a single lwp, other threads (including the update | |
254 | * thread) could have been actively using a cache in the parent. This | |
255 | * can lead to inconsistencies in the child process. | |
256 | * | |
257 | * Because we locked all of the mutexes, the only possible inconsistencies are:
258 | * | |
259 | * * a umem_cache_alloc() could leak its buffer. | |
260 | * | |
261 | * * a caller of umem_depot_alloc() could leak a magazine, and all the | |
262 | * buffers contained in it. | |
263 | * | |
264 | * * a cache could be in the Active update state. In the child, there | |
265 | * would be no thread actually working on it. | |
266 | * | |
267 | * * a umem_hash_rescale() could leak the new hash table. | |
268 | * | |
269 | * * a umem_magazine_resize() could be in progress. | |
270 | * | |
271 | * * a umem_reap() could be in progress. | |
272 | * | |
273 | * The memory leaks we can't do anything about. umem_release_child() resets | |
274 | * the update state and moves any caches in the Active state to the Work Requested
275 | * state. This might cause some updates to be re-run, but UMU_REAP and | |
276 | * UMU_HASH_RESCALE are effectively idempotent, and the worst that can | |
277 | * happen from umem_magazine_resize() is resizing the magazine twice in close | |
278 | * succession. | |
279 | * | |
280 | * Much of the cleanup in umem_release_child() is skipped if | |
281 | * umem_st_update_thr == thr_self(). This is so that applications which call | |
282 | * fork1() from a cache callback do not break. Needless to say, any such
283 | * application is tremendously broken. | |
284 | * | |
285 | * | |
286 | * 5. KM_SLEEP vs. UMEM_NOFAIL
287 | * ---------------------------- | |
288 | * Allocations against kmem and vmem have two basic modes: SLEEP and | |
289 | * NOSLEEP. A sleeping allocation will go to sleep (waiting for
290 | * more memory) instead of failing (returning NULL). | |
291 | * | |
292 | * SLEEP allocations presume an extremely multithreaded model, with | |
293 | * a lot of allocation and deallocation activity. umem cannot presume | |
294 | * that its clients have any particular type of behavior. Instead, | |
295 | * it provides two types of allocations: | |
296 | * | |
297 | * * UMEM_DEFAULT, equivalent to KM_NOSLEEP (i.e. return NULL on | |
298 | * failure) | |
299 | * | |
300 | * * UMEM_NOFAIL, which, on failure, calls an optional callback | |
301 | * (registered with umem_nofail_callback()). | |
302 | * | |
303 | * The callback is invoked with no locks held, and can do an arbitrary | |
304 | * amount of work. It then has a choice between: | |
305 | * | |
306 | * * Returning UMEM_CALLBACK_RETRY, which will cause the allocation | |
307 | * to be restarted. | |
308 | * | |
309 | * * Returning UMEM_CALLBACK_EXIT(status), which will cause exit(2) | |
310 | * to be invoked with status. If multiple threads attempt to do | |
311 | * this simultaneously, only one will call exit(2). | |
312 | * | |
313 | * * Doing some kind of non-local exit (thr_exit(3thr), longjmp(3C), | |
314 | * etc.) | |
315 | * | |
316 | * The default callback returns UMEM_CALLBACK_EXIT(255). | |
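 *
 * For example, a client that would rather trim its own caches and retry
 * could register a callback like the following sketch (my_nofail_cb and
 * release_private_caches are hypothetical, client-supplied names):
 *
 *	static int
 *	my_nofail_cb(void)
 *	{
 *		release_private_caches();
 *		return (UMEM_CALLBACK_RETRY);
 *	}
 *	...
 *	umem_nofail_callback(my_nofail_cb);
 *	buf = umem_alloc(size, UMEM_NOFAIL);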
317 | * | |
318 | * To have these callbacks without risk of state corruption (in the case of | |
319 | * a non-local exit), we have to ensure that the callbacks get invoked | |
320 | * close to the original allocation, with no inconsistent state or held | |
321 | * locks. The following steps are taken: | |
322 | * | |
323 | * * All invocations of vmem are VM_NOSLEEP. | |
324 | * | |
325 | * All constructor callbacks (which can themselves do allocations)
326 | * are passed UMEM_DEFAULT as their required allocation argument. This | |
327 | * way, the constructor will fail, allowing the highest-level allocation | |
328 | * to invoke the nofail callback.
329 | * | |
330 | * If a constructor callback _does_ do a UMEM_NOFAIL allocation, and | |
331 | * the nofail callback does a non-local exit, we will leak the | |
332 | * partially-constructed buffer. | |
333 | * |
334 | * | |
335 | * 6. Lock Ordering | |
336 | * ---------------- | |
337 | * umem has a few more locks than kmem does, mostly in the update path. The | |
338 | * overall lock ordering (earlier locks must be acquired first) is: | |
339 | * | |
340 | * umem_init_lock | |
341 | * | |
342 | * vmem_list_lock | |
343 | * vmem_nosleep_lock.vmpl_mutex | |
344 | * vmem_t's: | |
345 | * vm_lock | |
346 | * sbrk_lock | |
347 | * | |
348 | * umem_cache_lock | |
349 | * umem_update_lock | |
350 | * umem_flags_lock | |
351 | * umem_cache_t's: | |
352 | * cache_cpu[*].cc_lock | |
353 | * cache_depot_lock | |
354 | * cache_lock | |
355 | * umem_log_header_t's: | |
356 | * lh_cpu[*].clh_lock | |
357 | * lh_lock | |
358 | */ |
359 | ||
360 | #include <umem_impl.h> |
361 | #include <sys/vmem_impl_user.h> | |
362 | #include "umem_base.h" | |
363 | #include "vmem_base.h" | |
364 | ||
365 | #include <sys/processor.h>
366 | #include <sys/sysmacros.h>
367 |
368 | #include <alloca.h>
369 | #include <errno.h> |
370 | #include <limits.h> | |
371 | #include <stdio.h> | |
372 | #include <stdlib.h> | |
373 | #include <string.h> | |
374 | #include <strings.h>
375 | #include <signal.h>
376 | #include <unistd.h>
377 | #include <atomic.h>
378 | |
379 | #include "misc.h" | |
380 | ||
381 | #define UMEM_VMFLAGS(umflag) (VM_NOSLEEP) | |
382 | ||
383 | size_t pagesize; | |
384 | ||
385 | /* | |
386 | * The default set of caches to back umem_alloc(). | |
387 | * These sizes should be reevaluated periodically. | |
388 | * | |
389 | * We want allocations that are multiples of the coherency granularity | |
390 | * (64 bytes) to be satisfied from a cache which is a multiple of 64 | |
391 | * bytes, so that it will be 64-byte aligned. For all multiples of 64, | |
392 | * the next kmem_cache_size greater than or equal to it must be a | |
393 | * multiple of 64. | |
394 | * |
395 | * This table must be in sorted order, from smallest to largest. The
396 | * highest slot must be UMEM_MAXBUF, and every slot afterwards must be | |
397 | * zero. | |
398 | */
399 | static int umem_alloc_sizes[] = {
400 | #ifdef _LP64 |
401 | 1 * 8, | |
402 | 1 * 16, | |
403 | 2 * 16, | |
404 | 3 * 16, | |
405 | #else | |
406 | 1 * 8, | |
407 | 2 * 8, | |
408 | 3 * 8, | |
409 | 4 * 8, 5 * 8, 6 * 8, 7 * 8, | |
410 | #endif | |
411 | 4 * 16, 5 * 16, 6 * 16, 7 * 16, | |
412 | 4 * 32, 5 * 32, 6 * 32, 7 * 32, | |
413 | 4 * 64, 5 * 64, 6 * 64, 7 * 64, | |
414 | 4 * 128, 5 * 128, 6 * 128, 7 * 128, | |
415 | P2ALIGN(8192 / 7, 64), | |
416 | P2ALIGN(8192 / 6, 64), | |
417 | P2ALIGN(8192 / 5, 64), | |
418 | P2ALIGN(8192 / 4, 64), 2304,
419 | P2ALIGN(8192 / 3, 64),
420 | P2ALIGN(8192 / 2, 64), 4544,
421 | P2ALIGN(8192 / 1, 64), 9216,
422 | 4096 * 3,
423 | UMEM_MAXBUF, /* = 8192 * 2 */ |
424 | /* 24 slots for user expansion */ | |
425 | 0, 0, 0, 0, 0, 0, 0, 0, | |
426 | 0, 0, 0, 0, 0, 0, 0, 0, | |
427 | 0, 0, 0, 0, 0, 0, 0, 0, | |
428 | }; |
429 | #define NUM_ALLOC_SIZES (sizeof (umem_alloc_sizes) / sizeof (*umem_alloc_sizes)) | |
430 | ||
431 | static umem_magtype_t umem_magtype[] = { |
432 | { 1, 8, 3200, 65536 }, | |
433 | { 3, 16, 256, 32768 }, | |
434 | { 7, 32, 64, 16384 }, | |
435 | { 15, 64, 0, 8192 }, | |
436 | { 31, 64, 0, 4096 }, | |
437 | { 47, 64, 0, 2048 }, | |
438 | { 63, 64, 0, 1024 }, | |
439 | { 95, 64, 0, 512 }, | |
440 | { 143, 64, 0, 0 }, | |
441 | }; | |
442 | ||
443 | /* | |
444 | * umem tunables | |
445 | */ | |
446 | uint32_t umem_max_ncpus; /* # of CPU caches. */ | |
447 | ||
448 | uint32_t umem_stack_depth = 15; /* # stack frames in a bufctl_audit */ | |
449 | uint32_t umem_reap_interval = 10; /* max reaping rate (seconds) */ | |
450 | uint_t umem_depot_contention = 2; /* max failed trylocks per real interval */ | |
451 | uint_t umem_abort = 1; /* whether to abort on error */ | |
452 | uint_t umem_output = 0; /* whether to write to standard error */ | |
453 | uint_t umem_logging = 0; /* umem_log_enter() override */ | |
454 | uint32_t umem_mtbf = 0; /* mean time between failures [default: off] */ | |
455 | size_t umem_transaction_log_size; /* size of transaction log */ | |
456 | size_t umem_content_log_size; /* size of content log */ | |
457 | size_t umem_failure_log_size; /* failure log [4 pages per CPU] */ | |
458 | size_t umem_slab_log_size; /* slab create log [4 pages per CPU] */ | |
459 | size_t umem_content_maxsave = 256; /* UMF_CONTENTS max bytes to log */ | |
460 | size_t umem_lite_minsize = 0; /* minimum buffer size for UMF_LITE */ | |
461 | size_t umem_lite_maxalign = 1024; /* maximum buffer alignment for UMF_LITE */ | |
462 | size_t umem_maxverify; /* maximum bytes to inspect in debug routines */ | |
463 | size_t umem_minfirewall; /* hardware-enforced redzone threshold */ | |
464 | ||
465 | uint_t umem_flags = 0; | |
466 | ||
467 | mutex_t umem_init_lock; /* locks initialization */ |
468 | cond_t umem_init_cv; /* initialization CV */ | |
469 | thread_t umem_init_thr; /* thread initializing */ |
470 | int umem_init_env_ready; /* environ pre-initted */ | |
471 | int umem_ready = UMEM_READY_STARTUP; | |
472 | ||
473 | static umem_nofail_callback_t *nofail_callback; | |
474 | static mutex_t umem_nofail_exit_lock;
475 | static thread_t umem_nofail_exit_thr; |
476 | ||
477 | static umem_cache_t *umem_slab_cache; | |
478 | static umem_cache_t *umem_bufctl_cache; | |
479 | static umem_cache_t *umem_bufctl_audit_cache; | |
480 | ||
481 | mutex_t umem_flags_lock;
482 | |
483 | static vmem_t *heap_arena; | |
484 | static vmem_alloc_t *heap_alloc; | |
485 | static vmem_free_t *heap_free; | |
486 | ||
487 | static vmem_t *umem_internal_arena; | |
488 | static vmem_t *umem_cache_arena; | |
489 | static vmem_t *umem_hash_arena; | |
490 | static vmem_t *umem_log_arena; | |
491 | static vmem_t *umem_oversize_arena; | |
492 | static vmem_t *umem_va_arena; | |
493 | static vmem_t *umem_default_arena; | |
494 | static vmem_t *umem_firewall_va_arena; | |
495 | static vmem_t *umem_firewall_arena; | |
496 | ||
497 | vmem_t *umem_memalign_arena; | |
498 | ||
499 | umem_log_header_t *umem_transaction_log; | |
500 | umem_log_header_t *umem_content_log; | |
501 | umem_log_header_t *umem_failure_log; | |
502 | umem_log_header_t *umem_slab_log; | |
503 | ||
504 | #define CPUHINT() (thr_self())
505 | #define CPUHINT_MAX() INT_MAX |
506 | ||
507 | #define CPU(mask) (umem_cpus + (CPUHINT() & (mask))) | |
508 | static umem_cpu_t umem_startup_cpu = { /* initial, single, cpu */ | |
509 | UMEM_CACHE_SIZE(0), | |
510 | 0 | |
511 | }; | |
512 | ||
513 | static uint32_t umem_cpu_mask = 0; /* global cpu mask */ | |
514 | static umem_cpu_t *umem_cpus = &umem_startup_cpu; /* cpu list */ | |
515 | ||
516 | volatile uint32_t umem_reaping; | |
517 | ||
518 | thread_t umem_update_thr; | |
519 | struct timeval umem_update_next; /* timeofday of next update */ | |
520 | volatile thread_t umem_st_update_thr; /* only used when single-thd */ | |
521 | ||
522 | #define IN_UPDATE() (thr_self() == umem_update_thr || \ | |
523 | thr_self() == umem_st_update_thr) | |
524 | #define IN_REAP() IN_UPDATE() | |
525 | ||
526 | mutex_t umem_update_lock; /* cache_u{next,prev,flags} */ |
527 | cond_t umem_update_cv; | |
528 | |
529 | volatile hrtime_t umem_reap_next; /* min hrtime of next reap */ | |
530 | ||
531 | mutex_t umem_cache_lock; /* inter-cache linkage only */
532 | |
533 | #ifdef UMEM_STANDALONE | |
534 | umem_cache_t umem_null_cache; | |
535 | static const umem_cache_t umem_null_cache_template = { | |
536 | #else | |
537 | umem_cache_t umem_null_cache = { | |
538 | #endif | |
539 | 0, 0, 0, 0, 0, | |
540 | 0, 0, | |
541 | 0, 0, | |
542 | 0, 0, | |
543 | "invalid_cache", | |
544 | 0, 0, | |
545 | NULL, NULL, NULL, NULL, | |
546 | NULL, | |
547 | 0, 0, 0, 0, | |
548 | &umem_null_cache, &umem_null_cache, | |
549 | &umem_null_cache, &umem_null_cache, | |
550 | 0, | |
551 | DEFAULTMUTEX, /* start of slab layer */ | |
552 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
553 | &umem_null_cache.cache_nullslab, | |
554 | { | |
555 | &umem_null_cache, | |
556 | NULL, | |
557 | &umem_null_cache.cache_nullslab, | |
558 | &umem_null_cache.cache_nullslab, | |
559 | NULL, | |
560 | -1, | |
561 | 0 | |
562 | }, | |
563 | NULL, | |
564 | NULL, | |
565 | DEFAULTMUTEX, /* start of depot layer */ | |
566 | NULL, { | |
567 | NULL, 0, 0, 0, 0 | |
568 | }, { | |
569 | NULL, 0, 0, 0, 0 | |
570 | }, { | |
571 | { | |
572 | DEFAULTMUTEX, /* start of CPU cache */ | |
573 | 0, 0, NULL, NULL, -1, -1, 0 | |
574 | } | |
575 | } | |
576 | }; | |
577 | ||
578 | #define ALLOC_TABLE_4 \ | |
579 | &umem_null_cache, &umem_null_cache, &umem_null_cache, &umem_null_cache | |
580 | ||
581 | #define ALLOC_TABLE_64 \ | |
582 | ALLOC_TABLE_4, ALLOC_TABLE_4, ALLOC_TABLE_4, ALLOC_TABLE_4, \ | |
583 | ALLOC_TABLE_4, ALLOC_TABLE_4, ALLOC_TABLE_4, ALLOC_TABLE_4, \ | |
584 | ALLOC_TABLE_4, ALLOC_TABLE_4, ALLOC_TABLE_4, ALLOC_TABLE_4, \ | |
585 | ALLOC_TABLE_4, ALLOC_TABLE_4, ALLOC_TABLE_4, ALLOC_TABLE_4 | |
586 | ||
587 | #define ALLOC_TABLE_1024 \ | |
588 | ALLOC_TABLE_64, ALLOC_TABLE_64, ALLOC_TABLE_64, ALLOC_TABLE_64, \ | |
589 | ALLOC_TABLE_64, ALLOC_TABLE_64, ALLOC_TABLE_64, ALLOC_TABLE_64, \ | |
590 | ALLOC_TABLE_64, ALLOC_TABLE_64, ALLOC_TABLE_64, ALLOC_TABLE_64, \ | |
591 | ALLOC_TABLE_64, ALLOC_TABLE_64, ALLOC_TABLE_64, ALLOC_TABLE_64 | |
592 | ||
593 | static umem_cache_t *umem_alloc_table[UMEM_MAXBUF >> UMEM_ALIGN_SHIFT] = { | |
594 | ALLOC_TABLE_1024, | |
595 | ALLOC_TABLE_1024 | |
596 | }; | |
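
/*
 * Illustrative note (the authoritative lookup lives in _umem_alloc()):
 * a small allocation is mapped to its cache roughly via
 *
 *	cp = umem_alloc_table[(size - 1) >> UMEM_ALIGN_SHIFT];
 *
 * Until umem_init() completes, every slot points at umem_null_cache, whose
 * allocations always fail; that failure steers the first caller into
 * umem_alloc_retry() and hence umem_init() (see section 2.2.1 above).
 */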
597 | ||
598 | ||
599 | /* Used to constrain audit-log stack traces */ | |
600 | caddr_t umem_min_stack; | |
601 | caddr_t umem_max_stack; | |
602 | ||
603 | ||
604 | #define UMERR_MODIFIED 0 /* buffer modified while on freelist */ |
605 | #define UMERR_REDZONE 1 /* redzone violation (write past end of buf) */ | |
606 | #define UMERR_DUPFREE 2 /* freed a buffer twice */ | |
607 | #define UMERR_BADADDR 3 /* freed a bad (unallocated) address */ | |
608 | #define UMERR_BADBUFTAG 4 /* buftag corrupted */ | |
609 | #define UMERR_BADBUFCTL 5 /* bufctl corrupted */ | |
610 | #define UMERR_BADCACHE 6 /* freed a buffer to the wrong cache */ | |
611 | #define UMERR_BADSIZE 7 /* alloc size != free size */ | |
612 | #define UMERR_BADBASE 8 /* buffer base address wrong */ | |
613 | ||
614 | struct { | |
615 | hrtime_t ump_timestamp; /* timestamp of error */ | |
616 | int ump_error; /* type of umem error (UMERR_*) */ | |
617 | void *ump_buffer; /* buffer that induced abort */ | |
618 | void *ump_realbuf; /* real start address for buffer */ | |
619 | umem_cache_t *ump_cache; /* buffer's cache according to client */ | |
620 | umem_cache_t *ump_realcache; /* actual cache containing buffer */ | |
621 | * umem_slab_t *ump_slab; /* slab according to umem_findslab() */
622 | umem_bufctl_t *ump_bufctl; /* bufctl */ | |
623 | } umem_abort_info; | |
624 | ||
625 | static void | |
626 | copy_pattern(uint64_t pattern, void *buf_arg, size_t size) | |
627 | { | |
628 | uint64_t *bufend = (uint64_t *)((char *)buf_arg + size); | |
629 | uint64_t *buf = buf_arg; | |
630 | ||
631 | while (buf < bufend) | |
632 | *buf++ = pattern; | |
633 | } | |
634 | ||
635 | static void * | |
636 | verify_pattern(uint64_t pattern, void *buf_arg, size_t size) | |
637 | { | |
638 | uint64_t *bufend = (uint64_t *)((char *)buf_arg + size); | |
639 | uint64_t *buf; | |
640 | ||
641 | for (buf = buf_arg; buf < bufend; buf++) | |
642 | if (*buf != pattern) | |
643 | return (buf); | |
644 | return (NULL); | |
645 | } | |
646 | ||
647 | static void * | |
648 | verify_and_copy_pattern(uint64_t old, uint64_t new, void *buf_arg, size_t size) | |
649 | { | |
650 | uint64_t *bufend = (uint64_t *)((char *)buf_arg + size); | |
651 | uint64_t *buf; | |
652 | ||
653 | for (buf = buf_arg; buf < bufend; buf++) { | |
654 | if (*buf != old) { | |
655 | copy_pattern(old, buf_arg, | |
656 | (char *)buf - (char *)buf_arg); | |
657 | return (buf); | |
658 | } | |
659 | *buf = new; | |
660 | } | |
661 | ||
662 | return (NULL); | |
663 | } | |
664 | ||
665 | void | |
666 | umem_cache_applyall(void (*func)(umem_cache_t *)) | |
667 | { | |
668 | umem_cache_t *cp; | |
669 | ||
670 | (void) mutex_lock(&umem_cache_lock); | |
671 | for (cp = umem_null_cache.cache_next; cp != &umem_null_cache; | |
672 | cp = cp->cache_next) | |
673 | func(cp); | |
674 | (void) mutex_unlock(&umem_cache_lock); | |
675 | } | |
676 | ||
677 | static void | |
678 | umem_add_update_unlocked(umem_cache_t *cp, int flags) | |
679 | { | |
680 | umem_cache_t *cnext, *cprev; | |
681 | ||
682 | flags &= ~UMU_ACTIVE; | |
683 | ||
684 | if (!flags) | |
685 | return; | |
686 | ||
687 | if (cp->cache_uflags & UMU_ACTIVE) { | |
688 | cp->cache_uflags |= flags; | |
689 | } else { | |
690 | if (cp->cache_unext != NULL) { | |
691 | ASSERT(cp->cache_uflags != 0); | |
692 | cp->cache_uflags |= flags; | |
693 | } else { | |
694 | ASSERT(cp->cache_uflags == 0); | |
695 | cp->cache_uflags = flags; | |
696 | cp->cache_unext = cnext = &umem_null_cache; | |
697 | cp->cache_uprev = cprev = umem_null_cache.cache_uprev; | |
698 | cnext->cache_uprev = cp; | |
699 | cprev->cache_unext = cp; | |
700 | } | |
701 | } | |
702 | } | |
703 | ||
704 | static void | |
705 | umem_add_update(umem_cache_t *cp, int flags) | |
706 | { | |
707 | (void) mutex_lock(&umem_update_lock); | |
708 | ||
709 | umem_add_update_unlocked(cp, flags); | |
710 | ||
711 | if (!IN_UPDATE()) | |
712 | (void) cond_broadcast(&umem_update_cv); | |
713 | ||
714 | (void) mutex_unlock(&umem_update_lock); | |
715 | } | |
716 | ||
717 | /* | |
718 | * Remove a cache from the update list, waiting for any in-progress work to | |
719 | * complete first. | |
720 | */ | |
721 | static void | |
722 | umem_remove_updates(umem_cache_t *cp) | |
723 | { | |
724 | (void) mutex_lock(&umem_update_lock); | |
725 | ||
726 | /* | |
727 | * Get it out of the active state | |
728 | */ | |
729 | while (cp->cache_uflags & UMU_ACTIVE) { | |
730 | int cancel_state; |
731 | ||
732 | ASSERT(cp->cache_unext == NULL); |
733 | ||
734 | cp->cache_uflags |= UMU_NOTIFY; | |
735 | ||
736 | /* | |
737 | * Make sure the update state is sane, before we wait | |
738 | */ | |
739 | ASSERT(umem_update_thr != 0 || umem_st_update_thr != 0); | |
740 | ASSERT(umem_update_thr != thr_self() && | |
741 | umem_st_update_thr != thr_self()); | |
742 | ||
743 | (void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, |
744 | &cancel_state); | |
745 | (void) cond_wait(&umem_update_cv, &umem_update_lock); | |
746 | (void) pthread_setcancelstate(cancel_state, NULL); | |
747 | } |
748 | /* | |
749 | * Get it out of the Work Requested state | |
750 | */ | |
751 | if (cp->cache_unext != NULL) { | |
752 | cp->cache_uprev->cache_unext = cp->cache_unext; | |
753 | cp->cache_unext->cache_uprev = cp->cache_uprev; | |
754 | cp->cache_uprev = cp->cache_unext = NULL; | |
755 | cp->cache_uflags = 0; | |
756 | } | |
757 | /* | |
758 | * Make sure it is in the Inactive state | |
759 | */ | |
760 | ASSERT(cp->cache_unext == NULL && cp->cache_uflags == 0); | |
761 | (void) mutex_unlock(&umem_update_lock); | |
762 | } | |
763 | ||
764 | static void | |
765 | umem_updateall(int flags) | |
766 | { | |
767 | umem_cache_t *cp; | |
768 | ||
769 | /* | |
770 | * NOTE: To prevent deadlock, umem_cache_lock is always acquired first. | |
771 | * | |
772 | * (umem_add_update is called from things run via umem_cache_applyall) | |
773 | */ | |
774 | (void) mutex_lock(&umem_cache_lock); | |
775 | (void) mutex_lock(&umem_update_lock); | |
776 | ||
777 | for (cp = umem_null_cache.cache_next; cp != &umem_null_cache; | |
778 | cp = cp->cache_next) | |
779 | umem_add_update_unlocked(cp, flags); | |
780 | ||
781 | if (!IN_UPDATE()) | |
782 | (void) cond_broadcast(&umem_update_cv); | |
783 | ||
784 | (void) mutex_unlock(&umem_update_lock); | |
785 | (void) mutex_unlock(&umem_cache_lock); | |
786 | } | |
787 | ||
788 | /* | |
789 | * Debugging support. Given a buffer address, find its slab. | |
790 | */ | |
791 | static umem_slab_t * | |
792 | umem_findslab(umem_cache_t *cp, void *buf) | |
793 | { | |
794 | umem_slab_t *sp; | |
795 | ||
796 | (void) mutex_lock(&cp->cache_lock); | |
797 | for (sp = cp->cache_nullslab.slab_next; | |
798 | sp != &cp->cache_nullslab; sp = sp->slab_next) { | |
799 | if (UMEM_SLAB_MEMBER(sp, buf)) { | |
800 | (void) mutex_unlock(&cp->cache_lock); | |
801 | return (sp); | |
802 | } | |
803 | } | |
804 | (void) mutex_unlock(&cp->cache_lock); | |
805 | ||
806 | return (NULL); | |
807 | } | |
808 | ||
809 | static void | |
810 | umem_error(int error, umem_cache_t *cparg, void *bufarg) | |
811 | { | |
812 | umem_buftag_t *btp = NULL; | |
813 | umem_bufctl_t *bcp = NULL; | |
814 | umem_cache_t *cp = cparg; | |
815 | umem_slab_t *sp; | |
816 | uint64_t *off; | |
817 | void *buf = bufarg; | |
818 | ||
819 | int old_logging = umem_logging; | |
820 | ||
821 | umem_logging = 0; /* stop logging when a bad thing happens */ | |
822 | ||
823 | umem_abort_info.ump_timestamp = gethrtime(); | |
824 | ||
825 | sp = umem_findslab(cp, buf); | |
826 | if (sp == NULL) { | |
827 | for (cp = umem_null_cache.cache_prev; cp != &umem_null_cache; | |
828 | cp = cp->cache_prev) { | |
829 | if ((sp = umem_findslab(cp, buf)) != NULL) | |
830 | break; | |
831 | } | |
832 | } | |
833 | ||
834 | if (sp == NULL) { | |
835 | cp = NULL; | |
836 | error = UMERR_BADADDR; | |
837 | } else { | |
838 | if (cp != cparg) | |
839 | error = UMERR_BADCACHE; | |
840 | else | |
841 | buf = (char *)bufarg - ((uintptr_t)bufarg - | |
842 | (uintptr_t)sp->slab_base) % cp->cache_chunksize; | |
843 | if (buf != bufarg) | |
844 | error = UMERR_BADBASE; | |
845 | if (cp->cache_flags & UMF_BUFTAG) | |
846 | btp = UMEM_BUFTAG(cp, buf); | |
847 | if (cp->cache_flags & UMF_HASH) { | |
848 | (void) mutex_lock(&cp->cache_lock); | |
849 | for (bcp = *UMEM_HASH(cp, buf); bcp; bcp = bcp->bc_next) | |
850 | if (bcp->bc_addr == buf) | |
851 | break; | |
852 | (void) mutex_unlock(&cp->cache_lock); | |
853 | if (bcp == NULL && btp != NULL) | |
854 | bcp = btp->bt_bufctl; | |
855 | if (umem_findslab(cp->cache_bufctl_cache, bcp) == | |
856 | NULL || P2PHASE((uintptr_t)bcp, UMEM_ALIGN) || | |
857 | bcp->bc_addr != buf) { | |
858 | error = UMERR_BADBUFCTL; | |
859 | bcp = NULL; | |
860 | } | |
861 | } | |
862 | } | |
863 | ||
864 | umem_abort_info.ump_error = error; | |
865 | umem_abort_info.ump_buffer = bufarg; | |
866 | umem_abort_info.ump_realbuf = buf; | |
867 | umem_abort_info.ump_cache = cparg; | |
868 | umem_abort_info.ump_realcache = cp; | |
869 | umem_abort_info.ump_slab = sp; | |
870 | umem_abort_info.ump_bufctl = bcp; | |
871 | ||
872 | umem_printf("umem allocator: "); | |
873 | ||
874 | switch (error) { | |
875 | ||
876 | case UMERR_MODIFIED: | |
877 | umem_printf("buffer modified after being freed\n"); | |
878 | off = verify_pattern(UMEM_FREE_PATTERN, buf, cp->cache_verify); | |
879 | if (off == NULL) /* shouldn't happen */ | |
880 | off = buf; | |
881 | umem_printf("modification occurred at offset 0x%lx " | |
882 | "(0x%llx replaced by 0x%llx)\n", | |
883 | (uintptr_t)off - (uintptr_t)buf, | |
884 | (longlong_t)UMEM_FREE_PATTERN, (longlong_t)*off); | |
885 | break; | |
886 | ||
887 | case UMERR_REDZONE: | |
888 | umem_printf("redzone violation: write past end of buffer\n"); | |
889 | break; | |
890 | ||
891 | case UMERR_BADADDR: | |
892 | umem_printf("invalid free: buffer not in cache\n"); | |
893 | break; | |
894 | ||
895 | case UMERR_DUPFREE: | |
896 | umem_printf("duplicate free: buffer freed twice\n"); | |
897 | break; | |
898 | ||
899 | case UMERR_BADBUFTAG: | |
900 | umem_printf("boundary tag corrupted\n"); | |
901 | umem_printf("bcp ^ bxstat = %lx, should be %lx\n", | |
902 | (intptr_t)btp->bt_bufctl ^ btp->bt_bxstat, | |
903 | UMEM_BUFTAG_FREE); | |
904 | break; | |
905 | ||
906 | case UMERR_BADBUFCTL: | |
907 | umem_printf("bufctl corrupted\n"); | |
908 | break; | |
909 | ||
910 | case UMERR_BADCACHE: | |
911 | umem_printf("buffer freed to wrong cache\n"); | |
912 | umem_printf("buffer was allocated from %s,\n", cp->cache_name); | |
913 | umem_printf("caller attempting free to %s.\n", | |
914 | cparg->cache_name); | |
915 | break; | |
916 | ||
917 | case UMERR_BADSIZE: | |
918 | umem_printf("bad free: free size (%u) != alloc size (%u)\n", | |
919 | UMEM_SIZE_DECODE(((uint32_t *)btp)[0]), | |
920 | UMEM_SIZE_DECODE(((uint32_t *)btp)[1])); | |
921 | break; | |
922 | ||
923 | case UMERR_BADBASE: | |
924 | umem_printf("bad free: free address (%p) != alloc address " | |
925 | "(%p)\n", bufarg, buf); | |
926 | break; | |
927 | } | |
928 | ||
929 | umem_printf("buffer=%p bufctl=%p cache: %s\n", | |
930 | bufarg, (void *)bcp, cparg->cache_name); | |
931 | ||
932 | if (bcp != NULL && (cp->cache_flags & UMF_AUDIT) && | |
933 | error != UMERR_BADBUFCTL) { | |
934 | int d; | |
935 | timespec_t ts; | |
936 | hrtime_t diff; | |
937 | umem_bufctl_audit_t *bcap = (umem_bufctl_audit_t *)bcp; | |
938 | ||
939 | diff = umem_abort_info.ump_timestamp - bcap->bc_timestamp; | |
940 | ts.tv_sec = diff / NANOSEC; | |
941 | ts.tv_nsec = diff % NANOSEC; | |
942 | ||
943 | umem_printf("previous transaction on buffer %p:\n", buf); | |
944 | umem_printf("thread=%p time=T-%ld.%09ld slab=%p cache: %s\n", | |
945 | (void *)(intptr_t)bcap->bc_thread, ts.tv_sec, ts.tv_nsec, | |
946 | (void *)sp, cp->cache_name); | |
947 | for (d = 0; d < MIN(bcap->bc_depth, umem_stack_depth); d++) { | |
948 | (void) print_sym((void *)bcap->bc_stack[d]); | |
949 | umem_printf("\n"); | |
950 | } | |
951 | } | |
952 | ||
953 | umem_err_recoverable("umem: heap corruption detected"); | |
954 | ||
955 | umem_logging = old_logging; /* resume logging */ | |
956 | } | |
957 | ||
958 | void | |
959 | umem_nofail_callback(umem_nofail_callback_t *cb) | |
960 | { | |
961 | nofail_callback = cb; | |
962 | } | |
963 | ||
964 | static int | |
965 | umem_alloc_retry(umem_cache_t *cp, int umflag) | |
966 | { | |
967 | if (cp == &umem_null_cache) { | |
968 | if (umem_init()) | |
969 | return (1); /* retry */ | |
970 | /* | |
971 | * Initialization failed. Do normal failure processing. | |
972 | */ | |
973 | } | |
974 | if (umflag & UMEM_NOFAIL) { | |
975 | int def_result = UMEM_CALLBACK_EXIT(255); | |
976 | int result = def_result; | |
977 | umem_nofail_callback_t *callback = nofail_callback; | |
978 | ||
979 | if (callback != NULL) | |
980 | result = callback(); | |
981 | ||
982 | if (result == UMEM_CALLBACK_RETRY) | |
983 | return (1); | |
984 | ||
985 | if ((result & ~0xFF) != UMEM_CALLBACK_EXIT(0)) { | |
986 | log_message("nofail callback returned %x\n", result); | |
987 | result = def_result; | |
988 | } | |
989 | ||
990 | /* | |
991 | * only one thread will call exit | |
992 | */ | |
993 | if (umem_nofail_exit_thr == thr_self()) | |
994 | umem_panic("recursive UMEM_CALLBACK_EXIT()\n"); | |
995 | ||
996 | (void) mutex_lock(&umem_nofail_exit_lock); | |
997 | umem_nofail_exit_thr = thr_self(); | |
998 | exit(result & 0xFF); | |
999 | /*NOTREACHED*/ | |
1000 | } | |
1001 | return (0); | |
1002 | } | |
1003 | ||
1004 | static umem_log_header_t * | |
1005 | umem_log_init(size_t logsize) | |
1006 | { | |
1007 | umem_log_header_t *lhp; | |
1008 | int nchunks = 4 * umem_max_ncpus; | |
1009 | size_t lhsize = offsetof(umem_log_header_t, lh_cpu[umem_max_ncpus]); | |
1010 | int i; | |
1011 | ||
1012 | if (logsize == 0) | |
1013 | return (NULL); | |
1014 | ||
1015 | /* | |
1016 | * Make sure that lhp->lh_cpu[] is nicely aligned | |
1017 | * to prevent false sharing of cache lines. | |
1018 | */ | |
1019 | lhsize = P2ROUNDUP(lhsize, UMEM_ALIGN); | |
1020 | lhp = vmem_xalloc(umem_log_arena, lhsize, 64, P2NPHASE(lhsize, 64), 0, | |
1021 | NULL, NULL, VM_NOSLEEP); | |
1022 | if (lhp == NULL) | |
1023 | goto fail; | |
1024 | ||
1025 | bzero(lhp, lhsize); | |
1026 | ||
1027 | (void) mutex_init(&lhp->lh_lock, USYNC_THREAD, NULL); | |
1028 | lhp->lh_nchunks = nchunks; | |
1029 | lhp->lh_chunksize = P2ROUNDUP(logsize / nchunks, PAGESIZE); | |
1030 | if (lhp->lh_chunksize == 0) | |
1031 | lhp->lh_chunksize = PAGESIZE; | |
1032 | ||
1033 | lhp->lh_base = vmem_alloc(umem_log_arena, | |
1034 | lhp->lh_chunksize * nchunks, VM_NOSLEEP); | |
1035 | if (lhp->lh_base == NULL) | |
1036 | goto fail; | |
1037 | ||
1038 | lhp->lh_free = vmem_alloc(umem_log_arena, | |
1039 | nchunks * sizeof (int), VM_NOSLEEP); | |
1040 | if (lhp->lh_free == NULL) | |
1041 | goto fail; | |
1042 | ||
1043 | bzero(lhp->lh_base, lhp->lh_chunksize * nchunks); | |
1044 | ||
1045 | for (i = 0; i < umem_max_ncpus; i++) { | |
1046 | umem_cpu_log_header_t *clhp = &lhp->lh_cpu[i]; | |
1047 | (void) mutex_init(&clhp->clh_lock, USYNC_THREAD, NULL); | |
1048 | clhp->clh_chunk = i; | |
1049 | } | |
1050 | ||
1051 | for (i = umem_max_ncpus; i < nchunks; i++) | |
1052 | lhp->lh_free[i] = i; | |
1053 | ||
1054 | lhp->lh_head = umem_max_ncpus; | |
1055 | lhp->lh_tail = 0; | |
1056 | ||
1057 | return (lhp); | |
1058 | ||
1059 | fail: | |
1060 | if (lhp != NULL) { | |
1061 | if (lhp->lh_base != NULL) | |
1062 | vmem_free(umem_log_arena, lhp->lh_base, | |
1063 | lhp->lh_chunksize * nchunks); | |
1064 | ||
1065 | vmem_xfree(umem_log_arena, lhp, lhsize); | |
1066 | } | |
1067 | return (NULL); | |
1068 | } | |
1069 | ||
1070 | static void * | |
1071 | umem_log_enter(umem_log_header_t *lhp, void *data, size_t size) | |
1072 | { | |
1073 | void *logspace; | |
1074 | umem_cpu_log_header_t *clhp = | |
1075 | &lhp->lh_cpu[CPU(umem_cpu_mask)->cpu_number];
1076 | |
1077 | if (lhp == NULL || umem_logging == 0) | |
1078 | return (NULL); | |
1079 | ||
1080 | (void) mutex_lock(&clhp->clh_lock); | |
1081 | clhp->clh_hits++; | |
1082 | if (size > clhp->clh_avail) { | |
1083 | (void) mutex_lock(&lhp->lh_lock); | |
1084 | lhp->lh_hits++; | |
1085 | lhp->lh_free[lhp->lh_tail] = clhp->clh_chunk; | |
1086 | lhp->lh_tail = (lhp->lh_tail + 1) % lhp->lh_nchunks; | |
1087 | clhp->clh_chunk = lhp->lh_free[lhp->lh_head]; | |
1088 | lhp->lh_head = (lhp->lh_head + 1) % lhp->lh_nchunks; | |
1089 | clhp->clh_current = lhp->lh_base + | |
1090 | clhp->clh_chunk * lhp->lh_chunksize; | |
1091 | clhp->clh_avail = lhp->lh_chunksize; | |
1092 | if (size > lhp->lh_chunksize) | |
1093 | size = lhp->lh_chunksize; | |
1094 | (void) mutex_unlock(&lhp->lh_lock); | |
1095 | } | |
1096 | logspace = clhp->clh_current; | |
1097 | clhp->clh_current += size; | |
1098 | clhp->clh_avail -= size; | |
1099 | bcopy(data, logspace, size); | |
1100 | (void) mutex_unlock(&clhp->clh_lock); | |
1101 | return (logspace); | |
1102 | } | |
1103 | ||
1104 | #define UMEM_AUDIT(lp, cp, bcp) \ | |
1105 | { \ | |
1106 | umem_bufctl_audit_t *_bcp = (umem_bufctl_audit_t *)(bcp); \ | |
1107 | _bcp->bc_timestamp = gethrtime(); \ | |
1108 | _bcp->bc_thread = thr_self(); \ | |
1109 | _bcp->bc_depth = getpcstack(_bcp->bc_stack, umem_stack_depth, \ | |
1110 | (cp != NULL) && (cp->cache_flags & UMF_CHECKSIGNAL)); \ | |
1111 | _bcp->bc_lastlog = umem_log_enter((lp), _bcp, \ | |
1112 | UMEM_BUFCTL_AUDIT_SIZE); \ | |
1113 | } | |
1114 | ||
1115 | static void | |
1116 | umem_log_event(umem_log_header_t *lp, umem_cache_t *cp, | |
1117 | umem_slab_t *sp, void *addr) | |
1118 | { | |
1119 | umem_bufctl_audit_t *bcp; | |
1120 | UMEM_LOCAL_BUFCTL_AUDIT(&bcp); | |
1121 | ||
1122 | bzero(bcp, UMEM_BUFCTL_AUDIT_SIZE); | |
1123 | bcp->bc_addr = addr; | |
1124 | bcp->bc_slab = sp; | |
1125 | bcp->bc_cache = cp; | |
1126 | UMEM_AUDIT(lp, cp, bcp); | |
1127 | } | |
1128 | ||
1129 | /* | |
1130 | * Create a new slab for cache cp. | |
1131 | */ | |
1132 | static umem_slab_t * | |
1133 | umem_slab_create(umem_cache_t *cp, int umflag) | |
1134 | { | |
1135 | size_t slabsize = cp->cache_slabsize; | |
1136 | size_t chunksize = cp->cache_chunksize; | |
1137 | int cache_flags = cp->cache_flags; | |
1138 | size_t color, chunks; | |
1139 | char *buf, *slab; | |
1140 | umem_slab_t *sp; | |
1141 | umem_bufctl_t *bcp; | |
1142 | vmem_t *vmp = cp->cache_arena; | |
1143 | ||
1144 | color = cp->cache_color + cp->cache_align; | |
1145 | if (color > cp->cache_maxcolor) | |
1146 | color = cp->cache_mincolor; | |
1147 | cp->cache_color = color; | |
1148 | ||
1149 | slab = vmem_alloc(vmp, slabsize, UMEM_VMFLAGS(umflag)); | |
1150 | ||
1151 | if (slab == NULL) | |
1152 | goto vmem_alloc_failure; | |
1153 | ||
1154 | ASSERT(P2PHASE((uintptr_t)slab, vmp->vm_quantum) == 0); | |
1155 | ||
1156 | if (!(cp->cache_cflags & UMC_NOTOUCH) && | |
1157 | (cp->cache_flags & UMF_DEADBEEF)) | |
1158 | copy_pattern(UMEM_UNINITIALIZED_PATTERN, slab, slabsize); | |
1159 | ||
1160 | if (cache_flags & UMF_HASH) { | |
1161 | if ((sp = _umem_cache_alloc(umem_slab_cache, umflag)) == NULL) | |
1162 | goto slab_alloc_failure; | |
1163 | chunks = (slabsize - color) / chunksize; | |
1164 | } else { | |
1165 | sp = UMEM_SLAB(cp, slab); | |
1166 | chunks = (slabsize - sizeof (umem_slab_t) - color) / chunksize; | |
1167 | } | |
1168 | ||
1169 | sp->slab_cache = cp; | |
1170 | sp->slab_head = NULL; | |
1171 | sp->slab_refcnt = 0; | |
1172 | sp->slab_base = buf = slab + color; | |
1173 | sp->slab_chunks = chunks; | |
1174 | ||
1175 | ASSERT(chunks > 0); | |
1176 | while (chunks-- != 0) { | |
1177 | if (cache_flags & UMF_HASH) { | |
1178 | bcp = _umem_cache_alloc(cp->cache_bufctl_cache, umflag); | |
1179 | if (bcp == NULL) | |
1180 | goto bufctl_alloc_failure; | |
1181 | if (cache_flags & UMF_AUDIT) { | |
1182 | umem_bufctl_audit_t *bcap = | |
1183 | (umem_bufctl_audit_t *)bcp; | |
1184 | bzero(bcap, UMEM_BUFCTL_AUDIT_SIZE); | |
1185 | bcap->bc_cache = cp; | |
1186 | } | |
1187 | bcp->bc_addr = buf; | |
1188 | bcp->bc_slab = sp; | |
1189 | } else { | |
1190 | bcp = UMEM_BUFCTL(cp, buf); | |
1191 | } | |
1192 | if (cache_flags & UMF_BUFTAG) { | |
1193 | umem_buftag_t *btp = UMEM_BUFTAG(cp, buf); | |
1194 | btp->bt_redzone = UMEM_REDZONE_PATTERN; | |
1195 | btp->bt_bufctl = bcp; | |
1196 | btp->bt_bxstat = (intptr_t)bcp ^ UMEM_BUFTAG_FREE; | |
1197 | if (cache_flags & UMF_DEADBEEF) { | |
1198 | copy_pattern(UMEM_FREE_PATTERN, buf, | |
1199 | cp->cache_verify); | |
1200 | } | |
1201 | } | |
1202 | bcp->bc_next = sp->slab_head; | |
1203 | sp->slab_head = bcp; | |
1204 | buf += chunksize; | |
1205 | } | |
1206 | ||
1207 | umem_log_event(umem_slab_log, cp, sp, slab); | |
1208 | ||
1209 | return (sp); | |
1210 | ||
1211 | bufctl_alloc_failure: | |
1212 | ||
1213 | while ((bcp = sp->slab_head) != NULL) { | |
1214 | sp->slab_head = bcp->bc_next; | |
1215 | _umem_cache_free(cp->cache_bufctl_cache, bcp); | |
1216 | } | |
1217 | _umem_cache_free(umem_slab_cache, sp); | |
1218 | ||
1219 | slab_alloc_failure: | |
1220 | ||
1221 | vmem_free(vmp, slab, slabsize); | |
1222 | ||
1223 | vmem_alloc_failure: | |
1224 | ||
1225 | umem_log_event(umem_failure_log, cp, NULL, NULL); | |
1226 | atomic_add_64(&cp->cache_alloc_fail, 1); | |
1227 | ||
1228 | return (NULL); | |
1229 | } | |
1230 | ||
1231 | /* | |
1232 | * Destroy a slab. | |
1233 | */ | |
1234 | static void | |
1235 | umem_slab_destroy(umem_cache_t *cp, umem_slab_t *sp) | |
1236 | { | |
1237 | vmem_t *vmp = cp->cache_arena; | |
1238 | void *slab = (void *)P2ALIGN((uintptr_t)sp->slab_base, vmp->vm_quantum); | |
1239 | ||
1240 | if (cp->cache_flags & UMF_HASH) { | |
1241 | umem_bufctl_t *bcp; | |
1242 | while ((bcp = sp->slab_head) != NULL) { | |
1243 | sp->slab_head = bcp->bc_next; | |
1244 | _umem_cache_free(cp->cache_bufctl_cache, bcp); | |
1245 | } | |
1246 | _umem_cache_free(umem_slab_cache, sp); | |
1247 | } | |
1248 | vmem_free(vmp, slab, cp->cache_slabsize); | |
1249 | } | |
1250 | ||
1251 | /* | |
1252 | * Allocate a raw (unconstructed) buffer from cp's slab layer. | |
1253 | */ | |
1254 | static void * | |
1255 | umem_slab_alloc(umem_cache_t *cp, int umflag) | |
1256 | { | |
1257 | umem_bufctl_t *bcp, **hash_bucket; | |
1258 | umem_slab_t *sp; | |
1259 | void *buf; | |
1260 | ||
1261 | (void) mutex_lock(&cp->cache_lock); | |
1262 | cp->cache_slab_alloc++; | |
1263 | sp = cp->cache_freelist; | |
1264 | ASSERT(sp->slab_cache == cp); | |
1265 | if (sp->slab_head == NULL) { | |
1266 | /* | |
1267 | * The freelist is empty. Create a new slab. | |
1268 | */ | |
1269 | (void) mutex_unlock(&cp->cache_lock); | |
1270 | if (cp == &umem_null_cache) | |
1271 | return (NULL); | |
1272 | if ((sp = umem_slab_create(cp, umflag)) == NULL) | |
1273 | return (NULL); | |
1274 | (void) mutex_lock(&cp->cache_lock); | |
1275 | cp->cache_slab_create++; | |
1276 | if ((cp->cache_buftotal += sp->slab_chunks) > cp->cache_bufmax) | |
1277 | cp->cache_bufmax = cp->cache_buftotal; | |
1278 | sp->slab_next = cp->cache_freelist; | |
1279 | sp->slab_prev = cp->cache_freelist->slab_prev; | |
1280 | sp->slab_next->slab_prev = sp; | |
1281 | sp->slab_prev->slab_next = sp; | |
1282 | cp->cache_freelist = sp; | |
1283 | } | |
1284 | ||
1285 | sp->slab_refcnt++; | |
1286 | ASSERT(sp->slab_refcnt <= sp->slab_chunks); | |
1287 | ||
1288 | /* | |
1289 | * If we're taking the last buffer in the slab, | |
1290 | * remove the slab from the cache's freelist. | |
1291 | */ | |
1292 | bcp = sp->slab_head; | |
1293 | if ((sp->slab_head = bcp->bc_next) == NULL) { | |
1294 | cp->cache_freelist = sp->slab_next; | |
1295 | ASSERT(sp->slab_refcnt == sp->slab_chunks); | |
1296 | } | |
1297 | ||
1298 | if (cp->cache_flags & UMF_HASH) { | |
1299 | /* | |
1300 | * Add buffer to allocated-address hash table. | |
1301 | */ | |
1302 | buf = bcp->bc_addr; | |
1303 | hash_bucket = UMEM_HASH(cp, buf); | |
1304 | bcp->bc_next = *hash_bucket; | |
1305 | *hash_bucket = bcp; | |
1306 | if ((cp->cache_flags & (UMF_AUDIT | UMF_BUFTAG)) == UMF_AUDIT) { | |
1307 | UMEM_AUDIT(umem_transaction_log, cp, bcp); | |
1308 | } | |
1309 | } else { | |
1310 | buf = UMEM_BUF(cp, bcp); | |
1311 | } | |
1312 | ||
1313 | ASSERT(UMEM_SLAB_MEMBER(sp, buf)); | |
1314 | ||
1315 | (void) mutex_unlock(&cp->cache_lock); | |
1316 | ||
1317 | return (buf); | |
1318 | } | |
1319 | ||
1320 | /* | |
1321 | * Free a raw (unconstructed) buffer to cp's slab layer. | |
1322 | */ | |
1323 | static void | |
1324 | umem_slab_free(umem_cache_t *cp, void *buf) | |
1325 | { | |
1326 | umem_slab_t *sp; | |
1327 | umem_bufctl_t *bcp, **prev_bcpp; | |
1328 | ||
1329 | ASSERT(buf != NULL); | |
1330 | ||
1331 | (void) mutex_lock(&cp->cache_lock); | |
1332 | cp->cache_slab_free++; | |
1333 | ||
1334 | if (cp->cache_flags & UMF_HASH) { | |
1335 | /* | |
1336 | * Look up buffer in allocated-address hash table. | |
1337 | */ | |
1338 | prev_bcpp = UMEM_HASH(cp, buf); | |
1339 | while ((bcp = *prev_bcpp) != NULL) { | |
1340 | if (bcp->bc_addr == buf) { | |
1341 | *prev_bcpp = bcp->bc_next; | |
1342 | sp = bcp->bc_slab; | |
1343 | break; | |
1344 | } | |
1345 | cp->cache_lookup_depth++; | |
1346 | prev_bcpp = &bcp->bc_next; | |
1347 | } | |
1348 | } else { | |
1349 | bcp = UMEM_BUFCTL(cp, buf); | |
1350 | sp = UMEM_SLAB(cp, buf); | |
1351 | } | |
1352 | ||
1353 | if (bcp == NULL || sp->slab_cache != cp || !UMEM_SLAB_MEMBER(sp, buf)) { | |
1354 | (void) mutex_unlock(&cp->cache_lock); | |
1355 | umem_error(UMERR_BADADDR, cp, buf); | |
1356 | return; | |
1357 | } | |
1358 | ||
1359 | if ((cp->cache_flags & (UMF_AUDIT | UMF_BUFTAG)) == UMF_AUDIT) { | |
1360 | if (cp->cache_flags & UMF_CONTENTS) | |
1361 | ((umem_bufctl_audit_t *)bcp)->bc_contents = | |
1362 | umem_log_enter(umem_content_log, buf, | |
1363 | cp->cache_contents); | |
1364 | UMEM_AUDIT(umem_transaction_log, cp, bcp); | |
1365 | } | |
1366 | ||
1367 | /* | |
1368 | * If this slab isn't currently on the freelist, put it there. | |
1369 | */ | |
1370 | if (sp->slab_head == NULL) { | |
1371 | ASSERT(sp->slab_refcnt == sp->slab_chunks); | |
1372 | ASSERT(cp->cache_freelist != sp); | |
1373 | sp->slab_next->slab_prev = sp->slab_prev; | |
1374 | sp->slab_prev->slab_next = sp->slab_next; | |
1375 | sp->slab_next = cp->cache_freelist; | |
1376 | sp->slab_prev = cp->cache_freelist->slab_prev; | |
1377 | sp->slab_next->slab_prev = sp; | |
1378 | sp->slab_prev->slab_next = sp; | |
1379 | cp->cache_freelist = sp; | |
1380 | } | |
1381 | ||
1382 | bcp->bc_next = sp->slab_head; | |
1383 | sp->slab_head = bcp; | |
1384 | ||
1385 | ASSERT(sp->slab_refcnt >= 1); | |
1386 | if (--sp->slab_refcnt == 0) { | |
1387 | /* | |
1388 | * There are no outstanding allocations from this slab, | |
1389 | * so we can reclaim the memory. | |
1390 | */ | |
1391 | sp->slab_next->slab_prev = sp->slab_prev; | |
1392 | sp->slab_prev->slab_next = sp->slab_next; | |
1393 | if (sp == cp->cache_freelist) | |
1394 | cp->cache_freelist = sp->slab_next; | |
1395 | cp->cache_slab_destroy++; | |
1396 | cp->cache_buftotal -= sp->slab_chunks; | |
1397 | (void) mutex_unlock(&cp->cache_lock); | |
1398 | umem_slab_destroy(cp, sp); | |
1399 | return; | |
1400 | } | |
1401 | (void) mutex_unlock(&cp->cache_lock); | |
1402 | } | |
1403 | ||
1404 | static int | |
1405 | umem_cache_alloc_debug(umem_cache_t *cp, void *buf, int umflag) | |
1406 | { | |
1407 | umem_buftag_t *btp = UMEM_BUFTAG(cp, buf); | |
1408 | umem_bufctl_audit_t *bcp = (umem_bufctl_audit_t *)btp->bt_bufctl; | |
1409 | uint32_t mtbf; | |
1410 | int flags_nfatal; | |
1411 | ||
1412 | if (btp->bt_bxstat != ((intptr_t)bcp ^ UMEM_BUFTAG_FREE)) { | |
1413 | umem_error(UMERR_BADBUFTAG, cp, buf); | |
1414 | return (-1); | |
1415 | } | |
1416 | ||
1417 | btp->bt_bxstat = (intptr_t)bcp ^ UMEM_BUFTAG_ALLOC; | |
1418 | ||
1419 | if ((cp->cache_flags & UMF_HASH) && bcp->bc_addr != buf) { | |
1420 | umem_error(UMERR_BADBUFCTL, cp, buf); | |
1421 | return (-1); | |
1422 | } | |
1423 | ||
1424 | btp->bt_redzone = UMEM_REDZONE_PATTERN; | |
1425 | ||
1426 | if (cp->cache_flags & UMF_DEADBEEF) { | |
1427 | if (verify_and_copy_pattern(UMEM_FREE_PATTERN, | |
1428 | UMEM_UNINITIALIZED_PATTERN, buf, cp->cache_verify)) { | |
1429 | umem_error(UMERR_MODIFIED, cp, buf); | |
1430 | return (-1); | |
1431 | } | |
1432 | } | |
1433 | ||
1434 | if ((mtbf = umem_mtbf | cp->cache_mtbf) != 0 && | |
1435 | gethrtime() % mtbf == 0 && | |
1436 | (umflag & (UMEM_FATAL_FLAGS)) == 0) { | |
1437 | umem_log_event(umem_failure_log, cp, NULL, NULL); | |
1438 | } else { | |
1439 | mtbf = 0; | |
1440 | } | |
1441 | ||
1442 | /* | |
1443 | * We do not pass fatal flags on to the constructor. This prevents | |
1444 | * leaking buffers in the event of a subordinate constructor failing. | |
1445 | */ | |
1446 | flags_nfatal = UMEM_DEFAULT; | |
1447 | if (mtbf || (cp->cache_constructor != NULL && | |
1448 | cp->cache_constructor(buf, cp->cache_private, flags_nfatal) != 0)) { | |
1449 | atomic_add_64(&cp->cache_alloc_fail, 1); | |
1450 | btp->bt_bxstat = (intptr_t)bcp ^ UMEM_BUFTAG_FREE; | |
1451 | copy_pattern(UMEM_FREE_PATTERN, buf, cp->cache_verify); | |
1452 | umem_slab_free(cp, buf); | |
1453 | return (-1); | |
1454 | } | |
1455 | ||
1456 | if (cp->cache_flags & UMF_AUDIT) { | |
1457 | UMEM_AUDIT(umem_transaction_log, cp, bcp); | |
1458 | } | |
1459 | ||
1460 | return (0); | |
1461 | } | |
1462 | ||
1463 | static int | |
1464 | umem_cache_free_debug(umem_cache_t *cp, void *buf) | |
1465 | { | |
1466 | umem_buftag_t *btp = UMEM_BUFTAG(cp, buf); | |
1467 | umem_bufctl_audit_t *bcp = (umem_bufctl_audit_t *)btp->bt_bufctl; | |
1468 | umem_slab_t *sp; | |
1469 | ||
1470 | if (btp->bt_bxstat != ((intptr_t)bcp ^ UMEM_BUFTAG_ALLOC)) { | |
1471 | if (btp->bt_bxstat == ((intptr_t)bcp ^ UMEM_BUFTAG_FREE)) { | |
1472 | umem_error(UMERR_DUPFREE, cp, buf); | |
1473 | return (-1); | |
1474 | } | |
1475 | sp = umem_findslab(cp, buf); | |
1476 | if (sp == NULL || sp->slab_cache != cp) | |
1477 | umem_error(UMERR_BADADDR, cp, buf); | |
1478 | else | |
1479 | umem_error(UMERR_REDZONE, cp, buf); | |
1480 | return (-1); | |
1481 | } | |
1482 | ||
1483 | btp->bt_bxstat = (intptr_t)bcp ^ UMEM_BUFTAG_FREE; | |
1484 | ||
1485 | if ((cp->cache_flags & UMF_HASH) && bcp->bc_addr != buf) { | |
1486 | umem_error(UMERR_BADBUFCTL, cp, buf); | |
1487 | return (-1); | |
1488 | } | |
1489 | ||
1490 | if (btp->bt_redzone != UMEM_REDZONE_PATTERN) { | |
1491 | umem_error(UMERR_REDZONE, cp, buf); | |
1492 | return (-1); | |
1493 | } | |
1494 | ||
1495 | if (cp->cache_flags & UMF_AUDIT) { | |
1496 | if (cp->cache_flags & UMF_CONTENTS) | |
1497 | bcp->bc_contents = umem_log_enter(umem_content_log, | |
1498 | buf, cp->cache_contents); | |
1499 | UMEM_AUDIT(umem_transaction_log, cp, bcp); | |
1500 | } | |
1501 | ||
1502 | if (cp->cache_destructor != NULL) | |
1503 | cp->cache_destructor(buf, cp->cache_private); | |
1504 | ||
1505 | if (cp->cache_flags & UMF_DEADBEEF) | |
1506 | copy_pattern(UMEM_FREE_PATTERN, buf, cp->cache_verify); | |
1507 | ||
1508 | return (0); | |
1509 | } | |
1510 | ||
1511 | /* | |
1512 | * Free each object in magazine mp to cp's slab layer, and free mp itself. | |
1513 | */ | |
1514 | static void | |
1515 | umem_magazine_destroy(umem_cache_t *cp, umem_magazine_t *mp, int nrounds) | |
1516 | { | |
1517 | int round; | |
1518 | ||
1519 | ASSERT(cp->cache_next == NULL || IN_UPDATE()); | |
1520 | ||
1521 | for (round = 0; round < nrounds; round++) { | |
1522 | void *buf = mp->mag_round[round]; | |
1523 | ||
1524 | if ((cp->cache_flags & UMF_DEADBEEF) && | |
1525 | verify_pattern(UMEM_FREE_PATTERN, buf, | |
1526 | cp->cache_verify) != NULL) { | |
1527 | umem_error(UMERR_MODIFIED, cp, buf); | |
1528 | continue; | |
1529 | } | |
1530 | ||
1531 | if (!(cp->cache_flags & UMF_BUFTAG) && | |
1532 | cp->cache_destructor != NULL) | |
1533 | cp->cache_destructor(buf, cp->cache_private); | |
1534 | ||
1535 | umem_slab_free(cp, buf); | |
1536 | } | |
1537 | ASSERT(UMEM_MAGAZINE_VALID(cp, mp)); | |
1538 | _umem_cache_free(cp->cache_magtype->mt_cache, mp); | |
1539 | } | |
1540 | ||
1541 | /* | |
1542 | * Allocate a magazine from the depot. | |
1543 | */ | |
1544 | static umem_magazine_t * | |
1545 | umem_depot_alloc(umem_cache_t *cp, umem_maglist_t *mlp) | |
1546 | { | |
1547 | umem_magazine_t *mp; | |
1548 | ||
1549 | /* | |
1550 | * If we can't get the depot lock without contention, | |
1551 | * update our contention count. We use the depot | |
1552 | * contention rate to determine whether we need to | |
1553 | * increase the magazine size for better scalability. | |
1554 | */ | |
1555 | if (mutex_trylock(&cp->cache_depot_lock) != 0) { | |
1556 | (void) mutex_lock(&cp->cache_depot_lock); | |
1557 | cp->cache_depot_contention++; | |
1558 | } | |
1559 | ||
1560 | if ((mp = mlp->ml_list) != NULL) { | |
1561 | ASSERT(UMEM_MAGAZINE_VALID(cp, mp)); | |
1562 | mlp->ml_list = mp->mag_next; | |
1563 | if (--mlp->ml_total < mlp->ml_min) | |
1564 | mlp->ml_min = mlp->ml_total; | |
1565 | mlp->ml_alloc++; | |
1566 | } | |
1567 | ||
1568 | (void) mutex_unlock(&cp->cache_depot_lock); | |
1569 | ||
1570 | return (mp); | |
1571 | } | |
1572 | ||
1573 | /* | |
1574 | * Free a magazine to the depot. | |
1575 | */ | |
1576 | static void | |
1577 | umem_depot_free(umem_cache_t *cp, umem_maglist_t *mlp, umem_magazine_t *mp) | |
1578 | { | |
1579 | (void) mutex_lock(&cp->cache_depot_lock); | |
1580 | ASSERT(UMEM_MAGAZINE_VALID(cp, mp)); | |
1581 | mp->mag_next = mlp->ml_list; | |
1582 | mlp->ml_list = mp; | |
1583 | mlp->ml_total++; | |
1584 | (void) mutex_unlock(&cp->cache_depot_lock); | |
1585 | } | |
1586 | ||
1587 | /* | |
1588 | * Update the working set statistics for cp's depot. | |
1589 | */ | |
1590 | static void | |
1591 | umem_depot_ws_update(umem_cache_t *cp) | |
1592 | { | |
1593 | (void) mutex_lock(&cp->cache_depot_lock); | |
1594 | cp->cache_full.ml_reaplimit = cp->cache_full.ml_min; | |
1595 | cp->cache_full.ml_min = cp->cache_full.ml_total; | |
1596 | cp->cache_empty.ml_reaplimit = cp->cache_empty.ml_min; | |
1597 | cp->cache_empty.ml_min = cp->cache_empty.ml_total; | |
1598 | (void) mutex_unlock(&cp->cache_depot_lock); | |
1599 | } | |
1600 | ||
1601 | /* | |
1602 | * Reap all magazines that have fallen out of the depot's working set. | |
1603 | */ | |
1604 | static void | |
1605 | umem_depot_ws_reap(umem_cache_t *cp) | |
1606 | { | |
1607 | long reap; | |
1608 | umem_magazine_t *mp; | |
1609 | ||
1610 | ASSERT(cp->cache_next == NULL || IN_REAP()); | |
1611 | ||
1612 | reap = MIN(cp->cache_full.ml_reaplimit, cp->cache_full.ml_min); | |
1613 | while (reap-- && (mp = umem_depot_alloc(cp, &cp->cache_full)) != NULL) | |
1614 | umem_magazine_destroy(cp, mp, cp->cache_magtype->mt_magsize); | |
1615 | ||
1616 | reap = MIN(cp->cache_empty.ml_reaplimit, cp->cache_empty.ml_min); | |
1617 | while (reap-- && (mp = umem_depot_alloc(cp, &cp->cache_empty)) != NULL) | |
1618 | umem_magazine_destroy(cp, mp, 0); | |
1619 | } | |
1620 | ||
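/*
 * Install magazine mp (holding 'rounds' rounds) as the CPU's loaded
 * magazine, demoting the current one to the previously-loaded slot.
 */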
1621 | static void | |
1622 | umem_cpu_reload(umem_cpu_cache_t *ccp, umem_magazine_t *mp, int rounds) | |
1623 | { | |
1624 | ASSERT((ccp->cc_loaded == NULL && ccp->cc_rounds == -1) || | |
1625 | (ccp->cc_loaded && ccp->cc_rounds + rounds == ccp->cc_magsize)); | |
1626 | ASSERT(ccp->cc_magsize > 0); | |
1627 | ||
1628 | ccp->cc_ploaded = ccp->cc_loaded; | |
1629 | ccp->cc_prounds = ccp->cc_rounds; | |
1630 | ccp->cc_loaded = mp; | |
1631 | ccp->cc_rounds = rounds; | |
1632 | } | |
1633 | ||
1634 | /* | |
1635 | * Allocate a constructed object from cache cp. | |
1636 | */ | |
1637 | #pragma weak umem_cache_alloc = _umem_cache_alloc
1638 | void * |
1639 | _umem_cache_alloc(umem_cache_t *cp, int umflag) | |
1640 | { | |
1641 | umem_cpu_cache_t *ccp; | |
1642 | umem_magazine_t *fmp; | |
1643 | void *buf; | |
1644 | int flags_nfatal; | |
1645 | ||
1646 | retry: | |
1647 | ccp = UMEM_CPU_CACHE(cp, CPU(cp->cache_cpu_mask)); | |
1648 | (void) mutex_lock(&ccp->cc_lock); | |
1649 | for (;;) { | |
1650 | /* | |
1651 | * If there's an object available in the current CPU's | |
1652 | * loaded magazine, just take it and return. | |
1653 | */ | |
1654 | if (ccp->cc_rounds > 0) { | |
1655 | buf = ccp->cc_loaded->mag_round[--ccp->cc_rounds]; | |
1656 | ccp->cc_alloc++; | |
1657 | (void) mutex_unlock(&ccp->cc_lock); | |
1658 | if ((ccp->cc_flags & UMF_BUFTAG) && | |
1659 | umem_cache_alloc_debug(cp, buf, umflag) == -1) { | |
1660 | if (umem_alloc_retry(cp, umflag)) { | |
1661 | goto retry; | |
1662 | } | |
1663 | ||
1664 | return (NULL); | |
1665 | } | |
1666 | return (buf); | |
1667 | } | |
1668 | ||
1669 | /* | |
1670 | * The loaded magazine is empty. If the previously loaded | |
1671 | * magazine was full, exchange them and try again. | |
1672 | */ | |
1673 | if (ccp->cc_prounds > 0) { | |
1674 | umem_cpu_reload(ccp, ccp->cc_ploaded, ccp->cc_prounds); | |
1675 | continue; | |
1676 | } | |
1677 | ||
1678 | /* | |
1679 | * If the magazine layer is disabled, break out now. | |
1680 | */ | |
1681 | if (ccp->cc_magsize == 0) | |
1682 | break; | |
1683 | ||
1684 | /* | |
1685 | * Try to get a full magazine from the depot. | |
1686 | */ | |
1687 | fmp = umem_depot_alloc(cp, &cp->cache_full); | |
1688 | if (fmp != NULL) { | |
1689 | if (ccp->cc_ploaded != NULL) | |
1690 | umem_depot_free(cp, &cp->cache_empty, | |
1691 | ccp->cc_ploaded); | |
1692 | umem_cpu_reload(ccp, fmp, ccp->cc_magsize); | |
1693 | continue; | |
1694 | } | |
1695 | ||
1696 | /* | |
1697 | * There are no full magazines in the depot, | |
1698 | * so fall through to the slab layer. | |
1699 | */ | |
1700 | break; | |
1701 | } | |
1702 | (void) mutex_unlock(&ccp->cc_lock); | |
1703 | ||
1704 | /* | |
1705 | * We couldn't allocate a constructed object from the magazine layer, | |
1706 | * so get a raw buffer from the slab layer and apply its constructor. | |
1707 | */ | |
1708 | buf = umem_slab_alloc(cp, umflag); | |
1709 | ||
1710 | if (buf == NULL) { | |
1711 | if (cp == &umem_null_cache) | |
1712 | return (NULL); | |
1713 | if (umem_alloc_retry(cp, umflag)) { | |
1714 | goto retry; | |
1715 | } | |
1716 | ||
1717 | return (NULL); | |
1718 | } | |
1719 | ||
1720 | if (cp->cache_flags & UMF_BUFTAG) { | |
1721 | /* | |
1722 | * Let umem_cache_alloc_debug() apply the constructor for us. | |
1723 | */ | |
1724 | if (umem_cache_alloc_debug(cp, buf, umflag) == -1) { | |
1725 | if (umem_alloc_retry(cp, umflag)) { | |
1726 | goto retry; | |
1727 | } | |
1728 | return (NULL); | |
1729 | } | |
1730 | return (buf); | |
1731 | } | |
1732 | ||
1733 | /* | |
1734 | * We do not pass fatal flags on to the constructor. This prevents | |
1735 | * leaking buffers in the event of a subordinate constructor failing. | |
1736 | */ | |
1737 | flags_nfatal = UMEM_DEFAULT; | |
1738 | if (cp->cache_constructor != NULL && | |
1739 | cp->cache_constructor(buf, cp->cache_private, flags_nfatal) != 0) { | |
1740 | atomic_add_64(&cp->cache_alloc_fail, 1); | |
1741 | umem_slab_free(cp, buf); | |
1742 | ||
1743 | if (umem_alloc_retry(cp, umflag)) { | |
1744 | goto retry; | |
1745 | } | |
1746 | return (NULL); | |
1747 | } | |
1748 | ||
1749 | return (buf); | |
1750 | } | |
1751 | ||
1752 | /* | |
1753 | * Free a constructed object to cache cp. | |
1754 | */ | |
1755 | #pragma weak umem_cache_free = _umem_cache_free
1756 | void |
1757 | _umem_cache_free(umem_cache_t *cp, void *buf) | |
1758 | { | |
1759 | umem_cpu_cache_t *ccp = UMEM_CPU_CACHE(cp, CPU(cp->cache_cpu_mask)); | |
1760 | umem_magazine_t *emp; | |
1761 | umem_magtype_t *mtp; | |
1762 | ||
1763 | if (ccp->cc_flags & UMF_BUFTAG) | |
1764 | if (umem_cache_free_debug(cp, buf) == -1) | |
1765 | return; | |
1766 | ||
1767 | (void) mutex_lock(&ccp->cc_lock); | |
1768 | for (;;) { | |
1769 | /* | |
1770 | * If there's a slot available in the current CPU's | |
1771 | * loaded magazine, just put the object there and return. | |
1772 | */ | |
1773 | if ((uint_t)ccp->cc_rounds < ccp->cc_magsize) { | |
1774 | ccp->cc_loaded->mag_round[ccp->cc_rounds++] = buf; | |
1775 | ccp->cc_free++; | |
1776 | (void) mutex_unlock(&ccp->cc_lock); | |
1777 | return; | |
1778 | } | |
1779 | ||
1780 | /* | |
1781 | * The loaded magazine is full. If the previously loaded | |
1782 | * magazine was empty, exchange them and try again. | |
1783 | */ | |
1784 | if (ccp->cc_prounds == 0) { | |
1785 | umem_cpu_reload(ccp, ccp->cc_ploaded, ccp->cc_prounds); | |
1786 | continue; | |
1787 | } | |
1788 | ||
1789 | /* | |
1790 | * If the magazine layer is disabled, break out now. | |
1791 | */ | |
1792 | if (ccp->cc_magsize == 0) | |
1793 | break; | |
1794 | ||
1795 | /* | |
1796 | * Try to get an empty magazine from the depot. | |
1797 | */ | |
1798 | emp = umem_depot_alloc(cp, &cp->cache_empty); | |
1799 | if (emp != NULL) { | |
1800 | if (ccp->cc_ploaded != NULL) | |
1801 | umem_depot_free(cp, &cp->cache_full, | |
1802 | ccp->cc_ploaded); | |
1803 | umem_cpu_reload(ccp, emp, 0); | |
1804 | continue; | |
1805 | } | |
1806 | ||
1807 | /* | |
1808 | * There are no empty magazines in the depot, | |
1809 | * so try to allocate a new one. We must drop all locks | |
1810 | * across umem_cache_alloc() because lower layers may | |
1811 | * attempt to allocate from this cache. | |
1812 | */ | |
1813 | mtp = cp->cache_magtype; | |
1814 | (void) mutex_unlock(&ccp->cc_lock); | |
1815 | emp = _umem_cache_alloc(mtp->mt_cache, UMEM_DEFAULT); | |
1816 | (void) mutex_lock(&ccp->cc_lock); | |
1817 | ||
1818 | if (emp != NULL) { | |
1819 | /* | |
1820 | * We successfully allocated an empty magazine. | |
1821 | * However, we had to drop ccp->cc_lock to do it, | |
1822 | * so the cache's magazine size may have changed. | |
1823 | * If so, free the magazine and try again. | |
1824 | */ | |
1825 | if (ccp->cc_magsize != mtp->mt_magsize) { | |
1826 | (void) mutex_unlock(&ccp->cc_lock); | |
1827 | _umem_cache_free(mtp->mt_cache, emp); | |
1828 | (void) mutex_lock(&ccp->cc_lock); | |
1829 | continue; | |
1830 | } | |
1831 | ||
1832 | /* | |
1833 | * We got a magazine of the right size. Add it to | |
1834 | * the depot and try the whole dance again. | |
1835 | */ | |
1836 | umem_depot_free(cp, &cp->cache_empty, emp); | |
1837 | continue; | |
1838 | } | |
1839 | ||
1840 | /* | |
1841 | * We couldn't allocate an empty magazine, | |
1842 | * so fall through to the slab layer. | |
1843 | */ | |
1844 | break; | |
1845 | } | |
1846 | (void) mutex_unlock(&ccp->cc_lock); | |
1847 | ||
1848 | /* | |
1849 | * We couldn't free our constructed object to the magazine layer, | |
1850 | * so apply its destructor and free it to the slab layer. | |
1851 | * Note that if UMF_BUFTAG is in effect, umem_cache_free_debug() | |
1852 | * will have already applied the destructor. | |
1853 | */ | |
1854 | if (!(cp->cache_flags & UMF_BUFTAG) && cp->cache_destructor != NULL) | |
1855 | cp->cache_destructor(buf, cp->cache_private); | |
1856 | ||
1857 | umem_slab_free(cp, buf); | |
1858 | } | |
1859 | ||
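/*
 * Allocate size bytes and zero them.  Small requests are satisfied from
 * the umem_alloc_table caches; anything larger is passed to umem_alloc().
 */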
1860 | #pragma weak umem_zalloc = _umem_zalloc
1861 | void * |
1862 | _umem_zalloc(size_t size, int umflag) | |
1863 | { | |
1864 | size_t index = (size - 1) >> UMEM_ALIGN_SHIFT; | |
1865 | void *buf; | |
1866 | ||
1867 | retry: | |
1868 | if (index < UMEM_MAXBUF >> UMEM_ALIGN_SHIFT) { | |
1869 | umem_cache_t *cp = umem_alloc_table[index]; | |
1870 | buf = _umem_cache_alloc(cp, umflag); | |
1871 | if (buf != NULL) { | |
1872 | if (cp->cache_flags & UMF_BUFTAG) { | |
1873 | umem_buftag_t *btp = UMEM_BUFTAG(cp, buf); | |
1874 | ((uint8_t *)buf)[size] = UMEM_REDZONE_BYTE; | |
1875 | ((uint32_t *)btp)[1] = UMEM_SIZE_ENCODE(size); | |
1876 | } | |
1877 | bzero(buf, size); | |
1878 | } else if (umem_alloc_retry(cp, umflag)) | |
1879 | goto retry; | |
1880 | } else { | |
1881 | buf = _umem_alloc(size, umflag); /* handles failure */ | |
1882 | if (buf != NULL) | |
1883 | bzero(buf, size); | |
1884 | } | |
1885 | return (buf); | |
1886 | } | |
1887 | ||
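/*
 * Allocate size bytes.  Requests up to UMEM_MAXBUF come from the fixed-size
 * caches in umem_alloc_table; larger requests come from the oversize arena.
 */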
1888 | #pragma weak umem_alloc = _umem_alloc
1889 | void * |
1890 | _umem_alloc(size_t size, int umflag) | |
1891 | { | |
1892 | size_t index = (size - 1) >> UMEM_ALIGN_SHIFT; | |
1893 | void *buf; | |
1894 | umem_alloc_retry: | |
1895 | if (index < UMEM_MAXBUF >> UMEM_ALIGN_SHIFT) { | |
1896 | umem_cache_t *cp = umem_alloc_table[index]; | |
1897 | buf = _umem_cache_alloc(cp, umflag); | |
1898 | if ((cp->cache_flags & UMF_BUFTAG) && buf != NULL) { | |
1899 | umem_buftag_t *btp = UMEM_BUFTAG(cp, buf); | |
1900 | ((uint8_t *)buf)[size] = UMEM_REDZONE_BYTE; | |
1901 | ((uint32_t *)btp)[1] = UMEM_SIZE_ENCODE(size); | |
1902 | } | |
1903 | if (buf == NULL && umem_alloc_retry(cp, umflag)) | |
1904 | goto umem_alloc_retry; | |
1905 | return (buf); | |
1906 | } | |
1907 | if (size == 0) | |
1908 | return (NULL); | |
1909 | if (umem_oversize_arena == NULL) { | |
1910 | if (umem_init()) | |
1911 | ASSERT(umem_oversize_arena != NULL); | |
1912 | else | |
1913 | return (NULL); | |
1914 | } | |
1915 | buf = vmem_alloc(umem_oversize_arena, size, UMEM_VMFLAGS(umflag)); | |
1916 | if (buf == NULL) { | |
1917 | umem_log_event(umem_failure_log, NULL, NULL, (void *)size); | |
1918 | if (umem_alloc_retry(NULL, umflag)) | |
1919 | goto umem_alloc_retry; | |
1920 | } | |
1921 | return (buf); | |
1922 | } | |
1923 | ||
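/*
 * Allocate size bytes at the requested power-of-two alignment from the
 * memalign arena.  Such buffers must be released with umem_free_align().
 */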
1924 | #pragma weak umem_alloc_align = _umem_alloc_align
1925 | void * |
1926 | _umem_alloc_align(size_t size, size_t align, int umflag) | |
1927 | { | |
1928 | void *buf; | |
1929 | ||
1930 | if (size == 0) | |
1931 | return (NULL); | |
1932 | if ((align & (align - 1)) != 0) | |
1933 | return (NULL); | |
1934 | if (align < UMEM_ALIGN) | |
1935 | align = UMEM_ALIGN; | |
1936 | ||
1937 | umem_alloc_align_retry: | |
1938 | if (umem_memalign_arena == NULL) { | |
1939 | if (umem_init()) | |
1940 | ASSERT(umem_oversize_arena != NULL); | |
1941 | else | |
1942 | return (NULL); | |
1943 | } | |
1944 | buf = vmem_xalloc(umem_memalign_arena, size, align, 0, 0, NULL, NULL, | |
1945 | UMEM_VMFLAGS(umflag)); | |
1946 | if (buf == NULL) { | |
1947 | umem_log_event(umem_failure_log, NULL, NULL, (void *)size); | |
1948 | if (umem_alloc_retry(NULL, umflag)) | |
1949 | goto umem_alloc_align_retry; | |
1950 | } | |
1951 | return (buf); | |
1952 | } | |
1953 | ||
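/*
 * Free buf, which must be exactly the size that was allocated.  When
 * UMF_BUFTAG is set, the encoded size and redzone byte are verified first
 * so that duplicate frees, wrong-size frees, and overruns are reported.
 */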
1954 | #pragma weak umem_free = _umem_free
1955 | void |
1956 | _umem_free(void *buf, size_t size) | |
1957 | { | |
1958 | size_t index = (size - 1) >> UMEM_ALIGN_SHIFT; | |
1959 | ||
1960 | if (index < UMEM_MAXBUF >> UMEM_ALIGN_SHIFT) { | |
1961 | umem_cache_t *cp = umem_alloc_table[index]; | |
1962 | if (cp->cache_flags & UMF_BUFTAG) { | |
1963 | umem_buftag_t *btp = UMEM_BUFTAG(cp, buf); | |
1964 | uint32_t *ip = (uint32_t *)btp; | |
1965 | if (ip[1] != UMEM_SIZE_ENCODE(size)) { | |
1966 | if (*(uint64_t *)buf == UMEM_FREE_PATTERN) { | |
1967 | umem_error(UMERR_DUPFREE, cp, buf); | |
1968 | return; | |
1969 | } | |
1970 | if (UMEM_SIZE_VALID(ip[1])) { | |
1971 | ip[0] = UMEM_SIZE_ENCODE(size); | |
1972 | umem_error(UMERR_BADSIZE, cp, buf); | |
1973 | } else { | |
1974 | umem_error(UMERR_REDZONE, cp, buf); | |
1975 | } | |
1976 | return; | |
1977 | } | |
1978 | if (((uint8_t *)buf)[size] != UMEM_REDZONE_BYTE) { | |
1979 | umem_error(UMERR_REDZONE, cp, buf); | |
1980 | return; | |
1981 | } | |
1982 | btp->bt_redzone = UMEM_REDZONE_PATTERN; | |
1983 | } | |
1984 | _umem_cache_free(cp, buf); | |
1985 | } else { | |
1986 | if (buf == NULL && size == 0) | |
1987 | return; | |
1988 | vmem_free(umem_oversize_arena, buf, size); | |
1989 | } | |
1990 | } | |
1991 | ||
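/*
 * Free a buffer obtained from umem_alloc_align() back to the memalign arena.
 */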
1992 | #pragma weak umem_free_align = _umem_free_align
1993 | void |
1994 | _umem_free_align(void *buf, size_t size) | |
1995 | { | |
1996 | if (buf == NULL && size == 0) | |
1997 | return; | |
1998 | vmem_xfree(umem_memalign_arena, buf, size); | |
1999 | } | |
2000 | ||
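/*
 * VA allocator for the firewall arena: reserve one extra quantum of address
 * space past each allocation so that an overrun off the end of a buffer
 * runs into the unused "firewall" region instead of a neighboring buffer.
 */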
2001 | static void * | |
2002 | umem_firewall_va_alloc(vmem_t *vmp, size_t size, int vmflag) | |
2003 | { | |
2004 | size_t realsize = size + vmp->vm_quantum; | |
2005 | ||
2006 | /* | |
2007 | * Annoying edge case: if 'size' is just shy of ULONG_MAX, adding | |
2008 | * vm_quantum will cause integer wraparound. Check for this, and | |
2009 | * blow off the firewall page in this case. Note that such a | |
2010 | * giant allocation (the entire address space) can never be | |
2011 | * satisfied, so it will either fail immediately (VM_NOSLEEP) | |
2012 | * or sleep forever (VM_SLEEP). Thus, there is no need for a | |
2013 | * corresponding check in umem_firewall_va_free(). | |
2014 | */ | |
2015 | if (realsize < size) | |
2016 | realsize = size; | |
2017 | ||
2018 | return (vmem_alloc(vmp, realsize, vmflag | VM_NEXTFIT)); | |
2019 | } | |
2020 | ||
2021 | static void | |
2022 | umem_firewall_va_free(vmem_t *vmp, void *addr, size_t size) | |
2023 | { | |
2024 | vmem_free(vmp, addr, size + vmp->vm_quantum); | |
2025 | } | |
2026 | ||
2027 | /* | |
2028 | * Reclaim all unused memory from a cache. | |
2029 | */ | |
2030 | static void | |
2031 | umem_cache_reap(umem_cache_t *cp) | |
2032 | { | |
2033 | /* | |
2034 | * Ask the cache's owner to free some memory if possible. | |
2035 | * The idea is to handle things like the inode cache, which | |
2036 | * typically sits on a bunch of memory that it doesn't truly | |
2037 | * *need*. Reclaim policy is entirely up to the owner; this | |
2038 | * callback is just an advisory plea for help. | |
2039 | */ | |
2040 | if (cp->cache_reclaim != NULL) | |
2041 | cp->cache_reclaim(cp->cache_private); | |
2042 | ||
2043 | umem_depot_ws_reap(cp); | |
2044 | } | |
2045 | ||
2046 | /* | |
2047 | * Purge all magazines from a cache and set its magazine limit to zero. | |
2048 | * All calls are serialized by being done by the update thread, except for | |
2049 | * the final call from umem_cache_destroy(). | |
2050 | */ | |
2051 | static void | |
2052 | umem_cache_magazine_purge(umem_cache_t *cp) | |
2053 | { | |
2054 | umem_cpu_cache_t *ccp; | |
2055 | umem_magazine_t *mp, *pmp; | |
2056 | int rounds, prounds, cpu_seqid; | |
2057 | ||
2058 | ASSERT(cp->cache_next == NULL || IN_UPDATE()); | |
2059 | ||
2060 | for (cpu_seqid = 0; cpu_seqid < umem_max_ncpus; cpu_seqid++) { | |
2061 | ccp = &cp->cache_cpu[cpu_seqid]; | |
2062 | ||
2063 | (void) mutex_lock(&ccp->cc_lock); | |
2064 | mp = ccp->cc_loaded; | |
2065 | pmp = ccp->cc_ploaded; | |
2066 | rounds = ccp->cc_rounds; | |
2067 | prounds = ccp->cc_prounds; | |
2068 | ccp->cc_loaded = NULL; | |
2069 | ccp->cc_ploaded = NULL; | |
2070 | ccp->cc_rounds = -1; | |
2071 | ccp->cc_prounds = -1; | |
2072 | ccp->cc_magsize = 0; | |
2073 | (void) mutex_unlock(&ccp->cc_lock); | |
2074 | ||
2075 | if (mp) | |
2076 | umem_magazine_destroy(cp, mp, rounds); | |
2077 | if (pmp) | |
2078 | umem_magazine_destroy(cp, pmp, prounds); | |
2079 | } | |
2080 | ||
2081 | /* | |
2082 | * Updating the working set statistics twice in a row has the | |
2083 | * effect of setting the working set size to zero, so everything | |
2084 | * is eligible for reaping. | |
2085 | */ | |
2086 | umem_depot_ws_update(cp); | |
2087 | umem_depot_ws_update(cp); | |
2088 | ||
2089 | umem_depot_ws_reap(cp); | |
2090 | } | |
2091 | ||
2092 | /* | |
2093 | * Enable per-cpu magazines on a cache. | |
2094 | */ | |
2095 | static void | |
2096 | umem_cache_magazine_enable(umem_cache_t *cp) | |
2097 | { | |
2098 | int cpu_seqid; | |
2099 | ||
2100 | if (cp->cache_flags & UMF_NOMAGAZINE) | |
2101 | return; | |
2102 | ||
2103 | for (cpu_seqid = 0; cpu_seqid < umem_max_ncpus; cpu_seqid++) { | |
2104 | umem_cpu_cache_t *ccp = &cp->cache_cpu[cpu_seqid]; | |
2105 | (void) mutex_lock(&ccp->cc_lock); | |
2106 | ccp->cc_magsize = cp->cache_magtype->mt_magsize; | |
2107 | (void) mutex_unlock(&ccp->cc_lock); | |
2108 | } | |
2109 | ||
2110 | } | |
2111 | ||
2112 | /* | |
2113 | * Recompute a cache's magazine size. The trade-off is that larger magazines | |
2114 | * provide a higher transfer rate with the depot, while smaller magazines | |
2115 | * reduce memory consumption. Magazine resizing is an expensive operation; | |
2116 | * it should not be done frequently. | |
2117 | * | |
2118 | * Changes to the magazine size are serialized by having only one thread | |
2119 | * (the update thread) perform updates. | |
2120 | * | |
2121 | * Note: at present this only grows the magazine size. It might be useful | |
2122 | * to allow shrinkage too. | |
2123 | */ | |
2124 | static void | |
2125 | umem_cache_magazine_resize(umem_cache_t *cp) | |
2126 | { | |
2127 | umem_magtype_t *mtp = cp->cache_magtype; | |
2128 | ||
2129 | ASSERT(IN_UPDATE()); | |
2130 | ||
2131 | if (cp->cache_chunksize < mtp->mt_maxbuf) { | |
2132 | umem_cache_magazine_purge(cp); | |
2133 | (void) mutex_lock(&cp->cache_depot_lock); | |
2134 | cp->cache_magtype = ++mtp; | |
2135 | cp->cache_depot_contention_prev = | |
2136 | cp->cache_depot_contention + INT_MAX; | |
2137 | (void) mutex_unlock(&cp->cache_depot_lock); | |
2138 | umem_cache_magazine_enable(cp); | |
2139 | } | |
2140 | } | |
2141 | ||
2142 | /* | |
2143 | * Rescale a cache's hash table, so that the table size is roughly the | |
2144 | * cache size. We want the average lookup time to be extremely small. | |
2145 | */ | |
2146 | static void | |
2147 | umem_hash_rescale(umem_cache_t *cp) | |
2148 | { | |
2149 | umem_bufctl_t **old_table, **new_table, *bcp; | |
2150 | size_t old_size, new_size, h; | |
2151 | ||
2152 | ASSERT(IN_UPDATE()); | |
2153 | ||
2154 | new_size = MAX(UMEM_HASH_INITIAL, | |
2155 | 1 << (highbit(3 * cp->cache_buftotal + 4) - 2)); | |
2156 | old_size = cp->cache_hash_mask + 1; | |
2157 | ||
2158 | if ((old_size >> 1) <= new_size && new_size <= (old_size << 1)) | |
2159 | return; | |
2160 | ||
2161 | new_table = vmem_alloc(umem_hash_arena, new_size * sizeof (void *), | |
2162 | VM_NOSLEEP); | |
2163 | if (new_table == NULL) | |
2164 | return; | |
2165 | bzero(new_table, new_size * sizeof (void *)); | |
2166 | ||
2167 | (void) mutex_lock(&cp->cache_lock); | |
2168 | ||
2169 | old_size = cp->cache_hash_mask + 1; | |
2170 | old_table = cp->cache_hash_table; | |
2171 | ||
2172 | cp->cache_hash_mask = new_size - 1; | |
2173 | cp->cache_hash_table = new_table; | |
2174 | cp->cache_rescale++; | |
2175 | ||
2176 | for (h = 0; h < old_size; h++) { | |
2177 | bcp = old_table[h]; | |
2178 | while (bcp != NULL) { | |
2179 | void *addr = bcp->bc_addr; | |
2180 | umem_bufctl_t *next_bcp = bcp->bc_next; | |
2181 | umem_bufctl_t **hash_bucket = UMEM_HASH(cp, addr); | |
2182 | bcp->bc_next = *hash_bucket; | |
2183 | *hash_bucket = bcp; | |
2184 | bcp = next_bcp; | |
2185 | } | |
2186 | } | |
2187 | ||
2188 | (void) mutex_unlock(&cp->cache_lock); | |
2189 | ||
2190 | vmem_free(umem_hash_arena, old_table, old_size * sizeof (void *)); | |
2191 | } | |
2192 | ||
2193 | /* | |
2194 | * Perform periodic maintenance on a cache: hash rescaling, | |
2195 | * depot working-set update, and magazine resizing. | |
2196 | */ | |
2197 | void | |
2198 | umem_cache_update(umem_cache_t *cp) | |
2199 | { | |
2200 | int update_flags = 0; | |
2201 | ||
2202 | ASSERT(MUTEX_HELD(&umem_cache_lock)); | |
2203 | ||
2204 | /* | |
2205 | * If the cache has become much larger or smaller than its hash table, | |
2206 | * fire off a request to rescale the hash table. | |
2207 | */ | |
2208 | (void) mutex_lock(&cp->cache_lock); | |
2209 | ||
2210 | if ((cp->cache_flags & UMF_HASH) && | |
2211 | (cp->cache_buftotal > (cp->cache_hash_mask << 1) || | |
2212 | (cp->cache_buftotal < (cp->cache_hash_mask >> 1) && | |
2213 | cp->cache_hash_mask > UMEM_HASH_INITIAL))) | |
2214 | update_flags |= UMU_HASH_RESCALE; | |
2215 | ||
2216 | (void) mutex_unlock(&cp->cache_lock); | |
2217 | ||
2218 | /* | |
2219 | * Update the depot working set statistics. | |
2220 | */ | |
2221 | umem_depot_ws_update(cp); | |
2222 | ||
2223 | /* | |
2224 | * If there's a lot of contention in the depot, | |
2225 | * increase the magazine size. | |
2226 | */ | |
2227 | (void) mutex_lock(&cp->cache_depot_lock); | |
2228 | ||
2229 | if (cp->cache_chunksize < cp->cache_magtype->mt_maxbuf && | |
2230 | (int)(cp->cache_depot_contention - | |
2231 | cp->cache_depot_contention_prev) > umem_depot_contention) | |
2232 | update_flags |= UMU_MAGAZINE_RESIZE; | |
2233 | ||
2234 | cp->cache_depot_contention_prev = cp->cache_depot_contention; | |
2235 | ||
2236 | (void) mutex_unlock(&cp->cache_depot_lock); | |
2237 | ||
2238 | if (update_flags) | |
2239 | umem_add_update(cp, update_flags); | |
2240 | } | |
2241 | ||
2242 | /* | |
2243 | * Runs all pending updates. | |
2244 | * | |
2245 | * The update lock must be held on entrance, and will be held on exit. | |
2246 | */ | |
2247 | void | |
2248 | umem_process_updates(void) | |
2249 | { | |
2250 | ASSERT(MUTEX_HELD(&umem_update_lock)); | |
2251 | ||
2252 | while (umem_null_cache.cache_unext != &umem_null_cache) { | |
2253 | int notify = 0; | |
2254 | umem_cache_t *cp = umem_null_cache.cache_unext; | |
2255 | ||
2256 | cp->cache_uprev->cache_unext = cp->cache_unext; | |
2257 | cp->cache_unext->cache_uprev = cp->cache_uprev; | |
2258 | cp->cache_uprev = cp->cache_unext = NULL; | |
2259 | ||
2260 | ASSERT(!(cp->cache_uflags & UMU_ACTIVE)); | |
2261 | ||
2262 | while (cp->cache_uflags) { | |
2263 | int uflags = (cp->cache_uflags |= UMU_ACTIVE); | |
2264 | (void) mutex_unlock(&umem_update_lock); | |
2265 | ||
2266 | /* | |
2267 | * The order here is important. Each step can speed up | |
2268 | * later steps. | |
2269 | */ | |
2270 | ||
2271 | if (uflags & UMU_HASH_RESCALE) | |
2272 | umem_hash_rescale(cp); | |
2273 | ||
2274 | if (uflags & UMU_MAGAZINE_RESIZE) | |
2275 | umem_cache_magazine_resize(cp); | |
2276 | ||
2277 | if (uflags & UMU_REAP) | |
2278 | umem_cache_reap(cp); | |
2279 | ||
2280 | (void) mutex_lock(&umem_update_lock); | |
2281 | ||
2282 | /* | |
2283 | * check if anyone has requested notification | |
2284 | */ | |
2285 | if (cp->cache_uflags & UMU_NOTIFY) { | |
2286 | uflags |= UMU_NOTIFY; | |
2287 | notify = 1; | |
2288 | } | |
2289 | cp->cache_uflags &= ~uflags; | |
2290 | } | |
2291 | if (notify) | |
2292 | (void) cond_broadcast(&umem_update_cv); | |
2293 | } | |
2294 | } | |
2295 | ||
2296 | #ifndef UMEM_STANDALONE | |
2297 | static void | |
2298 | umem_st_update(void) | |
2299 | { | |
2300 | ASSERT(MUTEX_HELD(&umem_update_lock)); | |
2301 | ASSERT(umem_update_thr == 0 && umem_st_update_thr == 0); | |
2302 | ||
2303 | umem_st_update_thr = thr_self(); | |
2304 | ||
2305 | (void) mutex_unlock(&umem_update_lock); | |
2306 | ||
2307 | vmem_update(NULL); | |
2308 | umem_cache_applyall(umem_cache_update); | |
2309 | ||
2310 | (void) mutex_lock(&umem_update_lock); | |
2311 | ||
2312 | umem_process_updates(); /* does all of the requested work */ | |
2313 | ||
2314 | umem_reap_next = gethrtime() + | |
2315 | (hrtime_t)umem_reap_interval * NANOSEC; | |
2316 | ||
2317 | umem_reaping = UMEM_REAP_DONE; | |
2318 | ||
2319 | umem_st_update_thr = 0; | |
2320 | } | |
2321 | #endif | |
2322 | ||
2323 | /* | |
2324 | * Reclaim all unused memory from all caches. Called from vmem when memory | |
2325 | * gets tight. Must be called with no locks held. | |
2326 | * | |
2327 | * This just requests a reap on all caches, and notifies the update thread. | |
2328 | */ | |
2329 | void | |
2330 | umem_reap(void) | |
2331 | { | |
2332 | #ifndef UMEM_STANDALONE | |
2333 | extern int __nthreads(void); | |
2334 | #endif | |
2335 | ||
2336 | if (umem_ready != UMEM_READY || umem_reaping != UMEM_REAP_DONE || | |
2337 | gethrtime() < umem_reap_next) | |
2338 | return; | |
2339 | ||
2340 | (void) mutex_lock(&umem_update_lock); | |
2341 | ||
2342 | if (umem_reaping != UMEM_REAP_DONE || gethrtime() < umem_reap_next) { | |
2343 | (void) mutex_unlock(&umem_update_lock); | |
2344 | return; | |
2345 | } | |
2346 | umem_reaping = UMEM_REAP_ADDING; /* lock out other reaps */ |
2347 | ||
2348 | (void) mutex_unlock(&umem_update_lock); | |
2349 | ||
2350 | umem_updateall(UMU_REAP); | |
2351 | ||
2352 | (void) mutex_lock(&umem_update_lock); | |
2353 | ||
2354 | umem_reaping = UMEM_REAP_ACTIVE; | |
2355 | ||
2356 | /* Standalone is single-threaded */ | |
2357 | #ifndef UMEM_STANDALONE | |
2358 | if (umem_update_thr == 0) { | |
2359 | /* | |
2360 | * The update thread does not exist. If the process is | |
2361 | * multi-threaded, create it. If not, or the creation fails, | |
2362 | * do the update processing inline. | |
2363 | */ | |
2364 | ASSERT(umem_st_update_thr == 0); | |
2365 | ||
2366 | if (__nthreads() <= 1 || umem_create_update_thread() == 0) | |
2367 | umem_st_update(); | |
2368 | } | |
2369 | ||
2370 | (void) cond_broadcast(&umem_update_cv); /* wake up the update thread */ | |
2371 | #endif | |
2372 | ||
2373 | (void) mutex_unlock(&umem_update_lock); | |
2374 | } | |
2375 | ||
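/*
 * Create an object cache.  A typical client looks roughly like the sketch
 * below (the foo_* names are purely illustrative):
 *
 *	cache = umem_cache_create("foo_cache", sizeof (foo_t), 0,
 *	    foo_construct, foo_destruct, NULL, NULL, NULL, 0);
 *	foo_t *fp = umem_cache_alloc(cache, UMEM_DEFAULT);
 *	... use fp ...
 *	umem_cache_free(cache, fp);
 *	umem_cache_destroy(cache);
 */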
2376 | umem_cache_t * | |
2377 | umem_cache_create( | |
2378 | char *name, /* descriptive name for this cache */ | |
2379 | size_t bufsize, /* size of the objects it manages */ | |
2380 | size_t align, /* required object alignment */ | |
2381 | umem_constructor_t *constructor, /* object constructor */ | |
2382 | umem_destructor_t *destructor, /* object destructor */ | |
2383 | umem_reclaim_t *reclaim, /* memory reclaim callback */ | |
2384 | void *private, /* pass-thru arg for constr/destr/reclaim */ | |
2385 | vmem_t *vmp, /* vmem source for slab allocation */ | |
2386 | int cflags) /* cache creation flags */ | |
2387 | { | |
2388 | int cpu_seqid; | |
2389 | size_t chunksize; | |
2390 | umem_cache_t *cp, *cnext, *cprev; | |
2391 | umem_magtype_t *mtp; | |
2392 | size_t csize; | |
2393 | size_t phase; | |
2394 | ||
2395 | /* | |
2396 | * The init thread is allowed to create internal and quantum caches. | |
2397 | * | |
2398 | * Other threads must wait until initialization is complete. | |
2399 | */ | |
2400 | if (umem_init_thr == thr_self()) | |
2401 | ASSERT((cflags & (UMC_INTERNAL | UMC_QCACHE)) != 0); | |
2402 | else { | |
2403 | ASSERT(!(cflags & UMC_INTERNAL)); | |
2404 | if (umem_ready != UMEM_READY && umem_init() == 0) { | |
2405 | errno = EAGAIN; | |
2406 | return (NULL); | |
2407 | } | |
2408 | } | |
2409 | ||
2410 | csize = UMEM_CACHE_SIZE(umem_max_ncpus); | |
2411 | phase = P2NPHASE(csize, UMEM_CPU_CACHE_SIZE); | |
2412 | ||
2413 | if (vmp == NULL) | |
2414 | vmp = umem_default_arena; | |
2415 | ||
2416 | ASSERT(P2PHASE(phase, UMEM_ALIGN) == 0); | |
2417 | ||
2418 | /* | |
2419 | * Check that the arguments are reasonable | |
2420 | */ | |
2421 | if ((align & (align - 1)) != 0 || align > vmp->vm_quantum || | |
2422 | ((cflags & UMC_NOHASH) && (cflags & UMC_NOTOUCH)) || | |
2423 | name == NULL || bufsize == 0) { | |
2424 | errno = EINVAL; | |
2425 | return (NULL); | |
2426 | } | |
2427 | ||
2428 | /* | |
2429 | * If align == 0, we set it to the minimum required alignment. | |
2430 | * | |
2431 | * If align < UMEM_ALIGN, we round it up to UMEM_ALIGN, unless | |
2432 | * UMC_NOTOUCH was passed. | |
2433 | */ | |
2434 | if (align == 0) { | |
2435 | if (P2ROUNDUP(bufsize, UMEM_ALIGN) >= UMEM_SECOND_ALIGN) | |
2436 | align = UMEM_SECOND_ALIGN; | |
2437 | else | |
2438 | align = UMEM_ALIGN; | |
2439 | } else if (align < UMEM_ALIGN && (cflags & UMC_NOTOUCH) == 0) | |
2440 | align = UMEM_ALIGN; | |
2441 | ||
2442 | ||
2443 | /* | |
2444 | * Get a umem_cache structure. We arrange that cp->cache_cpu[] | |
2445 | * is aligned on a UMEM_CPU_CACHE_SIZE boundary to prevent | |
2446 | * false sharing of per-CPU data. | |
2447 | */ | |
2448 | cp = vmem_xalloc(umem_cache_arena, csize, UMEM_CPU_CACHE_SIZE, phase, | |
2449 | 0, NULL, NULL, VM_NOSLEEP); | |
2450 | ||
2451 | if (cp == NULL) { | |
2452 | errno = EAGAIN; | |
2453 | return (NULL); | |
2454 | } | |
2455 | ||
2456 | bzero(cp, csize); | |
2457 | ||
2458 | (void) mutex_lock(&umem_flags_lock); | |
2459 | if (umem_flags & UMF_RANDOMIZE) | |
2460 | umem_flags = (((umem_flags | ~UMF_RANDOM) + 1) & UMF_RANDOM) | | |
2461 | UMF_RANDOMIZE; | |
2462 | cp->cache_flags = umem_flags | (cflags & UMF_DEBUG); | |
2463 | (void) mutex_unlock(&umem_flags_lock); | |
2464 | ||
2465 | /* | |
2466 | * Make sure all the various flags are reasonable. | |
2467 | */ | |
2468 | if (cp->cache_flags & UMF_LITE) { | |
2469 | if (bufsize >= umem_lite_minsize && | |
2470 | align <= umem_lite_maxalign && | |
2471 | P2PHASE(bufsize, umem_lite_maxalign) != 0) { | |
2472 | cp->cache_flags |= UMF_BUFTAG; | |
2473 | cp->cache_flags &= ~(UMF_AUDIT | UMF_FIREWALL); | |
2474 | } else { | |
2475 | cp->cache_flags &= ~UMF_DEBUG; | |
2476 | } | |
2477 | } | |
2478 | ||
2479 | if ((cflags & UMC_QCACHE) && (cp->cache_flags & UMF_AUDIT)) | |
2480 | cp->cache_flags |= UMF_NOMAGAZINE; | |
2481 | ||
2482 | if (cflags & UMC_NODEBUG) | |
2483 | cp->cache_flags &= ~UMF_DEBUG; | |
2484 | ||
2485 | if (cflags & UMC_NOTOUCH) | |
2486 | cp->cache_flags &= ~UMF_TOUCH; | |
2487 | ||
2488 | if (cflags & UMC_NOHASH) | |
2489 | cp->cache_flags &= ~(UMF_AUDIT | UMF_FIREWALL); | |
2490 | ||
2491 | if (cflags & UMC_NOMAGAZINE) | |
2492 | cp->cache_flags |= UMF_NOMAGAZINE; | |
2493 | ||
2494 | if ((cp->cache_flags & UMF_AUDIT) && !(cflags & UMC_NOTOUCH)) | |
2495 | cp->cache_flags |= UMF_REDZONE; | |
2496 | ||
2497 | if ((cp->cache_flags & UMF_BUFTAG) && bufsize >= umem_minfirewall && | |
2498 | !(cp->cache_flags & UMF_LITE) && !(cflags & UMC_NOHASH)) | |
2499 | cp->cache_flags |= UMF_FIREWALL; | |
2500 | ||
2501 | if (vmp != umem_default_arena || umem_firewall_arena == NULL) | |
2502 | cp->cache_flags &= ~UMF_FIREWALL; | |
2503 | ||
2504 | if (cp->cache_flags & UMF_FIREWALL) { | |
2505 | cp->cache_flags &= ~UMF_BUFTAG; | |
2506 | cp->cache_flags |= UMF_NOMAGAZINE; | |
2507 | ASSERT(vmp == umem_default_arena); | |
2508 | vmp = umem_firewall_arena; | |
2509 | } | |
2510 | ||
2511 | /* | |
2512 | * Set cache properties. | |
2513 | */ | |
2514 | (void) strncpy(cp->cache_name, name, sizeof (cp->cache_name) - 1); | |
2515 | cp->cache_bufsize = bufsize; | |
2516 | cp->cache_align = align; | |
2517 | cp->cache_constructor = constructor; | |
2518 | cp->cache_destructor = destructor; | |
2519 | cp->cache_reclaim = reclaim; | |
2520 | cp->cache_private = private; | |
2521 | cp->cache_arena = vmp; | |
2522 | cp->cache_cflags = cflags; | |
2523 | cp->cache_cpu_mask = umem_cpu_mask; | |
2524 | ||
2525 | /* | |
2526 | * Determine the chunk size. | |
2527 | */ | |
2528 | chunksize = bufsize; | |
2529 | ||
2530 | if (align >= UMEM_ALIGN) { | |
2531 | chunksize = P2ROUNDUP(chunksize, UMEM_ALIGN); | |
2532 | cp->cache_bufctl = chunksize - UMEM_ALIGN; | |
2533 | } | |
2534 | ||
2535 | if (cp->cache_flags & UMF_BUFTAG) { | |
2536 | cp->cache_bufctl = chunksize; | |
2537 | cp->cache_buftag = chunksize; | |
2538 | chunksize += sizeof (umem_buftag_t); | |
2539 | } | |
2540 | ||
2541 | if (cp->cache_flags & UMF_DEADBEEF) { | |
2542 | cp->cache_verify = MIN(cp->cache_buftag, umem_maxverify); | |
2543 | if (cp->cache_flags & UMF_LITE) | |
2544 | cp->cache_verify = MIN(cp->cache_verify, UMEM_ALIGN); | |
2545 | } | |
2546 | ||
2547 | cp->cache_contents = MIN(cp->cache_bufctl, umem_content_maxsave); | |
2548 | ||
2549 | cp->cache_chunksize = chunksize = P2ROUNDUP(chunksize, align); | |
2550 | ||
2551 | if (chunksize < bufsize) { | |
2552 | errno = ENOMEM; | |
2553 | goto fail; | |
2554 | } | |
2555 | ||
2556 | /* | |
2557 | * Now that we know the chunk size, determine the optimal slab size. | |
2558 | */ | |
2559 | if (vmp == umem_firewall_arena) { | |
2560 | cp->cache_slabsize = P2ROUNDUP(chunksize, vmp->vm_quantum); | |
2561 | cp->cache_mincolor = cp->cache_slabsize - chunksize; | |
2562 | cp->cache_maxcolor = cp->cache_mincolor; | |
2563 | cp->cache_flags |= UMF_HASH; | |
2564 | ASSERT(!(cp->cache_flags & UMF_BUFTAG)); | |
2565 | } else if ((cflags & UMC_NOHASH) || (!(cflags & UMC_NOTOUCH) && | |
2566 | !(cp->cache_flags & UMF_AUDIT) && | |
2567 | chunksize < vmp->vm_quantum / UMEM_VOID_FRACTION)) { | |
2568 | cp->cache_slabsize = vmp->vm_quantum; | |
2569 | cp->cache_mincolor = 0; | |
2570 | cp->cache_maxcolor = | |
2571 | (cp->cache_slabsize - sizeof (umem_slab_t)) % chunksize; | |
2572 | ||
2573 | if (chunksize + sizeof (umem_slab_t) > cp->cache_slabsize) { | |
2574 | errno = EINVAL; | |
2575 | goto fail; | |
2576 | } | |
2577 | ASSERT(!(cp->cache_flags & UMF_AUDIT)); | |
2578 | } else { | |
2579 | size_t chunks, bestfit, waste, slabsize; | |
2580 | size_t minwaste = LONG_MAX; | |
2581 | ||
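/*
 * Try slab sizes of 1 .. UMEM_VOID_FRACTION chunks, each rounded up to
 * the arena quantum, and keep the size that wastes the least space per
 * chunk.
 */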
2582 | for (chunks = 1; chunks <= UMEM_VOID_FRACTION; chunks++) { | |
2583 | slabsize = P2ROUNDUP(chunksize * chunks, | |
2584 | vmp->vm_quantum); | |
2585 | /* | |
2586 | * check for overflow | |
2587 | */ | |
2588 | if ((slabsize / chunks) < chunksize) { | |
2589 | errno = ENOMEM; | |
2590 | goto fail; | |
2591 | } | |
2592 | chunks = slabsize / chunksize; | |
2593 | waste = (slabsize % chunksize) / chunks; | |
2594 | if (waste < minwaste) { | |
2595 | minwaste = waste; | |
2596 | bestfit = slabsize; | |
2597 | } | |
2598 | } | |
2599 | if (cflags & UMC_QCACHE) | |
2600 | bestfit = MAX(1 << highbit(3 * vmp->vm_qcache_max), 64); | |
2601 | cp->cache_slabsize = bestfit; | |
2602 | cp->cache_mincolor = 0; | |
2603 | cp->cache_maxcolor = bestfit % chunksize; | |
2604 | cp->cache_flags |= UMF_HASH; | |
2605 | } | |
2606 | ||
2607 | if (cp->cache_flags & UMF_HASH) { | |
2608 | ASSERT(!(cflags & UMC_NOHASH)); | |
2609 | cp->cache_bufctl_cache = (cp->cache_flags & UMF_AUDIT) ? | |
2610 | umem_bufctl_audit_cache : umem_bufctl_cache; | |
2611 | } | |
2612 | ||
2613 | if (cp->cache_maxcolor >= vmp->vm_quantum) | |
2614 | cp->cache_maxcolor = vmp->vm_quantum - 1; | |
2615 | ||
2616 | cp->cache_color = cp->cache_mincolor; | |
2617 | ||
2618 | /* | |
2619 | * Initialize the rest of the slab layer. | |
2620 | */ | |
2621 | (void) mutex_init(&cp->cache_lock, USYNC_THREAD, NULL); | |
2622 | ||
2623 | cp->cache_freelist = &cp->cache_nullslab; | |
2624 | cp->cache_nullslab.slab_cache = cp; | |
2625 | cp->cache_nullslab.slab_refcnt = -1; | |
2626 | cp->cache_nullslab.slab_next = &cp->cache_nullslab; | |
2627 | cp->cache_nullslab.slab_prev = &cp->cache_nullslab; | |
2628 | ||
2629 | if (cp->cache_flags & UMF_HASH) { | |
2630 | cp->cache_hash_table = vmem_alloc(umem_hash_arena, | |
2631 | UMEM_HASH_INITIAL * sizeof (void *), VM_NOSLEEP); | |
2632 | if (cp->cache_hash_table == NULL) { | |
2633 | errno = EAGAIN; | |
2634 | goto fail_lock; | |
2635 | } | |
2636 | bzero(cp->cache_hash_table, | |
2637 | UMEM_HASH_INITIAL * sizeof (void *)); | |
2638 | cp->cache_hash_mask = UMEM_HASH_INITIAL - 1; | |
2639 | cp->cache_hash_shift = highbit((ulong_t)chunksize) - 1; | |
2640 | } | |
2641 | ||
2642 | /* | |
2643 | * Initialize the depot. | |
2644 | */ | |
2645 | (void) mutex_init(&cp->cache_depot_lock, USYNC_THREAD, NULL); | |
2646 | ||
2647 | for (mtp = umem_magtype; chunksize <= mtp->mt_minbuf; mtp++) | |
2648 | continue; | |
2649 | ||
2650 | cp->cache_magtype = mtp; | |
2651 | ||
2652 | /* | |
2653 | * Initialize the CPU layer. | |
2654 | */ | |
2655 | for (cpu_seqid = 0; cpu_seqid < umem_max_ncpus; cpu_seqid++) { | |
2656 | umem_cpu_cache_t *ccp = &cp->cache_cpu[cpu_seqid]; | |
2657 | (void) mutex_init(&ccp->cc_lock, USYNC_THREAD, NULL); | |
2658 | ccp->cc_flags = cp->cache_flags; | |
2659 | ccp->cc_rounds = -1; | |
2660 | ccp->cc_prounds = -1; | |
2661 | } | |
2662 | ||
2663 | /* | |
2664 | * Add the cache to the global list. This makes it visible | |
2665 | * to umem_update(), so the cache must be ready for business. | |
2666 | */ | |
2667 | (void) mutex_lock(&umem_cache_lock); | |
2668 | cp->cache_next = cnext = &umem_null_cache; | |
2669 | cp->cache_prev = cprev = umem_null_cache.cache_prev; | |
2670 | cnext->cache_prev = cp; | |
2671 | cprev->cache_next = cp; | |
2672 | (void) mutex_unlock(&umem_cache_lock); | |
2673 | ||
2674 | if (umem_ready == UMEM_READY) | |
2675 | umem_cache_magazine_enable(cp); | |
2676 | ||
2677 | return (cp); | |
2678 | ||
2679 | fail_lock: | |
2680 | (void) mutex_destroy(&cp->cache_lock); | |
2681 | fail: | |
2682 | vmem_xfree(umem_cache_arena, cp, csize); | |
2683 | return (NULL); | |
2684 | } | |
2685 | ||
2686 | void | |
2687 | umem_cache_destroy(umem_cache_t *cp) | |
2688 | { | |
2689 | int cpu_seqid; | |
2690 | ||
2691 | /* | |
2692 | * Remove the cache from the global cache list so that no new updates | |
2693 | * will be scheduled on its behalf, wait for any pending tasks to | |
2694 | * complete, purge the cache, and then destroy it. | |
2695 | */ | |
2696 | (void) mutex_lock(&umem_cache_lock); | |
2697 | cp->cache_prev->cache_next = cp->cache_next; | |
2698 | cp->cache_next->cache_prev = cp->cache_prev; | |
2699 | cp->cache_prev = cp->cache_next = NULL; | |
2700 | (void) mutex_unlock(&umem_cache_lock); | |
2701 | ||
2702 | umem_remove_updates(cp); | |
2703 | ||
2704 | umem_cache_magazine_purge(cp); | |
2705 | ||
2706 | (void) mutex_lock(&cp->cache_lock); | |
2707 | if (cp->cache_buftotal != 0) | |
2708 | log_message("umem_cache_destroy: '%s' (%p) not empty\n", | |
2709 | cp->cache_name, (void *)cp); | |
2710 | cp->cache_reclaim = NULL; | |
2711 | /* | |
2712 | * The cache is now dead. There should be no further activity. | |
2713 | * We enforce this by setting land mines in the constructor and | |
2714 | * destructor routines that induce a segmentation fault if invoked. | |
2715 | */ | |
2716 | cp->cache_constructor = (umem_constructor_t *)1; | |
2717 | cp->cache_destructor = (umem_destructor_t *)2; | |
2718 | (void) mutex_unlock(&cp->cache_lock); | |
2719 | ||
2720 | if (cp->cache_hash_table != NULL) | |
2721 | vmem_free(umem_hash_arena, cp->cache_hash_table, | |
2722 | (cp->cache_hash_mask + 1) * sizeof (void *)); | |
2723 | ||
2724 | for (cpu_seqid = 0; cpu_seqid < umem_max_ncpus; cpu_seqid++) | |
2725 | (void) mutex_destroy(&cp->cache_cpu[cpu_seqid].cc_lock); | |
2726 | ||
2727 | (void) mutex_destroy(&cp->cache_depot_lock); | |
2728 | (void) mutex_destroy(&cp->cache_lock); | |
2729 | ||
2730 | vmem_free(umem_cache_arena, cp, UMEM_CACHE_SIZE(umem_max_ncpus)); | |
2731 | } | |
2732 | ||
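/*
 * Editing interface for the umem_alloc_sizes[] table, which determines the
 * set of fixed-size caches backing umem_alloc(): clear the table, add a
 * size, or remove one.
 */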
2733 | void |
2734 | umem_alloc_sizes_clear(void) | |
2735 | { | |
2736 | int i; | |
2737 | ||
2738 | umem_alloc_sizes[0] = UMEM_MAXBUF; | |
2739 | for (i = 1; i < NUM_ALLOC_SIZES; i++) | |
2740 | umem_alloc_sizes[i] = 0; | |
2741 | } | |
2742 | ||
2743 | void | |
2744 | umem_alloc_sizes_add(size_t size_arg) | |
2745 | { | |
2746 | int i, j; | |
2747 | size_t size = size_arg; | |
2748 | ||
2749 | if (size == 0) { | |
2750 | log_message("size_add: cannot add zero-sized cache\n"); |
2752 | return; | |
2753 | } | |
2754 | ||
2755 | if (size > UMEM_MAXBUF) { | |
2756 | log_message("size_add: %ld > %d, cannot add\n", size, | |
2757 | UMEM_MAXBUF); | |
2758 | return; | |
2759 | } | |
2760 | ||
2761 | if (umem_alloc_sizes[NUM_ALLOC_SIZES - 1] != 0) { | |
2762 | log_message("size_add: no space in alloc_table for %d\n", | |
2763 | size); | |
2764 | return; | |
2765 | } | |
2766 | ||
2767 | if (P2PHASE(size, UMEM_ALIGN) != 0) { | |
2768 | size = P2ROUNDUP(size, UMEM_ALIGN); | |
2769 | log_message("size_add: rounding %ld up to %ld\n", size_arg, |
2770 | size); |
2771 | } | |
2772 | ||
2773 | for (i = 0; i < NUM_ALLOC_SIZES; i++) { | |
2774 | int cur = umem_alloc_sizes[i]; | |
2775 | if (cur == size) { | |
2776 | log_message("size_add: %ld already in table\n", | |
2777 | size); | |
2778 | return; | |
2779 | } | |
2780 | if (cur > size) | |
2781 | break; | |
2782 | } | |
2783 | ||
2784 | for (j = NUM_ALLOC_SIZES - 1; j > i; j--) | |
2785 | umem_alloc_sizes[j] = umem_alloc_sizes[j-1]; | |
2786 | umem_alloc_sizes[i] = size; | |
2787 | } | |
2788 | ||
2789 | void | |
2790 | umem_alloc_sizes_remove(size_t size) | |
2791 | { | |
2792 | int i; | |
2793 | ||
2794 | if (size == UMEM_MAXBUF) { | |
2795 | log_message("size_remove: cannot remove %ld\n", size); | |
2796 | return; | |
2797 | } | |
2798 | ||
2799 | for (i = 0; i < NUM_ALLOC_SIZES; i++) { | |
2800 | int cur = umem_alloc_sizes[i]; | |
2801 | if (cur == size) | |
2802 | break; | |
2803 | else if (cur > size || cur == 0) { | |
2804 | log_message("size_remove: %ld not found in table\n", | |
2805 | size); | |
2806 | return; | |
2807 | } | |
2808 | } | |
2809 | ||
2810 | for (; i + 1 < NUM_ALLOC_SIZES; i++) | |
2811 | umem_alloc_sizes[i] = umem_alloc_sizes[i+1]; | |
2812 | umem_alloc_sizes[i] = 0; | |
2813 | } | |
2814 | ||
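/*
 * Create umem's internal caches (magazines, slabs, bufctls) and the default
 * umem_alloc() caches, then wire the latter into umem_alloc_table.
 * Returns 1 on success, 0 on failure.
 */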
2815 | static int |
2816 | umem_cache_init(void) | |
2817 | { | |
2818 | int i; | |
2819 | size_t size, max_size; | |
2820 | umem_cache_t *cp; | |
2821 | umem_magtype_t *mtp; | |
2822 | char name[UMEM_CACHE_NAMELEN + 1]; | |
2823 | umem_cache_t *umem_alloc_caches[NUM_ALLOC_SIZES]; | |
2824 | ||
2825 | for (i = 0; i < sizeof (umem_magtype) / sizeof (*mtp); i++) { | |
2826 | mtp = &umem_magtype[i]; | |
2827 | (void) snprintf(name, sizeof (name), "umem_magazine_%d", | |
2828 | mtp->mt_magsize); | |
2829 | mtp->mt_cache = umem_cache_create(name, | |
2830 | (mtp->mt_magsize + 1) * sizeof (void *), | |
2831 | mtp->mt_align, NULL, NULL, NULL, NULL, | |
2832 | umem_internal_arena, UMC_NOHASH | UMC_INTERNAL); | |
2833 | if (mtp->mt_cache == NULL) | |
2834 | return (0); | |
2835 | } | |
2836 | ||
2837 | umem_slab_cache = umem_cache_create("umem_slab_cache", | |
2838 | sizeof (umem_slab_t), 0, NULL, NULL, NULL, NULL, | |
2839 | umem_internal_arena, UMC_NOHASH | UMC_INTERNAL); | |
2840 | ||
2841 | if (umem_slab_cache == NULL) | |
2842 | return (0); | |
2843 | ||
2844 | umem_bufctl_cache = umem_cache_create("umem_bufctl_cache", | |
2845 | sizeof (umem_bufctl_t), 0, NULL, NULL, NULL, NULL, | |
2846 | umem_internal_arena, UMC_NOHASH | UMC_INTERNAL); | |
2847 | ||
2848 | if (umem_bufctl_cache == NULL) | |
2849 | return (0); | |
2850 | ||
2851 | /* | |
2852 | * The size of the umem_bufctl_audit structure depends upon | |
2853 | * umem_stack_depth. See umem_impl.h for details on the size | |
2854 | * restrictions. | |
2855 | */ | |
2856 | ||
2857 | size = UMEM_BUFCTL_AUDIT_SIZE_DEPTH(umem_stack_depth); | |
2858 | max_size = UMEM_BUFCTL_AUDIT_MAX_SIZE; | |
2859 | ||
2860 | if (size > max_size) { /* too large -- truncate */ | |
2861 | int max_frames = UMEM_MAX_STACK_DEPTH; | |
2862 | ||
2863 | ASSERT(UMEM_BUFCTL_AUDIT_SIZE_DEPTH(max_frames) <= max_size); | |
2864 | ||
2865 | umem_stack_depth = max_frames; | |
2866 | size = UMEM_BUFCTL_AUDIT_SIZE_DEPTH(umem_stack_depth); | |
2867 | } | |
2868 | ||
2869 | umem_bufctl_audit_cache = umem_cache_create("umem_bufctl_audit_cache", | |
2870 | size, 0, NULL, NULL, NULL, NULL, umem_internal_arena, | |
2871 | UMC_NOHASH | UMC_INTERNAL); | |
2872 | ||
2873 | if (umem_bufctl_audit_cache == NULL) | |
2874 | return (0); | |
2875 | ||
2876 | if (vmem_backend & VMEM_BACKEND_MMAP) | |
2877 | umem_va_arena = vmem_create("umem_va", | |
2878 | NULL, 0, pagesize, | |
2879 | vmem_alloc, vmem_free, heap_arena, | |
2880 | 8 * pagesize, VM_NOSLEEP); | |
2881 | else | |
2882 | umem_va_arena = heap_arena; | |
2883 | ||
2884 | if (umem_va_arena == NULL) | |
2885 | return (0); | |
2886 | ||
2887 | umem_default_arena = vmem_create("umem_default", | |
2888 | NULL, 0, pagesize, | |
2889 | heap_alloc, heap_free, umem_va_arena, | |
2890 | 0, VM_NOSLEEP); | |
2891 | ||
2892 | if (umem_default_arena == NULL) | |
2893 | return (0); | |
2894 | ||
2895 | /* | |
2896 | * make sure the umem_alloc table initializer is correct | |
2897 | */ | |
2898 | i = sizeof (umem_alloc_table) / sizeof (*umem_alloc_table); | |
2899 | ASSERT(umem_alloc_table[i - 1] == &umem_null_cache); | |
2900 | ||
2901 | /* | |
2902 | * Create the default caches to back umem_alloc() | |
2903 | */ | |
2904 | for (i = 0; i < NUM_ALLOC_SIZES; i++) { | |
2905 | size_t cache_size = umem_alloc_sizes[i]; | |
2906 | size_t align = 0; | |
2907 | |
2908 | if (cache_size == 0) | |
2909 | break; /* 0 terminates the list */ | |
2910 | ||
2911 | /* |
2912 | * If they allocate a multiple of the coherency granularity, | |
2913 | * they get a coherency-granularity-aligned address. | |
2914 | */ | |
2915 | if (IS_P2ALIGNED(cache_size, 64)) | |
2916 | align = 64; | |
2917 | if (IS_P2ALIGNED(cache_size, pagesize)) | |
2918 | align = pagesize; | |
2919 | (void) snprintf(name, sizeof (name), "umem_alloc_%lu", | |
2920 | (long)cache_size); | |
2921 | ||
2922 | cp = umem_cache_create(name, cache_size, align, | |
2923 | NULL, NULL, NULL, NULL, NULL, UMC_INTERNAL); | |
2924 | if (cp == NULL) | |
2925 | return (0); | |
2926 | ||
2927 | umem_alloc_caches[i] = cp; | |
2928 | } | |
2929 | ||
2930 | /* | |
2931 | * Initialization cannot fail at this point. Make the caches | |
2932 | * visible to umem_alloc() and friends. | |
2933 | */ | |
2934 | size = UMEM_ALIGN; | |
2935 | for (i = 0; i < NUM_ALLOC_SIZES; i++) { | |
2936 | size_t cache_size = umem_alloc_sizes[i]; | |
2937 | ||
2938 | if (cache_size == 0) |
2939 | break; /* 0 terminates the list */ | |
2940 | ||
2941 | cp = umem_alloc_caches[i]; |
2942 | ||
2943 | while (size <= cache_size) { | |
2944 | umem_alloc_table[(size - 1) >> UMEM_ALIGN_SHIFT] = cp; | |
2945 | size += UMEM_ALIGN; | |
2946 | } | |
2947 | } | |
2948 | ASSERT(size - UMEM_ALIGN == UMEM_MAXBUF);
2949 | return (1); |
2950 | } | |
2951 | ||
2952 | /* | |
2953 | * umem_startup() is called early on, and must be called explicitly if we're | |
2954 | * the standalone version. | |
2955 | */ | |
2956 | #ifdef UMEM_STANDALONE
2957 | void
2958 | #else |
2959 | #pragma init(umem_startup) | |
2960 | static void | |
2961 | #endif | |
2962 | umem_startup(caddr_t start, size_t len, size_t pagesize, caddr_t minstack, | |
2963 | caddr_t maxstack) | |
2964 | {
2965 | #ifdef UMEM_STANDALONE |
2966 | int idx; | |
2967 | /* Standalone doesn't fork */ | |
2968 | #else | |
2969 | umem_forkhandler_init(); /* register the fork handler */ | |
2970 | #endif | |
2971 | ||
2972 | #ifdef __lint | |
2973 | /* make lint happy */ | |
2974 | minstack = maxstack; | |
2975 | #endif | |
2976 | ||
2977 | #ifdef UMEM_STANDALONE | |
2978 | umem_ready = UMEM_READY_STARTUP; | |
2979 | umem_init_env_ready = 0; | |
2980 | ||
2981 | umem_min_stack = minstack; | |
2982 | umem_max_stack = maxstack; | |
2983 | ||
2984 | nofail_callback = NULL; | |
2985 | umem_slab_cache = NULL; | |
2986 | umem_bufctl_cache = NULL; | |
2987 | umem_bufctl_audit_cache = NULL; | |
2988 | heap_arena = NULL; | |
2989 | heap_alloc = NULL; | |
2990 | heap_free = NULL; | |
2991 | umem_internal_arena = NULL; | |
2992 | umem_cache_arena = NULL; | |
2993 | umem_hash_arena = NULL; | |
2994 | umem_log_arena = NULL; | |
2995 | umem_oversize_arena = NULL; | |
2996 | umem_va_arena = NULL; | |
2997 | umem_default_arena = NULL; | |
2998 | umem_firewall_va_arena = NULL; | |
2999 | umem_firewall_arena = NULL; | |
3000 | umem_memalign_arena = NULL; | |
3001 | umem_transaction_log = NULL; | |
3002 | umem_content_log = NULL; | |
3003 | umem_failure_log = NULL; | |
3004 | umem_slab_log = NULL; | |
3005 | umem_cpu_mask = 0; | |
3006 | ||
3007 | umem_cpus = &umem_startup_cpu; | |
3008 | umem_startup_cpu.cpu_cache_offset = UMEM_CACHE_SIZE(0); | |
3009 | umem_startup_cpu.cpu_number = 0; | |
3010 | ||
3011 | bcopy(&umem_null_cache_template, &umem_null_cache, | |
3012 | sizeof (umem_cache_t)); | |
3013 | ||
3014 | for (idx = 0; idx < (UMEM_MAXBUF >> UMEM_ALIGN_SHIFT); idx++) | |
3015 | umem_alloc_table[idx] = &umem_null_cache; | |
3016 | #endif | |
3017 | ||
3018 | /* | |
3019 | * Perform initialization specific to the way we've been compiled | |
3020 | * (library or standalone) | |
3021 | */ | |
3022 | umem_type_init(start, len, pagesize); | |
3023 | ||
3024 | vmem_startup(); | |
3025 | } | |
3026 | ||
3027 | int | |
3028 | umem_init(void) | |
3029 | { | |
3030 | size_t maxverify, minfirewall; | |
3031 | size_t size; | |
3032 | int idx; | |
3033 | umem_cpu_t *new_cpus; | |
3034 | ||
3035 | vmem_t *memalign_arena, *oversize_arena; | |
3036 | ||
3037 | if (thr_self() != umem_init_thr) { | |
3038 | /* | |
3039 | * The usual case -- non-recursive invocation of umem_init(). | |
3040 | */ | |
3041 | (void) mutex_lock(&umem_init_lock); | |
3042 | if (umem_ready != UMEM_READY_STARTUP) { | |
3043 | /* | |
3044 | * someone else beat us to initializing umem. Wait | |
3045 | * for them to complete, then return. | |
3046 | */ | |
3047 | while (umem_ready == UMEM_READY_INITING) { |
3048 | int cancel_state; | |
3049 | ||
3050 | (void) pthread_setcancelstate( | |
3051 | PTHREAD_CANCEL_DISABLE, &cancel_state); | |
3052 | (void) cond_wait(&umem_init_cv, | |
3053 | &umem_init_lock);
3054 | (void) pthread_setcancelstate( |
3055 | cancel_state, NULL); | |
3056 | } | |
3057 | ASSERT(umem_ready == UMEM_READY || |
3058 | umem_ready == UMEM_READY_INIT_FAILED); | |
3059 | (void) mutex_unlock(&umem_init_lock); | |
3060 | return (umem_ready == UMEM_READY); | |
3061 | } | |
3062 | ||
3063 | ASSERT(umem_ready == UMEM_READY_STARTUP); | |
3064 | ASSERT(umem_init_env_ready == 0); | |
3065 | ||
3066 | umem_ready = UMEM_READY_INITING; | |
3067 | umem_init_thr = thr_self(); | |
3068 | ||
3069 | (void) mutex_unlock(&umem_init_lock); | |
3070 | umem_setup_envvars(0); /* can recurse -- see below */ | |
3071 | if (umem_init_env_ready) { | |
3072 | /* | |
3073 | * initialization was completed already | |
3074 | */ | |
3075 | ASSERT(umem_ready == UMEM_READY || | |
3076 | umem_ready == UMEM_READY_INIT_FAILED); | |
3077 | ASSERT(umem_init_thr == 0); | |
3078 | return (umem_ready == UMEM_READY); | |
3079 | } | |
3080 | } else if (!umem_init_env_ready) { | |
3081 | /* | |
3082 | * The umem_setup_envvars() call (above) makes calls into | |
3083 | * the dynamic linker and directly into user-supplied code. | |
3084 | * Since we cannot know what that code will do, we could be | |
3085 | * recursively invoked (by, say, a malloc() call in the code | |
3086 | * itself, or in a (C++) _init section it causes to be fired). | |
3087 | * | |
3088 | * This code is where we end up if such recursion occurs. We | |
3089 | * first clean up any partial results in the envvar code, then | |
3090 | * proceed to finish initialization processing in the recursive | |
3091 | * call. The original call will notice this, and return | |
3092 | * immediately. | |
3093 | */ | |
3094 | umem_setup_envvars(1); /* clean up any partial state */ | |
3095 | } else { | |
3096 | umem_panic( | |
3097 | "recursive allocation while initializing umem\n"); | |
3098 | } | |
3099 | umem_init_env_ready = 1; | |
3100 | ||
3101 | /* | |
3102 | * From this point until we finish, recursion into umem_init() will | |
3103 | * cause a umem_panic(). | |
3104 | */ | |
3105 | maxverify = minfirewall = ULONG_MAX; | |
3106 | ||
3107 | /* LINTED constant condition */ | |
3108 | if (sizeof (umem_cpu_cache_t) != UMEM_CPU_CACHE_SIZE) { | |
3109 | umem_panic("sizeof (umem_cpu_cache_t) = %d, should be %d\n", | |
3110 | sizeof (umem_cpu_cache_t), UMEM_CPU_CACHE_SIZE); | |
3111 | } | |
3112 | ||
3113 | umem_max_ncpus = umem_get_max_ncpus(); | |
3114 | ||
3115 | /* | |
3116 | * load tunables from environment | |
3117 | */ | |
3118 | umem_process_envvars(); | |
3119 | ||
3120 | if (issetugid()) | |
3121 | umem_mtbf = 0; | |
3122 | ||
3123 | /* | |
3124 | * set up vmem | |
3125 | */ | |
3126 | if (!(umem_flags & UMF_AUDIT)) | |
3127 | vmem_no_debug(); | |
3128 | ||
3129 | heap_arena = vmem_heap_arena(&heap_alloc, &heap_free); | |
3130 | ||
3131 | pagesize = heap_arena->vm_quantum; | |
3132 | ||
3133 | umem_internal_arena = vmem_create("umem_internal", NULL, 0, pagesize, | |
3134 | heap_alloc, heap_free, heap_arena, 0, VM_NOSLEEP); | |
3135 | ||
3136 | umem_default_arena = umem_internal_arena; | |
3137 | ||
3138 | if (umem_internal_arena == NULL) | |
3139 | goto fail; | |
3140 | ||
3141 | umem_cache_arena = vmem_create("umem_cache", NULL, 0, UMEM_ALIGN, | |
3142 | vmem_alloc, vmem_free, umem_internal_arena, 0, VM_NOSLEEP); | |
3143 | ||
3144 | umem_hash_arena = vmem_create("umem_hash", NULL, 0, UMEM_ALIGN, | |
3145 | vmem_alloc, vmem_free, umem_internal_arena, 0, VM_NOSLEEP); | |
3146 | ||
3147 | umem_log_arena = vmem_create("umem_log", NULL, 0, UMEM_ALIGN, | |
3148 | heap_alloc, heap_free, heap_arena, 0, VM_NOSLEEP); | |
3149 | ||
3150 | umem_firewall_va_arena = vmem_create("umem_firewall_va", | |
3151 | NULL, 0, pagesize, | |
3152 | umem_firewall_va_alloc, umem_firewall_va_free, heap_arena, | |
3153 | 0, VM_NOSLEEP); | |
3154 | ||
3155 | if (umem_cache_arena == NULL || umem_hash_arena == NULL || | |
3156 | umem_log_arena == NULL || umem_firewall_va_arena == NULL) | |
3157 | goto fail; | |
3158 | ||
3159 | umem_firewall_arena = vmem_create("umem_firewall", NULL, 0, pagesize, | |
3160 | heap_alloc, heap_free, umem_firewall_va_arena, 0, | |
3161 | VM_NOSLEEP); | |
3162 | ||
3163 | if (umem_firewall_arena == NULL) | |
3164 | goto fail; | |
3165 | ||
3166 | oversize_arena = vmem_create("umem_oversize", NULL, 0, pagesize, | |
3167 | heap_alloc, heap_free, minfirewall < ULONG_MAX ? | |
3168 | umem_firewall_va_arena : heap_arena, 0, VM_NOSLEEP); | |
3169 | ||
3170 | memalign_arena = vmem_create("umem_memalign", NULL, 0, UMEM_ALIGN, | |
3171 | heap_alloc, heap_free, minfirewall < ULONG_MAX ? | |
3172 | umem_firewall_va_arena : heap_arena, 0, VM_NOSLEEP); | |
3173 | ||
3174 | if (oversize_arena == NULL || memalign_arena == NULL) | |
3175 | goto fail; | |
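/*
 * At this point the arena hierarchy (each child imports spans from its
 * parent) looks like this:
 *
 *   heap_arena
 *     +-- umem_internal_arena   (currently also umem_default_arena)
 *     |     +-- umem_cache_arena    (cache structures)
 *     |     +-- umem_hash_arena     (hash tables)
 *     +-- umem_log_arena            (debug logs)
 *     +-- umem_firewall_va_arena
 *     |     +-- umem_firewall_arena
 *     +-- oversize_arena, memalign_arena
 *           (these two are parented to umem_firewall_va_arena instead
 *           when a firewall threshold is in effect)
 */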
3176 | ||
3177 | if (umem_max_ncpus > CPUHINT_MAX()) | |
3178 | umem_max_ncpus = CPUHINT_MAX(); | |
3179 | ||
3180 | while ((umem_max_ncpus & (umem_max_ncpus - 1)) != 0) | |
3181 | umem_max_ncpus++; | |
3182 | ||
3183 | if (umem_max_ncpus == 0) | |
3184 | umem_max_ncpus = 1; | |
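/*
 * The loop above rounds umem_max_ncpus up to a power of two: a value is a
 * power of two exactly when (n & (n - 1)) == 0, so, for example, 6 is
 * bumped to 7 and then to 8, while 8 passes through unchanged.  A
 * power-of-two count is what lets umem_cpu_mask, computed below, replace
 * a modulo with a single AND.
 */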
3185 | ||
3186 | size = umem_max_ncpus * sizeof (umem_cpu_t); | |
3187 | new_cpus = vmem_alloc(umem_internal_arena, size, VM_NOSLEEP); | |
3188 | if (new_cpus == NULL) | |
3189 | goto fail; | |
3190 | ||
3191 | bzero(new_cpus, size); | |
3192 | for (idx = 0; idx < umem_max_ncpus; idx++) { | |
3193 | new_cpus[idx].cpu_number = idx; | |
3194 | new_cpus[idx].cpu_cache_offset = UMEM_CACHE_SIZE(idx); | |
3195 | } | |
3196 | umem_cpus = new_cpus; | |
3197 | umem_cpu_mask = (umem_max_ncpus - 1); | |
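/*
 * Each umem_cpu_t records its index and a precomputed byte offset
 * (UMEM_CACHE_SIZE(idx)), which the per-CPU cache lookup adds to a cache
 * pointer to reach that CPU's slot; umem_cpu_mask then reduces an
 * arbitrary CPU hint to a valid index, e.g. something along the lines of
 * umem_cpus[hint & umem_cpu_mask] (illustrative, not the actual macro).
 */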
3198 | ||
3199 | if (umem_maxverify == 0) | |
3200 | umem_maxverify = maxverify; | |
3201 | ||
3202 | if (umem_minfirewall == 0) | |
3203 | umem_minfirewall = minfirewall; | |
3204 | ||
3205 | /* | |
3206 | * Set up updating and reaping | |
3207 | */ | |
3208 | umem_reap_next = gethrtime() + NANOSEC; | |
3209 | ||
3210 | #ifndef UMEM_STANDALONE | |
3211 | (void) gettimeofday(&umem_update_next, NULL); | |
3212 | #endif | |
3213 | ||
3214 | /* | |
3215 | * Set up logging -- failure here is okay, since it will just disable | |
3216 | * the logs | |
3217 | */ | |
3218 | if (umem_logging) { | |
3219 | umem_transaction_log = umem_log_init(umem_transaction_log_size); | |
3220 | umem_content_log = umem_log_init(umem_content_log_size); | |
3221 | umem_failure_log = umem_log_init(umem_failure_log_size); | |
3222 | umem_slab_log = umem_log_init(umem_slab_log_size); | |
3223 | } | |
3224 | ||
3225 | /* | |
3226 | * Set up caches -- if successful, initialization cannot fail, since | |
3227 | * allocations from other threads can now succeed. | |
3228 | */ | |
3229 | if (umem_cache_init() == 0) { | |
3230 | log_message("unable to create initial caches\n"); | |
3231 | goto fail; | |
3232 | } | |
3233 | umem_oversize_arena = oversize_arena; | |
3234 | umem_memalign_arena = memalign_arena; | |
3235 | ||
3236 | umem_cache_applyall(umem_cache_magazine_enable); | |
3237 | ||
3238 | /* | |
3239 | * initialization done, ready to go | |
3240 | */ | |
3241 | (void) mutex_lock(&umem_init_lock); | |
3242 | umem_ready = UMEM_READY; | |
3243 | umem_init_thr = 0; | |
3244 | (void) cond_broadcast(&umem_init_cv); | |
3245 | (void) mutex_unlock(&umem_init_lock); | |
3246 | return (1); | |
3247 | ||
3248 | fail: | |
3249 | log_message("umem initialization failed\n"); | |
3250 | ||
3251 | (void) mutex_lock(&umem_init_lock); | |
3252 | umem_ready = UMEM_READY_INIT_FAILED; | |
3253 | umem_init_thr = 0; | |
3254 | (void) cond_broadcast(&umem_init_cv); | |
3255 | (void) mutex_unlock(&umem_init_lock); | |
3256 | return (0); | |
3257 | } |
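/*
 * Both exit paths publish the final state under umem_init_lock and
 * broadcast umem_init_cv, so any threads blocked in the wait loop at the
 * top of this function wake and return the same success/failure result.
 * Allocation fast paths elsewhere in the library follow the usual
 * double-checked pattern, roughly (illustrative, not a quote of the
 * actual call sites):
 *
 *	if (umem_ready != UMEM_READY && umem_init() == 0)
 *		return (NULL);
 *
 * so full initialization is deferred to the first allocation, as described
 * in the overview comment at the top of this file.
 */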