/* Thread data for thread that performed the allocation. */
prof_tdata_t *tdata;
+ /*
+ * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be
+ * defunct during teardown.
+ */
+ uint64_t thr_uid;
+ uint64_t thr_discrim;
+
/* Profiling counters, protected by tdata->lock. */
prof_cnt_t cnts;
/* Associated global context. */
prof_gctx_t *gctx;
+ /*
+ * UID that distinguishes multiple tctx's created by the same thread,
+ * but coexisting in gctx->tctxs. There are two ways that such
+ * coexistence can occur:
+ * - A dumper thread can cause a tctx to be retained in the purgatory
+ * state.
+ * - Although a single "producer" thread must create all tctx's which
+ * share the same thr_uid, multiple "consumers" can each concurrently
+ * execute portions of prof_tctx_destroy(). prof_tctx_destroy() only
+ * gets called once each time cnts.cur{objs,bytes} drop to 0, but this
+ * threshold can be hit again before the first consumer finishes
+ * executing prof_tctx_destroy().
+ */
+ uint64_t tctx_uid;
+
/* Linkage into gctx's tctxs. */
rb_node(prof_tctx_t) tctx_link;
rb_node(prof_tdata_t) tdata_link;
+ /*
+ * Counter used to initialize prof_tctx_t's tctx_uid. No locking is
+ * necessary when incrementing this field, because only one thread ever
+ * does so.
+ */
+ uint64_t tctx_uid_next;
+
/*
* Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks
* backtraces for which it has non-zero allocation/deallocation counters
/* Accessed via prof_active_[gs]et{_unlocked,}(). */
extern bool prof_active;
+/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */
+extern bool prof_gdump_val;
+
/*
* Profile dump interval, measured in bytes allocated. Each arena triggers a
* profile dump when it reaches this threshold. The effect is that the
bool prof_thread_active_set(bool active);
bool prof_thread_active_init_get(void);
bool prof_thread_active_init_set(bool active_init);
+bool prof_gdump_get(void);
+bool prof_gdump_set(bool active);
void prof_boot0(void);
void prof_boot1(void);
bool prof_boot2(void);
#ifndef JEMALLOC_ENABLE_INLINE
bool prof_active_get_unlocked(void);
+bool prof_gdump_get_unlocked(void);
prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create);
bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit,
prof_tdata_t **tdata_out);
-prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool update);
+prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active,
+ bool update);
prof_tctx_t *prof_tctx_get(const void *ptr);
-void prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
+void prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx);
+void prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr,
+ prof_tctx_t *tctx);
void prof_malloc_sample_object(const void *ptr, size_t usize,
prof_tctx_t *tctx);
void prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx);
void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize,
- prof_tctx_t *tctx, bool updated, size_t old_usize, prof_tctx_t *old_tctx);
+ prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr,
+ size_t old_usize, prof_tctx_t *old_tctx);
void prof_free(tsd_t *tsd, const void *ptr, size_t usize);
#endif
return (prof_active);
}
+JEMALLOC_ALWAYS_INLINE bool
+prof_gdump_get_unlocked(void)
+{
+
+ /*
+ * No locking is used when reading prof_gdump_val in the fast path, so
+ * there are no guarantees regarding how long it will take for all
+ * threads to notice state changes.
+ */
+ return (prof_gdump_val);
+}
+
JEMALLOC_ALWAYS_INLINE prof_tdata_t *
prof_tdata_get(tsd_t *tsd, bool create)
{
JEMALLOC_ALWAYS_INLINE prof_tctx_t *
prof_tctx_get(const void *ptr)
{
- prof_tctx_t *ret;
- arena_chunk_t *chunk;
cassert(config_prof);
assert(ptr != NULL);
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk != ptr) {
- /* Region. */
- ret = arena_prof_tctx_get(ptr);
- } else
- ret = huge_prof_tctx_get(ptr);
+ return (arena_prof_tctx_get(ptr));
+}
- return (ret);
+JEMALLOC_ALWAYS_INLINE void
+prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx)
+{
+
+ cassert(config_prof);
+ assert(ptr != NULL);
+
+ arena_prof_tctx_set(ptr, usize, tctx);
}
JEMALLOC_ALWAYS_INLINE void
-prof_tctx_set(const void *ptr, prof_tctx_t *tctx)
+prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr,
+ prof_tctx_t *old_tctx)
{
- arena_chunk_t *chunk;
cassert(config_prof);
assert(ptr != NULL);
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk != ptr) {
- /* Region. */
- arena_prof_tctx_set(ptr, tctx);
- } else
- huge_prof_tctx_set(ptr, tctx);
+ arena_prof_tctx_reset(ptr, usize, old_ptr, old_tctx);
}
JEMALLOC_ALWAYS_INLINE bool
cassert(config_prof);
tdata = prof_tdata_get(tsd, true);
- if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
+ if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX))
tdata = NULL;
if (tdata_out != NULL)
*tdata_out = tdata;
- if (tdata == NULL)
+ if (unlikely(tdata == NULL))
return (true);
- if (tdata->bytes_until_sample >= usize) {
+ if (likely(tdata->bytes_until_sample >= usize)) {
if (update)
tdata->bytes_until_sample -= usize;
return (true);
}
JEMALLOC_ALWAYS_INLINE prof_tctx_t *
-prof_alloc_prep(tsd_t *tsd, size_t usize, bool update)
+prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update)
{
prof_tctx_t *ret;
prof_tdata_t *tdata;
assert(usize == s2u(usize));
- if (!prof_active_get_unlocked() || likely(prof_sample_accum_update(tsd,
- usize, update, &tdata)))
+ if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update,
+ &tdata)))
ret = (prof_tctx_t *)(uintptr_t)1U;
else {
bt_init(&bt, tdata->vec);
if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
prof_malloc_sample_object(ptr, usize, tctx);
else
- prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U);
+ prof_tctx_set(ptr, usize, (prof_tctx_t *)(uintptr_t)1U);
}
JEMALLOC_ALWAYS_INLINE void
prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
- bool updated, size_t old_usize, prof_tctx_t *old_tctx)
+ bool prof_active, bool updated, const void *old_ptr, size_t old_usize,
+ prof_tctx_t *old_tctx)
{
+ bool sampled, old_sampled;
cassert(config_prof);
assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);
- if (!updated && ptr != NULL) {
+ if (prof_active && !updated && ptr != NULL) {
assert(usize == isalloc(ptr, true));
if (prof_sample_accum_update(tsd, usize, true, NULL)) {
/*
- * Don't sample. The usize passed to PROF_ALLOC_PREP()
+ * Don't sample. The usize passed to prof_alloc_prep()
* was larger than what actually got allocated, so a
* backtrace was captured for this allocation, even
* though its actual usize was insufficient to cross the
}
}
- if (unlikely((uintptr_t)old_tctx > (uintptr_t)1U))
- prof_free_sampled_object(tsd, old_usize, old_tctx);
- if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
+ sampled = ((uintptr_t)tctx > (uintptr_t)1U);
+ old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U);
+
+ if (unlikely(sampled))
prof_malloc_sample_object(ptr, usize, tctx);
else
- prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U);
+ prof_tctx_reset(ptr, usize, old_ptr, old_tctx);
+
+ if (unlikely(old_sampled))
+ prof_free_sampled_object(tsd, old_usize, old_tctx);
}
JEMALLOC_ALWAYS_INLINE void