Imported Upstream version 1.9.0+dfsg1

[rustc.git] / src / jemalloc / include / jemalloc / internal / prof.h
diff --git a/src/jemalloc/include/jemalloc/internal/prof.h b/src/jemalloc/include/jemalloc/internal/prof.h

index c8014717ece797d7e864ef9da27b2e08c25a979c..a25502a9f3989aad83203f241aefc653e8d0d4a2 100644 (file)
--- a/src/jemalloc/include/jemalloc/internal/prof.h
+++ b/src/jemalloc/include/jemalloc/internal/prof.h
@@ -89,12 +89,34 @@ struct prof_tctx_s {
         /* Thread data for thread that performed the allocation. */
         prof_tdata_t            *tdata;
  
+       /*
+        * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be
+        * defunct during teardown.
+        */
+       uint64_t                thr_uid;
+       uint64_t                thr_discrim;
+
         /* Profiling counters, protected by tdata->lock. */
         prof_cnt_t              cnts;
  
         /* Associated global context. */
         prof_gctx_t             *gctx;
  
+       /*
+        * UID that distinguishes multiple tctx's created by the same thread,
+        * but coexisting in gctx->tctxs.  There are two ways that such
+        * coexistence can occur:
+        * - A dumper thread can cause a tctx to be retained in the purgatory
+        *   state.
+        * - Although a single "producer" thread must create all tctx's which
+        *   share the same thr_uid, multiple "consumers" can each concurrently
+        *   execute portions of prof_tctx_destroy().  prof_tctx_destroy() only
+        *   gets called once each time cnts.cur{objs,bytes} drop to 0, but this
+        *   threshold can be hit again before the first consumer finishes
+        *   executing prof_tctx_destroy().
+        */
+       uint64_t                tctx_uid;
+
         /* Linkage into gctx's tctxs. */
         rb_node(prof_tctx_t)    tctx_link;
  
@@ -171,6 +193,13 @@ struct prof_tdata_s {
  
         rb_node(prof_tdata_t)   tdata_link;
  
+       /*
+        * Counter used to initialize prof_tctx_t's tctx_uid.  No locking is
+        * necessary when incrementing this field, because only one thread ever
+        * does so.
+        */
+       uint64_t                tctx_uid_next;
+
         /*
          * Hash of (prof_bt_t *)-->(prof_tctx_t *).  Each thread tracks
          * backtraces for which it has non-zero allocation/deallocation counters
@@ -233,6 +262,9 @@ extern char opt_prof_prefix[
  /* Accessed via prof_active_[gs]et{_unlocked,}(). */
  extern bool    prof_active;
  
+/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */
+extern bool    prof_gdump_val;
+
  /*
   * Profile dump interval, measured in bytes allocated.  Each arena triggers a
   * profile dump when it reaches this threshold.  The effect is that the
@@ -279,6 +311,8 @@ bool        prof_thread_active_get(void);
  bool   prof_thread_active_set(bool active);
  bool   prof_thread_active_init_get(void);
  bool   prof_thread_active_init_set(bool active_init);
+bool   prof_gdump_get(void);
+bool   prof_gdump_set(bool active);
  void   prof_boot0(void);
  void   prof_boot1(void);
  bool   prof_boot2(void);
@@ -293,17 +327,22 @@ void      prof_sample_threshold_update(prof_tdata_t *tdata);
  
  #ifndef JEMALLOC_ENABLE_INLINE
  bool   prof_active_get_unlocked(void);
+bool   prof_gdump_get_unlocked(void);
  prof_tdata_t   *prof_tdata_get(tsd_t *tsd, bool create);
  bool   prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit,
      prof_tdata_t **tdata_out);
-prof_tctx_t    *prof_alloc_prep(tsd_t *tsd, size_t usize, bool update);
+prof_tctx_t    *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active,
+    bool update);
  prof_tctx_t    *prof_tctx_get(const void *ptr);
-void   prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
+void   prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx);
+void   prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr,
+    prof_tctx_t *old_tctx);
  void   prof_malloc_sample_object(const void *ptr, size_t usize,
      prof_tctx_t *tctx);
  void   prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx);
  void   prof_realloc(tsd_t *tsd, const void *ptr, size_t usize,
-    prof_tctx_t *tctx, bool updated, size_t old_usize, prof_tctx_t *old_tctx);
+    prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr,
+    size_t old_usize, prof_tctx_t *old_tctx);
  void   prof_free(tsd_t *tsd, const void *ptr, size_t usize);
  #endif
  
@@ -321,6 +360,18 @@ prof_active_get_unlocked(void)
         return (prof_active);
  }
  
+JEMALLOC_ALWAYS_INLINE bool
+prof_gdump_get_unlocked(void)
+{
+
+       /*
+        * No locking is used when reading prof_gdump_val in the fast path, so
+        * there are no guarantees regarding how long it will take for all
+        * threads to notice state changes.
+        */
+       return (prof_gdump_val);
+}
+
  JEMALLOC_ALWAYS_INLINE prof_tdata_t *
  prof_tdata_get(tsd_t *tsd, bool create)
  {
@@ -348,36 +399,32 @@ prof_tdata_get(tsd_t *tsd, bool create)
  JEMALLOC_ALWAYS_INLINE prof_tctx_t *
  prof_tctx_get(const void *ptr)
  {
-       prof_tctx_t *ret;
-       arena_chunk_t *chunk;
  
         cassert(config_prof);
         assert(ptr != NULL);
  
-       chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-       if (chunk != ptr) {
-               /* Region. */
-               ret = arena_prof_tctx_get(ptr);
-       } else
-               ret = huge_prof_tctx_get(ptr);
+       return (arena_prof_tctx_get(ptr));
+}
  
-       return (ret);
+JEMALLOC_ALWAYS_INLINE void
+prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx)
+{
+
+       cassert(config_prof);
+       assert(ptr != NULL);
+
+       arena_prof_tctx_set(ptr, usize, tctx);
  }
  
  JEMALLOC_ALWAYS_INLINE void
-prof_tctx_set(const void *ptr, prof_tctx_t *tctx)
+prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr,
+    prof_tctx_t *old_tctx)
  {
-       arena_chunk_t *chunk;
  
         cassert(config_prof);
         assert(ptr != NULL);
  
-       chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-       if (chunk != ptr) {
-               /* Region. */
-               arena_prof_tctx_set(ptr, tctx);
-       } else
-               huge_prof_tctx_set(ptr, tctx);
+       arena_prof_tctx_reset(ptr, usize, old_ptr, old_tctx);
  }
  
  JEMALLOC_ALWAYS_INLINE bool
@@ -389,16 +436,16 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
         cassert(config_prof);
  
         tdata = prof_tdata_get(tsd, true);
-       if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
+       if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX))
                 tdata = NULL;
  
         if (tdata_out != NULL)
                 *tdata_out = tdata;
  
-       if (tdata == NULL)
+       if (unlikely(tdata == NULL))
                 return (true);
  
-       if (tdata->bytes_until_sample >= usize) {
+       if (likely(tdata->bytes_until_sample >= usize)) {
                 if (update)
                         tdata->bytes_until_sample -= usize;
                 return (true);
@@ -411,7 +458,7 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
  }
  
  JEMALLOC_ALWAYS_INLINE prof_tctx_t *
-prof_alloc_prep(tsd_t *tsd, size_t usize, bool update)
+prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update)
  {
         prof_tctx_t *ret;
         prof_tdata_t *tdata;
@@ -419,8 +466,8 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool update)
  
         assert(usize == s2u(usize));
  
-       if (!prof_active_get_unlocked() || likely(prof_sample_accum_update(tsd,
-           usize, update, &tdata)))
+       if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update,
+           &tdata)))
                 ret = (prof_tctx_t *)(uintptr_t)1U;
         else {
                 bt_init(&bt, tdata->vec);
@@ -442,22 +489,24 @@ prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx)
         if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
                 prof_malloc_sample_object(ptr, usize, tctx);
         else
-               prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U);
+               prof_tctx_set(ptr, usize, (prof_tctx_t *)(uintptr_t)1U);
  }
  
  JEMALLOC_ALWAYS_INLINE void
  prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
-    bool updated, size_t old_usize, prof_tctx_t *old_tctx)
+    bool prof_active, bool updated, const void *old_ptr, size_t old_usize,
+    prof_tctx_t *old_tctx)
  {
+       bool sampled, old_sampled;
  
         cassert(config_prof);
         assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);
  
-       if (!updated && ptr != NULL) {
+       if (prof_active && !updated && ptr != NULL) {
                 assert(usize == isalloc(ptr, true));
                 if (prof_sample_accum_update(tsd, usize, true, NULL)) {
                         /*
-                        * Don't sample.  The usize passed to PROF_ALLOC_PREP()
+                        * Don't sample.  The usize passed to prof_alloc_prep()
                          * was larger than what actually got allocated, so a
                          * backtrace was captured for this allocation, even
                          * though its actual usize was insufficient to cross the
@@ -467,12 +516,16 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
                 }
         }
  
-       if (unlikely((uintptr_t)old_tctx > (uintptr_t)1U))
-               prof_free_sampled_object(tsd, old_usize, old_tctx);
-       if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
+       sampled = ((uintptr_t)tctx > (uintptr_t)1U);
+       old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U);
+
+       if (unlikely(sampled))
                 prof_malloc_sample_object(ptr, usize, tctx);
         else
-               prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U);
+               prof_tctx_reset(ptr, usize, old_ptr, old_tctx);
+
+       if (unlikely(old_sampled))
+               prof_free_sampled_object(tsd, old_usize, old_tctx);
  }
  
  JEMALLOC_ALWAYS_INLINE void