]> git.proxmox.com Git - ceph.git/blobdiff - ceph/src/os/bluestore/BlueStore.cc
update sources to 12.2.8
[ceph.git] / ceph / src / os / bluestore / BlueStore.cc
index 6fdd11b170b2403af505c2cf68717d686314893f..47112cc70abad4933c8a5648a411416d808d09d1 100644 (file)
@@ -802,15 +802,15 @@ void BlueStore::Cache::trim(
   target_buffer = min(target_bytes - target_meta, target_buffer);
 
   if (current <= target_bytes) {
-    dout(10) << __func__
-            << " shard target " << pretty_si_t(target_bytes)
+    dout(30) << __func__
+            << " shard target " << byte_u_t(target_bytes)
             << " meta/data ratios " << target_meta_ratio
             << " + " << target_data_ratio << " ("
-            << pretty_si_t(target_meta) << " + "
-            << pretty_si_t(target_buffer) << "), "
-            << " current " << pretty_si_t(current) << " ("
-            << pretty_si_t(current_meta) << " + "
-            << pretty_si_t(current_buffer) << ")"
+            << byte_u_t(target_meta) << " + "
+            << byte_u_t(target_buffer) << "), "
+            << " current " << byte_u_t(current) << " ("
+            << byte_u_t(current_meta) << " + "
+            << byte_u_t(current_buffer) << ")"
             << dendl;
     return;
   }
@@ -831,17 +831,17 @@ void BlueStore::Cache::trim(
   uint64_t max_meta = current_meta - free_meta;
   uint64_t max_onodes = max_meta / bytes_per_onode;
 
-  dout(10) << __func__
-          << " shard target " << pretty_si_t(target_bytes)
+  dout(20) << __func__
+          << " shard target " << byte_u_t(target_bytes)
           << " ratio " << target_meta_ratio << " ("
-          << pretty_si_t(target_meta) << " + "
-          << pretty_si_t(target_buffer) << "), "
-          << " current " << pretty_si_t(current) << " ("
-          << pretty_si_t(current_meta) << " + "
-          << pretty_si_t(current_buffer) << "),"
-          << " need_to_free " << pretty_si_t(need_to_free) << " ("
-          << pretty_si_t(free_meta) << " + "
-          << pretty_si_t(free_buffer) << ")"
+          << byte_u_t(target_meta) << " + "
+          << byte_u_t(target_buffer) << "), "
+          << " current " << byte_u_t(current) << " ("
+          << byte_u_t(current_meta) << " + "
+          << byte_u_t(current_buffer) << "),"
+          << " need_to_free " << byte_u_t(need_to_free) << " ("
+          << byte_u_t(free_meta) << " + "
+          << byte_u_t(free_buffer) << ")"
           << " -> max " << max_onodes << " onodes + "
           << max_buffer << " buffer"
           << dendl;
@@ -1137,7 +1137,7 @@ void BlueStore::TwoQCache::_trim(uint64_t onode_max, uint64_t buffer_max)
     }
 
     if (evicted > 0) {
-      dout(20) << __func__ << " evicted " << prettybyte_t(evicted)
+      dout(20) << __func__ << " evicted " << byte_u_t(evicted)
                << " from warm_in list, done evicting warm_in buffers"
                << dendl;
     }
@@ -1163,7 +1163,7 @@ void BlueStore::TwoQCache::_trim(uint64_t onode_max, uint64_t buffer_max)
     }
 
     if (evicted > 0) {
-      dout(20) << __func__ << " evicted " << prettybyte_t(evicted)
+      dout(20) << __func__ << " evicted " << byte_u_t(evicted)
                << " from hot list, done evicting hot buffers"
                << dendl;
     }
@@ -1650,11 +1650,6 @@ BlueStore::SharedBlob::SharedBlob(uint64_t i, Collection *_coll)
 
 BlueStore::SharedBlob::~SharedBlob()
 {
-  if (get_cache()) {   // the dummy instances have a nullptr
-    std::lock_guard<std::recursive_mutex> l(get_cache()->lock);
-    bc._clear(get_cache());
-    get_cache()->rm_blob();
-  }
   if (loaded && persistent) {
     delete persistent; 
   }
@@ -1666,8 +1661,17 @@ void BlueStore::SharedBlob::put()
     ldout(coll->store->cct, 20) << __func__ << " " << this
                             << " removing self from set " << get_parent()
                             << dendl;
-    if (get_parent()) {
-      get_parent()->remove(this);
+  again:
+    auto coll_snap = coll;
+    if (coll_snap) {
+      std::lock_guard<std::recursive_mutex> l(coll_snap->cache->lock);
+      if (coll_snap != coll) {
+       goto again;
+      }
+      coll_snap->shared_blob_set.remove(this);
+
+      bc._clear(coll_snap->cache);
+      coll_snap->cache->rm_blob();
     }
     delete this;
   }
@@ -3729,6 +3733,23 @@ void BlueStore::_set_blob_size()
            << std::dec << dendl;
 }
 
+void BlueStore::_set_finisher_num()
+{
+  if (cct->_conf->bluestore_shard_finishers) {
+    if (cct->_conf->osd_op_num_shards) {
+      m_finisher_num = cct->_conf->osd_op_num_shards;
+    } else {
+      assert(bdev);
+      if (bdev->is_rotational()) {
+       m_finisher_num = cct->_conf->osd_op_num_shards_hdd;
+      } else {
+       m_finisher_num = cct->_conf->osd_op_num_shards_ssd;
+      }
+    }
+  }
+  assert(m_finisher_num != 0);
+}
+
 int BlueStore::_set_cache_sizes()
 {
   assert(bdev);
@@ -3884,11 +3905,11 @@ void BlueStore::_init_logger()
   b.add_u64_counter(l_bluestore_compress_rejected_count, "compress_rejected_count",
     "Sum for compress ops rejected due to low net gain of space");
   b.add_u64_counter(l_bluestore_write_pad_bytes, "write_pad_bytes",
-    "Sum for write-op padded bytes");
+                   "Sum for write-op padded bytes", NULL, 0, unit_t(BYTES));
   b.add_u64_counter(l_bluestore_deferred_write_ops, "deferred_write_ops",
                    "Sum for deferred write op");
   b.add_u64_counter(l_bluestore_deferred_write_bytes, "deferred_write_bytes",
-                   "Sum for deferred write bytes", "def");
+                   "Sum for deferred write bytes", "def", 0, unit_t(BYTES));
   b.add_u64_counter(l_bluestore_write_penalty_read_ops, "write_penalty_read_ops",
                    "Sum for write penalty read ops");
   b.add_u64(l_bluestore_allocated, "bluestore_allocated",
@@ -3920,22 +3941,22 @@ void BlueStore::_init_logger()
   b.add_u64(l_bluestore_buffers, "bluestore_buffers",
            "Number of buffers in cache");
   b.add_u64(l_bluestore_buffer_bytes, "bluestore_buffer_bytes",
-           "Number of buffer bytes in cache");
+           "Number of buffer bytes in cache", NULL, 0, unit_t(BYTES));
   b.add_u64(l_bluestore_buffer_hit_bytes, "bluestore_buffer_hit_bytes",
-    "Sum for bytes of read hit in the cache");
+           "Sum for bytes of read hit in the cache", NULL, 0, unit_t(BYTES));
   b.add_u64(l_bluestore_buffer_miss_bytes, "bluestore_buffer_miss_bytes",
-    "Sum for bytes of read missed in the cache");
+           "Sum for bytes of read missed in the cache", NULL, 0, unit_t(BYTES));
 
   b.add_u64_counter(l_bluestore_write_big, "bluestore_write_big",
                    "Large aligned writes into fresh blobs");
   b.add_u64_counter(l_bluestore_write_big_bytes, "bluestore_write_big_bytes",
-                   "Large aligned writes into fresh blobs (bytes)");
+                   "Large aligned writes into fresh blobs (bytes)", NULL, 0, unit_t(BYTES));
   b.add_u64_counter(l_bluestore_write_big_blobs, "bluestore_write_big_blobs",
                    "Large aligned writes into fresh blobs (blobs)");
   b.add_u64_counter(l_bluestore_write_small, "bluestore_write_small",
                    "Small writes into existing or sparse small blobs");
   b.add_u64_counter(l_bluestore_write_small_bytes, "bluestore_write_small_bytes",
-                   "Small writes into existing or sparse small blobs (bytes)");
+                   "Small writes into existing or sparse small blobs (bytes)", NULL, 0, unit_t(BYTES));
   b.add_u64_counter(l_bluestore_write_small_unused,
                    "bluestore_write_small_unused",
                    "Small writes into unused portion of existing blob");
@@ -4335,7 +4356,7 @@ int BlueStore::_open_alloc()
     bytes += length;
   }
   fm->enumerate_reset();
-  dout(1) << __func__ << " loaded " << pretty_si_t(bytes)
+  dout(1) << __func__ << " loaded " << byte_u_t(bytes)
          << " in " << num << " extents"
          << dendl;
 
@@ -4872,9 +4893,9 @@ int BlueStore::_balance_bluefs_freespace(PExtentVector *extents)
   float bluefs_ratio = (float)bluefs_free / (float)total_free;
 
   dout(10) << __func__
-          << " bluefs " << pretty_si_t(bluefs_free)
+          << " bluefs " << byte_u_t(bluefs_free)
           << " free (" << bluefs_free_ratio
-          << ") bluestore " << pretty_si_t(my_free)
+          << ") bluestore " << byte_u_t(my_free)
           << " free (" << my_free_ratio
           << "), bluefs_ratio " << bluefs_ratio
           << dendl;
@@ -4885,14 +4906,14 @@ int BlueStore::_balance_bluefs_freespace(PExtentVector *extents)
     gift = cct->_conf->bluestore_bluefs_gift_ratio * total_free;
     dout(10) << __func__ << " bluefs_ratio " << bluefs_ratio
             << " < min_ratio " << cct->_conf->bluestore_bluefs_min_ratio
-            << ", should gift " << pretty_si_t(gift) << dendl;
+            << ", should gift " << byte_u_t(gift) << dendl;
   } else if (bluefs_ratio > cct->_conf->bluestore_bluefs_max_ratio) {
     reclaim = cct->_conf->bluestore_bluefs_reclaim_ratio * total_free;
     if (bluefs_total - reclaim < cct->_conf->bluestore_bluefs_min)
       reclaim = bluefs_total - cct->_conf->bluestore_bluefs_min;
     dout(10) << __func__ << " bluefs_ratio " << bluefs_ratio
             << " > max_ratio " << cct->_conf->bluestore_bluefs_max_ratio
-            << ", should reclaim " << pretty_si_t(reclaim) << dendl;
+            << ", should reclaim " << byte_u_t(reclaim) << dendl;
   }
 
   // don't take over too much of the freespace
@@ -4902,7 +4923,7 @@ int BlueStore::_balance_bluefs_freespace(PExtentVector *extents)
     uint64_t g = cct->_conf->bluestore_bluefs_min - bluefs_total;
     dout(10) << __func__ << " bluefs_total " << bluefs_total
             << " < min " << cct->_conf->bluestore_bluefs_min
-            << ", should gift " << pretty_si_t(g) << dendl;
+            << ", should gift " << byte_u_t(g) << dendl;
     if (g > gift)
       gift = g;
     reclaim = 0;
@@ -4913,7 +4934,7 @@ int BlueStore::_balance_bluefs_freespace(PExtentVector *extents)
     uint64_t g = min_free - bluefs_free;
     dout(10) << __func__ << " bluefs_free " << bluefs_total
             << " < min " << min_free
-            << ", should gift " << pretty_si_t(g) << dendl;
+            << ", should gift " << byte_u_t(g) << dendl;
     if (g > gift)
       gift = g;
     reclaim = 0;
@@ -4926,7 +4947,7 @@ int BlueStore::_balance_bluefs_freespace(PExtentVector *extents)
     // hard cap to fit into 32 bits
     gift = MIN(gift, 1ull<<31);
     dout(10) << __func__ << " gifting " << gift
-            << " (" << pretty_si_t(gift) << ")" << dendl;
+            << " (" << byte_u_t(gift) << ")" << dendl;
 
     // fixme: just do one allocation to start...
     int r = alloc->reserve(gift);
@@ -4968,7 +4989,7 @@ int BlueStore::_balance_bluefs_freespace(PExtentVector *extents)
     // hard cap to fit into 32 bits
     reclaim = MIN(reclaim, 1ull<<31);
     dout(10) << __func__ << " reclaiming " << reclaim
-            << " (" << pretty_si_t(reclaim) << ")" << dendl;
+            << " (" << byte_u_t(reclaim) << ")" << dendl;
 
     while (reclaim > 0) {
       // NOTE: this will block and do IO.
@@ -5120,7 +5141,7 @@ int BlueStore::_setup_block_symlink_or_file(
          }
        }
        dout(1) << __func__ << " resized " << name << " file to "
-               << pretty_si_t(size) << "B" << dendl;
+               << byte_u_t(size) << dendl;
       }
       VOID_TEMP_FAILURE_RETRY(::close(fd));
     } else {
@@ -7720,6 +7741,8 @@ int BlueStore::_open_super_meta()
   _set_compression();
   _set_blob_size();
 
+  _set_finisher_num();
+
   return 0;
 }
 
@@ -7911,8 +7934,6 @@ void BlueStore::_txc_state_proc(TransContext *txc)
       }
       return;
     case TransContext::STATE_KV_SUBMITTED:
-      txc->log_state_latency(logger, l_bluestore_state_kv_committing_lat);
-      txc->state = TransContext::STATE_KV_DONE;
       _txc_committed_kv(txc);
       // ** fall-thru **
 
@@ -8163,9 +8184,14 @@ void BlueStore::_txc_committed_kv(TransContext *txc)
     txc->onreadable = NULL;
   }
 
-  if (!txc->oncommits.empty()) {
-    finishers[n]->queue(txc->oncommits);
+  {
+    std::lock_guard<std::mutex> l(txc->osr->qlock);
+    txc->state = TransContext::STATE_KV_DONE;
+    if (!txc->oncommits.empty()) {
+      finishers[n]->queue(txc->oncommits);
+    }
   }
+  txc->log_state_latency(logger, l_bluestore_state_kv_committing_lat);
 }
 
 void BlueStore::_txc_finish(TransContext *txc)
@@ -8348,21 +8374,6 @@ void BlueStore::_kv_start()
 {
   dout(10) << __func__ << dendl;
 
-  if (cct->_conf->bluestore_shard_finishers) {
-    if (cct->_conf->osd_op_num_shards) {
-      m_finisher_num = cct->_conf->osd_op_num_shards;
-    } else {
-      assert(bdev);
-      if (bdev->is_rotational()) {
-        m_finisher_num = cct->_conf->osd_op_num_shards_hdd;
-      } else {
-        m_finisher_num = cct->_conf->osd_op_num_shards_ssd;
-      }
-    }
-  }
-
-  assert(m_finisher_num != 0);
-
   for (int i = 0; i < m_finisher_num; ++i) {
     ostringstream oss;
     oss << "finisher-" << i;
@@ -10413,6 +10424,7 @@ int BlueStore::_do_gc(
 {
   auto& extents_to_collect = gc.get_extents_to_collect();
 
+  bool dirty_range_updated = false;
   WriteContext wctx_gc;
   wctx_gc.fork(wctx); // make a clone for garbage collection
 
@@ -10429,12 +10441,17 @@ int BlueStore::_do_gc(
 
     if (*dirty_start > it->offset) {
       *dirty_start = it->offset;
+      dirty_range_updated = true;
     }
 
     if (*dirty_end < it->offset + it->length) {
       *dirty_end = it->offset + it->length;
+      dirty_range_updated = true;
     }
   }
+  if (dirty_range_updated) {
+    o->extent_map.fault_range(db, *dirty_start, *dirty_end);
+  }
 
   dout(30) << __func__ << " alloc write" << dendl;
   int r = _do_alloc_write(txc, c, o, &wctx_gc);
@@ -10516,9 +10533,10 @@ int BlueStore::_do_write(
              << dendl;
         goto out;
       }
+      dout(20)<<__func__<<" gc range is " << std::hex << dirty_start
+             << "~" << dirty_end - dirty_start << std::dec << dendl;
     }
   }
-
   o->extent_map.compress_extent_map(dirty_start, dirty_end - dirty_start);
   o->extent_map.dirty_range(dirty_start, dirty_end - dirty_start);