buffer_map[b->offset].reset(b);
if (b->is_writing()) {
b->data.reassign_to_mempool(mempool::mempool_bluestore_writing);
- writing.push_back(*b);
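+ // keep the writing list sorted by buffer seq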
+ if (writing.empty() || writing.rbegin()->seq <= b->seq) {
+ writing.push_back(*b);
+ } else {
+ auto it = writing.begin();
+ while (it->seq < b->seq) {
+ ++it;
+ }
+
+ assert(it->seq >= b->seq);
+ // note that this inserts b before it,
+ // so the seq order is maintained
+ writing.insert(it, *b);
+ }
} else {
b->data.reassign_to_mempool(mempool::mempool_bluestore_cache_data);
cache->_add_buffer(b, level, near);
get_blob().can_split_at(blob_offset);
}
- bool try_reuse_blob(uint32_t min_alloc_size,
+ bool can_reuse_blob(uint32_t min_alloc_size,
uint32_t target_blob_size,
uint32_t b_offset,
uint32_t *length0);
#endif
}
- const bluestore_blob_t& get_blob() const {
+ inline const bluestore_blob_t& get_blob() const {
return blob;
}
- bluestore_blob_t& dirty_blob() {
+ inline bluestore_blob_t& dirty_blob() {
#ifdef CACHE_BLOB_BL
blob_bl.clear();
#endif
IOContext ioc;
bool had_ios = false; ///< true if we submitted IOs before our kv txn
- CollectionRef first_collection; ///< first referenced collection
-
uint64_t seq = 0;
utime_t start;
utime_t last_stamp;
interval_set<uint64_t> bluefs_extents; ///< block extents owned by bluefs
interval_set<uint64_t> bluefs_extents_reclaiming; ///< currently reclaiming
- std::mutex deferred_lock;
+ std::mutex deferred_lock, deferred_submit_lock;
std::atomic<uint64_t> deferred_seq = {0};
deferred_osr_queue_t deferred_queue; ///< osr's with deferred io pending
int deferred_queue_size = 0; ///< num txc's queued across all osrs
size_t block_size_order = 0; ///< bits to shift to get block size
uint64_t min_alloc_size = 0; ///< minimum allocation unit (power of 2)
- std::atomic<int> deferred_batch_ops = {0}; ///< deferred batch size
-
///< bits for min_alloc_size
- std::atomic<uint8_t> min_alloc_size_order = {0};
+ uint8_t min_alloc_size_order = 0;
static_assert(std::numeric_limits<uint8_t>::max() >
std::numeric_limits<decltype(min_alloc_size)>::digits,
"not enough bits for min_alloc_size");
- ///< size threshold for forced deferred writes
- std::atomic<uint64_t> prefer_deferred_size = {0};
-
///< maximum allocation unit (power of 2)
std::atomic<uint64_t> max_alloc_size = {0};
+ ///< number threshold for forced deferred writes
+ std::atomic<int> deferred_batch_ops = {0};
+
+ ///< size threshold for forced deferred writes
+ std::atomic<uint64_t> prefer_deferred_size = {0};
+
///< approx cost per io, in bytes
std::atomic<uint64_t> throttle_cost_per_io = {0};
- std::atomic<Compressor::CompressionMode> comp_mode = {Compressor::COMP_NONE}; ///< compression mode
+ std::atomic<Compressor::CompressionMode> comp_mode =
+ {Compressor::COMP_NONE}; ///< compression mode
CompressorRef compressor;
std::atomic<uint64_t> comp_min_blob_size = {0};
std::atomic<uint64_t> comp_max_blob_size = {0};
uint64_t kv_throttle_costs = 0;
// cache trim control
+ uint64_t cache_size = 0; ///< total cache size
float cache_meta_ratio = 0; ///< cache ratio dedicated to metadata
float cache_kv_ratio = 0; ///< cache ratio dedicated to kv (e.g., rocksdb)
float cache_data_ratio = 0; ///< cache ratio dedicated to object data
int _open_super_meta();
- void open_statfs();
+ void _open_statfs();
int _reconcile_bluefs_freespace();
int _balance_bluefs_freespace(PExtentVector *extents);
bluestore_deferred_op_t *_get_deferred_op(TransContext *txc, OnodeRef o);
void _deferred_queue(TransContext *txc);
- void deferred_try_submit() {
- std::lock_guard<std::mutex> l(deferred_lock);
- _deferred_try_submit();
- }
- void _deferred_try_submit();
- void _deferred_submit(OpSequencer *osr);
+ void deferred_try_submit();
+ void _deferred_submit_unlock(OpSequencer *osr);
void _deferred_aio_finish(OpSequencer *osr);
int _deferred_replay();
void _apply_padding(uint64_t head_pad,
uint64_t tail_pad,
- bufferlist& bl,
bufferlist& padded);
// --- ondisk version ---
bool allows_journal() override { return false; };
bool is_rotational() override;
+ bool is_journal_rotational() override;
+
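+ // derive the default device class from the bdev type recorded by collect_metadata()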
+ string get_default_device_class() override {
+ string device_class;
+ map<string, string> metadata;
+ collect_metadata(&metadata);
+ auto it = metadata.find("bluestore_bdev_type");
+ if (it != metadata.end()) {
+ device_class = it->second;
+ }
+ return device_class;
+ }
static int get_block_device_fsid(CephContext* cct, const string& path,
uuid_d *fsid);
uint64_t offset,
size_t len,
bufferlist& bl,
- uint32_t op_flags = 0,
- bool allow_eio = false) override;
+ uint32_t op_flags = 0) override;
int read(
CollectionHandle &c,
const ghobject_t& oid,
uint64_t offset,
size_t len,
bufferlist& bl,
- uint32_t op_flags = 0,
- bool allow_eio = false) override;
+ uint32_t op_flags = 0) override;
int _do_read(
Collection *c,
OnodeRef o,
objectstore_perf_stat_t get_cur_stats() const {
objectstore_perf_stat_t ret;
- ret.os_commit_latency = os_commit_latency.avg();
- ret.os_apply_latency = os_apply_latency.avg();
+ ret.os_commit_latency = os_commit_latency.current_avg();
+ ret.os_apply_latency = os_apply_latency.current_avg();
return ret;
}
RWLock::WLocker l(debug_read_error_lock);
debug_mdata_error_objects.insert(o);
}
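+ // ObjectStore override: compact the underlying kv store on demand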
+ void compact() override {
+ assert(db);
+ db->compact();
+ }
+
private:
bool _debug_data_eio(const ghobject_t& o) {
if (!cct->_conf->bluestore_debug_inject_read_err) {
OnodeRef o,
uint64_t offset,
set<SharedBlob*> *maybe_unshared_blobs=0);
- void _truncate(TransContext *txc,
+ int _truncate(TransContext *txc,
CollectionRef& c,
OnodeRef& o,
uint64_t offset);