]> git.proxmox.com Git - ceph.git/blobdiff - ceph/src/os/bluestore/BlueFS.h
import 15.2.0 Octopus source
[ceph.git] / ceph / src / os / bluestore / BlueFS.h
index fb89c213d2ad910968345777e9e3b2bfea79000c..8045f39c44963fa4e343195f32cdbf5c08e4a619 100644 (file)
@@ -7,13 +7,15 @@
 #include <mutex>
 
 #include "bluefs_types.h"
-#include "common/RefCountedObj.h"
 #include "BlockDevice.h"
 
-#include "boost/intrusive/list.hpp"
-#include <boost/intrusive_ptr.hpp>
+#include "common/RefCountedObj.h"
+#include "common/ceph_context.h"
+#include "global/global_context.h"
+#include "include/common_fwd.h"
 
-class PerfCounters;
+#include "boost/intrusive/list.hpp"
+#include "boost/dynamic_bitset.hpp"
 
 class Allocator;
 
@@ -69,8 +71,27 @@ public:
    * @params
    * alloc_size - allocation unit size to check
    */
-  virtual size_t available_freespace(uint64_t alloc_size) = 0;
+  virtual uint64_t available_freespace(uint64_t alloc_size) = 0;
+};
+
+class BlueFSVolumeSelector {
+public:
+  typedef std::vector<std::pair<std::string, uint64_t>> paths;
+
+  virtual ~BlueFSVolumeSelector() {
+  }
+  virtual void* get_hint_by_device(uint8_t dev) const = 0;
+  virtual void* get_hint_by_dir(const string& dirname) const = 0;
+
+  virtual void add_usage(void* file_hint, const bluefs_fnode_t& fnode) = 0;
+  virtual void sub_usage(void* file_hint, const bluefs_fnode_t& fnode) = 0;
+  virtual void add_usage(void* file_hint, uint64_t fsize) = 0;
+  virtual void sub_usage(void* file_hint, uint64_t fsize) = 0;
+  virtual uint8_t select_prefer_bdev(void* hint) = 0;
+  virtual void get_paths(const std::string& base, paths& res) const = 0;
+  virtual void dump(ostream& sout) = 0;
 };
+class BlueFS;
 
 class BlueFS {
 public:
@@ -101,15 +122,20 @@ public:
     std::atomic_int num_readers, num_writers;
     std::atomic_int num_reading;
 
+    void* vselector_hint = nullptr;
+
+  private:
+    FRIEND_MAKE_REF(File);
     File()
-      : RefCountedObject(NULL, 0),
+      :
        refs(0),
        dirty_seq(0),
        locked(false),
        deleted(false),
        num_readers(0),
        num_writers(0),
-       num_reading(0)
+       num_reading(0),
+        vselector_hint(nullptr)
       {}
     ~File() override {
       ceph_assert(num_readers.load() == 0);
@@ -117,15 +143,8 @@ public:
       ceph_assert(num_reading.load() == 0);
       ceph_assert(!locked);
     }
-
-    friend void intrusive_ptr_add_ref(File *f) {
-      f->get();
-    }
-    friend void intrusive_ptr_release(File *f) {
-      f->put();
-    }
   };
-  typedef boost::intrusive_ptr<File> FileRef;
+  using FileRef = ceph::ref_t<File>;
 
   typedef boost::intrusive::list<
       File,
@@ -139,22 +158,17 @@ public:
 
     mempool::bluefs::map<string,FileRef> file_map;
 
-    Dir() : RefCountedObject(NULL, 0) {}
-
-    friend void intrusive_ptr_add_ref(Dir *d) {
-      d->get();
-    }
-    friend void intrusive_ptr_release(Dir *d) {
-      d->put();
-    }
+  private:
+    FRIEND_MAKE_REF(Dir);
+    Dir() = default;
   };
-  typedef boost::intrusive_ptr<Dir> DirRef;
+  using DirRef = ceph::ref_t<Dir>;
 
   struct FileWriter {
     MEMPOOL_CLASS_HELPERS();
 
     FileRef file;
-    uint64_t pos;           ///< start offset for buffer
+    uint64_t pos = 0;       ///< start offset for buffer
     bufferlist buffer;      ///< new data to write (at end of file)
     bufferlist tail_block;  ///< existing partial block at end of file, if any
     bufferlist::page_aligned_appender buffer_appender;  //< for const char* only
@@ -166,14 +180,13 @@ public:
     std::array<bool, MAX_BDEV> dirty_devs;
 
     FileWriter(FileRef f)
-      : file(f),
-       pos(0),
+      : file(std::move(f)),
        buffer_appender(buffer.get_page_aligned_appender(
                          g_conf()->bluefs_alloc_size / CEPH_PAGE_SIZE)) {
       ++file->num_writers;
       iocv.fill(nullptr);
       dirty_devs.fill(false);
-      if (f->fnode.ino == 1) {
+      if (file->fnode.ino == 1) {
        write_hint = WRITE_LIFE_MEDIUM;
       }
     }
@@ -203,20 +216,18 @@ public:
   struct FileReaderBuffer {
     MEMPOOL_CLASS_HELPERS();
 
-    uint64_t bl_off;        ///< prefetch buffer logical offset
+    uint64_t bl_off = 0;    ///< prefetch buffer logical offset
     bufferlist bl;          ///< prefetch buffer
-    uint64_t pos;           ///< current logical offset
+    uint64_t pos = 0;       ///< current logical offset
     uint64_t max_prefetch;  ///< max allowed prefetch
 
     explicit FileReaderBuffer(uint64_t mpf)
-      : bl_off(0),
-       pos(0),
-       max_prefetch(mpf) {}
+      : max_prefetch(mpf) {}
 
-    uint64_t get_buf_end() {
+    uint64_t get_buf_end() const {
       return bl_off + bl.length();
     }
-    uint64_t get_buf_remaining(uint64_t p) {
+    uint64_t get_buf_remaining(uint64_t p) const {
       if (p >= bl_off && p < bl_off + bl.length())
        return bl_off + bl.length() - p;
       return 0;
@@ -259,7 +270,7 @@ public:
     MEMPOOL_CLASS_HELPERS();
 
     FileRef file;
-    explicit FileLock(FileRef f) : file(f) {}
+    explicit FileLock(FileRef f) : file(std::move(f)) {}
   };
 
 private:
@@ -308,10 +319,12 @@ private:
   vector<Allocator*> alloc;                   ///< allocators for bdevs
   vector<uint64_t> alloc_size;                ///< alloc size for each device
   vector<interval_set<uint64_t>> pending_release; ///< extents to release
+  vector<interval_set<uint64_t>> block_unused_too_granular;
 
   BlockDevice::aio_callback_t discard_cb[3]; //discard callbacks for each dev
 
   BlueFSDeviceExpander* slow_dev_expander = nullptr;
+  std::unique_ptr<BlueFSVolumeSelector> vselector;
 
   class SocketHook;
   SocketHook* asok_hook = nullptr;
@@ -362,11 +375,12 @@ private:
   void _compact_log_sync();
   void _compact_log_async(std::unique_lock<ceph::mutex>& l);
 
-  void _rewrite_log_sync(bool allocate_with_fallback,
-                        int super_dev,
-                        int log_dev,
-                        int new_log_dev,
-                        int flags);
+  void _rewrite_log_and_layout_sync(bool allocate_with_fallback,
+                                   int super_dev,
+                                   int log_dev,
+                                   int new_log_dev,
+                                   int flags,
+                                   std::optional<bluefs_layout_t> layout);
 
   //void _aio_finish(void *priv);
 
@@ -387,13 +401,22 @@ private:
   int _read_random(
     FileReader *h,   ///< [in] read from here
     uint64_t offset, ///< [in] offset
-    size_t len,      ///< [in] this many bytes
+    uint64_t len,    ///< [in] this many bytes
     char *out);      ///< [out] optional: or copy it here
 
   void _invalidate_cache(FileRef f, uint64_t offset, uint64_t length);
 
   int _open_super();
   int _write_super(int dev);
+  int _check_new_allocations(const bluefs_fnode_t& fnode,
+    size_t dev_count,
+    boost::dynamic_bitset<uint64_t>* owned_blocks,
+    boost::dynamic_bitset<uint64_t>* used_blocks);
+  int _verify_alloc_granularity(
+    __u8 id, uint64_t offset, uint64_t length,
+    const char *op);
+  int _adjust_granularity(
+    __u8 id, uint64_t *offset, uint64_t *length, bool alloc);
   int _replay(bool noop, bool to_stdout = false); ///< replay journal
 
   FileWriter *_create_writer(FileRef f);
@@ -415,10 +438,11 @@ public:
   ~BlueFS();
 
   // the super is always stored on bdev 0
-  int mkfs(uuid_d osd_uuid);
+  int mkfs(uuid_d osd_uuid, const bluefs_layout_t& layout);
   int mount();
+  int maybe_verify_layout(const bluefs_layout_t& layout) const;
   void umount();
-  int prepare_new_device(int id);
+  int prepare_new_device(int id, const bluefs_layout_t& layout);
   
   int log_dump();
 
@@ -432,11 +456,13 @@ public:
   int device_migrate_to_new(
     CephContext *cct,
     const set<int>& devs_source,
-    int dev_target);
+    int dev_target,
+    const bluefs_layout_t& layout);
   int device_migrate_to_existing(
     CephContext *cct,
     const set<int>& devs_source,
-    int dev_target);
+    int dev_target,
+    const bluefs_layout_t& layout);
 
   uint64_t get_used();
   uint64_t get_total(unsigned id);
@@ -483,7 +509,6 @@ public:
   int lock_file(const string& dirname, const string& filename, FileLock **p);
   int unlock_file(FileLock *l);
 
-  void flush_log();
   void compact_log();
 
   /// sync any uncommitted state to disk
@@ -492,6 +517,17 @@ public:
   void set_slow_device_expander(BlueFSDeviceExpander* a) {
     slow_dev_expander = a;
   }
+  void set_volume_selector(BlueFSVolumeSelector* s) {
+    vselector.reset(s);
+  }
+  void dump_volume_selector(ostream& sout) {
+    vselector->dump(sout);
+  }
+  void get_vselector_paths(const std::string& base,
+                           BlueFSVolumeSelector::paths& res) const {
+    return vselector->get_paths(base, res);
+  }
+
   int add_block_device(unsigned bdev, const string& path, bool trim,
                       bool shared_with_bluestore=false);
   bool bdev_support_label(unsigned id);
@@ -551,6 +587,48 @@ public:
     return _truncate(h, offset);
   }
 
+  /// test purpose methods
+  void debug_inject_duplicate_gift(unsigned bdev, uint64_t offset, uint64_t len);
+  const PerfCounters* get_perf_counters() const {
+    return logger;
+  }
+};
+
+class OriginalVolumeSelector : public BlueFSVolumeSelector {
+  uint64_t wal_total;
+  uint64_t db_total;
+  uint64_t slow_total;
+
+public:
+  OriginalVolumeSelector(
+    uint64_t _wal_total,
+    uint64_t _db_total,
+    uint64_t _slow_total)
+    : wal_total(_wal_total), db_total(_db_total), slow_total(_slow_total) {}
+
+  void* get_hint_by_device(uint8_t dev) const override;
+  void* get_hint_by_dir(const string& dirname) const override;
+
+  void add_usage(void* hint, const bluefs_fnode_t& fnode) override {
+    // do nothing
+    return;
+  }
+  void sub_usage(void* hint, const bluefs_fnode_t& fnode) override {
+    // do nothing
+    return;
+  }
+  void add_usage(void* hint, uint64_t fsize) override {
+    // do nothing
+    return;
+  }
+  void sub_usage(void* hint, uint64_t fsize) override {
+    // do nothing
+    return;
+  }
+
+  uint8_t select_prefer_bdev(void* hint) override;
+  void get_paths(const std::string& base, paths& res) const override;
+  void dump(ostream& sout) override;
 };
 
 #endif