#include <mutex>
#include "bluefs_types.h"
-#include "common/RefCountedObj.h"
#include "BlockDevice.h"
-#include "boost/intrusive/list.hpp"
-#include <boost/intrusive_ptr.hpp>
+#include "common/RefCountedObj.h"
+#include "common/ceph_context.h"
+#include "global/global_context.h"
+#include "include/common_fwd.h"
-class PerfCounters;
+#include "boost/intrusive/list.hpp"
+#include "boost/dynamic_bitset.hpp"
class Allocator;
* @params
* alloc_size - allocation unit size to check
*/
- virtual size_t available_freespace(uint64_t alloc_size) = 0;
+ virtual uint64_t available_freespace(uint64_t alloc_size) = 0;
+};
+
+/// Abstract interface for a BlueFS volume-selection policy. BlueFS holds one
+/// instance and forwards dump()/get_paths() requests to it.
+/// Hints are opaque void* tokens; their meaning belongs to the concrete
+/// selector.
+class BlueFSVolumeSelector {
+public:
+  /// Result container for get_paths(): a list of (string, uint64_t) pairs.
+  /// NOTE(review): presumably (path, size) - confirm with an implementation.
+  using paths = std::vector<std::pair<std::string, uint64_t>>;
+
+  virtual ~BlueFSVolumeSelector() = default;
+
+  /// Look up the hint token for a device id.
+  virtual void* get_hint_by_device(uint8_t dev) const = 0;
+  /// Look up the hint token for a directory name.
+  virtual void* get_hint_by_dir(const std::string& dirname) const = 0;
+
+  /// Usage-accounting hooks. Each comes in two flavours: one taking a file's
+  /// fnode, one taking a plain size.
+  /// NOTE(review): call sites and units are not visible from this header.
+  virtual void add_usage(void* file_hint, const bluefs_fnode_t& fnode) = 0;
+  virtual void sub_usage(void* file_hint, const bluefs_fnode_t& fnode) = 0;
+  virtual void add_usage(void* file_hint, uint64_t fsize) = 0;
+  virtual void sub_usage(void* file_hint, uint64_t fsize) = 0;
+
+  /// Map a hint token to a block-device id.
+  /// NOTE(review): the name suggests a *preferred* device - confirm how
+  /// callers handle fallback.
+  virtual uint8_t select_prefer_bdev(void* hint) = 0;
+  /// Produce path entries for base. res is taken by non-const reference,
+  /// presumably as the output container.
+  virtual void get_paths(const std::string& base, paths& res) const = 0;
+  /// Write a description of the selector to sout.
+  virtual void dump(std::ostream& sout) = 0;
};
+class BlueFS;
class BlueFS {
public:
std::atomic_int num_readers, num_writers;
std::atomic_int num_reading;
+ void* vselector_hint = nullptr;
+
+ private:
+ FRIEND_MAKE_REF(File);
+ // Zero/false-initialises the bookkeeping members. vselector_hint is not
+ // repeated here: its default member initializer (= nullptr) already
+ // covers it.
File()
- : RefCountedObject(NULL, 0),
+ :
refs(0),
dirty_seq(0),
locked(false),
deleted(false),
num_readers(0),
num_writers(0),
num_reading(0)
{}
~File() override {
ceph_assert(num_readers.load() == 0);
ceph_assert(num_reading.load() == 0);
ceph_assert(!locked);
}
-
- friend void intrusive_ptr_add_ref(File *f) {
- f->get();
- }
- friend void intrusive_ptr_release(File *f) {
- f->put();
- }
};
- typedef boost::intrusive_ptr<File> FileRef;
+ using FileRef = ceph::ref_t<File>;
typedef boost::intrusive::list<
File,
mempool::bluefs::map<string,FileRef> file_map;
- Dir() : RefCountedObject(NULL, 0) {}
-
- friend void intrusive_ptr_add_ref(Dir *d) {
- d->get();
- }
- friend void intrusive_ptr_release(Dir *d) {
- d->put();
- }
+ private:
+ // The default constructor is private. FRIEND_MAKE_REF(Dir) presumably
+ // befriends the ref-counting factory so that instances are only created
+ // through it - confirm against common/RefCountedObj.h.
+ FRIEND_MAKE_REF(Dir);
+ Dir() = default;
};
- typedef boost::intrusive_ptr<Dir> DirRef;
+ using DirRef = ceph::ref_t<Dir>;
struct FileWriter {
MEMPOOL_CLASS_HELPERS();
FileRef file;
- uint64_t pos; ///< start offset for buffer
+ uint64_t pos = 0; ///< start offset for buffer
bufferlist buffer; ///< new data to write (at end of file)
bufferlist tail_block; ///< existing partial block at end of file, if any
bufferlist::page_aligned_appender buffer_appender; //< for const char* only
std::array<bool, MAX_BDEV> dirty_devs;
+ /// Build a writer for file f. The reference is taken by value and moved
+ /// into the `file` member; the file's num_writers counter is incremented.
+ /// pos is not listed because it has a default member initializer.
FileWriter(FileRef f)
- : file(f),
- pos(0),
+ : file(std::move(f)),
buffer_appender(buffer.get_page_aligned_appender(
g_conf()->bluefs_alloc_size / CEPH_PAGE_SIZE)) {
++file->num_writers;
iocv.fill(nullptr);
dirty_devs.fill(false);
- if (f->fnode.ino == 1) {
+ // f was moved into `file` in the initializer list, so the member has to
+ // be used from here on.
+ // NOTE(review): ino 1 is presumably the BlueFS log file - confirm.
+ if (file->fnode.ino == 1) {
write_hint = WRITE_LIFE_MEDIUM;
}
}
struct FileReaderBuffer {
MEMPOOL_CLASS_HELPERS();
- uint64_t bl_off; ///< prefetch buffer logical offset
+ uint64_t bl_off = 0; ///< prefetch buffer logical offset
bufferlist bl; ///< prefetch buffer
- uint64_t pos; ///< current logical offset
+ uint64_t pos = 0; ///< current logical offset
uint64_t max_prefetch; ///< max allowed prefetch
+ /// Only max_prefetch is set from the argument; bl_off and pos start at 0
+ /// through their default member initializers.
explicit FileReaderBuffer(uint64_t mpf)
- : bl_off(0),
- pos(0),
- max_prefetch(mpf) {}
+ : max_prefetch(mpf) {}
+ /// Logical offset just past the last byte currently held in the prefetch
+ /// buffer (bl_off plus the buffer length).
- uint64_t get_buf_end() {
+ uint64_t get_buf_end() const {
return bl_off + bl.length();
}
- uint64_t get_buf_remaining(uint64_t p) {
+ uint64_t get_buf_remaining(uint64_t p) const {
if (p >= bl_off && p < bl_off + bl.length())
return bl_off + bl.length() - p;
return 0;
MEMPOOL_CLASS_HELPERS();
FileRef file;
- explicit FileLock(FileRef f) : file(f) {}
+ explicit FileLock(FileRef f) : file(std::move(f)) {}
};
private:
vector<Allocator*> alloc; ///< allocators for bdevs
vector<uint64_t> alloc_size; ///< alloc size for each device
vector<interval_set<uint64_t>> pending_release; ///< extents to release
+ vector<interval_set<uint64_t>> block_unused_too_granular;
BlockDevice::aio_callback_t discard_cb[3]; //discard callbacks for each dev
BlueFSDeviceExpander* slow_dev_expander = nullptr;
+ std::unique_ptr<BlueFSVolumeSelector> vselector;
class SocketHook;
SocketHook* asok_hook = nullptr;
void _compact_log_sync();
void _compact_log_async(std::unique_lock<ceph::mutex>& l);
- void _rewrite_log_sync(bool allocate_with_fallback,
- int super_dev,
- int log_dev,
- int new_log_dev,
- int flags);
+ void _rewrite_log_and_layout_sync(bool allocate_with_fallback,
+ int super_dev,
+ int log_dev,
+ int new_log_dev,
+ int flags,
+ std::optional<bluefs_layout_t> layout);
//void _aio_finish(void *priv);
int _read_random(
FileReader *h, ///< [in] read from here
uint64_t offset, ///< [in] offset
- size_t len, ///< [in] this many bytes
+ uint64_t len, ///< [in] this many bytes
char *out); ///< [out] optional: or copy it here
void _invalidate_cache(FileRef f, uint64_t offset, uint64_t length);
int _open_super();
int _write_super(int dev);
+ int _check_new_allocations(const bluefs_fnode_t& fnode,
+ size_t dev_count,
+ boost::dynamic_bitset<uint64_t>* owned_blocks,
+ boost::dynamic_bitset<uint64_t>* used_blocks);
+ int _verify_alloc_granularity(
+ __u8 id, uint64_t offset, uint64_t length,
+ const char *op);
+ int _adjust_granularity(
+ __u8 id, uint64_t *offset, uint64_t *length, bool alloc);
int _replay(bool noop, bool to_stdout = false); ///< replay journal
FileWriter *_create_writer(FileRef f);
~BlueFS();
// the super is always stored on bdev 0
- int mkfs(uuid_d osd_uuid);
+ int mkfs(uuid_d osd_uuid, const bluefs_layout_t& layout);
int mount();
+ int maybe_verify_layout(const bluefs_layout_t& layout) const;
void umount();
- int prepare_new_device(int id);
+ int prepare_new_device(int id, const bluefs_layout_t& layout);
int log_dump();
int device_migrate_to_new(
CephContext *cct,
const set<int>& devs_source,
- int dev_target);
+ int dev_target,
+ const bluefs_layout_t& layout);
int device_migrate_to_existing(
CephContext *cct,
const set<int>& devs_source,
- int dev_target);
+ int dev_target,
+ const bluefs_layout_t& layout);
uint64_t get_used();
uint64_t get_total(unsigned id);
int lock_file(const string& dirname, const string& filename, FileLock **p);
int unlock_file(FileLock *l);
- void flush_log();
void compact_log();
/// sync any uncommitted state to disk
void set_slow_device_expander(BlueFSDeviceExpander* a) {
slow_dev_expander = a;
}
+ /// Install the volume selector. Ownership of s passes to this object: the
+ /// pointer is stored in the vselector unique_ptr, and reset() destroys any
+ /// selector that was installed before.
+ void set_volume_selector(BlueFSVolumeSelector* s) {
+ vselector.reset(s);
+ }
+ /// Forward to the installed selector's dump(), writing to sout.
+ /// NOTE(review): vselector is dereferenced without a null check, so a
+ /// selector must already be installed - confirm every caller guarantees it.
+ void dump_volume_selector(ostream& sout) {
+ vselector->dump(sout);
+ }
+ /// Forward a get_paths() request for base to the installed volume selector.
+ /// res is handed through by non-const reference, presumably as the output
+ /// container; nothing is returned.
+ void get_vselector_paths(
+     const std::string& base,
+     BlueFSVolumeSelector::paths& res) const
+ {
+   vselector->get_paths(base, res);
+ }
+
int add_block_device(unsigned bdev, const string& path, bool trim,
bool shared_with_bluestore=false);
bool bdev_support_label(unsigned id);
return _truncate(h, offset);
}
+ /// test purpose methods
+ void debug_inject_duplicate_gift(unsigned bdev, uint64_t offset, uint64_t len);
+ /// Read-only access to the `logger` perf-counter object (declared outside
+ /// this hunk). The pointer is returned as-is.
+ const PerfCounters* get_perf_counters() const {
+ return logger;
+ }
+};
+
+/// BlueFSVolumeSelector implementation built from three totals (wal, db,
+/// slow). Its usage-accounting hooks are no-ops; the remaining overrides are
+/// defined out of line.
+class OriginalVolumeSelector : public BlueFSVolumeSelector {
+  // Totals captured at construction.
+  // NOTE(review): presumably per-device capacities in bytes - confirm with
+  // the code that constructs this object.
+  uint64_t wal_total;
+  uint64_t db_total;
+  uint64_t slow_total;
+
+public:
+  OriginalVolumeSelector(
+    uint64_t _wal_total,
+    uint64_t _db_total,
+    uint64_t _slow_total)
+    : wal_total(_wal_total), db_total(_db_total), slow_total(_slow_total) {}
+
+  void* get_hint_by_device(uint8_t dev) const override;
+  void* get_hint_by_dir(const std::string& dirname) const override;
+
+  // This selector keeps no usage statistics, so all four accounting hooks
+  // are intentionally empty. Parameter names are commented out so that
+  // -Wunused-parameter stays quiet in every file including this header.
+  void add_usage(void* /*hint*/, const bluefs_fnode_t& /*fnode*/) override {}
+  void sub_usage(void* /*hint*/, const bluefs_fnode_t& /*fnode*/) override {}
+  void add_usage(void* /*hint*/, uint64_t /*fsize*/) override {}
+  void sub_usage(void* /*hint*/, uint64_t /*fsize*/) override {}
+
+  uint8_t select_prefer_bdev(void* hint) override;
+  void get_paths(const std::string& base, paths& res) const override;
+  void dump(std::ostream& sout) override;
};
#endif