// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph ObjectStore engine
 *
 * IO engine using Ceph's ObjectStore class to test low-level performance of
 * Ceph OSDs.
 *
 */
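//
// An example job file for this engine might look like the following; all
// paths and option values here are illustrative, not shipped defaults:
//
//   [global]
//   ioengine=external:/path/to/libfio_ceph_objectstore.so
//   conf=/path/to/ceph.conf    ; required: ceph configuration file
//   directory=/path/to/data    ; optional: exposed to the conf as ${fio_dir}
//   rw=randwrite
//   iodepth=16
//
//   [objectstore]
//   nr_files=64
//   size=256m
//   bs=64k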

#include <memory>
#include <system_error>
#include <vector>

#include "os/ObjectStore.h"
#include "global/global_init.h"
#include "common/errno.h"
#include "include/intarith.h"
#include "include/stringify.h"
#include "common/perf_counters.h"

#include <fio.h>
#include <optgroup.h>

#include "include/assert.h" // fio.h clobbers our assert.h

// route dout/derr debug output through the global ceph context
#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_

namespace {

/// fio configuration options read from the job file
struct Options {
  thread_data* td;
  char* conf;
};

template <class Func> // void Func(fio_option&)
fio_option make_option(Func&& func)
{
  // zero-initialize and set common defaults
  auto o = fio_option{};
  o.category = FIO_OPT_C_ENGINE;
  o.group = FIO_OPT_G_RBD;
  func(std::ref(o));
  return o;
}

static std::vector<fio_option> ceph_options{
  make_option([] (fio_option& o) {
    o.name = "conf";
    o.lname = "ceph configuration file";
    o.type = FIO_OPT_STR_STORE;
    o.help = "Path to a ceph configuration file";
    o.off1 = offsetof(Options, conf);
  }),
  {} // fio expects a 'null'-terminated list
};
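
// once built, fio can print this option list with its --enghelp flag, e.g.
// 'fio --enghelp=external:/path/to/libfio_ceph_objectstore.so' (the library
// path is illustrative; use whatever your build produced)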


/// global engine state shared between all jobs within the process. this
/// includes g_ceph_context and the ObjectStore instance
struct Engine {
  /// the initial g_ceph_context reference to be dropped on destruction
  boost::intrusive_ptr<CephContext> cct;
  std::unique_ptr<ObjectStore> os;

  std::mutex lock;
  int ref_count;

  Engine(const thread_data* td);
  ~Engine();

  static Engine* get_instance(thread_data* td) {
    // note: creates an Engine with the options associated with the first job
    static Engine engine(td);
    return &engine;
  }

  void ref() {
    std::lock_guard<std::mutex> l(lock);
    ++ref_count;
  }
  void deref() {
    std::lock_guard<std::mutex> l(lock);
    --ref_count;
    if (!ref_count) {
      // on the last deref, dump the accumulated perf counters (and optional
      // rocksdb statistics) before unmounting the ObjectStore
      ostringstream ostr;
      Formatter* f = Formatter::create("json-pretty", "json-pretty", "json-pretty");
      cct->get_perfcounters_collection()->dump_formatted(f, false);
      ostr << "FIO plugin ";
      f->flush(ostr);
      if (g_conf->rocksdb_perf) {
        os->get_db_statistics(f);
        ostr << "FIO get_db_statistics ";
        f->flush(ostr);
      }
      delete f;
      os->umount();
      dout(0) << ostr.str() << dendl;
    }
  }
};

Engine::Engine(const thread_data* td) : ref_count(0)
{
  // add the ceph command line arguments
  auto o = static_cast<const Options*>(td->eo);
  if (!o->conf) {
    throw std::runtime_error("missing conf option for ceph configuration file");
  }
  std::vector<const char*> args{
    "-i", "0", // identify as osd.0 for osd_data and osd_journal
    "--conf", o->conf, // use the requested conf file
  };
  if (td->o.directory) { // allow conf files to use ${fio_dir} for data
    args.emplace_back("--fio_dir");
    args.emplace_back(td->o.directory);
  }

  // claim the g_ceph_context reference and release it on destruction
  cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_OSD,
                    CODE_ENVIRONMENT_UTILITY, 0);
  common_init_finish(g_ceph_context);

  // create the ObjectStore
  os.reset(ObjectStore::create(g_ceph_context,
                               g_conf->osd_objectstore,
                               g_conf->osd_data,
                               g_conf->osd_journal));
  if (!os)
    throw std::runtime_error("bad objectstore type " + g_conf->osd_objectstore);

  // use the configured shard count, falling back to the hdd/ssd default
  // depending on whether the backing store is rotational
  unsigned num_shards;
  if (g_conf->osd_op_num_shards)
    num_shards = g_conf->osd_op_num_shards;
  else if (os->is_rotational())
    num_shards = g_conf->osd_op_num_shards_hdd;
  else
    num_shards = g_conf->osd_op_num_shards_ssd;
  os->set_cache_shards(num_shards);

  int r = os->mkfs();
  if (r < 0)
    throw std::system_error(-r, std::system_category(), "mkfs failed");

  r = os->mount();
  if (r < 0)
    throw std::system_error(-r, std::system_category(), "mount failed");
}

Engine::~Engine()
{
  assert(!ref_count);
}


/// a collection of objects and the sequencer that orders writes to it
struct Collection {
  spg_t pg;
  coll_t cid;
  ObjectStore::Sequencer sequencer;

  // use big pool ids to avoid clashing with existing collections
  static constexpr int64_t MIN_POOL_ID = 0x0000ffffffffffff;

  Collection(const spg_t& pg)
    : pg(pg), cid(pg), sequencer(stringify(pg)) {
    sequencer.shard_hint = pg;
  }
};

/// an object corresponding to a single fio_file
struct Object {
  ghobject_t oid;
  Collection& coll;

  Object(const char* name, Collection& coll)
    : oid(hobject_t(name, "", CEPH_NOSNAP, coll.pg.ps(), coll.pg.pool(), "")),
      coll(coll) {}
};

/// treat each fio job like a separate pool with its own collections and objects
struct Job {
  Engine* engine; //< pointer to the shared global Engine
  std::vector<Collection> collections; //< spread objects over collections
  std::vector<Object> objects; //< associate an object with each fio_file
  std::vector<io_u*> events; //< completions for fio_ceph_os_event()
  const bool unlink; //< unlink objects on destruction

  Job(Engine* engine, const thread_data* td);
  ~Job();
};

Job::Job(Engine* engine, const thread_data* td)
  : engine(engine),
    events(td->o.iodepth),
    unlink(td->o.unlink)
{
  engine->ref();
  // use the fio thread_number for our unique pool id
  const uint64_t pool = Collection::MIN_POOL_ID + td->thread_number;

  // create a collection for each object, up to osd_pool_default_pg_num
  uint32_t count = g_conf->osd_pool_default_pg_num;
  if (count > td->o.nr_files)
    count = td->o.nr_files;

  assert(count > 0);
  collections.reserve(count);

  const int split_bits = cbits(count - 1);

  ObjectStore::Transaction t;
  for (uint32_t i = 0; i < count; i++) {
    auto pg = spg_t{pg_t{i, pool}};
    collections.emplace_back(pg);

    auto& cid = collections.back().cid;
    if (!engine->os->collection_exists(cid))
      t.create_collection(cid, split_bits);
  }

  const uint64_t file_size = td->o.size / max(1u, td->o.nr_files);

  // create an object for each file in the job
  for (uint32_t i = 0; i < td->o.nr_files; i++) {
    auto f = td->files[i];
    f->real_file_size = file_size;
    f->engine_pos = i;

    // associate each object with a collection in a round-robin fashion
    auto& coll = collections[i % collections.size()];

    objects.emplace_back(f->file_name, coll);
    auto& oid = objects.back().oid;

    t.touch(coll.cid, oid);
    t.truncate(coll.cid, oid, file_size);
  }

  // apply the entire transaction synchronously
  ObjectStore::Sequencer sequencer("job init");
  int r = engine->os->apply_transaction(&sequencer, std::move(t));
  if (r) {
    engine->deref();
    throw std::system_error(r, std::system_category(), "job init");
  }
}

Job::~Job()
{
  if (unlink) {
    ObjectStore::Transaction t;
    // remove our objects
    for (auto& obj : objects) {
      t.remove(obj.coll.cid, obj.oid);
    }
    // remove our collections
    for (auto& coll : collections) {
      t.remove_collection(coll.cid);
    }
    ObjectStore::Sequencer sequencer("job cleanup");
    int r = engine->os->apply_transaction(&sequencer, std::move(t));
    if (r)
      derr << "job cleanup failed with " << cpp_strerror(-r) << dendl;
  }
  engine->deref();
}


int fio_ceph_os_setup(thread_data* td)
{
  // if there are multiple jobs, they must run in the same process against a
  // single instance of the ObjectStore. explicitly disable fio's default
  // job-per-process configuration
  td->o.use_thread = 1;

  try {
    // get or create the global Engine instance
    auto engine = Engine::get_instance(td);
    // create a Job for this thread
    td->io_ops_data = new Job(engine, td);
  } catch (std::exception& e) {
    std::cerr << "setup failed with " << e.what() << std::endl;
    return -1;
  }
  return 0;
}

void fio_ceph_os_cleanup(thread_data* td)
{
  auto job = static_cast<Job*>(td->io_ops_data);
  td->io_ops_data = nullptr;
  delete job;
}


io_u* fio_ceph_os_event(thread_data* td, int event)
{
  // return the requested event from fio_ceph_os_getevents()
  auto job = static_cast<Job*>(td->io_ops_data);
  return job->events[event];
}

int fio_ceph_os_getevents(thread_data* td, unsigned int min,
                          unsigned int max, const timespec* t)
{
  auto job = static_cast<Job*>(td->io_ops_data);
  unsigned int events = 0;
  io_u* u;
  unsigned int i;

  // loop through inflight ios until we find 'min' completions
  do {
    io_u_qiter(&td->io_u_all, u, i) {
      if (!(u->flags & IO_U_F_FLIGHT))
        continue;

      if (u->engine_data) {
        u->engine_data = nullptr;
        job->events[events] = u;
        events++;
      }
    }
    if (events >= min)
      break;
    usleep(100);
  } while (1);

  return events;
}

/// completion context for ObjectStore::queue_transaction()
class UnitComplete : public Context {
  io_u* u;
 public:
  UnitComplete(io_u* u) : u(u) {}
  void finish(int r) {
    // mark the pointer to indicate completion for fio_ceph_os_getevents()
    u->engine_data = reinterpret_cast<void*>(1ull);
  }
};

int fio_ceph_os_queue(thread_data* td, io_u* u)
{
  fio_ro_check(td, u);

  auto job = static_cast<Job*>(td->io_ops_data);
  auto& object = job->objects[u->file->engine_pos];
  auto& coll = object.coll;
  auto& os = job->engine->os;

  if (u->ddir == DDIR_WRITE) {
    // provide a hint if we're likely to read this data back
    const int flags = td_rw(td) ? CEPH_OSD_OP_FLAG_FADVISE_WILLNEED : 0;

    bufferlist bl;
    bl.push_back(buffer::copy(reinterpret_cast<char*>(u->xfer_buf),
                              u->xfer_buflen));

    // enqueue a write transaction on the collection's sequencer
    ObjectStore::Transaction t;
    t.write(coll.cid, object.oid, u->offset, u->xfer_buflen, bl, flags);
    os->queue_transaction(&coll.sequencer,
                          std::move(t),
                          nullptr,
                          new UnitComplete(u));
    return FIO_Q_QUEUED;
  }

  if (u->ddir == DDIR_READ) {
    // ObjectStore reads are synchronous, so make the call and return COMPLETED
    bufferlist bl;
    int r = os->read(coll.cid, object.oid, u->offset, u->xfer_buflen, bl);
    if (r < 0) {
      u->error = r;
      td_verror(td, u->error, "xfer");
    } else {
      bl.copy(0, bl.length(), static_cast<char*>(u->xfer_buf));
      u->resid = u->xfer_buflen - r;
    }
    return FIO_Q_COMPLETED;
  }

  derr << "WARNING: Only DDIR_READ and DDIR_WRITE are supported!" << dendl;
  u->error = -EINVAL;
  td_verror(td, u->error, "xfer");
  return FIO_Q_COMPLETED;
}

int fio_ceph_os_commit(thread_data* td)
{
  // commit() allows the engine to batch up queued requests to be submitted all
  // at once. it would be natural for queue() to collect transactions in a list,
  // and use commit() to pass them all to ObjectStore::queue_transactions(). but
  // because we spread objects over multiple collections, we a) need to use a
  // different sequencer for each collection, and b) are less likely to see a
  // benefit from batching requests within a collection
  return 0;
}

// open/close are noops. we set the FIO_DISKLESSIO flag in ioengine_ops to
// prevent fio from creating the files
int fio_ceph_os_open(thread_data* td, fio_file* f) { return 0; }
int fio_ceph_os_close(thread_data* td, fio_file* f) { return 0; }

int fio_ceph_os_io_u_init(thread_data* td, io_u* u)
{
  // no data is allocated, we just use the pointer as a boolean 'completed' flag
  u->engine_data = nullptr;
  return 0;
}

void fio_ceph_os_io_u_free(thread_data* td, io_u* u)
{
  u->engine_data = nullptr;
}


// ioengine_ops for get_ioengine()
struct ceph_ioengine : public ioengine_ops {
  ceph_ioengine() : ioengine_ops({}) {
    name = "ceph-os";
    version = FIO_IOOPS_VERSION;
    flags = FIO_DISKLESSIO;
    setup = fio_ceph_os_setup;
    queue = fio_ceph_os_queue;
    commit = fio_ceph_os_commit;
    getevents = fio_ceph_os_getevents;
    event = fio_ceph_os_event;
    cleanup = fio_ceph_os_cleanup;
    open_file = fio_ceph_os_open;
    close_file = fio_ceph_os_close;
    io_u_init = fio_ceph_os_io_u_init;
    io_u_free = fio_ceph_os_io_u_free;
    options = ceph_options.data();
    option_struct_size = sizeof(struct Options);
  }
};

} // anonymous namespace
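
// fio loads external engines with dlopen() and resolves get_ioengine() to
// obtain their ioengine_ops, so the entry point below must have C linkage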

extern "C" {
// the exported fio engine interface
void get_ioengine(struct ioengine_ops** ioengine_ptr) {
  static ceph_ioengine ioengine;
  *ioengine_ptr = &ioengine;
}
} // extern "C"