// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph ObjectStore engine
 *
 * IO engine using Ceph's ObjectStore class to test low-level performance of
 * Ceph OSDs.
 *
 */
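//
// An illustrative job file (hypothetical -- the shared library name and paths
// below are assumptions; only the 'conf' option is defined by this engine,
// the rest are stock fio options):
//
//   [global]
//   ioengine=external:libfio_ceph_objectstore.so
//   conf=/path/to/ceph.conf
//   rw=randwrite
//   iodepth=16
//
//   [objectstore]
//   nr_files=64
//   size=256m
//   bs=64k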

#include <memory>
#include <system_error>
#include <vector>

#include "os/ObjectStore.h"
#include "global/global_init.h"
#include "common/errno.h"
#include "include/intarith.h"
#include "include/stringify.h"

#include <fio.h>
#include <optgroup.h>

#include "include/assert.h" // fio.h clobbers our assert.h

#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_
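// (note: ceph_subsys_ with the trailing underscore is the default, unnamed
// logging subsystem)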

namespace {

/// fio configuration options read from the job file
struct Options {
  thread_data* td;
  char* conf;
};

template <class Func> // void Func(fio_option&)
fio_option make_option(Func&& func)
{
  // zero-initialize and set common defaults
  auto o = fio_option{};
  o.category = FIO_OPT_C_ENGINE;
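  // presumably reused for lack of a dedicated objectstore option group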
  o.group = FIO_OPT_G_RBD;
  func(std::ref(o));
  return o;
}

static std::vector<fio_option> ceph_options{
  make_option([] (fio_option& o) {
    o.name = "conf";
    o.lname = "ceph configuration file";
    o.type = FIO_OPT_STR_STORE;
    o.help = "Path to a ceph configuration file";
    o.off1 = offsetof(Options, conf);
  }),
  {} // fio expects a 'null'-terminated list
};


/// global engine state shared between all jobs within the process. this
/// includes g_ceph_context and the ObjectStore instance
struct Engine {
  /// the initial g_ceph_context reference to be dropped on destruction
  boost::intrusive_ptr<CephContext> cct;
  std::unique_ptr<ObjectStore> os;

  std::mutex lock;
  int ref_count;

  Engine(const thread_data* td);
  ~Engine();

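  // initialization of the function-local static below is thread-safe in
  // C++11, so concurrent jobs racing into setup all share the one Engine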
  static Engine* get_instance(thread_data* td) {
    // note: creates an Engine with the options associated with the first job
    static Engine engine(td);
    return &engine;
  }

  void ref() {
    std::lock_guard<std::mutex> l(lock);
    ++ref_count;
  }
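  // called from each Job's destructor; the final deref dumps the perf
  // counters and unmounts the store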
  void deref() {
    std::lock_guard<std::mutex> l(lock);
    --ref_count;
    if (!ref_count) {
      ostringstream ostr;
      Formatter* f = Formatter::create("json-pretty", "json-pretty", "json-pretty");
      os->dump_perf_counters(f);
      ostr << "FIO plugin ";
      f->flush(ostr);
      if (g_conf->rocksdb_perf) {
        os->get_db_statistics(f);
        ostr << "FIO get_db_statistics ";
        f->flush(ostr);
      }
      delete f;
      os->umount();
      dout(0) << ostr.str() << dendl;
    }
  }
};

Engine::Engine(const thread_data* td) : ref_count(0)
{
  // add the ceph command line arguments
  auto o = static_cast<const Options*>(td->eo);
  if (!o->conf) {
    throw std::runtime_error("missing conf option for ceph configuration file");
  }
  std::vector<const char*> args{
    "-i", "0", // identify as osd.0 for osd_data and osd_journal
    "--conf", o->conf, // use the requested conf file
  };
  if (td->o.directory) { // allow conf files to use ${fio_dir} for data
    args.emplace_back("--fio_dir");
    args.emplace_back(td->o.directory);
  }

  // claim the g_ceph_context reference and release it on destruction
  cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_OSD,
                    CODE_ENVIRONMENT_UTILITY, 0);
  common_init_finish(g_ceph_context);

  // create the ObjectStore
  os.reset(ObjectStore::create(g_ceph_context,
                               g_conf->osd_objectstore,
                               g_conf->osd_data,
                               g_conf->osd_journal));
  if (!os)
    throw std::runtime_error("bad objectstore type " + g_conf->osd_objectstore);

  unsigned num_shards;
  if (g_conf->osd_op_num_shards)
    num_shards = g_conf->osd_op_num_shards;
  else if (os->is_rotational())
    num_shards = g_conf->osd_op_num_shards_hdd;
  else
    num_shards = g_conf->osd_op_num_shards_ssd;
  os->set_cache_shards(num_shards);

  int r = os->mkfs();
  if (r < 0)
    throw std::system_error(-r, std::system_category(), "mkfs failed");

  r = os->mount();
  if (r < 0)
    throw std::system_error(-r, std::system_category(), "mount failed");
}

Engine::~Engine()
{
  assert(!ref_count);
}


struct Collection {
  spg_t pg;
  coll_t cid;
  ObjectStore::Sequencer sequencer;

  // use big pool ids to avoid clashing with existing collections
  static constexpr int64_t MIN_POOL_ID = 0x0000ffffffffffff;

  Collection(const spg_t& pg)
    : pg(pg), cid(pg), sequencer(stringify(pg)) {
    sequencer.shard_hint = pg;
  }
};

struct Object {
  ghobject_t oid;
  Collection& coll;

  Object(const char* name, Collection& coll)
    : oid(hobject_t(name, "", CEPH_NOSNAP, coll.pg.ps(), coll.pg.pool(), "")),
      coll(coll) {}
};

/// treat each fio job like a separate pool with its own collections and objects
struct Job {
  Engine* engine; //< pointer to the shared global Engine
  std::vector<Collection> collections; //< spread objects over collections
  std::vector<Object> objects; //< associate an object with each fio_file
  std::vector<io_u*> events; //< completions for fio_ceph_os_event()
  const bool unlink; //< unlink objects on destruction

  Job(Engine* engine, const thread_data* td);
  ~Job();
};

Job::Job(Engine* engine, const thread_data* td)
  : engine(engine),
    events(td->o.iodepth),
    unlink(td->o.unlink)
{
  engine->ref();
  // use the fio thread_number for our unique pool id
  const uint64_t pool = Collection::MIN_POOL_ID + td->thread_number;

  // create a collection for each object, up to osd_pool_default_pg_num
  uint32_t count = g_conf->osd_pool_default_pg_num;
  if (count > td->o.nr_files)
    count = td->o.nr_files;

  assert(count > 0);
  collections.reserve(count);

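  // cbits(count - 1) gives the number of bits needed to represent count - 1,
  // i.e. enough hash bits to distinguish the 'count' collections; it is passed
  // to create_collection() as each collection's split hint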
  const int split_bits = cbits(count - 1);

  ObjectStore::Transaction t;
  for (uint32_t i = 0; i < count; i++) {
    auto pg = spg_t{pg_t{i, pool}};
    collections.emplace_back(pg);

    auto& cid = collections.back().cid;
    if (!engine->os->collection_exists(cid))
      t.create_collection(cid, split_bits);
  }

  const uint64_t file_size = td->o.size / max(1u, td->o.nr_files);

  // create an object for each file in the job
  for (uint32_t i = 0; i < td->o.nr_files; i++) {
    auto f = td->files[i];
    f->real_file_size = file_size;
    f->engine_pos = i;

    // associate each object with a collection in a round-robin fashion
    auto& coll = collections[i % collections.size()];

    objects.emplace_back(f->file_name, coll);
    auto& oid = objects.back().oid;

    t.touch(coll.cid, oid);
    t.truncate(coll.cid, oid, file_size);
  }

  // apply the entire transaction synchronously
  ObjectStore::Sequencer sequencer("job init");
  int r = engine->os->apply_transaction(&sequencer, std::move(t));
  if (r) {
    engine->deref();
    // apply_transaction() returns a negative errno on failure; negate it for
    // std::system_error, matching the mkfs/mount error paths above
    throw std::system_error(-r, std::system_category(), "job init");
  }
}

Job::~Job()
{
  if (unlink) {
    ObjectStore::Transaction t;
    // remove our objects
    for (auto& obj : objects) {
      t.remove(obj.coll.cid, obj.oid);
    }
    // remove our collections
    for (auto& coll : collections) {
      t.remove_collection(coll.cid);
    }
    ObjectStore::Sequencer sequencer("job cleanup");
    int r = engine->os->apply_transaction(&sequencer, std::move(t));
    if (r)
      derr << "job cleanup failed with " << cpp_strerror(-r) << dendl;
  }
  engine->deref();
}


int fio_ceph_os_setup(thread_data* td)
{
  // if there are multiple jobs, they must run in the same process against a
  // single instance of the ObjectStore. explicitly disable fio's default
  // job-per-process configuration
  td->o.use_thread = 1;

  try {
    // get or create the global Engine instance
    auto engine = Engine::get_instance(td);
    // create a Job for this thread
    td->io_ops_data = new Job(engine, td);
  } catch (std::exception& e) {
    std::cerr << "setup failed with " << e.what() << std::endl;
    return -1;
  }
  return 0;
}

void fio_ceph_os_cleanup(thread_data* td)
{
  auto job = static_cast<Job*>(td->io_ops_data);
  td->io_ops_data = nullptr;
  delete job;
}


io_u* fio_ceph_os_event(thread_data* td, int event)
{
  // return the requested event from fio_ceph_os_getevents()
  auto job = static_cast<Job*>(td->io_ops_data);
  return job->events[event];
}

int fio_ceph_os_getevents(thread_data* td, unsigned int min,
                          unsigned int max, const timespec* t)
{
  auto job = static_cast<Job*>(td->io_ops_data);
  unsigned int events = 0;
  io_u* u;
  unsigned int i;

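  // note: the 'max' limit and the timeout are ignored here; we just poll,
  // sleeping briefly between passes, until at least 'min' ios have completed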
  // loop through inflight ios until we find 'min' completions
  do {
    io_u_qiter(&td->io_u_all, u, i) {
      if (!(u->flags & IO_U_F_FLIGHT))
        continue;

      if (u->engine_data) {
        u->engine_data = nullptr;
        job->events[events] = u;
        events++;
      }
    }
    if (events >= min)
      break;
    usleep(100);
  } while (1);

  return events;
}

/// completion context for ObjectStore::queue_transaction()
class UnitComplete : public Context {
  io_u* u;
 public:
  UnitComplete(io_u* u) : u(u) {}
  void finish(int r) override {
    // mark the pointer to indicate completion for fio_ceph_os_getevents()
    u->engine_data = reinterpret_cast<void*>(1ull);
  }
};
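// note: the store invokes completions via Context::complete(), which deletes
// the context after finish() returns, so each UnitComplete frees itself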

int fio_ceph_os_queue(thread_data* td, io_u* u)
{
  fio_ro_check(td, u);

  auto job = static_cast<Job*>(td->io_ops_data);
  auto& object = job->objects[u->file->engine_pos];
  auto& coll = object.coll;
  auto& os = job->engine->os;

  if (u->ddir == DDIR_WRITE) {
    // provide a hint if we're likely to read this data back
    const int flags = td_rw(td) ? CEPH_OSD_OP_FLAG_FADVISE_WILLNEED : 0;

    bufferlist bl;
    bl.push_back(buffer::copy(reinterpret_cast<char*>(u->xfer_buf),
                              u->xfer_buflen));

    // enqueue a write transaction on the collection's sequencer
    ObjectStore::Transaction t;
    t.write(coll.cid, object.oid, u->offset, u->xfer_buflen, bl, flags);
    os->queue_transaction(&coll.sequencer,
                          std::move(t),
                          nullptr,
                          new UnitComplete(u));
    return FIO_Q_QUEUED;
  }

  if (u->ddir == DDIR_READ) {
    // ObjectStore reads are synchronous, so make the call and return COMPLETED
    bufferlist bl;
    int r = os->read(coll.cid, object.oid, u->offset, u->xfer_buflen, bl);
    if (r < 0) {
      u->error = r;
      td_verror(td, u->error, "xfer");
    } else {
      bl.copy(0, bl.length(), static_cast<char*>(u->xfer_buf));
      u->resid = u->xfer_buflen - r;
    }
    return FIO_Q_COMPLETED;
  }

  derr << "WARNING: Only DDIR_READ and DDIR_WRITE are supported!" << dendl;
  u->error = -EINVAL;
  td_verror(td, u->error, "xfer");
  return FIO_Q_COMPLETED;
}

int fio_ceph_os_commit(thread_data* td)
{
  // commit() allows the engine to batch up queued requests to be submitted all
  // at once. it would be natural for queue() to collect transactions in a list,
  // and use commit() to pass them all to ObjectStore::queue_transactions(). but
  // because we spread objects over multiple collections, we a) need to use a
  // different sequencer for each collection, and b) are less likely to see a
  // benefit from batching requests within a collection
  return 0;
}

// open/close are noops. we set the FIO_DISKLESSIO flag in ioengine_ops to
// prevent fio from creating the files
int fio_ceph_os_open(thread_data* td, fio_file* f) { return 0; }
int fio_ceph_os_close(thread_data* td, fio_file* f) { return 0; }

int fio_ceph_os_io_u_init(thread_data* td, io_u* u)
{
  // no data is allocated, we just use the pointer as a boolean 'completed' flag
  u->engine_data = nullptr;
  return 0;
}

void fio_ceph_os_io_u_free(thread_data* td, io_u* u)
{
  u->engine_data = nullptr;
}


// ioengine_ops for get_ioengine()
struct ceph_ioengine : public ioengine_ops {
  ceph_ioengine() : ioengine_ops({}) {
    name = "ceph-os";
    version = FIO_IOOPS_VERSION;
    flags = FIO_DISKLESSIO;
    setup = fio_ceph_os_setup;
    queue = fio_ceph_os_queue;
    commit = fio_ceph_os_commit;
    getevents = fio_ceph_os_getevents;
    event = fio_ceph_os_event;
    cleanup = fio_ceph_os_cleanup;
    open_file = fio_ceph_os_open;
    close_file = fio_ceph_os_close;
    io_u_init = fio_ceph_os_io_u_init;
    io_u_free = fio_ceph_os_io_u_free;
    options = ceph_options.data();
    option_struct_size = sizeof(struct Options);
  }
};

} // anonymous namespace

extern "C" {
// the exported fio engine interface
void get_ioengine(struct ioengine_ops** ioengine_ptr) {
  static ceph_ioengine ioengine;
  *ioengine_ptr = &ioengine;
}
} // extern "C"
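
// fio loads external engines at runtime with dlopen() and resolves the
// get_ioengine() symbol to register them; jobs select this one with
// ioengine=external:<path to the built .so>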