1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14 #ifndef CEPH_OBJECTSTORE_H
15 #define CEPH_OBJECTSTORE_H
16
17 #include "include/Context.h"
18 #include "include/buffer.h"
19 #include "include/types.h"
20 #include "osd/osd_types.h"
21 #include "common/TrackedOp.h"
22 #include "common/WorkQueue.h"
23 #include "ObjectMap.h"
24
25 #include <errno.h>
26 #include <sys/stat.h>
27 #include <vector>
28 #include <map>
29
30 #if defined(DARWIN) || defined(__FreeBSD__) || defined(__sun)
31 #include <sys/statvfs.h>
32 #else
33 #include <sys/vfs.h> /* or <sys/statfs.h> */
34 #endif /* DARWIN */
35
36 #define OPS_PER_PTR 32
37
38 class CephContext;
39
40 using std::vector;
41 using std::string;
42 using std::map;
43
44 namespace ceph {
45 class Formatter;
46 }
47
48 /*
49 * low-level interface to the local OSD file system
50 */
51
52 class Logger;
53
54
55 static inline void encode(const map<string,bufferptr> *attrset, bufferlist &bl) {
56 ::encode(*attrset, bl);
57 }
58
59 // this isn't the best place for these, but...
60 void decode_str_str_map_to_bl(bufferlist::iterator& p, bufferlist *out);
61 void decode_str_set_to_bl(bufferlist::iterator& p, bufferlist *out);
62
63 // Flag bits
64 typedef uint32_t osflagbits_t;
65 const int SKIP_JOURNAL_REPLAY = 1 << 0;
66 const int SKIP_MOUNT_OMAP = 1 << 1;
67
68 class ObjectStore {
69 protected:
70 string path;
71
72 public:
73 CephContext* cct;
74 /**
75 * create - create an ObjectStore instance.
76 *
77 * This is invoked once at initialization time.
78 *
79 * @param type type of store. This is a string from the configuration file.
80 * @param data path (or other descriptor) for data
81 * @param journal path (or other descriptor) for journal (optional)
82 * @param flags flag bits (osflagbits_t) which the store should check, if applicable
83 */
84 static ObjectStore *create(CephContext *cct,
85 const string& type,
86 const string& data,
87 const string& journal,
88 osflagbits_t flags = 0);
89
90 /**
91 * probe a block device to learn the uuid of the owning OSD
92 *
93 * @param cct cct
94 * @param path path to device
95 * @param fsid [out] osd uuid
96 */
97 static int probe_block_device_fsid(
98 CephContext *cct,
99 const string& path,
100 uuid_d *fsid);
101
102 /**
103 * Fetch Object Store statistics.
104 *
105 * Currently only the latency of write and apply operations is measured.
106 *
107 * This appears to be called with nothing locked.
108 */
109 virtual objectstore_perf_stat_t get_cur_stats() = 0;
110
111 /**
112 * Fetch Object Store performance counters.
113 *
114 *
115 * This appears to be called with nothing locked.
116 */
117 virtual const PerfCounters* get_perf_counters() const = 0;
118
119 /**
120 * a sequencer orders transactions
121 *
122 * Any transactions queued under a given sequencer will be applied in
123 * sequence. Transactions queued under different sequencers may run
124 * in parallel.
125 *
126 * Clients of ObjectStore create and maintain their own Sequencer objects.
127 * When a list of transactions is queued, the caller specifies the Sequencer to be used.
128 *
129 */
130
131 /**
132 * ABC for Sequencer implementation, private to the ObjectStore derived class.
133 * created in ...::queue_transaction(s)
134 */
135 struct Sequencer_impl : public RefCountedObject {
136 CephContext* cct;
137
138 // block until any previous transactions are visible. specifically,
139 // collection_list and collection_empty need to reflect prior operations.
140 virtual void flush() = 0;
141
142 // called when we are done with the impl. the impl may have a different
143 // (longer) lifecycle than the Sequencer.
144 virtual void discard() {}
145
146 /**
147 * Async flush_commit
148 *
149 * There are two cases:
150 * 1) sequencer is currently idle: the method returns true. c is
151 * not touched.
152 * 2) sequencer is not idle: the method returns false and c is
153 * called asynchronously with a value of 0 once all transactions
154 * queued on this sequencer prior to the call have been applied
155 * and committed.
156 */
157 virtual bool flush_commit(
158 Context *c ///< [in] context to call upon flush/commit
159 ) = 0; ///< @return true if idle, false otherwise
160
161 Sequencer_impl(CephContext* cct) : RefCountedObject(NULL, 0), cct(cct) {}
162 ~Sequencer_impl() override {}
163 };
164 typedef boost::intrusive_ptr<Sequencer_impl> Sequencer_implRef;
165
166 /**
167 * External (opaque) sequencer implementation
168 */
169 struct Sequencer {
170 string name;
171 spg_t shard_hint;
172 Sequencer_implRef p;
173
174 explicit Sequencer(string n)
175 : name(n), shard_hint(spg_t()), p(NULL) {
176 }
177 ~Sequencer() {
178 if (p)
179 p->discard(); // tell impl we are done with it
180 }
181
182 /// return a unique string identifier for this sequencer
183 const string& get_name() const {
184 return name;
185 }
186 /// wait for any queued transactions on this sequencer to apply
187 void flush() {
188 if (p)
189 p->flush();
190 }
191
192 /// @see Sequencer_impl::flush_commit()
193 bool flush_commit(Context *c) {
194 if (!p) {
195 return true;
196 } else {
197 return p->flush_commit(c);
198 }
199 }
200 };
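  /*
   * Minimal usage sketch (illustrative only; "store", cid and oid are
   * assumed to exist in the caller's code). A client owns one Sequencer
   * per ordering domain and passes it with every batch of transactions:
   *
   *   ObjectStore::Sequencer osr("pg 1.0");        // ordering domain
   *   ObjectStore::Transaction t;
   *   t.touch(cid, oid);
   *   store->queue_transaction(&osr, std::move(t), nullptr);
   *   osr.flush();                                 // block until readable
   *   if (!osr.flush_commit(on_commit_ctx))        // on_commit_ctx: assumed Context*
   *     ; // not idle; on_commit_ctx will be called once everything is durable
   */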
201
202 struct CollectionImpl : public RefCountedObject {
203 virtual const coll_t &get_cid() = 0;
204 CollectionImpl() : RefCountedObject(NULL, 0) {}
205 };
206 typedef boost::intrusive_ptr<CollectionImpl> CollectionHandle;
207
208 struct CompatCollectionHandle : public CollectionImpl {
209 coll_t cid;
210 explicit CompatCollectionHandle(coll_t c) : cid(c) {}
211 const coll_t &get_cid() override {
212 return cid;
213 }
214 };
215
216 /*********************************
217 *
218 * Object Contents and semantics
219 *
220 * All ObjectStore objects are identified as a named object
221 * (ghobject_t and hobject_t) in a named collection (coll_t).
222 * ObjectStore operations support the creation, mutation, deletion
223 * and enumeration of objects within a collection. Enumeration is
224 * in sorted key order (where keys are sorted by hash). Object names
225 * are globally unique.
226 *
227 * Each object has four distinct parts: byte data, xattrs, omap_header
228 * and omap entries.
229 *
230 * The data portion of an object is conceptually equivalent to a
231 * file in a file system. Random and partial access for both read
232 * and write operations is required. The ability to have a sparse
233 * implementation of the data portion of an object is beneficial for
234 * some workloads, but not required. There is a system-wide limit on
235 * the maximum size of an object, which is typically around 100 MB.
236 *
237 * Xattrs are equivalent to the extended attributes of file
238 * systems. Xattrs are a set of key/value pairs. Sub-value access
239 * is not required. It is possible to enumerate the set of xattrs in
240 * key order. At the implementation level, xattrs are used
241 * exclusively within Ceph, and the implementer can expect the
242 * total size of all of the xattrs on an object to be relatively
243 * small, i.e., less than 64KB. Much of Ceph assumes that accessing
244 * xattrs on temporally adjacent object accesses (recent past or
245 * near future) is inexpensive.
246 *
247 * omap_header is a single blob of data. It can be read or written
248 * in total.
249 *
250 * Omap entries are conceptually the same as xattrs
251 * but in a different address space. In other words, you can have
252 * the same key as an xattr and an omap entry and they have distinct
253 * values. Enumeration of xattrs doesn't include omap entries and
254 * vice versa. The size and access characteristics of omap entries
255 * are very different from xattrs. In particular, the value portion
256 * of an omap entry can be quite large (MBs). More importantly, the
257 * interface must support efficient range queries on omap entries even
258 * when there is a large number of entries.
259 *
260 *********************************/
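  /*
   * Illustrative sketch of the four parts (assumed names: store, osr, cid,
   * oid). A single Transaction can populate the byte data, an xattr, the
   * omap header and omap entries of one object:
   *
   *   ObjectStore::Transaction t;
   *   bufferlist data, xval, header, oval;
   *   data.append("payload");
   *   t.write(cid, oid, 0, data.length(), data);    // byte data
   *   xval.append("x");
   *   t.setattr(cid, oid, "user.attr", xval);       // xattr
   *   header.append("h");
   *   t.omap_setheader(cid, oid, header);           // omap header
   *   oval.append("v");
   *   map<string, bufferlist> kv;
   *   kv["key1"] = oval;
   *   t.omap_setkeys(cid, oid, kv);                 // omap entries
   *   store->queue_transaction(&osr, std::move(t), nullptr);
   */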
261
262 /*******************************
263 *
264 * Collections
265 *
266 * A collection is simply a grouping of objects. Collections have
267 * names (coll_t) and can be enumerated in order. Like an
268 * individual object, a collection also has a set of xattrs.
269 *
270 *
271 */
272
273
274 /*********************************
275 * transaction
276 *
277 * A Transaction represents a sequence of primitive mutation
278 * operations.
279 *
280 * Three events in the life of a Transaction result in
281 * callbacks. Any Transaction can contain any number of callback
282 * objects (Context) for any combination of the three classes of
283 * callbacks:
284 *
285 * on_applied_sync, on_applied, and on_commit.
286 *
287 * The "on_applied" and "on_applied_sync" callbacks are invoked when
288 * the modifications requested by the Transaction are visible to
289 * subsequent ObjectStore operations, i.e., the results are
290 * readable. The only conceptual difference between on_applied and
291 * on_applied_sync is the specific thread and locking environment in
292 * which the callbacks operate. "on_applied_sync" is called
293 * directly by an ObjectStore execution thread. It is expected to
294 * execute quickly and must not acquire any locks of the calling
295 * environment. Conversely, "on_applied" is called from the separate
296 * Finisher thread, meaning that it can contend for calling
297 * environment locks. NB, on_applied and on_applied_sync are
298 * sometimes called on_readable and on_readable_sync.
299 *
300 * The "on_commit" callback is also called from the Finisher thread
301 * and indicates that all of the mutations have been durably
302 * committed to stable storage (i.e., are now software/hardware
303 * crashproof).
304 *
305 * At the implementation level, each mutation primitive (and its
306 * associated data) can be serialized to a single buffer. That
307 * serialization, however, does not copy any data, but (using the
308 * bufferlist library) will reference the original buffers. This
309 * implies that the buffer that contains the data being submitted
310 * must remain stable until the on_commit callback completes. In
311 * practice, bufferlist handles all of this for you and this
312 * subtlety is only relevant if you are referencing an existing
313 * buffer via buffer::raw_static.
314 *
315 * Some implementations of ObjectStore choose to implement their own
316 * form of journaling that uses the serialized form of a
317 * Transaction. This requires that the encode/decode logic properly
318 * version itself and handle version upgrades that might change the
319 * format of the encoded Transaction. This has already happened a
320 * couple of times and the Transaction object contains some helper
321 * variables that aid in this legacy decoding:
322 *
323 * sobject_encoding detects an older/simpler version of oid
324 * present in pre-bobtail versions of ceph. use_pool_override
325 * also detects a situation where the pool of an oid can be
326 * overridden for legacy operations/buffers. For non-legacy
327 * implementations of ObjectStore, neither of these fields is
328 * relevant.
329 *
330 *
331 * TRANSACTION ISOLATION
332 *
333 * Except as noted below, isolation is the responsibility of the
334 * caller. In other words, if any storage element (storage element
335 * == any of the four portions of an object as described above) is
336 * altered by a transaction (including deletion), the caller
337 * promises not to attempt to read that element while the
338 * transaction is pending (here pending means from the time of
339 * issuance until the "on_applied_sync" callback has been
340 * received). Violations of isolation need not be detected by
341 * ObjectStore and there is no corresponding error mechanism for
342 * reporting an isolation violation (crashing would be the
343 * appropriate way to report an isolation violation if detected).
344 *
345 * Enumeration operations may violate transaction isolation as
346 * described above when a storage element is being created or
347 * deleted as part of a transaction. In this case, ObjectStore is
348 * allowed to consider the enumeration operation to either precede
349 * or follow the violating transaction element. In other words, the
350 * presence/absence of the mutated element in the enumeration is
351 * entirely at the discretion of ObjectStore. The arbitrary ordering
352 * applies independently to each transaction element. For example,
353 * if a transaction contains two mutating elements "create A" and
354 * "delete B", and an enumeration operation is performed while this
355 * transaction is pending, it is permissible for ObjectStore to
356 * report any of the four possible combinations of the existence of
357 * A and B.
358 *
359 */
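  /*
   * Callback sketch (the Context subclasses shown are hypothetical and
   * stand in for caller-defined callbacks). Each of the three callback
   * classes is registered on the Transaction before it is queued:
   *
   *   ObjectStore::Transaction t;
   *   t.write(cid, oid, 0, bl.length(), bl);
   *   t.register_on_applied(new C_Readable(...));          // Finisher thread
   *   t.register_on_applied_sync(new C_ReadableSync(...)); // store thread, no locks
   *   t.register_on_commit(new C_Durable(...));            // on stable storage
   *   store->queue_transaction(&osr, std::move(t), nullptr);
   */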
360 class Transaction {
361 public:
362 enum {
363 OP_NOP = 0,
364 OP_TOUCH = 9, // cid, oid
365 OP_WRITE = 10, // cid, oid, offset, len, bl
366 OP_ZERO = 11, // cid, oid, offset, len
367 OP_TRUNCATE = 12, // cid, oid, len
368 OP_REMOVE = 13, // cid, oid
369 OP_SETATTR = 14, // cid, oid, attrname, bl
370 OP_SETATTRS = 15, // cid, oid, attrset
371 OP_RMATTR = 16, // cid, oid, attrname
372 OP_CLONE = 17, // cid, oid, newoid
373 OP_CLONERANGE = 18, // cid, oid, newoid, offset, len
374 OP_CLONERANGE2 = 30, // cid, oid, newoid, srcoff, len, dstoff
375
376 OP_TRIMCACHE = 19, // cid, oid, offset, len **DEPRECATED**
377
378 OP_MKCOLL = 20, // cid
379 OP_RMCOLL = 21, // cid
380 OP_COLL_ADD = 22, // cid, oldcid, oid
381 OP_COLL_REMOVE = 23, // cid, oid
382 OP_COLL_SETATTR = 24, // cid, attrname, bl
383 OP_COLL_RMATTR = 25, // cid, attrname
384 OP_COLL_SETATTRS = 26, // cid, attrset
385 OP_COLL_MOVE = 8, // newcid, oldcid, oid
386
387 OP_STARTSYNC = 27, // start a sync
388
389 OP_RMATTRS = 28, // cid, oid
390 OP_COLL_RENAME = 29, // cid, newcid
391
392 OP_OMAP_CLEAR = 31, // cid
393 OP_OMAP_SETKEYS = 32, // cid, attrset
394 OP_OMAP_RMKEYS = 33, // cid, keyset
395 OP_OMAP_SETHEADER = 34, // cid, header
396 OP_SPLIT_COLLECTION = 35, // cid, bits, destination
397 OP_SPLIT_COLLECTION2 = 36, /* cid, bits, destination
398 doesn't create the destination */
399 OP_OMAP_RMKEYRANGE = 37, // cid, oid, firstkey, lastkey
400 OP_COLL_MOVE_RENAME = 38, // oldcid, oldoid, newcid, newoid
401
402 OP_SETALLOCHINT = 39, // cid, oid, object_size, write_size
403 OP_COLL_HINT = 40, // cid, type, bl
404
405 OP_TRY_RENAME = 41, // oldcid, oldoid, newoid
406
407 OP_COLL_SET_BITS = 42, // cid, bits
408 };
409
410 // Transaction hint type
411 enum {
412 COLL_HINT_EXPECTED_NUM_OBJECTS = 1,
413 };
414
415 struct Op {
416 __le32 op;
417 __le32 cid;
418 __le32 oid;
419 __le64 off;
420 __le64 len;
421 __le32 dest_cid;
422 __le32 dest_oid; //OP_CLONE, OP_CLONERANGE
423 __le64 dest_off; //OP_CLONERANGE
424 union {
425 struct {
426 __le32 hint_type; //OP_COLL_HINT
427 };
428 struct {
429 __le32 alloc_hint_flags; //OP_SETALLOCHINT
430 };
431 };
432 __le64 expected_object_size; //OP_SETALLOCHINT
433 __le64 expected_write_size; //OP_SETALLOCHINT
434 __le32 split_bits; //OP_SPLIT_COLLECTION2,OP_COLL_SET_BITS,
435 //OP_MKCOLL
436 __le32 split_rem; //OP_SPLIT_COLLECTION2
437 } __attribute__ ((packed)) ;
438
439 struct TransactionData {
440 __le64 ops;
441 __le32 largest_data_len;
442 __le32 largest_data_off;
443 __le32 largest_data_off_in_data_bl;
444 __le32 fadvise_flags;
445
446 TransactionData() noexcept :
447 ops(0),
448 largest_data_len(0),
449 largest_data_off(0),
450 largest_data_off_in_data_bl(0),
451 fadvise_flags(0) { }
452
453 // override default move operations to reset default values
454 TransactionData(TransactionData&& other) noexcept :
455 ops(other.ops),
456 largest_data_len(other.largest_data_len),
457 largest_data_off(other.largest_data_off),
458 largest_data_off_in_data_bl(other.largest_data_off_in_data_bl),
459 fadvise_flags(other.fadvise_flags) {
460 other.ops = 0;
461 other.largest_data_len = 0;
462 other.largest_data_off = 0;
463 other.largest_data_off_in_data_bl = 0;
464 other.fadvise_flags = 0;
465 }
466 TransactionData& operator=(TransactionData&& other) noexcept {
467 ops = other.ops;
468 largest_data_len = other.largest_data_len;
469 largest_data_off = other.largest_data_off;
470 largest_data_off_in_data_bl = other.largest_data_off_in_data_bl;
471 fadvise_flags = other.fadvise_flags;
472 other.ops = 0;
473 other.largest_data_len = 0;
474 other.largest_data_off = 0;
475 other.largest_data_off_in_data_bl = 0;
476 other.fadvise_flags = 0;
477 return *this;
478 }
479
480 TransactionData(const TransactionData& other) = default;
481 TransactionData& operator=(const TransactionData& other) = default;
482
483 void encode(bufferlist& bl) const {
484 bl.append((char*)this, sizeof(TransactionData));
485 }
486 void decode(bufferlist::iterator &bl) {
487 bl.copy(sizeof(TransactionData), (char*)this);
488 }
489 } __attribute__ ((packed)) ;
490
491 private:
492 TransactionData data;
493
494 void *osr {nullptr}; // NULL on replay
495
496 map<coll_t, __le32> coll_index;
497 map<ghobject_t, __le32> object_index;
498
499 __le32 coll_id {0};
500 __le32 object_id {0};
501
502 bufferlist data_bl;
503 bufferlist op_bl;
504
505 bufferptr op_ptr;
506
507 list<Context *> on_applied;
508 list<Context *> on_commit;
509 list<Context *> on_applied_sync;
510
511 public:
512 Transaction() = default;
513
514 explicit Transaction(bufferlist::iterator &dp) {
515 decode(dp);
516 }
517 explicit Transaction(bufferlist &nbl) {
518 bufferlist::iterator dp = nbl.begin();
519 decode(dp);
520 }
521
522 // override default move operations to reset default values
523 Transaction(Transaction&& other) noexcept :
524 data(std::move(other.data)),
525 osr(other.osr),
526 coll_index(std::move(other.coll_index)),
527 object_index(std::move(other.object_index)),
528 coll_id(other.coll_id),
529 object_id(other.object_id),
530 data_bl(std::move(other.data_bl)),
531 op_bl(std::move(other.op_bl)),
532 op_ptr(std::move(other.op_ptr)),
533 on_applied(std::move(other.on_applied)),
534 on_commit(std::move(other.on_commit)),
535 on_applied_sync(std::move(other.on_applied_sync)) {
536 other.osr = nullptr;
537 other.coll_id = 0;
538 other.object_id = 0;
539 }
540
541 Transaction& operator=(Transaction&& other) noexcept {
542 data = std::move(other.data);
543 osr = other.osr;
544 coll_index = std::move(other.coll_index);
545 object_index = std::move(other.object_index);
546 coll_id = other.coll_id;
547 object_id = other.object_id;
548 data_bl = std::move(other.data_bl);
549 op_bl = std::move(other.op_bl);
550 op_ptr = std::move(other.op_ptr);
551 on_applied = std::move(other.on_applied);
552 on_commit = std::move(other.on_commit);
553 on_applied_sync = std::move(other.on_applied_sync);
554 other.osr = nullptr;
555 other.coll_id = 0;
556 other.object_id = 0;
557 return *this;
558 }
559
560 Transaction(const Transaction& other) = default;
561 Transaction& operator=(const Transaction& other) = default;
562
563 /* Operations on callback contexts */
564 void register_on_applied(Context *c) {
565 if (!c) return;
566 on_applied.push_back(c);
567 }
568 void register_on_commit(Context *c) {
569 if (!c) return;
570 on_commit.push_back(c);
571 }
572 void register_on_applied_sync(Context *c) {
573 if (!c) return;
574 on_applied_sync.push_back(c);
575 }
576 void register_on_complete(Context *c) {
577 if (!c) return;
578 RunOnDeleteRef _complete (std::make_shared<RunOnDelete>(c));
579 register_on_applied(new ContainerContext<RunOnDeleteRef>(_complete));
580 register_on_commit(new ContainerContext<RunOnDeleteRef>(_complete));
581 }
582
583 static void collect_contexts(
584 vector<Transaction>& t,
585 Context **out_on_applied,
586 Context **out_on_commit,
587 Context **out_on_applied_sync) {
588 assert(out_on_applied);
589 assert(out_on_commit);
590 assert(out_on_applied_sync);
591 list<Context *> on_applied, on_commit, on_applied_sync;
592 for (vector<Transaction>::iterator i = t.begin();
593 i != t.end();
594 ++i) {
595 on_applied.splice(on_applied.end(), (*i).on_applied);
596 on_commit.splice(on_commit.end(), (*i).on_commit);
597 on_applied_sync.splice(on_applied_sync.end(), (*i).on_applied_sync);
598 }
599 *out_on_applied = C_Contexts::list_to_context(on_applied);
600 *out_on_commit = C_Contexts::list_to_context(on_commit);
601 *out_on_applied_sync = C_Contexts::list_to_context(on_applied_sync);
602 }
603
604 Context *get_on_applied() {
605 return C_Contexts::list_to_context(on_applied);
606 }
607 Context *get_on_commit() {
608 return C_Contexts::list_to_context(on_commit);
609 }
610 Context *get_on_applied_sync() {
611 return C_Contexts::list_to_context(on_applied_sync);
612 }
613
614 void set_fadvise_flags(uint32_t flags) {
615 data.fadvise_flags = flags;
616 }
617 void set_fadvise_flag(uint32_t flag) {
618 data.fadvise_flags = data.fadvise_flags | flag;
619 }
620 uint32_t get_fadvise_flags() { return data.fadvise_flags; }
621
622 void swap(Transaction& other) noexcept {
623 std::swap(data, other.data);
624 std::swap(on_applied, other.on_applied);
625 std::swap(on_commit, other.on_commit);
626 std::swap(on_applied_sync, other.on_applied_sync);
627
628 std::swap(coll_index, other.coll_index);
629 std::swap(object_index, other.object_index);
630 std::swap(coll_id, other.coll_id);
631 std::swap(object_id, other.object_id);
632 op_bl.swap(other.op_bl);
633 data_bl.swap(other.data_bl);
634 }
635
636 void _update_op(Op* op,
637 vector<__le32> &cm,
638 vector<__le32> &om) {
639
640 switch (op->op) {
641 case OP_NOP:
642 case OP_STARTSYNC:
643 break;
644
645 case OP_TOUCH:
646 case OP_REMOVE:
647 case OP_SETATTR:
648 case OP_SETATTRS:
649 case OP_RMATTR:
650 case OP_RMATTRS:
651 case OP_COLL_REMOVE:
652 case OP_OMAP_CLEAR:
653 case OP_OMAP_SETKEYS:
654 case OP_OMAP_RMKEYS:
655 case OP_OMAP_RMKEYRANGE:
656 case OP_OMAP_SETHEADER:
657 case OP_WRITE:
658 case OP_ZERO:
659 case OP_TRUNCATE:
660 case OP_SETALLOCHINT:
661 assert(op->cid < cm.size());
662 assert(op->oid < om.size());
663 op->cid = cm[op->cid];
664 op->oid = om[op->oid];
665 break;
666
667 case OP_CLONERANGE2:
668 case OP_CLONE:
669 assert(op->cid < cm.size());
670 assert(op->oid < om.size());
671 assert(op->dest_oid < om.size());
672 op->cid = cm[op->cid];
673 op->oid = om[op->oid];
674 op->dest_oid = om[op->dest_oid];
675 break;
676
677 case OP_MKCOLL:
678 case OP_RMCOLL:
679 case OP_COLL_SETATTR:
680 case OP_COLL_RMATTR:
681 case OP_COLL_SETATTRS:
682 case OP_COLL_HINT:
683 case OP_COLL_SET_BITS:
684 assert(op->cid < cm.size());
685 op->cid = cm[op->cid];
686 break;
687
688 case OP_COLL_ADD:
689 assert(op->cid < cm.size());
690 assert(op->oid < om.size());
691 assert(op->dest_cid < om.size());
692 op->cid = cm[op->cid];
693 op->dest_cid = cm[op->dest_cid];
694 op->oid = om[op->oid];
695 break;
696
697 case OP_COLL_MOVE_RENAME:
698 assert(op->cid < cm.size());
699 assert(op->oid < om.size());
700 assert(op->dest_cid < cm.size());
701 assert(op->dest_oid < om.size());
702 op->cid = cm[op->cid];
703 op->oid = om[op->oid];
704 op->dest_cid = cm[op->dest_cid];
705 op->dest_oid = om[op->dest_oid];
706 break;
707
708 case OP_TRY_RENAME:
709 assert(op->cid < cm.size());
710 assert(op->oid < om.size());
711 assert(op->dest_oid < om.size());
712 op->cid = cm[op->cid];
713 op->oid = om[op->oid];
714 op->dest_oid = om[op->dest_oid];
715 break;
716
717 case OP_SPLIT_COLLECTION2:
718 assert(op->cid < cm.size());
719 assert(op->dest_cid < cm.size());
720 op->cid = cm[op->cid];
721 op->dest_cid = cm[op->dest_cid];
722 break;
723
724 default:
725 assert(0 == "Unknown OP");
726 }
727 }
728 void _update_op_bl(
729 bufferlist& bl,
730 vector<__le32> &cm,
731 vector<__le32> &om) {
732
733 list<bufferptr> list = bl.buffers();
734 std::list<bufferptr>::iterator p;
735
736 for(p = list.begin(); p != list.end(); ++p) {
737 assert(p->length() % sizeof(Op) == 0);
738
739 char* raw_p = p->c_str();
740 char* raw_end = raw_p + p->length();
741 while (raw_p < raw_end) {
742 _update_op(reinterpret_cast<Op*>(raw_p), cm, om);
743 raw_p += sizeof(Op);
744 }
745 }
746 }
747 /// Append the operations of the parameter to this Transaction. Those operations are removed from the parameter Transaction
748 void append(Transaction& other) {
749
750 data.ops += other.data.ops;
751 if (other.data.largest_data_len > data.largest_data_len) {
752 data.largest_data_len = other.data.largest_data_len;
753 data.largest_data_off = other.data.largest_data_off;
754 data.largest_data_off_in_data_bl = data_bl.length() + other.data.largest_data_off_in_data_bl;
755 }
756 data.fadvise_flags |= other.data.fadvise_flags;
757 on_applied.splice(on_applied.end(), other.on_applied);
758 on_commit.splice(on_commit.end(), other.on_commit);
759 on_applied_sync.splice(on_applied_sync.end(), other.on_applied_sync);
760
761 //append coll_index & object_index
762 vector<__le32> cm(other.coll_index.size());
763 map<coll_t, __le32>::iterator coll_index_p;
764 for (coll_index_p = other.coll_index.begin();
765 coll_index_p != other.coll_index.end();
766 ++coll_index_p) {
767 cm[coll_index_p->second] = _get_coll_id(coll_index_p->first);
768 }
769
770 vector<__le32> om(other.object_index.size());
771 map<ghobject_t, __le32>::iterator object_index_p;
772 for (object_index_p = other.object_index.begin();
773 object_index_p != other.object_index.end();
774 ++object_index_p) {
775 om[object_index_p->second] = _get_object_id(object_index_p->first);
776 }
777
778 //the other.op_bl SHOULD NOT be changed during the append operation;
779 //we use an additional bufferlist to avoid this problem
780 bufferptr other_op_bl_ptr(other.op_bl.length());
781 other.op_bl.copy(0, other.op_bl.length(), other_op_bl_ptr.c_str());
782 bufferlist other_op_bl;
783 other_op_bl.append(other_op_bl_ptr);
784
785 //update other_op_bl with cm & om
786 //When the other is appended to current transaction, all coll_index and
787 //object_index in other.op_buffer should be updated by new index of the
788 //combined transaction
789 _update_op_bl(other_op_bl, cm, om);
790
791 //append op_bl
792 op_bl.append(other_op_bl);
793 //append data_bl
794 data_bl.append(other.data_bl);
795 }
796
797 /** Inquires about the Transaction as a whole. */
798
799 /// How big is the encoded Transaction buffer?
800 uint64_t get_encoded_bytes() {
801 //layout: data_bl + op_bl + coll_index + object_index + data
802
803 // coll_index size, object_index size and sizeof(transaction_data)
804 // all here, so they may be computed at compile-time
805 size_t final_size = sizeof(__u32) * 2 + sizeof(data);
806
807 // coll_index second and object_index second
808 final_size += (coll_index.size() + object_index.size()) * sizeof(__le32);
809
810 // coll_index first
811 for (auto p = coll_index.begin(); p != coll_index.end(); ++p) {
812 final_size += p->first.encoded_size();
813 }
814
815 // object_index first
816 for (auto p = object_index.begin(); p != object_index.end(); ++p) {
817 final_size += p->first.encoded_size();
818 }
819
820 return data_bl.length() +
821 op_bl.length() +
822 final_size;
823 }
824
825 /// Retain old version for regression testing purposes
826 uint64_t get_encoded_bytes_test() {
827 //layout: data_bl + op_bl + coll_index + object_index + data
828 bufferlist bl;
829 ::encode(coll_index, bl);
830 ::encode(object_index, bl);
831
832 return data_bl.length() +
833 op_bl.length() +
834 bl.length() +
835 sizeof(data);
836 }
837
838 uint64_t get_num_bytes() {
839 return get_encoded_bytes();
840 }
841 /// Size of the largest data buffer passed to a "write" operation so far
842 uint32_t get_data_length() {
843 return data.largest_data_len;
844 }
845 /// offset within the encoded buffer to the start of the largest data buffer that's encoded
846 uint32_t get_data_offset() {
847 if (data.largest_data_off_in_data_bl) {
848 return data.largest_data_off_in_data_bl +
849 sizeof(__u8) + // encode struct_v
850 sizeof(__u8) + // encode compat_v
851 sizeof(__u32) + // encode len
852 sizeof(__u32); // data_bl len
853 }
854 return 0; // none
855 }
856 /// offset of buffer as aligned to destination within object.
857 int get_data_alignment() {
858 if (!data.largest_data_len)
859 return -1;
860 return (0 - get_data_offset()) & ~CEPH_PAGE_MASK;
861 }
862 /// Is the Transaction empty (no operations)
863 bool empty() {
864 return !data.ops;
865 }
866 /// Number of operations in the transaction
867 int get_num_ops() {
868 return data.ops;
869 }
870
871 void set_osr(void *s) {
872 osr = s;
873 }
874
875 void *get_osr() {
876 return osr;
877 }
878
879 /**
880 * iterator
881 *
882 * Helper object to parse Transactions.
883 *
884 * ObjectStore instances use this object to step through the encoded
885 * buffer, decoding operation codes and parameters as they go.
886 *
887 */
888 class iterator {
889 Transaction *t;
890
891 uint64_t ops;
892 char* op_buffer_p;
893
894 bufferlist::iterator data_bl_p;
895
896 public:
897 vector<coll_t> colls;
898 vector<ghobject_t> objects;
899
900 private:
901 explicit iterator(Transaction *t)
902 : t(t),
903 data_bl_p(t->data_bl.begin()),
904 colls(t->coll_index.size()),
905 objects(t->object_index.size()) {
906
907 ops = t->data.ops;
908 op_buffer_p = t->op_bl.get_contiguous(0, t->data.ops * sizeof(Op));
909
910 map<coll_t, __le32>::iterator coll_index_p;
911 for (coll_index_p = t->coll_index.begin();
912 coll_index_p != t->coll_index.end();
913 ++coll_index_p) {
914 colls[coll_index_p->second] = coll_index_p->first;
915 }
916
917 map<ghobject_t, __le32>::iterator object_index_p;
918 for (object_index_p = t->object_index.begin();
919 object_index_p != t->object_index.end();
920 ++object_index_p) {
921 objects[object_index_p->second] = object_index_p->first;
922 }
923 }
924
925 friend class Transaction;
926
927 public:
928
929 bool have_op() {
930 return ops > 0;
931 }
932 Op* decode_op() {
933 assert(ops > 0);
934
935 Op* op = reinterpret_cast<Op*>(op_buffer_p);
936 op_buffer_p += sizeof(Op);
937 ops--;
938
939 return op;
940 }
941 string decode_string() {
942 string s;
943 ::decode(s, data_bl_p);
944 return s;
945 }
946 void decode_bp(bufferptr& bp) {
947 ::decode(bp, data_bl_p);
948 }
949 void decode_bl(bufferlist& bl) {
950 ::decode(bl, data_bl_p);
951 }
952 void decode_attrset(map<string,bufferptr>& aset) {
953 ::decode(aset, data_bl_p);
954 }
955 void decode_attrset(map<string,bufferlist>& aset) {
956 ::decode(aset, data_bl_p);
957 }
958 void decode_attrset_bl(bufferlist *pbl) {
959 decode_str_str_map_to_bl(data_bl_p, pbl);
960 }
961 void decode_keyset(set<string> &keys){
962 ::decode(keys, data_bl_p);
963 }
964 void decode_keyset_bl(bufferlist *pbl){
965 decode_str_set_to_bl(data_bl_p, pbl);
966 }
967
968 const ghobject_t &get_oid(__le32 oid_id) {
969 assert(oid_id < objects.size());
970 return objects[oid_id];
971 }
972 const coll_t &get_cid(__le32 cid_id) {
973 assert(cid_id < colls.size());
974 return colls[cid_id];
975 }
976 uint32_t get_fadvise_flags() const {
977 return t->get_fadvise_flags();
978 }
979 };
980
981 iterator begin() {
982 return iterator(this);
983 }
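    /*
     * Sketch of how an implementation typically consumes a Transaction
     * (simplified; error handling, the remaining opcodes, and the assumed
     * do_touch/do_write helpers are omitted or hypothetical):
     *
     *   Transaction::iterator i = t.begin();
     *   while (i.have_op()) {
     *     Transaction::Op *op = i.decode_op();
     *     const coll_t &cid = i.get_cid(op->cid);
     *     const ghobject_t &oid = i.get_oid(op->oid);
     *     switch (op->op) {
     *     case Transaction::OP_TOUCH:
     *       do_touch(cid, oid);
     *       break;
     *     case Transaction::OP_WRITE: {
     *       bufferlist bl;
     *       i.decode_bl(bl);                 // payload lives in data_bl
     *       do_write(cid, oid, op->off, op->len, bl);
     *       break;
     *     }
     *     }
     *   }
     */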
984
985 private:
986 void _build_actions_from_tbl();
987
988 /**
989 * Helper functions to encode the various mutation elements of a
990 * transaction. These are 1:1 with the operation codes (see
991 * enumeration above). These routines ensure that the
992 * encoder/creator of a transaction gets the right data in the
993 * right place. Sadly, there's no corresponding version nor any
994 * form of seat belts for the decoder.
995 */
996 Op* _get_next_op() {
997 if (op_ptr.length() == 0 || op_ptr.offset() >= op_ptr.length()) {
998 op_ptr = bufferptr(sizeof(Op) * OPS_PER_PTR);
999 }
1000 bufferptr ptr(op_ptr, 0, sizeof(Op));
1001 op_bl.append(ptr);
1002
1003 op_ptr.set_offset(op_ptr.offset() + sizeof(Op));
1004
1005 char* p = ptr.c_str();
1006 memset(p, 0, sizeof(Op));
1007 return reinterpret_cast<Op*>(p);
1008 }
1009 __le32 _get_coll_id(const coll_t& coll) {
1010 map<coll_t, __le32>::iterator c = coll_index.find(coll);
1011 if (c != coll_index.end())
1012 return c->second;
1013
1014 __le32 index_id = coll_id++;
1015 coll_index[coll] = index_id;
1016 return index_id;
1017 }
1018 __le32 _get_object_id(const ghobject_t& oid) {
1019 map<ghobject_t, __le32>::iterator o = object_index.find(oid);
1020 if (o != object_index.end())
1021 return o->second;
1022
1023 __le32 index_id = object_id++;
1024 object_index[oid] = index_id;
1025 return index_id;
1026 }
1027
1028 public:
1029 /// Commence a global file system sync operation.
1030 void start_sync() {
1031 Op* _op = _get_next_op();
1032 _op->op = OP_STARTSYNC;
1033 data.ops++;
1034 }
1035 /// noop. 'nuf said
1036 void nop() {
1037 Op* _op = _get_next_op();
1038 _op->op = OP_NOP;
1039 data.ops++;
1040 }
1041 /**
1042 * touch
1043 *
1044 * Ensure the existence of an object in a collection. Create an
1045 * empty object if necessary.
1046 */
1047 void touch(const coll_t& cid, const ghobject_t& oid) {
1048 Op* _op = _get_next_op();
1049 _op->op = OP_TOUCH;
1050 _op->cid = _get_coll_id(cid);
1051 _op->oid = _get_object_id(oid);
1052 data.ops++;
1053 }
1054 /**
1055 * Write data to an offset within an object. If the object is too
1056 * small, it is expanded as needed. It is possible to specify an
1057 * offset beyond the current end of an object and it will be
1058 * expanded as needed. Simple implementations of ObjectStore will
1059 * just zero the data between the old end of the object and the
1060 * newly provided data. More sophisticated implementations of
1061 * ObjectStore will omit the untouched data and store it as a
1062 * "hole" in the file.
1063 *
1064 * Note that a 0-length write does not affect the size of the object.
1065 */
1066 void write(const coll_t& cid, const ghobject_t& oid, uint64_t off, uint64_t len,
1067 const bufferlist& write_data, uint32_t flags = 0) {
1068 uint32_t orig_len = data_bl.length();
1069 Op* _op = _get_next_op();
1070 _op->op = OP_WRITE;
1071 _op->cid = _get_coll_id(cid);
1072 _op->oid = _get_object_id(oid);
1073 _op->off = off;
1074 _op->len = len;
1075 ::encode(write_data, data_bl);
1076
1077 assert(len == write_data.length());
1078 data.fadvise_flags = data.fadvise_flags | flags;
1079 if (write_data.length() > data.largest_data_len) {
1080 data.largest_data_len = write_data.length();
1081 data.largest_data_off = off;
1082 data.largest_data_off_in_data_bl = orig_len + sizeof(__u32); // skip the __u32 length that ::encode() prepends to the data
1083 }
1084 data.ops++;
1085 }
1086 /**
1087 * zero out the indicated byte range within an object. Some
1088 * ObjectStore instances may optimize this to release the
1089 * underlying storage space.
1090 *
1091 * If the zero range extends beyond the end of the object, the object
1092 * size is extended, just as if we were writing a buffer full of zeros.
1093 * EXCEPT if the length is 0, in which case (just like a 0-length write)
1094 * we do not adjust the object size.
1095 */
1096 void zero(const coll_t& cid, const ghobject_t& oid, uint64_t off, uint64_t len) {
1097 Op* _op = _get_next_op();
1098 _op->op = OP_ZERO;
1099 _op->cid = _get_coll_id(cid);
1100 _op->oid = _get_object_id(oid);
1101 _op->off = off;
1102 _op->len = len;
1103 data.ops++;
1104 }
1105 /// Discard all data in the object beyond the specified size.
1106 void truncate(const coll_t& cid, const ghobject_t& oid, uint64_t off) {
1107 Op* _op = _get_next_op();
1108 _op->op = OP_TRUNCATE;
1109 _op->cid = _get_coll_id(cid);
1110 _op->oid = _get_object_id(oid);
1111 _op->off = off;
1112 data.ops++;
1113 }
1114 /// Remove an object. All four parts of the object are removed.
1115 void remove(const coll_t& cid, const ghobject_t& oid) {
1116 Op* _op = _get_next_op();
1117 _op->op = OP_REMOVE;
1118 _op->cid = _get_coll_id(cid);
1119 _op->oid = _get_object_id(oid);
1120 data.ops++;
1121 }
1122 /// Set an xattr of an object
1123 void setattr(const coll_t& cid, const ghobject_t& oid, const char* name, bufferlist& val) {
1124 string n(name);
1125 setattr(cid, oid, n, val);
1126 }
1127 /// Set an xattr of an object
1128 void setattr(const coll_t& cid, const ghobject_t& oid, const string& s, bufferlist& val) {
1129 Op* _op = _get_next_op();
1130 _op->op = OP_SETATTR;
1131 _op->cid = _get_coll_id(cid);
1132 _op->oid = _get_object_id(oid);
1133 ::encode(s, data_bl);
1134 ::encode(val, data_bl);
1135 data.ops++;
1136 }
1137 /// Set multiple xattrs of an object
1138 void setattrs(const coll_t& cid, const ghobject_t& oid, const map<string,bufferptr>& attrset) {
1139 Op* _op = _get_next_op();
1140 _op->op = OP_SETATTRS;
1141 _op->cid = _get_coll_id(cid);
1142 _op->oid = _get_object_id(oid);
1143 ::encode(attrset, data_bl);
1144 data.ops++;
1145 }
1146 /// Set multiple xattrs of an object
1147 void setattrs(const coll_t& cid, const ghobject_t& oid, const map<string,bufferlist>& attrset) {
1148 Op* _op = _get_next_op();
1149 _op->op = OP_SETATTRS;
1150 _op->cid = _get_coll_id(cid);
1151 _op->oid = _get_object_id(oid);
1152 ::encode(attrset, data_bl);
1153 data.ops++;
1154 }
1155 /// remove an xattr from an object
1156 void rmattr(const coll_t& cid, const ghobject_t& oid, const char *name) {
1157 string n(name);
1158 rmattr(cid, oid, n);
1159 }
1160 /// remove an xattr from an object
1161 void rmattr(const coll_t& cid, const ghobject_t& oid, const string& s) {
1162 Op* _op = _get_next_op();
1163 _op->op = OP_RMATTR;
1164 _op->cid = _get_coll_id(cid);
1165 _op->oid = _get_object_id(oid);
1166 ::encode(s, data_bl);
1167 data.ops++;
1168 }
1169 /// remove all xattrs from an object
1170 void rmattrs(const coll_t& cid, const ghobject_t& oid) {
1171 Op* _op = _get_next_op();
1172 _op->op = OP_RMATTRS;
1173 _op->cid = _get_coll_id(cid);
1174 _op->oid = _get_object_id(oid);
1175 data.ops++;
1176 }
1177 /**
1178 * Clone an object into another object.
1179 *
1180 * Low-cost (e.g., O(1)) cloning (if supported) is best, but
1181 * fallback to an O(n) copy is allowed. All four parts of the
1182 * object are cloned (data, xattrs, omap header, omap
1183 * entries).
1184 *
1185 * The destination named object may already exist, in
1186 * which case its previous contents are discarded.
1187 */
1188 void clone(const coll_t& cid, const ghobject_t& oid,
1189 const ghobject_t& noid) {
1190 Op* _op = _get_next_op();
1191 _op->op = OP_CLONE;
1192 _op->cid = _get_coll_id(cid);
1193 _op->oid = _get_object_id(oid);
1194 _op->dest_oid = _get_object_id(noid);
1195 data.ops++;
1196 }
1197 /**
1198 * Clone a byte range from one object to another.
1199 *
1200 * The data portion of the destination object receives a copy of a
1201 * portion of the data from the source object. None of the other
1202 * three parts of an object is copied from the source.
1203 *
1204 * The destination object size may be extended to dstoff + len.
1205 *
1206 * The source range *must* overlap with the source object data. If it does
1207 * not, the result is undefined.
1208 */
1209 void clone_range(const coll_t& cid, const ghobject_t& oid,
1210 const ghobject_t& noid,
1211 uint64_t srcoff, uint64_t srclen, uint64_t dstoff) {
1212 Op* _op = _get_next_op();
1213 _op->op = OP_CLONERANGE2;
1214 _op->cid = _get_coll_id(cid);
1215 _op->oid = _get_object_id(oid);
1216 _op->dest_oid = _get_object_id(noid);
1217 _op->off = srcoff;
1218 _op->len = srclen;
1219 _op->dest_off = dstoff;
1220 data.ops++;
1221 }
1222
1223 /// Create the collection
1224 void create_collection(const coll_t& cid, int bits) {
1225 Op* _op = _get_next_op();
1226 _op->op = OP_MKCOLL;
1227 _op->cid = _get_coll_id(cid);
1228 _op->split_bits = bits;
1229 data.ops++;
1230 }
1231
1232 /**
1233 * Give the collection a hint.
1234 *
1235 * @param cid - collection id.
1236 * @param type - hint type.
1237 * @param hint - the hint payload, which contains the customized
1238 * data along with the hint type.
1239 */
1240 void collection_hint(const coll_t& cid, uint32_t type, const bufferlist& hint) {
1241 Op* _op = _get_next_op();
1242 _op->op = OP_COLL_HINT;
1243 _op->cid = _get_coll_id(cid);
1244 _op->hint_type = type;
1245 ::encode(hint, data_bl);
1246 data.ops++;
1247 }
1248
1249 /// remove the collection; the collection must be empty
1250 void remove_collection(const coll_t& cid) {
1251 Op* _op = _get_next_op();
1252 _op->op = OP_RMCOLL;
1253 _op->cid = _get_coll_id(cid);
1254 data.ops++;
1255 }
1256 void collection_move(const coll_t& cid, coll_t oldcid, const ghobject_t& oid)
1257 __attribute__ ((deprecated)) {
1258 // NOTE: we encode this as a fixed combo of ADD + REMOVE. they
1259 // always appear together, so this is effectively a single MOVE.
1260 Op* _op = _get_next_op();
1261 _op->op = OP_COLL_ADD;
1262 _op->cid = _get_coll_id(oldcid);
1263 _op->oid = _get_object_id(oid);
1264 _op->dest_cid = _get_coll_id(cid);
1265 data.ops++;
1266
1267 _op = _get_next_op();
1268 _op->op = OP_COLL_REMOVE;
1269 _op->cid = _get_coll_id(oldcid);
1270 _op->oid = _get_object_id(oid);
1271 data.ops++;
1272 }
1273 void collection_move_rename(const coll_t& oldcid, const ghobject_t& oldoid,
1274 coll_t cid, const ghobject_t& oid) {
1275 Op* _op = _get_next_op();
1276 _op->op = OP_COLL_MOVE_RENAME;
1277 _op->cid = _get_coll_id(oldcid);
1278 _op->oid = _get_object_id(oldoid);
1279 _op->dest_cid = _get_coll_id(cid);
1280 _op->dest_oid = _get_object_id(oid);
1281 data.ops++;
1282 }
1283 void try_rename(coll_t cid, const ghobject_t& oldoid,
1284 const ghobject_t& oid) {
1285 Op* _op = _get_next_op();
1286 _op->op = OP_TRY_RENAME;
1287 _op->cid = _get_coll_id(cid);
1288 _op->oid = _get_object_id(oldoid);
1289 _op->dest_oid = _get_object_id(oid);
1290 data.ops++;
1291 }
1292
1293 /// Remove omap from oid
1294 void omap_clear(
1295 coll_t cid, ///< [in] Collection containing oid
1296 const ghobject_t &oid ///< [in] Object from which to remove omap
1297 ) {
1298 Op* _op = _get_next_op();
1299 _op->op = OP_OMAP_CLEAR;
1300 _op->cid = _get_coll_id(cid);
1301 _op->oid = _get_object_id(oid);
1302 data.ops++;
1303 }
1304 /// Set keys on oid omap. Replaces duplicate keys.
1305 void omap_setkeys(
1306 const coll_t& cid, ///< [in] Collection containing oid
1307 const ghobject_t &oid, ///< [in] Object to update
1308 const map<string, bufferlist> &attrset ///< [in] Replacement keys and values
1309 ) {
1310 Op* _op = _get_next_op();
1311 _op->op = OP_OMAP_SETKEYS;
1312 _op->cid = _get_coll_id(cid);
1313 _op->oid = _get_object_id(oid);
1314 ::encode(attrset, data_bl);
1315 data.ops++;
1316 }
1317
1318 /// Set keys on an oid omap (bufferlist variant).
1319 void omap_setkeys(
1320 coll_t cid, ///< [in] Collection containing oid
1321 const ghobject_t &oid, ///< [in] Object to update
1322 const bufferlist &attrset_bl ///< [in] Replacement keys and values
1323 ) {
1324 Op* _op = _get_next_op();
1325 _op->op = OP_OMAP_SETKEYS;
1326 _op->cid = _get_coll_id(cid);
1327 _op->oid = _get_object_id(oid);
1328 data_bl.append(attrset_bl);
1329 data.ops++;
1330 }
1331
1332 /// Remove keys from oid omap
1333 void omap_rmkeys(
1334 coll_t cid, ///< [in] Collection containing oid
1335 const ghobject_t &oid, ///< [in] Object from which to remove the omap
1336 const set<string> &keys ///< [in] Keys to clear
1337 ) {
1338 Op* _op = _get_next_op();
1339 _op->op = OP_OMAP_RMKEYS;
1340 _op->cid = _get_coll_id(cid);
1341 _op->oid = _get_object_id(oid);
1342 ::encode(keys, data_bl);
1343 data.ops++;
1344 }
1345
1346 /// Remove keys from oid omap
1347 void omap_rmkeys(
1348 coll_t cid, ///< [in] Collection containing oid
1349 const ghobject_t &oid, ///< [in] Object from which to remove the omap
1350 const bufferlist &keys_bl ///< [in] Keys to clear
1351 ) {
1352 Op* _op = _get_next_op();
1353 _op->op = OP_OMAP_RMKEYS;
1354 _op->cid = _get_coll_id(cid);
1355 _op->oid = _get_object_id(oid);
1356 data_bl.append(keys_bl);
1357 data.ops++;
1358 }
1359
1360 /// Remove key range from oid omap
1361 void omap_rmkeyrange(
1362 coll_t cid, ///< [in] Collection containing oid
1363 const ghobject_t &oid, ///< [in] Object from which to remove the omap keys
1364 const string& first, ///< [in] first key in range
1365 const string& last ///< [in] first key past range, range is [first,last)
1366 ) {
1367 Op* _op = _get_next_op();
1368 _op->op = OP_OMAP_RMKEYRANGE;
1369 _op->cid = _get_coll_id(cid);
1370 _op->oid = _get_object_id(oid);
1371 ::encode(first, data_bl);
1372 ::encode(last, data_bl);
1373 data.ops++;
1374 }
1375
1376 /// Set omap header
1377 void omap_setheader(
1378 coll_t cid, ///< [in] Collection containing oid
1379 const ghobject_t &oid, ///< [in] Object
1380 const bufferlist &bl ///< [in] Header value
1381 ) {
1382 Op* _op = _get_next_op();
1383 _op->op = OP_OMAP_SETHEADER;
1384 _op->cid = _get_coll_id(cid);
1385 _op->oid = _get_object_id(oid);
1386 ::encode(bl, data_bl);
1387 data.ops++;
1388 }
1389
1390 /// Split the collection based on the given prefixes; objects matching the specified bits/rem are
1391 /// moved to the new collection
1392 void split_collection(
1393 coll_t cid,
1394 uint32_t bits,
1395 uint32_t rem,
1396 coll_t destination) {
1397 Op* _op = _get_next_op();
1398 _op->op = OP_SPLIT_COLLECTION2;
1399 _op->cid = _get_coll_id(cid);
1400 _op->dest_cid = _get_coll_id(destination);
1401 _op->split_bits = bits;
1402 _op->split_rem = rem;
1403 data.ops++;
1404 }
1405
1406 void collection_set_bits(
1407 coll_t cid,
1408 int bits) {
1409 Op* _op = _get_next_op();
1410 _op->op = OP_COLL_SET_BITS;
1411 _op->cid = _get_coll_id(cid);
1412 _op->split_bits = bits;
1413 data.ops++;
1414 }
1415
1416 /// Set allocation hint for an object.
1417 /// A value of 0 for expected_object_size or expected_write_size is a no-op in all implementations.
1418 void set_alloc_hint(
1419 coll_t cid,
1420 const ghobject_t &oid,
1421 uint64_t expected_object_size,
1422 uint64_t expected_write_size,
1423 uint32_t flags
1424 ) {
1425 Op* _op = _get_next_op();
1426 _op->op = OP_SETALLOCHINT;
1427 _op->cid = _get_coll_id(cid);
1428 _op->oid = _get_object_id(oid);
1429 _op->expected_object_size = expected_object_size;
1430 _op->expected_write_size = expected_write_size;
1431 _op->alloc_hint_flags = flags;
1432 data.ops++;
1433 }
1434
1435 void encode(bufferlist& bl) const {
1436 //layout: data_bl + op_bl + coll_index + object_index + data
1437 ENCODE_START(9, 9, bl);
1438 ::encode(data_bl, bl);
1439 ::encode(op_bl, bl);
1440 ::encode(coll_index, bl);
1441 ::encode(object_index, bl);
1442 data.encode(bl);
1443 ENCODE_FINISH(bl);
1444 }
1445
1446 void decode(bufferlist::iterator &bl) {
1447 DECODE_START(9, bl);
1448 DECODE_OLDEST(9);
1449
1450 ::decode(data_bl, bl);
1451 ::decode(op_bl, bl);
1452 ::decode(coll_index, bl);
1453 ::decode(object_index, bl);
1454 data.decode(bl);
1455 coll_id = coll_index.size();
1456 object_id = object_index.size();
1457
1458 DECODE_FINISH(bl);
1459 }
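    /*
     * Round-trip sketch: a Transaction can be serialized (e.g., for a
     * journal) and rebuilt from its encoded form.
     *
     *   bufferlist bl;
     *   t.encode(bl);                        // data_bl + op_bl + indexes + data
     *   // ... persist or ship bl ...
     *   bufferlist::iterator p = bl.begin();
     *   ObjectStore::Transaction t2(p);      // decodes in the constructor
     */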
1460
1461 void dump(ceph::Formatter *f);
1462 static void generate_test_instances(list<Transaction*>& o);
1463 };
1464
1465 // synchronous wrappers
1466 unsigned apply_transaction(Sequencer *osr, Transaction&& t, Context *ondisk=0) {
1467 vector<Transaction> tls;
1468 tls.push_back(std::move(t));
1469 return apply_transactions(osr, tls, ondisk);
1470 }
1471 unsigned apply_transactions(Sequencer *osr, vector<Transaction>& tls, Context *ondisk=0);
1472
1473 int queue_transaction(Sequencer *osr, Transaction&& t, Context *onreadable, Context *ondisk=0,
1474 Context *onreadable_sync=0,
1475 TrackedOpRef op = TrackedOpRef(),
1476 ThreadPool::TPHandle *handle = NULL) {
1477 vector<Transaction> tls;
1478 tls.push_back(std::move(t));
1479 return queue_transactions(osr, tls, onreadable, ondisk, onreadable_sync,
1480 op, handle);
1481 }
1482
1483 int queue_transactions(Sequencer *osr, vector<Transaction>& tls,
1484 Context *onreadable, Context *ondisk=0,
1485 Context *onreadable_sync=0,
1486 TrackedOpRef op = TrackedOpRef(),
1487 ThreadPool::TPHandle *handle = NULL) {
1488 assert(!tls.empty());
1489 tls.back().register_on_applied(onreadable);
1490 tls.back().register_on_commit(ondisk);
1491 tls.back().register_on_applied_sync(onreadable_sync);
1492 return queue_transactions(osr, tls, op, handle);
1493 }
1494
1495 virtual int queue_transactions(
1496 Sequencer *osr, vector<Transaction>& tls,
1497 TrackedOpRef op = TrackedOpRef(),
1498 ThreadPool::TPHandle *handle = NULL) = 0;
1499
1500
1501 int queue_transactions(
1502 Sequencer *osr,
1503 vector<Transaction>& tls,
1504 Context *onreadable,
1505 Context *oncommit,
1506 Context *onreadable_sync,
1507 Context *oncomplete,
1508 TrackedOpRef op);
1509
1510 int queue_transaction(
1511 Sequencer *osr,
1512 Transaction&& t,
1513 Context *onreadable,
1514 Context *oncommit,
1515 Context *onreadable_sync,
1516 Context *oncomplete,
1517 TrackedOpRef op) {
1518
1519 vector<Transaction> tls;
1520 tls.push_back(std::move(t));
1521 return queue_transactions(
1522 osr, tls, onreadable, oncommit, onreadable_sync, oncomplete, op);
1523 }
1524
1525 public:
1526 ObjectStore(CephContext* cct,
1527 const std::string& path_) : path(path_), cct(cct) {}
1528 virtual ~ObjectStore() {}
1529
1530 // no copying
1531 explicit ObjectStore(const ObjectStore& o) = delete;
1532 const ObjectStore& operator=(const ObjectStore& o) = delete;
1533
1534 // versioning
1535 virtual int upgrade() {
1536 return 0;
1537 }
1538
1539 virtual void get_db_statistics(Formatter *f) { }
1540 virtual void generate_db_histogram(Formatter *f) { }
1541 virtual void flush_cache() { }
1542 virtual void dump_perf_counters(Formatter *f) {}
1543
1544 virtual string get_type() = 0;
1545
1546 // mgmt
1547 virtual bool test_mount_in_use() = 0;
1548 virtual int mount() = 0;
1549 virtual int umount() = 0;
1550 virtual int fsck(bool deep) {
1551 return -EOPNOTSUPP;
1552 }
1553 virtual int repair(bool deep) {
1554 return -EOPNOTSUPP;
1555 }
1556
1557 virtual void set_cache_shards(unsigned num) { }
1558
1559 /**
1560 * Returns 0 if the hobject is valid, -error otherwise
1561 *
1562 * Errors:
1563 * -ENAMETOOLONG: locator/namespace/name too large
1564 */
1565 virtual int validate_hobject_key(const hobject_t &obj) const = 0;
1566
1567 virtual unsigned get_max_attr_name_length() = 0;
1568 virtual int mkfs() = 0; // wipe
1569 virtual int mkjournal() = 0; // journal only
1570 virtual bool needs_journal() = 0; //< requires a journal
1571 virtual bool wants_journal() = 0; //< prefers a journal
1572 virtual bool allows_journal() = 0; //< allows a journal
1573
1574 /**
1575 * is_rotational
1576 *
1577 * Check whether store is backed by a rotational (HDD) or non-rotational
1578 * (SSD) device.
1579 *
1580 * This must be usable *before* the store is mounted.
1581 *
1582 * @return true for HDD, false for SSD
1583 */
1584 virtual bool is_rotational() {
1585 return true;
1586 }
1587
1588 /**
1589 * is_journal_rotational
1590 *
1591 * Check whether journal is backed by a rotational (HDD) or non-rotational
1592 * (SSD) device.
1593 *
1594 *
1595 * @return true for HDD, false for SSD
1596 */
1597 virtual bool is_journal_rotational() {
1598 return true;
1599 }
1600
1601 virtual string get_default_device_class() {
1602 return is_rotational() ? "hdd" : "ssd";
1603 }
1604
1605 virtual bool can_sort_nibblewise() {
1606 return false; // assume a backend cannot, unless it says otherwise
1607 }
1608
1609 virtual int statfs(struct store_statfs_t *buf) = 0;
1610
1611 virtual void collect_metadata(map<string,string> *pm) { }
1612
1613 /**
1614 * write_meta - write a simple configuration key out-of-band
1615 *
1616 * Write a simple key/value pair for basic store configuration
1617 * (e.g., a uuid or magic number) to an unopened/unmounted store.
1618 * The default implementation writes this to a plaintext file in the
1619 * path.
1620 *
1621 * A newline is appended.
1622 *
1623 * @param key key name (e.g., "fsid")
1624 * @param value value (e.g., a uuid rendered as a string)
1625 * @returns 0 for success, or an error code
1626 */
1627 virtual int write_meta(const std::string& key,
1628 const std::string& value);
1629
1630 /**
1631 * read_meta - read a simple configuration key out-of-band
1632 *
1633 * Read a simple key/value pair from an unopened/unmounted store.
1634 *
1635 * Trailing whitespace is stripped off.
1636 *
1637 * @param key key name
1638 * @param value pointer to value string
1639 * @returns 0 for success, or an error code
1640 */
1641 virtual int read_meta(const std::string& key,
1642 std::string *value);
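  /*
   * Sketch (assumed names: store, fsid_str): meta keys are simple strings
   * written and read while the store is not mounted.
   *
   *   store->write_meta("fsid", fsid_str);
   *   std::string v;
   *   store->read_meta("fsid", &v);          // v == fsid_str, trailing whitespace stripped
   */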
1643
1644 /**
1645 * get ideal max value for collection_list()
1646 *
1647 * default to some arbitrary values; the implementation will override.
1648 */
1649 virtual int get_ideal_list_max() { return 64; }
1650
1651
1652 /**
1653 * get a collection handle
1654 *
1655 * Provide a trivial handle as a default to avoid converting legacy
1656 * implementations.
1657 */
1658 virtual CollectionHandle open_collection(const coll_t &cid) {
1659 return new CompatCollectionHandle(cid);
1660 }
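  /*
   * Handle-based read sketch (assumed names: store, cid, oid): open the
   * collection once and reuse the handle for subsequent reads.
   *
   *   ObjectStore::CollectionHandle ch = store->open_collection(cid);
   *   if (store->exists(ch, oid)) {
   *     bufferlist bl;
   *     store->read(ch, oid, 0, 4096, bl);   // read the first 4 KiB
   *   }
   */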
1661
1662
1663 /**
1664 * Synchronous read operations
1665 */
1666
1667 /**
1668 * exists -- Test for existence of an object
1669 *
1670 * @param cid collection for object
1671 * @param oid oid of object
1672 * @returns true if object exists, false otherwise
1673 */
1674 virtual bool exists(const coll_t& cid, const ghobject_t& oid) = 0; // useful?
1675 virtual bool exists(CollectionHandle& c, const ghobject_t& oid) {
1676 return exists(c->get_cid(), oid);
1677 }
1678 /**
1679 * set_collection_opts -- set pool options for a collection
1680 *
1681 * @param cid collection
1682 * @param opts new collection options
1683 * @returns 0 on success, negative error code on failure.
1684 */
1685 virtual int set_collection_opts(
1686 const coll_t& cid,
1687 const pool_opts_t& opts) = 0;
1688
1689 /**
1690 * stat -- get information for an object
1691 *
1692 * @param cid collection for object
1693 * @param oid oid of object
1694 * @param st output information for the object
1695 * @param allow_eio if false, assert on -EIO operation failure
1696 * @returns 0 on success, negative error code on failure.
1697 */
1698 virtual int stat(
1699 const coll_t& cid,
1700 const ghobject_t& oid,
1701 struct stat *st,
1702 bool allow_eio = false) = 0; // struct stat?
1703 virtual int stat(
1704 CollectionHandle &c,
1705 const ghobject_t& oid,
1706 struct stat *st,
1707 bool allow_eio = false) {
1708 return stat(c->get_cid(), oid, st, allow_eio);
1709 }
1710
1711 /**
1712 * read -- read a byte range of data from an object
1713 *
1714 * Note: if reading from an offset past the end of the object, we
1715 * return 0 (not, say, -EINVAL).
1716 *
1717 * @param cid collection for object
1718 * @param oid oid of object
1719 * @param offset location offset of first byte to be read
1720 * @param len number of bytes to be read
1721 * @param bl output bufferlist
1722 * @param op_flags is CEPH_OSD_OP_FLAG_*
1723 * @param allow_eio if false, assert on -EIO operation failure
1724 * @returns number of bytes read on success, or negative error code on failure.
1725 */
1726 virtual int read(
1727 const coll_t& cid,
1728 const ghobject_t& oid,
1729 uint64_t offset,
1730 size_t len,
1731 bufferlist& bl,
1732 uint32_t op_flags = 0) = 0;
1733 virtual int read(
1734 CollectionHandle &c,
1735 const ghobject_t& oid,
1736 uint64_t offset,
1737 size_t len,
1738 bufferlist& bl,
1739 uint32_t op_flags = 0) {
1740 return read(c->get_cid(), oid, offset, len, bl, op_flags);
1741 }
1742
1743 /**
1744 * fiemap -- get extent map of data of an object
1745 *
1746 * Returns an encoded map of the extents of an object's data portion
1747 * (map<offset,size>).
1748 *
1749 * A non-enlightened implementation is free to return the extent (offset, len)
1750 * as the sole extent.
1751 *
1752 * @param cid collection for object
1753 * @param oid oid of object
1754 * @param offset location offset of first byte to be read
1755 * @param len number of bytes to be read
1756 * @param bl output bufferlist for extent map information.
1757 * @returns 0 on success, negative error code on failure.
1758 */
1759 virtual int fiemap(const coll_t& cid, const ghobject_t& oid,
1760 uint64_t offset, size_t len, bufferlist& bl) = 0;
1761 virtual int fiemap(const coll_t& cid, const ghobject_t& oid,
1762 uint64_t offset, size_t len,
1763 map<uint64_t, uint64_t>& destmap) = 0;
1764 virtual int fiemap(CollectionHandle& c, const ghobject_t& oid,
1765 uint64_t offset, size_t len, bufferlist& bl) {
1766 return fiemap(c->get_cid(), oid, offset, len, bl);
1767 }
1768 virtual int fiemap(CollectionHandle& c, const ghobject_t& oid,
1769 uint64_t offset, size_t len, map<uint64_t, uint64_t>& destmap) {
1770 return fiemap(c->get_cid(), oid, offset, len, destmap);
1771 }
1772
1773 /**
1774 * getattr -- get an xattr of an object
1775 *
1776 * @param cid collection for object
1777 * @param oid oid of object
1778 * @param name name of attr to read
1779 * @param value place to put output result.
1780 * @returns 0 on success, negative error code on failure.
1781 */
1782 virtual int getattr(const coll_t& cid, const ghobject_t& oid,
1783 const char *name, bufferptr& value) = 0;
1784 virtual int getattr(CollectionHandle &c, const ghobject_t& oid,
1785 const char *name, bufferptr& value) {
1786 return getattr(c->get_cid(), oid, name, value);
1787 }
1788
1789 /**
1790 * getattr -- get an xattr of an object
1791 *
1792 * @param cid collection for object
1793 * @param oid oid of object
1794 * @param name name of attr to read
1795 * @param value place to put output result.
1796 * @returns 0 on success, negative error code on failure.
1797 */
1798 int getattr(const coll_t& cid, const ghobject_t& oid, const char *name, bufferlist& value) {
1799 bufferptr bp;
1800 int r = getattr(cid, oid, name, bp);
1801 if (bp.length())
1802 value.push_back(bp);
1803 return r;
1804 }
1805 int getattr(
1806 coll_t cid, const ghobject_t& oid,
1807 const string& name, bufferlist& value) {
1808 bufferptr bp;
1809 int r = getattr(cid, oid, name.c_str(), bp);
1810 value.push_back(bp);
1811 return r;
1812 }
1813 int getattr(
1814 CollectionHandle &c, const ghobject_t& oid,
1815 const string& name, bufferlist& value) {
1816 bufferptr bp;
1817 int r = getattr(c, oid, name.c_str(), bp);
1818 value.push_back(bp);
1819 return r;
1820 }
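// Hypothetical usage sketch (not part of the original header): the bufferlist
// wrappers above are convenient when the caller wants the xattr bytes in a
// bufferlist rather than a single bufferptr. The attr name "my_attr" is
// purely illustrative.
//
//   bufferlist val;
//   int r = store->getattr(cid, oid, "my_attr", val);
//   if (r == 0) {
//     // val holds the raw xattr value
//   }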
1821
1822 /**
1823 * getattrs -- get all of the xattrs of an object
1824 *
1825 * @param cid collection for object
1826 * @param oid oid of object
1827 * @param aset place to put output result.
1828 * @returns 0 on success, negative error code on failure.
1829 */
1830 virtual int getattrs(const coll_t& cid, const ghobject_t& oid,
1831 map<string,bufferptr>& aset) = 0;
1832 virtual int getattrs(CollectionHandle &c, const ghobject_t& oid,
1833 map<string,bufferptr>& aset) {
1834 return getattrs(c->get_cid(), oid, aset);
1835 }
1836
1837 /**
1838 * getattrs -- get all of the xattrs of an object
1839 *
1840 * @param cid collection for object
1841 * @param oid oid of object
1842 * @param aset place to put output result.
1843 * @returns 0 on success, negative error code on failure.
1844 */
1845 int getattrs(const coll_t& cid, const ghobject_t& oid, map<string,bufferlist>& aset) {
1846 map<string,bufferptr> bmap;
1847 int r = getattrs(cid, oid, bmap);
1848 for (map<string,bufferptr>::iterator i = bmap.begin();
1849 i != bmap.end();
1850 ++i) {
1851 aset[i->first].append(i->second);
1852 }
1853 return r;
1854 }
1855 int getattrs(CollectionHandle &c, const ghobject_t& oid,
1856 map<string,bufferlist>& aset) {
1857 map<string,bufferptr> bmap;
1858 int r = getattrs(c, oid, bmap);
1859 for (map<string,bufferptr>::iterator i = bmap.begin();
1860 i != bmap.end();
1861 ++i) {
1862 aset[i->first].append(i->second);
1863 }
1864 return r;
1865 }
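// Hypothetical usage sketch (not part of the original header): fetching every
// xattr of an object into a map keyed by attribute name.
//
//   map<string, bufferlist> attrs;
//   int r = store->getattrs(cid, oid, attrs);
//   if (r == 0) {
//     for (auto& a : attrs) {
//       // a.first = attr name, a.second = value bytes
//     }
//   }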
1866
1867
1868 // collections
1869
1870 /**
1871 * list_collections -- get all of the collections known to this ObjectStore
1872 *
1873 * @param ls list of the collections in sorted order.
1874 * @returns 0 on success, negative error code on failure.
1875 */
1876 virtual int list_collections(vector<coll_t>& ls) = 0;
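// Hypothetical usage sketch (not part of the original header): enumerating all
// collections held by this store.
//
//   vector<coll_t> colls;
//   int r = store->list_collections(colls);
//   // on success, colls is sorted as described above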
1877
1878 /**
1879 * does a collection exist?
1880 *
1881 * @param c collection
1882 * @returns true if it exists, false otherwise
1883 */
1884 virtual bool collection_exists(const coll_t& c) = 0;
1885
1886 /**
1887 * is a collection empty?
1888 *
1889 * @param c collection
1890 * @param empty true if the specified collection is empty, false otherwise
1891 * @returns 0 on success, negative error code on failure.
1892 */
1893 virtual int collection_empty(const coll_t& c, bool *empty) = 0;
1894
1895 /**
1896 * return the number of significant bits of the coll_t::pgid.
1897 *
1898 * This should return what the last create_collection or split_collection
1899 * set. A legacy backend may return -EAGAIN if the value is unavailable
1900 * (because we upgraded from an older version, e.g., FileStore).
1901 */
1902 virtual int collection_bits(const coll_t& c) = 0;
1903
1904
1905 /**
1906 * list contents of a collection that fall in the range [start, end), returning no more than a specified number of results
1907 *
1908 * @param c collection
1909 * @param start list object that sort >= this value
1910 * @param end list objects that sort < this value
1911 * @param max return no more than this many results
1913 * @param ls [out] result
1914 * @param next [out] next item sorts >= this value
1915 * @return zero on success, or negative error
1916 */
1917 virtual int collection_list(const coll_t& c,
1918 const ghobject_t& start, const ghobject_t& end,
1919 int max,
1920 vector<ghobject_t> *ls, ghobject_t *next) = 0;
1921 virtual int collection_list(CollectionHandle &c,
1922 const ghobject_t& start, const ghobject_t& end,
1923 int max,
1924 vector<ghobject_t> *ls, ghobject_t *next) {
1925 return collection_list(c->get_cid(), start, end, max, ls, next);
1926 }
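// Hypothetical usage sketch (not part of the original header): paging through
// a collection 1024 objects at a time. A default-constructed ghobject_t sorts
// before every real object and ghobject_t::get_max() after, so the pair spans
// the whole collection; next carries the resume point for the following call.
//
//   vector<ghobject_t> batch;
//   ghobject_t cursor, next;
//   while (true) {
//     batch.clear();
//     int r = store->collection_list(cid, cursor, ghobject_t::get_max(),
//                                    1024, &batch, &next);
//     if (r < 0 || batch.empty())
//       break;
//     // ... process batch ...
//     cursor = next;
//   }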
1927
1928
1929 /// OMAP
1930 /// Get omap contents
1931 virtual int omap_get(
1932 const coll_t& c, ///< [in] Collection containing oid
1933 const ghobject_t &oid, ///< [in] Object containing omap
1934 bufferlist *header, ///< [out] omap header
1935 map<string, bufferlist> *out ///< [out] Key to value map
1936 ) = 0;
1937 virtual int omap_get(
1938 CollectionHandle &c, ///< [in] Collection containing oid
1939 const ghobject_t &oid, ///< [in] Object containing omap
1940 bufferlist *header, ///< [out] omap header
1941 map<string, bufferlist> *out ///< [out] Key to value map
1942 ) {
1943 return omap_get(c->get_cid(), oid, header, out);
1944 }
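// Hypothetical usage sketch (not part of the original header): reading the
// whole omap of an object, header plus every key/value pair, in a single call.
//
//   bufferlist header;
//   map<string, bufferlist> kv;
//   int r = store->omap_get(cid, oid, &header, &kv);
//   if (r == 0) {
//     // header may be empty; kv holds all omap entries
//   }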
1945
1946 /// Get omap header
1947 virtual int omap_get_header(
1948 const coll_t& c, ///< [in] Collection containing oid
1949 const ghobject_t &oid, ///< [in] Object containing omap
1950 bufferlist *header, ///< [out] omap header
1951 bool allow_eio = false ///< [in] don't assert on eio
1952 ) = 0;
1953 virtual int omap_get_header(
1954 CollectionHandle &c, ///< [in] Collection containing oid
1955 const ghobject_t &oid, ///< [in] Object containing omap
1956 bufferlist *header, ///< [out] omap header
1957 bool allow_eio = false ///< [in] don't assert on eio
1958 ) {
1959 return omap_get_header(c->get_cid(), oid, header, allow_eio);
1960 }
1961
1962 /// Get keys defined on oid
1963 virtual int omap_get_keys(
1964 const coll_t& c, ///< [in] Collection containing oid
1965 const ghobject_t &oid, ///< [in] Object containing omap
1966 set<string> *keys ///< [out] Keys defined on oid
1967 ) = 0;
1968 virtual int omap_get_keys(
1969 CollectionHandle &c, ///< [in] Collection containing oid
1970 const ghobject_t &oid, ///< [in] Object containing omap
1971 set<string> *keys ///< [out] Keys defined on oid
1972 ) {
1973 return omap_get_keys(c->get_cid(), oid, keys);
1974 }
1975
1976 /// Get key values
1977 virtual int omap_get_values(
1978 const coll_t& c, ///< [in] Collection containing oid
1979 const ghobject_t &oid, ///< [in] Object containing omap
1980 const set<string> &keys, ///< [in] Keys to get
1981 map<string, bufferlist> *out ///< [out] Returned keys and values
1982 ) = 0;
1983 virtual int omap_get_values(
1984 CollectionHandle &c, ///< [in] Collection containing oid
1985 const ghobject_t &oid, ///< [in] Object containing omap
1986 const set<string> &keys, ///< [in] Keys to get
1987 map<string, bufferlist> *out ///< [out] Returned keys and values
1988 ) {
1989 return omap_get_values(c->get_cid(), oid, keys, out);
1990 }
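// Hypothetical usage sketch (not part of the original header): fetching a known
// subset of omap keys; keys that do not exist simply do not appear in the
// output map. The key names are illustrative only.
//
//   set<string> keys = {"epoch", "info"};
//   map<string, bufferlist> vals;
//   int r = store->omap_get_values(cid, oid, keys, &vals);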
1991
1992 /// Filters keys into out which are defined on oid
1993 virtual int omap_check_keys(
1994 const coll_t& c, ///< [in] Collection containing oid
1995 const ghobject_t &oid, ///< [in] Object containing omap
1996 const set<string> &keys, ///< [in] Keys to check
1997 set<string> *out ///< [out] Subset of keys defined on oid
1998 ) = 0;
1999 virtual int omap_check_keys(
2000 CollectionHandle &c, ///< [in] Collection containing oid
2001 const ghobject_t &oid, ///< [in] Object containing omap
2002 const set<string> &keys, ///< [in] Keys to check
2003 set<string> *out ///< [out] Subset of keys defined on oid
2004 ) {
2005 return omap_check_keys(c->get_cid(), oid, keys, out);
2006 }
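// Hypothetical usage sketch (not part of the original header): testing which of
// a candidate set of omap keys exist on the object without reading any values.
// The key names are illustrative only.
//
//   set<string> candidates = {"a", "b", "c"};
//   set<string> present;
//   int r = store->omap_check_keys(cid, oid, candidates, &present);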
2007
2008 /**
2009 * Returns an object map iterator
2010 *
2011 * Warning! The returned iterator is an implicit lock on filestore
2012 * operations in c. Do not use filestore methods on c while the returned
2013 * iterator is live. (Filling in a transaction is no problem).
2014 *
2015 * @return iterator, null on error
2016 */
2017 virtual ObjectMap::ObjectMapIterator get_omap_iterator(
2018 const coll_t& c, ///< [in] collection
2019 const ghobject_t &oid ///< [in] object
2020 ) = 0;
2021 virtual ObjectMap::ObjectMapIterator get_omap_iterator(
2022 CollectionHandle &c, ///< [in] collection
2023 const ghobject_t &oid ///< [in] object
2024 ) {
2025 return get_omap_iterator(c->get_cid(), oid);
2026 }
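// Hypothetical usage sketch (not part of the original header): walking all omap
// keys with the iterator. Per the warning above, avoid other store operations
// on the same collection while the iterator is live.
//
//   ObjectMap::ObjectMapIterator it = store->get_omap_iterator(cid, oid);
//   if (it) {
//     for (it->seek_to_first(); it->valid(); it->next()) {
//       string k = it->key();
//       bufferlist v = it->value();
//       // ...
//     }
//   }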
2027
2028 virtual int flush_journal() { return -EOPNOTSUPP; }
2029
2030 virtual int dump_journal(ostream& out) { return -EOPNOTSUPP; }
2031
2032 virtual int snapshot(const string& name) { return -EOPNOTSUPP; }
2033
2034 /**
2035 * Set and get internal fsid for this instance. No external data is modified.
2036 */
2037 virtual void set_fsid(uuid_d u) = 0;
2038 virtual uuid_d get_fsid() = 0;
2039
2040 /**
2041 * Estimates additional disk space used by the specified number of objects, caused by file allocation granularity and metadata store overhead.
2042 * - num_objects - total (including whiteouts) object count to measure used space for.
2043 */
2044 virtual uint64_t estimate_objects_overhead(uint64_t num_objects) = 0;
2045
2046
2047 // DEBUG
2048 virtual void inject_data_error(const ghobject_t &oid) {}
2049 virtual void inject_mdata_error(const ghobject_t &oid) {}
2050
2051 virtual void compact() {}
2052 virtual bool has_builtin_csum() const {
2053 return false;
2054 }
2055 };
2056 WRITE_CLASS_ENCODER(ObjectStore::Transaction)
2057 WRITE_CLASS_ENCODER(ObjectStore::Transaction::TransactionData)
2058
2059 static inline void intrusive_ptr_add_ref(ObjectStore::Sequencer_impl *s) {
2060 s->get();
2061 }
2062 static inline void intrusive_ptr_release(ObjectStore::Sequencer_impl *s) {
2063 s->put();
2064 }
2065
2066 ostream& operator<<(ostream& out, const ObjectStore::Sequencer& s);
2067 ostream& operator<<(ostream& out, const ObjectStore::Transaction& tx);
2068
2069 #endif