]> git.proxmox.com Git - ceph.git/blame - ceph/src/os/ObjectStore.h
buildsys: auto-determine current version for makefile
[ceph.git] / ceph / src / os / ObjectStore.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14#ifndef CEPH_OBJECTSTORE_H
15#define CEPH_OBJECTSTORE_H
16
17#include "include/Context.h"
18#include "include/buffer.h"
19#include "include/types.h"
11fdf7f2 20#include "include/stringify.h"
7c673cae
FG
21#include "osd/osd_types.h"
22#include "common/TrackedOp.h"
23#include "common/WorkQueue.h"
24#include "ObjectMap.h"
25
26#include <errno.h>
27#include <sys/stat.h>
28#include <vector>
29#include <map>
30
11fdf7f2 31#if defined(__APPLE__) || defined(__FreeBSD__) || defined(__sun)
7c673cae
FG
32#include <sys/statvfs.h>
33#else
34#include <sys/vfs.h> /* or <sys/statfs.h> */
11fdf7f2 35#endif
7c673cae
FG
36
37#define OPS_PER_PTR 32
38
39class CephContext;
40
41using std::vector;
42using std::string;
43using std::map;
44
45namespace ceph {
46 class Formatter;
47}
48
49/*
50 * low-level interface to the local OSD file system
51 */
52
53class Logger;
11fdf7f2 54class ContextQueue;
7c673cae
FG
55
56static inline void encode(const map<string,bufferptr> *attrset, bufferlist &bl) {
11fdf7f2 57 encode(*attrset, bl);
7c673cae
FG
58}
59
60// this isn't the best place for these, but...
11fdf7f2
TL
61void decode_str_str_map_to_bl(bufferlist::const_iterator& p, bufferlist *out);
62void decode_str_set_to_bl(bufferlist::const_iterator& p, bufferlist *out);
7c673cae
FG
63
// Flag bits: each SKIP_* constant occupies its own bit so they can be OR-ed
// together into an osflagbits_t.
using osflagbits_t = uint32_t;
const int SKIP_JOURNAL_REPLAY = 0x1;  // bit 0
const int SKIP_MOUNT_OMAP = 0x2;      // bit 1
68
69class ObjectStore {
70protected:
71 string path;
72
73public:
74 CephContext* cct;
75 /**
76 * create - create an ObjectStore instance.
77 *
78 * This is invoked once at initialization time.
79 *
80 * @param type type of store. This is a string from the configuration file.
81 * @param data path (or other descriptor) for data
82 * @param journal path (or other descriptor) for journal (optional)
83 * @param flags which filestores should check if applicable
84 */
85 static ObjectStore *create(CephContext *cct,
86 const string& type,
87 const string& data,
88 const string& journal,
89 osflagbits_t flags = 0);
90
91 /**
92 * probe a block device to learn the uuid of the owning OSD
93 *
94 * @param cct cct
95 * @param path path to device
96 * @param fsid [out] osd uuid
97 */
98 static int probe_block_device_fsid(
99 CephContext *cct,
100 const string& path,
101 uuid_d *fsid);
102
103 /**
104 * Fetch Object Store statistics.
105 *
106 * Currently only latency of write and apply times are measured.
107 *
108 * This appears to be called with nothing locked.
109 */
110 virtual objectstore_perf_stat_t get_cur_stats() = 0;
111
112 /**
113 * Fetch Object Store performance counters.
114 *
115 *
116 * This appears to be called with nothing locked.
117 */
118 virtual const PerfCounters* get_perf_counters() const = 0;
119
120 /**
11fdf7f2 121 * a collection also orders transactions
7c673cae 122 *
11fdf7f2
TL
123 * Any transactions queued under a given collection will be applied in
124 * sequence. Transactions queued under different collections may run
7c673cae
FG
125 * in parallel.
126 *
11fdf7f2
TL
127 * ObjectStore users may get collection handles with open_collection() (or,
128 * for bootstrapping a new collection, create_new_collection()).
7c673cae 129 */
11fdf7f2
TL
130 struct CollectionImpl : public RefCountedObject {
131 const coll_t cid;
7c673cae 132
11fdf7f2
TL
133 CollectionImpl(const coll_t& c)
134 : RefCountedObject(NULL, 0),
135 cid(c) {}
7c673cae 136
11fdf7f2 137 /// wait for any queued transactions to apply
7c673cae
FG
138 // block until any previous transactions are visible. specifically,
139 // collection_list and collection_empty need to reflect prior operations.
140 virtual void flush() = 0;
141
7c673cae
FG
142 /**
143 * Async flush_commit
144 *
145 * There are two cases:
11fdf7f2 146 * 1) collection is currently idle: the method returns true. c is
7c673cae 147 * not touched.
11fdf7f2
TL
148 * 2) collection is not idle: the method returns false and c is
149 * called asynchronously with a value of 0 once all transactions
150 * queued on this collection prior to the call have been applied
7c673cae
FG
151 * and committed.
152 */
11fdf7f2 153 virtual bool flush_commit(Context *c) = 0;
7c673cae 154
11fdf7f2
TL
155 const coll_t &get_cid() {
156 return cid;
7c673cae
FG
157 }
158 };
7c673cae
FG
159 typedef boost::intrusive_ptr<CollectionImpl> CollectionHandle;
160
7c673cae
FG
161
162 /*********************************
163 *
164 * Object Contents and semantics
165 *
166 * All ObjectStore objects are identified as a named object
167 * (ghobject_t and hobject_t) in a named collection (coll_t).
168 * ObjectStore operations support the creation, mutation, deletion
169 * and enumeration of objects within a collection. Enumeration is
170 * in sorted key order (where keys are sorted by hash). Object names
171 * are globally unique.
172 *
173 * Each object has four distinct parts: byte data, xattrs, omap_header
174 * and omap entries.
175 *
176 * The data portion of an object is conceptually equivalent to a
177 * file in a file system. Random and Partial access for both read
178 * and write operations is required. The ability to have a sparse
179 * implementation of the data portion of an object is beneficial for
180 * some workloads, but not required. There is a system-wide limit on
181 * the maximum size of an object, which is typically around 100 MB.
182 *
183 * Xattrs are equivalent to the extended attributes of file
184 * systems. Xattrs are a set of key/value pairs. Sub-value access
185 * is not required. It is possible to enumerate the set of xattrs in
186 * key order. At the implementation level, xattrs are used
187 * exclusively internal to Ceph and the implementer can expect the
188 * total size of all of the xattrs on an object to be relatively
189 * small, i.e., less than 64KB. Much of Ceph assumes that accessing
190 * xattrs on temporally adjacent object accesses (recent past or
191 * near future) is inexpensive.
192 *
193 * omap_header is a single blob of data. It can be read or written
194 * in total.
195 *
196 * Omap entries are conceptually the same as xattrs
197 * but in a different address space. In other words, you can have
198 * the same key as an xattr and an omap entry and they have distinct
199 * values. Enumeration of xattrs doesn't include omap entries and
200 * vice versa. The size and access characteristics of omap entries
201 * are very different from xattrs. In particular, the value portion
202 * of an omap entry can be quite large (MBs). More importantly, the
203 * interface must support efficient range queries on omap entries even
204 * when there is a large number of entries.
205 *
206 *********************************/
207
208 /*******************************
209 *
210 * Collections
211 *
212 * A collection is simply a grouping of objects. Collections have
213 * names (coll_t) and can be enumerated in order. Like an
214 * individual object, a collection also has a set of xattrs.
215 *
216 *
217 */
218
219
220 /*********************************
221 * transaction
222 *
223 * A Transaction represents a sequence of primitive mutation
224 * operations.
225 *
226 * Three events in the life of a Transaction result in
227 * callbacks. Any Transaction can contain any number of callback
228 * objects (Context) for any combination of the three classes of
229 * callbacks:
230 *
231 * on_applied_sync, on_applied, and on_commit.
232 *
233 * The "on_applied" and "on_applied_sync" callbacks are invoked when
234 * the modifications requested by the Transaction are visible to
235 * subsequent ObjectStore operations, i.e., the results are
236 * readable. The only conceptual difference between on_applied and
237 * on_applied_sync is the specific thread and locking environment in
238 * which the callbacks operate. "on_applied_sync" is called
239 * directly by an ObjectStore execution thread. It is expected to
240 * execute quickly and must not acquire any locks of the calling
241 * environment. Conversely, "on_applied" is called from the separate
242 * Finisher thread, meaning that it can contend for calling
243 * environment locks. NB, on_applied and on_applied_sync are
244 * sometimes called on_readable and on_readable_sync.
245 *
246 * The "on_commit" callback is also called from the Finisher thread
247 * and indicates that all of the mutations have been durably
248 * committed to stable storage (i.e., are now software/hardware
249 * crashproof).
250 *
251 * At the implementation level, each mutation primitive (and its
252 * associated data) can be serialized to a single buffer. That
253 * serialization, however, does not copy any data, but (using the
254 * bufferlist library) will reference the original buffers. This
255 * implies that the buffer that contains the data being submitted
256 * must remain stable until the on_commit callback completes. In
257 * practice, bufferlist handles all of this for you and this
258 * subtlety is only relevant if you are referencing an existing
259 * buffer via buffer::raw_static.
260 *
261 * Some implementations of ObjectStore choose to implement their own
262 * form of journaling that uses the serialized form of a
263 * Transaction. This requires that the encode/decode logic properly
264 * version itself and handle version upgrades that might change the
265 * format of the encoded Transaction. This has already happened a
266 * couple of times and the Transaction object contains some helper
267 * variables that aid in this legacy decoding:
268 *
269 * sobject_encoding detects an older/simpler version of oid
270 * present in pre-bobtail versions of ceph. use_pool_override
271 * also detects a situation where the pool of an oid can be
11fdf7f2
TL
272 * overridden for legacy operations/buffers. For non-legacy
273 * implementations of ObjectStore, neither of these fields are
7c673cae
FG
274 * relevant.
275 *
276 *
277 * TRANSACTION ISOLATION
278 *
11fdf7f2 279 * Except as noted above, isolation is the responsibility of the
7c673cae
FG
280 * caller. In other words, if any storage element (storage element
281 * == any of the four portions of an object as described above) is
282 * altered by a transaction (including deletion), the caller
283 * promises not to attempt to read that element while the
284 * transaction is pending (here pending means from the time of
285 * issuance until the "on_applied_sync" callback has been
286 * received). Violations of isolation need not be detected by
287 * ObjectStore and there is no corresponding error mechanism for
288 * reporting an isolation violation (crashing would be the
289 * appropriate way to report an isolation violation if detected).
290 *
291 * Enumeration operations may violate transaction isolation as
292 * described above when a storage element is being created or
293 * deleted as part of a transaction. In this case, ObjectStore is
294 * allowed to consider the enumeration operation to either precede
295 * or follow the violating transaction element. In other words, the
296 * presence/absence of the mutated element in the enumeration is
297 * entirely at the discretion of ObjectStore. The arbitrary ordering
298 * applies independently to each transaction element. For example,
299 * if a transaction contains two mutating elements "create A" and
300 * "delete B", and an enumeration operation is performed while this
11fdf7f2 301 * transaction is pending, it is permissible for ObjectStore to
7c673cae
FG
302 * report any of the four possible combinations of the existence of
303 * A and B.
304 *
305 */
306 class Transaction {
307 public:
// Operation codes stored in Op::op. The trailing comment on each entry lists
// the operands that accompany the code. The numeric values are not
// contiguous and are not in declaration order (e.g. OP_COLL_MOVE = 8,
// OP_CLONERANGE2 = 30).
308 enum {
309 OP_NOP = 0,
310 OP_TOUCH = 9, // cid, oid
311 OP_WRITE = 10, // cid, oid, offset, len, bl
312 OP_ZERO = 11, // cid, oid, offset, len
313 OP_TRUNCATE = 12, // cid, oid, len
314 OP_REMOVE = 13, // cid, oid
315 OP_SETATTR = 14, // cid, oid, attrname, bl
316 OP_SETATTRS = 15, // cid, oid, attrset
317 OP_RMATTR = 16, // cid, oid, attrname
318 OP_CLONE = 17, // cid, oid, newoid
319 OP_CLONERANGE = 18, // cid, oid, newoid, offset, len
320 OP_CLONERANGE2 = 30, // cid, oid, newoid, srcoff, len, dstoff
321
322 OP_TRIMCACHE = 19, // cid, oid, offset, len **DEPRECATED**
323
324 OP_MKCOLL = 20, // cid
325 OP_RMCOLL = 21, // cid
326 OP_COLL_ADD = 22, // cid, oldcid, oid
327 OP_COLL_REMOVE = 23, // cid, oid
328 OP_COLL_SETATTR = 24, // cid, attrname, bl
329 OP_COLL_RMATTR = 25, // cid, attrname
330 OP_COLL_SETATTRS = 26, // cid, attrset
331 OP_COLL_MOVE = 8, // newcid, oldcid, oid
332
7c673cae
FG
333 OP_RMATTRS = 28, // cid, oid
334 OP_COLL_RENAME = 29, // cid, newcid
335
// The omap ops below address an object: _update_op() remaps both the
// collection index and the object index for them.
336 OP_OMAP_CLEAR = 31, // cid, oid
337 OP_OMAP_SETKEYS = 32, // cid, oid, attrset
338 OP_OMAP_RMKEYS = 33, // cid, oid, keyset
339 OP_OMAP_SETHEADER = 34, // cid, oid, header
340 OP_SPLIT_COLLECTION = 35, // cid, bits, destination
341 OP_SPLIT_COLLECTION2 = 36, /* cid, bits, destination
342 doesn't create the destination */
343 OP_OMAP_RMKEYRANGE = 37, // cid, oid, firstkey, lastkey
344 OP_COLL_MOVE_RENAME = 38, // oldcid, oldoid, newcid, newoid
345
346 OP_SETALLOCHINT = 39, // cid, oid, object_size, write_size
347 OP_COLL_HINT = 40, // cid, type, bl
348
349 OP_TRY_RENAME = 41, // oldcid, oldoid, newoid
350
351 OP_COLL_SET_BITS = 42, // cid, bits
11fdf7f2
TL
352
353 OP_MERGE_COLLECTION = 43, // cid, destination
7c673cae
FG
354 };
355
356 // Transaction hint type
357 enum {
358 COLL_HINT_EXPECTED_NUM_OBJECTS = 1,
359 };
360
361 struct Op {
362 __le32 op;
363 __le32 cid;
364 __le32 oid;
365 __le64 off;
366 __le64 len;
367 __le32 dest_cid;
368 __le32 dest_oid; //OP_CLONE, OP_CLONERANGE
369 __le64 dest_off; //OP_CLONERANGE
370 union {
371 struct {
372 __le32 hint_type; //OP_COLL_HINT
373 };
374 struct {
375 __le32 alloc_hint_flags; //OP_SETALLOCHINT
376 };
377 };
378 __le64 expected_object_size; //OP_SETALLOCHINT
379 __le64 expected_write_size; //OP_SETALLOCHINT
380 __le32 split_bits; //OP_SPLIT_COLLECTION2,OP_COLL_SET_BITS,
381 //OP_MKCOLL
382 __le32 split_rem; //OP_SPLIT_COLLECTION2
383 } __attribute__ ((packed)) ;
384
385 struct TransactionData {
386 __le64 ops;
387 __le32 largest_data_len;
388 __le32 largest_data_off;
389 __le32 largest_data_off_in_data_bl;
390 __le32 fadvise_flags;
391
392 TransactionData() noexcept :
393 ops(0),
394 largest_data_len(0),
395 largest_data_off(0),
396 largest_data_off_in_data_bl(0),
397 fadvise_flags(0) { }
398
399 // override default move operations to reset default values
400 TransactionData(TransactionData&& other) noexcept :
401 ops(other.ops),
402 largest_data_len(other.largest_data_len),
403 largest_data_off(other.largest_data_off),
404 largest_data_off_in_data_bl(other.largest_data_off_in_data_bl),
405 fadvise_flags(other.fadvise_flags) {
406 other.ops = 0;
407 other.largest_data_len = 0;
408 other.largest_data_off = 0;
409 other.largest_data_off_in_data_bl = 0;
410 other.fadvise_flags = 0;
411 }
412 TransactionData& operator=(TransactionData&& other) noexcept {
413 ops = other.ops;
414 largest_data_len = other.largest_data_len;
415 largest_data_off = other.largest_data_off;
416 largest_data_off_in_data_bl = other.largest_data_off_in_data_bl;
417 fadvise_flags = other.fadvise_flags;
418 other.ops = 0;
419 other.largest_data_len = 0;
420 other.largest_data_off = 0;
421 other.largest_data_off_in_data_bl = 0;
422 other.fadvise_flags = 0;
423 return *this;
424 }
425
426 TransactionData(const TransactionData& other) = default;
427 TransactionData& operator=(const TransactionData& other) = default;
428
429 void encode(bufferlist& bl) const {
430 bl.append((char*)this, sizeof(TransactionData));
431 }
11fdf7f2 432 void decode(bufferlist::const_iterator &bl) {
7c673cae
FG
433 bl.copy(sizeof(TransactionData), (char*)this);
434 }
435 } __attribute__ ((packed)) ;
436
437 private:
438 TransactionData data;
439
7c673cae
FG
440 map<coll_t, __le32> coll_index;
441 map<ghobject_t, __le32> object_index;
442
443 __le32 coll_id {0};
444 __le32 object_id {0};
445
446 bufferlist data_bl;
447 bufferlist op_bl;
448
7c673cae
FG
449 list<Context *> on_applied;
450 list<Context *> on_commit;
451 list<Context *> on_applied_sync;
452
453 public:
454 Transaction() = default;
455
11fdf7f2 456 explicit Transaction(bufferlist::const_iterator &dp) {
7c673cae
FG
457 decode(dp);
458 }
459 explicit Transaction(bufferlist &nbl) {
11fdf7f2 460 auto dp = nbl.cbegin();
7c673cae
FG
461 decode(dp);
462 }
463
464 // override default move operations to reset default values
465 Transaction(Transaction&& other) noexcept :
466 data(std::move(other.data)),
7c673cae
FG
467 coll_index(std::move(other.coll_index)),
468 object_index(std::move(other.object_index)),
469 coll_id(other.coll_id),
470 object_id(other.object_id),
471 data_bl(std::move(other.data_bl)),
472 op_bl(std::move(other.op_bl)),
7c673cae
FG
473 on_applied(std::move(other.on_applied)),
474 on_commit(std::move(other.on_commit)),
475 on_applied_sync(std::move(other.on_applied_sync)) {
7c673cae
FG
476 other.coll_id = 0;
477 other.object_id = 0;
478 }
479
480 Transaction& operator=(Transaction&& other) noexcept {
481 data = std::move(other.data);
7c673cae
FG
482 coll_index = std::move(other.coll_index);
483 object_index = std::move(other.object_index);
484 coll_id = other.coll_id;
485 object_id = other.object_id;
486 data_bl = std::move(other.data_bl);
487 op_bl = std::move(other.op_bl);
7c673cae
FG
488 on_applied = std::move(other.on_applied);
489 on_commit = std::move(other.on_commit);
490 on_applied_sync = std::move(other.on_applied_sync);
7c673cae
FG
491 other.coll_id = 0;
492 other.object_id = 0;
493 return *this;
494 }
495
496 Transaction(const Transaction& other) = default;
497 Transaction& operator=(const Transaction& other) = default;
498
11fdf7f2
TL
499 // expose object_index for FileStore::Op's benefit
500 const map<ghobject_t, __le32>& get_object_index() const {
501 return object_index;
502 }
503
7c673cae
FG
504 /* Operations on callback contexts */
505 void register_on_applied(Context *c) {
506 if (!c) return;
507 on_applied.push_back(c);
508 }
509 void register_on_commit(Context *c) {
510 if (!c) return;
511 on_commit.push_back(c);
512 }
513 void register_on_applied_sync(Context *c) {
514 if (!c) return;
515 on_applied_sync.push_back(c);
516 }
517 void register_on_complete(Context *c) {
518 if (!c) return;
519 RunOnDeleteRef _complete (std::make_shared<RunOnDelete>(c));
520 register_on_applied(new ContainerContext<RunOnDeleteRef>(_complete));
521 register_on_commit(new ContainerContext<RunOnDeleteRef>(_complete));
522 }
11fdf7f2
TL
523 bool has_contexts() const {
524 return
525 !on_commit.empty() ||
526 !on_applied.empty() ||
527 !on_applied_sync.empty();
528 }
7c673cae
FG
529
530 static void collect_contexts(
531 vector<Transaction>& t,
532 Context **out_on_applied,
533 Context **out_on_commit,
534 Context **out_on_applied_sync) {
11fdf7f2
TL
535 ceph_assert(out_on_applied);
536 ceph_assert(out_on_commit);
537 ceph_assert(out_on_applied_sync);
7c673cae 538 list<Context *> on_applied, on_commit, on_applied_sync;
11fdf7f2
TL
539 for (auto& i : t) {
540 on_applied.splice(on_applied.end(), i.on_applied);
541 on_commit.splice(on_commit.end(), i.on_commit);
542 on_applied_sync.splice(on_applied_sync.end(), i.on_applied_sync);
7c673cae
FG
543 }
544 *out_on_applied = C_Contexts::list_to_context(on_applied);
545 *out_on_commit = C_Contexts::list_to_context(on_commit);
546 *out_on_applied_sync = C_Contexts::list_to_context(on_applied_sync);
547 }
11fdf7f2
TL
548 static void collect_contexts(
549 vector<Transaction>& t,
550 list<Context*> *out_on_applied,
551 list<Context*> *out_on_commit,
552 list<Context*> *out_on_applied_sync) {
553 ceph_assert(out_on_applied);
554 ceph_assert(out_on_commit);
555 ceph_assert(out_on_applied_sync);
556 for (auto& i : t) {
557 out_on_applied->splice(out_on_applied->end(), i.on_applied);
558 out_on_commit->splice(out_on_commit->end(), i.on_commit);
559 out_on_applied_sync->splice(out_on_applied_sync->end(),
560 i.on_applied_sync);
561 }
562 }
7c673cae
FG
563
564 Context *get_on_applied() {
565 return C_Contexts::list_to_context(on_applied);
566 }
567 Context *get_on_commit() {
568 return C_Contexts::list_to_context(on_commit);
569 }
570 Context *get_on_applied_sync() {
571 return C_Contexts::list_to_context(on_applied_sync);
572 }
573
574 void set_fadvise_flags(uint32_t flags) {
575 data.fadvise_flags = flags;
576 }
577 void set_fadvise_flag(uint32_t flag) {
578 data.fadvise_flags = data.fadvise_flags | flag;
579 }
580 uint32_t get_fadvise_flags() { return data.fadvise_flags; }
581
582 void swap(Transaction& other) noexcept {
583 std::swap(data, other.data);
584 std::swap(on_applied, other.on_applied);
585 std::swap(on_commit, other.on_commit);
586 std::swap(on_applied_sync, other.on_applied_sync);
587
588 std::swap(coll_index, other.coll_index);
589 std::swap(object_index, other.object_index);
590 std::swap(coll_id, other.coll_id);
591 std::swap(object_id, other.object_id);
592 op_bl.swap(other.op_bl);
593 data_bl.swap(other.data_bl);
594 }
595
596 void _update_op(Op* op,
597 vector<__le32> &cm,
598 vector<__le32> &om) {
599
600 switch (op->op) {
601 case OP_NOP:
7c673cae
FG
602 break;
603
604 case OP_TOUCH:
605 case OP_REMOVE:
606 case OP_SETATTR:
607 case OP_SETATTRS:
608 case OP_RMATTR:
609 case OP_RMATTRS:
610 case OP_COLL_REMOVE:
611 case OP_OMAP_CLEAR:
612 case OP_OMAP_SETKEYS:
613 case OP_OMAP_RMKEYS:
614 case OP_OMAP_RMKEYRANGE:
615 case OP_OMAP_SETHEADER:
616 case OP_WRITE:
617 case OP_ZERO:
618 case OP_TRUNCATE:
619 case OP_SETALLOCHINT:
11fdf7f2
TL
620 ceph_assert(op->cid < cm.size());
621 ceph_assert(op->oid < om.size());
7c673cae
FG
622 op->cid = cm[op->cid];
623 op->oid = om[op->oid];
624 break;
625
626 case OP_CLONERANGE2:
627 case OP_CLONE:
11fdf7f2
TL
628 ceph_assert(op->cid < cm.size());
629 ceph_assert(op->oid < om.size());
630 ceph_assert(op->dest_oid < om.size());
7c673cae
FG
631 op->cid = cm[op->cid];
632 op->oid = om[op->oid];
633 op->dest_oid = om[op->dest_oid];
634 break;
635
636 case OP_MKCOLL:
637 case OP_RMCOLL:
638 case OP_COLL_SETATTR:
639 case OP_COLL_RMATTR:
640 case OP_COLL_SETATTRS:
641 case OP_COLL_HINT:
642 case OP_COLL_SET_BITS:
11fdf7f2 643 ceph_assert(op->cid < cm.size());
7c673cae
FG
644 op->cid = cm[op->cid];
645 break;
646
647 case OP_COLL_ADD:
11fdf7f2
TL
648 ceph_assert(op->cid < cm.size());
649 ceph_assert(op->oid < om.size());
650 ceph_assert(op->dest_cid < om.size());
7c673cae
FG
651 op->cid = cm[op->cid];
652 op->dest_cid = cm[op->dest_cid];
653 op->oid = om[op->oid];
654 break;
655
656 case OP_COLL_MOVE_RENAME:
11fdf7f2
TL
657 ceph_assert(op->cid < cm.size());
658 ceph_assert(op->oid < om.size());
659 ceph_assert(op->dest_cid < cm.size());
660 ceph_assert(op->dest_oid < om.size());
7c673cae
FG
661 op->cid = cm[op->cid];
662 op->oid = om[op->oid];
663 op->dest_cid = cm[op->dest_cid];
664 op->dest_oid = om[op->dest_oid];
665 break;
666
667 case OP_TRY_RENAME:
11fdf7f2
TL
668 ceph_assert(op->cid < cm.size());
669 ceph_assert(op->oid < om.size());
670 ceph_assert(op->dest_oid < om.size());
7c673cae
FG
671 op->cid = cm[op->cid];
672 op->oid = om[op->oid];
673 op->dest_oid = om[op->dest_oid];
674 break;
675
676 case OP_SPLIT_COLLECTION2:
11fdf7f2
TL
677 ceph_assert(op->cid < cm.size());
678 ceph_assert(op->dest_cid < cm.size());
679 op->cid = cm[op->cid];
680 op->dest_cid = cm[op->dest_cid];
681 break;
682
683 case OP_MERGE_COLLECTION:
684 ceph_assert(op->cid < cm.size());
685 ceph_assert(op->dest_cid < cm.size());
7c673cae
FG
686 op->cid = cm[op->cid];
687 op->dest_cid = cm[op->dest_cid];
688 break;
689
690 default:
11fdf7f2 691 ceph_abort_msg("Unknown OP");
7c673cae
FG
692 }
693 }
694 void _update_op_bl(
695 bufferlist& bl,
696 vector<__le32> &cm,
697 vector<__le32> &om) {
11fdf7f2
TL
698 for (auto& bp : bl.buffers()) {
699 ceph_assert(bp.length() % sizeof(Op) == 0);
7c673cae 700
11fdf7f2
TL
701 char* raw_p = const_cast<char*>(bp.c_str());
702 char* raw_end = raw_p + bp.length();
7c673cae
FG
703 while (raw_p < raw_end) {
704 _update_op(reinterpret_cast<Op*>(raw_p), cm, om);
705 raw_p += sizeof(Op);
706 }
707 }
708 }
709 /// Append the operations of the parameter to this Transaction. Those operations are removed from the parameter Transaction
710 void append(Transaction& other) {
711
712 data.ops += other.data.ops;
713 if (other.data.largest_data_len > data.largest_data_len) {
714 data.largest_data_len = other.data.largest_data_len;
715 data.largest_data_off = other.data.largest_data_off;
716 data.largest_data_off_in_data_bl = data_bl.length() + other.data.largest_data_off_in_data_bl;
717 }
718 data.fadvise_flags |= other.data.fadvise_flags;
719 on_applied.splice(on_applied.end(), other.on_applied);
720 on_commit.splice(on_commit.end(), other.on_commit);
721 on_applied_sync.splice(on_applied_sync.end(), other.on_applied_sync);
722
723 //append coll_index & object_index
724 vector<__le32> cm(other.coll_index.size());
725 map<coll_t, __le32>::iterator coll_index_p;
726 for (coll_index_p = other.coll_index.begin();
727 coll_index_p != other.coll_index.end();
728 ++coll_index_p) {
729 cm[coll_index_p->second] = _get_coll_id(coll_index_p->first);
730 }
731
732 vector<__le32> om(other.object_index.size());
733 map<ghobject_t, __le32>::iterator object_index_p;
734 for (object_index_p = other.object_index.begin();
735 object_index_p != other.object_index.end();
736 ++object_index_p) {
737 om[object_index_p->second] = _get_object_id(object_index_p->first);
738 }
739
740 //the other.op_bl SHOULD NOT be changes during append operation,
741 //we use additional bufferlist to avoid this problem
7c673cae 742 bufferlist other_op_bl;
11fdf7f2
TL
743 {
744 bufferptr other_op_bl_ptr(other.op_bl.length());
745 other.op_bl.copy(0, other.op_bl.length(), other_op_bl_ptr.c_str());
746 other_op_bl.append(std::move(other_op_bl_ptr));
747 }
7c673cae
FG
748
749 //update other_op_bl with cm & om
750 //When the other is appended to current transaction, all coll_index and
751 //object_index in other.op_buffer should be updated by new index of the
752 //combined transaction
753 _update_op_bl(other_op_bl, cm, om);
754
755 //append op_bl
756 op_bl.append(other_op_bl);
757 //append data_bl
758 data_bl.append(other.data_bl);
759 }
760
761 /** Inquires about the Transaction as a whole. */
762
763 /// How big is the encoded Transaction buffer?
764 uint64_t get_encoded_bytes() {
765 //layout: data_bl + op_bl + coll_index + object_index + data
766
767 // coll_index size, object_index size and sizeof(transaction_data)
768 // all here, so they may be computed at compile-time
769 size_t final_size = sizeof(__u32) * 2 + sizeof(data);
770
771 // coll_index second and object_index second
772 final_size += (coll_index.size() + object_index.size()) * sizeof(__le32);
773
774 // coll_index first
775 for (auto p = coll_index.begin(); p != coll_index.end(); ++p) {
776 final_size += p->first.encoded_size();
777 }
778
779 // object_index first
780 for (auto p = object_index.begin(); p != object_index.end(); ++p) {
781 final_size += p->first.encoded_size();
782 }
783
784 return data_bl.length() +
785 op_bl.length() +
786 final_size;
787 }
788
789 /// Retain old version for regression testing purposes
790 uint64_t get_encoded_bytes_test() {
11fdf7f2 791 using ceph::encode;
7c673cae
FG
792 //layout: data_bl + op_bl + coll_index + object_index + data
793 bufferlist bl;
11fdf7f2
TL
794 encode(coll_index, bl);
795 encode(object_index, bl);
7c673cae
FG
796
797 return data_bl.length() +
798 op_bl.length() +
799 bl.length() +
800 sizeof(data);
801 }
802
803 uint64_t get_num_bytes() {
804 return get_encoded_bytes();
805 }
806 /// Size of largest data buffer to the "write" operation encountered so far
807 uint32_t get_data_length() {
808 return data.largest_data_len;
809 }
810 /// offset within the encoded buffer to the start of the largest data buffer that's encoded
811 uint32_t get_data_offset() {
812 if (data.largest_data_off_in_data_bl) {
813 return data.largest_data_off_in_data_bl +
814 sizeof(__u8) + // encode struct_v
815 sizeof(__u8) + // encode compat_v
816 sizeof(__u32) + // encode len
817 sizeof(__u32); // data_bl len
818 }
819 return 0; // none
820 }
821 /// offset of buffer as aligned to destination within object.
822 int get_data_alignment() {
823 if (!data.largest_data_len)
11fdf7f2 824 return 0;
7c673cae
FG
825 return (0 - get_data_offset()) & ~CEPH_PAGE_MASK;
826 }
827 /// Is the Transaction empty (no operations)
828 bool empty() {
829 return !data.ops;
830 }
11fdf7f2 831 /// Number of operations in the transaction
7c673cae
FG
832 int get_num_ops() {
833 return data.ops;
834 }
835
7c673cae
FG
836 /**
837 * iterator
838 *
839 * Helper object to parse Transactions.
840 *
841 * ObjectStore instances use this object to step down the encoded
842 * buffer decoding operation codes and parameters as we go.
843 *
844 */
845 class iterator {
846 Transaction *t;
847
848 uint64_t ops;
849 char* op_buffer_p;
850
11fdf7f2 851 bufferlist::const_iterator data_bl_p;
7c673cae
FG
852
853 public:
854 vector<coll_t> colls;
855 vector<ghobject_t> objects;
856
857 private:
858 explicit iterator(Transaction *t)
859 : t(t),
11fdf7f2 860 data_bl_p(t->data_bl.cbegin()),
7c673cae
FG
861 colls(t->coll_index.size()),
862 objects(t->object_index.size()) {
863
864 ops = t->data.ops;
11fdf7f2 865 op_buffer_p = t->op_bl.c_str();
7c673cae
FG
866
867 map<coll_t, __le32>::iterator coll_index_p;
868 for (coll_index_p = t->coll_index.begin();
869 coll_index_p != t->coll_index.end();
870 ++coll_index_p) {
871 colls[coll_index_p->second] = coll_index_p->first;
872 }
873
874 map<ghobject_t, __le32>::iterator object_index_p;
875 for (object_index_p = t->object_index.begin();
876 object_index_p != t->object_index.end();
877 ++object_index_p) {
878 objects[object_index_p->second] = object_index_p->first;
879 }
880 }
881
882 friend class Transaction;
883
884 public:
885
886 bool have_op() {
887 return ops > 0;
888 }
889 Op* decode_op() {
11fdf7f2 890 ceph_assert(ops > 0);
7c673cae
FG
891
892 Op* op = reinterpret_cast<Op*>(op_buffer_p);
893 op_buffer_p += sizeof(Op);
894 ops--;
895
896 return op;
897 }
898 string decode_string() {
11fdf7f2 899 using ceph::decode;
7c673cae 900 string s;
11fdf7f2 901 decode(s, data_bl_p);
7c673cae
FG
902 return s;
903 }
904 void decode_bp(bufferptr& bp) {
11fdf7f2
TL
905 using ceph::decode;
906 decode(bp, data_bl_p);
7c673cae
FG
907 }
908 void decode_bl(bufferlist& bl) {
11fdf7f2
TL
909 using ceph::decode;
910 decode(bl, data_bl_p);
7c673cae
FG
911 }
912 void decode_attrset(map<string,bufferptr>& aset) {
11fdf7f2
TL
913 using ceph::decode;
914 decode(aset, data_bl_p);
7c673cae
FG
915 }
916 void decode_attrset(map<string,bufferlist>& aset) {
11fdf7f2
TL
917 using ceph::decode;
918 decode(aset, data_bl_p);
7c673cae
FG
919 }
920 void decode_attrset_bl(bufferlist *pbl) {
921 decode_str_str_map_to_bl(data_bl_p, pbl);
922 }
923 void decode_keyset(set<string> &keys){
11fdf7f2
TL
924 using ceph::decode;
925 decode(keys, data_bl_p);
7c673cae
FG
926 }
927 void decode_keyset_bl(bufferlist *pbl){
928 decode_str_set_to_bl(data_bl_p, pbl);
929 }
930
931 const ghobject_t &get_oid(__le32 oid_id) {
11fdf7f2 932 ceph_assert(oid_id < objects.size());
7c673cae
FG
933 return objects[oid_id];
934 }
935 const coll_t &get_cid(__le32 cid_id) {
11fdf7f2 936 ceph_assert(cid_id < colls.size());
7c673cae
FG
937 return colls[cid_id];
938 }
939 uint32_t get_fadvise_flags() const {
940 return t->get_fadvise_flags();
941 }
942 };
943
944 iterator begin() {
945 return iterator(this);
946 }
947
948private:
949 void _build_actions_from_tbl();
950
951 /**
952 * Helper functions to encode the various mutation elements of a
953 * transaction. These are 1:1 with the operation codes (see
954 * enumeration above). These routines ensure that the
955 * encoder/creator of a transaction gets the right data in the
956 * right place. Sadly, there's no corresponding version nor any
957 * form of seat belts for the decoder.
958 */
959 Op* _get_next_op() {
11fdf7f2
TL
960 if (op_bl.get_append_buffer_unused_tail_length() < sizeof(Op)) {
961 op_bl.reserve(sizeof(Op) * OPS_PER_PTR);
7c673cae 962 }
11fdf7f2
TL
963 // append_hole ensures bptr merging. Even huge number of ops
964 // shouldn't result in overpopulating bl::_buffers.
965 char* const p = op_bl.append_hole(sizeof(Op)).c_str();
7c673cae
FG
966 memset(p, 0, sizeof(Op));
967 return reinterpret_cast<Op*>(p);
968 }
969 __le32 _get_coll_id(const coll_t& coll) {
970 map<coll_t, __le32>::iterator c = coll_index.find(coll);
971 if (c != coll_index.end())
972 return c->second;
973
974 __le32 index_id = coll_id++;
975 coll_index[coll] = index_id;
976 return index_id;
977 }
978 __le32 _get_object_id(const ghobject_t& oid) {
979 map<ghobject_t, __le32>::iterator o = object_index.find(oid);
980 if (o != object_index.end())
981 return o->second;
982
983 __le32 index_id = object_id++;
984 object_index[oid] = index_id;
985 return index_id;
986 }
987
988public:
7c673cae
FG
989 /// noop. 'nuf said
990 void nop() {
991 Op* _op = _get_next_op();
992 _op->op = OP_NOP;
993 data.ops++;
994 }
995 /**
996 * touch
997 *
998 * Ensure the existance of an object in a collection. Create an
999 * empty object if necessary
1000 */
1001 void touch(const coll_t& cid, const ghobject_t& oid) {
1002 Op* _op = _get_next_op();
1003 _op->op = OP_TOUCH;
1004 _op->cid = _get_coll_id(cid);
1005 _op->oid = _get_object_id(oid);
1006 data.ops++;
1007 }
1008 /**
1009 * Write data to an offset within an object. If the object is too
1010 * small, it is expanded as needed. It is possible to specify an
1011 * offset beyond the current end of an object and it will be
1012 * expanded as needed. Simple implementations of ObjectStore will
1013 * just zero the data between the old end of the object and the
1014 * newly provided data. More sophisticated implementations of
1015 * ObjectStore will omit the untouched data and store it as a
1016 * "hole" in the file.
b32b8144
FG
1017 *
1018 * Note that a 0-length write does not affect the size of the object.
7c673cae
FG
1019 */
1020 void write(const coll_t& cid, const ghobject_t& oid, uint64_t off, uint64_t len,
1021 const bufferlist& write_data, uint32_t flags = 0) {
11fdf7f2 1022 using ceph::encode;
7c673cae
FG
1023 uint32_t orig_len = data_bl.length();
1024 Op* _op = _get_next_op();
1025 _op->op = OP_WRITE;
1026 _op->cid = _get_coll_id(cid);
1027 _op->oid = _get_object_id(oid);
1028 _op->off = off;
1029 _op->len = len;
11fdf7f2 1030 encode(write_data, data_bl);
7c673cae 1031
11fdf7f2 1032 ceph_assert(len == write_data.length());
7c673cae
FG
1033 data.fadvise_flags = data.fadvise_flags | flags;
1034 if (write_data.length() > data.largest_data_len) {
1035 data.largest_data_len = write_data.length();
1036 data.largest_data_off = off;
1037 data.largest_data_off_in_data_bl = orig_len + sizeof(__u32); // we are about to
1038 }
1039 data.ops++;
1040 }
1041 /**
1042 * zero out the indicated byte range within an object. Some
1043 * ObjectStore instances may optimize this to release the
1044 * underlying storage space.
b32b8144
FG
1045 *
1046 * If the zero range extends beyond the end of the object, the object
1047 * size is extended, just as if we were writing a buffer full of zeros.
1048 * EXCEPT if the length is 0, in which case (just like a 0-length write)
1049 * we do not adjust the object size.
7c673cae
FG
1050 */
1051 void zero(const coll_t& cid, const ghobject_t& oid, uint64_t off, uint64_t len) {
1052 Op* _op = _get_next_op();
1053 _op->op = OP_ZERO;
1054 _op->cid = _get_coll_id(cid);
1055 _op->oid = _get_object_id(oid);
1056 _op->off = off;
1057 _op->len = len;
1058 data.ops++;
1059 }
1060 /// Discard all data in the object beyond the specified size.
1061 void truncate(const coll_t& cid, const ghobject_t& oid, uint64_t off) {
1062 Op* _op = _get_next_op();
1063 _op->op = OP_TRUNCATE;
1064 _op->cid = _get_coll_id(cid);
1065 _op->oid = _get_object_id(oid);
1066 _op->off = off;
1067 data.ops++;
1068 }
1069 /// Remove an object. All four parts of the object are removed.
1070 void remove(const coll_t& cid, const ghobject_t& oid) {
1071 Op* _op = _get_next_op();
1072 _op->op = OP_REMOVE;
1073 _op->cid = _get_coll_id(cid);
1074 _op->oid = _get_object_id(oid);
1075 data.ops++;
1076 }
1077 /// Set an xattr of an object
1078 void setattr(const coll_t& cid, const ghobject_t& oid, const char* name, bufferlist& val) {
1079 string n(name);
1080 setattr(cid, oid, n, val);
1081 }
1082 /// Set an xattr of an object
1083 void setattr(const coll_t& cid, const ghobject_t& oid, const string& s, bufferlist& val) {
11fdf7f2 1084 using ceph::encode;
7c673cae
FG
1085 Op* _op = _get_next_op();
1086 _op->op = OP_SETATTR;
1087 _op->cid = _get_coll_id(cid);
1088 _op->oid = _get_object_id(oid);
11fdf7f2
TL
1089 encode(s, data_bl);
1090 encode(val, data_bl);
7c673cae
FG
1091 data.ops++;
1092 }
1093 /// Set multiple xattrs of an object
1094 void setattrs(const coll_t& cid, const ghobject_t& oid, const map<string,bufferptr>& attrset) {
11fdf7f2 1095 using ceph::encode;
7c673cae
FG
1096 Op* _op = _get_next_op();
1097 _op->op = OP_SETATTRS;
1098 _op->cid = _get_coll_id(cid);
1099 _op->oid = _get_object_id(oid);
11fdf7f2 1100 encode(attrset, data_bl);
7c673cae
FG
1101 data.ops++;
1102 }
1103 /// Set multiple xattrs of an object
1104 void setattrs(const coll_t& cid, const ghobject_t& oid, const map<string,bufferlist>& attrset) {
11fdf7f2 1105 using ceph::encode;
7c673cae
FG
1106 Op* _op = _get_next_op();
1107 _op->op = OP_SETATTRS;
1108 _op->cid = _get_coll_id(cid);
1109 _op->oid = _get_object_id(oid);
11fdf7f2 1110 encode(attrset, data_bl);
7c673cae
FG
1111 data.ops++;
1112 }
1113 /// remove an xattr from an object
1114 void rmattr(const coll_t& cid, const ghobject_t& oid, const char *name) {
1115 string n(name);
1116 rmattr(cid, oid, n);
1117 }
1118 /// remove an xattr from an object
1119 void rmattr(const coll_t& cid, const ghobject_t& oid, const string& s) {
11fdf7f2 1120 using ceph::encode;
7c673cae
FG
1121 Op* _op = _get_next_op();
1122 _op->op = OP_RMATTR;
1123 _op->cid = _get_coll_id(cid);
1124 _op->oid = _get_object_id(oid);
11fdf7f2 1125 encode(s, data_bl);
7c673cae
FG
1126 data.ops++;
1127 }
1128 /// remove all xattrs from an object
1129 void rmattrs(const coll_t& cid, const ghobject_t& oid) {
1130 Op* _op = _get_next_op();
1131 _op->op = OP_RMATTRS;
1132 _op->cid = _get_coll_id(cid);
1133 _op->oid = _get_object_id(oid);
1134 data.ops++;
1135 }
1136 /**
1137 * Clone an object into another object.
1138 *
1139 * Low-cost (e.g., O(1)) cloning (if supported) is best, but
1140 * fallback to an O(n) copy is allowed. All four parts of the
1141 * object are cloned (data, xattrs, omap header, omap
1142 * entries).
1143 *
1144 * The destination named object may already exist, in
1145 * which case its previous contents are discarded.
1146 */
1147 void clone(const coll_t& cid, const ghobject_t& oid,
1148 const ghobject_t& noid) {
1149 Op* _op = _get_next_op();
1150 _op->op = OP_CLONE;
1151 _op->cid = _get_coll_id(cid);
1152 _op->oid = _get_object_id(oid);
1153 _op->dest_oid = _get_object_id(noid);
1154 data.ops++;
1155 }
1156 /**
1157 * Clone a byte range from one object to another.
1158 *
1159 * The data portion of the destination object receives a copy of a
1160 * portion of the data from the source object. None of the other
1161 * three parts of an object is copied from the source.
1162 *
1163 * The destination object size may be extended to the dstoff + len.
1164 *
1165 * The source range *must* overlap with the source object data. If it does
1166 * not the result is undefined.
1167 */
1168 void clone_range(const coll_t& cid, const ghobject_t& oid,
1169 const ghobject_t& noid,
1170 uint64_t srcoff, uint64_t srclen, uint64_t dstoff) {
1171 Op* _op = _get_next_op();
1172 _op->op = OP_CLONERANGE2;
1173 _op->cid = _get_coll_id(cid);
1174 _op->oid = _get_object_id(oid);
1175 _op->dest_oid = _get_object_id(noid);
1176 _op->off = srcoff;
1177 _op->len = srclen;
1178 _op->dest_off = dstoff;
1179 data.ops++;
1180 }
1181
1182 /// Create the collection
1183 void create_collection(const coll_t& cid, int bits) {
1184 Op* _op = _get_next_op();
1185 _op->op = OP_MKCOLL;
1186 _op->cid = _get_coll_id(cid);
1187 _op->split_bits = bits;
1188 data.ops++;
1189 }
1190
1191 /**
1192 * Give the collection a hint.
1193 *
1194 * @param cid - collection id.
1195 * @param type - hint type.
1196 * @param hint - the hint payload, which contains the customized
1197 * data along with the hint type.
1198 */
1199 void collection_hint(const coll_t& cid, uint32_t type, const bufferlist& hint) {
11fdf7f2 1200 using ceph::encode;
7c673cae
FG
1201 Op* _op = _get_next_op();
1202 _op->op = OP_COLL_HINT;
1203 _op->cid = _get_coll_id(cid);
1204 _op->hint_type = type;
11fdf7f2 1205 encode(hint, data_bl);
7c673cae
FG
1206 data.ops++;
1207 }
1208
1209 /// remove the collection, the collection must be empty
1210 void remove_collection(const coll_t& cid) {
1211 Op* _op = _get_next_op();
1212 _op->op = OP_RMCOLL;
1213 _op->cid = _get_coll_id(cid);
1214 data.ops++;
1215 }
11fdf7f2 1216 void collection_move(const coll_t& cid, const coll_t &oldcid, const ghobject_t& oid)
7c673cae
FG
1217 __attribute__ ((deprecated)) {
1218 // NOTE: we encode this as a fixed combo of ADD + REMOVE. they
1219 // always appear together, so this is effectively a single MOVE.
1220 Op* _op = _get_next_op();
1221 _op->op = OP_COLL_ADD;
1222 _op->cid = _get_coll_id(oldcid);
1223 _op->oid = _get_object_id(oid);
1224 _op->dest_cid = _get_coll_id(cid);
1225 data.ops++;
1226
1227 _op = _get_next_op();
1228 _op->op = OP_COLL_REMOVE;
1229 _op->cid = _get_coll_id(oldcid);
1230 _op->oid = _get_object_id(oid);
1231 data.ops++;
1232 }
1233 void collection_move_rename(const coll_t& oldcid, const ghobject_t& oldoid,
11fdf7f2 1234 const coll_t &cid, const ghobject_t& oid) {
7c673cae
FG
1235 Op* _op = _get_next_op();
1236 _op->op = OP_COLL_MOVE_RENAME;
1237 _op->cid = _get_coll_id(oldcid);
1238 _op->oid = _get_object_id(oldoid);
1239 _op->dest_cid = _get_coll_id(cid);
1240 _op->dest_oid = _get_object_id(oid);
1241 data.ops++;
1242 }
11fdf7f2 1243 void try_rename(const coll_t &cid, const ghobject_t& oldoid,
7c673cae
FG
1244 const ghobject_t& oid) {
1245 Op* _op = _get_next_op();
1246 _op->op = OP_TRY_RENAME;
1247 _op->cid = _get_coll_id(cid);
1248 _op->oid = _get_object_id(oldoid);
1249 _op->dest_oid = _get_object_id(oid);
1250 data.ops++;
1251 }
1252
1253 /// Remove omap from oid
1254 void omap_clear(
11fdf7f2 1255 const coll_t &cid, ///< [in] Collection containing oid
7c673cae
FG
1256 const ghobject_t &oid ///< [in] Object from which to remove omap
1257 ) {
1258 Op* _op = _get_next_op();
1259 _op->op = OP_OMAP_CLEAR;
1260 _op->cid = _get_coll_id(cid);
1261 _op->oid = _get_object_id(oid);
1262 data.ops++;
1263 }
1264 /// Set keys on oid omap. Replaces duplicate keys.
1265 void omap_setkeys(
1266 const coll_t& cid, ///< [in] Collection containing oid
1267 const ghobject_t &oid, ///< [in] Object to update
1268 const map<string, bufferlist> &attrset ///< [in] Replacement keys and values
1269 ) {
11fdf7f2 1270 using ceph::encode;
7c673cae
FG
1271 Op* _op = _get_next_op();
1272 _op->op = OP_OMAP_SETKEYS;
1273 _op->cid = _get_coll_id(cid);
1274 _op->oid = _get_object_id(oid);
11fdf7f2 1275 encode(attrset, data_bl);
7c673cae
FG
1276 data.ops++;
1277 }
1278
1279 /// Set keys on an oid omap (bufferlist variant).
1280 void omap_setkeys(
11fdf7f2 1281 const coll_t &cid, ///< [in] Collection containing oid
7c673cae
FG
1282 const ghobject_t &oid, ///< [in] Object to update
1283 const bufferlist &attrset_bl ///< [in] Replacement keys and values
1284 ) {
1285 Op* _op = _get_next_op();
1286 _op->op = OP_OMAP_SETKEYS;
1287 _op->cid = _get_coll_id(cid);
1288 _op->oid = _get_object_id(oid);
1289 data_bl.append(attrset_bl);
1290 data.ops++;
1291 }
1292
1293 /// Remove keys from oid omap
1294 void omap_rmkeys(
11fdf7f2 1295 const coll_t &cid, ///< [in] Collection containing oid
7c673cae
FG
1296 const ghobject_t &oid, ///< [in] Object from which to remove the omap
1297 const set<string> &keys ///< [in] Keys to clear
1298 ) {
11fdf7f2 1299 using ceph::encode;
7c673cae
FG
1300 Op* _op = _get_next_op();
1301 _op->op = OP_OMAP_RMKEYS;
1302 _op->cid = _get_coll_id(cid);
1303 _op->oid = _get_object_id(oid);
11fdf7f2 1304 encode(keys, data_bl);
7c673cae
FG
1305 data.ops++;
1306 }
1307
1308 /// Remove keys from oid omap
1309 void omap_rmkeys(
11fdf7f2 1310 const coll_t &cid, ///< [in] Collection containing oid
7c673cae
FG
1311 const ghobject_t &oid, ///< [in] Object from which to remove the omap
1312 const bufferlist &keys_bl ///< [in] Keys to clear
1313 ) {
1314 Op* _op = _get_next_op();
1315 _op->op = OP_OMAP_RMKEYS;
1316 _op->cid = _get_coll_id(cid);
1317 _op->oid = _get_object_id(oid);
1318 data_bl.append(keys_bl);
1319 data.ops++;
1320 }
1321
1322 /// Remove key range from oid omap
1323 void omap_rmkeyrange(
11fdf7f2 1324 const coll_t &cid, ///< [in] Collection containing oid
7c673cae
FG
1325 const ghobject_t &oid, ///< [in] Object from which to remove the omap keys
1326 const string& first, ///< [in] first key in range
1327 const string& last ///< [in] first key past range, range is [first,last)
1328 ) {
11fdf7f2 1329 using ceph::encode;
7c673cae
FG
1330 Op* _op = _get_next_op();
1331 _op->op = OP_OMAP_RMKEYRANGE;
1332 _op->cid = _get_coll_id(cid);
1333 _op->oid = _get_object_id(oid);
11fdf7f2
TL
1334 encode(first, data_bl);
1335 encode(last, data_bl);
7c673cae
FG
1336 data.ops++;
1337 }
1338
1339 /// Set omap header
1340 void omap_setheader(
11fdf7f2 1341 const coll_t &cid, ///< [in] Collection containing oid
7c673cae
FG
1342 const ghobject_t &oid, ///< [in] Object
1343 const bufferlist &bl ///< [in] Header value
1344 ) {
11fdf7f2 1345 using ceph::encode;
7c673cae
FG
1346 Op* _op = _get_next_op();
1347 _op->op = OP_OMAP_SETHEADER;
1348 _op->cid = _get_coll_id(cid);
1349 _op->oid = _get_object_id(oid);
11fdf7f2 1350 encode(bl, data_bl);
7c673cae
FG
1351 data.ops++;
1352 }
1353
1354 /// Split collection based on given prefixes, objects matching the specified bits/rem are
1355 /// moved to the new collection
1356 void split_collection(
11fdf7f2 1357 const coll_t &cid,
7c673cae
FG
1358 uint32_t bits,
1359 uint32_t rem,
11fdf7f2 1360 const coll_t &destination) {
7c673cae
FG
1361 Op* _op = _get_next_op();
1362 _op->op = OP_SPLIT_COLLECTION2;
1363 _op->cid = _get_coll_id(cid);
1364 _op->dest_cid = _get_coll_id(destination);
1365 _op->split_bits = bits;
1366 _op->split_rem = rem;
1367 data.ops++;
1368 }
1369
11fdf7f2
TL
1370 /// Merge collection into another.
1371 void merge_collection(
7c673cae 1372 coll_t cid,
11fdf7f2
TL
1373 coll_t destination,
1374 uint32_t bits) {
1375 Op* _op = _get_next_op();
1376 _op->op = OP_MERGE_COLLECTION;
1377 _op->cid = _get_coll_id(cid);
1378 _op->dest_cid = _get_coll_id(destination);
1379 _op->split_bits = bits;
1380 data.ops++;
1381 }
1382
1383 void collection_set_bits(
1384 const coll_t &cid,
7c673cae
FG
1385 int bits) {
1386 Op* _op = _get_next_op();
1387 _op->op = OP_COLL_SET_BITS;
1388 _op->cid = _get_coll_id(cid);
1389 _op->split_bits = bits;
1390 data.ops++;
1391 }
1392
1393 /// Set allocation hint for an object
1394 /// make 0 values(expected_object_size, expected_write_size) noops for all implementations
1395 void set_alloc_hint(
11fdf7f2 1396 const coll_t &cid,
7c673cae
FG
1397 const ghobject_t &oid,
1398 uint64_t expected_object_size,
1399 uint64_t expected_write_size,
1400 uint32_t flags
1401 ) {
1402 Op* _op = _get_next_op();
1403 _op->op = OP_SETALLOCHINT;
1404 _op->cid = _get_coll_id(cid);
1405 _op->oid = _get_object_id(oid);
1406 _op->expected_object_size = expected_object_size;
1407 _op->expected_write_size = expected_write_size;
1408 _op->alloc_hint_flags = flags;
1409 data.ops++;
1410 }
1411
1412 void encode(bufferlist& bl) const {
1413 //layout: data_bl + op_bl + coll_index + object_index + data
1414 ENCODE_START(9, 9, bl);
11fdf7f2
TL
1415 encode(data_bl, bl);
1416 encode(op_bl, bl);
1417 encode(coll_index, bl);
1418 encode(object_index, bl);
7c673cae
FG
1419 data.encode(bl);
1420 ENCODE_FINISH(bl);
1421 }
1422
11fdf7f2 1423 void decode(bufferlist::const_iterator &bl) {
7c673cae
FG
1424 DECODE_START(9, bl);
1425 DECODE_OLDEST(9);
1426
11fdf7f2
TL
1427 decode(data_bl, bl);
1428 decode(op_bl, bl);
1429 decode(coll_index, bl);
1430 decode(object_index, bl);
7c673cae
FG
1431 data.decode(bl);
1432 coll_id = coll_index.size();
1433 object_id = object_index.size();
1434
1435 DECODE_FINISH(bl);
1436 }
1437
1438 void dump(ceph::Formatter *f);
1439 static void generate_test_instances(list<Transaction*>& o);
1440 };
1441
11fdf7f2
TL
1442 int queue_transaction(CollectionHandle& ch,
1443 Transaction&& t,
1444 TrackedOpRef op = TrackedOpRef(),
1445 ThreadPool::TPHandle *handle = NULL) {
7c673cae
FG
1446 vector<Transaction> tls;
1447 tls.push_back(std::move(t));
11fdf7f2 1448 return queue_transactions(ch, tls, op, handle);
7c673cae
FG
1449 }
1450
1451 virtual int queue_transactions(
11fdf7f2 1452 CollectionHandle& ch, vector<Transaction>& tls,
7c673cae
FG
1453 TrackedOpRef op = TrackedOpRef(),
1454 ThreadPool::TPHandle *handle = NULL) = 0;
1455
1456
7c673cae
FG
1457 public:
1458 ObjectStore(CephContext* cct,
1459 const std::string& path_) : path(path_), cct(cct) {}
1460 virtual ~ObjectStore() {}
1461
1462 // no copying
1463 explicit ObjectStore(const ObjectStore& o) = delete;
1464 const ObjectStore& operator=(const ObjectStore& o) = delete;
1465
1466 // versioning
1467 virtual int upgrade() {
1468 return 0;
1469 }
1470
1471 virtual void get_db_statistics(Formatter *f) { }
1472 virtual void generate_db_histogram(Formatter *f) { }
11fdf7f2 1473 virtual int flush_cache(ostream *os = NULL) { return -1; }
7c673cae 1474 virtual void dump_perf_counters(Formatter *f) {}
11fdf7f2
TL
1475 virtual void dump_cache_stats(Formatter *f) {}
1476 virtual void dump_cache_stats(ostream& os) {}
7c673cae
FG
1477
1478 virtual string get_type() = 0;
1479
1480 // mgmt
1481 virtual bool test_mount_in_use() = 0;
1482 virtual int mount() = 0;
1483 virtual int umount() = 0;
1484 virtual int fsck(bool deep) {
1485 return -EOPNOTSUPP;
1486 }
3efd9988
FG
1487 virtual int repair(bool deep) {
1488 return -EOPNOTSUPP;
1489 }
7c673cae
FG
1490
1491 virtual void set_cache_shards(unsigned num) { }
1492
1493 /**
1494 * Returns 0 if the hobject is valid, -error otherwise
1495 *
1496 * Errors:
1497 * -ENAMETOOLONG: locator/namespace/name too large
1498 */
1499 virtual int validate_hobject_key(const hobject_t &obj) const = 0;
1500
1501 virtual unsigned get_max_attr_name_length() = 0;
1502 virtual int mkfs() = 0; // wipe
1503 virtual int mkjournal() = 0; // journal only
1504 virtual bool needs_journal() = 0; //< requires a journal
1505 virtual bool wants_journal() = 0; //< prefers a journal
1506 virtual bool allows_journal() = 0; //< allows a journal
1507
11fdf7f2
TL
1508 /// enumerate hardware devices (by 'devname', e.g., 'sda' as in /sys/block/sda)
1509 virtual int get_devices(std::set<string> *devls) {
1510 return -EOPNOTSUPP;
1511 }
1512
1513 /// true if a txn is readable immediately after it is queued.
1514 virtual bool is_sync_onreadable() const {
1515 return true;
1516 }
1517
31f18b77
FG
1518 /**
1519 * is_rotational
1520 *
1521 * Check whether store is backed by a rotational (HDD) or non-rotational
1522 * (SSD) device.
1523 *
1524 * This must be usable *before* the store is mounted.
1525 *
1526 * @return true for HDD, false for SSD
1527 */
1528 virtual bool is_rotational() {
1529 return true;
1530 }
1531
d2e6a577
FG
1532 /**
1533 * is_journal_rotational
1534 *
1535 * Check whether journal is backed by a rotational (HDD) or non-rotational
1536 * (SSD) device.
1537 *
1538 *
1539 * @return true for HDD, false for SSD
1540 */
1541 virtual bool is_journal_rotational() {
1542 return true;
1543 }
1544
224ce89b
WB
1545 virtual string get_default_device_class() {
1546 return is_rotational() ? "hdd" : "ssd";
1547 }
1548
11fdf7f2
TL
1549 virtual int get_numa_node(
1550 int *numa_node,
1551 set<int> *nodes,
1552 set<string> *failed) {
1553 return -EOPNOTSUPP;
1554 }
1555
1556
7c673cae
FG
1557 virtual bool can_sort_nibblewise() {
1558 return false; // assume a backend cannot, unless it says otherwise
1559 }
1560
11fdf7f2
TL
1561 virtual int statfs(struct store_statfs_t *buf,
1562 osd_alert_list_t* alerts = nullptr) = 0;
1563 virtual int pool_statfs(uint64_t pool_id, struct store_statfs_t *buf) = 0;
7c673cae
FG
1564
1565 virtual void collect_metadata(map<string,string> *pm) { }
1566
1567 /**
1568 * write_meta - write a simple configuration key out-of-band
1569 *
1570 * Write a simple key/value pair for basic store configuration
1571 * (e.g., a uuid or magic number) to an unopened/unmounted store.
1572 * The default implementation writes this to a plaintext file in the
1573 * path.
1574 *
1575 * A newline is appended.
1576 *
1577 * @param key key name (e.g., "fsid")
1578 * @param value value (e.g., a uuid rendered as a string)
1579 * @returns 0 for success, or an error code
1580 */
1581 virtual int write_meta(const std::string& key,
1582 const std::string& value);
1583
1584 /**
1585 * read_meta - read a simple configuration key out-of-band
1586 *
1587 * Read a simple key value to an unopened/mounted store.
1588 *
1589 * Trailing whitespace is stripped off.
1590 *
1591 * @param key key name
1592 * @param value pointer to value string
1593 * @returns 0 for success, or an error code
1594 */
1595 virtual int read_meta(const std::string& key,
1596 std::string *value);
1597
1598 /**
1599 * get ideal max value for collection_list()
1600 *
1601 * default to some arbitrary values; the implementation will override.
1602 */
1603 virtual int get_ideal_list_max() { return 64; }
1604
1605
1606 /**
1607 * get a collection handle
1608 *
1609 * Provide a trivial handle as a default to avoid converting legacy
1610 * implementations.
1611 */
11fdf7f2
TL
1612 virtual CollectionHandle open_collection(const coll_t &cid) = 0;
1613
1614 /**
1615 * get a collection handle for a soon-to-be-created collection
1616 *
1617 * This handle must be used by queue_transaction that includes a
1618 * create_collection call in order to become valid. It will become the
1619 * reference to the created collection.
1620 */
1621 virtual CollectionHandle create_new_collection(const coll_t &cid) = 0;
7c673cae 1622
11fdf7f2
TL
1623 /**
1624 * set ContextQueue for a collection
1625 *
1626 * After that, oncommits of Transaction will queue into commit_queue.
1627 * And osd ShardThread will call oncommits.
1628 */
1629 virtual void set_collection_commit_queue(const coll_t &cid, ContextQueue *commit_queue) = 0;
7c673cae
FG
1630
1631 /**
1632 * Synchronous read operations
1633 */
1634
1635 /**
1636 * exists -- Test for existance of object
1637 *
1638 * @param cid collection for object
1639 * @param oid oid of object
1640 * @returns true if object exists, false otherwise
1641 */
11fdf7f2 1642 virtual bool exists(CollectionHandle& c, const ghobject_t& oid) = 0;
7c673cae
FG
1643 /**
1644 * set_collection_opts -- set pool options for a collection
1645 *
1646 * @param cid collection
1647 * @param opts new collection options
1648 * @returns 0 on success, negative error code on failure.
1649 */
1650 virtual int set_collection_opts(
11fdf7f2 1651 CollectionHandle& c,
7c673cae
FG
1652 const pool_opts_t& opts) = 0;
1653
1654 /**
1655 * stat -- get information for an object
1656 *
1657 * @param cid collection for object
1658 * @param oid oid of object
1659 * @param st output information for the object
1660 * @param allow_eio if false, assert on -EIO operation failure
1661 * @returns 0 on success, negative error code on failure.
1662 */
7c673cae
FG
1663 virtual int stat(
1664 CollectionHandle &c,
1665 const ghobject_t& oid,
1666 struct stat *st,
11fdf7f2 1667 bool allow_eio = false) = 0;
7c673cae
FG
1668 /**
1669 * read -- read a byte range of data from an object
1670 *
1671 * Note: if reading from an offset past the end of the object, we
1672 * return 0 (not, say, -EINVAL).
1673 *
1674 * @param cid collection for object
1675 * @param oid oid of object
1676 * @param offset location offset of first byte to be read
1677 * @param len number of bytes to be read
1678 * @param bl output bufferlist
1679 * @param op_flags is CEPH_OSD_OP_FLAG_*
7c673cae
FG
1680 * @returns number of bytes read on success, or negative error code on failure.
1681 */
7c673cae
FG
1682 virtual int read(
1683 CollectionHandle &c,
1684 const ghobject_t& oid,
1685 uint64_t offset,
1686 size_t len,
1687 bufferlist& bl,
11fdf7f2 1688 uint32_t op_flags = 0) = 0;
7c673cae
FG
1689
1690 /**
1691 * fiemap -- get extent map of data of an object
1692 *
1693 * Returns an encoded map of the extents of an object's data portion
1694 * (map<offset,size>).
1695 *
1696 * A non-enlightened implementation is free to return the extent (offset, len)
1697 * as the sole extent.
1698 *
1699 * @param cid collection for object
1700 * @param oid oid of object
1701 * @param offset location offset of first byte to be read
1702 * @param len number of bytes to be read
1703 * @param bl output bufferlist for extent map information.
1704 * @returns 0 on success, negative error code on failure.
1705 */
7c673cae 1706 virtual int fiemap(CollectionHandle& c, const ghobject_t& oid,
11fdf7f2 1707 uint64_t offset, size_t len, bufferlist& bl) = 0;
7c673cae 1708 virtual int fiemap(CollectionHandle& c, const ghobject_t& oid,
11fdf7f2 1709 uint64_t offset, size_t len, map<uint64_t, uint64_t>& destmap) = 0;
7c673cae
FG
1710
1711 /**
1712 * getattr -- get an xattr of an object
1713 *
1714 * @param cid collection for object
1715 * @param oid oid of object
1716 * @param name name of attr to read
1717 * @param value place to put output result.
1718 * @returns 0 on success, negative error code on failure.
1719 */
7c673cae 1720 virtual int getattr(CollectionHandle &c, const ghobject_t& oid,
11fdf7f2 1721 const char *name, bufferptr& value) = 0;
7c673cae
FG
1722
1723 /**
1724 * getattr -- get an xattr of an object
1725 *
1726 * @param cid collection for object
1727 * @param oid oid of object
1728 * @param name name of attr to read
1729 * @param value place to put output result.
1730 * @returns 0 on success, negative error code on failure.
1731 */
7c673cae
FG
1732 int getattr(
1733 CollectionHandle &c, const ghobject_t& oid,
1734 const string& name, bufferlist& value) {
1735 bufferptr bp;
1736 int r = getattr(c, oid, name.c_str(), bp);
1737 value.push_back(bp);
1738 return r;
1739 }
1740
1741 /**
1742 * getattrs -- get all of the xattrs of an object
1743 *
1744 * @param cid collection for object
1745 * @param oid oid of object
1746 * @param aset place to put output result.
1747 * @returns 0 on success, negative error code on failure.
1748 */
7c673cae 1749 virtual int getattrs(CollectionHandle &c, const ghobject_t& oid,
11fdf7f2 1750 map<string,bufferptr>& aset) = 0;
7c673cae
FG
1751
1752 /**
1753 * getattrs -- get all of the xattrs of an object
1754 *
1755 * @param cid collection for object
1756 * @param oid oid of object
1757 * @param aset place to put output result.
1758 * @returns 0 on success, negative error code on failure.
1759 */
7c673cae
FG
1760 int getattrs(CollectionHandle &c, const ghobject_t& oid,
1761 map<string,bufferlist>& aset) {
1762 map<string,bufferptr> bmap;
1763 int r = getattrs(c, oid, bmap);
1764 for (map<string,bufferptr>::iterator i = bmap.begin();
1765 i != bmap.end();
1766 ++i) {
1767 aset[i->first].append(i->second);
1768 }
1769 return r;
1770 }
1771
1772
1773 // collections
1774
1775 /**
1776 * list_collections -- get all of the collections known to this ObjectStore
1777 *
1778 * @param ls list of the collections in sorted order.
1779 * @returns 0 on success, negative error code on failure.
1780 */
1781 virtual int list_collections(vector<coll_t>& ls) = 0;
1782
1783 /**
1784 * does a collection exist?
1785 *
1786 * @param c collection
1787 * @returns true if it exists, false otherwise
1788 */
1789 virtual bool collection_exists(const coll_t& c) = 0;
1790
1791 /**
1792 * is a collection empty?
1793 *
1794 * @param c collection
1795 * @param empty true if the specified collection is empty, false otherwise
1796 * @returns 0 on success, negative error code on failure.
1797 */
11fdf7f2 1798 virtual int collection_empty(CollectionHandle& c, bool *empty) = 0;
7c673cae
FG
1799
1800 /**
1801 * return the number of significant bits of the coll_t::pgid.
1802 *
1803 * This should return what the last create_collection or split_collection
1804 * set. A legacy backend may return -EAGAIN if the value is unavailable
1805 * (because we upgraded from an older version, e.g., FileStore).
1806 */
11fdf7f2 1807 virtual int collection_bits(CollectionHandle& c) = 0;
7c673cae
FG
1808
1809
1810 /**
1811 * list the contents of a collection that fall in the range [start, end), returning no more than the specified number of results
1812 *
1813 * @param c collection
1814 * @param start list object that sort >= this value
1815 * @param end list objects that sort < this value
1816 * @param max return no more than this many results
1817 * @param seq return no objects with snap < seq
1818 * @param ls [out] result
1819 * @param next [out] next item sorts >= this value
1820 * @return zero on success, or negative error
1821 */
7c673cae
FG
1822 virtual int collection_list(CollectionHandle &c,
1823 const ghobject_t& start, const ghobject_t& end,
1824 int max,
11fdf7f2 1825 vector<ghobject_t> *ls, ghobject_t *next) = 0;
7c673cae
FG
1826
1827
1828 /// OMAP
1829 /// Get omap contents
7c673cae
FG
1830 virtual int omap_get(
1831 CollectionHandle &c, ///< [in] Collection containing oid
1832 const ghobject_t &oid, ///< [in] Object containing omap
1833 bufferlist *header, ///< [out] omap header
1834 map<string, bufferlist> *out /// < [out] Key to value map
11fdf7f2 1835 ) = 0;
7c673cae
FG
1836
1837 /// Get omap header
7c673cae
FG
1838 virtual int omap_get_header(
1839 CollectionHandle &c, ///< [in] Collection containing oid
1840 const ghobject_t &oid, ///< [in] Object containing omap
1841 bufferlist *header, ///< [out] omap header
1842 bool allow_eio = false ///< [in] don't assert on eio
11fdf7f2 1843 ) = 0;
7c673cae
FG
1844
1845 /// Get keys defined on oid
7c673cae
FG
1846 virtual int omap_get_keys(
1847 CollectionHandle &c, ///< [in] Collection containing oid
1848 const ghobject_t &oid, ///< [in] Object containing omap
1849 set<string> *keys ///< [out] Keys defined on oid
11fdf7f2 1850 ) = 0;
7c673cae
FG
1851
1852 /// Get key values
7c673cae
FG
1853 virtual int omap_get_values(
1854 CollectionHandle &c, ///< [in] Collection containing oid
1855 const ghobject_t &oid, ///< [in] Object containing omap
1856 const set<string> &keys, ///< [in] Keys to get
1857 map<string, bufferlist> *out ///< [out] Returned keys and values
11fdf7f2 1858 ) = 0;
7c673cae
FG
1859
1860 /// Filters keys into out which are defined on oid
7c673cae
FG
1861 virtual int omap_check_keys(
1862 CollectionHandle &c, ///< [in] Collection containing oid
1863 const ghobject_t &oid, ///< [in] Object containing omap
1864 const set<string> &keys, ///< [in] Keys to check
1865 set<string> *out ///< [out] Subset of keys defined on oid
11fdf7f2 1866 ) = 0;
7c673cae
FG
1867
1868 /**
1869 * Returns an object map iterator
1870 *
1871 * Warning! The returned iterator is an implicit lock on filestore
1872 * operations in c. Do not use filestore methods on c while the returned
1873 * iterator is live. (Filling in a transaction is no problem).
1874 *
1875 * @return iterator, null on error
1876 */
7c673cae
FG
1877 virtual ObjectMap::ObjectMapIterator get_omap_iterator(
1878 CollectionHandle &c, ///< [in] collection
1879 const ghobject_t &oid ///< [in] object
11fdf7f2 1880 ) = 0;
7c673cae
FG
1881
1882 virtual int flush_journal() { return -EOPNOTSUPP; }
1883
1884 virtual int dump_journal(ostream& out) { return -EOPNOTSUPP; }
1885
1886 virtual int snapshot(const string& name) { return -EOPNOTSUPP; }
1887
1888 /**
1889 * Set and get internal fsid for this instance. No external data is modified
1890 */
1891 virtual void set_fsid(uuid_d u) = 0;
1892 virtual uuid_d get_fsid() = 0;
1893
1894 /**
1895 * Estimates additional disk space used by the specified amount of objects and caused by file allocation granularity and metadata store
1896 * - num objects - total (including whiteouts) object count to measure used space for.
1897 */
1898 virtual uint64_t estimate_objects_overhead(uint64_t num_objects) = 0;
1899
1900
1901 // DEBUG
1902 virtual void inject_data_error(const ghobject_t &oid) {}
1903 virtual void inject_mdata_error(const ghobject_t &oid) {}
224ce89b
WB
1904
1905 virtual void compact() {}
28e407b8
AA
1906 virtual bool has_builtin_csum() const {
1907 return false;
1908 }
7c673cae
FG
1909};
1910WRITE_CLASS_ENCODER(ObjectStore::Transaction)
1911WRITE_CLASS_ENCODER(ObjectStore::Transaction::TransactionData)
1912
7c673cae
FG
1913ostream& operator<<(ostream& out, const ObjectStore::Transaction& tx);
1914
1915#endif