]> git.proxmox.com Git - ceph.git/blame - ceph/src/os/Transaction.h
import 15.2.9
[ceph.git] / ceph / src / os / Transaction.h
CommitLineData
9f95a23c 1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
11fdf7f2
TL
2// vim: ts=8 sw=2 smarttab
3
9f95a23c
TL
4#pragma once
5
11fdf7f2
TL
6#include <map>
7
8#include "include/int_types.h"
9#include "include/buffer.h"
10#include "osd/osd_types.h"
11
9f95a23c
TL
12#define OPS_PER_PTR 32
13
14void decode_str_str_map_to_bl(ceph::buffer::list::const_iterator& p, ceph::buffer::list *out);
15void decode_str_set_to_bl(ceph::buffer::list::const_iterator& p, ceph::buffer::list *out);
16
17
11fdf7f2
TL
18/*********************************
19 * transaction
20 *
21 * A Transaction represents a sequence of primitive mutation
22 * operations.
23 *
24 * Three events in the life of a Transaction result in
25 * callbacks. Any Transaction can contain any number of callback
26 * objects (Context) for any combination of the three classes of
27 * callbacks:
28 *
29 * on_applied_sync, on_applied, and on_commit.
30 *
31 * The "on_applied" and "on_applied_sync" callbacks are invoked when
32 * the modifications requested by the Transaction are visible to
33 * subsequent ObjectStore operations, i.e., the results are
34 * readable. The only conceptual difference between on_applied and
35 * on_applied_sync is the specific thread and locking environment in
36 * which the callbacks operate. "on_applied_sync" is called
37 * directly by an ObjectStore execution thread. It is expected to
38 * execute quickly and must not acquire any locks of the calling
39 * environment. Conversely, "on_applied" is called from the separate
40 * Finisher thread, meaning that it can contend for calling
41 * environment locks. NB, on_applied and on_applied_sync are
42 * sometimes called on_readable and on_readable_sync.
43 *
44 * The "on_commit" callback is also called from the Finisher thread
45 * and indicates that all of the mutations have been durably
46 * committed to stable storage (i.e., are now software/hardware
47 * crashproof).
48 *
49 * At the implementation level, each mutation primitive (and its
50 * associated data) can be serialized to a single buffer. That
51 * serialization, however, does not copy any data, but (using the
9f95a23c 52 * ceph::buffer::list library) will reference the original buffers. This
11fdf7f2
TL
53 * implies that the buffer that contains the data being submitted
54 * must remain stable until the on_commit callback completes. In
9f95a23c 55 * practice, ceph::buffer::list handles all of this for you and this
11fdf7f2
TL
56 * subtlety is only relevant if you are referencing an existing
57 * buffer via buffer::raw_static.
58 *
59 * Some implementations of ObjectStore choose to implement their own
60 * form of journaling that uses the serialized form of a
61 * Transaction. This requires that the encode/decode logic properly
62 * version itself and handle version upgrades that might change the
63 * format of the encoded Transaction. This has already happened a
64 * couple of times and the Transaction object contains some helper
65 * variables that aid in this legacy decoding:
66 *
67 * sobject_encoding detects an older/simpler version of oid
68 * present in pre-bobtail versions of ceph. use_pool_override
69 * also detects a situation where the pool of an oid can be
70 * overridden for legacy operations/buffers. For non-legacy
71 * implementations of ObjectStore, neither of these fields are
72 * relevant.
73 *
74 *
75 * TRANSACTION ISOLATION
76 *
77 * Except as noted above, isolation is the responsibility of the
78 * caller. In other words, if any storage element (storage element
79 * == any of the four portions of an object as described above) is
80 * altered by a transaction (including deletion), the caller
81 * promises not to attempt to read that element while the
82 * transaction is pending (here pending means from the time of
83 * issuance until the "on_applied_sync" callback has been
84 * received). Violations of isolation need not be detected by
85 * ObjectStore and there is no corresponding error mechanism for
86 * reporting an isolation violation (crashing would be the
87 * appropriate way to report an isolation violation if detected).
88 *
89 * Enumeration operations may violate transaction isolation as
90 * described above when a storage element is being created or
91 * deleted as part of a transaction. In this case, ObjectStore is
92 * allowed to consider the enumeration operation to either precede
93 * or follow the violating transaction element. In other words, the
94 * presence/absence of the mutated element in the enumeration is
95 * entirely at the discretion of ObjectStore. The arbitrary ordering
96 * applies independently to each transaction element. For example,
97 * if a transaction contains two mutating elements "create A" and
98 * "delete B", and an enumeration operation is performed while this
99 * transaction is pending, it is permissible for ObjectStore to
100 * report any of the four possible combinations of the existence of
101 * A and B.
102 *
103 */
104namespace ceph::os {
105class Transaction {
106public:
// Operation codes stored in Op::op.  Listed in ascending numeric order;
// the values are part of the encoded format and must never change.
// The trailing comment on each entry names the operands the op carries.
enum {
  OP_NOP               = 0,
  OP_CREATE            = 7,   // cid, oid
  OP_COLL_MOVE         = 8,   // newcid, oldcid, oid
  OP_TOUCH             = 9,   // cid, oid
  OP_WRITE             = 10,  // cid, oid, offset, len, bl
  OP_ZERO              = 11,  // cid, oid, offset, len
  OP_TRUNCATE          = 12,  // cid, oid, len
  OP_REMOVE            = 13,  // cid, oid
  OP_SETATTR           = 14,  // cid, oid, attrname, bl
  OP_SETATTRS          = 15,  // cid, oid, attrset
  OP_RMATTR            = 16,  // cid, oid, attrname
  OP_CLONE             = 17,  // cid, oid, newoid
  OP_CLONERANGE        = 18,  // cid, oid, newoid, offset, len
  OP_TRIMCACHE         = 19,  // cid, oid, offset, len  **DEPRECATED**
  OP_MKCOLL            = 20,  // cid
  OP_RMCOLL            = 21,  // cid
  OP_COLL_ADD          = 22,  // cid, oldcid, oid
  OP_COLL_REMOVE       = 23,  // cid, oid
  OP_COLL_SETATTR      = 24,  // cid, attrname, bl
  OP_COLL_RMATTR       = 25,  // cid, attrname
  OP_COLL_SETATTRS     = 26,  // cid, attrset
  OP_RMATTRS           = 28,  // cid, oid
  OP_COLL_RENAME       = 29,  // cid, newcid
  OP_CLONERANGE2       = 30,  // cid, oid, newoid, srcoff, len, dstoff
  OP_OMAP_CLEAR        = 31,  // cid
  OP_OMAP_SETKEYS      = 32,  // cid, attrset
  OP_OMAP_RMKEYS       = 33,  // cid, keyset
  OP_OMAP_SETHEADER    = 34,  // cid, header
  OP_SPLIT_COLLECTION  = 35,  // cid, bits, destination
  OP_SPLIT_COLLECTION2 = 36,  // cid, bits, destination
                              // (doesn't create the destination)
  OP_OMAP_RMKEYRANGE   = 37,  // cid, oid, firstkey, lastkey
  OP_COLL_MOVE_RENAME  = 38,  // oldcid, oldoid, newcid, newoid
  OP_SETALLOCHINT      = 39,  // cid, oid, object_size, write_size
  OP_COLL_HINT         = 40,  // cid, type, bl
  OP_TRY_RENAME        = 41,  // oldcid, oldoid, newoid
  OP_COLL_SET_BITS     = 42,  // cid, bits
  OP_MERGE_COLLECTION  = 43,  // cid, destination
};
155
156 // Transaction hint type
// Hint kinds carried in Op::hint_type by an OP_COLL_HINT operation.
enum {
  COLL_HINT_EXPECTED_NUM_OBJECTS = 1
};
160
161 struct Op {
eafe8130
TL
162 ceph_le32 op;
163 ceph_le32 cid;
164 ceph_le32 oid;
165 ceph_le64 off;
166 ceph_le64 len;
167 ceph_le32 dest_cid;
168 ceph_le32 dest_oid; //OP_CLONE, OP_CLONERANGE
169 ceph_le64 dest_off; //OP_CLONERANGE
11fdf7f2 170 union {
9f95a23c
TL
171 struct {
172 ceph_le32 hint_type; //OP_COLL_HINT
173 } __attribute__ ((packed));
174 struct {
175 ceph_le32 alloc_hint_flags; //OP_SETALLOCHINT
176 } __attribute__ ((packed));
177 } __attribute__ ((packed));
eafe8130
TL
178 ceph_le64 expected_object_size; //OP_SETALLOCHINT
179 ceph_le64 expected_write_size; //OP_SETALLOCHINT
180 ceph_le32 split_bits; //OP_SPLIT_COLLECTION2,OP_COLL_SET_BITS,
11fdf7f2 181 //OP_MKCOLL
eafe8130 182 ceph_le32 split_rem; //OP_SPLIT_COLLECTION2
11fdf7f2
TL
183 } __attribute__ ((packed)) ;
184
185 struct TransactionData {
eafe8130
TL
186 ceph_le64 ops;
187 ceph_le32 largest_data_len;
188 ceph_le32 largest_data_off;
189 ceph_le32 largest_data_off_in_data_bl;
190 ceph_le32 fadvise_flags;
11fdf7f2
TL
191
192 TransactionData() noexcept :
9f95a23c 193 ops(init_le64(0)),
eafe8130
TL
194 largest_data_len(init_le32(0)),
195 largest_data_off(init_le32(0)),
196 largest_data_off_in_data_bl(init_le32(0)),
9f95a23c 197 fadvise_flags(init_le32(0)) { }
11fdf7f2 198
9f95a23c 199 // override default move operations to reset default values
11fdf7f2 200 TransactionData(TransactionData&& other) noexcept :
9f95a23c 201 ops(other.ops),
11fdf7f2
TL
202 largest_data_len(other.largest_data_len),
203 largest_data_off(other.largest_data_off),
204 largest_data_off_in_data_bl(other.largest_data_off_in_data_bl),
205 fadvise_flags(other.fadvise_flags) {
206 other.ops = 0;
207 other.largest_data_len = 0;
208 other.largest_data_off = 0;
209 other.largest_data_off_in_data_bl = 0;
210 other.fadvise_flags = 0;
211 }
212 TransactionData& operator=(TransactionData&& other) noexcept {
213 ops = other.ops;
214 largest_data_len = other.largest_data_len;
215 largest_data_off = other.largest_data_off;
216 largest_data_off_in_data_bl = other.largest_data_off_in_data_bl;
217 fadvise_flags = other.fadvise_flags;
218 other.ops = 0;
219 other.largest_data_len = 0;
220 other.largest_data_off = 0;
221 other.largest_data_off_in_data_bl = 0;
222 other.fadvise_flags = 0;
223 return *this;
224 }
225
226 TransactionData(const TransactionData& other) = default;
227 TransactionData& operator=(const TransactionData& other) = default;
228
9f95a23c 229 void encode(ceph::buffer::list& bl) const {
11fdf7f2
TL
230 bl.append((char*)this, sizeof(TransactionData));
231 }
9f95a23c 232 void decode(ceph::buffer::list::const_iterator &bl) {
11fdf7f2
TL
233 bl.copy(sizeof(TransactionData), (char*)this);
234 }
235 } __attribute__ ((packed)) ;
236
237private:
238 TransactionData data;
239
9f95a23c
TL
240 std::map<coll_t, uint32_t> coll_index;
241 std::map<ghobject_t, uint32_t> object_index;
11fdf7f2 242
9f95a23c
TL
243 uint32_t coll_id = 0;
244 uint32_t object_id = 0;
11fdf7f2 245
9f95a23c
TL
246 ceph::buffer::list data_bl;
247 ceph::buffer::list op_bl;
11fdf7f2
TL
248
249 std::list<Context *> on_applied;
250 std::list<Context *> on_commit;
251 std::list<Context *> on_applied_sync;
252
253public:
254 Transaction() = default;
255
9f95a23c 256 explicit Transaction(ceph::buffer::list::const_iterator &dp) {
11fdf7f2
TL
257 decode(dp);
258 }
9f95a23c 259 explicit Transaction(ceph::buffer::list &nbl) {
11fdf7f2
TL
260 auto dp = nbl.cbegin();
261 decode(dp);
262 }
263
264 // override default move operations to reset default values
265 Transaction(Transaction&& other) noexcept :
266 data(std::move(other.data)),
267 coll_index(std::move(other.coll_index)),
268 object_index(std::move(other.object_index)),
269 coll_id(other.coll_id),
270 object_id(other.object_id),
271 data_bl(std::move(other.data_bl)),
272 op_bl(std::move(other.op_bl)),
273 on_applied(std::move(other.on_applied)),
274 on_commit(std::move(other.on_commit)),
275 on_applied_sync(std::move(other.on_applied_sync)) {
276 other.coll_id = 0;
277 other.object_id = 0;
278 }
279
280 Transaction& operator=(Transaction&& other) noexcept {
281 data = std::move(other.data);
282 coll_index = std::move(other.coll_index);
283 object_index = std::move(other.object_index);
284 coll_id = other.coll_id;
285 object_id = other.object_id;
286 data_bl = std::move(other.data_bl);
287 op_bl = std::move(other.op_bl);
288 on_applied = std::move(other.on_applied);
289 on_commit = std::move(other.on_commit);
290 on_applied_sync = std::move(other.on_applied_sync);
291 other.coll_id = 0;
292 other.object_id = 0;
293 return *this;
294 }
295
296 Transaction(const Transaction& other) = default;
297 Transaction& operator=(const Transaction& other) = default;
298
299 // expose object_index for FileStore::Op's benefit
9f95a23c 300 const std::map<ghobject_t, uint32_t>& get_object_index() const {
11fdf7f2
TL
301 return object_index;
302 }
303
304 /* Operations on callback contexts */
305 void register_on_applied(Context *c) {
306 if (!c) return;
307 on_applied.push_back(c);
308 }
309 void register_on_commit(Context *c) {
310 if (!c) return;
311 on_commit.push_back(c);
312 }
313 void register_on_applied_sync(Context *c) {
314 if (!c) return;
315 on_applied_sync.push_back(c);
316 }
317 void register_on_complete(Context *c) {
318 if (!c) return;
319 RunOnDeleteRef _complete (std::make_shared<RunOnDelete>(c));
320 register_on_applied(new ContainerContext<RunOnDeleteRef>(_complete));
321 register_on_commit(new ContainerContext<RunOnDeleteRef>(_complete));
322 }
323 bool has_contexts() const {
324 return
9f95a23c
TL
325 !on_commit.empty() ||
326 !on_applied.empty() ||
327 !on_applied_sync.empty();
11fdf7f2
TL
328 }
329
9f95a23c
TL
330 static void collect_contexts(
331 std::vector<Transaction>& t,
332 Context **out_on_applied,
333 Context **out_on_commit,
334 Context **out_on_applied_sync) {
11fdf7f2
TL
335 ceph_assert(out_on_applied);
336 ceph_assert(out_on_commit);
337 ceph_assert(out_on_applied_sync);
338 std::list<Context *> on_applied, on_commit, on_applied_sync;
339 for (auto& i : t) {
9f95a23c
TL
340 on_applied.splice(on_applied.end(), i.on_applied);
341 on_commit.splice(on_commit.end(), i.on_commit);
342 on_applied_sync.splice(on_applied_sync.end(), i.on_applied_sync);
11fdf7f2
TL
343 }
344 *out_on_applied = C_Contexts::list_to_context(on_applied);
345 *out_on_commit = C_Contexts::list_to_context(on_commit);
346 *out_on_applied_sync = C_Contexts::list_to_context(on_applied_sync);
347 }
9f95a23c
TL
348 static void collect_contexts(
349 std::vector<Transaction>& t,
350 std::list<Context*> *out_on_applied,
351 std::list<Context*> *out_on_commit,
352 std::list<Context*> *out_on_applied_sync) {
11fdf7f2
TL
353 ceph_assert(out_on_applied);
354 ceph_assert(out_on_commit);
355 ceph_assert(out_on_applied_sync);
356 for (auto& i : t) {
9f95a23c
TL
357 out_on_applied->splice(out_on_applied->end(), i.on_applied);
358 out_on_commit->splice(out_on_commit->end(), i.on_commit);
359 out_on_applied_sync->splice(out_on_applied_sync->end(),
360 i.on_applied_sync);
11fdf7f2
TL
361 }
362 }
9f95a23c
TL
363 static Context *collect_all_contexts(
364 Transaction& t) {
365 list<Context*> contexts;
366 contexts.splice(contexts.end(), t.on_applied);
367 contexts.splice(contexts.end(), t.on_commit);
368 contexts.splice(contexts.end(), t.on_applied_sync);
369 return C_Contexts::list_to_context(contexts);
370 }
11fdf7f2
TL
371
372 Context *get_on_applied() {
373 return C_Contexts::list_to_context(on_applied);
374 }
375 Context *get_on_commit() {
376 return C_Contexts::list_to_context(on_commit);
377 }
378 Context *get_on_applied_sync() {
379 return C_Contexts::list_to_context(on_applied_sync);
380 }
381
382 void set_fadvise_flags(uint32_t flags) {
383 data.fadvise_flags = flags;
384 }
385 void set_fadvise_flag(uint32_t flag) {
386 data.fadvise_flags = data.fadvise_flags | flag;
387 }
388 uint32_t get_fadvise_flags() { return data.fadvise_flags; }
389
390 void swap(Transaction& other) noexcept {
391 std::swap(data, other.data);
392 std::swap(on_applied, other.on_applied);
393 std::swap(on_commit, other.on_commit);
394 std::swap(on_applied_sync, other.on_applied_sync);
9f95a23c 395
11fdf7f2
TL
396 std::swap(coll_index, other.coll_index);
397 std::swap(object_index, other.object_index);
398 std::swap(coll_id, other.coll_id);
399 std::swap(object_id, other.object_id);
400 op_bl.swap(other.op_bl);
401 data_bl.swap(other.data_bl);
402 }
403
404 void _update_op(Op* op,
9f95a23c
TL
405 std::vector<uint32_t> &cm,
406 std::vector<uint32_t> &om) {
11fdf7f2
TL
407
408 switch (op->op) {
409 case OP_NOP:
410 break;
411
9f95a23c 412 case OP_CREATE:
11fdf7f2
TL
413 case OP_TOUCH:
414 case OP_REMOVE:
415 case OP_SETATTR:
416 case OP_SETATTRS:
417 case OP_RMATTR:
418 case OP_RMATTRS:
419 case OP_COLL_REMOVE:
420 case OP_OMAP_CLEAR:
421 case OP_OMAP_SETKEYS:
422 case OP_OMAP_RMKEYS:
423 case OP_OMAP_RMKEYRANGE:
424 case OP_OMAP_SETHEADER:
425 case OP_WRITE:
426 case OP_ZERO:
427 case OP_TRUNCATE:
428 case OP_SETALLOCHINT:
429 ceph_assert(op->cid < cm.size());
430 ceph_assert(op->oid < om.size());
431 op->cid = cm[op->cid];
432 op->oid = om[op->oid];
433 break;
434
435 case OP_CLONERANGE2:
436 case OP_CLONE:
437 ceph_assert(op->cid < cm.size());
438 ceph_assert(op->oid < om.size());
439 ceph_assert(op->dest_oid < om.size());
440 op->cid = cm[op->cid];
441 op->oid = om[op->oid];
442 op->dest_oid = om[op->dest_oid];
443 break;
444
445 case OP_MKCOLL:
446 case OP_RMCOLL:
447 case OP_COLL_SETATTR:
448 case OP_COLL_RMATTR:
449 case OP_COLL_SETATTRS:
450 case OP_COLL_HINT:
451 case OP_COLL_SET_BITS:
452 ceph_assert(op->cid < cm.size());
453 op->cid = cm[op->cid];
454 break;
455
456 case OP_COLL_ADD:
457 ceph_assert(op->cid < cm.size());
458 ceph_assert(op->oid < om.size());
459 ceph_assert(op->dest_cid < om.size());
460 op->cid = cm[op->cid];
461 op->dest_cid = cm[op->dest_cid];
462 op->oid = om[op->oid];
463 break;
9f95a23c 464
11fdf7f2
TL
465 case OP_COLL_MOVE_RENAME:
466 ceph_assert(op->cid < cm.size());
467 ceph_assert(op->oid < om.size());
468 ceph_assert(op->dest_cid < cm.size());
469 ceph_assert(op->dest_oid < om.size());
470 op->cid = cm[op->cid];
471 op->oid = om[op->oid];
472 op->dest_cid = cm[op->dest_cid];
473 op->dest_oid = om[op->dest_oid];
474 break;
9f95a23c 475
11fdf7f2
TL
476 case OP_TRY_RENAME:
477 ceph_assert(op->cid < cm.size());
478 ceph_assert(op->oid < om.size());
479 ceph_assert(op->dest_oid < om.size());
480 op->cid = cm[op->cid];
481 op->oid = om[op->oid];
482 op->dest_oid = om[op->dest_oid];
9f95a23c
TL
483 break;
484
11fdf7f2
TL
485 case OP_SPLIT_COLLECTION2:
486 ceph_assert(op->cid < cm.size());
9f95a23c 487 ceph_assert(op->dest_cid < cm.size());
11fdf7f2
TL
488 op->cid = cm[op->cid];
489 op->dest_cid = cm[op->dest_cid];
490 break;
9f95a23c 491
11fdf7f2
TL
492 case OP_MERGE_COLLECTION:
493 ceph_assert(op->cid < cm.size());
9f95a23c 494 ceph_assert(op->dest_cid < cm.size());
11fdf7f2
TL
495 op->cid = cm[op->cid];
496 op->dest_cid = cm[op->dest_cid];
497 break;
9f95a23c 498
11fdf7f2
TL
499 default:
500 ceph_abort_msg("Unknown OP");
501 }
502 }
503 void _update_op_bl(
9f95a23c
TL
504 ceph::buffer::list& bl,
505 std::vector<uint32_t> &cm,
506 std::vector<uint32_t> &om) {
11fdf7f2
TL
507 for (auto& bp : bl.buffers()) {
508 ceph_assert(bp.length() % sizeof(Op) == 0);
9f95a23c 509
11fdf7f2
TL
510 char* raw_p = const_cast<char*>(bp.c_str());
511 char* raw_end = raw_p + bp.length();
512 while (raw_p < raw_end) {
9f95a23c
TL
513 _update_op(reinterpret_cast<Op*>(raw_p), cm, om);
514 raw_p += sizeof(Op);
11fdf7f2
TL
515 }
516 }
517 }
9f95a23c 518 /// Append the operations of the parameter to this Transaction. The parameter's callback contexts are moved into this Transaction; its operations and data are copied and remain in the parameter
11fdf7f2
TL
519 void append(Transaction& other) {
520
eafe8130 521 data.ops = data.ops + other.data.ops;
11fdf7f2 522 if (other.data.largest_data_len > data.largest_data_len) {
9f95a23c
TL
523 data.largest_data_len = other.data.largest_data_len;
524 data.largest_data_off = other.data.largest_data_off;
525 data.largest_data_off_in_data_bl = data_bl.length() + other.data.largest_data_off_in_data_bl;
11fdf7f2 526 }
eafe8130 527 data.fadvise_flags = data.fadvise_flags | other.data.fadvise_flags;
11fdf7f2
TL
528 on_applied.splice(on_applied.end(), other.on_applied);
529 on_commit.splice(on_commit.end(), other.on_commit);
530 on_applied_sync.splice(on_applied_sync.end(), other.on_applied_sync);
9f95a23c 531
11fdf7f2 532 //append coll_index & object_index
9f95a23c
TL
533 std::vector<uint32_t> cm(other.coll_index.size());
534 std::map<coll_t, uint32_t>::iterator coll_index_p;
11fdf7f2 535 for (coll_index_p = other.coll_index.begin();
9f95a23c
TL
536 coll_index_p != other.coll_index.end();
537 ++coll_index_p) {
11fdf7f2
TL
538 cm[coll_index_p->second] = _get_coll_id(coll_index_p->first);
539 }
9f95a23c
TL
540
541 std::vector<uint32_t> om(other.object_index.size());
542 std::map<ghobject_t, uint32_t>::iterator object_index_p;
11fdf7f2 543 for (object_index_p = other.object_index.begin();
9f95a23c
TL
544 object_index_p != other.object_index.end();
545 ++object_index_p) {
11fdf7f2 546 om[object_index_p->second] = _get_object_id(object_index_p->first);
9f95a23c
TL
547 }
548
11fdf7f2 549 //the other.op_bl SHOULD NOT be changes during append operation,
9f95a23c
TL
550 //we use additional ceph::buffer::list to avoid this problem
551 ceph::buffer::list other_op_bl;
11fdf7f2 552 {
9f95a23c
TL
553 ceph::buffer::ptr other_op_bl_ptr(other.op_bl.length());
554 other.op_bl.begin().copy(other.op_bl.length(), other_op_bl_ptr.c_str());
11fdf7f2
TL
555 other_op_bl.append(std::move(other_op_bl_ptr));
556 }
9f95a23c 557
11fdf7f2
TL
558 //update other_op_bl with cm & om
559 //When the other is appended to current transaction, all coll_index and
560 //object_index in other.op_buffer should be updated by new index of the
561 //combined transaction
562 _update_op_bl(other_op_bl, cm, om);
9f95a23c 563
11fdf7f2
TL
564 //append op_bl
565 op_bl.append(other_op_bl);
566 //append data_bl
567 data_bl.append(other.data_bl);
568 }
9f95a23c 569
11fdf7f2 570 /** Inquires about the Transaction as a whole. */
9f95a23c 571
11fdf7f2
TL
572 /// How big is the encoded Transaction buffer?
573 uint64_t get_encoded_bytes() {
574 //layout: data_bl + op_bl + coll_index + object_index + data
9f95a23c 575
11fdf7f2
TL
576 // coll_index size, object_index size and sizeof(transaction_data)
577 // all here, so they may be computed at compile-time
578 size_t final_size = sizeof(__u32) * 2 + sizeof(data);
9f95a23c 579
11fdf7f2 580 // coll_index second and object_index second
9f95a23c
TL
581 final_size += (coll_index.size() + object_index.size()) * sizeof(__u32);
582
11fdf7f2
TL
583 // coll_index first
584 for (auto p = coll_index.begin(); p != coll_index.end(); ++p) {
9f95a23c 585 final_size += p->first.encoded_size();
11fdf7f2 586 }
9f95a23c 587
11fdf7f2
TL
588 // object_index first
589 for (auto p = object_index.begin(); p != object_index.end(); ++p) {
9f95a23c 590 final_size += p->first.encoded_size();
11fdf7f2 591 }
9f95a23c 592
11fdf7f2 593 return data_bl.length() +
9f95a23c
TL
594 op_bl.length() +
595 final_size;
11fdf7f2 596 }
9f95a23c 597
11fdf7f2
TL
598 /// Retain old version for regression testing purposes
599 uint64_t get_encoded_bytes_test() {
600 using ceph::encode;
601 //layout: data_bl + op_bl + coll_index + object_index + data
9f95a23c 602 ceph::buffer::list bl;
11fdf7f2
TL
603 encode(coll_index, bl);
604 encode(object_index, bl);
9f95a23c 605
11fdf7f2 606 return data_bl.length() +
9f95a23c
TL
607 op_bl.length() +
608 bl.length() +
609 sizeof(data);
11fdf7f2 610 }
9f95a23c 611
11fdf7f2
TL
612 uint64_t get_num_bytes() {
613 return get_encoded_bytes();
614 }
615 /// Size of largest data buffer to the "write" operation encountered so far
616 uint32_t get_data_length() {
617 return data.largest_data_len;
618 }
9f95a23c 619 /// offset within the encoded buffer to the start of the largest data buffer that's encoded
11fdf7f2
TL
620 uint32_t get_data_offset() {
621 if (data.largest_data_off_in_data_bl) {
9f95a23c
TL
622 return data.largest_data_off_in_data_bl +
623 sizeof(__u8) + // encode struct_v
624 sizeof(__u8) + // encode compat_v
625 sizeof(__u32) + // encode len
626 sizeof(__u32); // data_bl len
11fdf7f2
TL
627 }
628 return 0; // none
629 }
630 /// offset of buffer as aligned to destination within object.
631 int get_data_alignment() {
632 if (!data.largest_data_len)
9f95a23c 633 return 0;
11fdf7f2
TL
634 return (0 - get_data_offset()) & ~CEPH_PAGE_MASK;
635 }
636 /// Is the Transaction empty (no operations)
637 bool empty() {
638 return !data.ops;
639 }
640 /// Number of operations in the transaction
641 int get_num_ops() {
642 return data.ops;
643 }
9f95a23c 644
11fdf7f2
TL
645 /**
646 * iterator
647 *
648 * Helper object to parse Transactions.
649 *
650 * ObjectStore instances use this object to step down the encoded
651 * buffer decoding operation codes and parameters as we go.
652 *
653 */
654 class iterator {
655 Transaction *t;
9f95a23c 656
11fdf7f2
TL
657 uint64_t ops;
658 char* op_buffer_p;
9f95a23c
TL
659
660 ceph::buffer::list::const_iterator data_bl_p;
661
11fdf7f2 662 public:
9f95a23c
TL
663 std::vector<coll_t> colls;
664 std::vector<ghobject_t> objects;
665
11fdf7f2
TL
666 private:
667 explicit iterator(Transaction *t)
668 : t(t),
9f95a23c
TL
669 data_bl_p(t->data_bl.cbegin()),
670 colls(t->coll_index.size()),
671 objects(t->object_index.size()) {
672
11fdf7f2
TL
673 ops = t->data.ops;
674 op_buffer_p = t->op_bl.c_str();
9f95a23c
TL
675
676 std::map<coll_t, uint32_t>::iterator coll_index_p;
11fdf7f2 677 for (coll_index_p = t->coll_index.begin();
9f95a23c
TL
678 coll_index_p != t->coll_index.end();
679 ++coll_index_p) {
680 colls[coll_index_p->second] = coll_index_p->first;
11fdf7f2 681 }
9f95a23c
TL
682
683 std::map<ghobject_t, uint32_t>::iterator object_index_p;
11fdf7f2 684 for (object_index_p = t->object_index.begin();
9f95a23c
TL
685 object_index_p != t->object_index.end();
686 ++object_index_p) {
687 objects[object_index_p->second] = object_index_p->first;
11fdf7f2
TL
688 }
689 }
9f95a23c 690
11fdf7f2 691 friend class Transaction;
9f95a23c 692
11fdf7f2 693 public:
9f95a23c 694
11fdf7f2
TL
695 bool have_op() {
696 return ops > 0;
697 }
698 Op* decode_op() {
699 ceph_assert(ops > 0);
9f95a23c 700
11fdf7f2
TL
701 Op* op = reinterpret_cast<Op*>(op_buffer_p);
702 op_buffer_p += sizeof(Op);
703 ops--;
9f95a23c 704
11fdf7f2
TL
705 return op;
706 }
9f95a23c
TL
707 std::string decode_string() {
708 using ceph::decode;
709 std::string s;
11fdf7f2
TL
710 decode(s, data_bl_p);
711 return s;
712 }
9f95a23c
TL
713 void decode_bp(ceph::buffer::ptr& bp) {
714 using ceph::decode;
11fdf7f2
TL
715 decode(bp, data_bl_p);
716 }
9f95a23c
TL
717 void decode_bl(ceph::buffer::list& bl) {
718 using ceph::decode;
11fdf7f2
TL
719 decode(bl, data_bl_p);
720 }
9f95a23c
TL
721 void decode_attrset(std::map<std::string,ceph::buffer::ptr>& aset) {
722 using ceph::decode;
11fdf7f2
TL
723 decode(aset, data_bl_p);
724 }
9f95a23c
TL
725 void decode_attrset(std::map<std::string,ceph::buffer::list>& aset) {
726 using ceph::decode;
11fdf7f2
TL
727 decode(aset, data_bl_p);
728 }
9f95a23c
TL
729 void decode_attrset_bl(ceph::buffer::list *pbl) {
730 decode_str_str_map_to_bl(data_bl_p, pbl);
731 }
732 void decode_keyset(std::set<std::string> &keys){
733 using ceph::decode;
11fdf7f2
TL
734 decode(keys, data_bl_p);
735 }
9f95a23c
TL
736 void decode_keyset_bl(ceph::buffer::list *pbl){
737 decode_str_set_to_bl(data_bl_p, pbl);
738 }
739
740 const ghobject_t &get_oid(uint32_t oid_id) {
11fdf7f2
TL
741 ceph_assert(oid_id < objects.size());
742 return objects[oid_id];
743 }
9f95a23c 744 const coll_t &get_cid(uint32_t cid_id) {
11fdf7f2
TL
745 ceph_assert(cid_id < colls.size());
746 return colls[cid_id];
747 }
748 uint32_t get_fadvise_flags() const {
9f95a23c 749 return t->get_fadvise_flags();
11fdf7f2
TL
750 }
751 };
9f95a23c 752
11fdf7f2 753 iterator begin() {
9f95a23c 754 return iterator(this);
11fdf7f2 755 }
9f95a23c 756
11fdf7f2
TL
757private:
758 void _build_actions_from_tbl();
759
11fdf7f2
TL
760 /**
761 * Helper functions to encode the various mutation elements of a
762 * transaction. These are 1:1 with the operation codes (see
763 * enumeration above). These routines ensure that the
764 * encoder/creator of a transaction gets the right data in the
765 * right place. Sadly, there's no corresponding version nor any
766 * form of seat belts for the decoder.
767 */
768 Op* _get_next_op() {
769 if (op_bl.get_append_buffer_unused_tail_length() < sizeof(Op)) {
770 op_bl.reserve(sizeof(Op) * OPS_PER_PTR);
771 }
772 // append_hole ensures bptr merging. Even huge number of ops
773 // shouldn't result in overpopulating bl::_buffers.
774 char* const p = op_bl.append_hole(sizeof(Op)).c_str();
775 memset(p, 0, sizeof(Op));
776 return reinterpret_cast<Op*>(p);
777 }
9f95a23c
TL
778 uint32_t _get_coll_id(const coll_t& coll) {
779 std::map<coll_t, uint32_t>::iterator c = coll_index.find(coll);
11fdf7f2
TL
780 if (c != coll_index.end())
781 return c->second;
9f95a23c
TL
782
783 uint32_t index_id = coll_id++;
11fdf7f2
TL
784 coll_index[coll] = index_id;
785 return index_id;
786 }
9f95a23c
TL
787 uint32_t _get_object_id(const ghobject_t& oid) {
788 std::map<ghobject_t, uint32_t>::iterator o = object_index.find(oid);
11fdf7f2
TL
789 if (o != object_index.end())
790 return o->second;
9f95a23c
TL
791
792 uint32_t index_id = object_id++;
11fdf7f2
TL
793 object_index[oid] = index_id;
794 return index_id;
795 }
9f95a23c 796
11fdf7f2
TL
797public:
798 /// noop. 'nuf said
799 void nop() {
800 Op* _op = _get_next_op();
801 _op->op = OP_NOP;
eafe8130 802 data.ops = data.ops + 1;
11fdf7f2 803 }
9f95a23c
TL
804 /**
805 * create
806 *
807 * create an object that does not yet exist
808 * (behavior is undefined if the object already exists)
809 */
810 void create(const coll_t& cid, const ghobject_t& oid) {
811 Op* _op = _get_next_op();
812 _op->op = OP_CREATE;
813 _op->cid = _get_coll_id(cid);
814 _op->oid = _get_object_id(oid);
815 data.ops = data.ops + 1;
816 }
11fdf7f2
TL
817 /**
818 * touch
819 *
820 * Ensure the existence of an object in a collection. Create an
821 * empty object if necessary
822 */
823 void touch(const coll_t& cid, const ghobject_t& oid) {
824 Op* _op = _get_next_op();
825 _op->op = OP_TOUCH;
826 _op->cid = _get_coll_id(cid);
827 _op->oid = _get_object_id(oid);
eafe8130 828 data.ops = data.ops + 1;
11fdf7f2
TL
829 }
830 /**
831 * Write data to an offset within an object. If the object is too
832 * small, it is expanded as needed. It is possible to specify an
833 * offset beyond the current end of an object and it will be
834 * expanded as needed. Simple implementations of ObjectStore will
835 * just zero the data between the old end of the object and the
836 * newly provided data. More sophisticated implementations of
837 * ObjectStore will omit the untouched data and store it as a
838 * "hole" in the file.
839 *
840 * Note that a 0-length write does not affect the size of the object.
841 */
842 void write(const coll_t& cid, const ghobject_t& oid, uint64_t off, uint64_t len,
9f95a23c 843 const ceph::buffer::list& write_data, uint32_t flags = 0) {
11fdf7f2
TL
844 using ceph::encode;
845 uint32_t orig_len = data_bl.length();
846 Op* _op = _get_next_op();
847 _op->op = OP_WRITE;
848 _op->cid = _get_coll_id(cid);
849 _op->oid = _get_object_id(oid);
850 _op->off = off;
851 _op->len = len;
852 encode(write_data, data_bl);
9f95a23c 853
11fdf7f2
TL
854 ceph_assert(len == write_data.length());
855 data.fadvise_flags = data.fadvise_flags | flags;
856 if (write_data.length() > data.largest_data_len) {
9f95a23c
TL
857 data.largest_data_len = write_data.length();
858 data.largest_data_off = off;
859 data.largest_data_off_in_data_bl = orig_len + sizeof(__u32); // we are about to
11fdf7f2 860 }
eafe8130 861 data.ops = data.ops + 1;
11fdf7f2
TL
862 }
863 /**
864 * zero out the indicated byte range within an object. Some
865 * ObjectStore instances may optimize this to release the
866 * underlying storage space.
867 *
868 * If the zero range extends beyond the end of the object, the object
869 * size is extended, just as if we were writing a buffer full of zeros.
870 * EXCEPT if the length is 0, in which case (just like a 0-length write)
871 * we do not adjust the object size.
872 */
873 void zero(const coll_t& cid, const ghobject_t& oid, uint64_t off, uint64_t len) {
874 Op* _op = _get_next_op();
875 _op->op = OP_ZERO;
876 _op->cid = _get_coll_id(cid);
877 _op->oid = _get_object_id(oid);
878 _op->off = off;
879 _op->len = len;
eafe8130 880 data.ops = data.ops + 1;
11fdf7f2
TL
881 }
882 /// Discard all data in the object beyond the specified size.
883 void truncate(const coll_t& cid, const ghobject_t& oid, uint64_t off) {
884 Op* _op = _get_next_op();
885 _op->op = OP_TRUNCATE;
886 _op->cid = _get_coll_id(cid);
887 _op->oid = _get_object_id(oid);
888 _op->off = off;
eafe8130 889 data.ops = data.ops + 1;
11fdf7f2
TL
890 }
891 /// Remove an object. All four parts of the object are removed.
892 void remove(const coll_t& cid, const ghobject_t& oid) {
893 Op* _op = _get_next_op();
894 _op->op = OP_REMOVE;
895 _op->cid = _get_coll_id(cid);
896 _op->oid = _get_object_id(oid);
eafe8130 897 data.ops = data.ops + 1;
11fdf7f2
TL
898 }
899 /// Set an xattr of an object
9f95a23c
TL
900 void setattr(const coll_t& cid, const ghobject_t& oid, const char* name, ceph::buffer::list& val) {
901 std::string n(name);
11fdf7f2
TL
902 setattr(cid, oid, n, val);
903 }
904 /// Set an xattr of an object
9f95a23c 905 void setattr(const coll_t& cid, const ghobject_t& oid, const std::string& s, ceph::buffer::list& val) {
11fdf7f2
TL
906 using ceph::encode;
907 Op* _op = _get_next_op();
908 _op->op = OP_SETATTR;
909 _op->cid = _get_coll_id(cid);
910 _op->oid = _get_object_id(oid);
911 encode(s, data_bl);
912 encode(val, data_bl);
eafe8130 913 data.ops = data.ops + 1;
11fdf7f2
TL
914 }
915 /// Set multiple xattrs of an object
9f95a23c 916 void setattrs(const coll_t& cid, const ghobject_t& oid, const std::map<std::string,ceph::buffer::ptr>& attrset) {
11fdf7f2
TL
917 using ceph::encode;
918 Op* _op = _get_next_op();
919 _op->op = OP_SETATTRS;
920 _op->cid = _get_coll_id(cid);
921 _op->oid = _get_object_id(oid);
922 encode(attrset, data_bl);
eafe8130 923 data.ops = data.ops + 1;
11fdf7f2
TL
924 }
925 /// Set multiple xattrs of an object
9f95a23c 926 void setattrs(const coll_t& cid, const ghobject_t& oid, const std::map<std::string,ceph::buffer::list>& attrset) {
11fdf7f2
TL
927 using ceph::encode;
928 Op* _op = _get_next_op();
929 _op->op = OP_SETATTRS;
930 _op->cid = _get_coll_id(cid);
931 _op->oid = _get_object_id(oid);
932 encode(attrset, data_bl);
eafe8130 933 data.ops = data.ops + 1;
11fdf7f2
TL
934 }
935 /// remove an xattr from an object
936 void rmattr(const coll_t& cid, const ghobject_t& oid, const char *name) {
9f95a23c 937 std::string n(name);
11fdf7f2
TL
938 rmattr(cid, oid, n);
939 }
940 /// remove an xattr from an object
9f95a23c 941 void rmattr(const coll_t& cid, const ghobject_t& oid, const std::string& s) {
11fdf7f2
TL
942 using ceph::encode;
943 Op* _op = _get_next_op();
944 _op->op = OP_RMATTR;
945 _op->cid = _get_coll_id(cid);
946 _op->oid = _get_object_id(oid);
947 encode(s, data_bl);
eafe8130 948 data.ops = data.ops + 1;
11fdf7f2
TL
949 }
950 /// remove all xattrs from an object
951 void rmattrs(const coll_t& cid, const ghobject_t& oid) {
952 Op* _op = _get_next_op();
953 _op->op = OP_RMATTRS;
954 _op->cid = _get_coll_id(cid);
955 _op->oid = _get_object_id(oid);
eafe8130 956 data.ops = data.ops + 1;
11fdf7f2
TL
957 }
958 /**
959 * Clone an object into another object.
960 *
961 * Low-cost (e.g., O(1)) cloning (if supported) is best, but
962 * fallback to an O(n) copy is allowed. All four parts of the
963 * object are cloned (data, xattrs, omap header, omap
964 * entries).
965 *
966 * The destination named object may already exist, in
967 * which case its previous contents are discarded.
968 */
969 void clone(const coll_t& cid, const ghobject_t& oid,
9f95a23c 970 const ghobject_t& noid) {
11fdf7f2
TL
971 Op* _op = _get_next_op();
972 _op->op = OP_CLONE;
973 _op->cid = _get_coll_id(cid);
974 _op->oid = _get_object_id(oid);
975 _op->dest_oid = _get_object_id(noid);
eafe8130 976 data.ops = data.ops + 1;
11fdf7f2
TL
977 }
978 /**
979 * Clone a byte range from one object to another.
980 *
981 * The data portion of the destination object receives a copy of a
982 * portion of the data from the source object. None of the other
983 * three parts of an object is copied from the source.
984 *
985 * The destination object size may be extended to the dstoff + len.
986 *
987 * The source range *must* overlap with the source object data. If it does
988 * not the result is undefined.
989 */
990 void clone_range(const coll_t& cid, const ghobject_t& oid,
9f95a23c
TL
991 const ghobject_t& noid,
992 uint64_t srcoff, uint64_t srclen, uint64_t dstoff) {
11fdf7f2
TL
993 Op* _op = _get_next_op();
994 _op->op = OP_CLONERANGE2;
995 _op->cid = _get_coll_id(cid);
996 _op->oid = _get_object_id(oid);
997 _op->dest_oid = _get_object_id(noid);
998 _op->off = srcoff;
999 _op->len = srclen;
1000 _op->dest_off = dstoff;
eafe8130 1001 data.ops = data.ops + 1;
11fdf7f2 1002 }
9f95a23c 1003
11fdf7f2
TL
1004 /// Create the collection
1005 void create_collection(const coll_t& cid, int bits) {
1006 Op* _op = _get_next_op();
1007 _op->op = OP_MKCOLL;
1008 _op->cid = _get_coll_id(cid);
1009 _op->split_bits = bits;
eafe8130 1010 data.ops = data.ops + 1;
11fdf7f2 1011 }
9f95a23c 1012
11fdf7f2
TL
1013 /**
1014 * Give the collection a hint.
1015 *
1016 * @param cid - collection id.
1017 * @param type - hint type.
1018 * @param hint - the hint payload, which contains the customized
1019 * data along with the hint type.
1020 */
9f95a23c 1021 void collection_hint(const coll_t& cid, uint32_t type, const ceph::buffer::list& hint) {
11fdf7f2
TL
1022 using ceph::encode;
1023 Op* _op = _get_next_op();
1024 _op->op = OP_COLL_HINT;
1025 _op->cid = _get_coll_id(cid);
1026 _op->hint_type = type;
1027 encode(hint, data_bl);
eafe8130 1028 data.ops = data.ops + 1;
11fdf7f2 1029 }
9f95a23c 1030
11fdf7f2
TL
1031 /// remove the collection, the collection must be empty
1032 void remove_collection(const coll_t& cid) {
1033 Op* _op = _get_next_op();
1034 _op->op = OP_RMCOLL;
1035 _op->cid = _get_coll_id(cid);
eafe8130 1036 data.ops = data.ops + 1;
11fdf7f2 1037 }
9f95a23c 1038 void collection_move(const coll_t& cid, const coll_t &oldcid, const ghobject_t& oid)
11fdf7f2 1039 __attribute__ ((deprecated)) {
9f95a23c
TL
1040 // NOTE: we encode this as a fixed combo of ADD + REMOVE. they
1041 // always appear together, so this is effectively a single MOVE.
1042 Op* _op = _get_next_op();
1043 _op->op = OP_COLL_ADD;
1044 _op->cid = _get_coll_id(oldcid);
1045 _op->oid = _get_object_id(oid);
1046 _op->dest_cid = _get_coll_id(cid);
1047 data.ops = data.ops + 1;
1048
1049 _op = _get_next_op();
1050 _op->op = OP_COLL_REMOVE;
1051 _op->cid = _get_coll_id(oldcid);
1052 _op->oid = _get_object_id(oid);
1053 data.ops = data.ops + 1;
1054 }
11fdf7f2 1055 void collection_move_rename(const coll_t& oldcid, const ghobject_t& oldoid,
9f95a23c 1056 const coll_t &cid, const ghobject_t& oid) {
11fdf7f2
TL
1057 Op* _op = _get_next_op();
1058 _op->op = OP_COLL_MOVE_RENAME;
1059 _op->cid = _get_coll_id(oldcid);
1060 _op->oid = _get_object_id(oldoid);
1061 _op->dest_cid = _get_coll_id(cid);
1062 _op->dest_oid = _get_object_id(oid);
eafe8130 1063 data.ops = data.ops + 1;
11fdf7f2
TL
1064 }
1065 void try_rename(const coll_t &cid, const ghobject_t& oldoid,
9f95a23c 1066 const ghobject_t& oid) {
11fdf7f2
TL
1067 Op* _op = _get_next_op();
1068 _op->op = OP_TRY_RENAME;
1069 _op->cid = _get_coll_id(cid);
1070 _op->oid = _get_object_id(oldoid);
1071 _op->dest_oid = _get_object_id(oid);
eafe8130 1072 data.ops = data.ops + 1;
11fdf7f2 1073 }
9f95a23c 1074
11fdf7f2
TL
1075 /// Remove omap from oid
1076 void omap_clear(
1077 const coll_t &cid, ///< [in] Collection containing oid
1078 const ghobject_t &oid ///< [in] Object from which to remove omap
1079 ) {
1080 Op* _op = _get_next_op();
1081 _op->op = OP_OMAP_CLEAR;
1082 _op->cid = _get_coll_id(cid);
1083 _op->oid = _get_object_id(oid);
eafe8130 1084 data.ops = data.ops + 1;
11fdf7f2
TL
1085 }
1086 /// Set keys on oid omap. Replaces duplicate keys.
1087 void omap_setkeys(
1088 const coll_t& cid, ///< [in] Collection containing oid
1089 const ghobject_t &oid, ///< [in] Object to update
9f95a23c 1090 const std::map<std::string, ceph::buffer::list> &attrset ///< [in] Replacement keys and values
11fdf7f2
TL
1091 ) {
1092 using ceph::encode;
1093 Op* _op = _get_next_op();
1094 _op->op = OP_OMAP_SETKEYS;
1095 _op->cid = _get_coll_id(cid);
1096 _op->oid = _get_object_id(oid);
1097 encode(attrset, data_bl);
eafe8130 1098 data.ops = data.ops + 1;
11fdf7f2
TL
1099 }
1100
9f95a23c 1101 /// Set keys on an oid omap (ceph::buffer::list variant).
11fdf7f2
TL
1102 void omap_setkeys(
1103 const coll_t &cid, ///< [in] Collection containing oid
1104 const ghobject_t &oid, ///< [in] Object to update
9f95a23c
TL
1105 const ceph::buffer::list &attrset_bl ///< [in] Replacement keys and values
1106 ) {
11fdf7f2
TL
1107 Op* _op = _get_next_op();
1108 _op->op = OP_OMAP_SETKEYS;
1109 _op->cid = _get_coll_id(cid);
1110 _op->oid = _get_object_id(oid);
1111 data_bl.append(attrset_bl);
eafe8130 1112 data.ops = data.ops + 1;
11fdf7f2
TL
1113 }
1114
1115 /// Remove keys from oid omap
1116 void omap_rmkeys(
1117 const coll_t &cid, ///< [in] Collection containing oid
1118 const ghobject_t &oid, ///< [in] Object from which to remove the omap
9f95a23c 1119 const std::set<std::string> &keys ///< [in] Keys to clear
11fdf7f2
TL
1120 ) {
1121 using ceph::encode;
1122 Op* _op = _get_next_op();
1123 _op->op = OP_OMAP_RMKEYS;
1124 _op->cid = _get_coll_id(cid);
1125 _op->oid = _get_object_id(oid);
1126 encode(keys, data_bl);
eafe8130 1127 data.ops = data.ops + 1;
11fdf7f2
TL
1128 }
1129
9f95a23c
TL
1130 /// Remove key from oid omap
1131 void omap_rmkey(
1132 const coll_t &cid, ///< [in] Collection containing oid
1133 const ghobject_t &oid, ///< [in] Object from which to remove the omap
1134 const std::string& key ///< [in] Keys to clear
1135 ) {
1136 Op* _op = _get_next_op();
1137 _op->op = OP_OMAP_RMKEYS;
1138 _op->cid = _get_coll_id(cid);
1139 _op->oid = _get_object_id(oid);
1140 using ceph::encode;
1141 encode((uint32_t)1, data_bl);
1142 encode(key, data_bl);
1143 data.ops = data.ops + 1;
1144 }
1145
11fdf7f2
TL
1146 /// Remove keys from oid omap
1147 void omap_rmkeys(
1148 const coll_t &cid, ///< [in] Collection containing oid
1149 const ghobject_t &oid, ///< [in] Object from which to remove the omap
9f95a23c 1150 const ceph::buffer::list &keys_bl ///< [in] Keys to clear
11fdf7f2
TL
1151 ) {
1152 Op* _op = _get_next_op();
1153 _op->op = OP_OMAP_RMKEYS;
1154 _op->cid = _get_coll_id(cid);
1155 _op->oid = _get_object_id(oid);
1156 data_bl.append(keys_bl);
eafe8130 1157 data.ops = data.ops + 1;
11fdf7f2 1158 }
9f95a23c 1159
11fdf7f2
TL
1160 /// Remove key range from oid omap
1161 void omap_rmkeyrange(
1162 const coll_t &cid, ///< [in] Collection containing oid
1163 const ghobject_t &oid, ///< [in] Object from which to remove the omap keys
9f95a23c
TL
1164 const std::string& first, ///< [in] first key in range
1165 const std::string& last ///< [in] first key past range, range is [first,last)
11fdf7f2
TL
1166 ) {
1167 using ceph::encode;
1168 Op* _op = _get_next_op();
1169 _op->op = OP_OMAP_RMKEYRANGE;
1170 _op->cid = _get_coll_id(cid);
1171 _op->oid = _get_object_id(oid);
1172 encode(first, data_bl);
1173 encode(last, data_bl);
eafe8130 1174 data.ops = data.ops + 1;
11fdf7f2
TL
1175 }
1176
9f95a23c
TL
1177 /// Remove key range from oid omap
1178 void omap_rmkeyrange(
1179 const coll_t cid, ///< [in] Collection containing oid
1180 const ghobject_t &oid, ///< [in] Object from which to remove the omap keys
1181 const bufferlist &keys_bl ///< [in] range of keys to clear
1182 ) {
1183 Op* _op = _get_next_op();
1184 _op->op = OP_OMAP_RMKEYRANGE;
1185 _op->cid = _get_coll_id(cid);
1186 _op->oid = _get_object_id(oid);
1187 data_bl.append(keys_bl);
1188 data.ops = data.ops + 1;
1189 }
1190
11fdf7f2
TL
1191 /// Set omap header
1192 void omap_setheader(
1193 const coll_t &cid, ///< [in] Collection containing oid
1194 const ghobject_t &oid, ///< [in] Object
9f95a23c 1195 const ceph::buffer::list &bl ///< [in] Header value
11fdf7f2
TL
1196 ) {
1197 using ceph::encode;
1198 Op* _op = _get_next_op();
1199 _op->op = OP_OMAP_SETHEADER;
1200 _op->cid = _get_coll_id(cid);
1201 _op->oid = _get_object_id(oid);
1202 encode(bl, data_bl);
eafe8130 1203 data.ops = data.ops + 1;
11fdf7f2
TL
1204 }
1205
9f95a23c
TL
1206 /// Split collection based on given prefixes, objects matching the specified bits/rem are
1207 /// moved to the new collection
11fdf7f2
TL
1208 void split_collection(
1209 const coll_t &cid,
1210 uint32_t bits,
1211 uint32_t rem,
1212 const coll_t &destination) {
1213 Op* _op = _get_next_op();
1214 _op->op = OP_SPLIT_COLLECTION2;
1215 _op->cid = _get_coll_id(cid);
1216 _op->dest_cid = _get_coll_id(destination);
1217 _op->split_bits = bits;
1218 _op->split_rem = rem;
eafe8130 1219 data.ops = data.ops + 1;
11fdf7f2
TL
1220 }
1221
1222 /// Merge collection into another.
1223 void merge_collection(
1224 coll_t cid,
1225 coll_t destination,
1226 uint32_t bits) {
1227 Op* _op = _get_next_op();
1228 _op->op = OP_MERGE_COLLECTION;
1229 _op->cid = _get_coll_id(cid);
1230 _op->dest_cid = _get_coll_id(destination);
1231 _op->split_bits = bits;
eafe8130 1232 data.ops = data.ops + 1;
11fdf7f2
TL
1233 }
1234
1235 void collection_set_bits(
1236 const coll_t &cid,
1237 int bits) {
1238 Op* _op = _get_next_op();
1239 _op->op = OP_COLL_SET_BITS;
1240 _op->cid = _get_coll_id(cid);
1241 _op->split_bits = bits;
eafe8130 1242 data.ops = data.ops + 1;
11fdf7f2
TL
1243 }
1244
1245 /// Set allocation hint for an object
1246 /// make 0 values(expected_object_size, expected_write_size) noops for all implementations
1247 void set_alloc_hint(
1248 const coll_t &cid,
1249 const ghobject_t &oid,
1250 uint64_t expected_object_size,
1251 uint64_t expected_write_size,
1252 uint32_t flags
1253 ) {
1254 Op* _op = _get_next_op();
1255 _op->op = OP_SETALLOCHINT;
1256 _op->cid = _get_coll_id(cid);
1257 _op->oid = _get_object_id(oid);
1258 _op->expected_object_size = expected_object_size;
1259 _op->expected_write_size = expected_write_size;
1260 _op->alloc_hint_flags = flags;
eafe8130 1261 data.ops = data.ops + 1;
11fdf7f2
TL
1262 }
1263
9f95a23c 1264 void encode(ceph::buffer::list& bl) const {
11fdf7f2
TL
1265 //layout: data_bl + op_bl + coll_index + object_index + data
1266 ENCODE_START(9, 9, bl);
1267 encode(data_bl, bl);
1268 encode(op_bl, bl);
1269 encode(coll_index, bl);
1270 encode(object_index, bl);
1271 data.encode(bl);
1272 ENCODE_FINISH(bl);
1273 }
1274
9f95a23c 1275 void decode(ceph::buffer::list::const_iterator &bl) {
11fdf7f2
TL
1276 DECODE_START(9, bl);
1277 DECODE_OLDEST(9);
1278
1279 decode(data_bl, bl);
1280 decode(op_bl, bl);
1281 decode(coll_index, bl);
1282 decode(object_index, bl);
1283 data.decode(bl);
1284 coll_id = coll_index.size();
1285 object_id = object_index.size();
1286
1287 DECODE_FINISH(bl);
1288 }
1289
1290 void dump(ceph::Formatter *f);
9f95a23c 1291 static void generate_test_instances(std::list<Transaction*>& o);
11fdf7f2 1292};
9f95a23c
TL
1293WRITE_CLASS_ENCODER(Transaction)
1294WRITE_CLASS_ENCODER(Transaction::TransactionData)
1295
1296std::ostream& operator<<(std::ostream& out, const Transaction& tx);
1297
11fdf7f2 1298}