]> git.proxmox.com Git - ceph.git/blame - ceph/src/os/Transaction.h
import quincy beta 17.1.0
[ceph.git] / ceph / src / os / Transaction.h
CommitLineData
9f95a23c 1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
11fdf7f2
TL
2// vim: ts=8 sw=2 smarttab
3
9f95a23c
TL
4#pragma once
5
11fdf7f2
TL
6#include <map>
7
f67539c2 8#include "include/Context.h"
11fdf7f2
TL
9#include "include/int_types.h"
10#include "include/buffer.h"
f67539c2 11
11fdf7f2
TL
12#include "osd/osd_types.h"
13
9f95a23c
TL
14#define OPS_PER_PTR 32
15
16void decode_str_str_map_to_bl(ceph::buffer::list::const_iterator& p, ceph::buffer::list *out);
17void decode_str_set_to_bl(ceph::buffer::list::const_iterator& p, ceph::buffer::list *out);
18
19
11fdf7f2
TL
20/*********************************
21 * transaction
22 *
23 * A Transaction represents a sequence of primitive mutation
24 * operations.
25 *
26 * Three events in the life of a Transaction result in
27 * callbacks. Any Transaction can contain any number of callback
28 * objects (Context) for any combination of the three classes of
29 * callbacks:
30 *
31 * on_applied_sync, on_applied, and on_commit.
32 *
33 * The "on_applied" and "on_applied_sync" callbacks are invoked when
34 * the modifications requested by the Transaction are visible to
35 * subsequent ObjectStore operations, i.e., the results are
36 * readable. The only conceptual difference between on_applied and
37 * on_applied_sync is the specific thread and locking environment in
38 * which the callbacks operate. "on_applied_sync" is called
39 * directly by an ObjectStore execution thread. It is expected to
40 * execute quickly and must not acquire any locks of the calling
41 * environment. Conversely, "on_applied" is called from the separate
42 * Finisher thread, meaning that it can contend for calling
43 * environment locks. NB, on_applied and on_applied_sync are
44 * sometimes called on_readable and on_readable_sync.
45 *
46 * The "on_commit" callback is also called from the Finisher thread
47 * and indicates that all of the mutations have been durably
48 * committed to stable storage (i.e., are now software/hardware
49 * crashproof).
50 *
51 * At the implementation level, each mutation primitive (and its
52 * associated data) can be serialized to a single buffer. That
53 * serialization, however, does not copy any data, but (using the
9f95a23c 54 * ceph::buffer::list library) will reference the original buffers. This
11fdf7f2
TL
55 * implies that the buffer that contains the data being submitted
56 * must remain stable until the on_commit callback completes. In
9f95a23c 57 * practice, ceph::buffer::list handles all of this for you and this
11fdf7f2
TL
58 * subtlety is only relevant if you are referencing an existing
59 * buffer via buffer::raw_static.
60 *
61 * Some implementations of ObjectStore choose to implement their own
62 * form of journaling that uses the serialized form of a
63 * Transaction. This requires that the encode/decode logic properly
64 * version itself and handle version upgrades that might change the
65 * format of the encoded Transaction. This has already happened a
66 * couple of times and the Transaction object contains some helper
67 * variables that aid in this legacy decoding:
68 *
69 * sobject_encoding detects an older/simpler version of oid
70 * present in pre-bobtail versions of ceph. use_pool_override
71 * also detects a situation where the pool of an oid can be
 * overridden for legacy operations/buffers. For non-legacy
 * implementations of ObjectStore, neither of these fields is
 * relevant.
75 *
76 *
77 * TRANSACTION ISOLATION
78 *
79 * Except as noted above, isolation is the responsibility of the
80 * caller. In other words, if any storage element (storage element
81 * == any of the four portions of an object as described above) is
82 * altered by a transaction (including deletion), the caller
83 * promises not to attempt to read that element while the
84 * transaction is pending (here pending means from the time of
85 * issuance until the "on_applied_sync" callback has been
86 * received). Violations of isolation need not be detected by
87 * ObjectStore and there is no corresponding error mechanism for
88 * reporting an isolation violation (crashing would be the
89 * appropriate way to report an isolation violation if detected).
90 *
91 * Enumeration operations may violate transaction isolation as
92 * described above when a storage element is being created or
93 * deleted as part of a transaction. In this case, ObjectStore is
94 * allowed to consider the enumeration operation to either precede
95 * or follow the violating transaction element. In other words, the
96 * presence/absence of the mutated element in the enumeration is
97 * entirely at the discretion of ObjectStore. The arbitrary ordering
 * applies independently to each transaction element. For example,
 * if a transaction contains two mutating elements, "create A" and
 * "delete B", and an enumeration operation is performed while this
 * transaction is pending, it is permissible for ObjectStore to
 * report any of the four possible combinations of the existence of
 * A and B.
104 *
105 */
106namespace ceph::os {
107class Transaction {
108public:
// Operation codes written into Op::op. The trailing comment on each
// enumerator lists the arguments the operation carries.
enum {
  OP_NOP = 0,
  OP_CREATE = 7,              // cid, oid
  OP_TOUCH = 9,               // cid, oid
  OP_WRITE = 10,              // cid, oid, offset, len, bl
  OP_ZERO = 11,               // cid, oid, offset, len
  OP_TRUNCATE = 12,           // cid, oid, len
  OP_REMOVE = 13,             // cid, oid
  OP_SETATTR = 14,            // cid, oid, attrname, bl
  OP_SETATTRS = 15,           // cid, oid, attrset
  OP_RMATTR = 16,             // cid, oid, attrname
  OP_CLONE = 17,              // cid, oid, newoid
  OP_CLONERANGE = 18,         // cid, oid, newoid, offset, len
  OP_CLONERANGE2 = 30,        // cid, oid, newoid, srcoff, len, dstoff

  OP_TRIMCACHE = 19,          // cid, oid, offset, len  **DEPRECATED**

  OP_MKCOLL = 20,             // cid
  OP_RMCOLL = 21,             // cid
  OP_COLL_ADD = 22,           // cid, oldcid, oid
  OP_COLL_REMOVE = 23,        // cid, oid
  OP_COLL_SETATTR = 24,       // cid, attrname, bl
  OP_COLL_RMATTR = 25,        // cid, attrname
  OP_COLL_SETATTRS = 26,      // cid, attrset
  OP_COLL_MOVE = 8,           // newcid, oldcid, oid

  OP_RMATTRS = 28,            // cid, oid
  OP_COLL_RENAME = 29,        // cid, newcid

  // The omap operations address an object, so they carry cid and oid.
  OP_OMAP_CLEAR = 31,         // cid, oid
  OP_OMAP_SETKEYS = 32,       // cid, oid, attrset
  OP_OMAP_RMKEYS = 33,        // cid, oid, keyset
  OP_OMAP_SETHEADER = 34,     // cid, oid, header
  OP_SPLIT_COLLECTION = 35,   // cid, bits, destination
  OP_SPLIT_COLLECTION2 = 36,  // cid, bits, destination;
                              // doesn't create the destination
  OP_OMAP_RMKEYRANGE = 37,    // cid, oid, firstkey, lastkey
  OP_COLL_MOVE_RENAME = 38,   // oldcid, oldoid, newcid, newoid

  OP_SETALLOCHINT = 39,       // cid, oid, object_size, write_size
  OP_COLL_HINT = 40,          // cid, type, bl

  OP_TRY_RENAME = 41,         // oldcid, oldoid, newoid

  OP_COLL_SET_BITS = 42,      // cid, bits

  OP_MERGE_COLLECTION = 43,   // cid, destination
};
157
// Hint types; Op::hint is annotated as being used by OP_COLL_HINT.
enum {
  COLL_HINT_EXPECTED_NUM_OBJECTS = 1,
};
162
163 struct Op {
eafe8130
TL
164 ceph_le32 op;
165 ceph_le32 cid;
166 ceph_le32 oid;
167 ceph_le64 off;
168 ceph_le64 len;
169 ceph_le32 dest_cid;
170 ceph_le32 dest_oid; //OP_CLONE, OP_CLONERANGE
171 ceph_le64 dest_off; //OP_CLONERANGE
f67539c2 172 ceph_le32 hint; //OP_COLL_HINT,OP_SETALLOCHINT
eafe8130
TL
173 ceph_le64 expected_object_size; //OP_SETALLOCHINT
174 ceph_le64 expected_write_size; //OP_SETALLOCHINT
175 ceph_le32 split_bits; //OP_SPLIT_COLLECTION2,OP_COLL_SET_BITS,
11fdf7f2 176 //OP_MKCOLL
eafe8130 177 ceph_le32 split_rem; //OP_SPLIT_COLLECTION2
11fdf7f2
TL
178 } __attribute__ ((packed)) ;
179
180 struct TransactionData {
eafe8130
TL
181 ceph_le64 ops;
182 ceph_le32 largest_data_len;
183 ceph_le32 largest_data_off;
184 ceph_le32 largest_data_off_in_data_bl;
185 ceph_le32 fadvise_flags;
11fdf7f2
TL
186
187 TransactionData() noexcept :
20effc67
TL
188 ops(0),
189 largest_data_len(0),
190 largest_data_off(0),
191 largest_data_off_in_data_bl(0),
192 fadvise_flags(0) { }
11fdf7f2 193
9f95a23c 194 // override default move operations to reset default values
11fdf7f2 195 TransactionData(TransactionData&& other) noexcept :
9f95a23c 196 ops(other.ops),
11fdf7f2
TL
197 largest_data_len(other.largest_data_len),
198 largest_data_off(other.largest_data_off),
199 largest_data_off_in_data_bl(other.largest_data_off_in_data_bl),
200 fadvise_flags(other.fadvise_flags) {
201 other.ops = 0;
202 other.largest_data_len = 0;
203 other.largest_data_off = 0;
204 other.largest_data_off_in_data_bl = 0;
205 other.fadvise_flags = 0;
206 }
207 TransactionData& operator=(TransactionData&& other) noexcept {
208 ops = other.ops;
209 largest_data_len = other.largest_data_len;
210 largest_data_off = other.largest_data_off;
211 largest_data_off_in_data_bl = other.largest_data_off_in_data_bl;
212 fadvise_flags = other.fadvise_flags;
213 other.ops = 0;
214 other.largest_data_len = 0;
215 other.largest_data_off = 0;
216 other.largest_data_off_in_data_bl = 0;
217 other.fadvise_flags = 0;
218 return *this;
219 }
220
221 TransactionData(const TransactionData& other) = default;
222 TransactionData& operator=(const TransactionData& other) = default;
223
9f95a23c 224 void encode(ceph::buffer::list& bl) const {
11fdf7f2
TL
225 bl.append((char*)this, sizeof(TransactionData));
226 }
9f95a23c 227 void decode(ceph::buffer::list::const_iterator &bl) {
11fdf7f2
TL
228 bl.copy(sizeof(TransactionData), (char*)this);
229 }
230 } __attribute__ ((packed)) ;
231
232private:
233 TransactionData data;
234
9f95a23c
TL
235 std::map<coll_t, uint32_t> coll_index;
236 std::map<ghobject_t, uint32_t> object_index;
11fdf7f2 237
9f95a23c
TL
238 uint32_t coll_id = 0;
239 uint32_t object_id = 0;
11fdf7f2 240
9f95a23c
TL
241 ceph::buffer::list data_bl;
242 ceph::buffer::list op_bl;
11fdf7f2
TL
243
244 std::list<Context *> on_applied;
245 std::list<Context *> on_commit;
246 std::list<Context *> on_applied_sync;
247
248public:
249 Transaction() = default;
250
9f95a23c 251 explicit Transaction(ceph::buffer::list::const_iterator &dp) {
11fdf7f2
TL
252 decode(dp);
253 }
9f95a23c 254 explicit Transaction(ceph::buffer::list &nbl) {
11fdf7f2
TL
255 auto dp = nbl.cbegin();
256 decode(dp);
257 }
258
259 // override default move operations to reset default values
260 Transaction(Transaction&& other) noexcept :
261 data(std::move(other.data)),
262 coll_index(std::move(other.coll_index)),
263 object_index(std::move(other.object_index)),
264 coll_id(other.coll_id),
265 object_id(other.object_id),
266 data_bl(std::move(other.data_bl)),
267 op_bl(std::move(other.op_bl)),
268 on_applied(std::move(other.on_applied)),
269 on_commit(std::move(other.on_commit)),
270 on_applied_sync(std::move(other.on_applied_sync)) {
271 other.coll_id = 0;
272 other.object_id = 0;
273 }
274
275 Transaction& operator=(Transaction&& other) noexcept {
276 data = std::move(other.data);
277 coll_index = std::move(other.coll_index);
278 object_index = std::move(other.object_index);
279 coll_id = other.coll_id;
280 object_id = other.object_id;
281 data_bl = std::move(other.data_bl);
282 op_bl = std::move(other.op_bl);
283 on_applied = std::move(other.on_applied);
284 on_commit = std::move(other.on_commit);
285 on_applied_sync = std::move(other.on_applied_sync);
286 other.coll_id = 0;
287 other.object_id = 0;
288 return *this;
289 }
290
291 Transaction(const Transaction& other) = default;
292 Transaction& operator=(const Transaction& other) = default;
293
294 // expose object_index for FileStore::Op's benefit
9f95a23c 295 const std::map<ghobject_t, uint32_t>& get_object_index() const {
11fdf7f2
TL
296 return object_index;
297 }
298
299 /* Operations on callback contexts */
300 void register_on_applied(Context *c) {
301 if (!c) return;
302 on_applied.push_back(c);
303 }
304 void register_on_commit(Context *c) {
305 if (!c) return;
306 on_commit.push_back(c);
307 }
308 void register_on_applied_sync(Context *c) {
309 if (!c) return;
310 on_applied_sync.push_back(c);
311 }
312 void register_on_complete(Context *c) {
313 if (!c) return;
314 RunOnDeleteRef _complete (std::make_shared<RunOnDelete>(c));
315 register_on_applied(new ContainerContext<RunOnDeleteRef>(_complete));
316 register_on_commit(new ContainerContext<RunOnDeleteRef>(_complete));
317 }
318 bool has_contexts() const {
319 return
9f95a23c
TL
320 !on_commit.empty() ||
321 !on_applied.empty() ||
322 !on_applied_sync.empty();
11fdf7f2
TL
323 }
324
9f95a23c
TL
325 static void collect_contexts(
326 std::vector<Transaction>& t,
327 Context **out_on_applied,
328 Context **out_on_commit,
329 Context **out_on_applied_sync) {
11fdf7f2
TL
330 ceph_assert(out_on_applied);
331 ceph_assert(out_on_commit);
332 ceph_assert(out_on_applied_sync);
333 std::list<Context *> on_applied, on_commit, on_applied_sync;
334 for (auto& i : t) {
9f95a23c
TL
335 on_applied.splice(on_applied.end(), i.on_applied);
336 on_commit.splice(on_commit.end(), i.on_commit);
337 on_applied_sync.splice(on_applied_sync.end(), i.on_applied_sync);
11fdf7f2
TL
338 }
339 *out_on_applied = C_Contexts::list_to_context(on_applied);
340 *out_on_commit = C_Contexts::list_to_context(on_commit);
341 *out_on_applied_sync = C_Contexts::list_to_context(on_applied_sync);
342 }
9f95a23c
TL
343 static void collect_contexts(
344 std::vector<Transaction>& t,
345 std::list<Context*> *out_on_applied,
346 std::list<Context*> *out_on_commit,
347 std::list<Context*> *out_on_applied_sync) {
11fdf7f2
TL
348 ceph_assert(out_on_applied);
349 ceph_assert(out_on_commit);
350 ceph_assert(out_on_applied_sync);
351 for (auto& i : t) {
9f95a23c
TL
352 out_on_applied->splice(out_on_applied->end(), i.on_applied);
353 out_on_commit->splice(out_on_commit->end(), i.on_commit);
354 out_on_applied_sync->splice(out_on_applied_sync->end(),
355 i.on_applied_sync);
11fdf7f2
TL
356 }
357 }
9f95a23c
TL
358 static Context *collect_all_contexts(
359 Transaction& t) {
f67539c2 360 std::list<Context*> contexts;
9f95a23c
TL
361 contexts.splice(contexts.end(), t.on_applied);
362 contexts.splice(contexts.end(), t.on_commit);
363 contexts.splice(contexts.end(), t.on_applied_sync);
364 return C_Contexts::list_to_context(contexts);
365 }
11fdf7f2
TL
366
367 Context *get_on_applied() {
368 return C_Contexts::list_to_context(on_applied);
369 }
370 Context *get_on_commit() {
371 return C_Contexts::list_to_context(on_commit);
372 }
373 Context *get_on_applied_sync() {
374 return C_Contexts::list_to_context(on_applied_sync);
375 }
376
377 void set_fadvise_flags(uint32_t flags) {
378 data.fadvise_flags = flags;
379 }
380 void set_fadvise_flag(uint32_t flag) {
381 data.fadvise_flags = data.fadvise_flags | flag;
382 }
383 uint32_t get_fadvise_flags() { return data.fadvise_flags; }
384
385 void swap(Transaction& other) noexcept {
386 std::swap(data, other.data);
387 std::swap(on_applied, other.on_applied);
388 std::swap(on_commit, other.on_commit);
389 std::swap(on_applied_sync, other.on_applied_sync);
9f95a23c 390
11fdf7f2
TL
391 std::swap(coll_index, other.coll_index);
392 std::swap(object_index, other.object_index);
393 std::swap(coll_id, other.coll_id);
394 std::swap(object_id, other.object_id);
395 op_bl.swap(other.op_bl);
396 data_bl.swap(other.data_bl);
397 }
398
399 void _update_op(Op* op,
9f95a23c
TL
400 std::vector<uint32_t> &cm,
401 std::vector<uint32_t> &om) {
11fdf7f2
TL
402
403 switch (op->op) {
404 case OP_NOP:
405 break;
406
9f95a23c 407 case OP_CREATE:
11fdf7f2
TL
408 case OP_TOUCH:
409 case OP_REMOVE:
410 case OP_SETATTR:
411 case OP_SETATTRS:
412 case OP_RMATTR:
413 case OP_RMATTRS:
414 case OP_COLL_REMOVE:
415 case OP_OMAP_CLEAR:
416 case OP_OMAP_SETKEYS:
417 case OP_OMAP_RMKEYS:
418 case OP_OMAP_RMKEYRANGE:
419 case OP_OMAP_SETHEADER:
420 case OP_WRITE:
421 case OP_ZERO:
422 case OP_TRUNCATE:
423 case OP_SETALLOCHINT:
424 ceph_assert(op->cid < cm.size());
425 ceph_assert(op->oid < om.size());
426 op->cid = cm[op->cid];
427 op->oid = om[op->oid];
428 break;
429
430 case OP_CLONERANGE2:
431 case OP_CLONE:
432 ceph_assert(op->cid < cm.size());
433 ceph_assert(op->oid < om.size());
434 ceph_assert(op->dest_oid < om.size());
435 op->cid = cm[op->cid];
436 op->oid = om[op->oid];
437 op->dest_oid = om[op->dest_oid];
438 break;
439
440 case OP_MKCOLL:
441 case OP_RMCOLL:
442 case OP_COLL_SETATTR:
443 case OP_COLL_RMATTR:
444 case OP_COLL_SETATTRS:
445 case OP_COLL_HINT:
446 case OP_COLL_SET_BITS:
447 ceph_assert(op->cid < cm.size());
448 op->cid = cm[op->cid];
449 break;
450
451 case OP_COLL_ADD:
452 ceph_assert(op->cid < cm.size());
453 ceph_assert(op->oid < om.size());
454 ceph_assert(op->dest_cid < om.size());
455 op->cid = cm[op->cid];
456 op->dest_cid = cm[op->dest_cid];
457 op->oid = om[op->oid];
458 break;
9f95a23c 459
11fdf7f2
TL
460 case OP_COLL_MOVE_RENAME:
461 ceph_assert(op->cid < cm.size());
462 ceph_assert(op->oid < om.size());
463 ceph_assert(op->dest_cid < cm.size());
464 ceph_assert(op->dest_oid < om.size());
465 op->cid = cm[op->cid];
466 op->oid = om[op->oid];
467 op->dest_cid = cm[op->dest_cid];
468 op->dest_oid = om[op->dest_oid];
469 break;
9f95a23c 470
11fdf7f2
TL
471 case OP_TRY_RENAME:
472 ceph_assert(op->cid < cm.size());
473 ceph_assert(op->oid < om.size());
474 ceph_assert(op->dest_oid < om.size());
475 op->cid = cm[op->cid];
476 op->oid = om[op->oid];
477 op->dest_oid = om[op->dest_oid];
9f95a23c
TL
478 break;
479
11fdf7f2
TL
480 case OP_SPLIT_COLLECTION2:
481 ceph_assert(op->cid < cm.size());
9f95a23c 482 ceph_assert(op->dest_cid < cm.size());
11fdf7f2
TL
483 op->cid = cm[op->cid];
484 op->dest_cid = cm[op->dest_cid];
485 break;
9f95a23c 486
11fdf7f2
TL
487 case OP_MERGE_COLLECTION:
488 ceph_assert(op->cid < cm.size());
9f95a23c 489 ceph_assert(op->dest_cid < cm.size());
11fdf7f2
TL
490 op->cid = cm[op->cid];
491 op->dest_cid = cm[op->dest_cid];
492 break;
9f95a23c 493
11fdf7f2
TL
494 default:
495 ceph_abort_msg("Unknown OP");
496 }
497 }
498 void _update_op_bl(
9f95a23c
TL
499 ceph::buffer::list& bl,
500 std::vector<uint32_t> &cm,
501 std::vector<uint32_t> &om) {
11fdf7f2
TL
502 for (auto& bp : bl.buffers()) {
503 ceph_assert(bp.length() % sizeof(Op) == 0);
9f95a23c 504
11fdf7f2
TL
505 char* raw_p = const_cast<char*>(bp.c_str());
506 char* raw_end = raw_p + bp.length();
507 while (raw_p < raw_end) {
9f95a23c
TL
508 _update_op(reinterpret_cast<Op*>(raw_p), cm, om);
509 raw_p += sizeof(Op);
11fdf7f2
TL
510 }
511 }
512 }
9f95a23c 513 /// Append the operations of the parameter to this Transaction. Those operations are removed from the parameter Transaction
11fdf7f2
TL
514 void append(Transaction& other) {
515
eafe8130 516 data.ops = data.ops + other.data.ops;
11fdf7f2 517 if (other.data.largest_data_len > data.largest_data_len) {
9f95a23c
TL
518 data.largest_data_len = other.data.largest_data_len;
519 data.largest_data_off = other.data.largest_data_off;
520 data.largest_data_off_in_data_bl = data_bl.length() + other.data.largest_data_off_in_data_bl;
11fdf7f2 521 }
eafe8130 522 data.fadvise_flags = data.fadvise_flags | other.data.fadvise_flags;
11fdf7f2
TL
523 on_applied.splice(on_applied.end(), other.on_applied);
524 on_commit.splice(on_commit.end(), other.on_commit);
525 on_applied_sync.splice(on_applied_sync.end(), other.on_applied_sync);
9f95a23c 526
11fdf7f2 527 //append coll_index & object_index
9f95a23c
TL
528 std::vector<uint32_t> cm(other.coll_index.size());
529 std::map<coll_t, uint32_t>::iterator coll_index_p;
11fdf7f2 530 for (coll_index_p = other.coll_index.begin();
9f95a23c
TL
531 coll_index_p != other.coll_index.end();
532 ++coll_index_p) {
11fdf7f2
TL
533 cm[coll_index_p->second] = _get_coll_id(coll_index_p->first);
534 }
9f95a23c
TL
535
536 std::vector<uint32_t> om(other.object_index.size());
537 std::map<ghobject_t, uint32_t>::iterator object_index_p;
11fdf7f2 538 for (object_index_p = other.object_index.begin();
9f95a23c
TL
539 object_index_p != other.object_index.end();
540 ++object_index_p) {
11fdf7f2 541 om[object_index_p->second] = _get_object_id(object_index_p->first);
9f95a23c
TL
542 }
543
11fdf7f2 544 //the other.op_bl SHOULD NOT be changes during append operation,
9f95a23c
TL
545 //we use additional ceph::buffer::list to avoid this problem
546 ceph::buffer::list other_op_bl;
11fdf7f2 547 {
9f95a23c
TL
548 ceph::buffer::ptr other_op_bl_ptr(other.op_bl.length());
549 other.op_bl.begin().copy(other.op_bl.length(), other_op_bl_ptr.c_str());
11fdf7f2
TL
550 other_op_bl.append(std::move(other_op_bl_ptr));
551 }
9f95a23c 552
11fdf7f2
TL
553 //update other_op_bl with cm & om
554 //When the other is appended to current transaction, all coll_index and
555 //object_index in other.op_buffer should be updated by new index of the
556 //combined transaction
557 _update_op_bl(other_op_bl, cm, om);
9f95a23c 558
11fdf7f2
TL
559 //append op_bl
560 op_bl.append(other_op_bl);
561 //append data_bl
562 data_bl.append(other.data_bl);
563 }
9f95a23c 564
11fdf7f2 565 /** Inquires about the Transaction as a whole. */
9f95a23c 566
11fdf7f2
TL
567 /// How big is the encoded Transaction buffer?
568 uint64_t get_encoded_bytes() {
569 //layout: data_bl + op_bl + coll_index + object_index + data
9f95a23c 570
11fdf7f2
TL
571 // coll_index size, object_index size and sizeof(transaction_data)
572 // all here, so they may be computed at compile-time
573 size_t final_size = sizeof(__u32) * 2 + sizeof(data);
9f95a23c 574
11fdf7f2 575 // coll_index second and object_index second
9f95a23c
TL
576 final_size += (coll_index.size() + object_index.size()) * sizeof(__u32);
577
11fdf7f2
TL
578 // coll_index first
579 for (auto p = coll_index.begin(); p != coll_index.end(); ++p) {
9f95a23c 580 final_size += p->first.encoded_size();
11fdf7f2 581 }
9f95a23c 582
11fdf7f2
TL
583 // object_index first
584 for (auto p = object_index.begin(); p != object_index.end(); ++p) {
9f95a23c 585 final_size += p->first.encoded_size();
11fdf7f2 586 }
9f95a23c 587
11fdf7f2 588 return data_bl.length() +
9f95a23c
TL
589 op_bl.length() +
590 final_size;
11fdf7f2 591 }
9f95a23c 592
11fdf7f2
TL
593 /// Retain old version for regression testing purposes
594 uint64_t get_encoded_bytes_test() {
595 using ceph::encode;
596 //layout: data_bl + op_bl + coll_index + object_index + data
9f95a23c 597 ceph::buffer::list bl;
11fdf7f2
TL
598 encode(coll_index, bl);
599 encode(object_index, bl);
9f95a23c 600
11fdf7f2 601 return data_bl.length() +
9f95a23c
TL
602 op_bl.length() +
603 bl.length() +
604 sizeof(data);
11fdf7f2 605 }
9f95a23c 606
11fdf7f2
TL
607 uint64_t get_num_bytes() {
608 return get_encoded_bytes();
609 }
610 /// Size of largest data buffer to the "write" operation encountered so far
611 uint32_t get_data_length() {
612 return data.largest_data_len;
613 }
9f95a23c 614 /// offset within the encoded buffer to the start of the largest data buffer that's encoded
11fdf7f2
TL
615 uint32_t get_data_offset() {
616 if (data.largest_data_off_in_data_bl) {
9f95a23c
TL
617 return data.largest_data_off_in_data_bl +
618 sizeof(__u8) + // encode struct_v
619 sizeof(__u8) + // encode compat_v
620 sizeof(__u32) + // encode len
621 sizeof(__u32); // data_bl len
11fdf7f2
TL
622 }
623 return 0; // none
624 }
625 /// offset of buffer as aligned to destination within object.
626 int get_data_alignment() {
627 if (!data.largest_data_len)
9f95a23c 628 return 0;
11fdf7f2
TL
629 return (0 - get_data_offset()) & ~CEPH_PAGE_MASK;
630 }
631 /// Is the Transaction empty (no operations)
632 bool empty() {
633 return !data.ops;
634 }
635 /// Number of operations in the transaction
636 int get_num_ops() {
637 return data.ops;
638 }
9f95a23c 639
11fdf7f2
TL
640 /**
641 * iterator
642 *
643 * Helper object to parse Transactions.
644 *
645 * ObjectStore instances use this object to step down the encoded
646 * buffer decoding operation codes and parameters as we go.
647 *
648 */
649 class iterator {
650 Transaction *t;
9f95a23c 651
11fdf7f2
TL
652 uint64_t ops;
653 char* op_buffer_p;
9f95a23c
TL
654
655 ceph::buffer::list::const_iterator data_bl_p;
656
11fdf7f2 657 public:
9f95a23c
TL
658 std::vector<coll_t> colls;
659 std::vector<ghobject_t> objects;
660
11fdf7f2
TL
661 private:
662 explicit iterator(Transaction *t)
663 : t(t),
9f95a23c
TL
664 data_bl_p(t->data_bl.cbegin()),
665 colls(t->coll_index.size()),
666 objects(t->object_index.size()) {
667
11fdf7f2
TL
668 ops = t->data.ops;
669 op_buffer_p = t->op_bl.c_str();
9f95a23c
TL
670
671 std::map<coll_t, uint32_t>::iterator coll_index_p;
11fdf7f2 672 for (coll_index_p = t->coll_index.begin();
9f95a23c
TL
673 coll_index_p != t->coll_index.end();
674 ++coll_index_p) {
675 colls[coll_index_p->second] = coll_index_p->first;
11fdf7f2 676 }
9f95a23c
TL
677
678 std::map<ghobject_t, uint32_t>::iterator object_index_p;
11fdf7f2 679 for (object_index_p = t->object_index.begin();
9f95a23c
TL
680 object_index_p != t->object_index.end();
681 ++object_index_p) {
682 objects[object_index_p->second] = object_index_p->first;
11fdf7f2
TL
683 }
684 }
9f95a23c 685
11fdf7f2 686 friend class Transaction;
9f95a23c 687
11fdf7f2 688 public:
9f95a23c 689
11fdf7f2
TL
690 bool have_op() {
691 return ops > 0;
692 }
693 Op* decode_op() {
694 ceph_assert(ops > 0);
9f95a23c 695
11fdf7f2
TL
696 Op* op = reinterpret_cast<Op*>(op_buffer_p);
697 op_buffer_p += sizeof(Op);
698 ops--;
9f95a23c 699
11fdf7f2
TL
700 return op;
701 }
9f95a23c
TL
702 std::string decode_string() {
703 using ceph::decode;
704 std::string s;
11fdf7f2
TL
705 decode(s, data_bl_p);
706 return s;
707 }
9f95a23c
TL
708 void decode_bp(ceph::buffer::ptr& bp) {
709 using ceph::decode;
11fdf7f2
TL
710 decode(bp, data_bl_p);
711 }
9f95a23c
TL
712 void decode_bl(ceph::buffer::list& bl) {
713 using ceph::decode;
11fdf7f2
TL
714 decode(bl, data_bl_p);
715 }
9f95a23c
TL
716 void decode_attrset(std::map<std::string,ceph::buffer::ptr>& aset) {
717 using ceph::decode;
11fdf7f2
TL
718 decode(aset, data_bl_p);
719 }
9f95a23c
TL
720 void decode_attrset(std::map<std::string,ceph::buffer::list>& aset) {
721 using ceph::decode;
11fdf7f2
TL
722 decode(aset, data_bl_p);
723 }
9f95a23c
TL
724 void decode_attrset_bl(ceph::buffer::list *pbl) {
725 decode_str_str_map_to_bl(data_bl_p, pbl);
726 }
727 void decode_keyset(std::set<std::string> &keys){
728 using ceph::decode;
11fdf7f2
TL
729 decode(keys, data_bl_p);
730 }
9f95a23c
TL
731 void decode_keyset_bl(ceph::buffer::list *pbl){
732 decode_str_set_to_bl(data_bl_p, pbl);
733 }
734
735 const ghobject_t &get_oid(uint32_t oid_id) {
11fdf7f2
TL
736 ceph_assert(oid_id < objects.size());
737 return objects[oid_id];
738 }
9f95a23c 739 const coll_t &get_cid(uint32_t cid_id) {
11fdf7f2
TL
740 ceph_assert(cid_id < colls.size());
741 return colls[cid_id];
742 }
743 uint32_t get_fadvise_flags() const {
9f95a23c 744 return t->get_fadvise_flags();
11fdf7f2 745 }
f67539c2 746
20effc67 747 const std::vector<ghobject_t> &get_objects() const {
f67539c2
TL
748 return objects;
749 }
11fdf7f2 750 };
9f95a23c 751
11fdf7f2 752 iterator begin() {
9f95a23c 753 return iterator(this);
11fdf7f2 754 }
9f95a23c 755
11fdf7f2
TL
756private:
757 void _build_actions_from_tbl();
758
11fdf7f2
TL
759 /**
760 * Helper functions to encode the various mutation elements of a
761 * transaction. These are 1:1 with the operation codes (see
762 * enumeration above). These routines ensure that the
763 * encoder/creator of a transaction gets the right data in the
764 * right place. Sadly, there's no corresponding version nor any
765 * form of seat belts for the decoder.
766 */
767 Op* _get_next_op() {
768 if (op_bl.get_append_buffer_unused_tail_length() < sizeof(Op)) {
769 op_bl.reserve(sizeof(Op) * OPS_PER_PTR);
770 }
771 // append_hole ensures bptr merging. Even huge number of ops
772 // shouldn't result in overpopulating bl::_buffers.
773 char* const p = op_bl.append_hole(sizeof(Op)).c_str();
774 memset(p, 0, sizeof(Op));
775 return reinterpret_cast<Op*>(p);
776 }
9f95a23c
TL
777 uint32_t _get_coll_id(const coll_t& coll) {
778 std::map<coll_t, uint32_t>::iterator c = coll_index.find(coll);
11fdf7f2
TL
779 if (c != coll_index.end())
780 return c->second;
9f95a23c
TL
781
782 uint32_t index_id = coll_id++;
11fdf7f2
TL
783 coll_index[coll] = index_id;
784 return index_id;
785 }
9f95a23c
TL
786 uint32_t _get_object_id(const ghobject_t& oid) {
787 std::map<ghobject_t, uint32_t>::iterator o = object_index.find(oid);
11fdf7f2
TL
788 if (o != object_index.end())
789 return o->second;
9f95a23c
TL
790
791 uint32_t index_id = object_id++;
11fdf7f2
TL
792 object_index[oid] = index_id;
793 return index_id;
794 }
9f95a23c 795
11fdf7f2
TL
796public:
797 /// noop. 'nuf said
798 void nop() {
799 Op* _op = _get_next_op();
800 _op->op = OP_NOP;
eafe8130 801 data.ops = data.ops + 1;
11fdf7f2 802 }
9f95a23c
TL
803 /**
804 * create
805 *
806 * create an object that does not yet exist
807 * (behavior is undefined if the object already exists)
808 */
809 void create(const coll_t& cid, const ghobject_t& oid) {
810 Op* _op = _get_next_op();
811 _op->op = OP_CREATE;
812 _op->cid = _get_coll_id(cid);
813 _op->oid = _get_object_id(oid);
814 data.ops = data.ops + 1;
815 }
11fdf7f2
TL
816 /**
817 * touch
818 *
819 * Ensure the existance of an object in a collection. Create an
820 * empty object if necessary
821 */
822 void touch(const coll_t& cid, const ghobject_t& oid) {
823 Op* _op = _get_next_op();
824 _op->op = OP_TOUCH;
825 _op->cid = _get_coll_id(cid);
826 _op->oid = _get_object_id(oid);
eafe8130 827 data.ops = data.ops + 1;
11fdf7f2
TL
828 }
829 /**
830 * Write data to an offset within an object. If the object is too
831 * small, it is expanded as needed. It is possible to specify an
832 * offset beyond the current end of an object and it will be
833 * expanded as needed. Simple implementations of ObjectStore will
834 * just zero the data between the old end of the object and the
835 * newly provided data. More sophisticated implementations of
836 * ObjectStore will omit the untouched data and store it as a
837 * "hole" in the file.
838 *
839 * Note that a 0-length write does not affect the size of the object.
840 */
841 void write(const coll_t& cid, const ghobject_t& oid, uint64_t off, uint64_t len,
9f95a23c 842 const ceph::buffer::list& write_data, uint32_t flags = 0) {
11fdf7f2
TL
843 using ceph::encode;
844 uint32_t orig_len = data_bl.length();
845 Op* _op = _get_next_op();
846 _op->op = OP_WRITE;
847 _op->cid = _get_coll_id(cid);
848 _op->oid = _get_object_id(oid);
849 _op->off = off;
850 _op->len = len;
851 encode(write_data, data_bl);
9f95a23c 852
11fdf7f2
TL
853 ceph_assert(len == write_data.length());
854 data.fadvise_flags = data.fadvise_flags | flags;
855 if (write_data.length() > data.largest_data_len) {
9f95a23c
TL
856 data.largest_data_len = write_data.length();
857 data.largest_data_off = off;
858 data.largest_data_off_in_data_bl = orig_len + sizeof(__u32); // we are about to
11fdf7f2 859 }
eafe8130 860 data.ops = data.ops + 1;
11fdf7f2
TL
861 }
862 /**
863 * zero out the indicated byte range within an object. Some
864 * ObjectStore instances may optimize this to release the
865 * underlying storage space.
866 *
867 * If the zero range extends beyond the end of the object, the object
868 * size is extended, just as if we were writing a buffer full of zeros.
869 * EXCEPT if the length is 0, in which case (just like a 0-length write)
870 * we do not adjust the object size.
871 */
872 void zero(const coll_t& cid, const ghobject_t& oid, uint64_t off, uint64_t len) {
873 Op* _op = _get_next_op();
874 _op->op = OP_ZERO;
875 _op->cid = _get_coll_id(cid);
876 _op->oid = _get_object_id(oid);
877 _op->off = off;
878 _op->len = len;
eafe8130 879 data.ops = data.ops + 1;
11fdf7f2
TL
880 }
881 /// Discard all data in the object beyond the specified size.
882 void truncate(const coll_t& cid, const ghobject_t& oid, uint64_t off) {
883 Op* _op = _get_next_op();
884 _op->op = OP_TRUNCATE;
885 _op->cid = _get_coll_id(cid);
886 _op->oid = _get_object_id(oid);
887 _op->off = off;
eafe8130 888 data.ops = data.ops + 1;
11fdf7f2
TL
889 }
890 /// Remove an object. All four parts of the object are removed.
891 void remove(const coll_t& cid, const ghobject_t& oid) {
892 Op* _op = _get_next_op();
893 _op->op = OP_REMOVE;
894 _op->cid = _get_coll_id(cid);
895 _op->oid = _get_object_id(oid);
eafe8130 896 data.ops = data.ops + 1;
11fdf7f2
TL
897 }
898 /// Set an xattr of an object
9f95a23c
TL
899 void setattr(const coll_t& cid, const ghobject_t& oid, const char* name, ceph::buffer::list& val) {
900 std::string n(name);
11fdf7f2
TL
901 setattr(cid, oid, n, val);
902 }
903 /// Set an xattr of an object
9f95a23c 904 void setattr(const coll_t& cid, const ghobject_t& oid, const std::string& s, ceph::buffer::list& val) {
11fdf7f2
TL
905 using ceph::encode;
906 Op* _op = _get_next_op();
907 _op->op = OP_SETATTR;
908 _op->cid = _get_coll_id(cid);
909 _op->oid = _get_object_id(oid);
910 encode(s, data_bl);
911 encode(val, data_bl);
eafe8130 912 data.ops = data.ops + 1;
11fdf7f2
TL
913 }
914 /// Set multiple xattrs of an object
20effc67
TL
915 void setattrs(const coll_t& cid,
916 const ghobject_t& oid,
917 const std::map<std::string,ceph::buffer::ptr,std::less<>>& attrset) {
11fdf7f2
TL
918 using ceph::encode;
919 Op* _op = _get_next_op();
920 _op->op = OP_SETATTRS;
921 _op->cid = _get_coll_id(cid);
922 _op->oid = _get_object_id(oid);
923 encode(attrset, data_bl);
eafe8130 924 data.ops = data.ops + 1;
11fdf7f2
TL
925 }
926 /// Set multiple xattrs of an object
20effc67
TL
927 void setattrs(const coll_t& cid,
928 const ghobject_t& oid,
929 const std::map<std::string,ceph::buffer::list,std::less<>>& attrset) {
11fdf7f2
TL
930 using ceph::encode;
931 Op* _op = _get_next_op();
932 _op->op = OP_SETATTRS;
933 _op->cid = _get_coll_id(cid);
934 _op->oid = _get_object_id(oid);
935 encode(attrset, data_bl);
eafe8130 936 data.ops = data.ops + 1;
11fdf7f2
TL
937 }
938 /// remove an xattr from an object
939 void rmattr(const coll_t& cid, const ghobject_t& oid, const char *name) {
9f95a23c 940 std::string n(name);
11fdf7f2
TL
941 rmattr(cid, oid, n);
942 }
943 /// remove an xattr from an object
9f95a23c 944 void rmattr(const coll_t& cid, const ghobject_t& oid, const std::string& s) {
11fdf7f2
TL
945 using ceph::encode;
946 Op* _op = _get_next_op();
947 _op->op = OP_RMATTR;
948 _op->cid = _get_coll_id(cid);
949 _op->oid = _get_object_id(oid);
950 encode(s, data_bl);
eafe8130 951 data.ops = data.ops + 1;
11fdf7f2
TL
952 }
953 /// remove all xattrs from an object
954 void rmattrs(const coll_t& cid, const ghobject_t& oid) {
955 Op* _op = _get_next_op();
956 _op->op = OP_RMATTRS;
957 _op->cid = _get_coll_id(cid);
958 _op->oid = _get_object_id(oid);
eafe8130 959 data.ops = data.ops + 1;
11fdf7f2
TL
960 }
961 /**
962 * Clone an object into another object.
963 *
964 * Low-cost (e.g., O(1)) cloning (if supported) is best, but
965 * fallback to an O(n) copy is allowed. All four parts of the
966 * object are cloned (data, xattrs, omap header, omap
967 * entries).
968 *
969 * The destination named object may already exist, in
970 * which case its previous contents are discarded.
971 */
972 void clone(const coll_t& cid, const ghobject_t& oid,
9f95a23c 973 const ghobject_t& noid) {
11fdf7f2
TL
974 Op* _op = _get_next_op();
975 _op->op = OP_CLONE;
976 _op->cid = _get_coll_id(cid);
977 _op->oid = _get_object_id(oid);
978 _op->dest_oid = _get_object_id(noid);
eafe8130 979 data.ops = data.ops + 1;
11fdf7f2
TL
980 }
981 /**
982 * Clone a byte range from one object to another.
983 *
984 * The data portion of the destination object receives a copy of a
985 * portion of the data from the source object. None of the other
986 * three parts of an object is copied from the source.
987 *
988 * The destination object size may be extended to the dstoff + len.
989 *
990 * The source range *must* overlap with the source object data. If it does
991 * not the result is undefined.
992 */
993 void clone_range(const coll_t& cid, const ghobject_t& oid,
9f95a23c
TL
994 const ghobject_t& noid,
995 uint64_t srcoff, uint64_t srclen, uint64_t dstoff) {
11fdf7f2
TL
996 Op* _op = _get_next_op();
997 _op->op = OP_CLONERANGE2;
998 _op->cid = _get_coll_id(cid);
999 _op->oid = _get_object_id(oid);
1000 _op->dest_oid = _get_object_id(noid);
1001 _op->off = srcoff;
1002 _op->len = srclen;
1003 _op->dest_off = dstoff;
eafe8130 1004 data.ops = data.ops + 1;
11fdf7f2 1005 }
9f95a23c 1006
11fdf7f2
TL
1007 /// Create the collection
1008 void create_collection(const coll_t& cid, int bits) {
1009 Op* _op = _get_next_op();
1010 _op->op = OP_MKCOLL;
1011 _op->cid = _get_coll_id(cid);
1012 _op->split_bits = bits;
eafe8130 1013 data.ops = data.ops + 1;
11fdf7f2 1014 }
9f95a23c 1015
11fdf7f2
TL
1016 /**
1017 * Give the collection a hint.
1018 *
1019 * @param cid - collection id.
1020 * @param type - hint type.
1021 * @param hint - the hint payload, which contains the customized
1022 * data along with the hint type.
1023 */
9f95a23c 1024 void collection_hint(const coll_t& cid, uint32_t type, const ceph::buffer::list& hint) {
11fdf7f2
TL
1025 using ceph::encode;
1026 Op* _op = _get_next_op();
1027 _op->op = OP_COLL_HINT;
1028 _op->cid = _get_coll_id(cid);
f67539c2 1029 _op->hint = type;
11fdf7f2 1030 encode(hint, data_bl);
eafe8130 1031 data.ops = data.ops + 1;
11fdf7f2 1032 }
9f95a23c 1033
11fdf7f2
TL
1034 /// remove the collection, the collection must be empty
1035 void remove_collection(const coll_t& cid) {
1036 Op* _op = _get_next_op();
1037 _op->op = OP_RMCOLL;
1038 _op->cid = _get_coll_id(cid);
eafe8130 1039 data.ops = data.ops + 1;
11fdf7f2 1040 }
9f95a23c 1041 void collection_move(const coll_t& cid, const coll_t &oldcid, const ghobject_t& oid)
11fdf7f2 1042 __attribute__ ((deprecated)) {
9f95a23c
TL
1043 // NOTE: we encode this as a fixed combo of ADD + REMOVE. they
1044 // always appear together, so this is effectively a single MOVE.
1045 Op* _op = _get_next_op();
1046 _op->op = OP_COLL_ADD;
1047 _op->cid = _get_coll_id(oldcid);
1048 _op->oid = _get_object_id(oid);
1049 _op->dest_cid = _get_coll_id(cid);
1050 data.ops = data.ops + 1;
1051
1052 _op = _get_next_op();
1053 _op->op = OP_COLL_REMOVE;
1054 _op->cid = _get_coll_id(oldcid);
1055 _op->oid = _get_object_id(oid);
1056 data.ops = data.ops + 1;
1057 }
11fdf7f2 1058 void collection_move_rename(const coll_t& oldcid, const ghobject_t& oldoid,
9f95a23c 1059 const coll_t &cid, const ghobject_t& oid) {
11fdf7f2
TL
1060 Op* _op = _get_next_op();
1061 _op->op = OP_COLL_MOVE_RENAME;
1062 _op->cid = _get_coll_id(oldcid);
1063 _op->oid = _get_object_id(oldoid);
1064 _op->dest_cid = _get_coll_id(cid);
1065 _op->dest_oid = _get_object_id(oid);
eafe8130 1066 data.ops = data.ops + 1;
11fdf7f2
TL
1067 }
1068 void try_rename(const coll_t &cid, const ghobject_t& oldoid,
9f95a23c 1069 const ghobject_t& oid) {
11fdf7f2
TL
1070 Op* _op = _get_next_op();
1071 _op->op = OP_TRY_RENAME;
1072 _op->cid = _get_coll_id(cid);
1073 _op->oid = _get_object_id(oldoid);
1074 _op->dest_oid = _get_object_id(oid);
eafe8130 1075 data.ops = data.ops + 1;
11fdf7f2 1076 }
9f95a23c 1077
11fdf7f2
TL
1078 /// Remove omap from oid
1079 void omap_clear(
1080 const coll_t &cid, ///< [in] Collection containing oid
1081 const ghobject_t &oid ///< [in] Object from which to remove omap
1082 ) {
1083 Op* _op = _get_next_op();
1084 _op->op = OP_OMAP_CLEAR;
1085 _op->cid = _get_coll_id(cid);
1086 _op->oid = _get_object_id(oid);
eafe8130 1087 data.ops = data.ops + 1;
11fdf7f2
TL
1088 }
1089 /// Set keys on oid omap. Replaces duplicate keys.
1090 void omap_setkeys(
1091 const coll_t& cid, ///< [in] Collection containing oid
1092 const ghobject_t &oid, ///< [in] Object to update
9f95a23c 1093 const std::map<std::string, ceph::buffer::list> &attrset ///< [in] Replacement keys and values
11fdf7f2
TL
1094 ) {
1095 using ceph::encode;
1096 Op* _op = _get_next_op();
1097 _op->op = OP_OMAP_SETKEYS;
1098 _op->cid = _get_coll_id(cid);
1099 _op->oid = _get_object_id(oid);
1100 encode(attrset, data_bl);
eafe8130 1101 data.ops = data.ops + 1;
11fdf7f2
TL
1102 }
1103
9f95a23c 1104 /// Set keys on an oid omap (ceph::buffer::list variant).
11fdf7f2
TL
1105 void omap_setkeys(
1106 const coll_t &cid, ///< [in] Collection containing oid
1107 const ghobject_t &oid, ///< [in] Object to update
9f95a23c
TL
1108 const ceph::buffer::list &attrset_bl ///< [in] Replacement keys and values
1109 ) {
11fdf7f2
TL
1110 Op* _op = _get_next_op();
1111 _op->op = OP_OMAP_SETKEYS;
1112 _op->cid = _get_coll_id(cid);
1113 _op->oid = _get_object_id(oid);
1114 data_bl.append(attrset_bl);
eafe8130 1115 data.ops = data.ops + 1;
11fdf7f2
TL
1116 }
1117
1118 /// Remove keys from oid omap
1119 void omap_rmkeys(
1120 const coll_t &cid, ///< [in] Collection containing oid
1121 const ghobject_t &oid, ///< [in] Object from which to remove the omap
9f95a23c 1122 const std::set<std::string> &keys ///< [in] Keys to clear
11fdf7f2
TL
1123 ) {
1124 using ceph::encode;
1125 Op* _op = _get_next_op();
1126 _op->op = OP_OMAP_RMKEYS;
1127 _op->cid = _get_coll_id(cid);
1128 _op->oid = _get_object_id(oid);
1129 encode(keys, data_bl);
eafe8130 1130 data.ops = data.ops + 1;
11fdf7f2
TL
1131 }
1132
9f95a23c
TL
1133 /// Remove key from oid omap
1134 void omap_rmkey(
1135 const coll_t &cid, ///< [in] Collection containing oid
1136 const ghobject_t &oid, ///< [in] Object from which to remove the omap
1137 const std::string& key ///< [in] Keys to clear
1138 ) {
1139 Op* _op = _get_next_op();
1140 _op->op = OP_OMAP_RMKEYS;
1141 _op->cid = _get_coll_id(cid);
1142 _op->oid = _get_object_id(oid);
1143 using ceph::encode;
1144 encode((uint32_t)1, data_bl);
1145 encode(key, data_bl);
1146 data.ops = data.ops + 1;
1147 }
1148
11fdf7f2
TL
1149 /// Remove keys from oid omap
1150 void omap_rmkeys(
1151 const coll_t &cid, ///< [in] Collection containing oid
1152 const ghobject_t &oid, ///< [in] Object from which to remove the omap
9f95a23c 1153 const ceph::buffer::list &keys_bl ///< [in] Keys to clear
11fdf7f2
TL
1154 ) {
1155 Op* _op = _get_next_op();
1156 _op->op = OP_OMAP_RMKEYS;
1157 _op->cid = _get_coll_id(cid);
1158 _op->oid = _get_object_id(oid);
1159 data_bl.append(keys_bl);
eafe8130 1160 data.ops = data.ops + 1;
11fdf7f2 1161 }
9f95a23c 1162
11fdf7f2
TL
1163 /// Remove key range from oid omap
1164 void omap_rmkeyrange(
1165 const coll_t &cid, ///< [in] Collection containing oid
1166 const ghobject_t &oid, ///< [in] Object from which to remove the omap keys
9f95a23c
TL
1167 const std::string& first, ///< [in] first key in range
1168 const std::string& last ///< [in] first key past range, range is [first,last)
11fdf7f2
TL
1169 ) {
1170 using ceph::encode;
1171 Op* _op = _get_next_op();
1172 _op->op = OP_OMAP_RMKEYRANGE;
1173 _op->cid = _get_coll_id(cid);
1174 _op->oid = _get_object_id(oid);
1175 encode(first, data_bl);
1176 encode(last, data_bl);
eafe8130 1177 data.ops = data.ops + 1;
11fdf7f2
TL
1178 }
1179
9f95a23c
TL
1180 /// Remove key range from oid omap
1181 void omap_rmkeyrange(
1182 const coll_t cid, ///< [in] Collection containing oid
1183 const ghobject_t &oid, ///< [in] Object from which to remove the omap keys
1184 const bufferlist &keys_bl ///< [in] range of keys to clear
1185 ) {
1186 Op* _op = _get_next_op();
1187 _op->op = OP_OMAP_RMKEYRANGE;
1188 _op->cid = _get_coll_id(cid);
1189 _op->oid = _get_object_id(oid);
1190 data_bl.append(keys_bl);
1191 data.ops = data.ops + 1;
1192 }
1193
11fdf7f2
TL
1194 /// Set omap header
1195 void omap_setheader(
1196 const coll_t &cid, ///< [in] Collection containing oid
1197 const ghobject_t &oid, ///< [in] Object
9f95a23c 1198 const ceph::buffer::list &bl ///< [in] Header value
11fdf7f2
TL
1199 ) {
1200 using ceph::encode;
1201 Op* _op = _get_next_op();
1202 _op->op = OP_OMAP_SETHEADER;
1203 _op->cid = _get_coll_id(cid);
1204 _op->oid = _get_object_id(oid);
1205 encode(bl, data_bl);
eafe8130 1206 data.ops = data.ops + 1;
11fdf7f2
TL
1207 }
1208
9f95a23c
TL
1209 /// Split collection based on given prefixes, objects matching the specified bits/rem are
1210 /// moved to the new collection
11fdf7f2
TL
1211 void split_collection(
1212 const coll_t &cid,
1213 uint32_t bits,
1214 uint32_t rem,
1215 const coll_t &destination) {
1216 Op* _op = _get_next_op();
1217 _op->op = OP_SPLIT_COLLECTION2;
1218 _op->cid = _get_coll_id(cid);
1219 _op->dest_cid = _get_coll_id(destination);
1220 _op->split_bits = bits;
1221 _op->split_rem = rem;
eafe8130 1222 data.ops = data.ops + 1;
11fdf7f2
TL
1223 }
1224
1225 /// Merge collection into another.
1226 void merge_collection(
1227 coll_t cid,
1228 coll_t destination,
1229 uint32_t bits) {
1230 Op* _op = _get_next_op();
1231 _op->op = OP_MERGE_COLLECTION;
1232 _op->cid = _get_coll_id(cid);
1233 _op->dest_cid = _get_coll_id(destination);
1234 _op->split_bits = bits;
eafe8130 1235 data.ops = data.ops + 1;
11fdf7f2
TL
1236 }
1237
1238 void collection_set_bits(
1239 const coll_t &cid,
1240 int bits) {
1241 Op* _op = _get_next_op();
1242 _op->op = OP_COLL_SET_BITS;
1243 _op->cid = _get_coll_id(cid);
1244 _op->split_bits = bits;
eafe8130 1245 data.ops = data.ops + 1;
11fdf7f2
TL
1246 }
1247
1248 /// Set allocation hint for an object
1249 /// make 0 values(expected_object_size, expected_write_size) noops for all implementations
1250 void set_alloc_hint(
1251 const coll_t &cid,
1252 const ghobject_t &oid,
1253 uint64_t expected_object_size,
1254 uint64_t expected_write_size,
1255 uint32_t flags
1256 ) {
1257 Op* _op = _get_next_op();
1258 _op->op = OP_SETALLOCHINT;
1259 _op->cid = _get_coll_id(cid);
1260 _op->oid = _get_object_id(oid);
1261 _op->expected_object_size = expected_object_size;
1262 _op->expected_write_size = expected_write_size;
f67539c2 1263 _op->hint = flags;
eafe8130 1264 data.ops = data.ops + 1;
11fdf7f2
TL
1265 }
1266
9f95a23c 1267 void encode(ceph::buffer::list& bl) const {
11fdf7f2
TL
1268 //layout: data_bl + op_bl + coll_index + object_index + data
1269 ENCODE_START(9, 9, bl);
1270 encode(data_bl, bl);
1271 encode(op_bl, bl);
1272 encode(coll_index, bl);
1273 encode(object_index, bl);
1274 data.encode(bl);
1275 ENCODE_FINISH(bl);
1276 }
1277
9f95a23c 1278 void decode(ceph::buffer::list::const_iterator &bl) {
11fdf7f2
TL
1279 DECODE_START(9, bl);
1280 DECODE_OLDEST(9);
1281
1282 decode(data_bl, bl);
1283 decode(op_bl, bl);
1284 decode(coll_index, bl);
1285 decode(object_index, bl);
1286 data.decode(bl);
1287 coll_id = coll_index.size();
1288 object_id = object_index.size();
1289
1290 DECODE_FINISH(bl);
1291 }
1292
1293 void dump(ceph::Formatter *f);
9f95a23c 1294 static void generate_test_instances(std::list<Transaction*>& o);
11fdf7f2 1295};
9f95a23c
TL
1296WRITE_CLASS_ENCODER(Transaction)
1297WRITE_CLASS_ENCODER(Transaction::TransactionData)
1298
1299std::ostream& operator<<(std::ostream& out, const Transaction& tx);
1300
11fdf7f2 1301}