]>
Commit | Line | Data |
---|---|---|
9f95a23c | 1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
11fdf7f2 TL |
2 | // vim: ts=8 sw=2 smarttab |
3 | ||
9f95a23c TL |
4 | #pragma once |
5 | ||
11fdf7f2 TL |
6 | #include <map> |
7 | ||
f67539c2 | 8 | #include "include/Context.h" |
11fdf7f2 TL |
9 | #include "include/int_types.h" |
10 | #include "include/buffer.h" | |
f67539c2 | 11 | |
11fdf7f2 TL |
12 | #include "osd/osd_types.h" |
13 | ||
9f95a23c TL |
14 | #define OPS_PER_PTR 32 |
15 | ||
16 | void decode_str_str_map_to_bl(ceph::buffer::list::const_iterator& p, ceph::buffer::list *out); | |
17 | void decode_str_set_to_bl(ceph::buffer::list::const_iterator& p, ceph::buffer::list *out); | |
18 | ||
19 | ||
11fdf7f2 TL |
20 | /********************************* |
21 | * transaction | |
22 | * | |
23 | * A Transaction represents a sequence of primitive mutation | |
24 | * operations. | |
25 | * | |
26 | * Three events in the life of a Transaction result in | |
27 | * callbacks. Any Transaction can contain any number of callback | |
28 | * objects (Context) for any combination of the three classes of | |
29 | * callbacks: | |
30 | * | |
31 | * on_applied_sync, on_applied, and on_commit. | |
32 | * | |
33 | * The "on_applied" and "on_applied_sync" callbacks are invoked when | |
34 | * the modifications requested by the Transaction are visible to | |
35 | * subsequent ObjectStore operations, i.e., the results are | |
36 | * readable. The only conceptual difference between on_applied and | |
37 | * on_applied_sync is the specific thread and locking environment in | |
38 | * which the callbacks operate. "on_applied_sync" is called | |
39 | * directly by an ObjectStore execution thread. It is expected to | |
40 | * execute quickly and must not acquire any locks of the calling | |
41 | * environment. Conversely, "on_applied" is called from the separate | |
42 | * Finisher thread, meaning that it can contend for calling | |
43 | * environment locks. NB, on_applied and on_applied_sync are | |
44 | * sometimes called on_readable and on_readable_sync. | |
45 | * | |
46 | * The "on_commit" callback is also called from the Finisher thread | |
47 | * and indicates that all of the mutations have been durably | |
48 | * committed to stable storage (i.e., are now software/hardware | |
49 | * crashproof). | |
50 | * | |
51 | * At the implementation level, each mutation primitive (and its | |
52 | * associated data) can be serialized to a single buffer. That | |
53 | * serialization, however, does not copy any data, but (using the | |
9f95a23c | 54 | * ceph::buffer::list library) will reference the original buffers. This |
11fdf7f2 TL |
55 | * implies that the buffer that contains the data being submitted |
56 | * must remain stable until the on_commit callback completes. In | |
9f95a23c | 57 | * practice, ceph::buffer::list handles all of this for you and this |
11fdf7f2 TL |
58 | * subtlety is only relevant if you are referencing an existing |
59 | * buffer via buffer::raw_static. | |
60 | * | |
61 | * Some implementations of ObjectStore choose to implement their own | |
62 | * form of journaling that uses the serialized form of a | |
63 | * Transaction. This requires that the encode/decode logic properly | |
64 | * version itself and handle version upgrades that might change the | |
65 | * format of the encoded Transaction. This has already happened a | |
66 | * couple of times and the Transaction object contains some helper | |
67 | * variables that aid in this legacy decoding: | |
68 | * | |
69 | * sobject_encoding detects an older/simpler version of oid | |
70 | * present in pre-bobtail versions of ceph. use_pool_override | |
71 | * also detects a situation where the pool of an oid can be | |
72 | * overridden for legacy operations/buffers. For non-legacy | |
73 | * implementations of ObjectStore, neither of these fields is |
74 | * relevant. | |
75 | * | |
76 | * | |
77 | * TRANSACTION ISOLATION | |
78 | * | |
79 | * Except as noted above, isolation is the responsibility of the | |
80 | * caller. In other words, if any storage element (storage element | |
81 | * == any of the four portions of an object as described above) is | |
82 | * altered by a transaction (including deletion), the caller | |
83 | * promises not to attempt to read that element while the | |
84 | * transaction is pending (here pending means from the time of | |
85 | * issuance until the "on_applied_sync" callback has been | |
86 | * received). Violations of isolation need not be detected by | |
87 | * ObjectStore and there is no corresponding error mechanism for | |
88 | * reporting an isolation violation (crashing would be the | |
89 | * appropriate way to report an isolation violation if detected). | |
90 | * | |
91 | * Enumeration operations may violate transaction isolation as | |
92 | * described above when a storage element is being created or | |
93 | * deleted as part of a transaction. In this case, ObjectStore is | |
94 | * allowed to consider the enumeration operation to either precede | |
95 | * or follow the violating transaction element. In other words, the | |
96 | * presence/absence of the mutated element in the enumeration is | |
97 | * entirely at the discretion of ObjectStore. The arbitrary ordering | |
98 | * applies independently to each transaction element. For example, | |
99 | * if a transaction contains two mutating elements, "create A" and |
100 | * "delete B", and an enumeration operation is performed while this |
101 | * transaction is pending, it is permissible for ObjectStore to |
102 | * report any of the four possible combinations of the existence of |
103 | * A and B. |
104 | * | |
105 | */ | |
106 | namespace ceph::os { | |
107 | class Transaction { | |
108 | public: | |
/// Operation codes stored in Op::op, listed in numeric order.
/// The values are written into the op buffer (see nop()/create()), so an
/// existing value must never be renumbered.  The trailing comment on each
/// entry names the arguments the operation takes.
enum {
  OP_NOP               = 0,
  OP_CREATE            = 7,   // cid, oid
  OP_COLL_MOVE         = 8,   // newcid, oldcid, oid
  OP_TOUCH             = 9,   // cid, oid
  OP_WRITE             = 10,  // cid, oid, offset, len, bl
  OP_ZERO              = 11,  // cid, oid, offset, len
  OP_TRUNCATE          = 12,  // cid, oid, len
  OP_REMOVE            = 13,  // cid, oid
  OP_SETATTR           = 14,  // cid, oid, attrname, bl
  OP_SETATTRS          = 15,  // cid, oid, attrset
  OP_RMATTR            = 16,  // cid, oid, attrname
  OP_CLONE             = 17,  // cid, oid, newoid
  OP_CLONERANGE        = 18,  // cid, oid, newoid, offset, len
  OP_TRIMCACHE         = 19,  // cid, oid, offset, len  **DEPRECATED**
  OP_MKCOLL            = 20,  // cid
  OP_RMCOLL            = 21,  // cid
  OP_COLL_ADD          = 22,  // cid, oldcid, oid
  OP_COLL_REMOVE       = 23,  // cid, oid
  OP_COLL_SETATTR      = 24,  // cid, attrname, bl
  OP_COLL_RMATTR       = 25,  // cid, attrname
  OP_COLL_SETATTRS     = 26,  // cid, attrset
  OP_RMATTRS           = 28,  // cid, oid
  OP_COLL_RENAME       = 29,  // cid, newcid
  OP_CLONERANGE2       = 30,  // cid, oid, newoid, srcoff, len, dstoff
  OP_OMAP_CLEAR        = 31,  // cid, oid
  OP_OMAP_SETKEYS      = 32,  // cid, oid, attrset
  OP_OMAP_RMKEYS       = 33,  // cid, oid, keyset
  OP_OMAP_SETHEADER    = 34,  // cid, oid, header
  OP_SPLIT_COLLECTION  = 35,  // cid, bits, destination
  OP_SPLIT_COLLECTION2 = 36,  // cid, bits, destination
                              //   (doesn't create the destination)
  OP_OMAP_RMKEYRANGE   = 37,  // cid, oid, firstkey, lastkey
  OP_COLL_MOVE_RENAME  = 38,  // oldcid, oldoid, newcid, newoid
  OP_SETALLOCHINT      = 39,  // cid, oid, object_size, write_size
  OP_COLL_HINT         = 40,  // cid, type, bl
  OP_TRY_RENAME        = 41,  // oldcid, oldoid, newoid
  OP_COLL_SET_BITS     = 42,  // cid, bits
  OP_MERGE_COLLECTION  = 43   // cid, destination
};
157 | ||
/// Transaction hint types.
/// NOTE(review): the name suggests these are the `type` argument of
/// OP_COLL_HINT -- confirm against the code that emits that op.
enum {
  COLL_HINT_EXPECTED_NUM_OBJECTS = 1
};
162 | ||
163 | struct Op { | |
eafe8130 TL |
164 | ceph_le32 op; |
165 | ceph_le32 cid; | |
166 | ceph_le32 oid; | |
167 | ceph_le64 off; | |
168 | ceph_le64 len; | |
169 | ceph_le32 dest_cid; | |
170 | ceph_le32 dest_oid; //OP_CLONE, OP_CLONERANGE | |
171 | ceph_le64 dest_off; //OP_CLONERANGE | |
f67539c2 | 172 | ceph_le32 hint; //OP_COLL_HINT,OP_SETALLOCHINT |
eafe8130 TL |
173 | ceph_le64 expected_object_size; //OP_SETALLOCHINT |
174 | ceph_le64 expected_write_size; //OP_SETALLOCHINT | |
175 | ceph_le32 split_bits; //OP_SPLIT_COLLECTION2,OP_COLL_SET_BITS, | |
11fdf7f2 | 176 | //OP_MKCOLL |
eafe8130 | 177 | ceph_le32 split_rem; //OP_SPLIT_COLLECTION2 |
11fdf7f2 TL |
178 | } __attribute__ ((packed)) ; |
179 | ||
180 | struct TransactionData { | |
eafe8130 TL |
181 | ceph_le64 ops; |
182 | ceph_le32 largest_data_len; | |
183 | ceph_le32 largest_data_off; | |
184 | ceph_le32 largest_data_off_in_data_bl; | |
185 | ceph_le32 fadvise_flags; | |
11fdf7f2 TL |
186 | |
187 | TransactionData() noexcept : | |
20effc67 TL |
188 | ops(0), |
189 | largest_data_len(0), | |
190 | largest_data_off(0), | |
191 | largest_data_off_in_data_bl(0), | |
192 | fadvise_flags(0) { } | |
11fdf7f2 | 193 | |
9f95a23c | 194 | // override default move operations to reset default values |
11fdf7f2 | 195 | TransactionData(TransactionData&& other) noexcept : |
9f95a23c | 196 | ops(other.ops), |
11fdf7f2 TL |
197 | largest_data_len(other.largest_data_len), |
198 | largest_data_off(other.largest_data_off), | |
199 | largest_data_off_in_data_bl(other.largest_data_off_in_data_bl), | |
200 | fadvise_flags(other.fadvise_flags) { | |
201 | other.ops = 0; | |
202 | other.largest_data_len = 0; | |
203 | other.largest_data_off = 0; | |
204 | other.largest_data_off_in_data_bl = 0; | |
205 | other.fadvise_flags = 0; | |
206 | } | |
207 | TransactionData& operator=(TransactionData&& other) noexcept { | |
208 | ops = other.ops; | |
209 | largest_data_len = other.largest_data_len; | |
210 | largest_data_off = other.largest_data_off; | |
211 | largest_data_off_in_data_bl = other.largest_data_off_in_data_bl; | |
212 | fadvise_flags = other.fadvise_flags; | |
213 | other.ops = 0; | |
214 | other.largest_data_len = 0; | |
215 | other.largest_data_off = 0; | |
216 | other.largest_data_off_in_data_bl = 0; | |
217 | other.fadvise_flags = 0; | |
218 | return *this; | |
219 | } | |
220 | ||
221 | TransactionData(const TransactionData& other) = default; | |
222 | TransactionData& operator=(const TransactionData& other) = default; | |
223 | ||
9f95a23c | 224 | void encode(ceph::buffer::list& bl) const { |
11fdf7f2 TL |
225 | bl.append((char*)this, sizeof(TransactionData)); |
226 | } | |
9f95a23c | 227 | void decode(ceph::buffer::list::const_iterator &bl) { |
11fdf7f2 TL |
228 | bl.copy(sizeof(TransactionData), (char*)this); |
229 | } | |
230 | } __attribute__ ((packed)) ; | |
231 | ||
232 | private: | |
233 | TransactionData data; | |
234 | ||
9f95a23c TL |
235 | std::map<coll_t, uint32_t> coll_index; |
236 | std::map<ghobject_t, uint32_t> object_index; | |
11fdf7f2 | 237 | |
9f95a23c TL |
238 | uint32_t coll_id = 0; |
239 | uint32_t object_id = 0; | |
11fdf7f2 | 240 | |
9f95a23c TL |
241 | ceph::buffer::list data_bl; |
242 | ceph::buffer::list op_bl; | |
11fdf7f2 TL |
243 | |
244 | std::list<Context *> on_applied; | |
245 | std::list<Context *> on_commit; | |
246 | std::list<Context *> on_applied_sync; | |
247 | ||
248 | public: | |
249 | Transaction() = default; | |
250 | ||
9f95a23c | 251 | explicit Transaction(ceph::buffer::list::const_iterator &dp) { |
11fdf7f2 TL |
252 | decode(dp); |
253 | } | |
9f95a23c | 254 | explicit Transaction(ceph::buffer::list &nbl) { |
11fdf7f2 TL |
255 | auto dp = nbl.cbegin(); |
256 | decode(dp); | |
257 | } | |
258 | ||
259 | // override default move operations to reset default values | |
260 | Transaction(Transaction&& other) noexcept : | |
261 | data(std::move(other.data)), | |
262 | coll_index(std::move(other.coll_index)), | |
263 | object_index(std::move(other.object_index)), | |
264 | coll_id(other.coll_id), | |
265 | object_id(other.object_id), | |
266 | data_bl(std::move(other.data_bl)), | |
267 | op_bl(std::move(other.op_bl)), | |
268 | on_applied(std::move(other.on_applied)), | |
269 | on_commit(std::move(other.on_commit)), | |
270 | on_applied_sync(std::move(other.on_applied_sync)) { | |
271 | other.coll_id = 0; | |
272 | other.object_id = 0; | |
273 | } | |
274 | ||
275 | Transaction& operator=(Transaction&& other) noexcept { | |
276 | data = std::move(other.data); | |
277 | coll_index = std::move(other.coll_index); | |
278 | object_index = std::move(other.object_index); | |
279 | coll_id = other.coll_id; | |
280 | object_id = other.object_id; | |
281 | data_bl = std::move(other.data_bl); | |
282 | op_bl = std::move(other.op_bl); | |
283 | on_applied = std::move(other.on_applied); | |
284 | on_commit = std::move(other.on_commit); | |
285 | on_applied_sync = std::move(other.on_applied_sync); | |
286 | other.coll_id = 0; | |
287 | other.object_id = 0; | |
288 | return *this; | |
289 | } | |
290 | ||
291 | Transaction(const Transaction& other) = default; | |
292 | Transaction& operator=(const Transaction& other) = default; | |
293 | ||
294 | // expose object_index for FileStore::Op's benefit | |
9f95a23c | 295 | const std::map<ghobject_t, uint32_t>& get_object_index() const { |
11fdf7f2 TL |
296 | return object_index; |
297 | } | |
298 | ||
299 | /* Operations on callback contexts */ | |
300 | void register_on_applied(Context *c) { | |
301 | if (!c) return; | |
302 | on_applied.push_back(c); | |
303 | } | |
304 | void register_on_commit(Context *c) { | |
305 | if (!c) return; | |
306 | on_commit.push_back(c); | |
307 | } | |
308 | void register_on_applied_sync(Context *c) { | |
309 | if (!c) return; | |
310 | on_applied_sync.push_back(c); | |
311 | } | |
312 | void register_on_complete(Context *c) { | |
313 | if (!c) return; | |
314 | RunOnDeleteRef _complete (std::make_shared<RunOnDelete>(c)); | |
315 | register_on_applied(new ContainerContext<RunOnDeleteRef>(_complete)); | |
316 | register_on_commit(new ContainerContext<RunOnDeleteRef>(_complete)); | |
317 | } | |
318 | bool has_contexts() const { | |
319 | return | |
9f95a23c TL |
320 | !on_commit.empty() || |
321 | !on_applied.empty() || | |
322 | !on_applied_sync.empty(); | |
11fdf7f2 TL |
323 | } |
324 | ||
9f95a23c TL |
325 | static void collect_contexts( |
326 | std::vector<Transaction>& t, | |
327 | Context **out_on_applied, | |
328 | Context **out_on_commit, | |
329 | Context **out_on_applied_sync) { | |
11fdf7f2 TL |
330 | ceph_assert(out_on_applied); |
331 | ceph_assert(out_on_commit); | |
332 | ceph_assert(out_on_applied_sync); | |
333 | std::list<Context *> on_applied, on_commit, on_applied_sync; | |
334 | for (auto& i : t) { | |
9f95a23c TL |
335 | on_applied.splice(on_applied.end(), i.on_applied); |
336 | on_commit.splice(on_commit.end(), i.on_commit); | |
337 | on_applied_sync.splice(on_applied_sync.end(), i.on_applied_sync); | |
11fdf7f2 TL |
338 | } |
339 | *out_on_applied = C_Contexts::list_to_context(on_applied); | |
340 | *out_on_commit = C_Contexts::list_to_context(on_commit); | |
341 | *out_on_applied_sync = C_Contexts::list_to_context(on_applied_sync); | |
342 | } | |
9f95a23c TL |
343 | static void collect_contexts( |
344 | std::vector<Transaction>& t, | |
345 | std::list<Context*> *out_on_applied, | |
346 | std::list<Context*> *out_on_commit, | |
347 | std::list<Context*> *out_on_applied_sync) { | |
11fdf7f2 TL |
348 | ceph_assert(out_on_applied); |
349 | ceph_assert(out_on_commit); | |
350 | ceph_assert(out_on_applied_sync); | |
351 | for (auto& i : t) { | |
9f95a23c TL |
352 | out_on_applied->splice(out_on_applied->end(), i.on_applied); |
353 | out_on_commit->splice(out_on_commit->end(), i.on_commit); | |
354 | out_on_applied_sync->splice(out_on_applied_sync->end(), | |
355 | i.on_applied_sync); | |
11fdf7f2 TL |
356 | } |
357 | } | |
9f95a23c TL |
358 | static Context *collect_all_contexts( |
359 | Transaction& t) { | |
f67539c2 | 360 | std::list<Context*> contexts; |
9f95a23c TL |
361 | contexts.splice(contexts.end(), t.on_applied); |
362 | contexts.splice(contexts.end(), t.on_commit); | |
363 | contexts.splice(contexts.end(), t.on_applied_sync); | |
364 | return C_Contexts::list_to_context(contexts); | |
365 | } | |
11fdf7f2 TL |
366 | |
367 | Context *get_on_applied() { | |
368 | return C_Contexts::list_to_context(on_applied); | |
369 | } | |
370 | Context *get_on_commit() { | |
371 | return C_Contexts::list_to_context(on_commit); | |
372 | } | |
373 | Context *get_on_applied_sync() { | |
374 | return C_Contexts::list_to_context(on_applied_sync); | |
375 | } | |
376 | ||
377 | void set_fadvise_flags(uint32_t flags) { | |
378 | data.fadvise_flags = flags; | |
379 | } | |
380 | void set_fadvise_flag(uint32_t flag) { | |
381 | data.fadvise_flags = data.fadvise_flags | flag; | |
382 | } | |
383 | uint32_t get_fadvise_flags() { return data.fadvise_flags; } | |
384 | ||
385 | void swap(Transaction& other) noexcept { | |
386 | std::swap(data, other.data); | |
387 | std::swap(on_applied, other.on_applied); | |
388 | std::swap(on_commit, other.on_commit); | |
389 | std::swap(on_applied_sync, other.on_applied_sync); | |
9f95a23c | 390 | |
11fdf7f2 TL |
391 | std::swap(coll_index, other.coll_index); |
392 | std::swap(object_index, other.object_index); | |
393 | std::swap(coll_id, other.coll_id); | |
394 | std::swap(object_id, other.object_id); | |
395 | op_bl.swap(other.op_bl); | |
396 | data_bl.swap(other.data_bl); | |
397 | } | |
398 | ||
399 | void _update_op(Op* op, | |
9f95a23c TL |
400 | std::vector<uint32_t> &cm, |
401 | std::vector<uint32_t> &om) { | |
11fdf7f2 TL |
402 | |
403 | switch (op->op) { | |
404 | case OP_NOP: | |
405 | break; | |
406 | ||
9f95a23c | 407 | case OP_CREATE: |
11fdf7f2 TL |
408 | case OP_TOUCH: |
409 | case OP_REMOVE: | |
410 | case OP_SETATTR: | |
411 | case OP_SETATTRS: | |
412 | case OP_RMATTR: | |
413 | case OP_RMATTRS: | |
414 | case OP_COLL_REMOVE: | |
415 | case OP_OMAP_CLEAR: | |
416 | case OP_OMAP_SETKEYS: | |
417 | case OP_OMAP_RMKEYS: | |
418 | case OP_OMAP_RMKEYRANGE: | |
419 | case OP_OMAP_SETHEADER: | |
420 | case OP_WRITE: | |
421 | case OP_ZERO: | |
422 | case OP_TRUNCATE: | |
423 | case OP_SETALLOCHINT: | |
424 | ceph_assert(op->cid < cm.size()); | |
425 | ceph_assert(op->oid < om.size()); | |
426 | op->cid = cm[op->cid]; | |
427 | op->oid = om[op->oid]; | |
428 | break; | |
429 | ||
430 | case OP_CLONERANGE2: | |
431 | case OP_CLONE: | |
432 | ceph_assert(op->cid < cm.size()); | |
433 | ceph_assert(op->oid < om.size()); | |
434 | ceph_assert(op->dest_oid < om.size()); | |
435 | op->cid = cm[op->cid]; | |
436 | op->oid = om[op->oid]; | |
437 | op->dest_oid = om[op->dest_oid]; | |
438 | break; | |
439 | ||
440 | case OP_MKCOLL: | |
441 | case OP_RMCOLL: | |
442 | case OP_COLL_SETATTR: | |
443 | case OP_COLL_RMATTR: | |
444 | case OP_COLL_SETATTRS: | |
445 | case OP_COLL_HINT: | |
446 | case OP_COLL_SET_BITS: | |
447 | ceph_assert(op->cid < cm.size()); | |
448 | op->cid = cm[op->cid]; | |
449 | break; | |
450 | ||
451 | case OP_COLL_ADD: | |
452 | ceph_assert(op->cid < cm.size()); | |
453 | ceph_assert(op->oid < om.size()); | |
454 | ceph_assert(op->dest_cid < om.size()); | |
455 | op->cid = cm[op->cid]; | |
456 | op->dest_cid = cm[op->dest_cid]; | |
457 | op->oid = om[op->oid]; | |
458 | break; | |
9f95a23c | 459 | |
11fdf7f2 TL |
460 | case OP_COLL_MOVE_RENAME: |
461 | ceph_assert(op->cid < cm.size()); | |
462 | ceph_assert(op->oid < om.size()); | |
463 | ceph_assert(op->dest_cid < cm.size()); | |
464 | ceph_assert(op->dest_oid < om.size()); | |
465 | op->cid = cm[op->cid]; | |
466 | op->oid = om[op->oid]; | |
467 | op->dest_cid = cm[op->dest_cid]; | |
468 | op->dest_oid = om[op->dest_oid]; | |
469 | break; | |
9f95a23c | 470 | |
11fdf7f2 TL |
471 | case OP_TRY_RENAME: |
472 | ceph_assert(op->cid < cm.size()); | |
473 | ceph_assert(op->oid < om.size()); | |
474 | ceph_assert(op->dest_oid < om.size()); | |
475 | op->cid = cm[op->cid]; | |
476 | op->oid = om[op->oid]; | |
477 | op->dest_oid = om[op->dest_oid]; | |
9f95a23c TL |
478 | break; |
479 | ||
11fdf7f2 TL |
480 | case OP_SPLIT_COLLECTION2: |
481 | ceph_assert(op->cid < cm.size()); | |
9f95a23c | 482 | ceph_assert(op->dest_cid < cm.size()); |
11fdf7f2 TL |
483 | op->cid = cm[op->cid]; |
484 | op->dest_cid = cm[op->dest_cid]; | |
485 | break; | |
9f95a23c | 486 | |
11fdf7f2 TL |
487 | case OP_MERGE_COLLECTION: |
488 | ceph_assert(op->cid < cm.size()); | |
9f95a23c | 489 | ceph_assert(op->dest_cid < cm.size()); |
11fdf7f2 TL |
490 | op->cid = cm[op->cid]; |
491 | op->dest_cid = cm[op->dest_cid]; | |
492 | break; | |
9f95a23c | 493 | |
11fdf7f2 TL |
494 | default: |
495 | ceph_abort_msg("Unknown OP"); | |
496 | } | |
497 | } | |
498 | void _update_op_bl( | |
9f95a23c TL |
499 | ceph::buffer::list& bl, |
500 | std::vector<uint32_t> &cm, | |
501 | std::vector<uint32_t> &om) { | |
11fdf7f2 TL |
502 | for (auto& bp : bl.buffers()) { |
503 | ceph_assert(bp.length() % sizeof(Op) == 0); | |
9f95a23c | 504 | |
11fdf7f2 TL |
505 | char* raw_p = const_cast<char*>(bp.c_str()); |
506 | char* raw_end = raw_p + bp.length(); | |
507 | while (raw_p < raw_end) { | |
9f95a23c TL |
508 | _update_op(reinterpret_cast<Op*>(raw_p), cm, om); |
509 | raw_p += sizeof(Op); | |
11fdf7f2 TL |
510 | } |
511 | } | |
512 | } | |
9f95a23c | 513 | /// Append the operations of the parameter to this Transaction. Those operations are removed from the parameter Transaction |
11fdf7f2 TL |
514 | void append(Transaction& other) { |
515 | ||
eafe8130 | 516 | data.ops = data.ops + other.data.ops; |
11fdf7f2 | 517 | if (other.data.largest_data_len > data.largest_data_len) { |
9f95a23c TL |
518 | data.largest_data_len = other.data.largest_data_len; |
519 | data.largest_data_off = other.data.largest_data_off; | |
520 | data.largest_data_off_in_data_bl = data_bl.length() + other.data.largest_data_off_in_data_bl; | |
11fdf7f2 | 521 | } |
eafe8130 | 522 | data.fadvise_flags = data.fadvise_flags | other.data.fadvise_flags; |
11fdf7f2 TL |
523 | on_applied.splice(on_applied.end(), other.on_applied); |
524 | on_commit.splice(on_commit.end(), other.on_commit); | |
525 | on_applied_sync.splice(on_applied_sync.end(), other.on_applied_sync); | |
9f95a23c | 526 | |
11fdf7f2 | 527 | //append coll_index & object_index |
9f95a23c TL |
528 | std::vector<uint32_t> cm(other.coll_index.size()); |
529 | std::map<coll_t, uint32_t>::iterator coll_index_p; | |
11fdf7f2 | 530 | for (coll_index_p = other.coll_index.begin(); |
9f95a23c TL |
531 | coll_index_p != other.coll_index.end(); |
532 | ++coll_index_p) { | |
11fdf7f2 TL |
533 | cm[coll_index_p->second] = _get_coll_id(coll_index_p->first); |
534 | } | |
9f95a23c TL |
535 | |
536 | std::vector<uint32_t> om(other.object_index.size()); | |
537 | std::map<ghobject_t, uint32_t>::iterator object_index_p; | |
11fdf7f2 | 538 | for (object_index_p = other.object_index.begin(); |
9f95a23c TL |
539 | object_index_p != other.object_index.end(); |
540 | ++object_index_p) { | |
11fdf7f2 | 541 | om[object_index_p->second] = _get_object_id(object_index_p->first); |
9f95a23c TL |
542 | } |
543 | ||
11fdf7f2 | 544 | //the other.op_bl SHOULD NOT be changes during append operation, |
9f95a23c TL |
545 | //we use additional ceph::buffer::list to avoid this problem |
546 | ceph::buffer::list other_op_bl; | |
11fdf7f2 | 547 | { |
9f95a23c TL |
548 | ceph::buffer::ptr other_op_bl_ptr(other.op_bl.length()); |
549 | other.op_bl.begin().copy(other.op_bl.length(), other_op_bl_ptr.c_str()); | |
11fdf7f2 TL |
550 | other_op_bl.append(std::move(other_op_bl_ptr)); |
551 | } | |
9f95a23c | 552 | |
11fdf7f2 TL |
553 | //update other_op_bl with cm & om |
554 | //When the other is appended to current transaction, all coll_index and | |
555 | //object_index in other.op_buffer should be updated by new index of the | |
556 | //combined transaction | |
557 | _update_op_bl(other_op_bl, cm, om); | |
9f95a23c | 558 | |
11fdf7f2 TL |
559 | //append op_bl |
560 | op_bl.append(other_op_bl); | |
561 | //append data_bl | |
562 | data_bl.append(other.data_bl); | |
563 | } | |
9f95a23c | 564 | |
11fdf7f2 | 565 | /** Inquires about the Transaction as a whole. */ |
9f95a23c | 566 | |
11fdf7f2 TL |
567 | /// How big is the encoded Transaction buffer? |
568 | uint64_t get_encoded_bytes() { | |
569 | //layout: data_bl + op_bl + coll_index + object_index + data | |
9f95a23c | 570 | |
11fdf7f2 TL |
571 | // coll_index size, object_index size and sizeof(transaction_data) |
572 | // all here, so they may be computed at compile-time | |
573 | size_t final_size = sizeof(__u32) * 2 + sizeof(data); | |
9f95a23c | 574 | |
11fdf7f2 | 575 | // coll_index second and object_index second |
9f95a23c TL |
576 | final_size += (coll_index.size() + object_index.size()) * sizeof(__u32); |
577 | ||
11fdf7f2 TL |
578 | // coll_index first |
579 | for (auto p = coll_index.begin(); p != coll_index.end(); ++p) { | |
9f95a23c | 580 | final_size += p->first.encoded_size(); |
11fdf7f2 | 581 | } |
9f95a23c | 582 | |
11fdf7f2 TL |
583 | // object_index first |
584 | for (auto p = object_index.begin(); p != object_index.end(); ++p) { | |
9f95a23c | 585 | final_size += p->first.encoded_size(); |
11fdf7f2 | 586 | } |
9f95a23c | 587 | |
11fdf7f2 | 588 | return data_bl.length() + |
9f95a23c TL |
589 | op_bl.length() + |
590 | final_size; | |
11fdf7f2 | 591 | } |
9f95a23c | 592 | |
11fdf7f2 TL |
593 | /// Retain old version for regression testing purposes |
594 | uint64_t get_encoded_bytes_test() { | |
595 | using ceph::encode; | |
596 | //layout: data_bl + op_bl + coll_index + object_index + data | |
9f95a23c | 597 | ceph::buffer::list bl; |
11fdf7f2 TL |
598 | encode(coll_index, bl); |
599 | encode(object_index, bl); | |
9f95a23c | 600 | |
11fdf7f2 | 601 | return data_bl.length() + |
9f95a23c TL |
602 | op_bl.length() + |
603 | bl.length() + | |
604 | sizeof(data); | |
11fdf7f2 | 605 | } |
9f95a23c | 606 | |
11fdf7f2 TL |
607 | uint64_t get_num_bytes() { |
608 | return get_encoded_bytes(); | |
609 | } | |
610 | /// Size of largest data buffer to the "write" operation encountered so far | |
611 | uint32_t get_data_length() { | |
612 | return data.largest_data_len; | |
613 | } | |
9f95a23c | 614 | /// offset within the encoded buffer to the start of the largest data buffer that's encoded |
11fdf7f2 TL |
615 | uint32_t get_data_offset() { |
616 | if (data.largest_data_off_in_data_bl) { | |
9f95a23c TL |
617 | return data.largest_data_off_in_data_bl + |
618 | sizeof(__u8) + // encode struct_v | |
619 | sizeof(__u8) + // encode compat_v | |
620 | sizeof(__u32) + // encode len | |
621 | sizeof(__u32); // data_bl len | |
11fdf7f2 TL |
622 | } |
623 | return 0; // none | |
624 | } | |
625 | /// offset of buffer as aligned to destination within object. | |
626 | int get_data_alignment() { | |
627 | if (!data.largest_data_len) | |
9f95a23c | 628 | return 0; |
11fdf7f2 TL |
629 | return (0 - get_data_offset()) & ~CEPH_PAGE_MASK; |
630 | } | |
631 | /// Is the Transaction empty (no operations) | |
632 | bool empty() { | |
633 | return !data.ops; | |
634 | } | |
635 | /// Number of operations in the transaction | |
636 | int get_num_ops() { | |
637 | return data.ops; | |
638 | } | |
9f95a23c | 639 | |
11fdf7f2 TL |
640 | /** |
641 | * iterator | |
642 | * | |
643 | * Helper object to parse Transactions. | |
644 | * | |
645 | * ObjectStore instances use this object to step down the encoded | |
646 | * buffer decoding operation codes and parameters as we go. | |
647 | * | |
648 | */ | |
649 | class iterator { | |
650 | Transaction *t; | |
9f95a23c | 651 | |
11fdf7f2 TL |
652 | uint64_t ops; |
653 | char* op_buffer_p; | |
9f95a23c TL |
654 | |
655 | ceph::buffer::list::const_iterator data_bl_p; | |
656 | ||
11fdf7f2 | 657 | public: |
9f95a23c TL |
658 | std::vector<coll_t> colls; |
659 | std::vector<ghobject_t> objects; | |
660 | ||
11fdf7f2 TL |
661 | private: |
662 | explicit iterator(Transaction *t) | |
663 | : t(t), | |
9f95a23c TL |
664 | data_bl_p(t->data_bl.cbegin()), |
665 | colls(t->coll_index.size()), | |
666 | objects(t->object_index.size()) { | |
667 | ||
11fdf7f2 TL |
668 | ops = t->data.ops; |
669 | op_buffer_p = t->op_bl.c_str(); | |
9f95a23c TL |
670 | |
671 | std::map<coll_t, uint32_t>::iterator coll_index_p; | |
11fdf7f2 | 672 | for (coll_index_p = t->coll_index.begin(); |
9f95a23c TL |
673 | coll_index_p != t->coll_index.end(); |
674 | ++coll_index_p) { | |
675 | colls[coll_index_p->second] = coll_index_p->first; | |
11fdf7f2 | 676 | } |
9f95a23c TL |
677 | |
678 | std::map<ghobject_t, uint32_t>::iterator object_index_p; | |
11fdf7f2 | 679 | for (object_index_p = t->object_index.begin(); |
9f95a23c TL |
680 | object_index_p != t->object_index.end(); |
681 | ++object_index_p) { | |
682 | objects[object_index_p->second] = object_index_p->first; | |
11fdf7f2 TL |
683 | } |
684 | } | |
9f95a23c | 685 | |
11fdf7f2 | 686 | friend class Transaction; |
9f95a23c | 687 | |
11fdf7f2 | 688 | public: |
9f95a23c | 689 | |
11fdf7f2 TL |
690 | bool have_op() { |
691 | return ops > 0; | |
692 | } | |
693 | Op* decode_op() { | |
694 | ceph_assert(ops > 0); | |
9f95a23c | 695 | |
11fdf7f2 TL |
696 | Op* op = reinterpret_cast<Op*>(op_buffer_p); |
697 | op_buffer_p += sizeof(Op); | |
698 | ops--; | |
9f95a23c | 699 | |
11fdf7f2 TL |
700 | return op; |
701 | } | |
9f95a23c TL |
702 | std::string decode_string() { |
703 | using ceph::decode; | |
704 | std::string s; | |
11fdf7f2 TL |
705 | decode(s, data_bl_p); |
706 | return s; | |
707 | } | |
9f95a23c TL |
708 | void decode_bp(ceph::buffer::ptr& bp) { |
709 | using ceph::decode; | |
11fdf7f2 TL |
710 | decode(bp, data_bl_p); |
711 | } | |
9f95a23c TL |
712 | void decode_bl(ceph::buffer::list& bl) { |
713 | using ceph::decode; | |
11fdf7f2 TL |
714 | decode(bl, data_bl_p); |
715 | } | |
9f95a23c TL |
716 | void decode_attrset(std::map<std::string,ceph::buffer::ptr>& aset) { |
717 | using ceph::decode; | |
11fdf7f2 TL |
718 | decode(aset, data_bl_p); |
719 | } | |
9f95a23c TL |
720 | void decode_attrset(std::map<std::string,ceph::buffer::list>& aset) { |
721 | using ceph::decode; | |
11fdf7f2 TL |
722 | decode(aset, data_bl_p); |
723 | } | |
9f95a23c TL |
724 | void decode_attrset_bl(ceph::buffer::list *pbl) { |
725 | decode_str_str_map_to_bl(data_bl_p, pbl); | |
726 | } | |
727 | void decode_keyset(std::set<std::string> &keys){ | |
728 | using ceph::decode; | |
11fdf7f2 TL |
729 | decode(keys, data_bl_p); |
730 | } | |
9f95a23c TL |
731 | void decode_keyset_bl(ceph::buffer::list *pbl){ |
732 | decode_str_set_to_bl(data_bl_p, pbl); | |
733 | } | |
734 | ||
735 | const ghobject_t &get_oid(uint32_t oid_id) { | |
11fdf7f2 TL |
736 | ceph_assert(oid_id < objects.size()); |
737 | return objects[oid_id]; | |
738 | } | |
9f95a23c | 739 | const coll_t &get_cid(uint32_t cid_id) { |
11fdf7f2 TL |
740 | ceph_assert(cid_id < colls.size()); |
741 | return colls[cid_id]; | |
742 | } | |
743 | uint32_t get_fadvise_flags() const { | |
9f95a23c | 744 | return t->get_fadvise_flags(); |
11fdf7f2 | 745 | } |
f67539c2 | 746 | |
20effc67 | 747 | const std::vector<ghobject_t> &get_objects() const { |
f67539c2 TL |
748 | return objects; |
749 | } | |
11fdf7f2 | 750 | }; |
9f95a23c | 751 | |
11fdf7f2 | 752 | iterator begin() { |
9f95a23c | 753 | return iterator(this); |
11fdf7f2 | 754 | } |
9f95a23c | 755 | |
11fdf7f2 TL |
756 | private: |
/// Declared only; the definition is not part of this header.
/// NOTE(review): no caller is visible in this excerpt -- confirm it is
/// still needed.
void _build_actions_from_tbl();
758 | ||
11fdf7f2 TL |
759 | /** |
760 | * Helper functions to encode the various mutation elements of a | |
761 | * transaction. These are 1:1 with the operation codes (see | |
762 | * enumeration above). These routines ensure that the | |
763 | * encoder/creator of a transaction gets the right data in the | |
764 | * right place. Sadly, there's no corresponding version nor any | |
765 | * form of seat belts for the decoder. | |
766 | */ | |
767 | Op* _get_next_op() { | |
768 | if (op_bl.get_append_buffer_unused_tail_length() < sizeof(Op)) { | |
769 | op_bl.reserve(sizeof(Op) * OPS_PER_PTR); | |
770 | } | |
771 | // append_hole ensures bptr merging. Even huge number of ops | |
772 | // shouldn't result in overpopulating bl::_buffers. | |
773 | char* const p = op_bl.append_hole(sizeof(Op)).c_str(); | |
774 | memset(p, 0, sizeof(Op)); | |
775 | return reinterpret_cast<Op*>(p); | |
776 | } | |
9f95a23c TL |
777 | uint32_t _get_coll_id(const coll_t& coll) { |
778 | std::map<coll_t, uint32_t>::iterator c = coll_index.find(coll); | |
11fdf7f2 TL |
779 | if (c != coll_index.end()) |
780 | return c->second; | |
9f95a23c TL |
781 | |
782 | uint32_t index_id = coll_id++; | |
11fdf7f2 TL |
783 | coll_index[coll] = index_id; |
784 | return index_id; | |
785 | } | |
9f95a23c TL |
786 | uint32_t _get_object_id(const ghobject_t& oid) { |
787 | std::map<ghobject_t, uint32_t>::iterator o = object_index.find(oid); | |
11fdf7f2 TL |
788 | if (o != object_index.end()) |
789 | return o->second; | |
9f95a23c TL |
790 | |
791 | uint32_t index_id = object_id++; | |
11fdf7f2 TL |
792 | object_index[oid] = index_id; |
793 | return index_id; | |
794 | } | |
9f95a23c | 795 | |
11fdf7f2 TL |
796 | public: |
797 | /// noop. 'nuf said | |
798 | void nop() { | |
799 | Op* _op = _get_next_op(); | |
800 | _op->op = OP_NOP; | |
eafe8130 | 801 | data.ops = data.ops + 1; |
11fdf7f2 | 802 | } |
9f95a23c TL |
803 | /** |
804 | * create | |
805 | * | |
806 | * create an object that does not yet exist | |
807 | * (behavior is undefined if the object already exists) | |
808 | */ | |
809 | void create(const coll_t& cid, const ghobject_t& oid) { | |
810 | Op* _op = _get_next_op(); | |
811 | _op->op = OP_CREATE; | |
812 | _op->cid = _get_coll_id(cid); | |
813 | _op->oid = _get_object_id(oid); | |
814 | data.ops = data.ops + 1; | |
815 | } | |
11fdf7f2 TL |
816 | /** |
817 | * touch | |
818 | * | |
819 | * Ensure the existance of an object in a collection. Create an | |
820 | * empty object if necessary | |
821 | */ | |
822 | void touch(const coll_t& cid, const ghobject_t& oid) { | |
823 | Op* _op = _get_next_op(); | |
824 | _op->op = OP_TOUCH; | |
825 | _op->cid = _get_coll_id(cid); | |
826 | _op->oid = _get_object_id(oid); | |
eafe8130 | 827 | data.ops = data.ops + 1; |
11fdf7f2 TL |
828 | } |
829 | /** | |
830 | * Write data to an offset within an object. If the object is too | |
831 | * small, it is expanded as needed. It is possible to specify an | |
832 | * offset beyond the current end of an object and it will be | |
833 | * expanded as needed. Simple implementations of ObjectStore will | |
834 | * just zero the data between the old end of the object and the | |
835 | * newly provided data. More sophisticated implementations of | |
836 | * ObjectStore will omit the untouched data and store it as a | |
837 | * "hole" in the file. | |
838 | * | |
839 | * Note that a 0-length write does not affect the size of the object. | |
840 | */ | |
841 | void write(const coll_t& cid, const ghobject_t& oid, uint64_t off, uint64_t len, | |
9f95a23c | 842 | const ceph::buffer::list& write_data, uint32_t flags = 0) { |
11fdf7f2 TL |
843 | using ceph::encode; |
844 | uint32_t orig_len = data_bl.length(); | |
845 | Op* _op = _get_next_op(); | |
846 | _op->op = OP_WRITE; | |
847 | _op->cid = _get_coll_id(cid); | |
848 | _op->oid = _get_object_id(oid); | |
849 | _op->off = off; | |
850 | _op->len = len; | |
851 | encode(write_data, data_bl); | |
9f95a23c | 852 | |
11fdf7f2 TL |
853 | ceph_assert(len == write_data.length()); |
854 | data.fadvise_flags = data.fadvise_flags | flags; | |
855 | if (write_data.length() > data.largest_data_len) { | |
9f95a23c TL |
856 | data.largest_data_len = write_data.length(); |
857 | data.largest_data_off = off; | |
858 | data.largest_data_off_in_data_bl = orig_len + sizeof(__u32); // we are about to | |
11fdf7f2 | 859 | } |
eafe8130 | 860 | data.ops = data.ops + 1; |
11fdf7f2 TL |
861 | } |
862 | /** | |
863 | * zero out the indicated byte range within an object. Some | |
864 | * ObjectStore instances may optimize this to release the | |
865 | * underlying storage space. | |
866 | * | |
867 | * If the zero range extends beyond the end of the object, the object | |
868 | * size is extended, just as if we were writing a buffer full of zeros. | |
869 | * EXCEPT if the length is 0, in which case (just like a 0-length write) | |
870 | * we do not adjust the object size. | |
871 | */ | |
872 | void zero(const coll_t& cid, const ghobject_t& oid, uint64_t off, uint64_t len) { | |
873 | Op* _op = _get_next_op(); | |
874 | _op->op = OP_ZERO; | |
875 | _op->cid = _get_coll_id(cid); | |
876 | _op->oid = _get_object_id(oid); | |
877 | _op->off = off; | |
878 | _op->len = len; | |
eafe8130 | 879 | data.ops = data.ops + 1; |
11fdf7f2 TL |
880 | } |
881 | /// Discard all data in the object beyond the specified size. | |
882 | void truncate(const coll_t& cid, const ghobject_t& oid, uint64_t off) { | |
883 | Op* _op = _get_next_op(); | |
884 | _op->op = OP_TRUNCATE; | |
885 | _op->cid = _get_coll_id(cid); | |
886 | _op->oid = _get_object_id(oid); | |
887 | _op->off = off; | |
eafe8130 | 888 | data.ops = data.ops + 1; |
11fdf7f2 TL |
889 | } |
890 | /// Remove an object. All four parts of the object are removed. | |
891 | void remove(const coll_t& cid, const ghobject_t& oid) { | |
892 | Op* _op = _get_next_op(); | |
893 | _op->op = OP_REMOVE; | |
894 | _op->cid = _get_coll_id(cid); | |
895 | _op->oid = _get_object_id(oid); | |
eafe8130 | 896 | data.ops = data.ops + 1; |
11fdf7f2 TL |
897 | } |
898 | /// Set an xattr of an object | |
9f95a23c TL |
899 | void setattr(const coll_t& cid, const ghobject_t& oid, const char* name, ceph::buffer::list& val) { |
900 | std::string n(name); | |
11fdf7f2 TL |
901 | setattr(cid, oid, n, val); |
902 | } | |
903 | /// Set an xattr of an object | |
9f95a23c | 904 | void setattr(const coll_t& cid, const ghobject_t& oid, const std::string& s, ceph::buffer::list& val) { |
11fdf7f2 TL |
905 | using ceph::encode; |
906 | Op* _op = _get_next_op(); | |
907 | _op->op = OP_SETATTR; | |
908 | _op->cid = _get_coll_id(cid); | |
909 | _op->oid = _get_object_id(oid); | |
910 | encode(s, data_bl); | |
911 | encode(val, data_bl); | |
eafe8130 | 912 | data.ops = data.ops + 1; |
11fdf7f2 TL |
913 | } |
914 | /// Set multiple xattrs of an object | |
20effc67 TL |
915 | void setattrs(const coll_t& cid, |
916 | const ghobject_t& oid, | |
917 | const std::map<std::string,ceph::buffer::ptr,std::less<>>& attrset) { | |
11fdf7f2 TL |
918 | using ceph::encode; |
919 | Op* _op = _get_next_op(); | |
920 | _op->op = OP_SETATTRS; | |
921 | _op->cid = _get_coll_id(cid); | |
922 | _op->oid = _get_object_id(oid); | |
923 | encode(attrset, data_bl); | |
eafe8130 | 924 | data.ops = data.ops + 1; |
11fdf7f2 TL |
925 | } |
926 | /// Set multiple xattrs of an object | |
20effc67 TL |
927 | void setattrs(const coll_t& cid, |
928 | const ghobject_t& oid, | |
929 | const std::map<std::string,ceph::buffer::list,std::less<>>& attrset) { | |
11fdf7f2 TL |
930 | using ceph::encode; |
931 | Op* _op = _get_next_op(); | |
932 | _op->op = OP_SETATTRS; | |
933 | _op->cid = _get_coll_id(cid); | |
934 | _op->oid = _get_object_id(oid); | |
935 | encode(attrset, data_bl); | |
eafe8130 | 936 | data.ops = data.ops + 1; |
11fdf7f2 TL |
937 | } |
938 | /// remove an xattr from an object | |
939 | void rmattr(const coll_t& cid, const ghobject_t& oid, const char *name) { | |
9f95a23c | 940 | std::string n(name); |
11fdf7f2 TL |
941 | rmattr(cid, oid, n); |
942 | } | |
943 | /// remove an xattr from an object | |
9f95a23c | 944 | void rmattr(const coll_t& cid, const ghobject_t& oid, const std::string& s) { |
11fdf7f2 TL |
945 | using ceph::encode; |
946 | Op* _op = _get_next_op(); | |
947 | _op->op = OP_RMATTR; | |
948 | _op->cid = _get_coll_id(cid); | |
949 | _op->oid = _get_object_id(oid); | |
950 | encode(s, data_bl); | |
eafe8130 | 951 | data.ops = data.ops + 1; |
11fdf7f2 TL |
952 | } |
953 | /// remove all xattrs from an object | |
954 | void rmattrs(const coll_t& cid, const ghobject_t& oid) { | |
955 | Op* _op = _get_next_op(); | |
956 | _op->op = OP_RMATTRS; | |
957 | _op->cid = _get_coll_id(cid); | |
958 | _op->oid = _get_object_id(oid); | |
eafe8130 | 959 | data.ops = data.ops + 1; |
11fdf7f2 TL |
960 | } |
961 | /** | |
962 | * Clone an object into another object. | |
963 | * | |
964 | * Low-cost (e.g., O(1)) cloning (if supported) is best, but | |
965 | * fallback to an O(n) copy is allowed. All four parts of the | |
966 | * object are cloned (data, xattrs, omap header, omap | |
967 | * entries). | |
968 | * | |
969 | * The destination named object may already exist, in | |
970 | * which case its previous contents are discarded. | |
971 | */ | |
972 | void clone(const coll_t& cid, const ghobject_t& oid, | |
9f95a23c | 973 | const ghobject_t& noid) { |
11fdf7f2 TL |
974 | Op* _op = _get_next_op(); |
975 | _op->op = OP_CLONE; | |
976 | _op->cid = _get_coll_id(cid); | |
977 | _op->oid = _get_object_id(oid); | |
978 | _op->dest_oid = _get_object_id(noid); | |
eafe8130 | 979 | data.ops = data.ops + 1; |
11fdf7f2 TL |
980 | } |
981 | /** | |
982 | * Clone a byte range from one object to another. | |
983 | * | |
984 | * The data portion of the destination object receives a copy of a | |
985 | * portion of the data from the source object. None of the other | |
986 | * three parts of an object is copied from the source. | |
987 | * | |
988 | * The destination object size may be extended to the dstoff + len. | |
989 | * | |
990 | * The source range *must* overlap with the source object data. If it does | |
991 | * not the result is undefined. | |
992 | */ | |
993 | void clone_range(const coll_t& cid, const ghobject_t& oid, | |
9f95a23c TL |
994 | const ghobject_t& noid, |
995 | uint64_t srcoff, uint64_t srclen, uint64_t dstoff) { | |
11fdf7f2 TL |
996 | Op* _op = _get_next_op(); |
997 | _op->op = OP_CLONERANGE2; | |
998 | _op->cid = _get_coll_id(cid); | |
999 | _op->oid = _get_object_id(oid); | |
1000 | _op->dest_oid = _get_object_id(noid); | |
1001 | _op->off = srcoff; | |
1002 | _op->len = srclen; | |
1003 | _op->dest_off = dstoff; | |
eafe8130 | 1004 | data.ops = data.ops + 1; |
11fdf7f2 | 1005 | } |
9f95a23c | 1006 | |
11fdf7f2 TL |
1007 | /// Create the collection |
1008 | void create_collection(const coll_t& cid, int bits) { | |
1009 | Op* _op = _get_next_op(); | |
1010 | _op->op = OP_MKCOLL; | |
1011 | _op->cid = _get_coll_id(cid); | |
1012 | _op->split_bits = bits; | |
eafe8130 | 1013 | data.ops = data.ops + 1; |
11fdf7f2 | 1014 | } |
9f95a23c | 1015 | |
11fdf7f2 TL |
1016 | /** |
1017 | * Give the collection a hint. | |
1018 | * | |
1019 | * @param cid - collection id. | |
1020 | * @param type - hint type. | |
1021 | * @param hint - the hint payload, which contains the customized | |
1022 | * data along with the hint type. | |
1023 | */ | |
9f95a23c | 1024 | void collection_hint(const coll_t& cid, uint32_t type, const ceph::buffer::list& hint) { |
11fdf7f2 TL |
1025 | using ceph::encode; |
1026 | Op* _op = _get_next_op(); | |
1027 | _op->op = OP_COLL_HINT; | |
1028 | _op->cid = _get_coll_id(cid); | |
f67539c2 | 1029 | _op->hint = type; |
11fdf7f2 | 1030 | encode(hint, data_bl); |
eafe8130 | 1031 | data.ops = data.ops + 1; |
11fdf7f2 | 1032 | } |
9f95a23c | 1033 | |
11fdf7f2 TL |
1034 | /// remove the collection, the collection must be empty |
1035 | void remove_collection(const coll_t& cid) { | |
1036 | Op* _op = _get_next_op(); | |
1037 | _op->op = OP_RMCOLL; | |
1038 | _op->cid = _get_coll_id(cid); | |
eafe8130 | 1039 | data.ops = data.ops + 1; |
11fdf7f2 | 1040 | } |
9f95a23c | 1041 | void collection_move(const coll_t& cid, const coll_t &oldcid, const ghobject_t& oid) |
11fdf7f2 | 1042 | __attribute__ ((deprecated)) { |
9f95a23c TL |
1043 | // NOTE: we encode this as a fixed combo of ADD + REMOVE. they |
1044 | // always appear together, so this is effectively a single MOVE. | |
1045 | Op* _op = _get_next_op(); | |
1046 | _op->op = OP_COLL_ADD; | |
1047 | _op->cid = _get_coll_id(oldcid); | |
1048 | _op->oid = _get_object_id(oid); | |
1049 | _op->dest_cid = _get_coll_id(cid); | |
1050 | data.ops = data.ops + 1; | |
1051 | ||
1052 | _op = _get_next_op(); | |
1053 | _op->op = OP_COLL_REMOVE; | |
1054 | _op->cid = _get_coll_id(oldcid); | |
1055 | _op->oid = _get_object_id(oid); | |
1056 | data.ops = data.ops + 1; | |
1057 | } | |
11fdf7f2 | 1058 | void collection_move_rename(const coll_t& oldcid, const ghobject_t& oldoid, |
9f95a23c | 1059 | const coll_t &cid, const ghobject_t& oid) { |
11fdf7f2 TL |
1060 | Op* _op = _get_next_op(); |
1061 | _op->op = OP_COLL_MOVE_RENAME; | |
1062 | _op->cid = _get_coll_id(oldcid); | |
1063 | _op->oid = _get_object_id(oldoid); | |
1064 | _op->dest_cid = _get_coll_id(cid); | |
1065 | _op->dest_oid = _get_object_id(oid); | |
eafe8130 | 1066 | data.ops = data.ops + 1; |
11fdf7f2 TL |
1067 | } |
1068 | void try_rename(const coll_t &cid, const ghobject_t& oldoid, | |
9f95a23c | 1069 | const ghobject_t& oid) { |
11fdf7f2 TL |
1070 | Op* _op = _get_next_op(); |
1071 | _op->op = OP_TRY_RENAME; | |
1072 | _op->cid = _get_coll_id(cid); | |
1073 | _op->oid = _get_object_id(oldoid); | |
1074 | _op->dest_oid = _get_object_id(oid); | |
eafe8130 | 1075 | data.ops = data.ops + 1; |
11fdf7f2 | 1076 | } |
9f95a23c | 1077 | |
11fdf7f2 TL |
1078 | /// Remove omap from oid |
1079 | void omap_clear( | |
1080 | const coll_t &cid, ///< [in] Collection containing oid | |
1081 | const ghobject_t &oid ///< [in] Object from which to remove omap | |
1082 | ) { | |
1083 | Op* _op = _get_next_op(); | |
1084 | _op->op = OP_OMAP_CLEAR; | |
1085 | _op->cid = _get_coll_id(cid); | |
1086 | _op->oid = _get_object_id(oid); | |
eafe8130 | 1087 | data.ops = data.ops + 1; |
11fdf7f2 TL |
1088 | } |
1089 | /// Set keys on oid omap. Replaces duplicate keys. | |
1090 | void omap_setkeys( | |
1091 | const coll_t& cid, ///< [in] Collection containing oid | |
1092 | const ghobject_t &oid, ///< [in] Object to update | |
9f95a23c | 1093 | const std::map<std::string, ceph::buffer::list> &attrset ///< [in] Replacement keys and values |
11fdf7f2 TL |
1094 | ) { |
1095 | using ceph::encode; | |
1096 | Op* _op = _get_next_op(); | |
1097 | _op->op = OP_OMAP_SETKEYS; | |
1098 | _op->cid = _get_coll_id(cid); | |
1099 | _op->oid = _get_object_id(oid); | |
1100 | encode(attrset, data_bl); | |
eafe8130 | 1101 | data.ops = data.ops + 1; |
11fdf7f2 TL |
1102 | } |
1103 | ||
9f95a23c | 1104 | /// Set keys on an oid omap (ceph::buffer::list variant). |
11fdf7f2 TL |
1105 | void omap_setkeys( |
1106 | const coll_t &cid, ///< [in] Collection containing oid | |
1107 | const ghobject_t &oid, ///< [in] Object to update | |
9f95a23c TL |
1108 | const ceph::buffer::list &attrset_bl ///< [in] Replacement keys and values |
1109 | ) { | |
11fdf7f2 TL |
1110 | Op* _op = _get_next_op(); |
1111 | _op->op = OP_OMAP_SETKEYS; | |
1112 | _op->cid = _get_coll_id(cid); | |
1113 | _op->oid = _get_object_id(oid); | |
1114 | data_bl.append(attrset_bl); | |
eafe8130 | 1115 | data.ops = data.ops + 1; |
11fdf7f2 TL |
1116 | } |
1117 | ||
1118 | /// Remove keys from oid omap | |
1119 | void omap_rmkeys( | |
1120 | const coll_t &cid, ///< [in] Collection containing oid | |
1121 | const ghobject_t &oid, ///< [in] Object from which to remove the omap | |
9f95a23c | 1122 | const std::set<std::string> &keys ///< [in] Keys to clear |
11fdf7f2 TL |
1123 | ) { |
1124 | using ceph::encode; | |
1125 | Op* _op = _get_next_op(); | |
1126 | _op->op = OP_OMAP_RMKEYS; | |
1127 | _op->cid = _get_coll_id(cid); | |
1128 | _op->oid = _get_object_id(oid); | |
1129 | encode(keys, data_bl); | |
eafe8130 | 1130 | data.ops = data.ops + 1; |
11fdf7f2 TL |
1131 | } |
1132 | ||
9f95a23c TL |
1133 | /// Remove key from oid omap |
1134 | void omap_rmkey( | |
1135 | const coll_t &cid, ///< [in] Collection containing oid | |
1136 | const ghobject_t &oid, ///< [in] Object from which to remove the omap | |
1137 | const std::string& key ///< [in] Keys to clear | |
1138 | ) { | |
1139 | Op* _op = _get_next_op(); | |
1140 | _op->op = OP_OMAP_RMKEYS; | |
1141 | _op->cid = _get_coll_id(cid); | |
1142 | _op->oid = _get_object_id(oid); | |
1143 | using ceph::encode; | |
1144 | encode((uint32_t)1, data_bl); | |
1145 | encode(key, data_bl); | |
1146 | data.ops = data.ops + 1; | |
1147 | } | |
1148 | ||
11fdf7f2 TL |
1149 | /// Remove keys from oid omap |
1150 | void omap_rmkeys( | |
1151 | const coll_t &cid, ///< [in] Collection containing oid | |
1152 | const ghobject_t &oid, ///< [in] Object from which to remove the omap | |
9f95a23c | 1153 | const ceph::buffer::list &keys_bl ///< [in] Keys to clear |
11fdf7f2 TL |
1154 | ) { |
1155 | Op* _op = _get_next_op(); | |
1156 | _op->op = OP_OMAP_RMKEYS; | |
1157 | _op->cid = _get_coll_id(cid); | |
1158 | _op->oid = _get_object_id(oid); | |
1159 | data_bl.append(keys_bl); | |
eafe8130 | 1160 | data.ops = data.ops + 1; |
11fdf7f2 | 1161 | } |
9f95a23c | 1162 | |
11fdf7f2 TL |
1163 | /// Remove key range from oid omap |
1164 | void omap_rmkeyrange( | |
1165 | const coll_t &cid, ///< [in] Collection containing oid | |
1166 | const ghobject_t &oid, ///< [in] Object from which to remove the omap keys | |
9f95a23c TL |
1167 | const std::string& first, ///< [in] first key in range |
1168 | const std::string& last ///< [in] first key past range, range is [first,last) | |
11fdf7f2 TL |
1169 | ) { |
1170 | using ceph::encode; | |
1171 | Op* _op = _get_next_op(); | |
1172 | _op->op = OP_OMAP_RMKEYRANGE; | |
1173 | _op->cid = _get_coll_id(cid); | |
1174 | _op->oid = _get_object_id(oid); | |
1175 | encode(first, data_bl); | |
1176 | encode(last, data_bl); | |
eafe8130 | 1177 | data.ops = data.ops + 1; |
11fdf7f2 TL |
1178 | } |
1179 | ||
9f95a23c TL |
1180 | /// Remove key range from oid omap |
1181 | void omap_rmkeyrange( | |
1182 | const coll_t cid, ///< [in] Collection containing oid | |
1183 | const ghobject_t &oid, ///< [in] Object from which to remove the omap keys | |
1184 | const bufferlist &keys_bl ///< [in] range of keys to clear | |
1185 | ) { | |
1186 | Op* _op = _get_next_op(); | |
1187 | _op->op = OP_OMAP_RMKEYRANGE; | |
1188 | _op->cid = _get_coll_id(cid); | |
1189 | _op->oid = _get_object_id(oid); | |
1190 | data_bl.append(keys_bl); | |
1191 | data.ops = data.ops + 1; | |
1192 | } | |
1193 | ||
11fdf7f2 TL |
1194 | /// Set omap header |
1195 | void omap_setheader( | |
1196 | const coll_t &cid, ///< [in] Collection containing oid | |
1197 | const ghobject_t &oid, ///< [in] Object | |
9f95a23c | 1198 | const ceph::buffer::list &bl ///< [in] Header value |
11fdf7f2 TL |
1199 | ) { |
1200 | using ceph::encode; | |
1201 | Op* _op = _get_next_op(); | |
1202 | _op->op = OP_OMAP_SETHEADER; | |
1203 | _op->cid = _get_coll_id(cid); | |
1204 | _op->oid = _get_object_id(oid); | |
1205 | encode(bl, data_bl); | |
eafe8130 | 1206 | data.ops = data.ops + 1; |
11fdf7f2 TL |
1207 | } |
1208 | ||
9f95a23c TL |
1209 | /// Split collection based on given prefixes, objects matching the specified bits/rem are |
1210 | /// moved to the new collection | |
11fdf7f2 TL |
1211 | void split_collection( |
1212 | const coll_t &cid, | |
1213 | uint32_t bits, | |
1214 | uint32_t rem, | |
1215 | const coll_t &destination) { | |
1216 | Op* _op = _get_next_op(); | |
1217 | _op->op = OP_SPLIT_COLLECTION2; | |
1218 | _op->cid = _get_coll_id(cid); | |
1219 | _op->dest_cid = _get_coll_id(destination); | |
1220 | _op->split_bits = bits; | |
1221 | _op->split_rem = rem; | |
eafe8130 | 1222 | data.ops = data.ops + 1; |
11fdf7f2 TL |
1223 | } |
1224 | ||
1225 | /// Merge collection into another. | |
1226 | void merge_collection( | |
1227 | coll_t cid, | |
1228 | coll_t destination, | |
1229 | uint32_t bits) { | |
1230 | Op* _op = _get_next_op(); | |
1231 | _op->op = OP_MERGE_COLLECTION; | |
1232 | _op->cid = _get_coll_id(cid); | |
1233 | _op->dest_cid = _get_coll_id(destination); | |
1234 | _op->split_bits = bits; | |
eafe8130 | 1235 | data.ops = data.ops + 1; |
11fdf7f2 TL |
1236 | } |
1237 | ||
1238 | void collection_set_bits( | |
1239 | const coll_t &cid, | |
1240 | int bits) { | |
1241 | Op* _op = _get_next_op(); | |
1242 | _op->op = OP_COLL_SET_BITS; | |
1243 | _op->cid = _get_coll_id(cid); | |
1244 | _op->split_bits = bits; | |
eafe8130 | 1245 | data.ops = data.ops + 1; |
11fdf7f2 TL |
1246 | } |
1247 | ||
1248 | /// Set allocation hint for an object | |
1249 | /// make 0 values(expected_object_size, expected_write_size) noops for all implementations | |
1250 | void set_alloc_hint( | |
1251 | const coll_t &cid, | |
1252 | const ghobject_t &oid, | |
1253 | uint64_t expected_object_size, | |
1254 | uint64_t expected_write_size, | |
1255 | uint32_t flags | |
1256 | ) { | |
1257 | Op* _op = _get_next_op(); | |
1258 | _op->op = OP_SETALLOCHINT; | |
1259 | _op->cid = _get_coll_id(cid); | |
1260 | _op->oid = _get_object_id(oid); | |
1261 | _op->expected_object_size = expected_object_size; | |
1262 | _op->expected_write_size = expected_write_size; | |
f67539c2 | 1263 | _op->hint = flags; |
eafe8130 | 1264 | data.ops = data.ops + 1; |
11fdf7f2 TL |
1265 | } |
1266 | ||
9f95a23c | 1267 | void encode(ceph::buffer::list& bl) const { |
11fdf7f2 TL |
1268 | //layout: data_bl + op_bl + coll_index + object_index + data |
1269 | ENCODE_START(9, 9, bl); | |
1270 | encode(data_bl, bl); | |
1271 | encode(op_bl, bl); | |
1272 | encode(coll_index, bl); | |
1273 | encode(object_index, bl); | |
1274 | data.encode(bl); | |
1275 | ENCODE_FINISH(bl); | |
1276 | } | |
1277 | ||
9f95a23c | 1278 | void decode(ceph::buffer::list::const_iterator &bl) { |
11fdf7f2 TL |
1279 | DECODE_START(9, bl); |
1280 | DECODE_OLDEST(9); | |
1281 | ||
1282 | decode(data_bl, bl); | |
1283 | decode(op_bl, bl); | |
1284 | decode(coll_index, bl); | |
1285 | decode(object_index, bl); | |
1286 | data.decode(bl); | |
1287 | coll_id = coll_index.size(); | |
1288 | object_id = object_index.size(); | |
1289 | ||
1290 | DECODE_FINISH(bl); | |
1291 | } | |
1292 | ||
1293 | void dump(ceph::Formatter *f); | |
9f95a23c | 1294 | static void generate_test_instances(std::list<Transaction*>& o); |
11fdf7f2 | 1295 | }; |
9f95a23c TL |
1296 | WRITE_CLASS_ENCODER(Transaction) |
1297 | WRITE_CLASS_ENCODER(Transaction::TransactionData) | |
1298 | ||
1299 | std::ostream& operator<<(std::ostream& out, const Transaction& tx); | |
1300 | ||
11fdf7f2 | 1301 | } |