]>
Commit | Line | Data |
---|---|---|
9f95a23c | 1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
11fdf7f2 TL |
2 | // vim: ts=8 sw=2 smarttab |
3 | ||
9f95a23c TL |
4 | #pragma once |
5 | ||
11fdf7f2 TL |
6 | #include <map> |
7 | ||
8 | #include "include/int_types.h" | |
9 | #include "include/buffer.h" | |
10 | #include "osd/osd_types.h" | |
11 | ||
9f95a23c TL |
12 | #define OPS_PER_PTR 32 |
13 | ||
14 | void decode_str_str_map_to_bl(ceph::buffer::list::const_iterator& p, ceph::buffer::list *out); | |
15 | void decode_str_set_to_bl(ceph::buffer::list::const_iterator& p, ceph::buffer::list *out); | |
16 | ||
17 | ||
11fdf7f2 TL |
18 | /********************************* |
19 | * transaction | |
20 | * | |
21 | * A Transaction represents a sequence of primitive mutation | |
22 | * operations. | |
23 | * | |
24 | * Three events in the life of a Transaction result in | |
25 | * callbacks. Any Transaction can contain any number of callback | |
26 | * objects (Context) for any combination of the three classes of | |
27 | * callbacks: | |
28 | * | |
29 | * on_applied_sync, on_applied, and on_commit. | |
30 | * | |
31 | * The "on_applied" and "on_applied_sync" callbacks are invoked when | |
32 | * the modifications requested by the Transaction are visible to | |
33 | * subsequent ObjectStore operations, i.e., the results are | |
34 | * readable. The only conceptual difference between on_applied and | |
35 | * on_applied_sync is the specific thread and locking environment in | |
36 | * which the callbacks operate. "on_applied_sync" is called | |
37 | * directly by an ObjectStore execution thread. It is expected to | |
38 | * execute quickly and must not acquire any locks of the calling | |
39 | * environment. Conversely, "on_applied" is called from the separate | |
40 | * Finisher thread, meaning that it can contend for calling | |
41 | * environment locks. NB, on_applied and on_applied_sync are | |
42 | * sometimes called on_readable and on_readable_sync. | |
43 | * | |
44 | * The "on_commit" callback is also called from the Finisher thread | |
45 | * and indicates that all of the mutations have been durably | |
46 | * committed to stable storage (i.e., are now software/hardware | |
47 | * crashproof). | |
48 | * | |
49 | * At the implementation level, each mutation primitive (and its | |
50 | * associated data) can be serialized to a single buffer. That | |
51 | * serialization, however, does not copy any data, but (using the | |
9f95a23c | 52 | * ceph::buffer::list library) will reference the original buffers. This |
11fdf7f2 TL |
53 | * implies that the buffer that contains the data being submitted |
54 | * must remain stable until the on_commit callback completes. In | |
9f95a23c | 55 | * practice, ceph::buffer::list handles all of this for you and this |
11fdf7f2 TL |
56 | * subtlety is only relevant if you are referencing an existing |
57 | * buffer via buffer::raw_static. | |
58 | * | |
59 | * Some implementations of ObjectStore choose to implement their own | |
60 | * form of journaling that uses the serialized form of a | |
61 | * Transaction. This requires that the encode/decode logic properly | |
62 | * version itself and handle version upgrades that might change the | |
63 | * format of the encoded Transaction. This has already happened a | |
64 | * couple of times and the Transaction object contains some helper | |
65 | * variables that aid in this legacy decoding: | |
66 | * | |
67 | * sobject_encoding detects an older/simpler version of oid | |
68 | * present in pre-bobtail versions of ceph. use_pool_override | |
69 | * also detects a situation where the pool of an oid can be | |
70 | * overridden for legacy operations/buffers. For non-legacy | |
71 | * implementations of ObjectStore, neither of these fields is | |
72 | * relevant. | |
73 | * | |
74 | * | |
75 | * TRANSACTION ISOLATION | |
76 | * | |
77 | * Except as noted above, isolation is the responsibility of the | |
78 | * caller. In other words, if any storage element (storage element | |
79 | * == any of the four portions of an object as described above) is | |
80 | * altered by a transaction (including deletion), the caller | |
81 | * promises not to attempt to read that element while the | |
82 | * transaction is pending (here pending means from the time of | |
83 | * issuance until the "on_applied_sync" callback has been | |
84 | * received). Violations of isolation need not be detected by | |
85 | * ObjectStore and there is no corresponding error mechanism for | |
86 | * reporting an isolation violation (crashing would be the | |
87 | * appropriate way to report an isolation violation if detected). | |
88 | * | |
89 | * Enumeration operations may violate transaction isolation as | |
90 | * described above when a storage element is being created or | |
91 | * deleted as part of a transaction. In this case, ObjectStore is | |
92 | * allowed to consider the enumeration operation to either precede | |
93 | * or follow the violating transaction element. In other words, the | |
94 | * presence/absence of the mutated element in the enumeration is | |
95 | * entirely at the discretion of ObjectStore. The arbitrary ordering | |
96 | * applies independently to each transaction element. For example, | |
97 | * if a transaction contains two mutating elements, "create A" and | |
98 | * "delete B", and an enumeration operation is performed while this | |
99 | * transaction is pending, it is permissible for ObjectStore to | |
100 | * report any of the four possible combinations of the existence of | |
101 | * A and B. | |
102 | * | |
103 | */ | |
104 | namespace ceph::os { | |
105 | class Transaction { | |
106 | public: | |
107 | enum { | |
108 | OP_NOP = 0, | |
9f95a23c | 109 | OP_CREATE = 7, // cid, oid |
11fdf7f2 TL |
110 | OP_TOUCH = 9, // cid, oid |
111 | OP_WRITE = 10, // cid, oid, offset, len, bl | |
112 | OP_ZERO = 11, // cid, oid, offset, len | |
113 | OP_TRUNCATE = 12, // cid, oid, len | |
114 | OP_REMOVE = 13, // cid, oid | |
115 | OP_SETATTR = 14, // cid, oid, attrname, bl | |
116 | OP_SETATTRS = 15, // cid, oid, attrset | |
117 | OP_RMATTR = 16, // cid, oid, attrname | |
118 | OP_CLONE = 17, // cid, oid, newoid | |
119 | OP_CLONERANGE = 18, // cid, oid, newoid, offset, len | |
120 | OP_CLONERANGE2 = 30, // cid, oid, newoid, srcoff, len, dstoff | |
121 | ||
122 | OP_TRIMCACHE = 19, // cid, oid, offset, len **DEPRECATED** | |
123 | ||
124 | OP_MKCOLL = 20, // cid | |
125 | OP_RMCOLL = 21, // cid | |
126 | OP_COLL_ADD = 22, // cid, oldcid, oid | |
127 | OP_COLL_REMOVE = 23, // cid, oid | |
128 | OP_COLL_SETATTR = 24, // cid, attrname, bl | |
129 | OP_COLL_RMATTR = 25, // cid, attrname | |
130 | OP_COLL_SETATTRS = 26, // cid, attrset | |
131 | OP_COLL_MOVE = 8, // newcid, oldcid, oid | |
132 | ||
133 | OP_RMATTRS = 28, // cid, oid | |
134 | OP_COLL_RENAME = 29, // cid, newcid | |
135 | ||
136 | OP_OMAP_CLEAR = 31, // cid | |
137 | OP_OMAP_SETKEYS = 32, // cid, attrset | |
138 | OP_OMAP_RMKEYS = 33, // cid, keyset | |
139 | OP_OMAP_SETHEADER = 34, // cid, header | |
140 | OP_SPLIT_COLLECTION = 35, // cid, bits, destination | |
141 | OP_SPLIT_COLLECTION2 = 36, /* cid, bits, destination | |
9f95a23c | 142 | doesn't create the destination */ |
11fdf7f2 TL |
143 | OP_OMAP_RMKEYRANGE = 37, // cid, oid, firstkey, lastkey |
144 | OP_COLL_MOVE_RENAME = 38, // oldcid, oldoid, newcid, newoid | |
9f95a23c | 145 | |
11fdf7f2 TL |
146 | OP_SETALLOCHINT = 39, // cid, oid, object_size, write_size |
147 | OP_COLL_HINT = 40, // cid, type, bl | |
148 | ||
149 | OP_TRY_RENAME = 41, // oldcid, oldoid, newoid | |
150 | ||
151 | OP_COLL_SET_BITS = 42, // cid, bits | |
152 | ||
153 | OP_MERGE_COLLECTION = 43, // cid, destination | |
154 | }; | |
155 | ||
156 | // Transaction hint type | |
157 | enum { | |
158 | COLL_HINT_EXPECTED_NUM_OBJECTS = 1, | |
159 | }; | |
160 | ||
161 | struct Op { | |
eafe8130 TL |
162 | ceph_le32 op; |
163 | ceph_le32 cid; | |
164 | ceph_le32 oid; | |
165 | ceph_le64 off; | |
166 | ceph_le64 len; | |
167 | ceph_le32 dest_cid; | |
168 | ceph_le32 dest_oid; //OP_CLONE, OP_CLONERANGE | |
169 | ceph_le64 dest_off; //OP_CLONERANGE | |
11fdf7f2 | 170 | union { |
9f95a23c TL |
171 | struct { |
172 | ceph_le32 hint_type; //OP_COLL_HINT | |
173 | } __attribute__ ((packed)); | |
174 | struct { | |
175 | ceph_le32 alloc_hint_flags; //OP_SETALLOCHINT | |
176 | } __attribute__ ((packed)); | |
177 | } __attribute__ ((packed)); | |
eafe8130 TL |
178 | ceph_le64 expected_object_size; //OP_SETALLOCHINT |
179 | ceph_le64 expected_write_size; //OP_SETALLOCHINT | |
180 | ceph_le32 split_bits; //OP_SPLIT_COLLECTION2,OP_COLL_SET_BITS, | |
11fdf7f2 | 181 | //OP_MKCOLL |
eafe8130 | 182 | ceph_le32 split_rem; //OP_SPLIT_COLLECTION2 |
11fdf7f2 TL |
183 | } __attribute__ ((packed)) ; |
184 | ||
185 | struct TransactionData { | |
eafe8130 TL |
186 | ceph_le64 ops; |
187 | ceph_le32 largest_data_len; | |
188 | ceph_le32 largest_data_off; | |
189 | ceph_le32 largest_data_off_in_data_bl; | |
190 | ceph_le32 fadvise_flags; | |
11fdf7f2 TL |
191 | |
192 | TransactionData() noexcept : | |
9f95a23c | 193 | ops(init_le64(0)), |
eafe8130 TL |
194 | largest_data_len(init_le32(0)), |
195 | largest_data_off(init_le32(0)), | |
196 | largest_data_off_in_data_bl(init_le32(0)), | |
9f95a23c | 197 | fadvise_flags(init_le32(0)) { } |
11fdf7f2 | 198 | |
9f95a23c | 199 | // override default move operations to reset default values |
11fdf7f2 | 200 | TransactionData(TransactionData&& other) noexcept : |
9f95a23c | 201 | ops(other.ops), |
11fdf7f2 TL |
202 | largest_data_len(other.largest_data_len), |
203 | largest_data_off(other.largest_data_off), | |
204 | largest_data_off_in_data_bl(other.largest_data_off_in_data_bl), | |
205 | fadvise_flags(other.fadvise_flags) { | |
206 | other.ops = 0; | |
207 | other.largest_data_len = 0; | |
208 | other.largest_data_off = 0; | |
209 | other.largest_data_off_in_data_bl = 0; | |
210 | other.fadvise_flags = 0; | |
211 | } | |
212 | TransactionData& operator=(TransactionData&& other) noexcept { | |
213 | ops = other.ops; | |
214 | largest_data_len = other.largest_data_len; | |
215 | largest_data_off = other.largest_data_off; | |
216 | largest_data_off_in_data_bl = other.largest_data_off_in_data_bl; | |
217 | fadvise_flags = other.fadvise_flags; | |
218 | other.ops = 0; | |
219 | other.largest_data_len = 0; | |
220 | other.largest_data_off = 0; | |
221 | other.largest_data_off_in_data_bl = 0; | |
222 | other.fadvise_flags = 0; | |
223 | return *this; | |
224 | } | |
225 | ||
226 | TransactionData(const TransactionData& other) = default; | |
227 | TransactionData& operator=(const TransactionData& other) = default; | |
228 | ||
9f95a23c | 229 | void encode(ceph::buffer::list& bl) const { |
11fdf7f2 TL |
230 | bl.append((char*)this, sizeof(TransactionData)); |
231 | } | |
9f95a23c | 232 | void decode(ceph::buffer::list::const_iterator &bl) { |
11fdf7f2 TL |
233 | bl.copy(sizeof(TransactionData), (char*)this); |
234 | } | |
235 | } __attribute__ ((packed)) ; | |
236 | ||
237 | private: | |
238 | TransactionData data; | |
239 | ||
9f95a23c TL |
240 | std::map<coll_t, uint32_t> coll_index; |
241 | std::map<ghobject_t, uint32_t> object_index; | |
11fdf7f2 | 242 | |
9f95a23c TL |
243 | uint32_t coll_id = 0; |
244 | uint32_t object_id = 0; | |
11fdf7f2 | 245 | |
9f95a23c TL |
246 | ceph::buffer::list data_bl; |
247 | ceph::buffer::list op_bl; | |
11fdf7f2 TL |
248 | |
249 | std::list<Context *> on_applied; | |
250 | std::list<Context *> on_commit; | |
251 | std::list<Context *> on_applied_sync; | |
252 | ||
253 | public: | |
254 | Transaction() = default; | |
255 | ||
9f95a23c | 256 | explicit Transaction(ceph::buffer::list::const_iterator &dp) { |
11fdf7f2 TL |
257 | decode(dp); |
258 | } | |
9f95a23c | 259 | explicit Transaction(ceph::buffer::list &nbl) { |
11fdf7f2 TL |
260 | auto dp = nbl.cbegin(); |
261 | decode(dp); | |
262 | } | |
263 | ||
264 | // override default move operations to reset default values | |
265 | Transaction(Transaction&& other) noexcept : | |
266 | data(std::move(other.data)), | |
267 | coll_index(std::move(other.coll_index)), | |
268 | object_index(std::move(other.object_index)), | |
269 | coll_id(other.coll_id), | |
270 | object_id(other.object_id), | |
271 | data_bl(std::move(other.data_bl)), | |
272 | op_bl(std::move(other.op_bl)), | |
273 | on_applied(std::move(other.on_applied)), | |
274 | on_commit(std::move(other.on_commit)), | |
275 | on_applied_sync(std::move(other.on_applied_sync)) { | |
276 | other.coll_id = 0; | |
277 | other.object_id = 0; | |
278 | } | |
279 | ||
280 | Transaction& operator=(Transaction&& other) noexcept { | |
281 | data = std::move(other.data); | |
282 | coll_index = std::move(other.coll_index); | |
283 | object_index = std::move(other.object_index); | |
284 | coll_id = other.coll_id; | |
285 | object_id = other.object_id; | |
286 | data_bl = std::move(other.data_bl); | |
287 | op_bl = std::move(other.op_bl); | |
288 | on_applied = std::move(other.on_applied); | |
289 | on_commit = std::move(other.on_commit); | |
290 | on_applied_sync = std::move(other.on_applied_sync); | |
291 | other.coll_id = 0; | |
292 | other.object_id = 0; | |
293 | return *this; | |
294 | } | |
295 | ||
296 | Transaction(const Transaction& other) = default; | |
297 | Transaction& operator=(const Transaction& other) = default; | |
298 | ||
299 | // expose object_index for FileStore::Op's benefit | |
9f95a23c | 300 | const std::map<ghobject_t, uint32_t>& get_object_index() const { |
11fdf7f2 TL |
301 | return object_index; |
302 | } | |
303 | ||
304 | /* Operations on callback contexts */ | |
305 | void register_on_applied(Context *c) { | |
306 | if (!c) return; | |
307 | on_applied.push_back(c); | |
308 | } | |
309 | void register_on_commit(Context *c) { | |
310 | if (!c) return; | |
311 | on_commit.push_back(c); | |
312 | } | |
313 | void register_on_applied_sync(Context *c) { | |
314 | if (!c) return; | |
315 | on_applied_sync.push_back(c); | |
316 | } | |
317 | void register_on_complete(Context *c) { | |
318 | if (!c) return; | |
319 | RunOnDeleteRef _complete (std::make_shared<RunOnDelete>(c)); | |
320 | register_on_applied(new ContainerContext<RunOnDeleteRef>(_complete)); | |
321 | register_on_commit(new ContainerContext<RunOnDeleteRef>(_complete)); | |
322 | } | |
323 | bool has_contexts() const { | |
324 | return | |
9f95a23c TL |
325 | !on_commit.empty() || |
326 | !on_applied.empty() || | |
327 | !on_applied_sync.empty(); | |
11fdf7f2 TL |
328 | } |
329 | ||
9f95a23c TL |
330 | static void collect_contexts( |
331 | std::vector<Transaction>& t, | |
332 | Context **out_on_applied, | |
333 | Context **out_on_commit, | |
334 | Context **out_on_applied_sync) { | |
11fdf7f2 TL |
335 | ceph_assert(out_on_applied); |
336 | ceph_assert(out_on_commit); | |
337 | ceph_assert(out_on_applied_sync); | |
338 | std::list<Context *> on_applied, on_commit, on_applied_sync; | |
339 | for (auto& i : t) { | |
9f95a23c TL |
340 | on_applied.splice(on_applied.end(), i.on_applied); |
341 | on_commit.splice(on_commit.end(), i.on_commit); | |
342 | on_applied_sync.splice(on_applied_sync.end(), i.on_applied_sync); | |
11fdf7f2 TL |
343 | } |
344 | *out_on_applied = C_Contexts::list_to_context(on_applied); | |
345 | *out_on_commit = C_Contexts::list_to_context(on_commit); | |
346 | *out_on_applied_sync = C_Contexts::list_to_context(on_applied_sync); | |
347 | } | |
9f95a23c TL |
348 | static void collect_contexts( |
349 | std::vector<Transaction>& t, | |
350 | std::list<Context*> *out_on_applied, | |
351 | std::list<Context*> *out_on_commit, | |
352 | std::list<Context*> *out_on_applied_sync) { | |
11fdf7f2 TL |
353 | ceph_assert(out_on_applied); |
354 | ceph_assert(out_on_commit); | |
355 | ceph_assert(out_on_applied_sync); | |
356 | for (auto& i : t) { | |
9f95a23c TL |
357 | out_on_applied->splice(out_on_applied->end(), i.on_applied); |
358 | out_on_commit->splice(out_on_commit->end(), i.on_commit); | |
359 | out_on_applied_sync->splice(out_on_applied_sync->end(), | |
360 | i.on_applied_sync); | |
11fdf7f2 TL |
361 | } |
362 | } | |
9f95a23c TL |
363 | static Context *collect_all_contexts( |
364 | Transaction& t) { | |
365 | list<Context*> contexts; | |
366 | contexts.splice(contexts.end(), t.on_applied); | |
367 | contexts.splice(contexts.end(), t.on_commit); | |
368 | contexts.splice(contexts.end(), t.on_applied_sync); | |
369 | return C_Contexts::list_to_context(contexts); | |
370 | } | |
11fdf7f2 TL |
371 | |
372 | Context *get_on_applied() { | |
373 | return C_Contexts::list_to_context(on_applied); | |
374 | } | |
375 | Context *get_on_commit() { | |
376 | return C_Contexts::list_to_context(on_commit); | |
377 | } | |
378 | Context *get_on_applied_sync() { | |
379 | return C_Contexts::list_to_context(on_applied_sync); | |
380 | } | |
381 | ||
382 | void set_fadvise_flags(uint32_t flags) { | |
383 | data.fadvise_flags = flags; | |
384 | } | |
385 | void set_fadvise_flag(uint32_t flag) { | |
386 | data.fadvise_flags = data.fadvise_flags | flag; | |
387 | } | |
388 | uint32_t get_fadvise_flags() { return data.fadvise_flags; } | |
389 | ||
390 | void swap(Transaction& other) noexcept { | |
391 | std::swap(data, other.data); | |
392 | std::swap(on_applied, other.on_applied); | |
393 | std::swap(on_commit, other.on_commit); | |
394 | std::swap(on_applied_sync, other.on_applied_sync); | |
9f95a23c | 395 | |
11fdf7f2 TL |
396 | std::swap(coll_index, other.coll_index); |
397 | std::swap(object_index, other.object_index); | |
398 | std::swap(coll_id, other.coll_id); | |
399 | std::swap(object_id, other.object_id); | |
400 | op_bl.swap(other.op_bl); | |
401 | data_bl.swap(other.data_bl); | |
402 | } | |
403 | ||
404 | void _update_op(Op* op, | |
9f95a23c TL |
405 | std::vector<uint32_t> &cm, |
406 | std::vector<uint32_t> &om) { | |
11fdf7f2 TL |
407 | |
408 | switch (op->op) { | |
409 | case OP_NOP: | |
410 | break; | |
411 | ||
9f95a23c | 412 | case OP_CREATE: |
11fdf7f2 TL |
413 | case OP_TOUCH: |
414 | case OP_REMOVE: | |
415 | case OP_SETATTR: | |
416 | case OP_SETATTRS: | |
417 | case OP_RMATTR: | |
418 | case OP_RMATTRS: | |
419 | case OP_COLL_REMOVE: | |
420 | case OP_OMAP_CLEAR: | |
421 | case OP_OMAP_SETKEYS: | |
422 | case OP_OMAP_RMKEYS: | |
423 | case OP_OMAP_RMKEYRANGE: | |
424 | case OP_OMAP_SETHEADER: | |
425 | case OP_WRITE: | |
426 | case OP_ZERO: | |
427 | case OP_TRUNCATE: | |
428 | case OP_SETALLOCHINT: | |
429 | ceph_assert(op->cid < cm.size()); | |
430 | ceph_assert(op->oid < om.size()); | |
431 | op->cid = cm[op->cid]; | |
432 | op->oid = om[op->oid]; | |
433 | break; | |
434 | ||
435 | case OP_CLONERANGE2: | |
436 | case OP_CLONE: | |
437 | ceph_assert(op->cid < cm.size()); | |
438 | ceph_assert(op->oid < om.size()); | |
439 | ceph_assert(op->dest_oid < om.size()); | |
440 | op->cid = cm[op->cid]; | |
441 | op->oid = om[op->oid]; | |
442 | op->dest_oid = om[op->dest_oid]; | |
443 | break; | |
444 | ||
445 | case OP_MKCOLL: | |
446 | case OP_RMCOLL: | |
447 | case OP_COLL_SETATTR: | |
448 | case OP_COLL_RMATTR: | |
449 | case OP_COLL_SETATTRS: | |
450 | case OP_COLL_HINT: | |
451 | case OP_COLL_SET_BITS: | |
452 | ceph_assert(op->cid < cm.size()); | |
453 | op->cid = cm[op->cid]; | |
454 | break; | |
455 | ||
456 | case OP_COLL_ADD: | |
457 | ceph_assert(op->cid < cm.size()); | |
458 | ceph_assert(op->oid < om.size()); | |
459 | ceph_assert(op->dest_cid < om.size()); | |
460 | op->cid = cm[op->cid]; | |
461 | op->dest_cid = cm[op->dest_cid]; | |
462 | op->oid = om[op->oid]; | |
463 | break; | |
9f95a23c | 464 | |
11fdf7f2 TL |
465 | case OP_COLL_MOVE_RENAME: |
466 | ceph_assert(op->cid < cm.size()); | |
467 | ceph_assert(op->oid < om.size()); | |
468 | ceph_assert(op->dest_cid < cm.size()); | |
469 | ceph_assert(op->dest_oid < om.size()); | |
470 | op->cid = cm[op->cid]; | |
471 | op->oid = om[op->oid]; | |
472 | op->dest_cid = cm[op->dest_cid]; | |
473 | op->dest_oid = om[op->dest_oid]; | |
474 | break; | |
9f95a23c | 475 | |
11fdf7f2 TL |
476 | case OP_TRY_RENAME: |
477 | ceph_assert(op->cid < cm.size()); | |
478 | ceph_assert(op->oid < om.size()); | |
479 | ceph_assert(op->dest_oid < om.size()); | |
480 | op->cid = cm[op->cid]; | |
481 | op->oid = om[op->oid]; | |
482 | op->dest_oid = om[op->dest_oid]; | |
9f95a23c TL |
483 | break; |
484 | ||
11fdf7f2 TL |
485 | case OP_SPLIT_COLLECTION2: |
486 | ceph_assert(op->cid < cm.size()); | |
9f95a23c | 487 | ceph_assert(op->dest_cid < cm.size()); |
11fdf7f2 TL |
488 | op->cid = cm[op->cid]; |
489 | op->dest_cid = cm[op->dest_cid]; | |
490 | break; | |
9f95a23c | 491 | |
11fdf7f2 TL |
492 | case OP_MERGE_COLLECTION: |
493 | ceph_assert(op->cid < cm.size()); | |
9f95a23c | 494 | ceph_assert(op->dest_cid < cm.size()); |
11fdf7f2 TL |
495 | op->cid = cm[op->cid]; |
496 | op->dest_cid = cm[op->dest_cid]; | |
497 | break; | |
9f95a23c | 498 | |
11fdf7f2 TL |
499 | default: |
500 | ceph_abort_msg("Unknown OP"); | |
501 | } | |
502 | } | |
503 | void _update_op_bl( | |
9f95a23c TL |
504 | ceph::buffer::list& bl, |
505 | std::vector<uint32_t> &cm, | |
506 | std::vector<uint32_t> &om) { | |
11fdf7f2 TL |
507 | for (auto& bp : bl.buffers()) { |
508 | ceph_assert(bp.length() % sizeof(Op) == 0); | |
9f95a23c | 509 | |
11fdf7f2 TL |
510 | char* raw_p = const_cast<char*>(bp.c_str()); |
511 | char* raw_end = raw_p + bp.length(); | |
512 | while (raw_p < raw_end) { | |
9f95a23c TL |
513 | _update_op(reinterpret_cast<Op*>(raw_p), cm, om); |
514 | raw_p += sizeof(Op); | |
11fdf7f2 TL |
515 | } |
516 | } | |
517 | } | |
9f95a23c | 518 | /// Append the operations of the parameter to this Transaction. Those operations are removed from the parameter Transaction |
11fdf7f2 TL |
519 | void append(Transaction& other) { |
520 | ||
eafe8130 | 521 | data.ops = data.ops + other.data.ops; |
11fdf7f2 | 522 | if (other.data.largest_data_len > data.largest_data_len) { |
9f95a23c TL |
523 | data.largest_data_len = other.data.largest_data_len; |
524 | data.largest_data_off = other.data.largest_data_off; | |
525 | data.largest_data_off_in_data_bl = data_bl.length() + other.data.largest_data_off_in_data_bl; | |
11fdf7f2 | 526 | } |
eafe8130 | 527 | data.fadvise_flags = data.fadvise_flags | other.data.fadvise_flags; |
11fdf7f2 TL |
528 | on_applied.splice(on_applied.end(), other.on_applied); |
529 | on_commit.splice(on_commit.end(), other.on_commit); | |
530 | on_applied_sync.splice(on_applied_sync.end(), other.on_applied_sync); | |
9f95a23c | 531 | |
11fdf7f2 | 532 | //append coll_index & object_index |
9f95a23c TL |
533 | std::vector<uint32_t> cm(other.coll_index.size()); |
534 | std::map<coll_t, uint32_t>::iterator coll_index_p; | |
11fdf7f2 | 535 | for (coll_index_p = other.coll_index.begin(); |
9f95a23c TL |
536 | coll_index_p != other.coll_index.end(); |
537 | ++coll_index_p) { | |
11fdf7f2 TL |
538 | cm[coll_index_p->second] = _get_coll_id(coll_index_p->first); |
539 | } | |
9f95a23c TL |
540 | |
541 | std::vector<uint32_t> om(other.object_index.size()); | |
542 | std::map<ghobject_t, uint32_t>::iterator object_index_p; | |
11fdf7f2 | 543 | for (object_index_p = other.object_index.begin(); |
9f95a23c TL |
544 | object_index_p != other.object_index.end(); |
545 | ++object_index_p) { | |
11fdf7f2 | 546 | om[object_index_p->second] = _get_object_id(object_index_p->first); |
9f95a23c TL |
547 | } |
548 | ||
11fdf7f2 | 549 | //the other.op_bl SHOULD NOT be changes during append operation, |
9f95a23c TL |
550 | //we use additional ceph::buffer::list to avoid this problem |
551 | ceph::buffer::list other_op_bl; | |
11fdf7f2 | 552 | { |
9f95a23c TL |
553 | ceph::buffer::ptr other_op_bl_ptr(other.op_bl.length()); |
554 | other.op_bl.begin().copy(other.op_bl.length(), other_op_bl_ptr.c_str()); | |
11fdf7f2 TL |
555 | other_op_bl.append(std::move(other_op_bl_ptr)); |
556 | } | |
9f95a23c | 557 | |
11fdf7f2 TL |
558 | //update other_op_bl with cm & om |
559 | //When the other is appended to current transaction, all coll_index and | |
560 | //object_index in other.op_buffer should be updated by new index of the | |
561 | //combined transaction | |
562 | _update_op_bl(other_op_bl, cm, om); | |
9f95a23c | 563 | |
11fdf7f2 TL |
564 | //append op_bl |
565 | op_bl.append(other_op_bl); | |
566 | //append data_bl | |
567 | data_bl.append(other.data_bl); | |
568 | } | |
9f95a23c | 569 | |
11fdf7f2 | 570 | /** Inquires about the Transaction as a whole. */ |
9f95a23c | 571 | |
11fdf7f2 TL |
572 | /// How big is the encoded Transaction buffer? |
573 | uint64_t get_encoded_bytes() { | |
574 | //layout: data_bl + op_bl + coll_index + object_index + data | |
9f95a23c | 575 | |
11fdf7f2 TL |
576 | // coll_index size, object_index size and sizeof(transaction_data) |
577 | // all here, so they may be computed at compile-time | |
578 | size_t final_size = sizeof(__u32) * 2 + sizeof(data); | |
9f95a23c | 579 | |
11fdf7f2 | 580 | // coll_index second and object_index second |
9f95a23c TL |
581 | final_size += (coll_index.size() + object_index.size()) * sizeof(__u32); |
582 | ||
11fdf7f2 TL |
583 | // coll_index first |
584 | for (auto p = coll_index.begin(); p != coll_index.end(); ++p) { | |
9f95a23c | 585 | final_size += p->first.encoded_size(); |
11fdf7f2 | 586 | } |
9f95a23c | 587 | |
11fdf7f2 TL |
588 | // object_index first |
589 | for (auto p = object_index.begin(); p != object_index.end(); ++p) { | |
9f95a23c | 590 | final_size += p->first.encoded_size(); |
11fdf7f2 | 591 | } |
9f95a23c | 592 | |
11fdf7f2 | 593 | return data_bl.length() + |
9f95a23c TL |
594 | op_bl.length() + |
595 | final_size; | |
11fdf7f2 | 596 | } |
9f95a23c | 597 | |
11fdf7f2 TL |
598 | /// Retain old version for regression testing purposes |
599 | uint64_t get_encoded_bytes_test() { | |
600 | using ceph::encode; | |
601 | //layout: data_bl + op_bl + coll_index + object_index + data | |
9f95a23c | 602 | ceph::buffer::list bl; |
11fdf7f2 TL |
603 | encode(coll_index, bl); |
604 | encode(object_index, bl); | |
9f95a23c | 605 | |
11fdf7f2 | 606 | return data_bl.length() + |
9f95a23c TL |
607 | op_bl.length() + |
608 | bl.length() + | |
609 | sizeof(data); | |
11fdf7f2 | 610 | } |
9f95a23c | 611 | |
11fdf7f2 TL |
612 | uint64_t get_num_bytes() { |
613 | return get_encoded_bytes(); | |
614 | } | |
615 | /// Size of largest data buffer to the "write" operation encountered so far | |
616 | uint32_t get_data_length() { | |
617 | return data.largest_data_len; | |
618 | } | |
9f95a23c | 619 | /// offset within the encoded buffer to the start of the largest data buffer that's encoded |
11fdf7f2 TL |
620 | uint32_t get_data_offset() { |
621 | if (data.largest_data_off_in_data_bl) { | |
9f95a23c TL |
622 | return data.largest_data_off_in_data_bl + |
623 | sizeof(__u8) + // encode struct_v | |
624 | sizeof(__u8) + // encode compat_v | |
625 | sizeof(__u32) + // encode len | |
626 | sizeof(__u32); // data_bl len | |
11fdf7f2 TL |
627 | } |
628 | return 0; // none | |
629 | } | |
630 | /// offset of buffer as aligned to destination within object. | |
631 | int get_data_alignment() { | |
632 | if (!data.largest_data_len) | |
9f95a23c | 633 | return 0; |
11fdf7f2 TL |
634 | return (0 - get_data_offset()) & ~CEPH_PAGE_MASK; |
635 | } | |
636 | /// Is the Transaction empty (no operations) | |
637 | bool empty() { | |
638 | return !data.ops; | |
639 | } | |
640 | /// Number of operations in the transaction | |
641 | int get_num_ops() { | |
642 | return data.ops; | |
643 | } | |
9f95a23c | 644 | |
11fdf7f2 TL |
645 | /** |
646 | * iterator | |
647 | * | |
648 | * Helper object to parse Transactions. | |
649 | * | |
650 | * ObjectStore instances use this object to step down the encoded | |
651 | * buffer decoding operation codes and parameters as we go. | |
652 | * | |
653 | */ | |
654 | class iterator { | |
655 | Transaction *t; | |
9f95a23c | 656 | |
11fdf7f2 TL |
657 | uint64_t ops; |
658 | char* op_buffer_p; | |
9f95a23c TL |
659 | |
660 | ceph::buffer::list::const_iterator data_bl_p; | |
661 | ||
11fdf7f2 | 662 | public: |
9f95a23c TL |
663 | std::vector<coll_t> colls; |
664 | std::vector<ghobject_t> objects; | |
665 | ||
11fdf7f2 TL |
666 | private: |
667 | explicit iterator(Transaction *t) | |
668 | : t(t), | |
9f95a23c TL |
669 | data_bl_p(t->data_bl.cbegin()), |
670 | colls(t->coll_index.size()), | |
671 | objects(t->object_index.size()) { | |
672 | ||
11fdf7f2 TL |
673 | ops = t->data.ops; |
674 | op_buffer_p = t->op_bl.c_str(); | |
9f95a23c TL |
675 | |
676 | std::map<coll_t, uint32_t>::iterator coll_index_p; | |
11fdf7f2 | 677 | for (coll_index_p = t->coll_index.begin(); |
9f95a23c TL |
678 | coll_index_p != t->coll_index.end(); |
679 | ++coll_index_p) { | |
680 | colls[coll_index_p->second] = coll_index_p->first; | |
11fdf7f2 | 681 | } |
9f95a23c TL |
682 | |
683 | std::map<ghobject_t, uint32_t>::iterator object_index_p; | |
11fdf7f2 | 684 | for (object_index_p = t->object_index.begin(); |
9f95a23c TL |
685 | object_index_p != t->object_index.end(); |
686 | ++object_index_p) { | |
687 | objects[object_index_p->second] = object_index_p->first; | |
11fdf7f2 TL |
688 | } |
689 | } | |
9f95a23c | 690 | |
11fdf7f2 | 691 | friend class Transaction; |
9f95a23c | 692 | |
11fdf7f2 | 693 | public: |
9f95a23c | 694 | |
11fdf7f2 TL |
695 | bool have_op() { |
696 | return ops > 0; | |
697 | } | |
698 | Op* decode_op() { | |
699 | ceph_assert(ops > 0); | |
9f95a23c | 700 | |
11fdf7f2 TL |
701 | Op* op = reinterpret_cast<Op*>(op_buffer_p); |
702 | op_buffer_p += sizeof(Op); | |
703 | ops--; | |
9f95a23c | 704 | |
11fdf7f2 TL |
705 | return op; |
706 | } | |
9f95a23c TL |
707 | std::string decode_string() { |
708 | using ceph::decode; | |
709 | std::string s; | |
11fdf7f2 TL |
710 | decode(s, data_bl_p); |
711 | return s; | |
712 | } | |
9f95a23c TL |
713 | void decode_bp(ceph::buffer::ptr& bp) { |
714 | using ceph::decode; | |
11fdf7f2 TL |
715 | decode(bp, data_bl_p); |
716 | } | |
9f95a23c TL |
717 | void decode_bl(ceph::buffer::list& bl) { |
718 | using ceph::decode; | |
11fdf7f2 TL |
719 | decode(bl, data_bl_p); |
720 | } | |
9f95a23c TL |
721 | void decode_attrset(std::map<std::string,ceph::buffer::ptr>& aset) { |
722 | using ceph::decode; | |
11fdf7f2 TL |
723 | decode(aset, data_bl_p); |
724 | } | |
9f95a23c TL |
725 | void decode_attrset(std::map<std::string,ceph::buffer::list>& aset) { |
726 | using ceph::decode; | |
11fdf7f2 TL |
727 | decode(aset, data_bl_p); |
728 | } | |
9f95a23c TL |
729 | void decode_attrset_bl(ceph::buffer::list *pbl) { |
730 | decode_str_str_map_to_bl(data_bl_p, pbl); | |
731 | } | |
732 | void decode_keyset(std::set<std::string> &keys){ | |
733 | using ceph::decode; | |
11fdf7f2 TL |
734 | decode(keys, data_bl_p); |
735 | } | |
9f95a23c TL |
736 | void decode_keyset_bl(ceph::buffer::list *pbl){ |
737 | decode_str_set_to_bl(data_bl_p, pbl); | |
738 | } | |
739 | ||
740 | const ghobject_t &get_oid(uint32_t oid_id) { | |
11fdf7f2 TL |
741 | ceph_assert(oid_id < objects.size()); |
742 | return objects[oid_id]; | |
743 | } | |
9f95a23c | 744 | const coll_t &get_cid(uint32_t cid_id) { |
11fdf7f2 TL |
745 | ceph_assert(cid_id < colls.size()); |
746 | return colls[cid_id]; | |
747 | } | |
748 | uint32_t get_fadvise_flags() const { | |
9f95a23c | 749 | return t->get_fadvise_flags(); |
11fdf7f2 TL |
750 | } |
751 | }; | |
9f95a23c | 752 | |
11fdf7f2 | 753 | iterator begin() { |
9f95a23c | 754 | return iterator(this); |
11fdf7f2 | 755 | } |
9f95a23c | 756 | |
11fdf7f2 TL |
757 | private: |
758 | void _build_actions_from_tbl(); | |
759 | ||
11fdf7f2 TL |
760 | /** |
761 | * Helper functions to encode the various mutation elements of a | |
762 | * transaction. These are 1:1 with the operation codes (see | |
763 | * enumeration above). These routines ensure that the | |
764 | * encoder/creator of a transaction gets the right data in the | |
765 | * right place. Sadly, there's no corresponding version nor any | |
766 | * form of seat belts for the decoder. | |
767 | */ | |
768 | Op* _get_next_op() { | |
769 | if (op_bl.get_append_buffer_unused_tail_length() < sizeof(Op)) { | |
770 | op_bl.reserve(sizeof(Op) * OPS_PER_PTR); | |
771 | } | |
772 | // append_hole ensures bptr merging. Even huge number of ops | |
773 | // shouldn't result in overpopulating bl::_buffers. | |
774 | char* const p = op_bl.append_hole(sizeof(Op)).c_str(); | |
775 | memset(p, 0, sizeof(Op)); | |
776 | return reinterpret_cast<Op*>(p); | |
777 | } | |
9f95a23c TL |
778 | uint32_t _get_coll_id(const coll_t& coll) { |
779 | std::map<coll_t, uint32_t>::iterator c = coll_index.find(coll); | |
11fdf7f2 TL |
780 | if (c != coll_index.end()) |
781 | return c->second; | |
9f95a23c TL |
782 | |
783 | uint32_t index_id = coll_id++; | |
11fdf7f2 TL |
784 | coll_index[coll] = index_id; |
785 | return index_id; | |
786 | } | |
9f95a23c TL |
787 | uint32_t _get_object_id(const ghobject_t& oid) { |
788 | std::map<ghobject_t, uint32_t>::iterator o = object_index.find(oid); | |
11fdf7f2 TL |
789 | if (o != object_index.end()) |
790 | return o->second; | |
9f95a23c TL |
791 | |
792 | uint32_t index_id = object_id++; | |
11fdf7f2 TL |
793 | object_index[oid] = index_id; |
794 | return index_id; | |
795 | } | |
9f95a23c | 796 | |
11fdf7f2 TL |
797 | public: |
798 | /// noop. 'nuf said | |
799 | void nop() { | |
800 | Op* _op = _get_next_op(); | |
801 | _op->op = OP_NOP; | |
eafe8130 | 802 | data.ops = data.ops + 1; |
11fdf7f2 | 803 | } |
9f95a23c TL |
804 | /** |
805 | * create | |
806 | * | |
807 | * create an object that does not yet exist | |
808 | * (behavior is undefined if the object already exists) | |
809 | */ | |
810 | void create(const coll_t& cid, const ghobject_t& oid) { | |
811 | Op* _op = _get_next_op(); | |
812 | _op->op = OP_CREATE; | |
813 | _op->cid = _get_coll_id(cid); | |
814 | _op->oid = _get_object_id(oid); | |
815 | data.ops = data.ops + 1; | |
816 | } | |
11fdf7f2 TL |
817 | /** |
818 | * touch | |
819 | * | |
820 | * Ensure the existance of an object in a collection. Create an | |
821 | * empty object if necessary | |
822 | */ | |
823 | void touch(const coll_t& cid, const ghobject_t& oid) { | |
824 | Op* _op = _get_next_op(); | |
825 | _op->op = OP_TOUCH; | |
826 | _op->cid = _get_coll_id(cid); | |
827 | _op->oid = _get_object_id(oid); | |
eafe8130 | 828 | data.ops = data.ops + 1; |
11fdf7f2 TL |
829 | } |
830 | /** | |
831 | * Write data to an offset within an object. If the object is too | |
832 | * small, it is expanded as needed. It is possible to specify an | |
833 | * offset beyond the current end of an object and it will be | |
834 | * expanded as needed. Simple implementations of ObjectStore will | |
835 | * just zero the data between the old end of the object and the | |
836 | * newly provided data. More sophisticated implementations of | |
837 | * ObjectStore will omit the untouched data and store it as a | |
838 | * "hole" in the file. | |
839 | * | |
840 | * Note that a 0-length write does not affect the size of the object. | |
841 | */ | |
842 | void write(const coll_t& cid, const ghobject_t& oid, uint64_t off, uint64_t len, | |
9f95a23c | 843 | const ceph::buffer::list& write_data, uint32_t flags = 0) { |
11fdf7f2 TL |
844 | using ceph::encode; |
845 | uint32_t orig_len = data_bl.length(); | |
846 | Op* _op = _get_next_op(); | |
847 | _op->op = OP_WRITE; | |
848 | _op->cid = _get_coll_id(cid); | |
849 | _op->oid = _get_object_id(oid); | |
850 | _op->off = off; | |
851 | _op->len = len; | |
852 | encode(write_data, data_bl); | |
9f95a23c | 853 | |
11fdf7f2 TL |
854 | ceph_assert(len == write_data.length()); |
855 | data.fadvise_flags = data.fadvise_flags | flags; | |
856 | if (write_data.length() > data.largest_data_len) { | |
9f95a23c TL |
857 | data.largest_data_len = write_data.length(); |
858 | data.largest_data_off = off; | |
859 | data.largest_data_off_in_data_bl = orig_len + sizeof(__u32); // we are about to | |
11fdf7f2 | 860 | } |
eafe8130 | 861 | data.ops = data.ops + 1; |
11fdf7f2 TL |
862 | } |
863 | /** | |
864 | * zero out the indicated byte range within an object. Some | |
865 | * ObjectStore instances may optimize this to release the | |
866 | * underlying storage space. | |
867 | * | |
868 | * If the zero range extends beyond the end of the object, the object | |
869 | * size is extended, just as if we were writing a buffer full of zeros. | |
870 | * EXCEPT if the length is 0, in which case (just like a 0-length write) | |
871 | * we do not adjust the object size. | |
872 | */ | |
873 | void zero(const coll_t& cid, const ghobject_t& oid, uint64_t off, uint64_t len) { | |
874 | Op* _op = _get_next_op(); | |
875 | _op->op = OP_ZERO; | |
876 | _op->cid = _get_coll_id(cid); | |
877 | _op->oid = _get_object_id(oid); | |
878 | _op->off = off; | |
879 | _op->len = len; | |
eafe8130 | 880 | data.ops = data.ops + 1; |
11fdf7f2 TL |
881 | } |
882 | /// Discard all data in the object beyond the specified size. | |
883 | void truncate(const coll_t& cid, const ghobject_t& oid, uint64_t off) { | |
884 | Op* _op = _get_next_op(); | |
885 | _op->op = OP_TRUNCATE; | |
886 | _op->cid = _get_coll_id(cid); | |
887 | _op->oid = _get_object_id(oid); | |
888 | _op->off = off; | |
eafe8130 | 889 | data.ops = data.ops + 1; |
11fdf7f2 TL |
890 | } |
891 | /// Remove an object. All four parts of the object are removed. | |
892 | void remove(const coll_t& cid, const ghobject_t& oid) { | |
893 | Op* _op = _get_next_op(); | |
894 | _op->op = OP_REMOVE; | |
895 | _op->cid = _get_coll_id(cid); | |
896 | _op->oid = _get_object_id(oid); | |
eafe8130 | 897 | data.ops = data.ops + 1; |
11fdf7f2 TL |
898 | } |
899 | /// Set an xattr of an object | |
9f95a23c TL |
900 | void setattr(const coll_t& cid, const ghobject_t& oid, const char* name, ceph::buffer::list& val) { |
901 | std::string n(name); | |
11fdf7f2 TL |
902 | setattr(cid, oid, n, val); |
903 | } | |
904 | /// Set an xattr of an object | |
9f95a23c | 905 | void setattr(const coll_t& cid, const ghobject_t& oid, const std::string& s, ceph::buffer::list& val) { |
11fdf7f2 TL |
906 | using ceph::encode; |
907 | Op* _op = _get_next_op(); | |
908 | _op->op = OP_SETATTR; | |
909 | _op->cid = _get_coll_id(cid); | |
910 | _op->oid = _get_object_id(oid); | |
911 | encode(s, data_bl); | |
912 | encode(val, data_bl); | |
eafe8130 | 913 | data.ops = data.ops + 1; |
11fdf7f2 TL |
914 | } |
915 | /// Set multiple xattrs of an object | |
9f95a23c | 916 | void setattrs(const coll_t& cid, const ghobject_t& oid, const std::map<std::string,ceph::buffer::ptr>& attrset) { |
11fdf7f2 TL |
917 | using ceph::encode; |
918 | Op* _op = _get_next_op(); | |
919 | _op->op = OP_SETATTRS; | |
920 | _op->cid = _get_coll_id(cid); | |
921 | _op->oid = _get_object_id(oid); | |
922 | encode(attrset, data_bl); | |
eafe8130 | 923 | data.ops = data.ops + 1; |
11fdf7f2 TL |
924 | } |
925 | /// Set multiple xattrs of an object | |
9f95a23c | 926 | void setattrs(const coll_t& cid, const ghobject_t& oid, const std::map<std::string,ceph::buffer::list>& attrset) { |
11fdf7f2 TL |
927 | using ceph::encode; |
928 | Op* _op = _get_next_op(); | |
929 | _op->op = OP_SETATTRS; | |
930 | _op->cid = _get_coll_id(cid); | |
931 | _op->oid = _get_object_id(oid); | |
932 | encode(attrset, data_bl); | |
eafe8130 | 933 | data.ops = data.ops + 1; |
11fdf7f2 TL |
934 | } |
935 | /// remove an xattr from an object | |
936 | void rmattr(const coll_t& cid, const ghobject_t& oid, const char *name) { | |
9f95a23c | 937 | std::string n(name); |
11fdf7f2 TL |
938 | rmattr(cid, oid, n); |
939 | } | |
940 | /// remove an xattr from an object | |
9f95a23c | 941 | void rmattr(const coll_t& cid, const ghobject_t& oid, const std::string& s) { |
11fdf7f2 TL |
942 | using ceph::encode; |
943 | Op* _op = _get_next_op(); | |
944 | _op->op = OP_RMATTR; | |
945 | _op->cid = _get_coll_id(cid); | |
946 | _op->oid = _get_object_id(oid); | |
947 | encode(s, data_bl); | |
eafe8130 | 948 | data.ops = data.ops + 1; |
11fdf7f2 TL |
949 | } |
950 | /// remove all xattrs from an object | |
951 | void rmattrs(const coll_t& cid, const ghobject_t& oid) { | |
952 | Op* _op = _get_next_op(); | |
953 | _op->op = OP_RMATTRS; | |
954 | _op->cid = _get_coll_id(cid); | |
955 | _op->oid = _get_object_id(oid); | |
eafe8130 | 956 | data.ops = data.ops + 1; |
11fdf7f2 TL |
957 | } |
958 | /** | |
959 | * Clone an object into another object. | |
960 | * | |
961 | * Low-cost (e.g., O(1)) cloning (if supported) is best, but | |
962 | * fallback to an O(n) copy is allowed. All four parts of the | |
963 | * object are cloned (data, xattrs, omap header, omap | |
964 | * entries). | |
965 | * | |
966 | * The destination named object may already exist, in | |
967 | * which case its previous contents are discarded. | |
968 | */ | |
969 | void clone(const coll_t& cid, const ghobject_t& oid, | |
9f95a23c | 970 | const ghobject_t& noid) { |
11fdf7f2 TL |
971 | Op* _op = _get_next_op(); |
972 | _op->op = OP_CLONE; | |
973 | _op->cid = _get_coll_id(cid); | |
974 | _op->oid = _get_object_id(oid); | |
975 | _op->dest_oid = _get_object_id(noid); | |
eafe8130 | 976 | data.ops = data.ops + 1; |
11fdf7f2 TL |
977 | } |
978 | /** | |
979 | * Clone a byte range from one object to another. | |
980 | * | |
981 | * The data portion of the destination object receives a copy of a | |
982 | * portion of the data from the source object. None of the other | |
983 | * three parts of an object is copied from the source. | |
984 | * | |
985 | * The destination object size may be extended to the dstoff + len. | |
986 | * | |
987 | * The source range *must* overlap with the source object data. If it does | |
988 | * not the result is undefined. | |
989 | */ | |
990 | void clone_range(const coll_t& cid, const ghobject_t& oid, | |
9f95a23c TL |
991 | const ghobject_t& noid, |
992 | uint64_t srcoff, uint64_t srclen, uint64_t dstoff) { | |
11fdf7f2 TL |
993 | Op* _op = _get_next_op(); |
994 | _op->op = OP_CLONERANGE2; | |
995 | _op->cid = _get_coll_id(cid); | |
996 | _op->oid = _get_object_id(oid); | |
997 | _op->dest_oid = _get_object_id(noid); | |
998 | _op->off = srcoff; | |
999 | _op->len = srclen; | |
1000 | _op->dest_off = dstoff; | |
eafe8130 | 1001 | data.ops = data.ops + 1; |
11fdf7f2 | 1002 | } |
9f95a23c | 1003 | |
11fdf7f2 TL |
1004 | /// Create the collection |
1005 | void create_collection(const coll_t& cid, int bits) { | |
1006 | Op* _op = _get_next_op(); | |
1007 | _op->op = OP_MKCOLL; | |
1008 | _op->cid = _get_coll_id(cid); | |
1009 | _op->split_bits = bits; | |
eafe8130 | 1010 | data.ops = data.ops + 1; |
11fdf7f2 | 1011 | } |
9f95a23c | 1012 | |
11fdf7f2 TL |
1013 | /** |
1014 | * Give the collection a hint. | |
1015 | * | |
1016 | * @param cid - collection id. | |
1017 | * @param type - hint type. | |
1018 | * @param hint - the hint payload, which contains the customized | |
1019 | * data along with the hint type. | |
1020 | */ | |
9f95a23c | 1021 | void collection_hint(const coll_t& cid, uint32_t type, const ceph::buffer::list& hint) { |
11fdf7f2 TL |
1022 | using ceph::encode; |
1023 | Op* _op = _get_next_op(); | |
1024 | _op->op = OP_COLL_HINT; | |
1025 | _op->cid = _get_coll_id(cid); | |
1026 | _op->hint_type = type; | |
1027 | encode(hint, data_bl); | |
eafe8130 | 1028 | data.ops = data.ops + 1; |
11fdf7f2 | 1029 | } |
9f95a23c | 1030 | |
11fdf7f2 TL |
1031 | /// remove the collection, the collection must be empty |
1032 | void remove_collection(const coll_t& cid) { | |
1033 | Op* _op = _get_next_op(); | |
1034 | _op->op = OP_RMCOLL; | |
1035 | _op->cid = _get_coll_id(cid); | |
eafe8130 | 1036 | data.ops = data.ops + 1; |
11fdf7f2 | 1037 | } |
9f95a23c | 1038 | void collection_move(const coll_t& cid, const coll_t &oldcid, const ghobject_t& oid) |
11fdf7f2 | 1039 | __attribute__ ((deprecated)) { |
9f95a23c TL |
1040 | // NOTE: we encode this as a fixed combo of ADD + REMOVE. they |
1041 | // always appear together, so this is effectively a single MOVE. | |
1042 | Op* _op = _get_next_op(); | |
1043 | _op->op = OP_COLL_ADD; | |
1044 | _op->cid = _get_coll_id(oldcid); | |
1045 | _op->oid = _get_object_id(oid); | |
1046 | _op->dest_cid = _get_coll_id(cid); | |
1047 | data.ops = data.ops + 1; | |
1048 | ||
1049 | _op = _get_next_op(); | |
1050 | _op->op = OP_COLL_REMOVE; | |
1051 | _op->cid = _get_coll_id(oldcid); | |
1052 | _op->oid = _get_object_id(oid); | |
1053 | data.ops = data.ops + 1; | |
1054 | } | |
11fdf7f2 | 1055 | void collection_move_rename(const coll_t& oldcid, const ghobject_t& oldoid, |
9f95a23c | 1056 | const coll_t &cid, const ghobject_t& oid) { |
11fdf7f2 TL |
1057 | Op* _op = _get_next_op(); |
1058 | _op->op = OP_COLL_MOVE_RENAME; | |
1059 | _op->cid = _get_coll_id(oldcid); | |
1060 | _op->oid = _get_object_id(oldoid); | |
1061 | _op->dest_cid = _get_coll_id(cid); | |
1062 | _op->dest_oid = _get_object_id(oid); | |
eafe8130 | 1063 | data.ops = data.ops + 1; |
11fdf7f2 TL |
1064 | } |
1065 | void try_rename(const coll_t &cid, const ghobject_t& oldoid, | |
9f95a23c | 1066 | const ghobject_t& oid) { |
11fdf7f2 TL |
1067 | Op* _op = _get_next_op(); |
1068 | _op->op = OP_TRY_RENAME; | |
1069 | _op->cid = _get_coll_id(cid); | |
1070 | _op->oid = _get_object_id(oldoid); | |
1071 | _op->dest_oid = _get_object_id(oid); | |
eafe8130 | 1072 | data.ops = data.ops + 1; |
11fdf7f2 | 1073 | } |
9f95a23c | 1074 | |
11fdf7f2 TL |
1075 | /// Remove omap from oid |
1076 | void omap_clear( | |
1077 | const coll_t &cid, ///< [in] Collection containing oid | |
1078 | const ghobject_t &oid ///< [in] Object from which to remove omap | |
1079 | ) { | |
1080 | Op* _op = _get_next_op(); | |
1081 | _op->op = OP_OMAP_CLEAR; | |
1082 | _op->cid = _get_coll_id(cid); | |
1083 | _op->oid = _get_object_id(oid); | |
eafe8130 | 1084 | data.ops = data.ops + 1; |
11fdf7f2 TL |
1085 | } |
1086 | /// Set keys on oid omap. Replaces duplicate keys. | |
1087 | void omap_setkeys( | |
1088 | const coll_t& cid, ///< [in] Collection containing oid | |
1089 | const ghobject_t &oid, ///< [in] Object to update | |
9f95a23c | 1090 | const std::map<std::string, ceph::buffer::list> &attrset ///< [in] Replacement keys and values |
11fdf7f2 TL |
1091 | ) { |
1092 | using ceph::encode; | |
1093 | Op* _op = _get_next_op(); | |
1094 | _op->op = OP_OMAP_SETKEYS; | |
1095 | _op->cid = _get_coll_id(cid); | |
1096 | _op->oid = _get_object_id(oid); | |
1097 | encode(attrset, data_bl); | |
eafe8130 | 1098 | data.ops = data.ops + 1; |
11fdf7f2 TL |
1099 | } |
1100 | ||
9f95a23c | 1101 | /// Set keys on an oid omap (ceph::buffer::list variant). |
11fdf7f2 TL |
1102 | void omap_setkeys( |
1103 | const coll_t &cid, ///< [in] Collection containing oid | |
1104 | const ghobject_t &oid, ///< [in] Object to update | |
9f95a23c TL |
1105 | const ceph::buffer::list &attrset_bl ///< [in] Replacement keys and values |
1106 | ) { | |
11fdf7f2 TL |
1107 | Op* _op = _get_next_op(); |
1108 | _op->op = OP_OMAP_SETKEYS; | |
1109 | _op->cid = _get_coll_id(cid); | |
1110 | _op->oid = _get_object_id(oid); | |
1111 | data_bl.append(attrset_bl); | |
eafe8130 | 1112 | data.ops = data.ops + 1; |
11fdf7f2 TL |
1113 | } |
1114 | ||
1115 | /// Remove keys from oid omap | |
1116 | void omap_rmkeys( | |
1117 | const coll_t &cid, ///< [in] Collection containing oid | |
1118 | const ghobject_t &oid, ///< [in] Object from which to remove the omap | |
9f95a23c | 1119 | const std::set<std::string> &keys ///< [in] Keys to clear |
11fdf7f2 TL |
1120 | ) { |
1121 | using ceph::encode; | |
1122 | Op* _op = _get_next_op(); | |
1123 | _op->op = OP_OMAP_RMKEYS; | |
1124 | _op->cid = _get_coll_id(cid); | |
1125 | _op->oid = _get_object_id(oid); | |
1126 | encode(keys, data_bl); | |
eafe8130 | 1127 | data.ops = data.ops + 1; |
11fdf7f2 TL |
1128 | } |
1129 | ||
9f95a23c TL |
1130 | /// Remove key from oid omap |
1131 | void omap_rmkey( | |
1132 | const coll_t &cid, ///< [in] Collection containing oid | |
1133 | const ghobject_t &oid, ///< [in] Object from which to remove the omap | |
1134 | const std::string& key ///< [in] Keys to clear | |
1135 | ) { | |
1136 | Op* _op = _get_next_op(); | |
1137 | _op->op = OP_OMAP_RMKEYS; | |
1138 | _op->cid = _get_coll_id(cid); | |
1139 | _op->oid = _get_object_id(oid); | |
1140 | using ceph::encode; | |
1141 | encode((uint32_t)1, data_bl); | |
1142 | encode(key, data_bl); | |
1143 | data.ops = data.ops + 1; | |
1144 | } | |
1145 | ||
11fdf7f2 TL |
1146 | /// Remove keys from oid omap |
1147 | void omap_rmkeys( | |
1148 | const coll_t &cid, ///< [in] Collection containing oid | |
1149 | const ghobject_t &oid, ///< [in] Object from which to remove the omap | |
9f95a23c | 1150 | const ceph::buffer::list &keys_bl ///< [in] Keys to clear |
11fdf7f2 TL |
1151 | ) { |
1152 | Op* _op = _get_next_op(); | |
1153 | _op->op = OP_OMAP_RMKEYS; | |
1154 | _op->cid = _get_coll_id(cid); | |
1155 | _op->oid = _get_object_id(oid); | |
1156 | data_bl.append(keys_bl); | |
eafe8130 | 1157 | data.ops = data.ops + 1; |
11fdf7f2 | 1158 | } |
9f95a23c | 1159 | |
11fdf7f2 TL |
1160 | /// Remove key range from oid omap |
1161 | void omap_rmkeyrange( | |
1162 | const coll_t &cid, ///< [in] Collection containing oid | |
1163 | const ghobject_t &oid, ///< [in] Object from which to remove the omap keys | |
9f95a23c TL |
1164 | const std::string& first, ///< [in] first key in range |
1165 | const std::string& last ///< [in] first key past range, range is [first,last) | |
11fdf7f2 TL |
1166 | ) { |
1167 | using ceph::encode; | |
1168 | Op* _op = _get_next_op(); | |
1169 | _op->op = OP_OMAP_RMKEYRANGE; | |
1170 | _op->cid = _get_coll_id(cid); | |
1171 | _op->oid = _get_object_id(oid); | |
1172 | encode(first, data_bl); | |
1173 | encode(last, data_bl); | |
eafe8130 | 1174 | data.ops = data.ops + 1; |
11fdf7f2 TL |
1175 | } |
1176 | ||
9f95a23c TL |
1177 | /// Remove key range from oid omap |
1178 | void omap_rmkeyrange( | |
1179 | const coll_t cid, ///< [in] Collection containing oid | |
1180 | const ghobject_t &oid, ///< [in] Object from which to remove the omap keys | |
1181 | const bufferlist &keys_bl ///< [in] range of keys to clear | |
1182 | ) { | |
1183 | Op* _op = _get_next_op(); | |
1184 | _op->op = OP_OMAP_RMKEYRANGE; | |
1185 | _op->cid = _get_coll_id(cid); | |
1186 | _op->oid = _get_object_id(oid); | |
1187 | data_bl.append(keys_bl); | |
1188 | data.ops = data.ops + 1; | |
1189 | } | |
1190 | ||
11fdf7f2 TL |
1191 | /// Set omap header |
1192 | void omap_setheader( | |
1193 | const coll_t &cid, ///< [in] Collection containing oid | |
1194 | const ghobject_t &oid, ///< [in] Object | |
9f95a23c | 1195 | const ceph::buffer::list &bl ///< [in] Header value |
11fdf7f2 TL |
1196 | ) { |
1197 | using ceph::encode; | |
1198 | Op* _op = _get_next_op(); | |
1199 | _op->op = OP_OMAP_SETHEADER; | |
1200 | _op->cid = _get_coll_id(cid); | |
1201 | _op->oid = _get_object_id(oid); | |
1202 | encode(bl, data_bl); | |
eafe8130 | 1203 | data.ops = data.ops + 1; |
11fdf7f2 TL |
1204 | } |
1205 | ||
9f95a23c TL |
1206 | /// Split collection based on given prefixes, objects matching the specified bits/rem are |
1207 | /// moved to the new collection | |
11fdf7f2 TL |
1208 | void split_collection( |
1209 | const coll_t &cid, | |
1210 | uint32_t bits, | |
1211 | uint32_t rem, | |
1212 | const coll_t &destination) { | |
1213 | Op* _op = _get_next_op(); | |
1214 | _op->op = OP_SPLIT_COLLECTION2; | |
1215 | _op->cid = _get_coll_id(cid); | |
1216 | _op->dest_cid = _get_coll_id(destination); | |
1217 | _op->split_bits = bits; | |
1218 | _op->split_rem = rem; | |
eafe8130 | 1219 | data.ops = data.ops + 1; |
11fdf7f2 TL |
1220 | } |
1221 | ||
1222 | /// Merge collection into another. | |
1223 | void merge_collection( | |
1224 | coll_t cid, | |
1225 | coll_t destination, | |
1226 | uint32_t bits) { | |
1227 | Op* _op = _get_next_op(); | |
1228 | _op->op = OP_MERGE_COLLECTION; | |
1229 | _op->cid = _get_coll_id(cid); | |
1230 | _op->dest_cid = _get_coll_id(destination); | |
1231 | _op->split_bits = bits; | |
eafe8130 | 1232 | data.ops = data.ops + 1; |
11fdf7f2 TL |
1233 | } |
1234 | ||
1235 | void collection_set_bits( | |
1236 | const coll_t &cid, | |
1237 | int bits) { | |
1238 | Op* _op = _get_next_op(); | |
1239 | _op->op = OP_COLL_SET_BITS; | |
1240 | _op->cid = _get_coll_id(cid); | |
1241 | _op->split_bits = bits; | |
eafe8130 | 1242 | data.ops = data.ops + 1; |
11fdf7f2 TL |
1243 | } |
1244 | ||
1245 | /// Set allocation hint for an object | |
1246 | /// make 0 values(expected_object_size, expected_write_size) noops for all implementations | |
1247 | void set_alloc_hint( | |
1248 | const coll_t &cid, | |
1249 | const ghobject_t &oid, | |
1250 | uint64_t expected_object_size, | |
1251 | uint64_t expected_write_size, | |
1252 | uint32_t flags | |
1253 | ) { | |
1254 | Op* _op = _get_next_op(); | |
1255 | _op->op = OP_SETALLOCHINT; | |
1256 | _op->cid = _get_coll_id(cid); | |
1257 | _op->oid = _get_object_id(oid); | |
1258 | _op->expected_object_size = expected_object_size; | |
1259 | _op->expected_write_size = expected_write_size; | |
1260 | _op->alloc_hint_flags = flags; | |
eafe8130 | 1261 | data.ops = data.ops + 1; |
11fdf7f2 TL |
1262 | } |
1263 | ||
9f95a23c | 1264 | void encode(ceph::buffer::list& bl) const { |
11fdf7f2 TL |
1265 | //layout: data_bl + op_bl + coll_index + object_index + data |
1266 | ENCODE_START(9, 9, bl); | |
1267 | encode(data_bl, bl); | |
1268 | encode(op_bl, bl); | |
1269 | encode(coll_index, bl); | |
1270 | encode(object_index, bl); | |
1271 | data.encode(bl); | |
1272 | ENCODE_FINISH(bl); | |
1273 | } | |
1274 | ||
9f95a23c | 1275 | void decode(ceph::buffer::list::const_iterator &bl) { |
11fdf7f2 TL |
1276 | DECODE_START(9, bl); |
1277 | DECODE_OLDEST(9); | |
1278 | ||
1279 | decode(data_bl, bl); | |
1280 | decode(op_bl, bl); | |
1281 | decode(coll_index, bl); | |
1282 | decode(object_index, bl); | |
1283 | data.decode(bl); | |
1284 | coll_id = coll_index.size(); | |
1285 | object_id = object_index.size(); | |
1286 | ||
1287 | DECODE_FINISH(bl); | |
1288 | } | |
1289 | ||
1290 | void dump(ceph::Formatter *f); | |
9f95a23c | 1291 | static void generate_test_instances(std::list<Transaction*>& o); |
11fdf7f2 | 1292 | }; |
9f95a23c TL |
1293 | WRITE_CLASS_ENCODER(Transaction) |
1294 | WRITE_CLASS_ENCODER(Transaction::TransactionData) | |
1295 | ||
1296 | std::ostream& operator<<(std::ostream& out, const Transaction& tx); | |
1297 | ||
11fdf7f2 | 1298 | } |