]> git.proxmox.com Git - ceph.git/blame - ceph/src/messages/MOSDOp.h
Import ceph 15.2.8
[ceph.git] / ceph / src / messages / MOSDOp.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15
16#ifndef CEPH_MOSDOP_H
17#define CEPH_MOSDOP_H
18
9f95a23c
TL
19#include <atomic>
20
7c673cae
FG
21#include "MOSDFastDispatchOp.h"
22#include "include/ceph_features.h"
23#include "common/hobject.h"
7c673cae
FG
24
25/*
26 * OSD op
27 *
28 * oid - object id
29 * op - OSD_OP_DELETE, etc.
30 *
31 */
32
33class OSD;
34
9f95a23c 35class MOSDOp : public MOSDFastDispatchOp {
11fdf7f2
TL
36private:
37 static constexpr int HEAD_VERSION = 8;
38 static constexpr int COMPAT_VERSION = 3;
7c673cae
FG
39
40private:
d2e6a577
FG
41 uint32_t client_inc = 0;
42 __u32 osdmap_epoch = 0;
43 __u32 flags = 0;
7c673cae 44 utime_t mtime;
d2e6a577 45 int32_t retry_attempt = -1; // 0 is first attempt. -1 if we don't know.
7c673cae
FG
46
47 hobject_t hobj;
48 spg_t pgid;
9f95a23c 49 ceph::buffer::list::const_iterator p;
11fdf7f2 50 // Decoding flags. Decoding is only needed for messages caught by pipe reader.
7c673cae
FG
51 // Transition from true -> false without locks being held
52 // Can never see final_decode_needed == false and partial_decode_needed == true
9f95a23c
TL
53 std::atomic<bool> partial_decode_needed;
54 std::atomic<bool> final_decode_needed;
7c673cae
FG
55 //
56public:
9f95a23c 57 std::vector<OSDOp> ops;
7c673cae
FG
58private:
59 snapid_t snap_seq;
9f95a23c 60 std::vector<snapid_t> snaps;
7c673cae
FG
61
62 uint64_t features;
11fdf7f2 63 bool bdata_encode;
7c673cae
FG
64 osd_reqid_t reqid; // reqid explicitly set by sender
65
66public:
67 friend class MOSDOpReply;
68
69 ceph_tid_t get_client_tid() { return header.tid; }
70 void set_snapid(const snapid_t& s) {
71 hobj.snap = s;
72 }
9f95a23c 73 void set_snaps(const std::vector<snapid_t>& i) {
7c673cae
FG
74 snaps = i;
75 }
76 void set_snap_seq(const snapid_t& s) { snap_seq = s; }
77 void set_reqid(const osd_reqid_t rid) {
78 reqid = rid;
79 }
80 void set_spg(spg_t p) {
81 pgid = p;
82 }
83
84 // Fields decoded in partial decoding
85 pg_t get_pg() const {
11fdf7f2 86 ceph_assert(!partial_decode_needed);
7c673cae
FG
87 return pgid.pgid;
88 }
89 spg_t get_spg() const override {
11fdf7f2 90 ceph_assert(!partial_decode_needed);
7c673cae
FG
91 return pgid;
92 }
93 pg_t get_raw_pg() const {
11fdf7f2 94 ceph_assert(!partial_decode_needed);
7c673cae
FG
95 return pg_t(hobj.get_hash(), pgid.pgid.pool());
96 }
97 epoch_t get_map_epoch() const override {
11fdf7f2 98 ceph_assert(!partial_decode_needed);
7c673cae
FG
99 return osdmap_epoch;
100 }
101 int get_flags() const {
11fdf7f2 102 ceph_assert(!partial_decode_needed);
7c673cae
FG
103 return flags;
104 }
105 osd_reqid_t get_reqid() const {
11fdf7f2 106 ceph_assert(!partial_decode_needed);
7c673cae
FG
107 if (reqid.name != entity_name_t() || reqid.tid != 0) {
108 return reqid;
109 } else {
110 if (!final_decode_needed)
11fdf7f2 111 ceph_assert(reqid.inc == (int32_t)client_inc); // decode() should have done this
7c673cae
FG
112 return osd_reqid_t(get_orig_source(),
113 reqid.inc,
114 header.tid);
115 }
116 }
117
118 // Fields decoded in final decoding
119 int get_client_inc() const {
11fdf7f2 120 ceph_assert(!final_decode_needed);
7c673cae
FG
121 return client_inc;
122 }
123 utime_t get_mtime() const {
11fdf7f2 124 ceph_assert(!final_decode_needed);
7c673cae
FG
125 return mtime;
126 }
127 object_locator_t get_object_locator() const {
11fdf7f2 128 ceph_assert(!final_decode_needed);
7c673cae
FG
129 if (hobj.oid.name.empty())
130 return object_locator_t(hobj.pool, hobj.nspace, hobj.get_hash());
131 else
132 return object_locator_t(hobj);
133 }
134 const object_t& get_oid() const {
11fdf7f2 135 ceph_assert(!final_decode_needed);
7c673cae
FG
136 return hobj.oid;
137 }
138 const hobject_t &get_hobj() const {
139 return hobj;
140 }
141 snapid_t get_snapid() const {
11fdf7f2 142 ceph_assert(!final_decode_needed);
7c673cae
FG
143 return hobj.snap;
144 }
145 const snapid_t& get_snap_seq() const {
11fdf7f2 146 ceph_assert(!final_decode_needed);
7c673cae
FG
147 return snap_seq;
148 }
9f95a23c 149 const std::vector<snapid_t> &get_snaps() const {
11fdf7f2 150 ceph_assert(!final_decode_needed);
7c673cae
FG
151 return snaps;
152 }
153
154 /**
155 * get retry attempt
156 *
157 * 0 is the first attempt.
158 *
159 * @return retry attempt, or -1 if we don't know
160 */
161 int get_retry_attempt() const {
162 return retry_attempt;
163 }
164 uint64_t get_features() const {
165 if (features)
166 return features;
167 return get_connection()->get_features();
168 }
169
170 MOSDOp()
9f95a23c 171 : MOSDFastDispatchOp(CEPH_MSG_OSD_OP, HEAD_VERSION, COMPAT_VERSION),
7c673cae 172 partial_decode_needed(true),
11fdf7f2
TL
173 final_decode_needed(true),
174 bdata_encode(false) { }
7c673cae
FG
175 MOSDOp(int inc, long tid, const hobject_t& ho, spg_t& _pgid,
176 epoch_t _osdmap_epoch,
177 int _flags, uint64_t feat)
9f95a23c 178 : MOSDFastDispatchOp(CEPH_MSG_OSD_OP, HEAD_VERSION, COMPAT_VERSION),
7c673cae
FG
179 client_inc(inc),
180 osdmap_epoch(_osdmap_epoch), flags(_flags), retry_attempt(-1),
181 hobj(ho),
182 pgid(_pgid),
183 partial_decode_needed(false),
184 final_decode_needed(false),
11fdf7f2
TL
185 features(feat),
186 bdata_encode(false) {
7c673cae
FG
187 set_tid(tid);
188
189 // also put the client_inc in reqid.inc, so that get_reqid() can
190 // be used before the full message is decoded.
191 reqid.inc = inc;
192 }
193private:
194 ~MOSDOp() override {}
195
196public:
197 void set_mtime(utime_t mt) { mtime = mt; }
198 void set_mtime(ceph::real_time mt) {
199 mtime = ceph::real_clock::to_timespec(mt);
200 }
201
202 // ops
203 void add_simple_op(int o, uint64_t off, uint64_t len) {
204 OSDOp osd_op;
205 osd_op.op.op = o;
206 osd_op.op.extent.offset = off;
207 osd_op.op.extent.length = len;
208 ops.push_back(osd_op);
209 }
9f95a23c 210 void write(uint64_t off, uint64_t len, ceph::buffer::list& bl) {
7c673cae
FG
211 add_simple_op(CEPH_OSD_OP_WRITE, off, len);
212 data.claim(bl);
213 header.data_off = off;
214 }
9f95a23c 215 void writefull(ceph::buffer::list& bl) {
7c673cae
FG
216 add_simple_op(CEPH_OSD_OP_WRITEFULL, 0, bl.length());
217 data.claim(bl);
218 header.data_off = 0;
219 }
220 void zero(uint64_t off, uint64_t len) {
221 add_simple_op(CEPH_OSD_OP_ZERO, off, len);
222 }
223 void truncate(uint64_t off) {
224 add_simple_op(CEPH_OSD_OP_TRUNCATE, off, 0);
225 }
226 void remove() {
227 add_simple_op(CEPH_OSD_OP_DELETE, 0, 0);
228 }
229
230 void read(uint64_t off, uint64_t len) {
231 add_simple_op(CEPH_OSD_OP_READ, off, len);
232 }
233 void stat() {
234 add_simple_op(CEPH_OSD_OP_STAT, 0, 0);
235 }
236
237 bool has_flag(__u32 flag) const { return flags & flag; };
238
239 bool is_retry_attempt() const { return flags & CEPH_OSD_FLAG_RETRY; }
240 void set_retry_attempt(unsigned a) {
241 if (a)
242 flags |= CEPH_OSD_FLAG_RETRY;
243 else
244 flags &= ~CEPH_OSD_FLAG_RETRY;
245 retry_attempt = a;
246 }
247
248 // marshalling
249 void encode_payload(uint64_t features) override {
11fdf7f2
TL
250 using ceph::encode;
251 if( false == bdata_encode ) {
252 OSDOp::merge_osd_op_vector_in_data(ops, data);
253 bdata_encode = true;
254 }
7c673cae
FG
255
256 if ((features & CEPH_FEATURE_OBJECTLOCATOR) == 0) {
257 // here is the old structure we are encoding to: //
258#if 0
259struct ceph_osd_request_head {
9f95a23c 260 ceph_le32 client_inc; /* client incarnation */
7c673cae 261 struct ceph_object_layout layout; /* pgid */
9f95a23c 262 ceph_le32 osdmap_epoch; /* client's osdmap epoch */
7c673cae 263
9f95a23c 264 ceph_le32 flags;
7c673cae
FG
265
266 struct ceph_timespec mtime; /* for mutations only */
267 struct ceph_eversion reassert_version; /* if we are replaying op */
268
9f95a23c 269 ceph_le32 object_len; /* length of object name */
7c673cae 270
9f95a23c
TL
271 ceph_le64 snapid; /* snapid to read */
272 ceph_le64 snap_seq; /* writer's snap context */
273 ceph_le32 num_snaps;
7c673cae 274
9f95a23c 275 ceph_le16 num_ops;
7c673cae
FG
276 struct ceph_osd_op ops[]; /* followed by ops[], obj, ticket, snaps */
277} __attribute__ ((packed));
278#endif
279 header.version = 1;
280
11fdf7f2 281 encode(client_inc, payload);
7c673cae
FG
282
283 __u32 su = 0;
11fdf7f2
TL
284 encode(get_raw_pg(), payload);
285 encode(su, payload);
7c673cae 286
11fdf7f2
TL
287 encode(osdmap_epoch, payload);
288 encode(flags, payload);
289 encode(mtime, payload);
290 encode(eversion_t(), payload); // reassert_version
7c673cae
FG
291
292 __u32 oid_len = hobj.oid.name.length();
11fdf7f2
TL
293 encode(oid_len, payload);
294 encode(hobj.snap, payload);
295 encode(snap_seq, payload);
7c673cae 296 __u32 num_snaps = snaps.size();
11fdf7f2 297 encode(num_snaps, payload);
7c673cae
FG
298
299 //::encode(ops, payload);
300 __u16 num_ops = ops.size();
11fdf7f2 301 encode(num_ops, payload);
7c673cae 302 for (unsigned i = 0; i < ops.size(); i++)
11fdf7f2 303 encode(ops[i].op, payload);
7c673cae 304
9f95a23c
TL
305 ceph::encode_nohead(hobj.oid.name, payload);
306 ceph::encode_nohead(snaps, payload);
7c673cae
FG
307 } else if ((features & CEPH_FEATURE_NEW_OSDOP_ENCODING) == 0) {
308 header.version = 6;
11fdf7f2
TL
309 encode(client_inc, payload);
310 encode(osdmap_epoch, payload);
311 encode(flags, payload);
312 encode(mtime, payload);
313 encode(eversion_t(), payload); // reassert_version
314 encode(get_object_locator(), payload);
315 encode(get_raw_pg(), payload);
7c673cae 316
11fdf7f2 317 encode(hobj.oid, payload);
7c673cae
FG
318
319 __u16 num_ops = ops.size();
11fdf7f2 320 encode(num_ops, payload);
7c673cae 321 for (unsigned i = 0; i < ops.size(); i++)
11fdf7f2 322 encode(ops[i].op, payload);
7c673cae 323
11fdf7f2
TL
324 encode(hobj.snap, payload);
325 encode(snap_seq, payload);
326 encode(snaps, payload);
7c673cae 327
11fdf7f2
TL
328 encode(retry_attempt, payload);
329 encode(features, payload);
7c673cae 330 if (reqid.name != entity_name_t() || reqid.tid != 0) {
11fdf7f2 331 encode(reqid, payload);
7c673cae
FG
332 } else {
333 // don't include client_inc in the reqid for the legacy v6
334 // encoding or else we'll confuse older peers.
11fdf7f2 335 encode(osd_reqid_t(), payload);
7c673cae
FG
336 }
337 } else if (!HAVE_FEATURE(features, RESEND_ON_SPLIT)) {
338 // reordered, v7 message encoding
339 header.version = 7;
11fdf7f2
TL
340 encode(get_raw_pg(), payload);
341 encode(osdmap_epoch, payload);
342 encode(flags, payload);
343 encode(eversion_t(), payload); // reassert_version
344 encode(reqid, payload);
345 encode(client_inc, payload);
346 encode(mtime, payload);
347 encode(get_object_locator(), payload);
348 encode(hobj.oid, payload);
7c673cae
FG
349
350 __u16 num_ops = ops.size();
11fdf7f2 351 encode(num_ops, payload);
7c673cae 352 for (unsigned i = 0; i < ops.size(); i++)
11fdf7f2 353 encode(ops[i].op, payload);
7c673cae 354
11fdf7f2
TL
355 encode(hobj.snap, payload);
356 encode(snap_seq, payload);
357 encode(snaps, payload);
7c673cae 358
11fdf7f2
TL
359 encode(retry_attempt, payload);
360 encode(features, payload);
7c673cae
FG
361 } else {
362 // latest v8 encoding with hobject_t hash separate from pgid, no
363 // reassert version
364 header.version = HEAD_VERSION;
11fdf7f2
TL
365
366 encode(pgid, payload);
367 encode(hobj.get_hash(), payload);
368 encode(osdmap_epoch, payload);
369 encode(flags, payload);
370 encode(reqid, payload);
7c673cae
FG
371 encode_trace(payload, features);
372
373 // -- above decoded up front; below decoded post-dispatch thread --
374
11fdf7f2
TL
375 encode(client_inc, payload);
376 encode(mtime, payload);
377 encode(get_object_locator(), payload);
378 encode(hobj.oid, payload);
7c673cae
FG
379
380 __u16 num_ops = ops.size();
11fdf7f2 381 encode(num_ops, payload);
7c673cae 382 for (unsigned i = 0; i < ops.size(); i++)
11fdf7f2 383 encode(ops[i].op, payload);
7c673cae 384
11fdf7f2
TL
385 encode(hobj.snap, payload);
386 encode(snap_seq, payload);
387 encode(snaps, payload);
7c673cae 388
11fdf7f2
TL
389 encode(retry_attempt, payload);
390 encode(features, payload);
7c673cae
FG
391 }
392 }
393
394 void decode_payload() override {
9f95a23c 395 using ceph::decode;
11fdf7f2
TL
396 ceph_assert(partial_decode_needed && final_decode_needed);
397 p = std::cbegin(payload);
7c673cae
FG
398
399 // Always keep here the newest version of decoding order/rule
400 if (header.version == HEAD_VERSION) {
11fdf7f2 401 decode(pgid, p); // actual pgid
7c673cae 402 uint32_t hash;
11fdf7f2 403 decode(hash, p); // raw hash value
7c673cae 404 hobj.set_hash(hash);
11fdf7f2
TL
405 decode(osdmap_epoch, p);
406 decode(flags, p);
407 decode(reqid, p);
7c673cae
FG
408 decode_trace(p);
409 } else if (header.version == 7) {
11fdf7f2 410 decode(pgid.pgid, p); // raw pgid
7c673cae 411 hobj.set_hash(pgid.pgid.ps());
11fdf7f2
TL
412 decode(osdmap_epoch, p);
413 decode(flags, p);
7c673cae 414 eversion_t reassert_version;
11fdf7f2
TL
415 decode(reassert_version, p);
416 decode(reqid, p);
7c673cae
FG
417 } else if (header.version < 2) {
418 // old decode
11fdf7f2 419 decode(client_inc, p);
7c673cae
FG
420
421 old_pg_t opgid;
9f95a23c 422 ceph::decode_raw(opgid, p);
7c673cae
FG
423 pgid.pgid = opgid;
424
425 __u32 su;
11fdf7f2 426 decode(su, p);
7c673cae 427
11fdf7f2
TL
428 decode(osdmap_epoch, p);
429 decode(flags, p);
430 decode(mtime, p);
7c673cae 431 eversion_t reassert_version;
11fdf7f2 432 decode(reassert_version, p);
7c673cae
FG
433
434 __u32 oid_len;
11fdf7f2
TL
435 decode(oid_len, p);
436 decode(hobj.snap, p);
437 decode(snap_seq, p);
7c673cae 438 __u32 num_snaps;
11fdf7f2 439 decode(num_snaps, p);
7c673cae
FG
440
441 //::decode(ops, p);
442 __u16 num_ops;
11fdf7f2 443 decode(num_ops, p);
7c673cae
FG
444 ops.resize(num_ops);
445 for (unsigned i = 0; i < num_ops; i++)
11fdf7f2 446 decode(ops[i].op, p);
7c673cae 447
9f95a23c
TL
448 ceph::decode_nohead(oid_len, hobj.oid.name, p);
449 ceph::decode_nohead(num_snaps, snaps, p);
7c673cae
FG
450
451 // recalculate pgid hash value
452 pgid.pgid.set_ps(ceph_str_hash(CEPH_STR_HASH_RJENKINS,
453 hobj.oid.name.c_str(),
454 hobj.oid.name.length()));
455 hobj.pool = pgid.pgid.pool();
456 hobj.set_hash(pgid.pgid.ps());
457
458 retry_attempt = -1;
459 features = 0;
460 OSDOp::split_osd_op_vector_in_data(ops, data);
461
462 // we did the full decode
463 final_decode_needed = false;
464
465 // put client_inc in reqid.inc for get_reqid()'s benefit
466 reqid = osd_reqid_t();
467 reqid.inc = client_inc;
468 } else if (header.version < 7) {
11fdf7f2
TL
469 decode(client_inc, p);
470 decode(osdmap_epoch, p);
471 decode(flags, p);
472 decode(mtime, p);
7c673cae 473 eversion_t reassert_version;
11fdf7f2 474 decode(reassert_version, p);
7c673cae
FG
475
476 object_locator_t oloc;
11fdf7f2 477 decode(oloc, p);
7c673cae
FG
478
479 if (header.version < 3) {
480 old_pg_t opgid;
9f95a23c 481 ceph::decode_raw(opgid, p);
7c673cae
FG
482 pgid.pgid = opgid;
483 } else {
11fdf7f2 484 decode(pgid.pgid, p);
7c673cae
FG
485 }
486
11fdf7f2 487 decode(hobj.oid, p);
7c673cae
FG
488
489 //::decode(ops, p);
490 __u16 num_ops;
11fdf7f2 491 decode(num_ops, p);
7c673cae
FG
492 ops.resize(num_ops);
493 for (unsigned i = 0; i < num_ops; i++)
11fdf7f2 494 decode(ops[i].op, p);
7c673cae 495
11fdf7f2
TL
496 decode(hobj.snap, p);
497 decode(snap_seq, p);
498 decode(snaps, p);
7c673cae
FG
499
500 if (header.version >= 4)
11fdf7f2 501 decode(retry_attempt, p);
7c673cae
FG
502 else
503 retry_attempt = -1;
504
505 if (header.version >= 5)
11fdf7f2 506 decode(features, p);
7c673cae
FG
507 else
508 features = 0;
509
510 if (header.version >= 6)
11fdf7f2 511 decode(reqid, p);
7c673cae
FG
512 else
513 reqid = osd_reqid_t();
514
515 hobj.pool = pgid.pgid.pool();
516 hobj.set_key(oloc.key);
517 hobj.nspace = oloc.nspace;
518 hobj.set_hash(pgid.pgid.ps());
519
520 OSDOp::split_osd_op_vector_in_data(ops, data);
521
522 // we did the full decode
523 final_decode_needed = false;
524
525 // put client_inc in reqid.inc for get_reqid()'s benefit
526 if (reqid.name == entity_name_t() && reqid.tid == 0)
527 reqid.inc = client_inc;
528 }
529
530 partial_decode_needed = false;
531 }
532
533 bool finish_decode() {
9f95a23c 534 using ceph::decode;
11fdf7f2 535 ceph_assert(!partial_decode_needed); // partial decoding required
7c673cae
FG
536 if (!final_decode_needed)
537 return false; // Message is already final decoded
11fdf7f2 538 ceph_assert(header.version >= 7);
7c673cae 539
11fdf7f2
TL
540 decode(client_inc, p);
541 decode(mtime, p);
7c673cae 542 object_locator_t oloc;
11fdf7f2
TL
543 decode(oloc, p);
544 decode(hobj.oid, p);
7c673cae
FG
545
546 __u16 num_ops;
11fdf7f2 547 decode(num_ops, p);
7c673cae
FG
548 ops.resize(num_ops);
549 for (unsigned i = 0; i < num_ops; i++)
11fdf7f2 550 decode(ops[i].op, p);
7c673cae 551
11fdf7f2
TL
552 decode(hobj.snap, p);
553 decode(snap_seq, p);
554 decode(snaps, p);
7c673cae 555
11fdf7f2 556 decode(retry_attempt, p);
7c673cae 557
11fdf7f2 558 decode(features, p);
7c673cae
FG
559
560 hobj.pool = pgid.pgid.pool();
561 hobj.set_key(oloc.key);
562 hobj.nspace = oloc.nspace;
563
564 OSDOp::split_osd_op_vector_in_data(ops, data);
565
566 final_decode_needed = false;
567 return true;
568 }
569
570 void clear_buffers() override {
224ce89b 571 OSDOp::clear_data(ops);
11fdf7f2 572 bdata_encode = false;
7c673cae
FG
573 }
574
11fdf7f2 575 std::string_view get_type_name() const override { return "osd_op"; }
9f95a23c 576 void print(std::ostream& out) const override {
7c673cae
FG
577 out << "osd_op(";
578 if (!partial_decode_needed) {
579 out << get_reqid() << ' ';
580 out << pgid;
581 if (!final_decode_needed) {
582 out << ' ';
583 out << hobj
584 << " " << ops
585 << " snapc " << get_snap_seq() << "=" << snaps;
586 if (is_retry_attempt())
587 out << " RETRY=" << get_retry_attempt();
588 } else {
589 out << " " << get_raw_pg() << " (undecoded)";
590 }
591 out << " " << ceph_osd_flag_string(get_flags());
592 out << " e" << osdmap_epoch;
593 }
594 out << ")";
595 }
9f95a23c
TL
596
597private:
598 template<class T, typename... Args>
599 friend boost::intrusive_ptr<T> ceph::make_message(Args&&... args);
7c673cae
FG
600};
601
602
603#endif