]> git.proxmox.com Git - ceph.git/blame - ceph/src/messages/MOSDOp.h
import quincy beta 17.1.0
[ceph.git] / ceph / src / messages / MOSDOp.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15
16#ifndef CEPH_MOSDOP_H
17#define CEPH_MOSDOP_H
18
9f95a23c
TL
19#include <atomic>
20
7c673cae
FG
21#include "MOSDFastDispatchOp.h"
22#include "include/ceph_features.h"
23#include "common/hobject.h"
7c673cae
FG
24
25/*
26 * OSD op
27 *
28 * oid - object id
29 * op - OSD_OP_DELETE, etc.
30 *
31 */
32
f67539c2 33class MOSDOpReply;
7c673cae 34
f67539c2
TL
35namespace _mosdop {
36template<typename V>
37class MOSDOp final : public MOSDFastDispatchOp {
11fdf7f2
TL
38private:
39 static constexpr int HEAD_VERSION = 8;
40 static constexpr int COMPAT_VERSION = 3;
7c673cae
FG
41
42private:
d2e6a577
FG
43 uint32_t client_inc = 0;
44 __u32 osdmap_epoch = 0;
45 __u32 flags = 0;
7c673cae 46 utime_t mtime;
d2e6a577 47 int32_t retry_attempt = -1; // 0 is first attempt. -1 if we don't know.
7c673cae
FG
48
49 hobject_t hobj;
50 spg_t pgid;
9f95a23c 51 ceph::buffer::list::const_iterator p;
11fdf7f2 52 // Decoding flags. Decoding is only needed for messages caught by pipe reader.
7c673cae
FG
53 // Transition from true -> false without locks being held
54 // Can never see final_decode_needed == false and partial_decode_needed == true
9f95a23c
TL
55 std::atomic<bool> partial_decode_needed;
56 std::atomic<bool> final_decode_needed;
7c673cae
FG
57 //
58public:
f67539c2 59 V ops;
7c673cae
FG
60private:
61 snapid_t snap_seq;
9f95a23c 62 std::vector<snapid_t> snaps;
7c673cae
FG
63
64 uint64_t features;
11fdf7f2 65 bool bdata_encode;
7c673cae
FG
66 osd_reqid_t reqid; // reqid explicitly set by sender
67
68public:
f67539c2 69 friend MOSDOpReply;
7c673cae
FG
70
71 ceph_tid_t get_client_tid() { return header.tid; }
72 void set_snapid(const snapid_t& s) {
73 hobj.snap = s;
74 }
9f95a23c 75 void set_snaps(const std::vector<snapid_t>& i) {
7c673cae
FG
76 snaps = i;
77 }
78 void set_snap_seq(const snapid_t& s) { snap_seq = s; }
79 void set_reqid(const osd_reqid_t rid) {
80 reqid = rid;
81 }
82 void set_spg(spg_t p) {
83 pgid = p;
84 }
85
86 // Fields decoded in partial decoding
87 pg_t get_pg() const {
11fdf7f2 88 ceph_assert(!partial_decode_needed);
7c673cae
FG
89 return pgid.pgid;
90 }
91 spg_t get_spg() const override {
11fdf7f2 92 ceph_assert(!partial_decode_needed);
7c673cae
FG
93 return pgid;
94 }
95 pg_t get_raw_pg() const {
11fdf7f2 96 ceph_assert(!partial_decode_needed);
7c673cae
FG
97 return pg_t(hobj.get_hash(), pgid.pgid.pool());
98 }
99 epoch_t get_map_epoch() const override {
11fdf7f2 100 ceph_assert(!partial_decode_needed);
7c673cae
FG
101 return osdmap_epoch;
102 }
103 int get_flags() const {
11fdf7f2 104 ceph_assert(!partial_decode_needed);
7c673cae
FG
105 return flags;
106 }
107 osd_reqid_t get_reqid() const {
11fdf7f2 108 ceph_assert(!partial_decode_needed);
7c673cae
FG
109 if (reqid.name != entity_name_t() || reqid.tid != 0) {
110 return reqid;
111 } else {
112 if (!final_decode_needed)
11fdf7f2 113 ceph_assert(reqid.inc == (int32_t)client_inc); // decode() should have done this
7c673cae
FG
114 return osd_reqid_t(get_orig_source(),
115 reqid.inc,
116 header.tid);
117 }
118 }
119
120 // Fields decoded in final decoding
121 int get_client_inc() const {
11fdf7f2 122 ceph_assert(!final_decode_needed);
7c673cae
FG
123 return client_inc;
124 }
125 utime_t get_mtime() const {
11fdf7f2 126 ceph_assert(!final_decode_needed);
7c673cae
FG
127 return mtime;
128 }
129 object_locator_t get_object_locator() const {
11fdf7f2 130 ceph_assert(!final_decode_needed);
7c673cae
FG
131 if (hobj.oid.name.empty())
132 return object_locator_t(hobj.pool, hobj.nspace, hobj.get_hash());
133 else
134 return object_locator_t(hobj);
135 }
136 const object_t& get_oid() const {
11fdf7f2 137 ceph_assert(!final_decode_needed);
7c673cae
FG
138 return hobj.oid;
139 }
140 const hobject_t &get_hobj() const {
141 return hobj;
142 }
143 snapid_t get_snapid() const {
11fdf7f2 144 ceph_assert(!final_decode_needed);
7c673cae
FG
145 return hobj.snap;
146 }
147 const snapid_t& get_snap_seq() const {
11fdf7f2 148 ceph_assert(!final_decode_needed);
7c673cae
FG
149 return snap_seq;
150 }
9f95a23c 151 const std::vector<snapid_t> &get_snaps() const {
11fdf7f2 152 ceph_assert(!final_decode_needed);
7c673cae
FG
153 return snaps;
154 }
155
156 /**
157 * get retry attempt
158 *
159 * 0 is the first attempt.
160 *
161 * @return retry attempt, or -1 if we don't know
162 */
163 int get_retry_attempt() const {
164 return retry_attempt;
165 }
166 uint64_t get_features() const {
167 if (features)
168 return features;
169 return get_connection()->get_features();
170 }
171
172 MOSDOp()
9f95a23c 173 : MOSDFastDispatchOp(CEPH_MSG_OSD_OP, HEAD_VERSION, COMPAT_VERSION),
7c673cae 174 partial_decode_needed(true),
11fdf7f2
TL
175 final_decode_needed(true),
176 bdata_encode(false) { }
7c673cae
FG
177 MOSDOp(int inc, long tid, const hobject_t& ho, spg_t& _pgid,
178 epoch_t _osdmap_epoch,
179 int _flags, uint64_t feat)
9f95a23c 180 : MOSDFastDispatchOp(CEPH_MSG_OSD_OP, HEAD_VERSION, COMPAT_VERSION),
7c673cae
FG
181 client_inc(inc),
182 osdmap_epoch(_osdmap_epoch), flags(_flags), retry_attempt(-1),
183 hobj(ho),
184 pgid(_pgid),
185 partial_decode_needed(false),
186 final_decode_needed(false),
11fdf7f2
TL
187 features(feat),
188 bdata_encode(false) {
7c673cae
FG
189 set_tid(tid);
190
191 // also put the client_inc in reqid.inc, so that get_reqid() can
192 // be used before the full message is decoded.
193 reqid.inc = inc;
194 }
195private:
f67539c2 196 ~MOSDOp() final {}
7c673cae
FG
197
198public:
199 void set_mtime(utime_t mt) { mtime = mt; }
200 void set_mtime(ceph::real_time mt) {
201 mtime = ceph::real_clock::to_timespec(mt);
202 }
203
204 // ops
205 void add_simple_op(int o, uint64_t off, uint64_t len) {
206 OSDOp osd_op;
207 osd_op.op.op = o;
208 osd_op.op.extent.offset = off;
209 osd_op.op.extent.length = len;
210 ops.push_back(osd_op);
211 }
9f95a23c 212 void write(uint64_t off, uint64_t len, ceph::buffer::list& bl) {
7c673cae 213 add_simple_op(CEPH_OSD_OP_WRITE, off, len);
f67539c2 214 data = std::move(bl);
7c673cae
FG
215 header.data_off = off;
216 }
9f95a23c 217 void writefull(ceph::buffer::list& bl) {
7c673cae 218 add_simple_op(CEPH_OSD_OP_WRITEFULL, 0, bl.length());
f67539c2 219 data = std::move(bl);
7c673cae
FG
220 header.data_off = 0;
221 }
222 void zero(uint64_t off, uint64_t len) {
223 add_simple_op(CEPH_OSD_OP_ZERO, off, len);
224 }
225 void truncate(uint64_t off) {
226 add_simple_op(CEPH_OSD_OP_TRUNCATE, off, 0);
227 }
228 void remove() {
229 add_simple_op(CEPH_OSD_OP_DELETE, 0, 0);
230 }
231
232 void read(uint64_t off, uint64_t len) {
233 add_simple_op(CEPH_OSD_OP_READ, off, len);
234 }
235 void stat() {
236 add_simple_op(CEPH_OSD_OP_STAT, 0, 0);
237 }
238
239 bool has_flag(__u32 flag) const { return flags & flag; };
240
241 bool is_retry_attempt() const { return flags & CEPH_OSD_FLAG_RETRY; }
242 void set_retry_attempt(unsigned a) {
243 if (a)
244 flags |= CEPH_OSD_FLAG_RETRY;
245 else
246 flags &= ~CEPH_OSD_FLAG_RETRY;
247 retry_attempt = a;
248 }
249
250 // marshalling
251 void encode_payload(uint64_t features) override {
11fdf7f2
TL
252 using ceph::encode;
253 if( false == bdata_encode ) {
254 OSDOp::merge_osd_op_vector_in_data(ops, data);
255 bdata_encode = true;
256 }
7c673cae
FG
257
258 if ((features & CEPH_FEATURE_OBJECTLOCATOR) == 0) {
259 // here is the old structure we are encoding to: //
260#if 0
261struct ceph_osd_request_head {
9f95a23c 262 ceph_le32 client_inc; /* client incarnation */
7c673cae 263 struct ceph_object_layout layout; /* pgid */
9f95a23c 264 ceph_le32 osdmap_epoch; /* client's osdmap epoch */
7c673cae 265
9f95a23c 266 ceph_le32 flags;
7c673cae
FG
267
268 struct ceph_timespec mtime; /* for mutations only */
269 struct ceph_eversion reassert_version; /* if we are replaying op */
270
9f95a23c 271 ceph_le32 object_len; /* length of object name */
7c673cae 272
9f95a23c
TL
273 ceph_le64 snapid; /* snapid to read */
274 ceph_le64 snap_seq; /* writer's snap context */
275 ceph_le32 num_snaps;
7c673cae 276
9f95a23c 277 ceph_le16 num_ops;
7c673cae
FG
278 struct ceph_osd_op ops[]; /* followed by ops[], obj, ticket, snaps */
279} __attribute__ ((packed));
280#endif
281 header.version = 1;
282
11fdf7f2 283 encode(client_inc, payload);
7c673cae
FG
284
285 __u32 su = 0;
11fdf7f2
TL
286 encode(get_raw_pg(), payload);
287 encode(su, payload);
7c673cae 288
11fdf7f2
TL
289 encode(osdmap_epoch, payload);
290 encode(flags, payload);
291 encode(mtime, payload);
292 encode(eversion_t(), payload); // reassert_version
7c673cae
FG
293
294 __u32 oid_len = hobj.oid.name.length();
11fdf7f2
TL
295 encode(oid_len, payload);
296 encode(hobj.snap, payload);
297 encode(snap_seq, payload);
7c673cae 298 __u32 num_snaps = snaps.size();
11fdf7f2 299 encode(num_snaps, payload);
7c673cae
FG
300
301 //::encode(ops, payload);
302 __u16 num_ops = ops.size();
11fdf7f2 303 encode(num_ops, payload);
7c673cae 304 for (unsigned i = 0; i < ops.size(); i++)
11fdf7f2 305 encode(ops[i].op, payload);
7c673cae 306
9f95a23c
TL
307 ceph::encode_nohead(hobj.oid.name, payload);
308 ceph::encode_nohead(snaps, payload);
7c673cae
FG
309 } else if ((features & CEPH_FEATURE_NEW_OSDOP_ENCODING) == 0) {
310 header.version = 6;
11fdf7f2
TL
311 encode(client_inc, payload);
312 encode(osdmap_epoch, payload);
313 encode(flags, payload);
314 encode(mtime, payload);
315 encode(eversion_t(), payload); // reassert_version
316 encode(get_object_locator(), payload);
317 encode(get_raw_pg(), payload);
7c673cae 318
11fdf7f2 319 encode(hobj.oid, payload);
7c673cae
FG
320
321 __u16 num_ops = ops.size();
11fdf7f2 322 encode(num_ops, payload);
7c673cae 323 for (unsigned i = 0; i < ops.size(); i++)
11fdf7f2 324 encode(ops[i].op, payload);
7c673cae 325
11fdf7f2
TL
326 encode(hobj.snap, payload);
327 encode(snap_seq, payload);
328 encode(snaps, payload);
7c673cae 329
11fdf7f2
TL
330 encode(retry_attempt, payload);
331 encode(features, payload);
7c673cae 332 if (reqid.name != entity_name_t() || reqid.tid != 0) {
11fdf7f2 333 encode(reqid, payload);
7c673cae
FG
334 } else {
335 // don't include client_inc in the reqid for the legacy v6
336 // encoding or else we'll confuse older peers.
11fdf7f2 337 encode(osd_reqid_t(), payload);
7c673cae
FG
338 }
339 } else if (!HAVE_FEATURE(features, RESEND_ON_SPLIT)) {
340 // reordered, v7 message encoding
341 header.version = 7;
11fdf7f2
TL
342 encode(get_raw_pg(), payload);
343 encode(osdmap_epoch, payload);
344 encode(flags, payload);
345 encode(eversion_t(), payload); // reassert_version
346 encode(reqid, payload);
347 encode(client_inc, payload);
348 encode(mtime, payload);
349 encode(get_object_locator(), payload);
350 encode(hobj.oid, payload);
7c673cae
FG
351
352 __u16 num_ops = ops.size();
11fdf7f2 353 encode(num_ops, payload);
7c673cae 354 for (unsigned i = 0; i < ops.size(); i++)
11fdf7f2 355 encode(ops[i].op, payload);
7c673cae 356
11fdf7f2
TL
357 encode(hobj.snap, payload);
358 encode(snap_seq, payload);
359 encode(snaps, payload);
7c673cae 360
11fdf7f2
TL
361 encode(retry_attempt, payload);
362 encode(features, payload);
7c673cae
FG
363 } else {
364 // latest v8 encoding with hobject_t hash separate from pgid, no
365 // reassert version
366 header.version = HEAD_VERSION;
11fdf7f2
TL
367
368 encode(pgid, payload);
369 encode(hobj.get_hash(), payload);
370 encode(osdmap_epoch, payload);
371 encode(flags, payload);
372 encode(reqid, payload);
7c673cae
FG
373 encode_trace(payload, features);
374
375 // -- above decoded up front; below decoded post-dispatch thread --
376
11fdf7f2
TL
377 encode(client_inc, payload);
378 encode(mtime, payload);
379 encode(get_object_locator(), payload);
380 encode(hobj.oid, payload);
7c673cae
FG
381
382 __u16 num_ops = ops.size();
11fdf7f2 383 encode(num_ops, payload);
7c673cae 384 for (unsigned i = 0; i < ops.size(); i++)
11fdf7f2 385 encode(ops[i].op, payload);
7c673cae 386
11fdf7f2
TL
387 encode(hobj.snap, payload);
388 encode(snap_seq, payload);
389 encode(snaps, payload);
7c673cae 390
11fdf7f2
TL
391 encode(retry_attempt, payload);
392 encode(features, payload);
7c673cae
FG
393 }
394 }
395
396 void decode_payload() override {
9f95a23c 397 using ceph::decode;
11fdf7f2
TL
398 ceph_assert(partial_decode_needed && final_decode_needed);
399 p = std::cbegin(payload);
7c673cae
FG
400
401 // Always keep here the newest version of decoding order/rule
402 if (header.version == HEAD_VERSION) {
11fdf7f2 403 decode(pgid, p); // actual pgid
7c673cae 404 uint32_t hash;
11fdf7f2 405 decode(hash, p); // raw hash value
7c673cae 406 hobj.set_hash(hash);
11fdf7f2
TL
407 decode(osdmap_epoch, p);
408 decode(flags, p);
409 decode(reqid, p);
7c673cae
FG
410 decode_trace(p);
411 } else if (header.version == 7) {
11fdf7f2 412 decode(pgid.pgid, p); // raw pgid
7c673cae 413 hobj.set_hash(pgid.pgid.ps());
11fdf7f2
TL
414 decode(osdmap_epoch, p);
415 decode(flags, p);
7c673cae 416 eversion_t reassert_version;
11fdf7f2
TL
417 decode(reassert_version, p);
418 decode(reqid, p);
7c673cae
FG
419 } else if (header.version < 2) {
420 // old decode
11fdf7f2 421 decode(client_inc, p);
7c673cae
FG
422
423 old_pg_t opgid;
9f95a23c 424 ceph::decode_raw(opgid, p);
7c673cae
FG
425 pgid.pgid = opgid;
426
427 __u32 su;
11fdf7f2 428 decode(su, p);
7c673cae 429
11fdf7f2
TL
430 decode(osdmap_epoch, p);
431 decode(flags, p);
432 decode(mtime, p);
7c673cae 433 eversion_t reassert_version;
11fdf7f2 434 decode(reassert_version, p);
7c673cae
FG
435
436 __u32 oid_len;
11fdf7f2
TL
437 decode(oid_len, p);
438 decode(hobj.snap, p);
439 decode(snap_seq, p);
7c673cae 440 __u32 num_snaps;
11fdf7f2 441 decode(num_snaps, p);
7c673cae
FG
442
443 //::decode(ops, p);
444 __u16 num_ops;
11fdf7f2 445 decode(num_ops, p);
7c673cae
FG
446 ops.resize(num_ops);
447 for (unsigned i = 0; i < num_ops; i++)
11fdf7f2 448 decode(ops[i].op, p);
7c673cae 449
9f95a23c
TL
450 ceph::decode_nohead(oid_len, hobj.oid.name, p);
451 ceph::decode_nohead(num_snaps, snaps, p);
7c673cae
FG
452
453 // recalculate pgid hash value
454 pgid.pgid.set_ps(ceph_str_hash(CEPH_STR_HASH_RJENKINS,
455 hobj.oid.name.c_str(),
456 hobj.oid.name.length()));
457 hobj.pool = pgid.pgid.pool();
458 hobj.set_hash(pgid.pgid.ps());
459
460 retry_attempt = -1;
461 features = 0;
462 OSDOp::split_osd_op_vector_in_data(ops, data);
463
464 // we did the full decode
465 final_decode_needed = false;
466
467 // put client_inc in reqid.inc for get_reqid()'s benefit
468 reqid = osd_reqid_t();
469 reqid.inc = client_inc;
470 } else if (header.version < 7) {
11fdf7f2
TL
471 decode(client_inc, p);
472 decode(osdmap_epoch, p);
473 decode(flags, p);
474 decode(mtime, p);
7c673cae 475 eversion_t reassert_version;
11fdf7f2 476 decode(reassert_version, p);
7c673cae
FG
477
478 object_locator_t oloc;
11fdf7f2 479 decode(oloc, p);
7c673cae
FG
480
481 if (header.version < 3) {
482 old_pg_t opgid;
9f95a23c 483 ceph::decode_raw(opgid, p);
7c673cae
FG
484 pgid.pgid = opgid;
485 } else {
11fdf7f2 486 decode(pgid.pgid, p);
7c673cae
FG
487 }
488
11fdf7f2 489 decode(hobj.oid, p);
7c673cae
FG
490
491 //::decode(ops, p);
492 __u16 num_ops;
11fdf7f2 493 decode(num_ops, p);
7c673cae
FG
494 ops.resize(num_ops);
495 for (unsigned i = 0; i < num_ops; i++)
11fdf7f2 496 decode(ops[i].op, p);
7c673cae 497
11fdf7f2
TL
498 decode(hobj.snap, p);
499 decode(snap_seq, p);
500 decode(snaps, p);
7c673cae
FG
501
502 if (header.version >= 4)
11fdf7f2 503 decode(retry_attempt, p);
7c673cae
FG
504 else
505 retry_attempt = -1;
506
507 if (header.version >= 5)
11fdf7f2 508 decode(features, p);
7c673cae
FG
509 else
510 features = 0;
511
512 if (header.version >= 6)
11fdf7f2 513 decode(reqid, p);
7c673cae
FG
514 else
515 reqid = osd_reqid_t();
516
517 hobj.pool = pgid.pgid.pool();
518 hobj.set_key(oloc.key);
519 hobj.nspace = oloc.nspace;
520 hobj.set_hash(pgid.pgid.ps());
521
522 OSDOp::split_osd_op_vector_in_data(ops, data);
523
524 // we did the full decode
525 final_decode_needed = false;
526
527 // put client_inc in reqid.inc for get_reqid()'s benefit
528 if (reqid.name == entity_name_t() && reqid.tid == 0)
529 reqid.inc = client_inc;
530 }
531
532 partial_decode_needed = false;
533 }
534
535 bool finish_decode() {
9f95a23c 536 using ceph::decode;
11fdf7f2 537 ceph_assert(!partial_decode_needed); // partial decoding required
7c673cae
FG
538 if (!final_decode_needed)
539 return false; // Message is already final decoded
11fdf7f2 540 ceph_assert(header.version >= 7);
7c673cae 541
11fdf7f2
TL
542 decode(client_inc, p);
543 decode(mtime, p);
7c673cae 544 object_locator_t oloc;
11fdf7f2
TL
545 decode(oloc, p);
546 decode(hobj.oid, p);
7c673cae
FG
547
548 __u16 num_ops;
11fdf7f2 549 decode(num_ops, p);
7c673cae
FG
550 ops.resize(num_ops);
551 for (unsigned i = 0; i < num_ops; i++)
11fdf7f2 552 decode(ops[i].op, p);
7c673cae 553
11fdf7f2
TL
554 decode(hobj.snap, p);
555 decode(snap_seq, p);
556 decode(snaps, p);
7c673cae 557
11fdf7f2 558 decode(retry_attempt, p);
7c673cae 559
11fdf7f2 560 decode(features, p);
7c673cae
FG
561
562 hobj.pool = pgid.pgid.pool();
563 hobj.set_key(oloc.key);
564 hobj.nspace = oloc.nspace;
565
566 OSDOp::split_osd_op_vector_in_data(ops, data);
567
568 final_decode_needed = false;
569 return true;
570 }
571
572 void clear_buffers() override {
224ce89b 573 OSDOp::clear_data(ops);
11fdf7f2 574 bdata_encode = false;
7c673cae
FG
575 }
576
11fdf7f2 577 std::string_view get_type_name() const override { return "osd_op"; }
9f95a23c 578 void print(std::ostream& out) const override {
7c673cae
FG
579 out << "osd_op(";
580 if (!partial_decode_needed) {
581 out << get_reqid() << ' ';
582 out << pgid;
583 if (!final_decode_needed) {
584 out << ' ';
585 out << hobj
586 << " " << ops
587 << " snapc " << get_snap_seq() << "=" << snaps;
588 if (is_retry_attempt())
589 out << " RETRY=" << get_retry_attempt();
590 } else {
591 out << " " << get_raw_pg() << " (undecoded)";
592 }
593 out << " " << ceph_osd_flag_string(get_flags());
594 out << " e" << osdmap_epoch;
595 }
596 out << ")";
597 }
9f95a23c
TL
598
599private:
600 template<class T, typename... Args>
601 friend boost::intrusive_ptr<T> ceph::make_message(Args&&... args);
7c673cae 602};
f67539c2
TL
603}
604
605using MOSDOp = _mosdop::MOSDOp<std::vector<OSDOp>>;
7c673cae
FG
606
607
608#endif