1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
16 #ifndef CEPH_MOSDOPREPLY_H
17 #define CEPH_MOSDOPREPLY_H
19 #include "msg/Message.h"
22 #include "common/errno.h"
28 * op - OSD_OP_DELETE, etc.
32 class MOSDOpReply
: public Message
{
// Wire-format version numbers for this message type. Both are handed to the
// Message base-class constructor; encode_payload() also stamps HEAD_VERSION
// into header.version.
static constexpr int HEAD_VERSION = 8;
static constexpr int COMPAT_VERSION = 2;
39 std::vector
<OSDOp
> ops
;
43 eversion_t bad_replay_version
;
44 eversion_t replay_version
;
45 version_t user_version
= 0;
46 epoch_t osdmap_epoch
= 0;
47 int32_t retry_attempt
= -1;
49 request_redirect_t redirect
;
52 const object_t
& get_oid() const { return oid
; }
53 const pg_t
& get_pg() const { return pgid
; }
54 int get_flags() const { return flags
; }
56 bool is_ondisk() const { return get_flags() & CEPH_OSD_FLAG_ONDISK
; }
57 bool is_onnvram() const { return get_flags() & CEPH_OSD_FLAG_ONNVRAM
; }
59 int get_result() const { return result
; }
60 const eversion_t
& get_replay_version() const { return replay_version
; }
61 const version_t
& get_user_version() const { return user_version
; }
63 void set_result(int r
) { result
= r
; }
65 void set_reply_versions(eversion_t v
, version_t uv
) {
/* We go through some shenanigans here for backwards compatibility
 * with old clients, who do not look at our replay_version and
 * user_version but instead see what we now call the
 * bad_replay_version. On pools without caching, the user_version
 * infrastructure is a slightly-laggy copy of the regular pg
 * version/at_version infrastructure; the difference is that, unlike
 * at_version, it is not updated on watch ops -- but on updates it is
 * set equal to at_version. This means that for non-watch write ops
 * on classic pools, all three of replay_version, user_version, and
 * bad_replay_version are identical. But for watch ops the replay_version
 * has been updated, while the user_at_version has not, and the semantics
 * we promised old clients are that the version they see is not an update.
 * So set the bad_replay_version to be the same as the user_at_version. */
81 bad_replay_version
= v
;
83 bad_replay_version
.version
= uv
;
87 /* Don't fill in replay_version for non-write ops */
88 void set_enoent_reply_versions(const eversion_t
& v
, const version_t
& uv
) {
90 bad_replay_version
= v
;
93 void set_redirect(const request_redirect_t
& redir
) { redirect
= redir
; }
94 const request_redirect_t
& get_redirect() const { return redirect
; }
95 bool is_redirect_reply() const { return do_redirect
; }
97 void add_flags(int f
) { flags
|= f
; }
99 void claim_op_out_data(std::vector
<OSDOp
>& o
) {
100 ceph_assert(ops
.size() == o
.size());
101 for (unsigned i
= 0; i
< o
.size(); i
++) {
102 ops
[i
].outdata
.claim(o
[i
].outdata
);
105 void claim_ops(std::vector
<OSDOp
>& o
) {
107 bdata_encode
= false;
109 void set_op_returns(const vector
<pg_log_op_return_item_t
>& op_returns
) {
110 if (op_returns
.size()) {
111 ceph_assert(ops
.empty() || ops
.size() == op_returns
.size());
112 ops
.resize(op_returns
.size());
113 for (unsigned i
= 0; i
< op_returns
.size(); ++i
) {
114 ops
[i
].rval
= op_returns
[i
].rval
;
115 ops
[i
].outdata
= op_returns
[i
].bl
;
123 * If we don't know the attempt (because the server is old), return -1.
125 int get_retry_attempt() const {
126 return retry_attempt
;
130 epoch_t
get_map_epoch() const { return osdmap_epoch
; }
132 /*osd_reqid_t get_reqid() { return osd_reqid_t(get_dest(),
139 : Message
{CEPH_MSG_OSD_OPREPLY
, HEAD_VERSION
, COMPAT_VERSION
},
140 bdata_encode(false) {
143 MOSDOpReply(const MOSDOp
*req
, int r
, epoch_t e
, int acktype
,
144 bool ignore_out_data
)
145 : Message
{CEPH_MSG_OSD_OPREPLY
, HEAD_VERSION
, COMPAT_VERSION
},
146 oid(req
->hobj
.oid
), pgid(req
->pgid
.pgid
), ops(req
->ops
),
147 bdata_encode(false) {
149 set_tid(req
->get_tid());
152 (req
->flags
& ~(CEPH_OSD_FLAG_ONDISK
|CEPH_OSD_FLAG_ONNVRAM
|CEPH_OSD_FLAG_ACK
)) | acktype
;
155 retry_attempt
= req
->get_retry_attempt();
158 for (unsigned i
= 0; i
< ops
.size(); i
++) {
159 // zero out input data
160 ops
[i
].indata
.clear();
161 if (ignore_out_data
) {
162 // original request didn't set the RETURNVEC flag
163 ops
[i
].outdata
.clear();
168 ~MOSDOpReply() override
{}
171 void encode_payload(uint64_t features
) override
{
173 if(false == bdata_encode
) {
174 OSDOp::merge_osd_op_vector_out_data(ops
, data
);
178 if ((features
& CEPH_FEATURE_PGID64
) == 0) {
180 ceph_osd_reply_head head
;
181 memset(&head
, 0, sizeof(head
));
182 head
.layout
.ol_pgid
= pgid
.get_old_pg().v
;
184 head
.osdmap_epoch
= osdmap_epoch
;
185 head
.reassert_version
= bad_replay_version
;
186 head
.result
= result
;
187 head
.num_ops
= ops
.size();
188 head
.object_len
= oid
.name
.length();
189 encode(head
, payload
);
190 for (unsigned i
= 0; i
< head
.num_ops
; i
++) {
191 encode(ops
[i
].op
, payload
);
193 ceph::encode_nohead(oid
.name
, payload
);
195 header
.version
= HEAD_VERSION
;
196 encode(oid
, payload
);
197 encode(pgid
, payload
);
198 encode(flags
, payload
);
199 encode(result
, payload
);
200 encode(bad_replay_version
, payload
);
201 encode(osdmap_epoch
, payload
);
203 __u32 num_ops
= ops
.size();
204 encode(num_ops
, payload
);
205 for (unsigned i
= 0; i
< num_ops
; i
++)
206 encode(ops
[i
].op
, payload
);
208 encode(retry_attempt
, payload
);
210 for (unsigned i
= 0; i
< num_ops
; i
++)
211 encode(ops
[i
].rval
, payload
);
213 encode(replay_version
, payload
);
214 encode(user_version
, payload
);
215 if ((features
& CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING
) == 0) {
217 encode(redirect
, payload
);
219 do_redirect
= !redirect
.empty();
220 encode(do_redirect
, payload
);
222 encode(redirect
, payload
);
225 encode_trace(payload
, features
);
228 void decode_payload() override
{
230 auto p
= payload
.cbegin();
232 // Always keep here the newest version of decoding order/rule
233 if (header
.version
== HEAD_VERSION
) {
238 decode(bad_replay_version
, p
);
239 decode(osdmap_epoch
, p
);
241 __u32 num_ops
= ops
.size();
244 for (unsigned i
= 0; i
< num_ops
; i
++)
245 decode(ops
[i
].op
, p
);
246 decode(retry_attempt
, p
);
248 for (unsigned i
= 0; i
< num_ops
; ++i
)
249 decode(ops
[i
].rval
, p
);
251 OSDOp::split_osd_op_vector_out_data(ops
, data
);
253 decode(replay_version
, p
);
254 decode(user_version
, p
);
255 decode(do_redirect
, p
);
259 } else if (header
.version
< 2) {
260 ceph_osd_reply_head head
;
262 ops
.resize(head
.num_ops
);
263 for (unsigned i
= 0; i
< head
.num_ops
; i
++) {
264 decode(ops
[i
].op
, p
);
266 ceph::decode_nohead(head
.object_len
, oid
.name
, p
);
267 pgid
= pg_t(head
.layout
.ol_pgid
);
268 result
= (int32_t)head
.result
;
270 replay_version
= head
.reassert_version
;
271 user_version
= replay_version
.version
;
272 osdmap_epoch
= head
.osdmap_epoch
;
279 decode(bad_replay_version
, p
);
280 decode(osdmap_epoch
, p
);
282 __u32 num_ops
= ops
.size();
285 for (unsigned i
= 0; i
< num_ops
; i
++)
286 decode(ops
[i
].op
, p
);
288 if (header
.version
>= 3)
289 decode(retry_attempt
, p
);
293 if (header
.version
>= 4) {
294 for (unsigned i
= 0; i
< num_ops
; ++i
)
295 decode(ops
[i
].rval
, p
);
297 OSDOp::split_osd_op_vector_out_data(ops
, data
);
300 if (header
.version
>= 5) {
301 decode(replay_version
, p
);
302 decode(user_version
, p
);
304 replay_version
= bad_replay_version
;
305 user_version
= replay_version
.version
;
308 if (header
.version
== 6) {
310 do_redirect
= !redirect
.empty();
312 if (header
.version
>= 7) {
313 decode(do_redirect
, p
);
318 if (header
.version
>= 8) {
324 std::string_view
get_type_name() const override
{ return "osd_op_reply"; }
326 void print(std::ostream
& out
) const override
{
327 out
<< "osd_op_reply(" << get_tid()
328 << " " << oid
<< " " << ops
329 << " v" << get_replay_version()
330 << " uv" << get_user_version();
333 else if (is_onnvram())
337 out
<< " = " << get_result();
338 if (get_result() < 0) {
339 out
<< " (" << cpp_strerror(get_result()) << ")";
341 if (is_redirect_reply()) {
342 out
<< " redirect: { " << redirect
<< " }";
348 template<class T
, typename
... Args
>
349 friend boost::intrusive_ptr
<T
> ceph::make_message(Args
&&... args
);