1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
16 #ifndef CEPH_MOSDSUBOP_H
17 #define CEPH_MOSDSUBOP_H
19 #include "MOSDFastDispatchOp.h"
21 #include "include/ceph_features.h"
24 * OSD sub op - for internal ops on pobjects between primary and replicas(/stripes/whatever)
27 class MOSDSubOp
: public MOSDFastDispatchOp
{
29 static const int HEAD_VERSION
= 12;
30 static const int COMPAT_VERSION
= 7;
33 epoch_t map_epoch
= 0;
35 // metadata from original request
42 object_locator_t oloc
;
50 bool old_exists
= false;
51 uint64_t old_size
= 0;
52 eversion_t old_version
;
56 // transaction to exec
63 // piggybacked osd/pg state
64 eversion_t pg_trim_to
; // primary->replica: trim to here
65 eversion_t pg_roll_forward_to
; // primary->replica: trim rollback
67 osd_peer_stat_t peer_stat
;
69 map
<string
,bufferlist
> attrset
;
71 interval_set
<uint64_t> data_subset
;
72 map
<hobject_t
, interval_set
<uint64_t>> clone_subsets
;
74 bool first
= false, complete
= false;
76 interval_set
<uint64_t> data_included
;
77 ObjectRecoveryInfo recovery_info
;
79 // reflects result of current push
80 ObjectRecoveryProgress recovery_progress
;
82 // reflects progress before current push
83 ObjectRecoveryProgress current_progress
;
85 map
<string
,bufferlist
> omap_entries
;
86 bufferlist omap_header
;
89 hobject_t new_temp_oid
; ///< new temp object that we must now start tracking
90 hobject_t discard_temp_oid
; ///< previously used temp object that we can now stop tracking
92 /// non-empty if this transaction involves a hit_set history update
93 boost::optional
<pg_hit_set_history_t
> updated_hit_set_history
;
95 epoch_t
get_map_epoch() const override
{
98 spg_t
get_spg() const override
{
// Cost of this message.
// When there is exactly one op and it is a CEPH_OSD_OP_PULL, the cost is the
// extent length carried by that op; otherwise it is the length of `data`.
102 int get_cost() const override
{
103 if (ops
.size() == 1 && ops
[0].op
.op
== CEPH_OSD_OP_PULL
)
104 return ops
[0].op
.extent
.length
;
105 return data
.length();
// Decode this message's fields from `payload`, in wire order.
// - Asserts header.version >= 7.
// - Each op header is decoded from `payload`, while the op's input data is
//   sliced out of `data` using the op's payload_len.
// - Some fields are version-gated on header.version (<= 11, >= 8, >= 9,
//   >= 10, >= 11), as shown by the conditionals below.
108 void decode_payload() override
{
109 // Since the incorrect_pools flag was dropped, only header.version >= 7 is supported (asserted below).
111 assert (header
.version
>= 7);
112 bufferlist::iterator p
= payload
.begin();
113 ::decode(map_epoch
, p
);
115 ::decode(pgid
.pgid
, p
);
119 ::decode(num_ops
, p
);
122 for (unsigned i
= 0; i
< num_ops
; i
++) {
123 ::decode(ops
[i
].op
, p
);
124 ops
[i
].indata
.substr_of(data
, off
, ops
[i
].op
.payload_len
);
125 off
+= ops
[i
].op
.payload_len
;
128 // The noop flag is no longer needed; it is still decoded below (into noop_dont_need).
130 ::decode(noop_dont_need
, p
);
132 ::decode(acks_wanted
, p
);
133 ::decode(version
, p
);
134 ::decode(old_exists
, p
);
135 ::decode(old_size
, p
);
136 ::decode(old_version
, p
);
137 ::decode(snapset
, p
);
// Messages with header.version <= 11 carry a SnapContext at this point; it is
// decoded into a local named snapc_dont_need.
139 if (header
.version
<= 11) {
140 SnapContext snapc_dont_need
;
141 ::decode(snapc_dont_need
, p
);
145 ::decode(pg_stats
, p
);
146 ::decode(pg_trim_to
, p
);
147 ::decode(peer_stat
, p
);
148 ::decode(attrset
, p
);
150 ::decode(data_subset
, p
);
151 ::decode(clone_subsets
, p
);
154 ::decode(complete
, p
);
156 ::decode(data_included
, p
);
157 recovery_info
.decode(p
, pgid
.pool());
158 ::decode(recovery_progress
, p
);
159 ::decode(current_progress
, p
);
160 ::decode(omap_entries
, p
);
161 ::decode(omap_header
, p
);
// Temp-object ids are present from header.version 8 onward.
163 if (header
.version
>= 8) {
164 ::decode(new_temp_oid
, p
);
165 ::decode(discard_temp_oid
, p
);
// The shard id is present from header.version 9 onward.
168 if (header
.version
>= 9) {
170 ::decode(pgid
.shard
, p
);
174 shard_id_t::NO_SHARD
);
175 pgid
.shard
= shard_id_t::NO_SHARD
;
// The optional hit_set history is present from header.version 10 onward.
177 if (header
.version
>= 10) {
178 ::decode(updated_hit_set_history
, p
);
// pg_roll_forward_to is present from header.version 11 onward.
180 if (header
.version
>= 11) {
181 ::decode(pg_roll_forward_to
, p
);
183 pg_roll_forward_to
= pg_trim_to
;
// Intentionally empty: this function performs no work.
void finish_decode()
{
}
// Encode this message's fields into `payload`, in wire order.
// - header.version is set to HEAD_VERSION.
// - For every op, payload_len is refreshed from the length of its indata, the
//   op header is encoded into `payload`, and the indata is appended to `data`.
// - `features` is tested against CEPH_FEATURE_OSDSUBOP_NO_SNAPCONTEXT (a
//   placeholder SnapContext is encoded when that bit is not set) and is also
//   passed to the encoder of recovery_info.
189 void encode_payload(uint64_t features
) override
{
190 header
.version
= HEAD_VERSION
;
191 ::encode(map_epoch
, payload
);
192 ::encode(reqid
, payload
);
193 ::encode(pgid
.pgid
, payload
);
194 ::encode(poid
, payload
);
196 __u32 num_ops
= ops
.size();
197 ::encode(num_ops
, payload
);
198 for (unsigned i
= 0; i
< ops
.size(); i
++) {
199 ops
[i
].op
.payload_len
= ops
[i
].indata
.length();
200 ::encode(ops
[i
].op
, payload
);
201 data
.append(ops
[i
].indata
);
203 ::encode(mtime
, payload
);
204 // encode a constant false here for backward compatibility
205 ::encode(false, payload
);
206 ::encode(acks_wanted
, payload
);
207 ::encode(version
, payload
);
208 ::encode(old_exists
, payload
);
209 ::encode(old_size
, payload
);
210 ::encode(old_version
, payload
);
211 ::encode(snapset
, payload
);
// The feature bit is not set: encode a default-constructed SnapContext as a placeholder.
213 if ((features
& CEPH_FEATURE_OSDSUBOP_NO_SNAPCONTEXT
) == 0) {
215 SnapContext dummy_snapc
;
216 ::encode(dummy_snapc
, payload
);
219 ::encode(logbl
, payload
);
220 ::encode(pg_stats
, payload
);
221 ::encode(pg_trim_to
, payload
);
222 ::encode(peer_stat
, payload
);
223 ::encode(attrset
, payload
);
224 ::encode(data_subset
, payload
);
225 ::encode(clone_subsets
, payload
);
// header.data_off is taken from the extent offset of the first op.
227 header
.data_off
= ops
[0].op
.extent
.offset
;
230 ::encode(first
, payload
);
231 ::encode(complete
, payload
);
232 ::encode(oloc
, payload
);
233 ::encode(data_included
, payload
);
234 ::encode(recovery_info
, payload
, features
);
235 ::encode(recovery_progress
, payload
);
236 ::encode(current_progress
, payload
);
237 ::encode(omap_entries
, payload
);
238 ::encode(omap_header
, payload
);
239 ::encode(new_temp_oid
, payload
);
240 ::encode(discard_temp_oid
, payload
);
241 ::encode(from
, payload
);
242 ::encode(pgid
.shard
, payload
);
243 ::encode(updated_hit_set_history
, payload
);
244 ::encode(pg_roll_forward_to
, payload
);
248 : MOSDFastDispatchOp(MSG_OSD_SUBOP
, HEAD_VERSION
, COMPAT_VERSION
) { }
// Construct a sub-op message.
// The base MOSDFastDispatchOp is initialised with MSG_OSD_SUBOP, HEAD_VERSION
// and COMPAT_VERSION. Of the initialisers visible here, old_exists, first and
// complete start out false and old_size starts at 0; the constructor body
// zero-fills peer_stat with memset.
249 MOSDSubOp(osd_reqid_t r
, pg_shard_t from
,
250 spg_t p
, const hobject_t
& po
, int aw
,
251 epoch_t mape
, ceph_tid_t rtid
, eversion_t v
)
252 : MOSDFastDispatchOp(MSG_OSD_SUBOP
, HEAD_VERSION
, COMPAT_VERSION
),
259 old_exists(false), old_size(0),
261 first(false), complete(false) {
262 memset(&peer_stat
, 0, sizeof(peer_stat
));
266 ~MOSDSubOp() override
{}
269 const char *get_type_name() const override
{ return "osd_sub_op"; }
270 void print(ostream
& out
) const override
{
271 out
<< "osd_sub_op(" << reqid
279 out
<< " v " << version
280 << " snapset=" << snapset
;
281 if (!data_subset
.empty()) out
<< " subset " << data_subset
;
282 if (updated_hit_set_history
)
283 out
<< ", has_updated_hit_set_history";