]> git.proxmox.com Git - ceph.git/blob - ceph/src/messages/MOSDSubOp.h
3bab8f57770a7ad4fcae204eea71960675b6f1b9
[ceph.git] / ceph / src / messages / MOSDSubOp.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15
16 #ifndef CEPH_MOSDSUBOP_H
17 #define CEPH_MOSDSUBOP_H
18
19 #include "MOSDFastDispatchOp.h"
20
21 #include "include/ceph_features.h"
22
23 /*
24 * OSD sub op - for internal ops on pobjects between primary and replicas(/stripes/whatever)
25 */
26
27 class MOSDSubOp : public MOSDFastDispatchOp {
28
29 static const int HEAD_VERSION = 12;
30 static const int COMPAT_VERSION = 7;
31
32 public:
33 epoch_t map_epoch;
34
35 // metadata from original request
36 osd_reqid_t reqid;
37
38 // subop
39 pg_shard_t from;
40 spg_t pgid;
41 hobject_t poid;
42 object_locator_t oloc;
43
44 __u8 acks_wanted;
45
46 // op to exec
47 vector<OSDOp> ops;
48 utime_t mtime;
49
50 bool old_exists;
51 uint64_t old_size;
52 eversion_t old_version;
53
54 SnapSet snapset;
55
56 // transaction to exec
57 bufferlist logbl;
58 pg_stat_t pg_stats;
59
60 // subop metadata
61 eversion_t version;
62
63 // piggybacked osd/og state
64 eversion_t pg_trim_to; // primary->replica: trim to here
65 eversion_t pg_roll_forward_to; // primary->replica: trim rollback
66 // info to here
67 osd_peer_stat_t peer_stat;
68
69 map<string,bufferlist> attrset;
70
71 interval_set<uint64_t> data_subset;
72 map<hobject_t, interval_set<uint64_t>> clone_subsets;
73
74 bool first, complete;
75
76 interval_set<uint64_t> data_included;
77 ObjectRecoveryInfo recovery_info;
78
79 // reflects result of current push
80 ObjectRecoveryProgress recovery_progress;
81
82 // reflects progress before current push
83 ObjectRecoveryProgress current_progress;
84
85 map<string,bufferlist> omap_entries;
86 bufferlist omap_header;
87
88
89 hobject_t new_temp_oid; ///< new temp object that we must now start tracking
90 hobject_t discard_temp_oid; ///< previously used temp object that we can now stop tracking
91
92 /// non-empty if this transaction involves a hit_set history update
93 boost::optional<pg_hit_set_history_t> updated_hit_set_history;
94
95 epoch_t get_map_epoch() const override {
96 return map_epoch;
97 }
98 spg_t get_spg() const override {
99 return pgid;
100 }
101
102 int get_cost() const override {
103 if (ops.size() == 1 && ops[0].op.op == CEPH_OSD_OP_PULL)
104 return ops[0].op.extent.length;
105 return data.length();
106 }
107
108 void decode_payload() override {
109 //since we drop incorrect_pools flag, now we only support
110 //version >=7
111 assert (header.version >= 7);
112 bufferlist::iterator p = payload.begin();
113 ::decode(map_epoch, p);
114 ::decode(reqid, p);
115 ::decode(pgid.pgid, p);
116 ::decode(poid, p);
117
118 __u32 num_ops;
119 ::decode(num_ops, p);
120 ops.resize(num_ops);
121 unsigned off = 0;
122 for (unsigned i = 0; i < num_ops; i++) {
123 ::decode(ops[i].op, p);
124 ops[i].indata.substr_of(data, off, ops[i].op.payload_len);
125 off += ops[i].op.payload_len;
126 }
127 ::decode(mtime, p);
128 //we don't need noop anymore
129 bool noop_dont_need;
130 ::decode(noop_dont_need, p);
131
132 ::decode(acks_wanted, p);
133 ::decode(version, p);
134 ::decode(old_exists, p);
135 ::decode(old_size, p);
136 ::decode(old_version, p);
137 ::decode(snapset, p);
138
139 if (header.version <= 11) {
140 SnapContext snapc_dont_need;
141 ::decode(snapc_dont_need, p);
142 }
143
144 ::decode(logbl, p);
145 ::decode(pg_stats, p);
146 ::decode(pg_trim_to, p);
147 ::decode(peer_stat, p);
148 ::decode(attrset, p);
149
150 ::decode(data_subset, p);
151 ::decode(clone_subsets, p);
152
153 ::decode(first, p);
154 ::decode(complete, p);
155 ::decode(oloc, p);
156 ::decode(data_included, p);
157 recovery_info.decode(p, pgid.pool());
158 ::decode(recovery_progress, p);
159 ::decode(current_progress, p);
160 ::decode(omap_entries, p);
161 ::decode(omap_header, p);
162
163 if (header.version >= 8) {
164 ::decode(new_temp_oid, p);
165 ::decode(discard_temp_oid, p);
166 }
167
168 if (header.version >= 9) {
169 ::decode(from, p);
170 ::decode(pgid.shard, p);
171 } else {
172 from = pg_shard_t(
173 get_source().num(),
174 shard_id_t::NO_SHARD);
175 pgid.shard = shard_id_t::NO_SHARD;
176 }
177 if (header.version >= 10) {
178 ::decode(updated_hit_set_history, p);
179 }
180 if (header.version >= 11) {
181 ::decode(pg_roll_forward_to, p);
182 } else {
183 pg_roll_forward_to = pg_trim_to;
184 }
185 }
186
187 void finish_decode() { }
188
189 void encode_payload(uint64_t features) override {
190 header.version = HEAD_VERSION;
191 ::encode(map_epoch, payload);
192 ::encode(reqid, payload);
193 ::encode(pgid.pgid, payload);
194 ::encode(poid, payload);
195
196 __u32 num_ops = ops.size();
197 ::encode(num_ops, payload);
198 for (unsigned i = 0; i < ops.size(); i++) {
199 ops[i].op.payload_len = ops[i].indata.length();
200 ::encode(ops[i].op, payload);
201 data.append(ops[i].indata);
202 }
203 ::encode(mtime, payload);
204 //encode a false here for backward compatiable
205 ::encode(false, payload);
206 ::encode(acks_wanted, payload);
207 ::encode(version, payload);
208 ::encode(old_exists, payload);
209 ::encode(old_size, payload);
210 ::encode(old_version, payload);
211 ::encode(snapset, payload);
212
213 if ((features & CEPH_FEATURE_OSDSUBOP_NO_SNAPCONTEXT) == 0) {
214 header.version = 11;
215 SnapContext dummy_snapc;
216 ::encode(dummy_snapc, payload);
217 }
218
219 ::encode(logbl, payload);
220 ::encode(pg_stats, payload);
221 ::encode(pg_trim_to, payload);
222 ::encode(peer_stat, payload);
223 ::encode(attrset, payload);
224 ::encode(data_subset, payload);
225 ::encode(clone_subsets, payload);
226 if (ops.size())
227 header.data_off = ops[0].op.extent.offset;
228 else
229 header.data_off = 0;
230 ::encode(first, payload);
231 ::encode(complete, payload);
232 ::encode(oloc, payload);
233 ::encode(data_included, payload);
234 ::encode(recovery_info, payload, features);
235 ::encode(recovery_progress, payload);
236 ::encode(current_progress, payload);
237 ::encode(omap_entries, payload);
238 ::encode(omap_header, payload);
239 ::encode(new_temp_oid, payload);
240 ::encode(discard_temp_oid, payload);
241 ::encode(from, payload);
242 ::encode(pgid.shard, payload);
243 ::encode(updated_hit_set_history, payload);
244 ::encode(pg_roll_forward_to, payload);
245 }
246
247 MOSDSubOp()
248 : MOSDFastDispatchOp(MSG_OSD_SUBOP, HEAD_VERSION, COMPAT_VERSION) { }
249 MOSDSubOp(osd_reqid_t r, pg_shard_t from,
250 spg_t p, const hobject_t& po, int aw,
251 epoch_t mape, ceph_tid_t rtid, eversion_t v)
252 : MOSDFastDispatchOp(MSG_OSD_SUBOP, HEAD_VERSION, COMPAT_VERSION),
253 map_epoch(mape),
254 reqid(r),
255 from(from),
256 pgid(p),
257 poid(po),
258 acks_wanted(aw),
259 old_exists(false), old_size(0),
260 version(v),
261 first(false), complete(false) {
262 memset(&peer_stat, 0, sizeof(peer_stat));
263 set_tid(rtid);
264 }
265 private:
266 ~MOSDSubOp() override {}
267
268 public:
269 const char *get_type_name() const override { return "osd_sub_op"; }
270 void print(ostream& out) const override {
271 out << "osd_sub_op(" << reqid
272 << " " << pgid
273 << " " << poid
274 << " " << ops;
275 if (first)
276 out << " first";
277 if (complete)
278 out << " complete";
279 out << " v " << version
280 << " snapset=" << snapset;
281 if (!data_subset.empty()) out << " subset " << data_subset;
282 if (updated_hit_set_history)
283 out << ", has_updated_hit_set_history";
284 out << ")";
285 }
286 };
287
288
289 #endif