]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | ||
16 | #ifndef CEPH_MOSDSUBOP_H | |
17 | #define CEPH_MOSDSUBOP_H | |
18 | ||
19 | #include "MOSDFastDispatchOp.h" | |
20 | ||
21 | #include "include/ceph_features.h" | |
22 | ||
23 | /* | |
24 | * OSD sub op - for internal ops on pobjects between primary and replicas(/stripes/whatever) | |
25 | */ | |
26 | ||
27 | class MOSDSubOp : public MOSDFastDispatchOp { | |
28 | ||
29 | static const int HEAD_VERSION = 12; | |
30 | static const int COMPAT_VERSION = 7; | |
31 | ||
32 | public: | |
d2e6a577 | 33 | epoch_t map_epoch = 0; |
7c673cae FG |
34 | |
35 | // metadata from original request | |
36 | osd_reqid_t reqid; | |
37 | ||
38 | // subop | |
39 | pg_shard_t from; | |
40 | spg_t pgid; | |
41 | hobject_t poid; | |
42 | object_locator_t oloc; | |
43 | ||
d2e6a577 | 44 | __u8 acks_wanted = 0; |
7c673cae FG |
45 | |
46 | // op to exec | |
47 | vector<OSDOp> ops; | |
48 | utime_t mtime; | |
49 | ||
d2e6a577 FG |
50 | bool old_exists = false; |
51 | uint64_t old_size = 0; | |
7c673cae FG |
52 | eversion_t old_version; |
53 | ||
54 | SnapSet snapset; | |
55 | ||
56 | // transaction to exec | |
57 | bufferlist logbl; | |
58 | pg_stat_t pg_stats; | |
59 | ||
60 | // subop metadata | |
61 | eversion_t version; | |
62 | ||
63 | // piggybacked osd/pg state | |
64 | eversion_t pg_trim_to; // primary->replica: trim to here | |
65 | eversion_t pg_roll_forward_to; // primary->replica: trim rollback | |
66 | // info to here | |
67 | osd_peer_stat_t peer_stat; | |
68 | ||
69 | map<string,bufferlist> attrset; | |
70 | ||
71 | interval_set<uint64_t> data_subset; | |
72 | map<hobject_t, interval_set<uint64_t>> clone_subsets; | |
73 | ||
d2e6a577 | 74 | bool first = false, complete = false; |
7c673cae FG |
75 | |
76 | interval_set<uint64_t> data_included; | |
77 | ObjectRecoveryInfo recovery_info; | |
78 | ||
79 | // reflects result of current push | |
80 | ObjectRecoveryProgress recovery_progress; | |
81 | ||
82 | // reflects progress before current push | |
83 | ObjectRecoveryProgress current_progress; | |
84 | ||
85 | map<string,bufferlist> omap_entries; | |
86 | bufferlist omap_header; | |
87 | ||
88 | ||
89 | hobject_t new_temp_oid; ///< new temp object that we must now start tracking | |
90 | hobject_t discard_temp_oid; ///< previously used temp object that we can now stop tracking | |
91 | ||
92 | /// non-empty if this transaction involves a hit_set history update | |
93 | boost::optional<pg_hit_set_history_t> updated_hit_set_history; | |
94 | ||
95 | epoch_t get_map_epoch() const override { | |
96 | return map_epoch; | |
97 | } | |
98 | spg_t get_spg() const override { | |
99 | return pgid; | |
100 | } | |
101 | ||
102 | int get_cost() const override { | |
103 | if (ops.size() == 1 && ops[0].op.op == CEPH_OSD_OP_PULL) | |
104 | return ops[0].op.extent.length; | |
105 | return data.length(); | |
106 | } | |
107 | ||
108 | void decode_payload() override { | |
109 | //since we drop incorrect_pools flag, now we only support | |
110 | //version >=7 | |
111 | assert (header.version >= 7); | |
112 | bufferlist::iterator p = payload.begin(); | |
113 | ::decode(map_epoch, p); | |
114 | ::decode(reqid, p); | |
115 | ::decode(pgid.pgid, p); | |
116 | ::decode(poid, p); | |
117 | ||
118 | __u32 num_ops; | |
119 | ::decode(num_ops, p); | |
120 | ops.resize(num_ops); | |
121 | unsigned off = 0; | |
122 | for (unsigned i = 0; i < num_ops; i++) { | |
123 | ::decode(ops[i].op, p); | |
124 | ops[i].indata.substr_of(data, off, ops[i].op.payload_len); | |
125 | off += ops[i].op.payload_len; | |
126 | } | |
127 | ::decode(mtime, p); | |
128 | //we don't need noop anymore | |
129 | bool noop_dont_need; | |
130 | ::decode(noop_dont_need, p); | |
131 | ||
132 | ::decode(acks_wanted, p); | |
133 | ::decode(version, p); | |
134 | ::decode(old_exists, p); | |
135 | ::decode(old_size, p); | |
136 | ::decode(old_version, p); | |
137 | ::decode(snapset, p); | |
138 | ||
139 | if (header.version <= 11) { | |
140 | SnapContext snapc_dont_need; | |
141 | ::decode(snapc_dont_need, p); | |
142 | } | |
143 | ||
144 | ::decode(logbl, p); | |
145 | ::decode(pg_stats, p); | |
146 | ::decode(pg_trim_to, p); | |
147 | ::decode(peer_stat, p); | |
148 | ::decode(attrset, p); | |
149 | ||
150 | ::decode(data_subset, p); | |
151 | ::decode(clone_subsets, p); | |
152 | ||
153 | ::decode(first, p); | |
154 | ::decode(complete, p); | |
155 | ::decode(oloc, p); | |
156 | ::decode(data_included, p); | |
157 | recovery_info.decode(p, pgid.pool()); | |
158 | ::decode(recovery_progress, p); | |
159 | ::decode(current_progress, p); | |
160 | ::decode(omap_entries, p); | |
161 | ::decode(omap_header, p); | |
162 | ||
163 | if (header.version >= 8) { | |
164 | ::decode(new_temp_oid, p); | |
165 | ::decode(discard_temp_oid, p); | |
166 | } | |
167 | ||
168 | if (header.version >= 9) { | |
169 | ::decode(from, p); | |
170 | ::decode(pgid.shard, p); | |
171 | } else { | |
172 | from = pg_shard_t( | |
173 | get_source().num(), | |
174 | shard_id_t::NO_SHARD); | |
175 | pgid.shard = shard_id_t::NO_SHARD; | |
176 | } | |
177 | if (header.version >= 10) { | |
178 | ::decode(updated_hit_set_history, p); | |
179 | } | |
180 | if (header.version >= 11) { | |
181 | ::decode(pg_roll_forward_to, p); | |
182 | } else { | |
183 | pg_roll_forward_to = pg_trim_to; | |
184 | } | |
185 | } | |
186 | ||
/// Does nothing; the body is empty.
void finish_decode()
{
}
188 | ||
189 | void encode_payload(uint64_t features) override { | |
190 | header.version = HEAD_VERSION; | |
191 | ::encode(map_epoch, payload); | |
192 | ::encode(reqid, payload); | |
193 | ::encode(pgid.pgid, payload); | |
194 | ::encode(poid, payload); | |
195 | ||
196 | __u32 num_ops = ops.size(); | |
197 | ::encode(num_ops, payload); | |
198 | for (unsigned i = 0; i < ops.size(); i++) { | |
199 | ops[i].op.payload_len = ops[i].indata.length(); | |
200 | ::encode(ops[i].op, payload); | |
201 | data.append(ops[i].indata); | |
202 | } | |
203 | ::encode(mtime, payload); | |
204 | //encode a false here for backward compatiable | |
205 | ::encode(false, payload); | |
206 | ::encode(acks_wanted, payload); | |
207 | ::encode(version, payload); | |
208 | ::encode(old_exists, payload); | |
209 | ::encode(old_size, payload); | |
210 | ::encode(old_version, payload); | |
211 | ::encode(snapset, payload); | |
212 | ||
213 | if ((features & CEPH_FEATURE_OSDSUBOP_NO_SNAPCONTEXT) == 0) { | |
214 | header.version = 11; | |
215 | SnapContext dummy_snapc; | |
216 | ::encode(dummy_snapc, payload); | |
217 | } | |
218 | ||
219 | ::encode(logbl, payload); | |
220 | ::encode(pg_stats, payload); | |
221 | ::encode(pg_trim_to, payload); | |
222 | ::encode(peer_stat, payload); | |
223 | ::encode(attrset, payload); | |
224 | ::encode(data_subset, payload); | |
225 | ::encode(clone_subsets, payload); | |
226 | if (ops.size()) | |
227 | header.data_off = ops[0].op.extent.offset; | |
228 | else | |
229 | header.data_off = 0; | |
230 | ::encode(first, payload); | |
231 | ::encode(complete, payload); | |
232 | ::encode(oloc, payload); | |
233 | ::encode(data_included, payload); | |
234 | ::encode(recovery_info, payload, features); | |
235 | ::encode(recovery_progress, payload); | |
236 | ::encode(current_progress, payload); | |
237 | ::encode(omap_entries, payload); | |
238 | ::encode(omap_header, payload); | |
239 | ::encode(new_temp_oid, payload); | |
240 | ::encode(discard_temp_oid, payload); | |
241 | ::encode(from, payload); | |
242 | ::encode(pgid.shard, payload); | |
243 | ::encode(updated_hit_set_history, payload); | |
244 | ::encode(pg_roll_forward_to, payload); | |
245 | } | |
246 | ||
247 | MOSDSubOp() | |
248 | : MOSDFastDispatchOp(MSG_OSD_SUBOP, HEAD_VERSION, COMPAT_VERSION) { } | |
249 | MOSDSubOp(osd_reqid_t r, pg_shard_t from, | |
250 | spg_t p, const hobject_t& po, int aw, | |
251 | epoch_t mape, ceph_tid_t rtid, eversion_t v) | |
252 | : MOSDFastDispatchOp(MSG_OSD_SUBOP, HEAD_VERSION, COMPAT_VERSION), | |
253 | map_epoch(mape), | |
254 | reqid(r), | |
255 | from(from), | |
256 | pgid(p), | |
257 | poid(po), | |
258 | acks_wanted(aw), | |
259 | old_exists(false), old_size(0), | |
260 | version(v), | |
261 | first(false), complete(false) { | |
262 | memset(&peer_stat, 0, sizeof(peer_stat)); | |
263 | set_tid(rtid); | |
264 | } | |
265 | private: | |
266 | ~MOSDSubOp() override {} | |
267 | ||
268 | public: | |
269 | const char *get_type_name() const override { return "osd_sub_op"; } | |
270 | void print(ostream& out) const override { | |
271 | out << "osd_sub_op(" << reqid | |
272 | << " " << pgid | |
273 | << " " << poid | |
274 | << " " << ops; | |
275 | if (first) | |
276 | out << " first"; | |
277 | if (complete) | |
278 | out << " complete"; | |
279 | out << " v " << version | |
280 | << " snapset=" << snapset; | |
281 | if (!data_subset.empty()) out << " subset " << data_subset; | |
282 | if (updated_hit_set_history) | |
283 | out << ", has_updated_hit_set_history"; | |
284 | out << ")"; | |
285 | } | |
286 | }; | |
287 | ||
288 | ||
289 | #endif |