]>
Commit | Line | Data |
---|---|---|
9f95a23c TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include "osd/osd_op_util.h" | |
5 | ||
6 | #include "osd/ClassHandler.h" | |
7 | #include "messages/MOSDOp.h" | |
8 | ||
f67539c2 TL |
9 | using std::ostream; |
10 | using std::string; | |
11 | using std::vector; | |
12 | ||
13 | using ceph::bufferlist; | |
14 | ||
9f95a23c TL |
15 | bool OpInfo::check_rmw(int flag) const { |
16 | ceph_assert(rmw_flags != 0); | |
17 | return rmw_flags & flag; | |
18 | } | |
19 | bool OpInfo::may_read() const { | |
20 | return need_read_cap() || check_rmw(CEPH_OSD_RMW_FLAG_CLASS_READ); | |
21 | } | |
22 | bool OpInfo::may_write() const { | |
23 | return need_write_cap() || check_rmw(CEPH_OSD_RMW_FLAG_CLASS_WRITE); | |
24 | } | |
25 | bool OpInfo::may_cache() const { return check_rmw(CEPH_OSD_RMW_FLAG_CACHE); } | |
26 | bool OpInfo::rwordered_forced() const { | |
27 | return check_rmw(CEPH_OSD_RMW_FLAG_RWORDERED); | |
28 | } | |
29 | bool OpInfo::rwordered() const { | |
30 | return may_write() || may_cache() || rwordered_forced(); | |
31 | } | |
32 | ||
33 | bool OpInfo::includes_pg_op() const { | |
34 | return check_rmw(CEPH_OSD_RMW_FLAG_PGOP); | |
35 | } | |
36 | bool OpInfo::need_read_cap() const { | |
37 | return check_rmw(CEPH_OSD_RMW_FLAG_READ); | |
38 | } | |
39 | bool OpInfo::need_write_cap() const { | |
40 | return check_rmw(CEPH_OSD_RMW_FLAG_WRITE); | |
41 | } | |
42 | bool OpInfo::need_promote() const { | |
43 | return check_rmw(CEPH_OSD_RMW_FLAG_FORCE_PROMOTE); | |
44 | } | |
45 | bool OpInfo::need_skip_handle_cache() const { | |
46 | return check_rmw(CEPH_OSD_RMW_FLAG_SKIP_HANDLE_CACHE); | |
47 | } | |
48 | bool OpInfo::need_skip_promote() const { | |
49 | return check_rmw(CEPH_OSD_RMW_FLAG_SKIP_PROMOTE); | |
50 | } | |
51 | bool OpInfo::allows_returnvec() const { | |
52 | return check_rmw(CEPH_OSD_RMW_FLAG_RETURNVEC); | |
53 | } | |
54 | ||
55 | void OpInfo::set_rmw_flags(int flags) { | |
56 | rmw_flags |= flags; | |
57 | } | |
58 | ||
59 | void OpInfo::set_read() { set_rmw_flags(CEPH_OSD_RMW_FLAG_READ); } | |
60 | void OpInfo::set_write() { set_rmw_flags(CEPH_OSD_RMW_FLAG_WRITE); } | |
61 | void OpInfo::set_class_read() { set_rmw_flags(CEPH_OSD_RMW_FLAG_CLASS_READ); } | |
62 | void OpInfo::set_class_write() { set_rmw_flags(CEPH_OSD_RMW_FLAG_CLASS_WRITE); } | |
63 | void OpInfo::set_pg_op() { set_rmw_flags(CEPH_OSD_RMW_FLAG_PGOP); } | |
64 | void OpInfo::set_cache() { set_rmw_flags(CEPH_OSD_RMW_FLAG_CACHE); } | |
65 | void OpInfo::set_promote() { set_rmw_flags(CEPH_OSD_RMW_FLAG_FORCE_PROMOTE); } | |
66 | void OpInfo::set_skip_handle_cache() { set_rmw_flags(CEPH_OSD_RMW_FLAG_SKIP_HANDLE_CACHE); } | |
67 | void OpInfo::set_skip_promote() { set_rmw_flags(CEPH_OSD_RMW_FLAG_SKIP_PROMOTE); } | |
68 | void OpInfo::set_force_rwordered() { set_rmw_flags(CEPH_OSD_RMW_FLAG_RWORDERED); } | |
69 | void OpInfo::set_returnvec() { set_rmw_flags(CEPH_OSD_RMW_FLAG_RETURNVEC); } | |
70 | ||
71 | ||
72 | int OpInfo::set_from_op( | |
73 | const MOSDOp *m, | |
74 | const OSDMap &osdmap) | |
75 | { | |
9f95a23c TL |
76 | // client flags have no bearing on whether an op is a read, write, etc. |
77 | clear(); | |
78 | ||
79 | if (m->has_flag(CEPH_OSD_FLAG_RWORDERED)) { | |
80 | set_force_rwordered(); | |
81 | } | |
82 | if (m->has_flag(CEPH_OSD_FLAG_RETURNVEC)) { | |
83 | set_returnvec(); | |
84 | } | |
20effc67 TL |
85 | return set_from_op(m->ops, m->get_pg(), osdmap); |
86 | } | |
87 | ||
88 | int OpInfo::set_from_op( | |
89 | const std::vector<OSDOp>& ops, | |
90 | const pg_t& pg, | |
91 | const OSDMap &osdmap) | |
92 | { | |
93 | vector<OSDOp>::const_iterator iter; | |
9f95a23c TL |
94 | |
95 | // set bits based on op codes, called methods. | |
20effc67 | 96 | for (iter = ops.begin(); iter != ops.end(); ++iter) { |
9f95a23c TL |
97 | if ((iter->op.op == CEPH_OSD_OP_WATCH && |
98 | iter->op.watch.op == CEPH_OSD_WATCH_OP_PING)) { | |
99 | /* This a bit odd. PING isn't actually a write. It can't | |
100 | * result in an update to the object_info. PINGs also aren't | |
101 | * resent, so there's no reason to write out a log entry. | |
102 | * | |
103 | * However, we pipeline them behind writes, so let's force | |
104 | * the write_ordered flag. | |
105 | */ | |
106 | set_force_rwordered(); | |
107 | } else { | |
108 | if (ceph_osd_op_mode_modify(iter->op.op)) | |
109 | set_write(); | |
110 | } | |
111 | if (ceph_osd_op_mode_read(iter->op.op)) | |
112 | set_read(); | |
113 | ||
114 | // set READ flag if there are src_oids | |
115 | if (iter->soid.oid.name.length()) | |
116 | set_read(); | |
117 | ||
118 | // set PGOP flag if there are PG ops | |
119 | if (ceph_osd_op_type_pg(iter->op.op)) | |
120 | set_pg_op(); | |
121 | ||
122 | if (ceph_osd_op_mode_cache(iter->op.op)) | |
123 | set_cache(); | |
124 | ||
125 | // check for ec base pool | |
20effc67 | 126 | int64_t poolid = pg.pool(); |
9f95a23c TL |
127 | const pg_pool_t *pool = osdmap.get_pg_pool(poolid); |
128 | if (pool && pool->is_tier()) { | |
129 | const pg_pool_t *base_pool = osdmap.get_pg_pool(pool->tier_of); | |
130 | if (base_pool && base_pool->require_rollback()) { | |
131 | if ((iter->op.op != CEPH_OSD_OP_READ) && | |
132 | (iter->op.op != CEPH_OSD_OP_CHECKSUM) && | |
133 | (iter->op.op != CEPH_OSD_OP_CMPEXT) && | |
134 | (iter->op.op != CEPH_OSD_OP_STAT) && | |
135 | (iter->op.op != CEPH_OSD_OP_ISDIRTY) && | |
136 | (iter->op.op != CEPH_OSD_OP_UNDIRTY) && | |
137 | (iter->op.op != CEPH_OSD_OP_GETXATTR) && | |
138 | (iter->op.op != CEPH_OSD_OP_GETXATTRS) && | |
139 | (iter->op.op != CEPH_OSD_OP_CMPXATTR) && | |
140 | (iter->op.op != CEPH_OSD_OP_ASSERT_VER) && | |
141 | (iter->op.op != CEPH_OSD_OP_LIST_WATCHERS) && | |
142 | (iter->op.op != CEPH_OSD_OP_LIST_SNAPS) && | |
143 | (iter->op.op != CEPH_OSD_OP_SETALLOCHINT) && | |
144 | (iter->op.op != CEPH_OSD_OP_WRITEFULL) && | |
145 | (iter->op.op != CEPH_OSD_OP_ROLLBACK) && | |
146 | (iter->op.op != CEPH_OSD_OP_CREATE) && | |
147 | (iter->op.op != CEPH_OSD_OP_DELETE) && | |
148 | (iter->op.op != CEPH_OSD_OP_SETXATTR) && | |
149 | (iter->op.op != CEPH_OSD_OP_RMXATTR) && | |
150 | (iter->op.op != CEPH_OSD_OP_STARTSYNC) && | |
151 | (iter->op.op != CEPH_OSD_OP_COPY_GET) && | |
152 | (iter->op.op != CEPH_OSD_OP_COPY_FROM) && | |
153 | (iter->op.op != CEPH_OSD_OP_COPY_FROM2)) { | |
154 | set_promote(); | |
155 | } | |
156 | } | |
157 | } | |
158 | ||
159 | switch (iter->op.op) { | |
160 | case CEPH_OSD_OP_CALL: | |
161 | { | |
162 | bufferlist::iterator bp = const_cast<bufferlist&>(iter->indata).begin(); | |
163 | int is_write, is_read; | |
164 | string cname, mname; | |
165 | bp.copy(iter->op.cls.class_len, cname); | |
166 | bp.copy(iter->op.cls.method_len, mname); | |
167 | ||
168 | ClassHandler::ClassData *cls; | |
169 | int r = ClassHandler::get_instance().open_class(cname, &cls); | |
170 | if (r) { | |
171 | if (r == -ENOENT) | |
172 | r = -EOPNOTSUPP; | |
173 | else if (r != -EPERM) // propagate permission errors | |
174 | r = -EIO; | |
175 | return r; | |
176 | } | |
177 | int flags = cls->get_method_flags(mname); | |
178 | if (flags < 0) { | |
179 | if (flags == -ENOENT) | |
180 | r = -EOPNOTSUPP; | |
181 | else | |
182 | r = flags; | |
183 | return r; | |
184 | } | |
185 | is_read = flags & CLS_METHOD_RD; | |
186 | is_write = flags & CLS_METHOD_WR; | |
187 | bool is_promote = flags & CLS_METHOD_PROMOTE; | |
188 | ||
189 | if (is_read) | |
190 | set_class_read(); | |
191 | if (is_write) | |
192 | set_class_write(); | |
193 | if (is_promote) | |
194 | set_promote(); | |
195 | add_class(std::move(cname), std::move(mname), is_read, is_write, | |
f67539c2 | 196 | cls->allowed); |
9f95a23c TL |
197 | break; |
198 | } | |
199 | ||
200 | case CEPH_OSD_OP_WATCH: | |
201 | // force the read bit for watch since it is depends on previous | |
202 | // watch state (and may return early if the watch exists) or, in | |
203 | // the case of ping, is simply a read op. | |
204 | set_read(); | |
205 | // fall through | |
206 | case CEPH_OSD_OP_NOTIFY: | |
207 | case CEPH_OSD_OP_NOTIFY_ACK: | |
208 | { | |
209 | set_promote(); | |
210 | break; | |
211 | } | |
212 | ||
213 | case CEPH_OSD_OP_DELETE: | |
214 | // if we get a delete with FAILOK we can skip handle cache. without | |
215 | // FAILOK we still need to promote (or do something smarter) to | |
216 | // determine whether to return ENOENT or 0. | |
20effc67 | 217 | if (iter == ops.begin() && |
9f95a23c TL |
218 | iter->op.flags == CEPH_OSD_OP_FLAG_FAILOK) { |
219 | set_skip_handle_cache(); | |
220 | } | |
221 | // skip promotion when proxying a delete op | |
20effc67 | 222 | if (ops.size() == 1) { |
9f95a23c TL |
223 | set_skip_promote(); |
224 | } | |
225 | break; | |
226 | ||
227 | case CEPH_OSD_OP_CACHE_TRY_FLUSH: | |
228 | case CEPH_OSD_OP_CACHE_FLUSH: | |
229 | case CEPH_OSD_OP_CACHE_EVICT: | |
230 | // If try_flush/flush/evict is the only op, can skip handle cache. | |
20effc67 | 231 | if (ops.size() == 1) { |
9f95a23c TL |
232 | set_skip_handle_cache(); |
233 | } | |
234 | break; | |
235 | ||
236 | case CEPH_OSD_OP_READ: | |
237 | case CEPH_OSD_OP_SYNC_READ: | |
238 | case CEPH_OSD_OP_SPARSE_READ: | |
239 | case CEPH_OSD_OP_CHECKSUM: | |
240 | case CEPH_OSD_OP_WRITEFULL: | |
20effc67 | 241 | if (ops.size() == 1 && |
9f95a23c TL |
242 | (iter->op.flags & CEPH_OSD_OP_FLAG_FADVISE_NOCACHE || |
243 | iter->op.flags & CEPH_OSD_OP_FLAG_FADVISE_DONTNEED)) { | |
244 | set_skip_promote(); | |
245 | } | |
246 | break; | |
247 | ||
248 | // force promotion when pin an object in cache tier | |
249 | case CEPH_OSD_OP_CACHE_PIN: | |
250 | set_promote(); | |
251 | break; | |
252 | ||
253 | default: | |
254 | break; | |
255 | } | |
256 | } | |
257 | ||
258 | if (rmw_flags == 0) | |
259 | return -EINVAL; | |
260 | ||
261 | return 0; | |
262 | ||
263 | } | |
264 | ||
265 | ostream& operator<<(ostream& out, const OpInfo::ClassInfo& i) | |
266 | { | |
267 | out << "class " << i.class_name << " method " << i.method_name | |
f67539c2 | 268 | << " rd " << i.read << " wr " << i.write << " allowed " << i.allowed; |
9f95a23c TL |
269 | return out; |
270 | } |