]>
Commit | Line | Data |
---|---|---|
9f95a23c TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include "osd/osd_op_util.h" | |
5 | ||
6 | #include "osd/ClassHandler.h" | |
7 | #include "messages/MOSDOp.h" | |
8 | ||
f67539c2 TL |
9 | using std::ostream; |
10 | using std::string; | |
11 | using std::vector; | |
12 | ||
13 | using ceph::bufferlist; | |
14 | ||
9f95a23c TL |
15 | bool OpInfo::check_rmw(int flag) const { |
16 | ceph_assert(rmw_flags != 0); | |
17 | return rmw_flags & flag; | |
18 | } | |
19 | bool OpInfo::may_read() const { | |
20 | return need_read_cap() || check_rmw(CEPH_OSD_RMW_FLAG_CLASS_READ); | |
21 | } | |
22 | bool OpInfo::may_write() const { | |
23 | return need_write_cap() || check_rmw(CEPH_OSD_RMW_FLAG_CLASS_WRITE); | |
24 | } | |
25 | bool OpInfo::may_cache() const { return check_rmw(CEPH_OSD_RMW_FLAG_CACHE); } | |
26 | bool OpInfo::rwordered_forced() const { | |
27 | return check_rmw(CEPH_OSD_RMW_FLAG_RWORDERED); | |
28 | } | |
29 | bool OpInfo::rwordered() const { | |
30 | return may_write() || may_cache() || rwordered_forced(); | |
31 | } | |
32 | ||
33 | bool OpInfo::includes_pg_op() const { | |
34 | return check_rmw(CEPH_OSD_RMW_FLAG_PGOP); | |
35 | } | |
36 | bool OpInfo::need_read_cap() const { | |
37 | return check_rmw(CEPH_OSD_RMW_FLAG_READ); | |
38 | } | |
39 | bool OpInfo::need_write_cap() const { | |
40 | return check_rmw(CEPH_OSD_RMW_FLAG_WRITE); | |
41 | } | |
42 | bool OpInfo::need_promote() const { | |
43 | return check_rmw(CEPH_OSD_RMW_FLAG_FORCE_PROMOTE); | |
44 | } | |
45 | bool OpInfo::need_skip_handle_cache() const { | |
46 | return check_rmw(CEPH_OSD_RMW_FLAG_SKIP_HANDLE_CACHE); | |
47 | } | |
48 | bool OpInfo::need_skip_promote() const { | |
49 | return check_rmw(CEPH_OSD_RMW_FLAG_SKIP_PROMOTE); | |
50 | } | |
51 | bool OpInfo::allows_returnvec() const { | |
52 | return check_rmw(CEPH_OSD_RMW_FLAG_RETURNVEC); | |
53 | } | |
54 | ||
55 | void OpInfo::set_rmw_flags(int flags) { | |
56 | rmw_flags |= flags; | |
57 | } | |
58 | ||
59 | void OpInfo::set_read() { set_rmw_flags(CEPH_OSD_RMW_FLAG_READ); } | |
60 | void OpInfo::set_write() { set_rmw_flags(CEPH_OSD_RMW_FLAG_WRITE); } | |
61 | void OpInfo::set_class_read() { set_rmw_flags(CEPH_OSD_RMW_FLAG_CLASS_READ); } | |
62 | void OpInfo::set_class_write() { set_rmw_flags(CEPH_OSD_RMW_FLAG_CLASS_WRITE); } | |
63 | void OpInfo::set_pg_op() { set_rmw_flags(CEPH_OSD_RMW_FLAG_PGOP); } | |
64 | void OpInfo::set_cache() { set_rmw_flags(CEPH_OSD_RMW_FLAG_CACHE); } | |
65 | void OpInfo::set_promote() { set_rmw_flags(CEPH_OSD_RMW_FLAG_FORCE_PROMOTE); } | |
66 | void OpInfo::set_skip_handle_cache() { set_rmw_flags(CEPH_OSD_RMW_FLAG_SKIP_HANDLE_CACHE); } | |
67 | void OpInfo::set_skip_promote() { set_rmw_flags(CEPH_OSD_RMW_FLAG_SKIP_PROMOTE); } | |
68 | void OpInfo::set_force_rwordered() { set_rmw_flags(CEPH_OSD_RMW_FLAG_RWORDERED); } | |
69 | void OpInfo::set_returnvec() { set_rmw_flags(CEPH_OSD_RMW_FLAG_RETURNVEC); } | |
70 | ||
71 | ||
72 | int OpInfo::set_from_op( | |
73 | const MOSDOp *m, | |
74 | const OSDMap &osdmap) | |
75 | { | |
76 | vector<OSDOp>::const_iterator iter; | |
77 | ||
78 | // client flags have no bearing on whether an op is a read, write, etc. | |
79 | clear(); | |
80 | ||
81 | if (m->has_flag(CEPH_OSD_FLAG_RWORDERED)) { | |
82 | set_force_rwordered(); | |
83 | } | |
84 | if (m->has_flag(CEPH_OSD_FLAG_RETURNVEC)) { | |
85 | set_returnvec(); | |
86 | } | |
87 | ||
88 | // set bits based on op codes, called methods. | |
89 | for (iter = m->ops.begin(); iter != m->ops.end(); ++iter) { | |
90 | if ((iter->op.op == CEPH_OSD_OP_WATCH && | |
91 | iter->op.watch.op == CEPH_OSD_WATCH_OP_PING)) { | |
92 | /* This a bit odd. PING isn't actually a write. It can't | |
93 | * result in an update to the object_info. PINGs also aren't | |
94 | * resent, so there's no reason to write out a log entry. | |
95 | * | |
96 | * However, we pipeline them behind writes, so let's force | |
97 | * the write_ordered flag. | |
98 | */ | |
99 | set_force_rwordered(); | |
100 | } else { | |
101 | if (ceph_osd_op_mode_modify(iter->op.op)) | |
102 | set_write(); | |
103 | } | |
104 | if (ceph_osd_op_mode_read(iter->op.op)) | |
105 | set_read(); | |
106 | ||
107 | // set READ flag if there are src_oids | |
108 | if (iter->soid.oid.name.length()) | |
109 | set_read(); | |
110 | ||
111 | // set PGOP flag if there are PG ops | |
112 | if (ceph_osd_op_type_pg(iter->op.op)) | |
113 | set_pg_op(); | |
114 | ||
115 | if (ceph_osd_op_mode_cache(iter->op.op)) | |
116 | set_cache(); | |
117 | ||
118 | // check for ec base pool | |
119 | int64_t poolid = m->get_pg().pool(); | |
120 | const pg_pool_t *pool = osdmap.get_pg_pool(poolid); | |
121 | if (pool && pool->is_tier()) { | |
122 | const pg_pool_t *base_pool = osdmap.get_pg_pool(pool->tier_of); | |
123 | if (base_pool && base_pool->require_rollback()) { | |
124 | if ((iter->op.op != CEPH_OSD_OP_READ) && | |
125 | (iter->op.op != CEPH_OSD_OP_CHECKSUM) && | |
126 | (iter->op.op != CEPH_OSD_OP_CMPEXT) && | |
127 | (iter->op.op != CEPH_OSD_OP_STAT) && | |
128 | (iter->op.op != CEPH_OSD_OP_ISDIRTY) && | |
129 | (iter->op.op != CEPH_OSD_OP_UNDIRTY) && | |
130 | (iter->op.op != CEPH_OSD_OP_GETXATTR) && | |
131 | (iter->op.op != CEPH_OSD_OP_GETXATTRS) && | |
132 | (iter->op.op != CEPH_OSD_OP_CMPXATTR) && | |
133 | (iter->op.op != CEPH_OSD_OP_ASSERT_VER) && | |
134 | (iter->op.op != CEPH_OSD_OP_LIST_WATCHERS) && | |
135 | (iter->op.op != CEPH_OSD_OP_LIST_SNAPS) && | |
136 | (iter->op.op != CEPH_OSD_OP_SETALLOCHINT) && | |
137 | (iter->op.op != CEPH_OSD_OP_WRITEFULL) && | |
138 | (iter->op.op != CEPH_OSD_OP_ROLLBACK) && | |
139 | (iter->op.op != CEPH_OSD_OP_CREATE) && | |
140 | (iter->op.op != CEPH_OSD_OP_DELETE) && | |
141 | (iter->op.op != CEPH_OSD_OP_SETXATTR) && | |
142 | (iter->op.op != CEPH_OSD_OP_RMXATTR) && | |
143 | (iter->op.op != CEPH_OSD_OP_STARTSYNC) && | |
144 | (iter->op.op != CEPH_OSD_OP_COPY_GET) && | |
145 | (iter->op.op != CEPH_OSD_OP_COPY_FROM) && | |
146 | (iter->op.op != CEPH_OSD_OP_COPY_FROM2)) { | |
147 | set_promote(); | |
148 | } | |
149 | } | |
150 | } | |
151 | ||
152 | switch (iter->op.op) { | |
153 | case CEPH_OSD_OP_CALL: | |
154 | { | |
155 | bufferlist::iterator bp = const_cast<bufferlist&>(iter->indata).begin(); | |
156 | int is_write, is_read; | |
157 | string cname, mname; | |
158 | bp.copy(iter->op.cls.class_len, cname); | |
159 | bp.copy(iter->op.cls.method_len, mname); | |
160 | ||
161 | ClassHandler::ClassData *cls; | |
162 | int r = ClassHandler::get_instance().open_class(cname, &cls); | |
163 | if (r) { | |
164 | if (r == -ENOENT) | |
165 | r = -EOPNOTSUPP; | |
166 | else if (r != -EPERM) // propagate permission errors | |
167 | r = -EIO; | |
168 | return r; | |
169 | } | |
170 | int flags = cls->get_method_flags(mname); | |
171 | if (flags < 0) { | |
172 | if (flags == -ENOENT) | |
173 | r = -EOPNOTSUPP; | |
174 | else | |
175 | r = flags; | |
176 | return r; | |
177 | } | |
178 | is_read = flags & CLS_METHOD_RD; | |
179 | is_write = flags & CLS_METHOD_WR; | |
180 | bool is_promote = flags & CLS_METHOD_PROMOTE; | |
181 | ||
182 | if (is_read) | |
183 | set_class_read(); | |
184 | if (is_write) | |
185 | set_class_write(); | |
186 | if (is_promote) | |
187 | set_promote(); | |
188 | add_class(std::move(cname), std::move(mname), is_read, is_write, | |
f67539c2 | 189 | cls->allowed); |
9f95a23c TL |
190 | break; |
191 | } | |
192 | ||
193 | case CEPH_OSD_OP_WATCH: | |
194 | // force the read bit for watch since it is depends on previous | |
195 | // watch state (and may return early if the watch exists) or, in | |
196 | // the case of ping, is simply a read op. | |
197 | set_read(); | |
198 | // fall through | |
199 | case CEPH_OSD_OP_NOTIFY: | |
200 | case CEPH_OSD_OP_NOTIFY_ACK: | |
201 | { | |
202 | set_promote(); | |
203 | break; | |
204 | } | |
205 | ||
206 | case CEPH_OSD_OP_DELETE: | |
207 | // if we get a delete with FAILOK we can skip handle cache. without | |
208 | // FAILOK we still need to promote (or do something smarter) to | |
209 | // determine whether to return ENOENT or 0. | |
210 | if (iter == m->ops.begin() && | |
211 | iter->op.flags == CEPH_OSD_OP_FLAG_FAILOK) { | |
212 | set_skip_handle_cache(); | |
213 | } | |
214 | // skip promotion when proxying a delete op | |
215 | if (m->ops.size() == 1) { | |
216 | set_skip_promote(); | |
217 | } | |
218 | break; | |
219 | ||
220 | case CEPH_OSD_OP_CACHE_TRY_FLUSH: | |
221 | case CEPH_OSD_OP_CACHE_FLUSH: | |
222 | case CEPH_OSD_OP_CACHE_EVICT: | |
223 | // If try_flush/flush/evict is the only op, can skip handle cache. | |
224 | if (m->ops.size() == 1) { | |
225 | set_skip_handle_cache(); | |
226 | } | |
227 | break; | |
228 | ||
229 | case CEPH_OSD_OP_READ: | |
230 | case CEPH_OSD_OP_SYNC_READ: | |
231 | case CEPH_OSD_OP_SPARSE_READ: | |
232 | case CEPH_OSD_OP_CHECKSUM: | |
233 | case CEPH_OSD_OP_WRITEFULL: | |
234 | if (m->ops.size() == 1 && | |
235 | (iter->op.flags & CEPH_OSD_OP_FLAG_FADVISE_NOCACHE || | |
236 | iter->op.flags & CEPH_OSD_OP_FLAG_FADVISE_DONTNEED)) { | |
237 | set_skip_promote(); | |
238 | } | |
239 | break; | |
240 | ||
241 | // force promotion when pin an object in cache tier | |
242 | case CEPH_OSD_OP_CACHE_PIN: | |
243 | set_promote(); | |
244 | break; | |
245 | ||
246 | default: | |
247 | break; | |
248 | } | |
249 | } | |
250 | ||
251 | if (rmw_flags == 0) | |
252 | return -EINVAL; | |
253 | ||
254 | return 0; | |
255 | ||
256 | } | |
257 | ||
258 | ostream& operator<<(ostream& out, const OpInfo::ClassInfo& i) | |
259 | { | |
260 | out << "class " << i.class_name << " method " << i.method_name | |
f67539c2 | 261 | << " rd " << i.read << " wr " << i.write << " allowed " << i.allowed; |
9f95a23c TL |
262 | return out; |
263 | } |