]>
Commit | Line | Data |
---|---|---|
9f95a23c TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include "osd/osd_op_util.h" | |
5 | ||
6 | #include "osd/ClassHandler.h" | |
7 | #include "messages/MOSDOp.h" | |
8 | ||
9 | bool OpInfo::check_rmw(int flag) const { | |
10 | ceph_assert(rmw_flags != 0); | |
11 | return rmw_flags & flag; | |
12 | } | |
13 | bool OpInfo::may_read() const { | |
14 | return need_read_cap() || check_rmw(CEPH_OSD_RMW_FLAG_CLASS_READ); | |
15 | } | |
16 | bool OpInfo::may_write() const { | |
17 | return need_write_cap() || check_rmw(CEPH_OSD_RMW_FLAG_CLASS_WRITE); | |
18 | } | |
19 | bool OpInfo::may_cache() const { return check_rmw(CEPH_OSD_RMW_FLAG_CACHE); } | |
20 | bool OpInfo::rwordered_forced() const { | |
21 | return check_rmw(CEPH_OSD_RMW_FLAG_RWORDERED); | |
22 | } | |
23 | bool OpInfo::rwordered() const { | |
24 | return may_write() || may_cache() || rwordered_forced(); | |
25 | } | |
26 | ||
27 | bool OpInfo::includes_pg_op() const { | |
28 | return check_rmw(CEPH_OSD_RMW_FLAG_PGOP); | |
29 | } | |
30 | bool OpInfo::need_read_cap() const { | |
31 | return check_rmw(CEPH_OSD_RMW_FLAG_READ); | |
32 | } | |
33 | bool OpInfo::need_write_cap() const { | |
34 | return check_rmw(CEPH_OSD_RMW_FLAG_WRITE); | |
35 | } | |
36 | bool OpInfo::need_promote() const { | |
37 | return check_rmw(CEPH_OSD_RMW_FLAG_FORCE_PROMOTE); | |
38 | } | |
39 | bool OpInfo::need_skip_handle_cache() const { | |
40 | return check_rmw(CEPH_OSD_RMW_FLAG_SKIP_HANDLE_CACHE); | |
41 | } | |
42 | bool OpInfo::need_skip_promote() const { | |
43 | return check_rmw(CEPH_OSD_RMW_FLAG_SKIP_PROMOTE); | |
44 | } | |
45 | bool OpInfo::allows_returnvec() const { | |
46 | return check_rmw(CEPH_OSD_RMW_FLAG_RETURNVEC); | |
47 | } | |
48 | ||
49 | void OpInfo::set_rmw_flags(int flags) { | |
50 | rmw_flags |= flags; | |
51 | } | |
52 | ||
53 | void OpInfo::set_read() { set_rmw_flags(CEPH_OSD_RMW_FLAG_READ); } | |
54 | void OpInfo::set_write() { set_rmw_flags(CEPH_OSD_RMW_FLAG_WRITE); } | |
55 | void OpInfo::set_class_read() { set_rmw_flags(CEPH_OSD_RMW_FLAG_CLASS_READ); } | |
56 | void OpInfo::set_class_write() { set_rmw_flags(CEPH_OSD_RMW_FLAG_CLASS_WRITE); } | |
57 | void OpInfo::set_pg_op() { set_rmw_flags(CEPH_OSD_RMW_FLAG_PGOP); } | |
58 | void OpInfo::set_cache() { set_rmw_flags(CEPH_OSD_RMW_FLAG_CACHE); } | |
59 | void OpInfo::set_promote() { set_rmw_flags(CEPH_OSD_RMW_FLAG_FORCE_PROMOTE); } | |
60 | void OpInfo::set_skip_handle_cache() { set_rmw_flags(CEPH_OSD_RMW_FLAG_SKIP_HANDLE_CACHE); } | |
61 | void OpInfo::set_skip_promote() { set_rmw_flags(CEPH_OSD_RMW_FLAG_SKIP_PROMOTE); } | |
62 | void OpInfo::set_force_rwordered() { set_rmw_flags(CEPH_OSD_RMW_FLAG_RWORDERED); } | |
63 | void OpInfo::set_returnvec() { set_rmw_flags(CEPH_OSD_RMW_FLAG_RETURNVEC); } | |
64 | ||
65 | ||
66 | int OpInfo::set_from_op( | |
67 | const MOSDOp *m, | |
68 | const OSDMap &osdmap) | |
69 | { | |
70 | vector<OSDOp>::const_iterator iter; | |
71 | ||
72 | // client flags have no bearing on whether an op is a read, write, etc. | |
73 | clear(); | |
74 | ||
75 | if (m->has_flag(CEPH_OSD_FLAG_RWORDERED)) { | |
76 | set_force_rwordered(); | |
77 | } | |
78 | if (m->has_flag(CEPH_OSD_FLAG_RETURNVEC)) { | |
79 | set_returnvec(); | |
80 | } | |
81 | ||
82 | // set bits based on op codes, called methods. | |
83 | for (iter = m->ops.begin(); iter != m->ops.end(); ++iter) { | |
84 | if ((iter->op.op == CEPH_OSD_OP_WATCH && | |
85 | iter->op.watch.op == CEPH_OSD_WATCH_OP_PING)) { | |
86 | /* This a bit odd. PING isn't actually a write. It can't | |
87 | * result in an update to the object_info. PINGs also aren't | |
88 | * resent, so there's no reason to write out a log entry. | |
89 | * | |
90 | * However, we pipeline them behind writes, so let's force | |
91 | * the write_ordered flag. | |
92 | */ | |
93 | set_force_rwordered(); | |
94 | } else { | |
95 | if (ceph_osd_op_mode_modify(iter->op.op)) | |
96 | set_write(); | |
97 | } | |
98 | if (ceph_osd_op_mode_read(iter->op.op)) | |
99 | set_read(); | |
100 | ||
101 | // set READ flag if there are src_oids | |
102 | if (iter->soid.oid.name.length()) | |
103 | set_read(); | |
104 | ||
105 | // set PGOP flag if there are PG ops | |
106 | if (ceph_osd_op_type_pg(iter->op.op)) | |
107 | set_pg_op(); | |
108 | ||
109 | if (ceph_osd_op_mode_cache(iter->op.op)) | |
110 | set_cache(); | |
111 | ||
112 | // check for ec base pool | |
113 | int64_t poolid = m->get_pg().pool(); | |
114 | const pg_pool_t *pool = osdmap.get_pg_pool(poolid); | |
115 | if (pool && pool->is_tier()) { | |
116 | const pg_pool_t *base_pool = osdmap.get_pg_pool(pool->tier_of); | |
117 | if (base_pool && base_pool->require_rollback()) { | |
118 | if ((iter->op.op != CEPH_OSD_OP_READ) && | |
119 | (iter->op.op != CEPH_OSD_OP_CHECKSUM) && | |
120 | (iter->op.op != CEPH_OSD_OP_CMPEXT) && | |
121 | (iter->op.op != CEPH_OSD_OP_STAT) && | |
122 | (iter->op.op != CEPH_OSD_OP_ISDIRTY) && | |
123 | (iter->op.op != CEPH_OSD_OP_UNDIRTY) && | |
124 | (iter->op.op != CEPH_OSD_OP_GETXATTR) && | |
125 | (iter->op.op != CEPH_OSD_OP_GETXATTRS) && | |
126 | (iter->op.op != CEPH_OSD_OP_CMPXATTR) && | |
127 | (iter->op.op != CEPH_OSD_OP_ASSERT_VER) && | |
128 | (iter->op.op != CEPH_OSD_OP_LIST_WATCHERS) && | |
129 | (iter->op.op != CEPH_OSD_OP_LIST_SNAPS) && | |
130 | (iter->op.op != CEPH_OSD_OP_SETALLOCHINT) && | |
131 | (iter->op.op != CEPH_OSD_OP_WRITEFULL) && | |
132 | (iter->op.op != CEPH_OSD_OP_ROLLBACK) && | |
133 | (iter->op.op != CEPH_OSD_OP_CREATE) && | |
134 | (iter->op.op != CEPH_OSD_OP_DELETE) && | |
135 | (iter->op.op != CEPH_OSD_OP_SETXATTR) && | |
136 | (iter->op.op != CEPH_OSD_OP_RMXATTR) && | |
137 | (iter->op.op != CEPH_OSD_OP_STARTSYNC) && | |
138 | (iter->op.op != CEPH_OSD_OP_COPY_GET) && | |
139 | (iter->op.op != CEPH_OSD_OP_COPY_FROM) && | |
140 | (iter->op.op != CEPH_OSD_OP_COPY_FROM2)) { | |
141 | set_promote(); | |
142 | } | |
143 | } | |
144 | } | |
145 | ||
146 | switch (iter->op.op) { | |
147 | case CEPH_OSD_OP_CALL: | |
148 | { | |
149 | bufferlist::iterator bp = const_cast<bufferlist&>(iter->indata).begin(); | |
150 | int is_write, is_read; | |
151 | string cname, mname; | |
152 | bp.copy(iter->op.cls.class_len, cname); | |
153 | bp.copy(iter->op.cls.method_len, mname); | |
154 | ||
155 | ClassHandler::ClassData *cls; | |
156 | int r = ClassHandler::get_instance().open_class(cname, &cls); | |
157 | if (r) { | |
158 | if (r == -ENOENT) | |
159 | r = -EOPNOTSUPP; | |
160 | else if (r != -EPERM) // propagate permission errors | |
161 | r = -EIO; | |
162 | return r; | |
163 | } | |
164 | int flags = cls->get_method_flags(mname); | |
165 | if (flags < 0) { | |
166 | if (flags == -ENOENT) | |
167 | r = -EOPNOTSUPP; | |
168 | else | |
169 | r = flags; | |
170 | return r; | |
171 | } | |
172 | is_read = flags & CLS_METHOD_RD; | |
173 | is_write = flags & CLS_METHOD_WR; | |
174 | bool is_promote = flags & CLS_METHOD_PROMOTE; | |
175 | ||
176 | if (is_read) | |
177 | set_class_read(); | |
178 | if (is_write) | |
179 | set_class_write(); | |
180 | if (is_promote) | |
181 | set_promote(); | |
182 | add_class(std::move(cname), std::move(mname), is_read, is_write, | |
183 | cls->whitelisted); | |
184 | break; | |
185 | } | |
186 | ||
187 | case CEPH_OSD_OP_WATCH: | |
188 | // force the read bit for watch since it is depends on previous | |
189 | // watch state (and may return early if the watch exists) or, in | |
190 | // the case of ping, is simply a read op. | |
191 | set_read(); | |
192 | // fall through | |
193 | case CEPH_OSD_OP_NOTIFY: | |
194 | case CEPH_OSD_OP_NOTIFY_ACK: | |
195 | { | |
196 | set_promote(); | |
197 | break; | |
198 | } | |
199 | ||
200 | case CEPH_OSD_OP_DELETE: | |
201 | // if we get a delete with FAILOK we can skip handle cache. without | |
202 | // FAILOK we still need to promote (or do something smarter) to | |
203 | // determine whether to return ENOENT or 0. | |
204 | if (iter == m->ops.begin() && | |
205 | iter->op.flags == CEPH_OSD_OP_FLAG_FAILOK) { | |
206 | set_skip_handle_cache(); | |
207 | } | |
208 | // skip promotion when proxying a delete op | |
209 | if (m->ops.size() == 1) { | |
210 | set_skip_promote(); | |
211 | } | |
212 | break; | |
213 | ||
214 | case CEPH_OSD_OP_CACHE_TRY_FLUSH: | |
215 | case CEPH_OSD_OP_CACHE_FLUSH: | |
216 | case CEPH_OSD_OP_CACHE_EVICT: | |
217 | // If try_flush/flush/evict is the only op, can skip handle cache. | |
218 | if (m->ops.size() == 1) { | |
219 | set_skip_handle_cache(); | |
220 | } | |
221 | break; | |
222 | ||
223 | case CEPH_OSD_OP_READ: | |
224 | case CEPH_OSD_OP_SYNC_READ: | |
225 | case CEPH_OSD_OP_SPARSE_READ: | |
226 | case CEPH_OSD_OP_CHECKSUM: | |
227 | case CEPH_OSD_OP_WRITEFULL: | |
228 | if (m->ops.size() == 1 && | |
229 | (iter->op.flags & CEPH_OSD_OP_FLAG_FADVISE_NOCACHE || | |
230 | iter->op.flags & CEPH_OSD_OP_FLAG_FADVISE_DONTNEED)) { | |
231 | set_skip_promote(); | |
232 | } | |
233 | break; | |
234 | ||
235 | // force promotion when pin an object in cache tier | |
236 | case CEPH_OSD_OP_CACHE_PIN: | |
237 | set_promote(); | |
238 | break; | |
239 | ||
240 | default: | |
241 | break; | |
242 | } | |
243 | } | |
244 | ||
245 | if (rmw_flags == 0) | |
246 | return -EINVAL; | |
247 | ||
248 | return 0; | |
249 | ||
250 | } | |
251 | ||
252 | ostream& operator<<(ostream& out, const OpInfo::ClassInfo& i) | |
253 | { | |
254 | out << "class " << i.class_name << " method " << i.method_name | |
255 | << " rd " << i.read << " wr " << i.write << " wl " << i.whitelisted; | |
256 | return out; | |
257 | } |