]> git.proxmox.com Git - ceph.git/blob - ceph/src/osd/osd_op_util.cc
import quincy beta 17.1.0
[ceph.git] / ceph / src / osd / osd_op_util.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "osd/osd_op_util.h"
5
6 #include "osd/ClassHandler.h"
7 #include "messages/MOSDOp.h"
8
9 using std::ostream;
10 using std::string;
11 using std::vector;
12
13 using ceph::bufferlist;
14
15 bool OpInfo::check_rmw(int flag) const {
16 ceph_assert(rmw_flags != 0);
17 return rmw_flags & flag;
18 }
19 bool OpInfo::may_read() const {
20 return need_read_cap() || check_rmw(CEPH_OSD_RMW_FLAG_CLASS_READ);
21 }
22 bool OpInfo::may_write() const {
23 return need_write_cap() || check_rmw(CEPH_OSD_RMW_FLAG_CLASS_WRITE);
24 }
25 bool OpInfo::may_cache() const { return check_rmw(CEPH_OSD_RMW_FLAG_CACHE); }
26 bool OpInfo::rwordered_forced() const {
27 return check_rmw(CEPH_OSD_RMW_FLAG_RWORDERED);
28 }
29 bool OpInfo::rwordered() const {
30 return may_write() || may_cache() || rwordered_forced();
31 }
32
33 bool OpInfo::includes_pg_op() const {
34 return check_rmw(CEPH_OSD_RMW_FLAG_PGOP);
35 }
36 bool OpInfo::need_read_cap() const {
37 return check_rmw(CEPH_OSD_RMW_FLAG_READ);
38 }
39 bool OpInfo::need_write_cap() const {
40 return check_rmw(CEPH_OSD_RMW_FLAG_WRITE);
41 }
42 bool OpInfo::need_promote() const {
43 return check_rmw(CEPH_OSD_RMW_FLAG_FORCE_PROMOTE);
44 }
45 bool OpInfo::need_skip_handle_cache() const {
46 return check_rmw(CEPH_OSD_RMW_FLAG_SKIP_HANDLE_CACHE);
47 }
48 bool OpInfo::need_skip_promote() const {
49 return check_rmw(CEPH_OSD_RMW_FLAG_SKIP_PROMOTE);
50 }
51 bool OpInfo::allows_returnvec() const {
52 return check_rmw(CEPH_OSD_RMW_FLAG_RETURNVEC);
53 }
54
55 void OpInfo::set_rmw_flags(int flags) {
56 rmw_flags |= flags;
57 }
58
59 void OpInfo::set_read() { set_rmw_flags(CEPH_OSD_RMW_FLAG_READ); }
60 void OpInfo::set_write() { set_rmw_flags(CEPH_OSD_RMW_FLAG_WRITE); }
61 void OpInfo::set_class_read() { set_rmw_flags(CEPH_OSD_RMW_FLAG_CLASS_READ); }
62 void OpInfo::set_class_write() { set_rmw_flags(CEPH_OSD_RMW_FLAG_CLASS_WRITE); }
63 void OpInfo::set_pg_op() { set_rmw_flags(CEPH_OSD_RMW_FLAG_PGOP); }
64 void OpInfo::set_cache() { set_rmw_flags(CEPH_OSD_RMW_FLAG_CACHE); }
65 void OpInfo::set_promote() { set_rmw_flags(CEPH_OSD_RMW_FLAG_FORCE_PROMOTE); }
66 void OpInfo::set_skip_handle_cache() { set_rmw_flags(CEPH_OSD_RMW_FLAG_SKIP_HANDLE_CACHE); }
67 void OpInfo::set_skip_promote() { set_rmw_flags(CEPH_OSD_RMW_FLAG_SKIP_PROMOTE); }
68 void OpInfo::set_force_rwordered() { set_rmw_flags(CEPH_OSD_RMW_FLAG_RWORDERED); }
69 void OpInfo::set_returnvec() { set_rmw_flags(CEPH_OSD_RMW_FLAG_RETURNVEC); }
70
71
72 int OpInfo::set_from_op(
73 const MOSDOp *m,
74 const OSDMap &osdmap)
75 {
76 // client flags have no bearing on whether an op is a read, write, etc.
77 clear();
78
79 if (m->has_flag(CEPH_OSD_FLAG_RWORDERED)) {
80 set_force_rwordered();
81 }
82 if (m->has_flag(CEPH_OSD_FLAG_RETURNVEC)) {
83 set_returnvec();
84 }
85 return set_from_op(m->ops, m->get_pg(), osdmap);
86 }
87
88 int OpInfo::set_from_op(
89 const std::vector<OSDOp>& ops,
90 const pg_t& pg,
91 const OSDMap &osdmap)
92 {
93 vector<OSDOp>::const_iterator iter;
94
95 // set bits based on op codes, called methods.
96 for (iter = ops.begin(); iter != ops.end(); ++iter) {
97 if ((iter->op.op == CEPH_OSD_OP_WATCH &&
98 iter->op.watch.op == CEPH_OSD_WATCH_OP_PING)) {
99 /* This a bit odd. PING isn't actually a write. It can't
100 * result in an update to the object_info. PINGs also aren't
101 * resent, so there's no reason to write out a log entry.
102 *
103 * However, we pipeline them behind writes, so let's force
104 * the write_ordered flag.
105 */
106 set_force_rwordered();
107 } else {
108 if (ceph_osd_op_mode_modify(iter->op.op))
109 set_write();
110 }
111 if (ceph_osd_op_mode_read(iter->op.op))
112 set_read();
113
114 // set READ flag if there are src_oids
115 if (iter->soid.oid.name.length())
116 set_read();
117
118 // set PGOP flag if there are PG ops
119 if (ceph_osd_op_type_pg(iter->op.op))
120 set_pg_op();
121
122 if (ceph_osd_op_mode_cache(iter->op.op))
123 set_cache();
124
125 // check for ec base pool
126 int64_t poolid = pg.pool();
127 const pg_pool_t *pool = osdmap.get_pg_pool(poolid);
128 if (pool && pool->is_tier()) {
129 const pg_pool_t *base_pool = osdmap.get_pg_pool(pool->tier_of);
130 if (base_pool && base_pool->require_rollback()) {
131 if ((iter->op.op != CEPH_OSD_OP_READ) &&
132 (iter->op.op != CEPH_OSD_OP_CHECKSUM) &&
133 (iter->op.op != CEPH_OSD_OP_CMPEXT) &&
134 (iter->op.op != CEPH_OSD_OP_STAT) &&
135 (iter->op.op != CEPH_OSD_OP_ISDIRTY) &&
136 (iter->op.op != CEPH_OSD_OP_UNDIRTY) &&
137 (iter->op.op != CEPH_OSD_OP_GETXATTR) &&
138 (iter->op.op != CEPH_OSD_OP_GETXATTRS) &&
139 (iter->op.op != CEPH_OSD_OP_CMPXATTR) &&
140 (iter->op.op != CEPH_OSD_OP_ASSERT_VER) &&
141 (iter->op.op != CEPH_OSD_OP_LIST_WATCHERS) &&
142 (iter->op.op != CEPH_OSD_OP_LIST_SNAPS) &&
143 (iter->op.op != CEPH_OSD_OP_SETALLOCHINT) &&
144 (iter->op.op != CEPH_OSD_OP_WRITEFULL) &&
145 (iter->op.op != CEPH_OSD_OP_ROLLBACK) &&
146 (iter->op.op != CEPH_OSD_OP_CREATE) &&
147 (iter->op.op != CEPH_OSD_OP_DELETE) &&
148 (iter->op.op != CEPH_OSD_OP_SETXATTR) &&
149 (iter->op.op != CEPH_OSD_OP_RMXATTR) &&
150 (iter->op.op != CEPH_OSD_OP_STARTSYNC) &&
151 (iter->op.op != CEPH_OSD_OP_COPY_GET) &&
152 (iter->op.op != CEPH_OSD_OP_COPY_FROM) &&
153 (iter->op.op != CEPH_OSD_OP_COPY_FROM2)) {
154 set_promote();
155 }
156 }
157 }
158
159 switch (iter->op.op) {
160 case CEPH_OSD_OP_CALL:
161 {
162 bufferlist::iterator bp = const_cast<bufferlist&>(iter->indata).begin();
163 int is_write, is_read;
164 string cname, mname;
165 bp.copy(iter->op.cls.class_len, cname);
166 bp.copy(iter->op.cls.method_len, mname);
167
168 ClassHandler::ClassData *cls;
169 int r = ClassHandler::get_instance().open_class(cname, &cls);
170 if (r) {
171 if (r == -ENOENT)
172 r = -EOPNOTSUPP;
173 else if (r != -EPERM) // propagate permission errors
174 r = -EIO;
175 return r;
176 }
177 int flags = cls->get_method_flags(mname);
178 if (flags < 0) {
179 if (flags == -ENOENT)
180 r = -EOPNOTSUPP;
181 else
182 r = flags;
183 return r;
184 }
185 is_read = flags & CLS_METHOD_RD;
186 is_write = flags & CLS_METHOD_WR;
187 bool is_promote = flags & CLS_METHOD_PROMOTE;
188
189 if (is_read)
190 set_class_read();
191 if (is_write)
192 set_class_write();
193 if (is_promote)
194 set_promote();
195 add_class(std::move(cname), std::move(mname), is_read, is_write,
196 cls->allowed);
197 break;
198 }
199
200 case CEPH_OSD_OP_WATCH:
201 // force the read bit for watch since it is depends on previous
202 // watch state (and may return early if the watch exists) or, in
203 // the case of ping, is simply a read op.
204 set_read();
205 // fall through
206 case CEPH_OSD_OP_NOTIFY:
207 case CEPH_OSD_OP_NOTIFY_ACK:
208 {
209 set_promote();
210 break;
211 }
212
213 case CEPH_OSD_OP_DELETE:
214 // if we get a delete with FAILOK we can skip handle cache. without
215 // FAILOK we still need to promote (or do something smarter) to
216 // determine whether to return ENOENT or 0.
217 if (iter == ops.begin() &&
218 iter->op.flags == CEPH_OSD_OP_FLAG_FAILOK) {
219 set_skip_handle_cache();
220 }
221 // skip promotion when proxying a delete op
222 if (ops.size() == 1) {
223 set_skip_promote();
224 }
225 break;
226
227 case CEPH_OSD_OP_CACHE_TRY_FLUSH:
228 case CEPH_OSD_OP_CACHE_FLUSH:
229 case CEPH_OSD_OP_CACHE_EVICT:
230 // If try_flush/flush/evict is the only op, can skip handle cache.
231 if (ops.size() == 1) {
232 set_skip_handle_cache();
233 }
234 break;
235
236 case CEPH_OSD_OP_READ:
237 case CEPH_OSD_OP_SYNC_READ:
238 case CEPH_OSD_OP_SPARSE_READ:
239 case CEPH_OSD_OP_CHECKSUM:
240 case CEPH_OSD_OP_WRITEFULL:
241 if (ops.size() == 1 &&
242 (iter->op.flags & CEPH_OSD_OP_FLAG_FADVISE_NOCACHE ||
243 iter->op.flags & CEPH_OSD_OP_FLAG_FADVISE_DONTNEED)) {
244 set_skip_promote();
245 }
246 break;
247
248 // force promotion when pin an object in cache tier
249 case CEPH_OSD_OP_CACHE_PIN:
250 set_promote();
251 break;
252
253 default:
254 break;
255 }
256 }
257
258 if (rmw_flags == 0)
259 return -EINVAL;
260
261 return 0;
262
263 }
264
265 ostream& operator<<(ostream& out, const OpInfo::ClassInfo& i)
266 {
267 out << "class " << i.class_name << " method " << i.method_name
268 << " rd " << i.read << " wr " << i.write << " allowed " << i.allowed;
269 return out;
270 }