]> git.proxmox.com Git - ceph.git/blob - ceph/src/osd/osd_op_util.cc
import 15.2.0 Octopus source
[ceph.git] / ceph / src / osd / osd_op_util.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "osd/osd_op_util.h"
5
6 #include "osd/ClassHandler.h"
7 #include "messages/MOSDOp.h"
8
9 bool OpInfo::check_rmw(int flag) const {
10 ceph_assert(rmw_flags != 0);
11 return rmw_flags & flag;
12 }
13 bool OpInfo::may_read() const {
14 return need_read_cap() || check_rmw(CEPH_OSD_RMW_FLAG_CLASS_READ);
15 }
16 bool OpInfo::may_write() const {
17 return need_write_cap() || check_rmw(CEPH_OSD_RMW_FLAG_CLASS_WRITE);
18 }
19 bool OpInfo::may_cache() const { return check_rmw(CEPH_OSD_RMW_FLAG_CACHE); }
20 bool OpInfo::rwordered_forced() const {
21 return check_rmw(CEPH_OSD_RMW_FLAG_RWORDERED);
22 }
23 bool OpInfo::rwordered() const {
24 return may_write() || may_cache() || rwordered_forced();
25 }
26
27 bool OpInfo::includes_pg_op() const {
28 return check_rmw(CEPH_OSD_RMW_FLAG_PGOP);
29 }
30 bool OpInfo::need_read_cap() const {
31 return check_rmw(CEPH_OSD_RMW_FLAG_READ);
32 }
33 bool OpInfo::need_write_cap() const {
34 return check_rmw(CEPH_OSD_RMW_FLAG_WRITE);
35 }
36 bool OpInfo::need_promote() const {
37 return check_rmw(CEPH_OSD_RMW_FLAG_FORCE_PROMOTE);
38 }
39 bool OpInfo::need_skip_handle_cache() const {
40 return check_rmw(CEPH_OSD_RMW_FLAG_SKIP_HANDLE_CACHE);
41 }
42 bool OpInfo::need_skip_promote() const {
43 return check_rmw(CEPH_OSD_RMW_FLAG_SKIP_PROMOTE);
44 }
45 bool OpInfo::allows_returnvec() const {
46 return check_rmw(CEPH_OSD_RMW_FLAG_RETURNVEC);
47 }
48
49 void OpInfo::set_rmw_flags(int flags) {
50 rmw_flags |= flags;
51 }
52
53 void OpInfo::set_read() { set_rmw_flags(CEPH_OSD_RMW_FLAG_READ); }
54 void OpInfo::set_write() { set_rmw_flags(CEPH_OSD_RMW_FLAG_WRITE); }
55 void OpInfo::set_class_read() { set_rmw_flags(CEPH_OSD_RMW_FLAG_CLASS_READ); }
56 void OpInfo::set_class_write() { set_rmw_flags(CEPH_OSD_RMW_FLAG_CLASS_WRITE); }
57 void OpInfo::set_pg_op() { set_rmw_flags(CEPH_OSD_RMW_FLAG_PGOP); }
58 void OpInfo::set_cache() { set_rmw_flags(CEPH_OSD_RMW_FLAG_CACHE); }
59 void OpInfo::set_promote() { set_rmw_flags(CEPH_OSD_RMW_FLAG_FORCE_PROMOTE); }
60 void OpInfo::set_skip_handle_cache() { set_rmw_flags(CEPH_OSD_RMW_FLAG_SKIP_HANDLE_CACHE); }
61 void OpInfo::set_skip_promote() { set_rmw_flags(CEPH_OSD_RMW_FLAG_SKIP_PROMOTE); }
62 void OpInfo::set_force_rwordered() { set_rmw_flags(CEPH_OSD_RMW_FLAG_RWORDERED); }
63 void OpInfo::set_returnvec() { set_rmw_flags(CEPH_OSD_RMW_FLAG_RETURNVEC); }
64
65
66 int OpInfo::set_from_op(
67 const MOSDOp *m,
68 const OSDMap &osdmap)
69 {
70 vector<OSDOp>::const_iterator iter;
71
72 // client flags have no bearing on whether an op is a read, write, etc.
73 clear();
74
75 if (m->has_flag(CEPH_OSD_FLAG_RWORDERED)) {
76 set_force_rwordered();
77 }
78 if (m->has_flag(CEPH_OSD_FLAG_RETURNVEC)) {
79 set_returnvec();
80 }
81
82 // set bits based on op codes, called methods.
83 for (iter = m->ops.begin(); iter != m->ops.end(); ++iter) {
84 if ((iter->op.op == CEPH_OSD_OP_WATCH &&
85 iter->op.watch.op == CEPH_OSD_WATCH_OP_PING)) {
86 /* This a bit odd. PING isn't actually a write. It can't
87 * result in an update to the object_info. PINGs also aren't
88 * resent, so there's no reason to write out a log entry.
89 *
90 * However, we pipeline them behind writes, so let's force
91 * the write_ordered flag.
92 */
93 set_force_rwordered();
94 } else {
95 if (ceph_osd_op_mode_modify(iter->op.op))
96 set_write();
97 }
98 if (ceph_osd_op_mode_read(iter->op.op))
99 set_read();
100
101 // set READ flag if there are src_oids
102 if (iter->soid.oid.name.length())
103 set_read();
104
105 // set PGOP flag if there are PG ops
106 if (ceph_osd_op_type_pg(iter->op.op))
107 set_pg_op();
108
109 if (ceph_osd_op_mode_cache(iter->op.op))
110 set_cache();
111
112 // check for ec base pool
113 int64_t poolid = m->get_pg().pool();
114 const pg_pool_t *pool = osdmap.get_pg_pool(poolid);
115 if (pool && pool->is_tier()) {
116 const pg_pool_t *base_pool = osdmap.get_pg_pool(pool->tier_of);
117 if (base_pool && base_pool->require_rollback()) {
118 if ((iter->op.op != CEPH_OSD_OP_READ) &&
119 (iter->op.op != CEPH_OSD_OP_CHECKSUM) &&
120 (iter->op.op != CEPH_OSD_OP_CMPEXT) &&
121 (iter->op.op != CEPH_OSD_OP_STAT) &&
122 (iter->op.op != CEPH_OSD_OP_ISDIRTY) &&
123 (iter->op.op != CEPH_OSD_OP_UNDIRTY) &&
124 (iter->op.op != CEPH_OSD_OP_GETXATTR) &&
125 (iter->op.op != CEPH_OSD_OP_GETXATTRS) &&
126 (iter->op.op != CEPH_OSD_OP_CMPXATTR) &&
127 (iter->op.op != CEPH_OSD_OP_ASSERT_VER) &&
128 (iter->op.op != CEPH_OSD_OP_LIST_WATCHERS) &&
129 (iter->op.op != CEPH_OSD_OP_LIST_SNAPS) &&
130 (iter->op.op != CEPH_OSD_OP_SETALLOCHINT) &&
131 (iter->op.op != CEPH_OSD_OP_WRITEFULL) &&
132 (iter->op.op != CEPH_OSD_OP_ROLLBACK) &&
133 (iter->op.op != CEPH_OSD_OP_CREATE) &&
134 (iter->op.op != CEPH_OSD_OP_DELETE) &&
135 (iter->op.op != CEPH_OSD_OP_SETXATTR) &&
136 (iter->op.op != CEPH_OSD_OP_RMXATTR) &&
137 (iter->op.op != CEPH_OSD_OP_STARTSYNC) &&
138 (iter->op.op != CEPH_OSD_OP_COPY_GET) &&
139 (iter->op.op != CEPH_OSD_OP_COPY_FROM) &&
140 (iter->op.op != CEPH_OSD_OP_COPY_FROM2)) {
141 set_promote();
142 }
143 }
144 }
145
146 switch (iter->op.op) {
147 case CEPH_OSD_OP_CALL:
148 {
149 bufferlist::iterator bp = const_cast<bufferlist&>(iter->indata).begin();
150 int is_write, is_read;
151 string cname, mname;
152 bp.copy(iter->op.cls.class_len, cname);
153 bp.copy(iter->op.cls.method_len, mname);
154
155 ClassHandler::ClassData *cls;
156 int r = ClassHandler::get_instance().open_class(cname, &cls);
157 if (r) {
158 if (r == -ENOENT)
159 r = -EOPNOTSUPP;
160 else if (r != -EPERM) // propagate permission errors
161 r = -EIO;
162 return r;
163 }
164 int flags = cls->get_method_flags(mname);
165 if (flags < 0) {
166 if (flags == -ENOENT)
167 r = -EOPNOTSUPP;
168 else
169 r = flags;
170 return r;
171 }
172 is_read = flags & CLS_METHOD_RD;
173 is_write = flags & CLS_METHOD_WR;
174 bool is_promote = flags & CLS_METHOD_PROMOTE;
175
176 if (is_read)
177 set_class_read();
178 if (is_write)
179 set_class_write();
180 if (is_promote)
181 set_promote();
182 add_class(std::move(cname), std::move(mname), is_read, is_write,
183 cls->whitelisted);
184 break;
185 }
186
187 case CEPH_OSD_OP_WATCH:
188 // force the read bit for watch since it is depends on previous
189 // watch state (and may return early if the watch exists) or, in
190 // the case of ping, is simply a read op.
191 set_read();
192 // fall through
193 case CEPH_OSD_OP_NOTIFY:
194 case CEPH_OSD_OP_NOTIFY_ACK:
195 {
196 set_promote();
197 break;
198 }
199
200 case CEPH_OSD_OP_DELETE:
201 // if we get a delete with FAILOK we can skip handle cache. without
202 // FAILOK we still need to promote (or do something smarter) to
203 // determine whether to return ENOENT or 0.
204 if (iter == m->ops.begin() &&
205 iter->op.flags == CEPH_OSD_OP_FLAG_FAILOK) {
206 set_skip_handle_cache();
207 }
208 // skip promotion when proxying a delete op
209 if (m->ops.size() == 1) {
210 set_skip_promote();
211 }
212 break;
213
214 case CEPH_OSD_OP_CACHE_TRY_FLUSH:
215 case CEPH_OSD_OP_CACHE_FLUSH:
216 case CEPH_OSD_OP_CACHE_EVICT:
217 // If try_flush/flush/evict is the only op, can skip handle cache.
218 if (m->ops.size() == 1) {
219 set_skip_handle_cache();
220 }
221 break;
222
223 case CEPH_OSD_OP_READ:
224 case CEPH_OSD_OP_SYNC_READ:
225 case CEPH_OSD_OP_SPARSE_READ:
226 case CEPH_OSD_OP_CHECKSUM:
227 case CEPH_OSD_OP_WRITEFULL:
228 if (m->ops.size() == 1 &&
229 (iter->op.flags & CEPH_OSD_OP_FLAG_FADVISE_NOCACHE ||
230 iter->op.flags & CEPH_OSD_OP_FLAG_FADVISE_DONTNEED)) {
231 set_skip_promote();
232 }
233 break;
234
235 // force promotion when pin an object in cache tier
236 case CEPH_OSD_OP_CACHE_PIN:
237 set_promote();
238 break;
239
240 default:
241 break;
242 }
243 }
244
245 if (rmw_flags == 0)
246 return -EINVAL;
247
248 return 0;
249
250 }
251
252 ostream& operator<<(ostream& out, const OpInfo::ClassInfo& i)
253 {
254 out << "class " << i.class_name << " method " << i.method_name
255 << " rd " << i.read << " wr " << i.write << " wl " << i.whitelisted;
256 return out;
257 }