]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | /* | |
5 | * This is a simple example RADOS class, designed to be usable as a | |
6 | * template for implementing new methods. | |
7 | * | |
8 | * Our goal here is to illustrate the interface between the OSD and | |
9 | * the class and demonstrate what kinds of things a class can do. | |
10 | * | |
11 | * Note that any *real* class will probably have a much more | |
12 | * sophisticated protocol dealing with the in and out data buffers. | |
13 | * For an example of the model that we've settled on for handling that | |
14 | * in a clean way, please refer to cls_lock or cls_version for | |
15 | * relatively simple examples of how the parameters can be | |
16 | * encoded in a way that allows for forward and backward compatibility | |
17 | * between client and class revisions. | |
18 | */ | |
19 | ||
20 | /* | |
21 | * A quick note about bufferlists: | |
22 | * | |
23 | * The bufferlist class allows memory buffers to be concatenated, | |
24 | * truncated, spliced, "copied," encoded/embedded, and decoded. For | |
25 | * most operations no actual data is ever copied, making bufferlists | |
26 | * very convenient for efficiently passing data around. | |
27 | * | |
28 | * bufferlist is actually a typedef of buffer::list, and is defined in | |
29 | * include/buffer.h (and implemented in common/buffer.cc). | |
30 | */ | |
31 | ||
32 | #include <algorithm> | |
33 | #include <string> | |
34 | #include <sstream> | |
9f95a23c | 35 | #include <cerrno> |
7c673cae FG |
36 | |
37 | #include "objclass/objclass.h" | |
9f95a23c TL |
38 | #include "osd/osd_types.h" |
39 | ||
9f95a23c | 40 | using std::string; |
f67539c2 TL |
41 | using std::ostringstream; |
42 | ||
43 | using ceph::bufferlist; | |
44 | using ceph::decode; | |
45 | using ceph::encode; | |
7c673cae FG |
46 | |
47 | CLS_VER(1,0) | |
48 | CLS_NAME(hello) | |
49 | ||
50 | /** | |
51 | * say hello - a "read" method that does not depend on the object | |
52 | * | |
53 | * This is an example of a method that does some computation and | |
54 | * returns data to the caller, without depending on the local object | |
55 | * content. | |
56 | */ | |
57 | static int say_hello(cls_method_context_t hctx, bufferlist *in, bufferlist *out) | |
58 | { | |
59 | // see if the input data from the client matches what this method | |
60 | // expects to receive. your class can fill this buffer with what it | |
61 | // wants. | |
62 | if (in->length() > 100) | |
63 | return -EINVAL; | |
64 | ||
65 | // we generate our reply | |
66 | out->append("Hello, "); | |
67 | if (in->length() == 0) | |
68 | out->append("world"); | |
69 | else | |
70 | out->append(*in); | |
71 | out->append("!"); | |
72 | ||
73 | // this return value will be returned back to the librados caller | |
74 | return 0; | |
75 | } | |
76 | ||
77 | /** | |
78 | * record hello - a "write" method that creates an object | |
79 | * | |
80 | * This method modifies a local object (in this case, by creating it | |
81 | * if it doesn't exist). We make multiple write calls (write, | |
82 | * setxattr) which are accumulated and applied as an atomic | |
83 | * transaction. | |
84 | */ | |
85 | static int record_hello(cls_method_context_t hctx, bufferlist *in, bufferlist *out) | |
86 | { | |
87 | // we can write arbitrary stuff to the ceph-osd debug log. each log | |
88 | // message is accompanied by an integer log level. smaller is | |
89 | // "louder". how much of this makes it into the log is controlled | |
90 | // by the debug_cls option on the ceph-osd, similar to how other log | |
91 | // levels are controlled. this message, at level 20, will generally | |
92 | // not be seen by anyone unless debug_cls is set at 20 or higher. | |
93 | CLS_LOG(20, "in record_hello"); | |
94 | ||
95 | // see if the input data from the client matches what this method | |
96 | // expects to receive. your class can fill this buffer with what it | |
97 | // wants. | |
98 | if (in->length() > 100) | |
99 | return -EINVAL; | |
100 | ||
101 | // only say hello to non-existent objects | |
102 | if (cls_cxx_stat(hctx, NULL, NULL) == 0) | |
103 | return -EEXIST; | |
104 | ||
105 | bufferlist content; | |
106 | content.append("Hello, "); | |
107 | if (in->length() == 0) | |
108 | content.append("world"); | |
109 | else | |
110 | content.append(*in); | |
111 | content.append("!"); | |
112 | ||
113 | // create/write the object | |
114 | int r = cls_cxx_write_full(hctx, &content); | |
115 | if (r < 0) | |
116 | return r; | |
117 | ||
118 | // also make note of who said it | |
119 | entity_inst_t origin; | |
120 | cls_get_request_origin(hctx, &origin); | |
121 | ostringstream ss; | |
122 | ss << origin; | |
123 | bufferlist attrbl; | |
124 | attrbl.append(ss.str()); | |
125 | r = cls_cxx_setxattr(hctx, "said_by", &attrbl); | |
126 | if (r < 0) | |
127 | return r; | |
128 | ||
129 | // For write operations, there are two possible outcomes: | |
130 | // | |
131 | // * For a failure, we return a negative error code. The out | |
132 | // buffer can contain any data that we want, and that data will | |
133 | // be returned to the caller. No change is made to the object. | |
134 | // | |
135 | // * For a success, we must return 0 and *no* data in the out | |
136 | // buffer. This is becaues the OSD does not log write result | |
137 | // codes or output buffers and we need a replayed/resent | |
138 | // operation (e.g., after a TCP disconnect) to be idempotent. | |
139 | // | |
140 | // If a class returns a positive value or puts data in the out | |
141 | // buffer, the OSD code will ignore it and return 0 to the | |
142 | // client. | |
143 | return 0; | |
144 | } | |
145 | ||
9f95a23c | 146 | static int write_return_data(cls_method_context_t hctx, bufferlist *in, bufferlist *out) |
7c673cae FG |
147 | { |
148 | // make some change to the object | |
149 | bufferlist attrbl; | |
150 | attrbl.append("bar"); | |
151 | int r = cls_cxx_setxattr(hctx, "foo", &attrbl); | |
152 | if (r < 0) | |
153 | return r; | |
154 | ||
155 | if (in->length() > 0) { | |
156 | // note that if we return anything < 0 (an error), this | |
157 | // operation/transaction will abort, and the setattr above will | |
158 | // never happen. however, we *can* return data on error. | |
159 | out->append("too much input data!"); | |
160 | return -EINVAL; | |
161 | } | |
162 | ||
9f95a23c TL |
163 | // try to return some data. note that this will only reach the client |
164 | // if the client has set the CEPH_OSD_FLAG_RETURNVEC flag on the op. | |
165 | out->append("you might see this"); | |
7c673cae | 166 | |
9f95a23c TL |
167 | // client will only see a >0 value with the RETURNVEC flag is set; otherwise |
168 | // they will see 0. | |
7c673cae FG |
169 | return 42; |
170 | } | |
171 | ||
9f95a23c TL |
172 | static int write_too_much_return_data(cls_method_context_t hctx, bufferlist *in, bufferlist *out) |
173 | { | |
174 | // make some change to the object | |
175 | bufferlist attrbl; | |
176 | attrbl.append("bar"); | |
177 | int r = cls_cxx_setxattr(hctx, "foo", &attrbl); | |
178 | if (r < 0) | |
179 | return r; | |
180 | ||
181 | // try to return too much data. this should be enough to exceed | |
182 | // osd_max_write_op_reply_len, which defaults to a pretty small number. | |
183 | for (unsigned i=0; i < 10; ++i) { | |
184 | out->append("you should not see this because it is toooooo long. "); | |
185 | } | |
186 | ||
187 | return 42; | |
188 | } | |
7c673cae FG |
189 | |
190 | /** | |
191 | * replay - a "read" method to get a previously recorded hello | |
192 | * | |
193 | * This is a read method that will retrieve a previously recorded | |
194 | * hello statement. | |
195 | */ | |
196 | static int replay(cls_method_context_t hctx, bufferlist *in, bufferlist *out) | |
197 | { | |
198 | // read contents out of the on-disk object. our behavior can be a | |
199 | // function of either the request alone, or the request and the | |
200 | // on-disk state, depending on whether the RD flag is specified when | |
201 | // registering the method (see the __cls__init function below). | |
202 | int r = cls_cxx_read(hctx, 0, 1100, out); | |
203 | if (r < 0) | |
204 | return r; | |
205 | ||
206 | // note that our return value need not be the length of the returned | |
207 | // data; it can be whatever value we want: positive, zero or | |
208 | // negative (this is a read). | |
209 | return 0; | |
210 | } | |
211 | ||
212 | /** | |
213 | * turn_it_to_11 - a "write" method that mutates existing object data | |
214 | * | |
215 | * A write method can depend on previous object content (i.e., perform | |
216 | * a read/modify/write operation). This atomically transitions the | |
217 | * object state from the old content to the new content. | |
218 | */ | |
219 | static int turn_it_to_11(cls_method_context_t hctx, bufferlist *in, bufferlist *out) | |
220 | { | |
221 | // see if the input data from the client matches what this method | |
222 | // expects to receive. your class can fill this buffer with what it | |
223 | // wants. | |
224 | if (in->length() != 0) | |
225 | return -EINVAL; | |
226 | ||
227 | bufferlist previous; | |
228 | int r = cls_cxx_read(hctx, 0, 1100, &previous); | |
229 | if (r < 0) | |
230 | return r; | |
231 | ||
232 | std::string str(previous.c_str(), previous.length()); | |
233 | std::transform(str.begin(), str.end(), str.begin(), ::toupper); | |
234 | previous.clear(); | |
235 | previous.append(str); | |
236 | ||
237 | // replace previous byte data content (write_full == truncate(0) + write) | |
238 | r = cls_cxx_write_full(hctx, &previous); | |
239 | if (r < 0) | |
240 | return r; | |
241 | ||
242 | // record who did it | |
243 | entity_inst_t origin; | |
244 | cls_get_request_origin(hctx, &origin); | |
245 | ostringstream ss; | |
246 | ss << origin; | |
247 | bufferlist attrbl; | |
248 | attrbl.append(ss.str()); | |
249 | r = cls_cxx_setxattr(hctx, "amplified_by", &attrbl); | |
250 | if (r < 0) | |
251 | return r; | |
252 | ||
253 | // return value is 0 for success; out buffer is empty. | |
254 | return 0; | |
255 | } | |
256 | ||
257 | /** | |
258 | * example method that does not behave | |
259 | * | |
260 | * This method is registered as WR but tries to read | |
261 | */ | |
262 | static int bad_reader(cls_method_context_t hctx, bufferlist *in, bufferlist *out) | |
263 | { | |
264 | return cls_cxx_read(hctx, 0, 100, out); | |
265 | } | |
266 | ||
267 | /** | |
268 | * example method that does not behave | |
269 | * | |
270 | * This method is registered as RD but tries to write | |
271 | */ | |
272 | static int bad_writer(cls_method_context_t hctx, bufferlist *in, bufferlist *out) | |
273 | { | |
274 | return cls_cxx_write_full(hctx, in); | |
275 | } | |
276 | ||
277 | ||
278 | class PGLSHelloFilter : public PGLSFilter { | |
279 | string val; | |
280 | public: | |
11fdf7f2 | 281 | int init(bufferlist::const_iterator& params) override { |
7c673cae | 282 | try { |
11fdf7f2 TL |
283 | decode(xattr, params); |
284 | decode(val, params); | |
f67539c2 | 285 | } catch (ceph::buffer::error &e) { |
7c673cae FG |
286 | return -EINVAL; |
287 | } | |
288 | return 0; | |
289 | } | |
290 | ||
291 | ~PGLSHelloFilter() override {} | |
9f95a23c TL |
292 | bool filter(const hobject_t& obj, |
293 | const bufferlist& xattr_data) const override | |
7c673cae | 294 | { |
9f95a23c | 295 | return xattr_data.contents_equal(val.c_str(), val.size()); |
7c673cae FG |
296 | } |
297 | }; | |
298 | ||
299 | ||
300 | PGLSFilter *hello_filter() | |
301 | { | |
302 | return new PGLSHelloFilter(); | |
303 | } | |
304 | ||
305 | ||
306 | /** | |
307 | * initialize class | |
308 | * | |
309 | * We do two things here: we register the new class, and then register | |
310 | * all of the class's methods. | |
311 | */ | |
312 | CLS_INIT(hello) | |
313 | { | |
314 | // this log message, at level 0, will always appear in the ceph-osd | |
315 | // log file. | |
316 | CLS_LOG(0, "loading cls_hello"); | |
317 | ||
318 | cls_handle_t h_class; | |
319 | cls_method_handle_t h_say_hello; | |
320 | cls_method_handle_t h_record_hello; | |
321 | cls_method_handle_t h_replay; | |
9f95a23c | 322 | cls_method_handle_t h_write_return_data; |
7c673cae | 323 | cls_method_handle_t h_writes_dont_return_data; |
9f95a23c | 324 | cls_method_handle_t h_write_too_much_return_data; |
7c673cae FG |
325 | cls_method_handle_t h_turn_it_to_11; |
326 | cls_method_handle_t h_bad_reader; | |
327 | cls_method_handle_t h_bad_writer; | |
328 | ||
329 | cls_register("hello", &h_class); | |
330 | ||
331 | // There are two flags we specify for methods: | |
332 | // | |
333 | // RD : whether this method (may) read prior object state | |
334 | // WR : whether this method (may) write or update the object | |
335 | // | |
336 | // A method can be RD, WR, neither, or both. If a method does | |
337 | // neither, the data it returns to the caller is a function of the | |
338 | // request and not the object contents. | |
339 | ||
340 | cls_register_cxx_method(h_class, "say_hello", | |
341 | CLS_METHOD_RD, | |
342 | say_hello, &h_say_hello); | |
343 | cls_register_cxx_method(h_class, "record_hello", | |
344 | CLS_METHOD_WR | CLS_METHOD_PROMOTE, | |
345 | record_hello, &h_record_hello); | |
9f95a23c TL |
346 | cls_register_cxx_method(h_class, "write_return_data", |
347 | CLS_METHOD_WR, | |
348 | write_return_data, &h_write_return_data); | |
349 | // legacy alias for this method for pre-octopus clients | |
7c673cae FG |
350 | cls_register_cxx_method(h_class, "writes_dont_return_data", |
351 | CLS_METHOD_WR, | |
9f95a23c TL |
352 | write_return_data, &h_writes_dont_return_data); |
353 | cls_register_cxx_method(h_class, "write_too_much_return_data", | |
354 | CLS_METHOD_WR, | |
355 | write_too_much_return_data, &h_write_too_much_return_data); | |
7c673cae FG |
356 | cls_register_cxx_method(h_class, "replay", |
357 | CLS_METHOD_RD, | |
358 | replay, &h_replay); | |
359 | ||
360 | // RD | WR is a read-modify-write method. | |
361 | cls_register_cxx_method(h_class, "turn_it_to_11", | |
362 | CLS_METHOD_RD | CLS_METHOD_WR | CLS_METHOD_PROMOTE, | |
363 | turn_it_to_11, &h_turn_it_to_11); | |
364 | ||
365 | // counter-examples | |
366 | cls_register_cxx_method(h_class, "bad_reader", CLS_METHOD_WR, | |
367 | bad_reader, &h_bad_reader); | |
368 | cls_register_cxx_method(h_class, "bad_writer", CLS_METHOD_RD, | |
369 | bad_writer, &h_bad_writer); | |
370 | ||
371 | // A PGLS filter | |
372 | cls_register_cxx_filter(h_class, "hello", hello_filter); | |
373 | } |