]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2014 Sebastien Ponce <sebastien.ponce@cern.ch> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #ifndef CEPH_LIBRADOSSTRIPER_RADOSSTRIPERIMPL_H | |
16 | #define CEPH_LIBRADOSSTRIPER_RADOSSTRIPERIMPL_H | |
17 | ||
18 | #include <string> | |
19 | ||
7c673cae FG |
20 | #include "include/rados/librados.h" |
21 | #include "include/rados/librados.hpp" | |
22 | #include "include/radosstriper/libradosstriper.h" | |
23 | #include "include/radosstriper/libradosstriper.hpp" | |
24 | ||
25 | #include "librados/IoCtxImpl.h" | |
26 | #include "librados/AioCompletionImpl.h" | |
27 | #include "common/RefCountedObj.h" | |
28 | ||
29 | struct libradosstriper::RadosStriperImpl { | |
30 | ||
31 | /** | |
32 | * struct handling the data needed to pass to the call back | |
33 | * function in asynchronous operations | |
34 | */ | |
35 | struct CompletionData : RefCountedObject { | |
36 | /// constructor | |
37 | CompletionData(libradosstriper::RadosStriperImpl * striper, | |
38 | const std::string& soid, | |
39 | const std::string& lockCookie, | |
40 | librados::AioCompletionImpl *userCompletion = 0, | |
41 | int n = 1); | |
42 | /// destructor | |
43 | ~CompletionData() override; | |
44 | /// complete method | |
45 | void complete(int r); | |
46 | /// striper to be used to handle the write completion | |
47 | libradosstriper::RadosStriperImpl *m_striper; | |
48 | /// striped object concerned by the write operation | |
49 | std::string m_soid; | |
50 | /// shared lock to be released at completion | |
51 | std::string m_lockCookie; | |
52 | /// completion handler | |
53 | librados::IoCtxImpl::C_aio_Complete *m_ack; | |
54 | }; | |
55 | ||
56 | /** | |
57 | * struct handling the data needed to pass to the call back | |
58 | * function in asynchronous read operations | |
59 | */ | |
60 | struct ReadCompletionData : CompletionData { | |
61 | /// bufferlist containing final result | |
62 | bufferlist* m_bl; | |
63 | /// extents that will be read | |
64 | std::vector<ObjectExtent>* m_extents; | |
65 | /// intermediate results | |
66 | std::vector<bufferlist>* m_resultbl; | |
67 | /// return code of read completion, to be remembered until unlocking happened | |
68 | int m_readRc; | |
69 | /// completion object for the unlocking of the striped object at the end of the read | |
70 | librados::AioCompletion *m_unlockCompletion; | |
71 | /// constructor | |
72 | ReadCompletionData(libradosstriper::RadosStriperImpl * striper, | |
73 | const std::string& soid, | |
74 | const std::string& lockCookie, | |
75 | librados::AioCompletionImpl *userCompletion, | |
76 | bufferlist* bl, | |
77 | std::vector<ObjectExtent>* extents, | |
78 | std::vector<bufferlist>* resultbl, | |
79 | int n); | |
80 | /// destructor | |
81 | ~ReadCompletionData() override; | |
82 | /// complete method for when reading is over | |
83 | void complete_read(int r); | |
84 | /// complete method for when object is unlocked | |
85 | void complete_unlock(int r); | |
86 | }; | |
87 | ||
88 | /** | |
89 | * struct handling the data needed to pass to the call back | |
90 | * function in asynchronous write operations | |
91 | */ | |
92 | struct WriteCompletionData : CompletionData { | |
93 | /// safe completion handler | |
94 | librados::IoCtxImpl::C_aio_Complete *m_safe; | |
95 | /// return code of write completion, to be remembered until unlocking happened | |
96 | int m_writeRc; | |
97 | /// completion object for the unlocking of the striped object at the end of the write | |
98 | librados::AioCompletion *m_unlockCompletion; | |
99 | /// constructor | |
100 | WriteCompletionData(libradosstriper::RadosStriperImpl * striper, | |
101 | const std::string& soid, | |
102 | const std::string& lockCookie, | |
103 | librados::AioCompletionImpl *userCompletion, | |
104 | int n); | |
105 | /// destructor | |
106 | ~WriteCompletionData() override; | |
107 | /// complete method for when writing is over | |
108 | void complete_write(int r); | |
109 | /// complete method for when object is unlocked | |
110 | void complete_unlock(int r); | |
111 | /// safe method | |
112 | void safe(int r); | |
113 | }; | |
114 | ||
115 | /** | |
116 | * struct handling the data needed to pass to the call back | |
117 | * function in asynchronous read operations of a Rados File | |
118 | */ | |
119 | struct RadosReadCompletionData : RefCountedObject { | |
120 | /// constructor | |
121 | RadosReadCompletionData(MultiAioCompletionImpl *multiAioCompl, | |
122 | uint64_t expectedBytes, | |
123 | bufferlist *bl, | |
124 | CephContext *context, | |
125 | int n = 1) : | |
126 | RefCountedObject(context, n), | |
127 | m_multiAioCompl(multiAioCompl), m_expectedBytes(expectedBytes), m_bl(bl) {}; | |
128 | /// the multi asynch io completion object to be used | |
129 | MultiAioCompletionImpl *m_multiAioCompl; | |
130 | /// the expected number of bytes | |
131 | uint64_t m_expectedBytes; | |
132 | /// the bufferlist object where data have been written | |
133 | bufferlist *m_bl; | |
134 | }; | |
135 | ||
136 | /** | |
137 | * struct handling (most of) the data needed to pass to the call back | |
138 | * function in asynchronous stat operations. | |
139 | * Inherited by the actual type for adding time information in different | |
140 | * versions (time_t or struct timespec) | |
141 | */ | |
142 | struct BasicStatCompletionData : CompletionData { | |
143 | /// constructor | |
144 | BasicStatCompletionData(libradosstriper::RadosStriperImpl* striper, | |
145 | const std::string& soid, | |
146 | librados::AioCompletionImpl *userCompletion, | |
147 | libradosstriper::MultiAioCompletionImpl *multiCompletion, | |
148 | uint64_t *psize, | |
149 | int n = 1) : | |
150 | CompletionData(striper, soid, "", userCompletion, n), | |
151 | m_multiCompletion(multiCompletion), m_psize(psize), | |
152 | m_statRC(0), m_getxattrRC(0) {}; | |
153 | // MultiAioCompletionImpl used to handle the double aysnc | |
154 | // call in the back (stat + getxattr) | |
155 | libradosstriper::MultiAioCompletionImpl *m_multiCompletion; | |
156 | // where to store the size of first objct | |
157 | // this will be ignored but we need a place to store it when | |
158 | // async stat is called | |
159 | uint64_t m_objectSize; | |
160 | // where to store the file size | |
161 | uint64_t *m_psize; | |
162 | /// the bufferlist object used for the getxattr call | |
163 | bufferlist m_bl; | |
164 | /// return code of the stat | |
165 | int m_statRC; | |
166 | /// return code of the getxattr | |
167 | int m_getxattrRC; | |
168 | }; | |
169 | ||
170 | /** | |
171 | * struct handling the data needed to pass to the call back | |
172 | * function in asynchronous stat operations. | |
173 | * Simple templated extension of BasicStatCompletionData. | |
174 | * The template parameter is the type of the time information | |
175 | * (used with time_t for stat and struct timespec for stat2) | |
176 | */ | |
177 | template<class TimeType> | |
178 | struct StatCompletionData : BasicStatCompletionData { | |
179 | /// constructor | |
180 | StatCompletionData(libradosstriper::RadosStriperImpl* striper, | |
181 | const std::string& soid, | |
182 | librados::AioCompletionImpl *userCompletion, | |
183 | libradosstriper::MultiAioCompletionImpl *multiCompletion, | |
184 | uint64_t *psize, | |
185 | TimeType *pmtime, | |
186 | int n = 1) : | |
187 | BasicStatCompletionData(striper, soid, userCompletion, multiCompletion, psize, n), | |
188 | m_pmtime(pmtime) {}; | |
189 | // where to store the file time | |
190 | TimeType *m_pmtime; | |
191 | }; | |
192 | ||
193 | /** | |
194 | * struct handling the data needed to pass to the call back | |
195 | * function in asynchronous remove operations of a Rados File | |
196 | */ | |
197 | struct RadosRemoveCompletionData : RefCountedObject { | |
198 | /// constructor | |
199 | RadosRemoveCompletionData(MultiAioCompletionImpl *multiAioCompl, | |
200 | CephContext *context) : | |
201 | RefCountedObject(context, 2), | |
202 | m_multiAioCompl(multiAioCompl) {}; | |
203 | /// the multi asynch io completion object to be used | |
204 | MultiAioCompletionImpl *m_multiAioCompl; | |
205 | }; | |
206 | ||
207 | struct RemoveCompletionData : CompletionData { | |
208 | /// removal flags | |
209 | int flags; | |
210 | /** | |
211 | * constructor | |
212 | * note that the constructed object will take ownership of the lock | |
213 | */ | |
214 | RemoveCompletionData(libradosstriper::RadosStriperImpl * striper, | |
215 | const std::string& soid, | |
216 | const std::string& lockCookie, | |
217 | librados::AioCompletionImpl *userCompletion, | |
218 | int flags = 0); | |
219 | }; | |
220 | ||
221 | /** | |
222 | * struct handling the data needed to pass to the call back | |
223 | * function in asynchronous truncate operations | |
224 | */ | |
225 | struct TruncateCompletionData : RefCountedObject { | |
226 | /// constructor | |
227 | TruncateCompletionData(libradosstriper::RadosStriperImpl* striper, | |
228 | const std::string& soid, | |
229 | uint64_t size); | |
230 | /// destructor | |
231 | ~TruncateCompletionData() override; | |
232 | /// striper to be used | |
233 | libradosstriper::RadosStriperImpl *m_striper; | |
234 | /// striped object concerned by the truncate operation | |
235 | std::string m_soid; | |
236 | /// the final size of the truncated object | |
237 | uint64_t m_size; | |
238 | }; | |
239 | ||
240 | /** | |
241 | * exception wrapper around an error code | |
242 | */ | |
243 | struct ErrorCode { | |
244 | ErrorCode(int error) : m_code(error) {}; | |
245 | int m_code; | |
246 | }; | |
247 | ||
248 | /* | |
249 | * Constructor | |
250 | * @param cluster_name name of the cluster, can be NULL | |
251 | * @param client_name has 2 meanings depending on cluster_name | |
252 | * - if cluster_name is null : this is the client id | |
253 | * - else : this is the full client name in format type.id | |
254 | */ | |
255 | RadosStriperImpl(librados::IoCtx& ioctx, librados::IoCtxImpl *ioctx_impl); | |
256 | /// Destructor | |
257 | ~RadosStriperImpl() {}; | |
258 | ||
259 | // configuration | |
260 | int setObjectLayoutStripeUnit(unsigned int stripe_unit); | |
261 | int setObjectLayoutStripeCount(unsigned int stripe_count); | |
262 | int setObjectLayoutObjectSize(unsigned int object_size); | |
263 | ||
264 | // xattrs | |
265 | int getxattr(const object_t& soid, const char *name, bufferlist& bl); | |
266 | int setxattr(const object_t& soid, const char *name, bufferlist& bl); | |
267 | int getxattrs(const object_t& soid, map<string, bufferlist>& attrset); | |
268 | int rmxattr(const object_t& soid, const char *name); | |
269 | ||
270 | // io | |
271 | int write(const std::string& soid, const bufferlist& bl, size_t len, uint64_t off); | |
272 | int append(const std::string& soid, const bufferlist& bl, size_t len); | |
273 | int write_full(const std::string& soid, const bufferlist& bl); | |
274 | int read(const std::string& soid, bufferlist* pbl, size_t len, uint64_t off); | |
275 | ||
276 | // asynchronous io | |
277 | int aio_write(const std::string& soid, librados::AioCompletionImpl *c, | |
278 | const bufferlist& bl, size_t len, uint64_t off); | |
279 | int aio_append(const std::string& soid, librados::AioCompletionImpl *c, | |
280 | const bufferlist& bl, size_t len); | |
281 | int aio_write_full(const std::string& soid, librados::AioCompletionImpl *c, | |
282 | const bufferlist& bl); | |
283 | int aio_read(const std::string& soid, librados::AioCompletionImpl *c, | |
284 | bufferlist* pbl, size_t len, uint64_t off); | |
285 | int aio_read(const std::string& soid, librados::AioCompletionImpl *c, | |
286 | char* buf, size_t len, uint64_t off); | |
287 | int aio_flush(); | |
288 | ||
289 | // stat, deletion and truncation | |
290 | int stat(const std::string& soid, uint64_t *psize, time_t *pmtime); | |
291 | int stat2(const std::string& soid, uint64_t *psize, struct timespec *pts); | |
292 | template<class TimeType> | |
293 | struct StatFunction { | |
294 | typedef int (librados::IoCtxImpl::*Type) (const object_t& oid, | |
295 | librados::AioCompletionImpl *c, | |
296 | uint64_t *psize, TimeType *pmtime); | |
297 | }; | |
298 | template<class TimeType> | |
299 | int aio_generic_stat(const std::string& soid, librados::AioCompletionImpl *c, | |
300 | uint64_t *psize, TimeType *pmtime, | |
301 | typename StatFunction<TimeType>::Type statFunction); | |
302 | int aio_stat(const std::string& soid, librados::AioCompletionImpl *c, | |
303 | uint64_t *psize, time_t *pmtime); | |
304 | int aio_stat2(const std::string& soid, librados::AioCompletionImpl *c, | |
305 | uint64_t *psize, struct timespec *pts); | |
306 | int remove(const std::string& soid, int flags=0); | |
307 | int trunc(const std::string& soid, uint64_t size); | |
308 | ||
309 | // asynchronous remove. Note that the removal is not 100% parallelized : | |
310 | // the removal of the first rados object of the striped object will be | |
311 | // done via a syncrhonous call after the completion of all other removals. | |
312 | // These are done asynchrounously and in parallel | |
313 | int aio_remove(const std::string& soid, librados::AioCompletionImpl *c, int flags=0); | |
314 | ||
315 | // reference counting | |
316 | void get() { | |
317 | lock.Lock(); | |
318 | m_refCnt ++ ; | |
319 | lock.Unlock(); | |
320 | } | |
321 | void put() { | |
322 | bool deleteme = false; | |
323 | lock.Lock(); | |
324 | m_refCnt --; | |
325 | if (m_refCnt == 0) | |
326 | deleteme = true; | |
327 | cond.Signal(); | |
328 | lock.Unlock(); | |
329 | if (deleteme) | |
330 | delete this; | |
331 | } | |
332 | ||
333 | // objectid manipulation | |
334 | std::string getObjectId(const object_t& soid, long long unsigned objectno); | |
335 | ||
336 | // opening and closing of striped objects | |
337 | void unlockObject(const std::string& soid, | |
338 | const std::string& lockCookie); | |
339 | void aio_unlockObject(const std::string& soid, | |
340 | const std::string& lockCookie, | |
341 | librados::AioCompletion *c); | |
342 | ||
343 | // internal versions of IO method | |
344 | int write_in_open_object(const std::string& soid, | |
345 | const ceph_file_layout& layout, | |
346 | const std::string& lockCookie, | |
347 | const bufferlist& bl, | |
348 | size_t len, | |
349 | uint64_t off); | |
350 | int aio_write_in_open_object(const std::string& soid, | |
351 | librados::AioCompletionImpl *c, | |
352 | const ceph_file_layout& layout, | |
353 | const std::string& lockCookie, | |
354 | const bufferlist& bl, | |
355 | size_t len, | |
356 | uint64_t off); | |
357 | int internal_aio_write(const std::string& soid, | |
358 | libradosstriper::MultiAioCompletionImpl *c, | |
359 | const bufferlist& bl, | |
360 | size_t len, | |
361 | uint64_t off, | |
362 | const ceph_file_layout& layout); | |
363 | ||
364 | int extract_uint32_attr(std::map<std::string, bufferlist> &attrs, | |
365 | const std::string& key, | |
366 | ceph_le32 *value); | |
367 | ||
368 | int extract_sizet_attr(std::map<std::string, bufferlist> &attrs, | |
369 | const std::string& key, | |
370 | size_t *value); | |
371 | ||
372 | int internal_get_layout_and_size(const std::string& oid, | |
373 | ceph_file_layout *layout, | |
374 | uint64_t *size); | |
375 | ||
376 | int internal_aio_remove(const std::string& soid, | |
377 | libradosstriper::MultiAioCompletionImpl *multi_completion, | |
378 | int flags=0); | |
379 | ||
380 | /** | |
381 | * opens an existing striped object and takes a shared lock on it | |
382 | * @return 0 if everything is ok and the lock was taken. -errcode otherwise | |
383 | * In particulae, if the striped object does not exists, -ENOENT is returned | |
384 | * In case the return code in not 0, no lock is taken | |
385 | */ | |
386 | int openStripedObjectForRead(const std::string& soid, | |
387 | ceph_file_layout *layout, | |
388 | uint64_t *size, | |
389 | std::string *lockCookie); | |
390 | ||
391 | /** | |
392 | * opens an existing striped object, takes a shared lock on it | |
393 | * and sets its size to the size it will have after the write. | |
394 | * In case the striped object does not exists, it will create it by | |
395 | * calling createOrOpenStripedObject. | |
396 | * @param layout this is filled with the layout of the file | |
397 | * @param size new size of the file (together with isFileSizeAbsolute) | |
398 | * In case of success, this is filled with the size of the file before the opening | |
399 | * @param isFileSizeAbsolute if false, this means that the given size should | |
400 | * be added to the current file size (append mode) | |
401 | * @return 0 if everything is ok and the lock was taken. -errcode otherwise | |
402 | * In case the return code in not 0, no lock is taken | |
403 | */ | |
404 | int openStripedObjectForWrite(const std::string& soid, | |
405 | ceph_file_layout *layout, | |
406 | uint64_t *size, | |
407 | std::string *lockCookie, | |
408 | bool isFileSizeAbsolute); | |
409 | /** | |
410 | * creates an empty striped object with the given size and opens it calling | |
411 | * openStripedObjectForWrite, which implies taking a shared lock on it | |
412 | * Also deals with the cases where the object was created in the mean time | |
413 | * @param isFileSizeAbsolute if false, this means that the given size should | |
414 | * be added to the current file size (append mode). This of course only makes | |
415 | * sense in case the striped object already exists | |
416 | * @return 0 if everything is ok and the lock was taken. -errcode otherwise | |
417 | * In case the return code in not 0, no lock is taken | |
418 | */ | |
419 | int createAndOpenStripedObject(const std::string& soid, | |
420 | ceph_file_layout *layout, | |
421 | uint64_t size, | |
422 | std::string *lockCookie, | |
423 | bool isFileSizeAbsolute); | |
424 | ||
425 | /** | |
426 | * truncates an object synchronously. Should only be called with size < original_size | |
427 | */ | |
428 | int truncate(const std::string& soid, | |
429 | uint64_t original_size, | |
430 | uint64_t size, | |
431 | ceph_file_layout &layout); | |
432 | ||
433 | /** | |
434 | * truncates an object asynchronously. Should only be called with size < original_size | |
435 | * note that the method is not 100% asynchronous, only the removal of rados objects | |
436 | * is, the (potential) truncation of the rados object residing just at the truncation | |
437 | * point is synchronous for lack of asynchronous truncation in the rados layer | |
438 | */ | |
439 | int aio_truncate(const std::string& soid, | |
440 | libradosstriper::MultiAioCompletionImpl *c, | |
441 | uint64_t original_size, | |
442 | uint64_t size, | |
443 | ceph_file_layout &layout); | |
444 | ||
445 | /** | |
446 | * grows an object (adding 0s). Should only be called with size > original_size | |
447 | */ | |
448 | int grow(const std::string& soid, | |
449 | uint64_t original_size, | |
450 | uint64_t size, | |
451 | ceph_file_layout &layout); | |
452 | ||
453 | /** | |
454 | * creates a unique identifier | |
455 | */ | |
456 | static std::string getUUID(); | |
457 | ||
458 | CephContext *cct() { | |
459 | return (CephContext*)m_radosCluster.cct(); | |
460 | } | |
461 | ||
462 | // reference counting | |
463 | Cond cond; | |
464 | int m_refCnt; | |
465 | Mutex lock; | |
466 | ||
467 | ||
468 | // Context | |
469 | librados::Rados m_radosCluster; | |
470 | librados::IoCtx m_ioCtx; | |
471 | librados::IoCtxImpl *m_ioCtxImpl; | |
472 | ||
473 | // Default layout | |
474 | ceph_file_layout m_layout; | |
475 | }; | |
476 | ||
477 | #endif |