1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2014 Sebastien Ponce <sebastien.ponce@cern.ch>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
15 #ifndef CEPH_LIBRADOSSTRIPER_RADOSSTRIPERIMPL_H
16 #define CEPH_LIBRADOSSTRIPER_RADOSSTRIPERIMPL_H
20 #include "include/rados/librados.h"
21 #include "include/rados/librados.hpp"
22 #include "include/radosstriper/libradosstriper.h"
23 #include "include/radosstriper/libradosstriper.hpp"
25 #include "librados/IoCtxImpl.h"
26 #include "librados/AioCompletionImpl.h"
27 #include "common/RefCountedObj.h"
29 struct libradosstriper::RadosStriperImpl
{
32 * struct handling the data needed to pass to the call back
33 * function in asynchronous operations
35 struct CompletionData
: RefCountedObject
{
37 CompletionData(libradosstriper::RadosStriperImpl
* striper
,
38 const std::string
& soid
,
39 const std::string
& lockCookie
,
40 librados::AioCompletionImpl
*userCompletion
= 0,
43 ~CompletionData() override
;
46 /// striper to be used to handle the write completion
47 libradosstriper::RadosStriperImpl
*m_striper
;
48 /// striped object concerned by the write operation
50 /// shared lock to be released at completion
51 std::string m_lockCookie
;
52 /// completion handler
53 librados::IoCtxImpl::C_aio_Complete
*m_ack
;
57 * struct handling the data needed to pass to the call back
58 * function in asynchronous read operations
60 struct ReadCompletionData
: CompletionData
{
61 /// bufferlist containing final result
63 /// extents that will be read
64 std::vector
<ObjectExtent
>* m_extents
;
65 /// intermediate results
66 std::vector
<bufferlist
>* m_resultbl
;
67 /// return code of read completion, to be remembered until unlocking happened
69 /// completion object for the unlocking of the striped object at the end of the read
70 librados::AioCompletion
*m_unlockCompletion
;
72 ReadCompletionData(libradosstriper::RadosStriperImpl
* striper
,
73 const std::string
& soid
,
74 const std::string
& lockCookie
,
75 librados::AioCompletionImpl
*userCompletion
,
77 std::vector
<ObjectExtent
>* extents
,
78 std::vector
<bufferlist
>* resultbl
,
81 ~ReadCompletionData() override
;
82 /// complete method for when reading is over
83 void complete_read(int r
);
84 /// complete method for when object is unlocked
85 void complete_unlock(int r
);
89 * struct handling the data needed to pass to the call back
90 * function in asynchronous write operations
92 struct WriteCompletionData
: CompletionData
{
93 /// safe completion handler
94 librados::IoCtxImpl::C_aio_Complete
*m_safe
;
95 /// return code of write completion, to be remembered until unlocking happened
97 /// completion object for the unlocking of the striped object at the end of the write
98 librados::AioCompletion
*m_unlockCompletion
;
100 WriteCompletionData(libradosstriper::RadosStriperImpl
* striper
,
101 const std::string
& soid
,
102 const std::string
& lockCookie
,
103 librados::AioCompletionImpl
*userCompletion
,
106 ~WriteCompletionData() override
;
107 /// complete method for when writing is over
108 void complete_write(int r
);
109 /// complete method for when object is unlocked
110 void complete_unlock(int r
);
116 * struct handling the data needed to pass to the call back
117 * function in asynchronous read operations of a Rados File
119 struct RadosReadCompletionData
: RefCountedObject
{
121 RadosReadCompletionData(MultiAioCompletionImpl
*multiAioCompl
,
122 uint64_t expectedBytes
,
124 CephContext
*context
,
126 RefCountedObject(context
, n
),
127 m_multiAioCompl(multiAioCompl
), m_expectedBytes(expectedBytes
), m_bl(bl
) {};
128 /// the multi asynch io completion object to be used
129 MultiAioCompletionImpl
*m_multiAioCompl
;
130 /// the expected number of bytes
131 uint64_t m_expectedBytes
;
132 /// the bufferlist object where data have been written
137 * struct handling (most of) the data needed to pass to the call back
138 * function in asynchronous stat operations.
139 * Inherited by the actual type for adding time information in different
140 * versions (time_t or struct timespec)
142 struct BasicStatCompletionData
: CompletionData
{
144 BasicStatCompletionData(libradosstriper::RadosStriperImpl
* striper
,
145 const std::string
& soid
,
146 librados::AioCompletionImpl
*userCompletion
,
147 libradosstriper::MultiAioCompletionImpl
*multiCompletion
,
150 CompletionData(striper
, soid
, "", userCompletion
, n
),
151 m_multiCompletion(multiCompletion
), m_psize(psize
),
152 m_statRC(0), m_getxattrRC(0) {};
153 // MultiAioCompletionImpl used to handle the double async
154 // call in the back (stat + getxattr)
155 libradosstriper::MultiAioCompletionImpl
*m_multiCompletion
;
156 // where to store the size of the first object
157 // this will be ignored but we need a place to store it when
158 // async stat is called
159 uint64_t m_objectSize
;
160 // where to store the file size
162 /// the bufferlist object used for the getxattr call
164 /// return code of the stat
166 /// return code of the getxattr
171 * struct handling the data needed to pass to the call back
172 * function in asynchronous stat operations.
173 * Simple templated extension of BasicStatCompletionData.
174 * The template parameter is the type of the time information
175 * (used with time_t for stat and struct timespec for stat2)
177 template<class TimeType
>
178 struct StatCompletionData
: BasicStatCompletionData
{
180 StatCompletionData(libradosstriper::RadosStriperImpl
* striper
,
181 const std::string
& soid
,
182 librados::AioCompletionImpl
*userCompletion
,
183 libradosstriper::MultiAioCompletionImpl
*multiCompletion
,
187 BasicStatCompletionData(striper
, soid
, userCompletion
, multiCompletion
, psize
, n
),
189 // where to store the file time
194 * struct handling the data needed to pass to the call back
195 * function in asynchronous remove operations of a Rados File
197 struct RadosRemoveCompletionData
: RefCountedObject
{
/// Constructor: forwards `context` and the literal 2 to the RefCountedObject
/// base and stores `multiAioCompl` in m_multiAioCompl. The body is empty.
/// NOTE(review): the 2 is presumably the initial reference count - confirm
/// against the RefCountedObject constructor.
199 RadosRemoveCompletionData(MultiAioCompletionImpl
*multiAioCompl
,
200 CephContext
*context
) :
201 RefCountedObject(context
, 2),
202 m_multiAioCompl(multiAioCompl
) {};
203 /// the multi asynch io completion object to be used
204 MultiAioCompletionImpl
*m_multiAioCompl
;
207 struct RemoveCompletionData
: CompletionData
{
212 * note that the constructed object will take ownership of the lock
214 RemoveCompletionData(libradosstriper::RadosStriperImpl
* striper
,
215 const std::string
& soid
,
216 const std::string
& lockCookie
,
217 librados::AioCompletionImpl
*userCompletion
,
222 * struct handling the data needed to pass to the call back
223 * function in asynchronous truncate operations
225 struct TruncateCompletionData
: RefCountedObject
{
227 TruncateCompletionData(libradosstriper::RadosStriperImpl
* striper
,
228 const std::string
& soid
,
231 ~TruncateCompletionData() override
;
232 /// striper to be used
233 libradosstriper::RadosStriperImpl
*m_striper
;
234 /// striped object concerned by the truncate operation
236 /// the final size of the truncated object
241 * exception wrapper around an error code
/// Constructor: stores the integer `error` in m_code; the body is empty.
/// NOTE(review): not marked explicit, so an int converts implicitly to
/// ErrorCode - confirm whether callers rely on that.
244 ErrorCode(int error
) : m_code(error
) {};
250 * @param cluster_name name of the cluster, can be NULL
251 * @param client_name has 2 meanings depending on cluster_name
252 * - if cluster_name is null : this is the client id
253 * - else : this is the full client name in format type.id
/// Constructor taking an IoCtx by reference and an IoCtxImpl by pointer.
/// Only the declaration is visible here, so what it does with them cannot be
/// stated from this header.
/// NOTE(review): the @param text just above describes cluster_name and
/// client_name, which are not parameters of this signature - it looks stale;
/// confirm and update.
255 RadosStriperImpl(librados::IoCtx
& ioctx
, librados::IoCtxImpl
*ioctx_impl
);
/// Destructor with an empty body: nothing is released explicitly here.
257 ~RadosStriperImpl() {};
/// Layout setters: each takes one unsigned int (stripe unit, stripe count,
/// object size respectively) and returns an int.
/// NOTE(review): presumably these update m_layout and return 0 on success or
/// a negative error code - the definitions are not visible here; confirm in
/// the implementation file.
260 int setObjectLayoutStripeUnit(unsigned int stripe_unit
);
261 int setObjectLayoutStripeCount(unsigned int stripe_count
);
262 int setObjectLayoutObjectSize(unsigned int object_size
);
265 int getxattr(const object_t
& soid
, const char *name
, bufferlist
& bl
);
266 int setxattr(const object_t
& soid
, const char *name
, bufferlist
& bl
);
267 int getxattrs(const object_t
& soid
, map
<string
, bufferlist
>& attrset
);
268 int rmxattr(const object_t
& soid
, const char *name
);
271 int write(const std::string
& soid
, const bufferlist
& bl
, size_t len
, uint64_t off
);
272 int append(const std::string
& soid
, const bufferlist
& bl
, size_t len
);
273 int write_full(const std::string
& soid
, const bufferlist
& bl
);
274 int read(const std::string
& soid
, bufferlist
* pbl
, size_t len
, uint64_t off
);
277 int aio_write(const std::string
& soid
, librados::AioCompletionImpl
*c
,
278 const bufferlist
& bl
, size_t len
, uint64_t off
);
279 int aio_append(const std::string
& soid
, librados::AioCompletionImpl
*c
,
280 const bufferlist
& bl
, size_t len
);
281 int aio_write_full(const std::string
& soid
, librados::AioCompletionImpl
*c
,
282 const bufferlist
& bl
);
283 int aio_read(const std::string
& soid
, librados::AioCompletionImpl
*c
,
284 bufferlist
* pbl
, size_t len
, uint64_t off
);
285 int aio_read(const std::string
& soid
, librados::AioCompletionImpl
*c
,
286 char* buf
, size_t len
, uint64_t off
);
289 // stat, deletion and truncation
290 int stat(const std::string
& soid
, uint64_t *psize
, time_t *pmtime
);
291 int stat2(const std::string
& soid
, uint64_t *psize
, struct timespec
*pts
);
292 template<class TimeType
>
293 struct StatFunction
{
294 typedef int (librados::IoCtxImpl::*Type
) (const object_t
& oid
,
295 librados::AioCompletionImpl
*c
,
296 uint64_t *psize
, TimeType
*pmtime
);
298 template<class TimeType
>
299 int aio_generic_stat(const std::string
& soid
, librados::AioCompletionImpl
*c
,
300 uint64_t *psize
, TimeType
*pmtime
,
301 typename StatFunction
<TimeType
>::Type statFunction
);
302 int aio_stat(const std::string
& soid
, librados::AioCompletionImpl
*c
,
303 uint64_t *psize
, time_t *pmtime
);
304 int aio_stat2(const std::string
& soid
, librados::AioCompletionImpl
*c
,
305 uint64_t *psize
, struct timespec
*pts
);
306 int remove(const std::string
& soid
, int flags
=0);
307 int trunc(const std::string
& soid
, uint64_t size
);
309 // asynchronous remove. Note that the removal is not 100% parallelized:
310 // the removal of the first rados object of the striped object will be
311 // done via a synchronous call after the completion of all other removals.
312 // Those other removals are done asynchronously and in parallel
313 int aio_remove(const std::string
& soid
, librados::AioCompletionImpl
*c
, int flags
=0);
315 // reference counting
322 bool deleteme
= false;
333 // objectid manipulation
334 std::string
getObjectId(const object_t
& soid
, long long unsigned objectno
);
336 // opening and closing of striped objects
337 void unlockObject(const std::string
& soid
,
338 const std::string
& lockCookie
);
339 void aio_unlockObject(const std::string
& soid
,
340 const std::string
& lockCookie
,
341 librados::AioCompletion
*c
);
343 // internal versions of IO method
344 int write_in_open_object(const std::string
& soid
,
345 const ceph_file_layout
& layout
,
346 const std::string
& lockCookie
,
347 const bufferlist
& bl
,
350 int aio_write_in_open_object(const std::string
& soid
,
351 librados::AioCompletionImpl
*c
,
352 const ceph_file_layout
& layout
,
353 const std::string
& lockCookie
,
354 const bufferlist
& bl
,
357 int internal_aio_write(const std::string
& soid
,
358 libradosstriper::MultiAioCompletionImpl
*c
,
359 const bufferlist
& bl
,
362 const ceph_file_layout
& layout
);
364 int extract_uint32_attr(std::map
<std::string
, bufferlist
> &attrs
,
365 const std::string
& key
,
368 int extract_sizet_attr(std::map
<std::string
, bufferlist
> &attrs
,
369 const std::string
& key
,
372 int internal_get_layout_and_size(const std::string
& oid
,
373 ceph_file_layout
*layout
,
376 int internal_aio_remove(const std::string
& soid
,
377 libradosstriper::MultiAioCompletionImpl
*multi_completion
,
381 * opens an existing striped object and takes a shared lock on it
382 * @return 0 if everything is ok and the lock was taken. -errcode otherwise
383 * In particular, if the striped object does not exist, -ENOENT is returned
384 * In case the return code is not 0, no lock is taken
386 int openStripedObjectForRead(const std::string
& soid
,
387 ceph_file_layout
*layout
,
389 std::string
*lockCookie
);
392 * opens an existing striped object, takes a shared lock on it
393 * and sets its size to the size it will have after the write.
394 * In case the striped object does not exist, it will create it by
395 * calling createOrOpenStripedObject.
396 * @param layout this is filled with the layout of the file
397 * @param size new size of the file (together with isFileSizeAbsolute)
398 * In case of success, this is filled with the size of the file before the opening
399 * @param isFileSizeAbsolute if false, this means that the given size should
400 * be added to the current file size (append mode)
401 * @return 0 if everything is ok and the lock was taken. -errcode otherwise
402 * In case the return code is not 0, no lock is taken
404 int openStripedObjectForWrite(const std::string
& soid
,
405 ceph_file_layout
*layout
,
407 std::string
*lockCookie
,
408 bool isFileSizeAbsolute
);
410 * creates an empty striped object with the given size and opens it calling
411 * openStripedObjectForWrite, which implies taking a shared lock on it
412 * Also deals with the cases where the object was created in the meantime
413 * @param isFileSizeAbsolute if false, this means that the given size should
414 * be added to the current file size (append mode). This of course only makes
415 * sense in case the striped object already exists
416 * @return 0 if everything is ok and the lock was taken. -errcode otherwise
417 * In case the return code is not 0, no lock is taken
419 int createAndOpenStripedObject(const std::string
& soid
,
420 ceph_file_layout
*layout
,
422 std::string
*lockCookie
,
423 bool isFileSizeAbsolute
);
426 * truncates an object synchronously. Should only be called with size < original_size
428 int truncate(const std::string
& soid
,
429 uint64_t original_size
,
431 ceph_file_layout
&layout
);
434 * truncates an object asynchronously. Should only be called with size < original_size
435 * note that the method is not 100% asynchronous, only the removal of rados objects
436 * is, the (potential) truncation of the rados object residing just at the truncation
437 * point is synchronous for lack of asynchronous truncation in the rados layer
439 int aio_truncate(const std::string
& soid
,
440 libradosstriper::MultiAioCompletionImpl
*c
,
441 uint64_t original_size
,
443 ceph_file_layout
&layout
);
446 * grows an object (adding 0s). Should only be called with size > original_size
448 int grow(const std::string
& soid
,
449 uint64_t original_size
,
451 ceph_file_layout
&layout
);
454 * creates a unique identifier
456 static std::string
getUUID();
459 return (CephContext
*)m_radosCluster
.cct();
462 // reference counting
469 librados::Rados m_radosCluster
;
470 librados::IoCtx m_ioCtx
;
471 librados::IoCtxImpl
*m_ioCtxImpl
;
474 ceph_file_layout m_layout
;