1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2014 Sebastien Ponce <sebastien.ponce@cern.ch>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
15 #ifndef CEPH_LIBRADOSSTRIPER_RADOSSTRIPERIMPL_H
16 #define CEPH_LIBRADOSSTRIPER_RADOSSTRIPERIMPL_H
20 #include "include/atomic.h"
22 #include "include/rados/librados.h"
23 #include "include/rados/librados.hpp"
24 #include "include/radosstriper/libradosstriper.h"
25 #include "include/radosstriper/libradosstriper.hpp"
27 #include "librados/IoCtxImpl.h"
28 #include "librados/AioCompletionImpl.h"
29 #include "common/RefCountedObj.h"
31 struct libradosstriper::RadosStriperImpl
{
34 * struct handling the data needed to pass to the call back
35 * function in asynchronous operations
37 struct CompletionData
: RefCountedObject
{
39 CompletionData(libradosstriper::RadosStriperImpl
* striper
,
40 const std::string
& soid
,
41 const std::string
& lockCookie
,
42 librados::AioCompletionImpl
*userCompletion
= 0,
45 ~CompletionData() override
;
48 /// striper to be used to handle the write completion
49 libradosstriper::RadosStriperImpl
*m_striper
;
50 /// striped object concerned by the write operation
52 /// shared lock to be released at completion
53 std::string m_lockCookie
;
54 /// completion handler
55 librados::IoCtxImpl::C_aio_Complete
*m_ack
;
59 * struct handling the data needed to pass to the call back
60 * function in asynchronous read operations
62 struct ReadCompletionData
: CompletionData
{
63 /// bufferlist containing final result
65 /// extents that will be read
66 std::vector
<ObjectExtent
>* m_extents
;
67 /// intermediate results
68 std::vector
<bufferlist
>* m_resultbl
;
69 /// return code of read completion, to be remembered until unlocking happened
71 /// completion object for the unlocking of the striped object at the end of the read
72 librados::AioCompletion
*m_unlockCompletion
;
74 ReadCompletionData(libradosstriper::RadosStriperImpl
* striper
,
75 const std::string
& soid
,
76 const std::string
& lockCookie
,
77 librados::AioCompletionImpl
*userCompletion
,
79 std::vector
<ObjectExtent
>* extents
,
80 std::vector
<bufferlist
>* resultbl
,
83 ~ReadCompletionData() override
;
84 /// complete method for when reading is over
85 void complete_read(int r
);
86 /// complete method for when object is unlocked
87 void complete_unlock(int r
);
91 * struct handling the data needed to pass to the call back
92 * function in asynchronous write operations
94 struct WriteCompletionData
: CompletionData
{
95 /// safe completion handler
96 librados::IoCtxImpl::C_aio_Complete
*m_safe
;
97 /// return code of write completion, to be remembered until unlocking happened
99 /// completion object for the unlocking of the striped object at the end of the write
100 librados::AioCompletion
*m_unlockCompletion
;
102 WriteCompletionData(libradosstriper::RadosStriperImpl
* striper
,
103 const std::string
& soid
,
104 const std::string
& lockCookie
,
105 librados::AioCompletionImpl
*userCompletion
,
108 ~WriteCompletionData() override
;
109 /// complete method for when writing is over
110 void complete_write(int r
);
111 /// complete method for when object is unlocked
112 void complete_unlock(int r
);
118 * struct handling the data needed to pass to the call back
119 * function in asynchronous read operations of a Rados File
121 struct RadosReadCompletionData
: RefCountedObject
{
123 RadosReadCompletionData(MultiAioCompletionImpl
*multiAioCompl
,
124 uint64_t expectedBytes
,
126 CephContext
*context
,
128 RefCountedObject(context
, n
),
129 m_multiAioCompl(multiAioCompl
), m_expectedBytes(expectedBytes
), m_bl(bl
) {};
130 /// the multi asynch io completion object to be used
131 MultiAioCompletionImpl
*m_multiAioCompl
;
132 /// the expected number of bytes
133 uint64_t m_expectedBytes
;
134 /// the bufferlist object where data have been written
139 * struct handling (most of) the data needed to pass to the call back
140 * function in asynchronous stat operations.
141 * Inherited by the actual type for adding time information in different
142 * versions (time_t or struct timespec)
144 struct BasicStatCompletionData
: CompletionData
{
146 BasicStatCompletionData(libradosstriper::RadosStriperImpl
* striper
,
147 const std::string
& soid
,
148 librados::AioCompletionImpl
*userCompletion
,
149 libradosstriper::MultiAioCompletionImpl
*multiCompletion
,
152 CompletionData(striper
, soid
, "", userCompletion
, n
),
153 m_multiCompletion(multiCompletion
), m_psize(psize
),
154 m_statRC(0), m_getxattrRC(0) {};
155 // MultiAioCompletionImpl used to handle the double async
156 // call in the back (stat + getxattr)
157 libradosstriper::MultiAioCompletionImpl
*m_multiCompletion
;
158 // where to store the size of first object
159 // this will be ignored but we need a place to store it when
160 // async stat is called
161 uint64_t m_objectSize
;
162 // where to store the file size
164 /// the bufferlist object used for the getxattr call
166 /// return code of the stat
168 /// return code of the getxattr
173 * struct handling the data needed to pass to the call back
174 * function in asynchronous stat operations.
175 * Simple templated extension of BasicStatCompletionData.
176 * The template parameter is the type of the time information
177 * (used with time_t for stat and struct timespec for stat2)
179 template<class TimeType
>
180 struct StatCompletionData
: BasicStatCompletionData
{
182 StatCompletionData(libradosstriper::RadosStriperImpl
* striper
,
183 const std::string
& soid
,
184 librados::AioCompletionImpl
*userCompletion
,
185 libradosstriper::MultiAioCompletionImpl
*multiCompletion
,
189 BasicStatCompletionData(striper
, soid
, userCompletion
, multiCompletion
, psize
, n
),
191 // where to store the file time
196 * struct handling the data needed to pass to the call back
197 * function in asynchronous remove operations of a Rados File
199 struct RadosRemoveCompletionData
: RefCountedObject
{
/// Builds the callback payload for the asynchronous removal of a rados object.
/// @param multiAioCompl multi-completion pointer, stored unchanged in m_multiAioCompl
/// @param context CephContext forwarded to the RefCountedObject base
/// The base is initialised with the literal 2 as its second argument.
/// NOTE(review): presumably the initial reference count — confirm against
/// RefCountedObject's constructor.
201 RadosRemoveCompletionData(MultiAioCompletionImpl
*multiAioCompl
,
202 CephContext
*context
) :
203 RefCountedObject(context
, 2),
204 m_multiAioCompl(multiAioCompl
) {};
205 /// the multi asynch io completion object to be used
206 MultiAioCompletionImpl
*m_multiAioCompl
;
209 struct RemoveCompletionData
: CompletionData
{
214 * note that the constructed object will take ownership of the lock
216 RemoveCompletionData(libradosstriper::RadosStriperImpl
* striper
,
217 const std::string
& soid
,
218 const std::string
& lockCookie
,
219 librados::AioCompletionImpl
*userCompletion
,
224 * struct handling the data needed to pass to the call back
225 * function in asynchronous truncate operations
227 struct TruncateCompletionData
: RefCountedObject
{
229 TruncateCompletionData(libradosstriper::RadosStriperImpl
* striper
,
230 const std::string
& soid
,
233 ~TruncateCompletionData() override
;
234 /// striper to be used
235 libradosstriper::RadosStriperImpl
*m_striper
;
236 /// striped object concerned by the truncate operation
238 /// the final size of the truncated object
243 * exception wrapper around an error code
/// Wraps an integer error code: stores @p error in m_code.
/// NOTE(review): not marked explicit, so an int converts implicitly — confirm
/// whether callers rely on that.
246 ErrorCode(int error
) : m_code(error
) {};
252 * @param cluster_name name of the cluster, can be NULL
253 * @param client_name has 2 meanings depending on cluster_name
254 * - if cluster_name is null : this is the client id
255 * - else : this is the full client name in format type.id
/// Constructor declaration (defined outside this header).
/// @param ioctx      librados IoCtx, taken by reference
/// @param ioctx_impl pointer to the matching librados::IoCtxImpl
/// NOTE(review): the @param text directly above describes cluster_name and
/// client_name, which are not parameters of this constructor — it looks
/// stale; confirm and update.
257 RadosStriperImpl(librados::IoCtx
& ioctx
, librados::IoCtxImpl
*ioctx_impl
);
/// Destructor with an empty inline body (the trailing ';' is redundant).
259 ~RadosStriperImpl() {};
/// Setter for the stripe unit of the object layout (declaration only).
/// NOTE(review): presumably updates m_layout and returns 0 on success or a
/// negative errno — the definition is not in this header; confirm.
262 int setObjectLayoutStripeUnit(unsigned int stripe_unit
);
/// Setter for the stripe count of the object layout (declaration only).
/// NOTE(review): presumably updates m_layout and returns 0 on success or a
/// negative errno — the definition is not in this header; confirm.
263 int setObjectLayoutStripeCount(unsigned int stripe_count
);
/// Setter for the object size of the object layout (declaration only).
/// NOTE(review): presumably updates m_layout and returns 0 on success or a
/// negative errno — the definition is not in this header; confirm.
264 int setObjectLayoutObjectSize(unsigned int object_size
);
/// Extended-attribute getter (declaration only).
/// @param soid object the attribute belongs to
/// @param name attribute name (C string)
/// @param bl   bufferlist passed by non-const reference
/// NOTE(review): presumably @p bl receives the attribute value and the int
/// return is a length or negative errno — confirm in the implementation.
267 int getxattr(const object_t
& soid
, const char *name
, bufferlist
& bl
);
/// Extended-attribute setter (declaration only).
/// @param soid object the attribute belongs to
/// @param name attribute name (C string)
/// @param bl   bufferlist passed by non-const reference
/// NOTE(review): presumably @p bl carries the value to store and the int
/// return is 0 or a negative errno — confirm in the implementation.
268 int setxattr(const object_t
& soid
, const char *name
, bufferlist
& bl
);
/// Bulk extended-attribute getter (declaration only).
/// @param soid    object whose attributes are requested
/// @param attrset map from string to bufferlist, passed by non-const reference
/// NOTE(review): presumably @p attrset is filled with name -> value pairs;
/// the unqualified map/string rely on names being visible at this point —
/// confirm.
269 int getxattrs(const object_t
& soid
, map
<string
, bufferlist
>& attrset
);
/// Extended-attribute removal (declaration only).
/// @param soid object the attribute belongs to
/// @param name attribute name (C string)
/// NOTE(review): return presumably 0 or a negative errno — confirm.
270 int rmxattr(const object_t
& soid
, const char *name
);
/// Write entry point without a completion argument (declaration only).
/// @param soid name of the target object
/// @param bl   source data
/// @param len  byte count
/// @param off  offset
/// NOTE(review): presumably the blocking counterpart of aio_write, writing
/// @p len bytes of @p bl at @p off and returning 0 or a negative errno —
/// confirm.
273 int write(const std::string
& soid
, const bufferlist
& bl
, size_t len
, uint64_t off
);
/// Append entry point without a completion argument (declaration only).
/// @param soid name of the target object
/// @param bl   source data
/// @param len  byte count
/// NOTE(review): presumably the blocking counterpart of aio_append —
/// confirm.
274 int append(const std::string
& soid
, const bufferlist
& bl
, size_t len
);
/// Whole-object write entry point without a completion argument
/// (declaration only).
/// @param soid name of the target object
/// @param bl   source data
/// NOTE(review): presumably replaces the object content with @p bl —
/// confirm.
275 int write_full(const std::string
& soid
, const bufferlist
& bl
);
/// Read entry point without a completion argument (declaration only).
/// @param soid name of the source object
/// @param pbl  pointer to the destination bufferlist
/// @param len  byte count
/// @param off  offset
/// NOTE(review): return presumably bytes read or a negative errno —
/// confirm.
276 int read(const std::string
& soid
, bufferlist
* pbl
, size_t len
, uint64_t off
);
/// Write entry point taking a completion object (declaration only).
/// @param soid name of the target object
/// @param c    librados completion implementation
/// @param bl   source data
/// @param len  byte count
/// @param off  offset
/// NOTE(review): presumably returns once the operation is queued and
/// signals @p c at the end — confirm.
279 int aio_write(const std::string
& soid
, librados::AioCompletionImpl
*c
,
280 const bufferlist
& bl
, size_t len
, uint64_t off
);
/// Append entry point taking a completion object (declaration only).
/// @param soid name of the target object
/// @param c    librados completion implementation
/// @param bl   source data
/// @param len  byte count
/// NOTE(review): presumably signals @p c when the append is done — confirm.
281 int aio_append(const std::string
& soid
, librados::AioCompletionImpl
*c
,
282 const bufferlist
& bl
, size_t len
);
/// Whole-object write entry point taking a completion object
/// (declaration only).
/// @param soid name of the target object
/// @param c    librados completion implementation
/// @param bl   source data
/// NOTE(review): presumably signals @p c when the write is done — confirm.
283 int aio_write_full(const std::string
& soid
, librados::AioCompletionImpl
*c
,
284 const bufferlist
& bl
);
/// Read entry point taking a completion object; bufferlist overload
/// (declaration only).
/// @param soid name of the source object
/// @param c    librados completion implementation
/// @param pbl  pointer to the destination bufferlist
/// @param len  byte count
/// @param off  offset
/// NOTE(review): @p pbl presumably has to stay valid until @p c fires —
/// confirm.
285 int aio_read(const std::string
& soid
, librados::AioCompletionImpl
*c
,
286 bufferlist
* pbl
, size_t len
, uint64_t off
);
/// Read entry point taking a completion object; raw char buffer overload
/// (declaration only).
/// @param soid name of the source object
/// @param c    librados completion implementation
/// @param buf  destination buffer
/// @param len  byte count
/// @param off  offset
/// NOTE(review): @p buf presumably must hold at least @p len bytes and stay
/// valid until @p c fires — confirm.
287 int aio_read(const std::string
& soid
, librados::AioCompletionImpl
*c
,
288 char* buf
, size_t len
, uint64_t off
);
291 // stat, deletion and truncation
/// Stat entry point using time_t for the time information
/// (declaration only).
/// @param soid   name of the object
/// @param psize  pointer for the size
/// @param pmtime pointer for the time, as time_t
/// NOTE(review): both pointers are presumably out-parameters — confirm
/// whether they may be null.
292 int stat(const std::string
& soid
, uint64_t *psize
, time_t *pmtime
);
/// Stat entry point using struct timespec for the time information
/// (declaration only).
/// @param soid  name of the object
/// @param psize pointer for the size
/// @param pts   pointer for the time, as struct timespec
/// NOTE(review): both pointers are presumably out-parameters — confirm
/// whether they may be null.
293 int stat2(const std::string
& soid
, uint64_t *psize
, struct timespec
*pts
);
294 template<class TimeType
>
295 struct StatFunction
{
296 typedef int (librados::IoCtxImpl::*Type
) (const object_t
& oid
,
297 librados::AioCompletionImpl
*c
,
298 uint64_t *psize
, TimeType
*pmtime
);
/// Stat declaration templated on the type of the time out-parameter.
/// @tparam TimeType    type pointed to by @p pmtime
/// @param soid         name of the object
/// @param c            librados completion implementation
/// @param psize        pointer for the size
/// @param pmtime       pointer for the time, as TimeType
/// @param statFunction pointer to a librados::IoCtxImpl member function of
///                     type StatFunction<TimeType>::Type
/// NOTE(review): presumably the shared implementation behind aio_stat and
/// aio_stat2 — confirm in the definition.
300 template<class TimeType
>
301 int aio_generic_stat(const std::string
& soid
, librados::AioCompletionImpl
*c
,
302 uint64_t *psize
, TimeType
*pmtime
,
303 typename StatFunction
<TimeType
>::Type statFunction
);
/// Stat entry point taking a completion object, time_t flavour
/// (declaration only).
/// @param soid   name of the object
/// @param c      librados completion implementation
/// @param psize  pointer for the size
/// @param pmtime pointer for the time, as time_t
/// NOTE(review): the pointers presumably must stay valid until @p c fires —
/// confirm.
304 int aio_stat(const std::string
& soid
, librados::AioCompletionImpl
*c
,
305 uint64_t *psize
, time_t *pmtime
);
/// Stat entry point taking a completion object, struct timespec flavour
/// (declaration only).
/// @param soid  name of the object
/// @param c     librados completion implementation
/// @param psize pointer for the size
/// @param pts   pointer for the time, as struct timespec
/// NOTE(review): the pointers presumably must stay valid until @p c fires —
/// confirm.
306 int aio_stat2(const std::string
& soid
, librados::AioCompletionImpl
*c
,
307 uint64_t *psize
, struct timespec
*pts
);
/// Removal entry point without a completion argument (declaration only).
/// @param soid  name of the object
/// @param flags defaults to 0
/// NOTE(review): the meaning of @p flags is not visible in this header —
/// confirm which flag values are accepted.
308 int remove(const std::string
& soid
, int flags
=0);
/// Truncation entry point (declaration only).
/// @param soid name of the object
/// @param size size argument
/// NOTE(review): presumably @p size is the new total size and the call
/// dispatches to the truncate/grow helpers declared further down — confirm.
309 int trunc(const std::string
& soid
, uint64_t size
);
311 // asynchronous remove. Note that the removal is not 100% parallelized :
312 // the removal of the first rados object of the striped object will be
313 // done via a synchronous call after the completion of all other removals.
314 // These are done asynchronously and in parallel
/// Removal entry point taking a completion object (declaration only).
/// @param soid  name of the object
/// @param c     librados completion implementation
/// @param flags defaults to 0
/// NOTE(review): the meaning of @p flags is not visible in this header —
/// confirm.
315 int aio_remove(const std::string
& soid
, librados::AioCompletionImpl
*c
, int flags
=0);
317 // reference counting
324 bool deleteme
= false;
335 // objectid manipulation
/// Builds an object identifier string from @p soid and @p objectno
/// (declaration only).
/// NOTE(review): presumably the name of the objectno-th rados object backing
/// the striped object; the exact format lives in the implementation —
/// confirm.
336 std::string
getObjectId(const object_t
& soid
, long long unsigned objectno
);
338 // opening and closing of striped objects
/// Unlock entry point without a completion argument (declaration only).
/// @param soid       name of the object
/// @param lockCookie cookie string identifying the lock
/// NOTE(review): presumably releases the shared lock taken by the
/// openStripedObjectFor* helpers; returns void, so failures are not
/// reported to the caller — confirm.
339 void unlockObject(const std::string
& soid
,
340 const std::string
& lockCookie
);
/// Unlock entry point taking a completion object (declaration only).
/// @param soid       name of the object
/// @param lockCookie cookie string identifying the lock
/// @param c          librados::AioCompletion (public type, not the Impl)
/// NOTE(review): presumably @p c is signalled once the unlock finishes —
/// confirm.
341 void aio_unlockObject(const std::string
& soid
,
342 const std::string
& lockCookie
,
343 librados::AioCompletion
*c
);
345 // internal versions of IO method
346 int write_in_open_object(const std::string
& soid
,
347 const ceph_file_layout
& layout
,
348 const std::string
& lockCookie
,
349 const bufferlist
& bl
,
352 int aio_write_in_open_object(const std::string
& soid
,
353 librados::AioCompletionImpl
*c
,
354 const ceph_file_layout
& layout
,
355 const std::string
& lockCookie
,
356 const bufferlist
& bl
,
359 int internal_aio_write(const std::string
& soid
,
360 libradosstriper::MultiAioCompletionImpl
*c
,
361 const bufferlist
& bl
,
364 const ceph_file_layout
& layout
);
366 int extract_uint32_attr(std::map
<std::string
, bufferlist
> &attrs
,
367 const std::string
& key
,
370 int extract_sizet_attr(std::map
<std::string
, bufferlist
> &attrs
,
371 const std::string
& key
,
374 int internal_get_layout_and_size(const std::string
& oid
,
375 ceph_file_layout
*layout
,
378 int internal_aio_remove(const std::string
& soid
,
379 libradosstriper::MultiAioCompletionImpl
*multi_completion
,
383 * opens an existing striped object and takes a shared lock on it
384 * @return 0 if everything is ok and the lock was taken. -errcode otherwise
385 * In particular, if the striped object does not exist, -ENOENT is returned
386 * In case the return code is not 0, no lock is taken
388 int openStripedObjectForRead(const std::string
& soid
,
389 ceph_file_layout
*layout
,
391 std::string
*lockCookie
);
394 * opens an existing striped object, takes a shared lock on it
395 * and sets its size to the size it will have after the write.
396 * In case the striped object does not exist, it will create it by
397 * calling createAndOpenStripedObject.
398 * @param layout this is filled with the layout of the file
399 * @param size new size of the file (together with isFileSizeAbsolute)
400 * In case of success, this is filled with the size of the file before the opening
401 * @param isFileSizeAbsolute if false, this means that the given size should
402 * be added to the current file size (append mode)
403 * @return 0 if everything is ok and the lock was taken. -errcode otherwise
404 * In case the return code is not 0, no lock is taken
406 int openStripedObjectForWrite(const std::string
& soid
,
407 ceph_file_layout
*layout
,
409 std::string
*lockCookie
,
410 bool isFileSizeAbsolute
);
412 * creates an empty striped object with the given size and opens it calling
413 * openStripedObjectForWrite, which implies taking a shared lock on it
414 * Also deals with the cases where the object was created in the mean time
415 * @param isFileSizeAbsolute if false, this means that the given size should
416 * be added to the current file size (append mode). This of course only makes
417 * sense in case the striped object already exists
418 * @return 0 if everything is ok and the lock was taken. -errcode otherwise
419 * In case the return code is not 0, no lock is taken
421 int createAndOpenStripedObject(const std::string
& soid
,
422 ceph_file_layout
*layout
,
424 std::string
*lockCookie
,
425 bool isFileSizeAbsolute
);
428 * truncates an object synchronously. Should only be called with size < original_size
430 int truncate(const std::string
& soid
,
431 uint64_t original_size
,
433 ceph_file_layout
&layout
);
436 * truncates an object asynchronously. Should only be called with size < original_size
437 * note that the method is not 100% asynchronous, only the removal of rados objects
438 * is, the (potential) truncation of the rados object residing just at the truncation
439 * point is synchronous for lack of asynchronous truncation in the rados layer
441 int aio_truncate(const std::string
& soid
,
442 libradosstriper::MultiAioCompletionImpl
*c
,
443 uint64_t original_size
,
445 ceph_file_layout
&layout
);
448 * grows an object (adding 0s). Should only be called with size > original_size
450 int grow(const std::string
& soid
,
451 uint64_t original_size
,
453 ceph_file_layout
&layout
);
456 * creates a unique identifier
/// Static helper returning the identifier as a std::string
/// (declaration only).
/// NOTE(review): presumably the source of the lock cookies used in this
/// class — confirm.
458 static std::string
getUUID();
461 return (CephContext
*)m_radosCluster
.cct();
464 // reference counting
471 librados::Rados m_radosCluster
;
472 librados::IoCtx m_ioCtx
;
473 librados::IoCtxImpl
*m_ioCtxImpl
;
476 ceph_file_layout m_layout
;