1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2014 Sebastien Ponce <sebastien.ponce@cern.ch>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
15 #include "libradosstriper/RadosStriperImpl.h"
23 #include "include/types.h"
24 #include "include/uuid.h"
25 #include "include/ceph_fs.h"
26 #include "common/dout.h"
27 #include "common/strtol.h"
28 #include "osdc/Striper.h"
29 #include "libradosstriper/MultiAioCompletionImpl.h"
30 #include "librados/AioCompletionImpl.h"
31 #include <cls/lock/cls_lock_client.h>
34 * This file contains the actual implementation of the rados striped objects interface.
36 * Striped objects are stored in rados in a set of regular rados objects, after their
37 * content has been striped using the osdc/Striper interface.
39 * The external attributes of the striped object are mapped to the attributes of the
40 * first underlying object. This first object has a set of extra external attributes
41 * storing the layout of the striped object for future read back. These attributes are :
42 * - striper.layout.object_size : the size of rados objects used.
43 * Must be a multiple of striper.layout.stripe_unit
44 * - striper.layout.stripe_unit : the size of a stripe unit
45 * - striper.layout.stripe_count : the number of stripes used
46 * - striper.size : total striped object size
48 * In general operations on striped objects are not atomic.
49 * However, a certain number of safety guards have been put to make the interface closer
51 * - each data operation takes a shared lock on the first rados object for the
52 * whole time of the operation
53 * - the remove and trunc operations take an exclusive lock on the first rados object
54 * for the whole time of the operation
55 * This makes sure that no removal/truncation of a striped object occurs while
56 * data operations are happening and vice versa. It thus makes sure that the layout
57 * of a striped object does not change during data operation, which is essential for
60 * Still the writing to a striped object is not atomic. This means in particular that
61 * the size of an object may not be in sync with its content at all times.
62 * As the size is always guaranteed to be updated first and in an atomic way, and as
63 * sparse striped objects are supported (see below), what will typically happen is
64 * that a reader that comes too soon after a write will read 0s instead of the actual
67 * Note that remove handles the pieces of the striped object in reverse order,
68 * so that the head object is removed last, making the completion of the deletion atomic.
70 * Striped objects can be sparse, typically in case data was written at the end of the
71 * striped object only. In such a case, some rados objects constituting the striped object
72 * may be missing. Others can be partial (only the beginning will have data).
73 * When dealing with such sparse striped files, missing objects are detected and
74 * considered as full of 0s. They are however not created until real data is written
77 * There are a number of missing features/improvements that could be implemented.
78 * Here are some ideas :
79 * - implementation of missing entry points (compared to rados)
80 * In particular : clone_range, sparse_read, exec, aio_flush_async, tmaps, omaps, ...
84 #define dout_subsys ceph_subsys_rados
86 #define dout_prefix *_dout << "libradosstriper: "
88 /// size of xattr buffer
89 #define XATTR_BUFFER_SIZE 32
91 /// names of the different xattr entries
92 #define XATTR_LAYOUT_STRIPE_UNIT "striper.layout.stripe_unit"
93 #define XATTR_LAYOUT_STRIPE_COUNT "striper.layout.stripe_count"
94 #define XATTR_LAYOUT_OBJECT_SIZE "striper.layout.object_size"
95 #define XATTR_SIZE "striper.size"
96 #define LOCK_PREFIX "lock."
98 /// name of the lock used on objects to ensure layout stability during IO
99 #define RADOS_LOCK_NAME "striper.lock"
101 /// format of the extension of rados objects created for a given striped object
102 #define RADOS_OBJECT_EXTENSION_FORMAT ".%016llx"
104 /// default object layout
105 struct ceph_file_layout default_file_layout
= {
106 init_le32(1<<22), // fl_stripe_unit
107 init_le32(1), // fl_stripe_count
108 init_le32(1<<22), // fl_object_size
109 init_le32(0), // fl_cas_hash
110 init_le32(0), // fl_object_stripe_unit
111 init_le32(-1), // fl_unused
112 init_le32(-1), // fl_pg_pool
116 ///////////////////////// CompletionData /////////////////////////////
118 libradosstriper::RadosStriperImpl::CompletionData::CompletionData
119 (libradosstriper::RadosStriperImpl
* striper
,
120 const std::string
& soid
,
121 const std::string
& lockCookie
,
122 librados::AioCompletionImpl
*userCompletion
,
124 RefCountedObject(striper
->cct(), n
),
125 m_striper(striper
), m_soid(soid
), m_lockCookie(lockCookie
), m_ack(0) {
127 if (userCompletion
) {
128 m_ack
= new librados::IoCtxImpl::C_aio_Complete(userCompletion
);
129 userCompletion
->io
= striper
->m_ioCtxImpl
;
133 libradosstriper::RadosStriperImpl::CompletionData::~CompletionData() {
134 if (m_ack
) delete m_ack
;
/// Report the result of the operation to the user.
/// @param r return code forwarded to the user completion wrapper (m_ack).
/// Nothing is done when m_ack is null, i.e. when no user completion was
/// handed to the constructor.
138 void libradosstriper::RadosStriperImpl::CompletionData::complete(int r
) {
139 if (m_ack
) m_ack
->finish(r
);
142 libradosstriper::RadosStriperImpl::ReadCompletionData::ReadCompletionData
143 (libradosstriper::RadosStriperImpl
* striper
,
144 const std::string
& soid
,
145 const std::string
& lockCookie
,
146 librados::AioCompletionImpl
*userCompletion
,
148 std::vector
<ObjectExtent
>* extents
,
149 std::vector
<bufferlist
>* resultbl
,
151 CompletionData(striper
, soid
, lockCookie
, userCompletion
, n
),
152 m_bl(bl
), m_extents(extents
), m_resultbl(resultbl
), m_readRc(0),
153 m_unlockCompletion(0) {}
155 libradosstriper::RadosStriperImpl::ReadCompletionData::~ReadCompletionData() {
156 m_unlockCompletion
->release();
161 void libradosstriper::RadosStriperImpl::ReadCompletionData::complete_read(int r
) {
162 // gather data into final buffer
163 Striper::StripedReadResult readResult
;
164 vector
<bufferlist
>::iterator bit
= m_resultbl
->begin();
165 for (vector
<ObjectExtent
>::iterator eit
= m_extents
->begin();
166 eit
!= m_extents
->end();
168 readResult
.add_partial_result(m_striper
->cct(), *bit
, eit
->buffer_extents
);
171 readResult
.assemble_result(m_striper
->cct(), *m_bl
, true);
172 // Remember return code
/// Completion step run once the unlock of the striped object has finished.
/// @param r return code of the unlock; deliberately unused (see note below).
/// The value handed to CompletionData::complete is the stored read return
/// code m_readRc when it is non-zero, otherwise the length of the result
/// buffer m_bl.
176 void libradosstriper::RadosStriperImpl::ReadCompletionData::complete_unlock(int r
) {
177 // call parent's completion method
178 // Note that we ignore the return code of the unlock as we cannot do much about it
179 CompletionData::complete(m_readRc
?m_readRc
:m_bl
->length());
182 libradosstriper::RadosStriperImpl::WriteCompletionData::WriteCompletionData
183 (libradosstriper::RadosStriperImpl
* striper
,
184 const std::string
& soid
,
185 const std::string
& lockCookie
,
186 librados::AioCompletionImpl
*userCompletion
,
188 CompletionData(striper
, soid
, lockCookie
, userCompletion
, n
), m_safe(0),
189 m_unlockCompletion(0) {
190 if (userCompletion
) {
191 m_safe
= new librados::IoCtxImpl::C_aio_Complete(userCompletion
);
/// Destructor: releases the unlock completion and deletes the "safe"
/// user completion wrapper when one was created.
/// NOTE(review): m_unlockCompletion is initialised to 0 by the constructor and
/// is released here without a null check, so it presumably must always be
/// assigned before the object is destroyed — confirm against the callers.
195 libradosstriper::RadosStriperImpl::WriteCompletionData::~WriteCompletionData() {
196 m_unlockCompletion
->release();
197 if (m_safe
) delete m_safe
;
/// Completion step run once the unlock of the striped object has finished.
/// @param r return code of the unlock; deliberately unused (see note below).
/// The user is notified with the stored write return code m_writeRc.
200 void libradosstriper::RadosStriperImpl::WriteCompletionData::complete_unlock(int r
) {
201 // call parent's completion method
202 // Note that we ignore the return code of the unlock as we cannot do much about it
203 CompletionData::complete(m_writeRc
);
206 void libradosstriper::RadosStriperImpl::WriteCompletionData::complete_write(int r
) {
207 // Remember return code
/// Forward the "safe" notification of a write to the user.
/// @param r return code passed to the m_safe wrapper.
/// Nothing is done when m_safe is null, i.e. when no user completion was
/// handed to the constructor.
211 void libradosstriper::RadosStriperImpl::WriteCompletionData::safe(int r
) {
212 if (m_safe
) m_safe
->finish(r
);
215 libradosstriper::RadosStriperImpl::RemoveCompletionData::RemoveCompletionData
216 (libradosstriper::RadosStriperImpl
* striper
,
217 const std::string
& soid
,
218 const std::string
& lockCookie
,
219 librados::AioCompletionImpl
*userCompletion
,
221 CompletionData(striper
, soid
, lockCookie
, userCompletion
), flags(flags
) {}
223 libradosstriper::RadosStriperImpl::TruncateCompletionData::TruncateCompletionData
224 (libradosstriper::RadosStriperImpl
* striper
,
225 const std::string
& soid
,
227 RefCountedObject(striper
->cct()),
228 m_striper(striper
), m_soid(soid
), m_size(size
) {
232 libradosstriper::RadosStriperImpl::TruncateCompletionData::~TruncateCompletionData() {
236 ///////////////////////// constructor /////////////////////////////
/// Build a striper working on top of the given I/O context.
/// The reference count starts at 0 and the layout starts as a copy of
/// default_file_layout; the setObjectLayout* methods can change it afterwards.
/// @param ioctx      I/O context stored in m_ioCtx and also used to initialise
///                   m_radosCluster
/// @param ioctx_impl pointer stored in m_ioCtxImpl — presumably the
///                   implementation object behind ioctx; confirm with callers
238 libradosstriper::RadosStriperImpl::RadosStriperImpl(librados::IoCtx
& ioctx
, librados::IoCtxImpl
*ioctx_impl
) :
239 m_refCnt(0),lock("RadosStriper Refcont", false, false), m_radosCluster(ioctx
), m_ioCtx(ioctx
), m_ioCtxImpl(ioctx_impl
),
240 m_layout(default_file_layout
) {}
242 ///////////////////////// layout /////////////////////////////
244 int libradosstriper::RadosStriperImpl::setObjectLayoutStripeUnit
245 (unsigned int stripe_unit
)
247 /* stripe unit must be non-zero, 64k increment */
248 if (!stripe_unit
|| (stripe_unit
& (CEPH_MIN_STRIPE_UNIT
-1)))
250 m_layout
.fl_stripe_unit
= stripe_unit
;
254 int libradosstriper::RadosStriperImpl::setObjectLayoutStripeCount
255 (unsigned int stripe_count
)
257 /* stripe count must be non-zero */
260 m_layout
.fl_stripe_count
= stripe_count
;
264 int libradosstriper::RadosStriperImpl::setObjectLayoutObjectSize
265 (unsigned int object_size
)
267 /* object size must be non-zero, 64k increment */
268 if (!object_size
|| (object_size
& (CEPH_MIN_STRIPE_UNIT
-1)))
270 /* object size must be a multiple of stripe unit */
271 if (object_size
< m_layout
.fl_stripe_unit
||
272 object_size
% m_layout
.fl_stripe_unit
)
274 m_layout
.fl_object_size
= object_size
;
278 ///////////////////////// xattrs /////////////////////////////
280 int libradosstriper::RadosStriperImpl::getxattr(const object_t
& soid
,
284 std::string firstObjOid
= getObjectId(soid
, 0);
285 return m_ioCtx
.getxattr(firstObjOid
, name
, bl
);
288 int libradosstriper::RadosStriperImpl::setxattr(const object_t
& soid
,
292 std::string firstObjOid
= getObjectId(soid
, 0);
293 return m_ioCtx
.setxattr(firstObjOid
, name
, bl
);
296 int libradosstriper::RadosStriperImpl::getxattrs(const object_t
& soid
,
297 map
<string
, bufferlist
>& attrset
)
299 std::string firstObjOid
= getObjectId(soid
, 0);
300 int rc
= m_ioCtx
.getxattrs(firstObjOid
, attrset
);
302 // cleanup internal attributes dedicated to striping and locking
303 attrset
.erase(XATTR_LAYOUT_STRIPE_UNIT
);
304 attrset
.erase(XATTR_LAYOUT_STRIPE_COUNT
);
305 attrset
.erase(XATTR_LAYOUT_OBJECT_SIZE
);
306 attrset
.erase(XATTR_SIZE
);
307 attrset
.erase(std::string(LOCK_PREFIX
) + RADOS_LOCK_NAME
);
311 int libradosstriper::RadosStriperImpl::rmxattr(const object_t
& soid
,
314 std::string firstObjOid
= getObjectId(soid
, 0);
315 return m_ioCtx
.rmxattr(firstObjOid
, name
);
318 ///////////////////////// io /////////////////////////////
320 int libradosstriper::RadosStriperImpl::write(const std::string
& soid
,
321 const bufferlist
& bl
,
325 // open the object. This will create it if needed, retrieve its layout
326 // and size and take a shared lock on it
327 ceph_file_layout layout
;
328 std::string lockCookie
;
329 int rc
= createAndOpenStripedObject(soid
, &layout
, len
+off
, &lockCookie
, true);
331 return write_in_open_object(soid
, layout
, lockCookie
, bl
, len
, off
);
334 int libradosstriper::RadosStriperImpl::append(const std::string
& soid
,
335 const bufferlist
& bl
,
338 // open the object. This will create it if needed, retrieve its layout
339 // and size and take a shared lock on it
340 ceph_file_layout layout
;
342 std::string lockCookie
;
343 int rc
= openStripedObjectForWrite(soid
, &layout
, &size
, &lockCookie
, false);
345 return write_in_open_object(soid
, layout
, lockCookie
, bl
, len
, size
);
/// Replace the whole content of a striped object.
/// The object is first truncated to size 0, then bl.length() bytes of bl are
/// written at offset 0. These are two separate calls (trunc, then write).
/// @param soid name of the striped object
/// @param bl   data that becomes the new content
/// @return the non-zero return code of trunc when it fails for a reason other
///         than -ENOENT, otherwise whatever write returns
348 int libradosstriper::RadosStriperImpl::write_full(const std::string
& soid
,
349 const bufferlist
& bl
)
351 int rc
= trunc(soid
, 0);
352 if (rc
&& rc
!= -ENOENT
) return rc
; // ENOENT is obviously ok
353 return write(soid
, bl
, bl
.length(), 0);
356 int libradosstriper::RadosStriperImpl::read(const std::string
& soid
,
361 // create a completion object
362 librados::AioCompletionImpl c
;
363 // call asynchronous method
364 int rc
= aio_read(soid
, &c
, bl
, len
, off
);
365 // and wait for completion
367 // wait for completion
368 c
.wait_for_complete_and_cb();
370 rc
= c
.get_return_value();
375 ///////////////////////// asynchronous io /////////////////////////////
377 int libradosstriper::RadosStriperImpl::aio_write(const std::string
& soid
,
378 librados::AioCompletionImpl
*c
,
379 const bufferlist
& bl
,
383 ceph_file_layout layout
;
384 std::string lockCookie
;
385 int rc
= createAndOpenStripedObject(soid
, &layout
, len
+off
, &lockCookie
, true);
387 return aio_write_in_open_object(soid
, c
, layout
, lockCookie
, bl
, len
, off
);
390 int libradosstriper::RadosStriperImpl::aio_append(const std::string
& soid
,
391 librados::AioCompletionImpl
*c
,
392 const bufferlist
& bl
,
395 ceph_file_layout layout
;
397 std::string lockCookie
;
398 int rc
= openStripedObjectForWrite(soid
, &layout
, &size
, &lockCookie
, false);
400 // create a completion object
401 return aio_write_in_open_object(soid
, c
, layout
, lockCookie
, bl
, len
, size
);
404 int libradosstriper::RadosStriperImpl::aio_write_full(const std::string
& soid
,
405 librados::AioCompletionImpl
*c
,
406 const bufferlist
& bl
)
408 int rc
= trunc(soid
, 0);
410 return aio_write(soid
, c
, bl
, bl
.length(), 0);
413 static void rados_read_aio_unlock_complete(rados_striper_multi_completion_t c
, void *arg
)
415 libradosstriper::RadosStriperImpl::ReadCompletionData
*cdata
=
416 reinterpret_cast<libradosstriper::RadosStriperImpl::ReadCompletionData
*>(arg
);
417 libradosstriper::MultiAioCompletionImpl
*comp
=
418 reinterpret_cast<libradosstriper::MultiAioCompletionImpl
*>(c
);
419 cdata
->complete_unlock(comp
->rval
);
423 static void striper_read_aio_req_complete(rados_striper_multi_completion_t c
, void *arg
)
425 libradosstriper::RadosStriperImpl::ReadCompletionData
*cdata
=
426 reinterpret_cast<libradosstriper::RadosStriperImpl::ReadCompletionData
*>(arg
);
427 // launch the async unlocking of the object
428 cdata
->m_striper
->aio_unlockObject(cdata
->m_soid
, cdata
->m_lockCookie
, cdata
->m_unlockCompletion
);
429 // complete the read part in parallel
430 libradosstriper::MultiAioCompletionImpl
*comp
=
431 reinterpret_cast<libradosstriper::MultiAioCompletionImpl
*>(c
);
432 cdata
->complete_read(comp
->rval
);
435 static void rados_req_read_safe(rados_completion_t c
, void *arg
)
437 libradosstriper::RadosStriperImpl::RadosReadCompletionData
*data
=
438 reinterpret_cast<libradosstriper::RadosStriperImpl::RadosReadCompletionData
*>(arg
);
439 int rc
= rados_aio_get_return_value(c
);
440 // ENOENT means that we are dealing with a sparse file. This is fine,
441 // data (0s) will be created on the fly by the rados_req_read_complete method
442 if (rc
== -ENOENT
) rc
= 0;
443 libradosstriper::MultiAioCompletionImpl
*multiAioComp
= data
->m_multiAioCompl
;
444 multiAioComp
->safe_request(rc
);
448 static void rados_req_read_complete(rados_completion_t c
, void *arg
)
450 libradosstriper::RadosStriperImpl::RadosReadCompletionData
*data
=
451 reinterpret_cast<libradosstriper::RadosStriperImpl::RadosReadCompletionData
*>(arg
);
452 int rc
= rados_aio_get_return_value(c
);
453 // We need to handle the case of sparse files here
455 // the object did not exist at all. This can happen for sparse files.
456 // we consider we've read 0 bytes and it will fall into next case
459 if (rc
>= 0 && (((uint64_t)rc
) < data
->m_expectedBytes
)) {
460 // only partial data were present in the object (or the object did not
461 // even exist if we've gone through previous case).
462 // This is typical of sparse file and we need to complete with 0s.
463 unsigned int lenOfZeros
= data
->m_expectedBytes
-rc
;
464 unsigned int existingDataToZero
= min(data
->m_bl
->length()-rc
, lenOfZeros
);
465 if (existingDataToZero
> 0) {
466 data
->m_bl
->zero(rc
, existingDataToZero
);
468 if (lenOfZeros
> existingDataToZero
) {
469 ceph::bufferptr
zeros(ceph::buffer::create(lenOfZeros
-existingDataToZero
));
471 data
->m_bl
->push_back(zeros
);
473 rc
= data
->m_expectedBytes
;
475 libradosstriper::MultiAioCompletionImpl
* multiAioComp
= data
->m_multiAioCompl
;
476 multiAioComp
->complete_request(rc
);
480 int libradosstriper::RadosStriperImpl::aio_read(const std::string
& soid
,
481 librados::AioCompletionImpl
*c
,
486 // open the object. This will retrieve its layout and size
487 // and take a shared lock on it
488 ceph_file_layout layout
;
490 std::string lockCookie
;
491 int rc
= openStripedObjectForRead(soid
, &layout
, &size
, &lockCookie
);
493 // find out the actual number of bytes we can read
496 // nothing to read ! We are done.
499 read_len
= min(len
, (size_t)(size
-off
));
501 // get list of extents to be read from
502 vector
<ObjectExtent
> *extents
= new vector
<ObjectExtent
>();
504 std::string format
= soid
+ RADOS_OBJECT_EXTENSION_FORMAT
;
506 l
.from_legacy(layout
);
507 Striper::file_to_extents(cct(), format
.c_str(), &l
, off
, read_len
,
511 // create a completion object and transfer ownership of extents and resultbl
512 vector
<bufferlist
> *resultbl
= new vector
<bufferlist
>(extents
->size());
513 ReadCompletionData
*cdata
= new ReadCompletionData(this, soid
, lockCookie
, c
,
514 bl
, extents
, resultbl
, 1);
517 // create a completion for the unlocking of the striped object at the end of the read
518 librados::AioCompletion
*unlock_completion
=
519 librados::Rados::aio_create_completion(cdata
, rados_read_aio_unlock_complete
, 0);
520 cdata
->m_unlockCompletion
= unlock_completion
;
521 // create the multiCompletion object handling the reads
522 libradosstriper::MultiAioCompletionImpl
*nc
= new libradosstriper::MultiAioCompletionImpl
;
523 nc
->set_complete_callback(cdata
, striper_read_aio_req_complete
);
524 // go through the extents
526 for (vector
<ObjectExtent
>::iterator p
= extents
->begin(); p
!= extents
->end(); ++p
) {
527 // create a buffer list describing where to place data read from current extent
528 bufferlist
*oid_bl
= &((*resultbl
)[i
++]);
529 for (vector
<pair
<uint64_t,uint64_t> >::iterator q
= p
->buffer_extents
.begin();
530 q
!= p
->buffer_extents
.end();
532 bufferlist buffer_bl
;
533 buffer_bl
.substr_of(*bl
, q
->first
, q
->second
);
534 oid_bl
->append(buffer_bl
);
536 // read all extents of a given object in one go
538 // we need 2 references on data as both rados_req_read_safe and rados_req_read_complete
540 RadosReadCompletionData
*data
= new RadosReadCompletionData(nc
, p
->length
, oid_bl
, cct(), 2);
541 librados::AioCompletion
*rados_completion
=
542 librados::Rados::aio_create_completion(data
, rados_req_read_complete
, rados_req_read_safe
);
543 r
= m_ioCtx
.aio_read(p
->oid
.name
, rados_completion
, oid_bl
, p
->length
, p
->offset
);
544 rados_completion
->release();
548 nc
->finish_adding_requests();
553 int libradosstriper::RadosStriperImpl::aio_read(const std::string
& soid
,
554 librados::AioCompletionImpl
*c
,
559 // create a buffer list and store it inside the completion object
561 c
->bl
.push_back(buffer::create_static(len
, buf
));
562 // call the bufferlist version of this method
563 return aio_read(soid
, c
, &c
->bl
, len
, off
);
566 int libradosstriper::RadosStriperImpl::aio_flush()
569 // pass to the rados level
570 ret
= m_ioCtx
.aio_flush();
573 // wait until all CompletionData are released
581 ///////////////////////// stat and deletion /////////////////////////////
583 int libradosstriper::RadosStriperImpl::stat(const std::string
& soid
, uint64_t *psize
, time_t *pmtime
)
585 // create a completion object
586 librados::AioCompletionImpl c
;
587 // call asynchronous version of stat
588 int rc
= aio_stat(soid
, &c
, psize
, pmtime
);
590 // wait for completion of the stat
591 c
.wait_for_complete();
593 rc
= c
.get_return_value();
598 static void striper_stat_aio_stat_complete(rados_completion_t c
, void *arg
) {
599 libradosstriper::RadosStriperImpl::BasicStatCompletionData
*data
=
600 reinterpret_cast<libradosstriper::RadosStriperImpl::BasicStatCompletionData
*>(arg
);
601 int rc
= rados_aio_get_return_value(c
);
603 // remember this has failed
606 data
->m_multiCompletion
->complete_request(rc
);
610 static void striper_stat_aio_getxattr_complete(rados_completion_t c
, void *arg
) {
611 libradosstriper::RadosStriperImpl::BasicStatCompletionData
*data
=
612 reinterpret_cast<libradosstriper::RadosStriperImpl::BasicStatCompletionData
*>(arg
);
613 int rc
= rados_aio_get_return_value(c
);
614 // We need to handle the case of sparse files here
616 // remember this has failed
617 data
->m_getxattrRC
= rc
;
619 // this intermediate string allows to add a null terminator before calling strtol
621 std::string
strsize(data
->m_bl
.c_str(), data
->m_bl
.length());
622 *data
->m_psize
= strict_strtoll(strsize
.c_str(), 10, &err
);
624 lderr(data
->m_striper
->cct()) << XATTR_SIZE
<< " : " << err
<< dendl
;
625 data
->m_getxattrRC
= -EINVAL
;
629 data
->m_multiCompletion
->complete_request(rc
);
633 static void striper_stat_aio_req_complete(rados_striper_multi_completion_t c
,
635 libradosstriper::RadosStriperImpl::BasicStatCompletionData
*data
=
636 reinterpret_cast<libradosstriper::RadosStriperImpl::BasicStatCompletionData
*>(arg
);
637 if (data
->m_statRC
) {
638 data
->complete(data
->m_statRC
);
640 if (data
->m_getxattrRC
< 0) {
641 data
->complete(data
->m_getxattrRC
);
649 template<class TimeType
>
650 int libradosstriper::RadosStriperImpl::aio_generic_stat
651 (const std::string
& soid
,
652 librados::AioCompletionImpl
*c
,
655 typename
libradosstriper::RadosStriperImpl::StatFunction
<TimeType
>::Type statFunction
)
657 // use a MultiAioCompletion object for dealing with the fact
658 // that we'll do 2 asynchronous calls in parallel
659 libradosstriper::MultiAioCompletionImpl
*multi_completion
=
660 new libradosstriper::MultiAioCompletionImpl
;
661 // Data object used for passing context to asynchronous calls
662 std::string firstObjOid
= getObjectId(soid
, 0);
663 StatCompletionData
<TimeType
> *cdata
=
664 new StatCompletionData
<TimeType
>(this, firstObjOid
, c
,
665 multi_completion
, psize
, pmtime
, 4);
666 multi_completion
->set_complete_callback(cdata
, striper_stat_aio_req_complete
);
667 // use a regular AioCompletion for the stat async call
668 librados::AioCompletion
*stat_completion
=
669 librados::Rados::aio_create_completion(cdata
, striper_stat_aio_stat_complete
, 0);
670 multi_completion
->add_safe_request();
671 object_t
obj(firstObjOid
);
672 int rc
= (m_ioCtxImpl
->*statFunction
)(obj
, stat_completion
->pc
,
673 &cdata
->m_objectSize
, cdata
->m_pmtime
);
674 stat_completion
->release();
676 // nothing is really started so cancel everything
677 delete multi_completion
;
681 // use a regular AioCompletion for the getxattr async call
682 librados::AioCompletion
*getxattr_completion
=
683 librados::Rados::aio_create_completion(cdata
, striper_stat_aio_getxattr_complete
, 0);
684 multi_completion
->add_safe_request();
685 // in parallel, get the pmsize from the first object asynchronously
686 rc
= m_ioCtxImpl
->aio_getxattr(obj
, getxattr_completion
->pc
,
687 XATTR_SIZE
, cdata
->m_bl
);
688 getxattr_completion
->release();
689 multi_completion
->finish_adding_requests();
691 // the async stat is ongoing, so we need to go on
692 // we mark the getxattr as failed in the data object
693 cdata
->m_getxattrRC
= rc
;
694 multi_completion
->complete_request(rc
);
695 multi_completion
->put();
699 multi_completion
->put();
703 int libradosstriper::RadosStriperImpl::aio_stat(const std::string
& soid
,
704 librados::AioCompletionImpl
*c
,
708 return aio_generic_stat
<time_t>(soid
, c
, psize
, pmtime
, &librados::IoCtxImpl::aio_stat
);
711 int libradosstriper::RadosStriperImpl::stat2(const std::string
& soid
, uint64_t *psize
, struct timespec
*pts
)
713 // create a completion object
714 librados::AioCompletionImpl c
;
715 // call asynchronous version of stat
716 int rc
= aio_stat2(soid
, &c
, psize
, pts
);
718 // wait for completion of the stat
719 c
.wait_for_complete_and_cb();
721 rc
= c
.get_return_value();
726 int libradosstriper::RadosStriperImpl::aio_stat2(const std::string
& soid
,
727 librados::AioCompletionImpl
*c
,
729 struct timespec
*pts
)
731 return aio_generic_stat
<struct timespec
>(soid
, c
, psize
, pts
, &librados::IoCtxImpl::aio_stat2
);
734 static void rados_req_remove_complete(rados_completion_t c
, void *arg
)
736 libradosstriper::RadosStriperImpl::RadosRemoveCompletionData
*cdata
=
737 reinterpret_cast<libradosstriper::RadosStriperImpl::RadosRemoveCompletionData
*>(arg
);
738 int rc
= rados_aio_get_return_value(c
);
739 // in case the object did not exist, it means we had a sparse file, all is fine
743 cdata
->m_multiAioCompl
->complete_request(rc
);
747 static void rados_req_remove_safe(rados_completion_t c
, void *arg
)
749 libradosstriper::RadosStriperImpl::RadosRemoveCompletionData
*cdata
=
750 reinterpret_cast<libradosstriper::RadosStriperImpl::RadosRemoveCompletionData
*>(arg
);
751 int rc
= rados_aio_get_return_value(c
);
752 // in case the object did not exist, it means we had a sparse file, all is fine
756 cdata
->m_multiAioCompl
->safe_request(rc
);
760 static void striper_remove_aio_req_complete(rados_striper_multi_completion_t c
, void *arg
)
762 libradosstriper::RadosStriperImpl::RemoveCompletionData
*cdata
=
763 reinterpret_cast<libradosstriper::RadosStriperImpl::RemoveCompletionData
*>(arg
);
764 libradosstriper::MultiAioCompletionImpl
*comp
=
765 reinterpret_cast<libradosstriper::MultiAioCompletionImpl
*>(c
);
766 ldout(cdata
->m_striper
->cct(), 10)
767 << "RadosStriperImpl : striper_remove_aio_req_complete called for "
768 << cdata
->m_soid
<< dendl
;
771 // All went fine, synchronously remove first object
772 rc
= cdata
->m_striper
->m_ioCtx
.remove(cdata
->m_striper
->getObjectId(cdata
->m_soid
, 0),
775 lderr(cdata
->m_striper
->cct())
776 << "RadosStriperImpl : deletion/truncation incomplete for " << cdata
->m_soid
777 << ", as errors were encountered. The file is left present but it's content "
778 << " has been partially removed"
785 int libradosstriper::RadosStriperImpl::remove(const std::string
& soid
, int flags
)
787 // create a completion object
788 librados::AioCompletionImpl c
;
789 // call asynchronous version of remove
790 int rc
= aio_remove(soid
, &c
, flags
);
792 // wait for completion of the remove
793 c
.wait_for_complete_and_cb();
795 rc
= c
.get_return_value();
800 int libradosstriper::RadosStriperImpl::aio_remove(const std::string
& soid
,
801 librados::AioCompletionImpl
*c
,
804 // the RemoveCompletionData object will lock the given soid for the duration
806 std::string lockCookie
= getUUID();
807 int rc
= m_ioCtx
.lock_exclusive(getObjectId(soid
, 0), RADOS_LOCK_NAME
, lockCookie
, "", 0, 0);
809 // create CompletionData for the async remove call
810 RemoveCompletionData
*cdata
= new RemoveCompletionData(this, soid
, lockCookie
, c
, flags
);
811 libradosstriper::MultiAioCompletionImpl
*multi_completion
=
812 new libradosstriper::MultiAioCompletionImpl
;
813 multi_completion
->set_complete_callback(cdata
, striper_remove_aio_req_complete
);
814 // call asynchronous internal version of remove
816 << "RadosStriperImpl : Aio_remove starting for "
818 rc
= internal_aio_remove(soid
, multi_completion
);
819 multi_completion
->put();
823 int libradosstriper::RadosStriperImpl::internal_aio_remove
824 (const std::string
& soid
,
825 libradosstriper::MultiAioCompletionImpl
*multi_completion
,
828 std::string firstObjOid
= getObjectId(soid
, 0);
830 // check size and get number of rados objects to delete
831 uint64_t nb_objects
= 0;
833 int rc
= getxattr(soid
, XATTR_SIZE
, bl2
);
835 // no object size (or not able to get it)
836 // try to find the number of object "by hand"
839 while (!m_ioCtx
.stat(getObjectId(soid
, nb_objects
), &psize
, &pmtime
)) {
843 // count total number of rados objects in the striped object
845 // this intermediate string allows to add a null terminator before calling strtol
846 std::string
strsize(bl2
.c_str(), bl2
.length());
847 uint64_t size
= strict_strtoll(strsize
.c_str(), 10, &err
);
849 lderr(cct()) << XATTR_SIZE
<< " : " << err
<< dendl
;
853 uint64_t object_size
= m_layout
.fl_object_size
;
854 uint64_t su
= m_layout
.fl_stripe_unit
;
855 uint64_t stripe_count
= m_layout
.fl_stripe_count
;
856 uint64_t nb_complete_sets
= size
/ (object_size
*stripe_count
);
857 uint64_t remaining_data
= size
% (object_size
*stripe_count
);
858 uint64_t remaining_stripe_units
= (remaining_data
+ su
-1) / su
;
859 uint64_t remaining_objects
= std::min(remaining_stripe_units
, stripe_count
);
860 nb_objects
= nb_complete_sets
* stripe_count
+ remaining_objects
;
862 // delete rados objects in reverse order
863 // Note that we do not drop the first object. This one will only be dropped
864 // if all other removals have been successful, and this is done in the
865 // callback of the multi_completion object
867 for (int i
= nb_objects
-1; i
>= 1; i
--) {
868 multi_completion
->add_request();
869 RadosRemoveCompletionData
*data
=
870 new RadosRemoveCompletionData(multi_completion
, cct());
871 librados::AioCompletion
*rados_completion
=
872 librados::Rados::aio_create_completion(data
,
873 rados_req_remove_complete
,
874 rados_req_remove_safe
);
876 rcr
= m_ioCtx
.aio_remove(getObjectId(soid
, i
), rados_completion
);
878 rcr
= m_ioCtx
.aio_remove(getObjectId(soid
, i
), rados_completion
, flags
);
880 rados_completion
->release();
881 if (rcr
< 0 and -ENOENT
!= rcr
) {
882 lderr(cct()) << "RadosStriperImpl::remove : deletion incomplete for " << soid
883 << ", as " << getObjectId(soid
, i
) << " could not be deleted (rc=" << rc
<< ")"
888 // we are over adding requests to the multi_completion object
889 multi_completion
->finish_adding_requests();
892 } catch (ErrorCode
&e
) {
893 // error caught when trying to take the exclusive lock
899 int libradosstriper::RadosStriperImpl::trunc(const std::string
& soid
, uint64_t size
)
901 // lock the object in exclusive mode
902 std::string firstObjOid
= getObjectId(soid
, 0);
903 librados::ObjectWriteOperation op
;
905 std::string lockCookie
= RadosStriperImpl::getUUID();
906 utime_t dur
= utime_t();
907 rados::cls::lock::lock(&op
, RADOS_LOCK_NAME
, LOCK_EXCLUSIVE
, lockCookie
, "", "", dur
, 0);
908 int rc
= m_ioCtx
.operate(firstObjOid
, &op
);
910 // load layout and size
911 ceph_file_layout layout
;
912 uint64_t original_size
;
913 rc
= internal_get_layout_and_size(firstObjOid
, &layout
, &original_size
);
915 if (size
< original_size
) {
916 rc
= truncate(soid
, original_size
, size
, layout
);
917 } else if (size
> original_size
) {
918 rc
= grow(soid
, original_size
, size
, layout
);
921 // unlock object, ignore return code as we cannot do much
922 m_ioCtx
.unlock(firstObjOid
, RADOS_LOCK_NAME
, lockCookie
);
928 ///////////////////////// private helpers /////////////////////////////
/// Build the name of one of the rados objects backing a striped object.
/// The name is made of soid, a '.', and objectno printed in hexadecimal,
/// zero-padded to 16 digits (same shape as RADOS_OBJECT_EXTENSION_FORMAT).
/// @param soid     name of the striped object
/// @param objectno index of the rados object; 0 designates the first object
930 std::string
libradosstriper::RadosStriperImpl::getObjectId(const object_t
& soid
,
931 long long unsigned objectno
)
933 std::ostringstream s
;
934 s
<< soid
<< '.' << std::setfill ('0') << std::setw(16) << std::hex
<< objectno
;
/// Synchronously drop the RADOS_LOCK_NAME lock held on the first rados object
/// of a striped object.
/// @param soid       name of the striped object
/// @param lockCookie cookie that was used when the lock was taken
/// The return value of the underlying unlock call is not propagated
/// (the function returns void).
938 void libradosstriper::RadosStriperImpl::unlockObject(const std::string
& soid
,
939 const std::string
& lockCookie
)
941 // unlock the shared lock on the first rados object
942 std::string firstObjOid
= getObjectId(soid
, 0);
943 m_ioCtx
.unlock(firstObjOid
, RADOS_LOCK_NAME
, lockCookie
);
/// Asynchronous counterpart of unlockObject: requests the release of the
/// RADOS_LOCK_NAME lock held on the first rados object of a striped object.
/// @param soid       name of the striped object
/// @param lockCookie cookie that was used when the lock was taken
/// @param c          completion passed on to m_ioCtx.aio_unlock
/// The return value of aio_unlock is not propagated (the function returns void).
946 void libradosstriper::RadosStriperImpl::aio_unlockObject(const std::string
& soid
,
947 const std::string
& lockCookie
,
948 librados::AioCompletion
*c
)
950 // unlock the shared lock on the first rados object
951 std::string firstObjOid
= getObjectId(soid
, 0);
952 m_ioCtx
.aio_unlock(firstObjOid
, RADOS_LOCK_NAME
, lockCookie
, c
);
// Callback attached to the unlock completion of a striped write.
// `arg` is cast to the WriteCompletionData of the write and `c` to a
// MultiAioCompletionImpl, whose rval is forwarded to complete_unlock().
// NOTE(review): this function is handed to librados::Rados::aio_create_completion()
// in write_in_open_object() and aio_write_in_open_object(), so `c` may actually be
// a plain librados completion -- confirm that reading `rval` through this cast is
// valid.
// NOTE(review): line 962 of the numbered listing is not visible here; a statement
// may follow the complete_unlock() call -- confirm against the complete file.
955 static void rados_write_aio_unlock_complete(rados_striper_multi_completion_t c
, void *arg
)
957 libradosstriper::RadosStriperImpl::WriteCompletionData
*cdata
=
958 reinterpret_cast<libradosstriper::RadosStriperImpl::WriteCompletionData
*>(arg
);
959 libradosstriper::MultiAioCompletionImpl
*comp
=
960 reinterpret_cast<libradosstriper::MultiAioCompletionImpl
*>(c
);
961 cdata
->complete_unlock(comp
->rval
);
// "Complete" callback of the multi-completion driving a striped write.
// `arg` is the WriteCompletionData of the write, `c` the MultiAioCompletionImpl.
// It first starts the asynchronous unlock of the striped object (using the soid,
// lock cookie and unlock completion stored in cdata), then reports the write
// result (comp->rval) through complete_write().
// NOTE(review): the comments in write_in_open_object() say this callback releases
// references on cdata; that release is not on the lines visible here (line 975
// of the numbered listing is missing) -- confirm against the complete file.
965 static void striper_write_aio_req_complete(rados_striper_multi_completion_t c
, void *arg
)
967 libradosstriper::RadosStriperImpl::WriteCompletionData
*cdata
=
968 reinterpret_cast<libradosstriper::RadosStriperImpl::WriteCompletionData
*>(arg
);
969 // launch the async unlocking of the object
970 cdata
->m_striper
->aio_unlockObject(cdata
->m_soid
, cdata
->m_lockCookie
, cdata
->m_unlockCompletion
);
971 // complete the write part in parallel
972 libradosstriper::MultiAioCompletionImpl
*comp
=
973 reinterpret_cast<libradosstriper::MultiAioCompletionImpl
*>(c
);
974 cdata
->complete_write(comp
->rval
);
// "Safe" callback of the multi-completion driving a striped write.
// `arg` is the WriteCompletionData of the write, `c` the MultiAioCompletionImpl;
// the multi-completion's rval is forwarded to cdata->safe().
// NOTE(review): the comments in write_in_open_object() say this callback releases
// one reference on cdata; that release is not on the lines visible here (line 985
// of the numbered listing is missing) -- confirm against the complete file.
978 static void striper_write_aio_req_safe(rados_striper_multi_completion_t c
, void *arg
)
980 libradosstriper::RadosStriperImpl::WriteCompletionData
*cdata
=
981 reinterpret_cast<libradosstriper::RadosStriperImpl::WriteCompletionData
*>(arg
);
982 libradosstriper::MultiAioCompletionImpl
*comp
=
983 reinterpret_cast<libradosstriper::MultiAioCompletionImpl
*>(c
);
984 cdata
->safe(comp
->rval
);
// Blocking write of `bl` into a striped object, given its layout and lock cookie.
// Visible steps: allocate the WriteCompletionData shared by the callbacks, create
// the completion used for the final unlock and store it in cdata, create the
// multi-completion with its complete/safe callbacks, start the write through
// internal_aio_write(), then wait for the complete callback, the safe callback
// and the unlock before reading the multi-completion's return value.
// NOTE(review): `len` and `off` are used below but their declarations (lines
// 992-993 of the numbered listing) are not visible; lines 1009, 1015 and
// 1017-1022 (handling of `rc`, release of the local references, return) are not
// visible either -- confirm against the complete file.
988 int libradosstriper::RadosStriperImpl::write_in_open_object(const std::string
& soid
,
989 const ceph_file_layout
& layout
,
990 const std::string
& lockCookie
,
991 const bufferlist
& bl
,
994 // create a completion object to be passed to the callbacks of the multicompletion
995 // we need 3 references as striper_write_aio_req_complete will release two and
996 // striper_write_aio_req_safe will release one
997 WriteCompletionData
*cdata
= new WriteCompletionData(this, soid
, lockCookie
, 0, 3);
998 cdata
->get(); // local ref
999 // create a completion object for the unlocking of the striped object at the end of the write
1000 librados::AioCompletion
*unlock_completion
=
1001 librados::Rados::aio_create_completion(cdata
, rados_write_aio_unlock_complete
, 0);
// stored in cdata so that striper_write_aio_req_complete can pass it to aio_unlockObject
1002 cdata
->m_unlockCompletion
= unlock_completion
;
1003 // create the multicompletion that will handle the write completion
1004 libradosstriper::MultiAioCompletionImpl
*c
= new libradosstriper::MultiAioCompletionImpl
;
1005 c
->set_complete_callback(cdata
, striper_write_aio_req_complete
);
1006 c
->set_safe_callback(cdata
, striper_write_aio_req_safe
);
1007 // call the asynchronous API
1008 int rc
= internal_aio_write(soid
, c
, bl
, len
, off
, layout
);
1010 // wait for completion and safety of data
1011 c
->wait_for_complete_and_cb();
1012 c
->wait_for_safe_and_cb();
1013 // wait for the unlocking
1014 unlock_completion
->wait_for_complete();
// result of the whole striped write, as recorded by the multi-completion
1016 rc
= c
->get_return_value();
// Asynchronous write of `bl` into a striped object, given its layout and lock
// cookie; `c` is the caller's completion.
// Visible steps: allocate the WriteCompletionData (which receives `c`), set
// c->io to m_ioCtxImpl, create the completion used for the final unlock and store
// it in cdata, create a new multi-completion `nc` with the complete/safe
// callbacks and start the write through internal_aio_write(). Unlike
// write_in_open_object(), no wait is visible here.
// NOTE(review): `len` and `off` are used below but their declarations (lines
// 1028-1029 of the numbered listing) are not visible; lines 1035 and 1047-1051
// (release of the local references, return) are not visible either -- confirm
// against the complete file.
1023 int libradosstriper::RadosStriperImpl::aio_write_in_open_object(const std::string
& soid
,
1024 librados::AioCompletionImpl
*c
,
1025 const ceph_file_layout
& layout
,
1026 const std::string
& lockCookie
,
1027 const bufferlist
& bl
,
1030 // create a completion object to be passed to the callbacks of the multicompletion
1031 // we need 3 references as striper_write_aio_req_complete will release two and
1032 // striper_write_aio_req_safe will release one
1033 WriteCompletionData
*cdata
= new WriteCompletionData(this, soid
, lockCookie
, c
, 3);
1034 cdata
->get(); // local ref
// attach the striper's io context implementation to the caller's completion
1036 c
->io
= m_ioCtxImpl
;
1037 // create a completion object for the unlocking of the striped object at the end of the write
1038 librados::AioCompletion
*unlock_completion
=
1039 librados::Rados::aio_create_completion(cdata
, rados_write_aio_unlock_complete
, 0);
1040 cdata
->m_unlockCompletion
= unlock_completion
;
1041 // create the multicompletion that will handle the write completion
1042 libradosstriper::MultiAioCompletionImpl
*nc
= new libradosstriper::MultiAioCompletionImpl
;
1043 nc
->set_complete_callback(cdata
, striper_write_aio_req_complete
);
1044 nc
->set_safe_callback(cdata
, striper_write_aio_req_safe
);
1045 // internal asynchronous API
1046 int rc
= internal_aio_write(soid
, nc
, bl
, len
, off
, layout
);
1052 static void rados_req_write_safe(rados_completion_t c
, void *arg
)
1054 libradosstriper::MultiAioCompletionImpl
*comp
=
1055 reinterpret_cast<libradosstriper::MultiAioCompletionImpl
*>(arg
);
1056 comp
->safe_request(rados_aio_get_return_value(c
));
1059 static void rados_req_write_complete(rados_completion_t c
, void *arg
)
1061 libradosstriper::MultiAioCompletionImpl
*comp
=
1062 reinterpret_cast<libradosstriper::MultiAioCompletionImpl
*>(arg
);
1063 comp
->complete_request(rados_aio_get_return_value(c
));
// Splits a striped write into per-rados-object writes and issues them
// asynchronously, registering each one on the multi-completion `c`.
// Visible steps: compute the object extents covering [off, off+len) with
// Striper::file_to_extents() (object names are derived from soid +
// RADOS_OBJECT_EXTENSION_FORMAT); for every extent, gather the matching pieces of
// `bl` into one buffer list and send it with m_ioCtx.aio_write(), using
// rados_req_write_complete / rados_req_write_safe as callbacks; finally tell `c`
// that no more requests will be added.
// NOTE(review): the return type (line 1066), the `len`/`off` parameters (lines
// 1070-1071), the declarations of `l`, `oid_bl` and `r`, the empty-buffer guard
// announced by the first comment, the inner loop increment and the error/return
// handling are on lines that are not visible in this numbered listing -- confirm
// against the complete file.
1067 libradosstriper::RadosStriperImpl::internal_aio_write(const std::string
& soid
,
1068 libradosstriper::MultiAioCompletionImpl
*c
,
1069 const bufferlist
& bl
,
1072 const ceph_file_layout
& layout
)
1075 // Do not try anything if we are called with empty buffer,
1076 // file_to_extents would raise an exception
1078 // get list of extents to be written to
1079 vector
<ObjectExtent
> extents
;
1080 std::string format
= soid
+ RADOS_OBJECT_EXTENSION_FORMAT
;
// `l` is filled from the legacy ceph_file_layout before being given to the Striper
1082 l
.from_legacy(layout
);
1083 Striper::file_to_extents(cct(), format
.c_str(), &l
, off
, len
, 0, extents
);
1084 // go through the extents
1085 for (vector
<ObjectExtent
>::iterator p
= extents
.begin(); p
!= extents
.end(); ++p
) {
1086 // assemble pieces of a given object into a single buffer list
1088 for (vector
<pair
<uint64_t,uint64_t> >::iterator q
= p
->buffer_extents
.begin();
1089 q
!= p
->buffer_extents
.end();
// q->first / q->second are passed as the offset / length arguments of substr_of()
1091 bufferlist buffer_bl
;
1092 buffer_bl
.substr_of(bl
, q
->first
, q
->second
);
1093 oid_bl
.append(buffer_bl
);
1095 // and write the object
1097 librados::AioCompletion
*rados_completion
=
1098 librados::Rados::aio_create_completion(c
, rados_req_write_complete
, rados_req_write_safe
);
1099 r
= m_ioCtx
.aio_write(p
->oid
.name
, rados_completion
, oid_bl
, p
->length
, p
->offset
);
// the local handle on the rados completion is released right after submission
1100 rados_completion
->release();
1105 c
->finish_adding_requests();
// Looks up `key` in the attribute map `attrs` and, when it is present, parses
// the attribute content as a base-10 number with strict_strtol() into *value.
// A message made of the key and the parser's error text is logged with lderr.
// NOTE(review): the `value` parameter, the declaration of `err`, the condition
// guarding the log line and every return statement are on lines (1112-1113, 1118,
// 1120, 1122-1129) that are not visible in this numbered listing -- confirm
// against the complete file.
1109 int libradosstriper::RadosStriperImpl::extract_uint32_attr
1110 (std::map
<std::string
, bufferlist
> &attrs
,
1111 const std::string
& key
,
1114 std::map
<std::string
, bufferlist
>::iterator attrsIt
= attrs
.find(key
);
1115 if (attrsIt
!= attrs
.end()) {
1116 // this intermediate string allows to add a null terminator before calling strtol
1117 std::string
strvalue(attrsIt
->second
.c_str(), attrsIt
->second
.length());
1119 *value
= strict_strtol(strvalue
.c_str(), 10, &err
);
1121 lderr(cct()) << key
<< " : " << err
<< dendl
;
// Same lookup as extract_uint32_attr(), but the attribute content is parsed as a
// base-10 number with strict_strtoll() before being stored into *value.
// A message made of the key and the parser's error text is logged with lderr.
// NOTE(review): the `value` parameter, the declaration of `err`, the condition
// guarding the log line and every return statement are on lines (1133-1134, 1139,
// 1141, 1143-1150) that are not visible in this numbered listing -- confirm
// against the complete file.
1130 int libradosstriper::RadosStriperImpl::extract_sizet_attr
1131 (std::map
<std::string
, bufferlist
> &attrs
,
1132 const std::string
& key
,
1135 std::map
<std::string
, bufferlist
>::iterator attrsIt
= attrs
.find(key
);
1136 if (attrsIt
!= attrs
.end()) {
1137 // this intermediate string allows to add a null terminator before calling strtol
1138 std::string
strvalue(attrsIt
->second
.c_str(), attrsIt
->second
.length());
1140 *value
= strict_strtoll(strvalue
.c_str(), 10, &err
);
1142 lderr(cct()) << key
<< " : " << err
<< dendl
;
// Reads the striping layout and the size of a striped object from the extended
// attributes of the rados object `oid`.
// Visible steps: fetch all attributes with m_ioCtx.getxattrs(), then fill
// layout->fl_stripe_unit, fl_stripe_count and fl_object_size through
// extract_uint32_attr() and read the size attribute through extract_sizet_attr()
// into the local `ssize`; layout->fl_pg_pool is set to 0.
// NOTE(review): the size output parameter, the declaration of `ssize`, the
// checks made on `rc` after each step, the copy of `ssize` to the caller and the
// return statement are on lines not visible in this numbered listing -- confirm
// against the complete file.
1151 int libradosstriper::RadosStriperImpl::internal_get_layout_and_size(
1152 const std::string
& oid
,
1153 ceph_file_layout
*layout
,
1156 // get external attributes of the first rados object
1157 std::map
<std::string
, bufferlist
> attrs
;
1158 int rc
= m_ioCtx
.getxattrs(oid
, attrs
);
1160 // deal with stripe_unit
1161 rc
= extract_uint32_attr(attrs
, XATTR_LAYOUT_STRIPE_UNIT
, &layout
->fl_stripe_unit
);
1163 // deal with stripe_count
1164 rc
= extract_uint32_attr(attrs
, XATTR_LAYOUT_STRIPE_COUNT
, &layout
->fl_stripe_count
);
1166 // deal with object_size
1167 rc
= extract_uint32_attr(attrs
, XATTR_LAYOUT_OBJECT_SIZE
, &layout
->fl_object_size
);
// total size of the striped object
1171 rc
= extract_sizet_attr(attrs
, XATTR_SIZE
, &ssize
);
1176 // make valgrind happy by setting unused fl_pg_pool
1177 layout
->fl_pg_pool
= 0;
// Opens the striped object `soid` for reading.
// Visible steps: store a fresh UUID in *lockCookie, queue a shared cls lock
// (RADOS_LOCK_NAME, tag "Tag") on the first rados object and submit it with
// m_ioCtx.operate(); load the layout and size with
// internal_get_layout_and_size(); on the failure path shown below, unlock the
// object and log the error.
// NOTE(review): the size output parameter (line 1184), line 1190, the tests made
// on `rc` and the return statements are on lines not visible in this numbered
// listing -- confirm against the complete file.
1181 int libradosstriper::RadosStriperImpl::openStripedObjectForRead(
1182 const std::string
& soid
,
1183 ceph_file_layout
*layout
,
1185 std::string
*lockCookie
)
1187 // take a lock on the first rados object, if it exists, and get its size
1188 // check, lock and size reading must be atomic and are thus done within a single operation
1189 librados::ObjectWriteOperation op
;
1191 *lockCookie
= getUUID();
1192 utime_t dur
= utime_t();
1193 rados::cls::lock::lock(&op
, RADOS_LOCK_NAME
, LOCK_SHARED
, *lockCookie
, "Tag", "", dur
, 0);
1194 std::string firstObjOid
= getObjectId(soid
, 0);
1195 int rc
= m_ioCtx
.operate(firstObjOid
, &op
);
1197 // error case (including -ENOENT)
1200 rc
= internal_get_layout_and_size(firstObjOid
, layout
, size
);
// the lock taken above is given back before the failure is logged
1202 unlockObject(soid
, *lockCookie
);
1203 lderr(cct()) << "RadosStriperImpl::openStripedObjectForRead : "
1204 << "could not load layout and size for "
1205 << soid
<< " : rc = " << rc
<< dendl
;
// Opens the striped object `soid` for writing, creating it when it is missing.
// Visible steps: store a fresh UUID in *lockCookie, queue a shared cls lock
// (RADOS_LOCK_NAME, tag "Tag") on the first rados object and submit it with
// m_ioCtx.operate(); when that returns -ENOENT, hand over to
// createAndOpenStripedObject(); otherwise load the layout and current size, then
// update the size attribute with a write operation guarded by a cmpxattr
// (LIBRADOS_CMPXATTR_OP_GT against *size). Both failure paths shown below unlock
// the object and log the error.
// NOTE(review): the size parameter (line 1212), the declarations of `curSize`
// and `bl`, the statements writing into `oss` and into *size, the tests on `rc`
// and the return statements are on lines not visible in this numbered listing --
// confirm against the complete file.
1210 int libradosstriper::RadosStriperImpl::openStripedObjectForWrite(const std::string
& soid
,
1211 ceph_file_layout
*layout
,
1213 std::string
*lockCookie
,
1214 bool isFileSizeAbsolute
)
1216 // take a lock on the first rados object, if it exists
1217 // check and lock must be atomic and are thus done within a single operation
1218 librados::ObjectWriteOperation op
;
1220 *lockCookie
= getUUID();
1221 utime_t dur
= utime_t();
1222 rados::cls::lock::lock(&op
, RADOS_LOCK_NAME
, LOCK_SHARED
, *lockCookie
, "Tag", "", dur
, 0);
1223 std::string firstObjOid
= getObjectId(soid
, 0);
1224 int rc
= m_ioCtx
.operate(firstObjOid
, &op
);
1226 if (rc
== -ENOENT
) {
1227 // object does not exist, delegate to createAndOpenStripedObject
// NOTE(review): this `int rc` shadows the `rc` declared above
1228 int rc
= createAndOpenStripedObject(soid
, layout
, *size
, lockCookie
, isFileSizeAbsolute
);
1229 // return original size
1238 rc
= internal_get_layout_and_size(firstObjOid
, layout
, &curSize
);
1240 unlockObject(soid
, *lockCookie
);
1241 lderr(cct()) << "RadosStriperImpl::openStripedObjectForWrite : "
1242 << "could not load layout and size for "
1243 << soid
<< " : rc = " << rc
<< dendl
;
1246 // atomically update object size, only if smaller than current one
1247 if (!isFileSizeAbsolute
)
// compare-and-set: the setxattr below is queued in the same operation as the cmpxattr
1249 librados::ObjectWriteOperation writeOp
;
1250 writeOp
.cmpxattr(XATTR_SIZE
, LIBRADOS_CMPXATTR_OP_GT
, *size
);
1251 std::ostringstream oss
;
1254 bl
.append(oss
.str());
1255 writeOp
.setxattr(XATTR_SIZE
, bl
);
1256 rc
= m_ioCtx
.operate(firstObjOid
, &writeOp
);
1257 // return current size
1259 // handle case where objectsize is already bigger than size
1260 if (-ECANCELED
== rc
)
1263 unlockObject(soid
, *lockCookie
);
1264 lderr(cct()) << "RadosStriperImpl::openStripedObjectForWrite : "
1265 << "could not set new size for "
1266 << soid
<< " : rc = " << rc
<< dendl
;
1271 int libradosstriper::RadosStriperImpl::createAndOpenStripedObject(const std::string
& soid
,
1272 ceph_file_layout
*layout
,
1274 std::string
*lockCookie
,
1275 bool isFileSizeAbsolute
)
1277 // build atomic write operation
1278 librados::ObjectWriteOperation writeOp
;
1279 writeOp
.create(true);
1281 std::ostringstream oss_object_size
;
1282 oss_object_size
<< m_layout
.fl_object_size
;
1283 bufferlist bl_object_size
;
1284 bl_object_size
.append(oss_object_size
.str());
1285 writeOp
.setxattr(XATTR_LAYOUT_OBJECT_SIZE
, bl_object_size
);
1287 std::ostringstream oss_stripe_unit
;
1288 oss_stripe_unit
<< m_layout
.fl_stripe_unit
;
1289 bufferlist bl_stripe_unit
;
1290 bl_stripe_unit
.append(oss_stripe_unit
.str());
1291 writeOp
.setxattr(XATTR_LAYOUT_STRIPE_UNIT
, bl_stripe_unit
);
1293 std::ostringstream oss_stripe_count
;
1294 oss_stripe_count
<< m_layout
.fl_stripe_count
;
1295 bufferlist bl_stripe_count
;
1296 bl_stripe_count
.append(oss_stripe_count
.str());
1297 writeOp
.setxattr(XATTR_LAYOUT_STRIPE_COUNT
, bl_stripe_count
);
1299 std::ostringstream oss_size
;
1300 oss_size
<< (isFileSizeAbsolute
?size
:0);
1302 bl_size
.append(oss_size
.str());
1303 writeOp
.setxattr(XATTR_SIZE
, bl_size
);
1304 // effectively change attributes
1305 std::string firstObjOid
= getObjectId(soid
, 0);
1306 int rc
= m_ioCtx
.operate(firstObjOid
, &writeOp
);
1307 // in case of error (but no EEXIST which would mean the object existed), return
1308 if (rc
&& -EEXIST
!= rc
) return rc
;
1309 // Otherwise open the object
1310 uint64_t fileSize
= size
;
1311 return openStripedObjectForWrite(soid
, layout
, &fileSize
, lockCookie
, isFileSizeAbsolute
);
// "Complete" callback of the multi-completion driving a truncation.
// `arg` is the TruncateCompletionData of the truncation, `c` the
// MultiAioCompletionImpl. When the multi-completion's rval is 0, the new size
// (cdata->m_size) is written as decimal text into the size attribute of the
// striped object through the striper's setxattr().
// NOTE(review): the declaration of `bl` (line 1324) and lines 1327-1329 of the
// numbered listing are not visible here; a statement may follow the `if` block
// -- confirm against the complete file.
1314 static void striper_truncate_aio_req_complete(rados_striper_multi_completion_t c
, void *arg
)
1316 libradosstriper::RadosStriperImpl::TruncateCompletionData
*cdata
=
1317 reinterpret_cast<libradosstriper::RadosStriperImpl::TruncateCompletionData
*>(arg
);
1318 libradosstriper::MultiAioCompletionImpl
*comp
=
1319 reinterpret_cast<libradosstriper::MultiAioCompletionImpl
*>(c
);
1320 if (0 == comp
->rval
) {
1321 // all went fine, change size in the external attributes
1322 std::ostringstream oss
;
1323 oss
<< cdata
->m_size
;
1325 bl
.append(oss
.str());
1326 cdata
->m_striper
->setxattr(cdata
->m_soid
, XATTR_SIZE
, bl
);
// Blocking truncation of the striped object `soid` from `original_size` to `size`.
// Visible steps: allocate the TruncateCompletionData and a multi-completion whose
// complete callback is striper_truncate_aio_req_complete, run aio_truncate(),
// close the request list, wait for the complete callback, read the
// multi-completion's return value and drop the reference on the multi-completion.
// NOTE(review): the `size` parameter (line 1333), lines 1345-1346 and 1348
// (presumably a test on `rc` around get_return_value()) and the return statement
// are on lines not visible in this numbered listing -- confirm against the
// complete file.
1331 int libradosstriper::RadosStriperImpl::truncate(const std::string
& soid
,
1332 uint64_t original_size
,
1334 ceph_file_layout
&layout
)
1336 TruncateCompletionData
*cdata
= new TruncateCompletionData(this, soid
, size
);
1337 libradosstriper::MultiAioCompletionImpl
*multi_completion
=
1338 new libradosstriper::MultiAioCompletionImpl
;
1339 multi_completion
->set_complete_callback(cdata
, striper_truncate_aio_req_complete
);
1340 // call asynchronous version of truncate
1341 int rc
= aio_truncate(soid
, multi_completion
, original_size
, size
, layout
);
1342 // wait for completion of the truncation
1343 multi_completion
->finish_adding_requests();
1344 multi_completion
->wait_for_complete_and_cb();
1347 rc
= multi_completion
->get_return_value();
1349 multi_completion
->put();
// Shrinks the rados objects backing the striped object `soid` from
// `original_size` to `size`, registering the asynchronous removals on
// `multi_completion`.
// Two backward loops are visible: the first walks the objects of the object sets
// located after the truncation point and removes them with aio_remove(); the
// second walks the objects of the object set containing the truncation point and
// either truncates them synchronously (m_ioCtx.trunc) to the size given by
// Striper::object_truncate_size(), or removes them with aio_remove(). In both
// loops an error other than -ENOENT is returned immediately.
// NOTE(review): the `size` parameter (line 1357), the loop decrements, the
// `exists` tests, the declarations of `rc` and `l` in the second loop, the
// `else` branch delimiter and the final return are on lines not visible in this
// numbered listing -- confirm against the complete file.
1353 int libradosstriper::RadosStriperImpl::aio_truncate
1354 (const std::string
& soid
,
1355 libradosstriper::MultiAioCompletionImpl
*multi_completion
,
1356 uint64_t original_size
,
1358 ceph_file_layout
&layout
)
1360 // handle the underlying rados objects. 3 cases here :
1361 // -- the objects belonging to object sets entirely located
1362 // before the truncation are unchanged
1363 // -- the objects belonging to the object set where the
1364 // truncation took place are truncated or removed
1365 // -- the objects belonging to object sets entirely located
1366 // after the truncation are removed
1367 // Note that we do it backward and that we change the size in
1368 // the external attributes only at the end. This makes sure that
1369 // no rados object stays behind if we remove the striped object
1370 // after a truncation has failed
// index of the object set holding the new end, and of the one holding the old end
1371 uint64_t trunc_objectsetno
= size
/ layout
.fl_object_size
/ layout
.fl_stripe_count
;
1372 uint64_t last_objectsetno
= original_size
/ layout
.fl_object_size
/ layout
.fl_stripe_count
;
1373 bool exists
= false;
// first loop: objects of the sets located after the truncation set, last object first
1374 for (int64_t objectno
= (last_objectsetno
+1) * layout
.fl_stripe_count
-1;
1375 objectno
>= (int64_t)((trunc_objectsetno
+ 1) * layout
.fl_stripe_count
);
1377 // if no object existed so far, check object existence
1379 uint64_t nb_full_object_set
= objectno
/ layout
.fl_stripe_count
;
1380 uint64_t object_index_in_set
= objectno
% layout
.fl_stripe_count
;
1381 uint64_t set_start_off
= nb_full_object_set
* layout
.fl_object_size
* layout
.fl_stripe_count
;
1382 uint64_t object_start_off
= set_start_off
+ object_index_in_set
* layout
.fl_stripe_unit
;
// the object is considered present when the old size reaches past its first byte
1383 exists
= (original_size
> object_start_off
);
1386 // remove asynchronously
1387 multi_completion
->add_request();
1388 RadosRemoveCompletionData
*data
=
1389 new RadosRemoveCompletionData(multi_completion
, cct());
1390 librados::AioCompletion
*rados_completion
=
1391 librados::Rados::aio_create_completion(data
,
1392 rados_req_remove_complete
,
1393 rados_req_remove_safe
);
1394 int rc
= m_ioCtx
.aio_remove(getObjectId(soid
, objectno
), rados_completion
);
1395 rados_completion
->release();
1396 // in case the object did not exist, it means we had a sparse file, all is fine
1397 if (rc
&& rc
!= -ENOENT
) return rc
;
// second loop: objects of the set where the truncation takes place, last object first
1400 for (int64_t objectno
= ((trunc_objectsetno
+ 1) * layout
.fl_stripe_count
) -1;
1401 objectno
>= (int64_t)(trunc_objectsetno
* layout
.fl_stripe_count
);
1403 // if no object existed so far, check object existence
// NOTE(review): here the set index is multiplied by fl_object_size only, whereas
// the first loop multiplies it by fl_object_size * fl_stripe_count -- confirm
// which start offset is intended.
1405 uint64_t object_start_off
= ((objectno
/ layout
.fl_stripe_count
) * layout
.fl_object_size
) +
1406 ((objectno
% layout
.fl_stripe_count
) * layout
.fl_stripe_unit
);
1407 exists
= (original_size
> object_start_off
);
1412 l
.from_legacy(layout
);
1413 uint64_t new_object_size
= Striper::object_truncate_size(cct(), &l
, objectno
, size
);
// object number 0 is always truncated rather than removed, even down to size 0
1415 if (new_object_size
> 0 or 0 == objectno
) {
1416 // trunc is synchronous as there is no async version
1417 // but note that only a single object will be truncated
1418 // reducing the overload to a fixed amount
1419 rc
= m_ioCtx
.trunc(getObjectId(soid
, objectno
), new_object_size
);
1421 // removes are asynchronous in order to speed up truncations of big files
1422 multi_completion
->add_request();
1423 RadosRemoveCompletionData
*data
=
1424 new RadosRemoveCompletionData(multi_completion
, cct());
1425 librados::AioCompletion
*rados_completion
=
1426 librados::Rados::aio_create_completion(data
,
1427 rados_req_remove_complete
,
1428 rados_req_remove_safe
);
1429 rc
= m_ioCtx
.aio_remove(getObjectId(soid
, objectno
), rados_completion
);
1430 rados_completion
->release();
1432 // in case the object did not exist, it means we had a sparse file, all is fine
1433 if (rc
&& rc
!= -ENOENT
) return rc
;
1439 int libradosstriper::RadosStriperImpl::grow(const std::string
& soid
,
1440 uint64_t original_size
,
1442 ceph_file_layout
&layout
)
1444 // handle the underlying rados objects. As we support sparse objects,
1445 // we only have to change the size in the external attributes
1446 std::ostringstream oss
;
1449 bl
.append(oss
.str());
1450 int rc
= m_ioCtx
.setxattr(getObjectId(soid
, 0), XATTR_SIZE
, bl
);
1454 std::string
libradosstriper::RadosStriperImpl::getUUID()
1457 uuid
.generate_random();
1460 return std::string(suuid
);