1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2012 Inktank
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
17 #include "include/types.h"
18 #include "include/buffer.h"
19 #include "osd/OSDMap.h"
21 #include "common/config.h"
22 #include "common/debug.h"
24 #define dout_subsys ceph_subsys_striper
26 #define dout_prefix *_dout << "striper "
32 using ceph::bufferlist
;
// Formats `object_no` into the printf-style template `object_format` to
// produce an object name (declared return type: object_t).
// The scratch buffer is sized as strlen(object_format) + 32 bytes, and the
// snprintf call is bounded by sizeof(buf), so the write cannot overrun it.
// NOTE(review): the array length is a runtime value (a variable-length
// array, which is a compiler extension in C++) - confirm this is intended.
// NOTE(review): the statement producing the return value and the closing
// brace are not visible in this listing.
36 object_t
format_oid(const char* object_format
, uint64_t object_no
) {
37 char buf
[strlen(object_format
) + 32];
// object_no is cast to long long unsigned before being handed to snprintf,
// so the template is presumably expected to use a matching conversion.
38 snprintf(buf
, sizeof(buf
), object_format
, (long long unsigned)object_no
);
// Comparator that orders a bare object number against a
// striper::LightweightObjectExtent using the extent's `object_no` field.
// Both argument orders are provided (number-vs-extent and extent-vs-number).
// NOTE(review): the closing braces of the operators and of the struct are
// not visible in this listing.
42 struct OrderByObject
{
// true when the plain object number sorts strictly before the extent
43 constexpr bool operator()(uint64_t object_no
,
44 const striper::LightweightObjectExtent
& rhs
) const {
45 return object_no
< rhs
.object_no
;
// true when the extent sorts strictly before the plain object number
47 constexpr bool operator()(const striper::LightweightObjectExtent
& lhs
,
48 uint64_t object_no
) const {
49 return lhs
.object_no
< object_no
;
53 } // anonymous namespace
// Maps a file range (offset, len) onto per-object extents and appends them
// to `extents` as ObjectExtent entries.
// The striping computation is delegated to the LightweightObjectExtents
// overload of file_to_extents; each lightweight result is then converted
// into an ObjectExtent whose object name comes from
// format_oid(object_format, object_no).
// NOTE(review): `trunc_size` is forwarded below but its parameter
// declaration is not visible in this listing - confirm against the header.
55 void Striper::file_to_extents(CephContext
*cct
, const char *object_format
,
56 const file_layout_t
*layout
,
57 uint64_t offset
, uint64_t len
,
59 std::vector
<ObjectExtent
>& extents
,
60 uint64_t buffer_offset
)
62 striper::LightweightObjectExtents lightweight_object_extents
;
63 file_to_extents(cct
, layout
, offset
, len
, trunc_size
, buffer_offset
,
64 &lightweight_object_extents
);
66 // convert lightweight object extents to heavyweight version
// reserve is called with the lightweight extent count before the loop appends
67 extents
.reserve(lightweight_object_extents
.size());
68 for (auto& lightweight_object_extent
: lightweight_object_extents
) {
// construct the ObjectExtent in place and keep a reference to it so the
// remaining fields can be filled in below
69 auto& object_extent
= extents
.emplace_back(
70 object_t(format_oid(object_format
, lightweight_object_extent
.object_no
)),
71 lightweight_object_extent
.object_no
,
72 lightweight_object_extent
.offset
, lightweight_object_extent
.length
,
73 lightweight_object_extent
.truncate_size
);
// the object locator is derived from the layout alone
75 object_extent
.oloc
= OSDMap::file_to_object_locator(*layout
);
// copy the buffer extents across, reserving the exact count first
76 object_extent
.buffer_extents
.reserve(
77 lightweight_object_extent
.buffer_extents
.size());
78 object_extent
.buffer_extents
.insert(
79 object_extent
.buffer_extents
.end(),
80 lightweight_object_extent
.buffer_extents
.begin(),
81 lightweight_object_extent
.buffer_extents
.end());
// Same conversion as the vector-output overload, but the resulting
// ObjectExtent entries are grouped in `object_extents`, keyed by the object
// name produced by format_oid(object_format, object_no).
// NOTE(review): `trunc_size` is forwarded below but its parameter
// declaration is not visible in this listing - confirm against the header.
85 void Striper::file_to_extents(
86 CephContext
*cct
, const char *object_format
,
87 const file_layout_t
*layout
,
88 uint64_t offset
, uint64_t len
,
90 map
<object_t
,std::vector
<ObjectExtent
> >& object_extents
,
91 uint64_t buffer_offset
)
93 striper::LightweightObjectExtents lightweight_object_extents
;
94 file_to_extents(cct
, layout
, offset
, len
, trunc_size
, buffer_offset
,
95 &lightweight_object_extents
);
97 // convert lightweight object extents to heavyweight version
98 for (auto& lightweight_object_extent
: lightweight_object_extents
) {
// the formatted name is used both as the map key and as the extent's oid
99 auto oid
= format_oid(object_format
, lightweight_object_extent
.object_no
);
// presumably operator[] creates the per-object vector on first use
// (assumes `map` is std::map - TODO confirm); the new extent is appended
100 auto& object_extent
= object_extents
[oid
].emplace_back(
101 oid
, lightweight_object_extent
.object_no
,
102 lightweight_object_extent
.offset
, lightweight_object_extent
.length
,
103 lightweight_object_extent
.truncate_size
);
// the object locator is derived from the layout alone
105 object_extent
.oloc
= OSDMap::file_to_object_locator(*layout
);
// copy the buffer extents across, reserving the exact count first
106 object_extent
.buffer_extents
.reserve(
107 lightweight_object_extent
.buffer_extents
.size());
108 object_extent
.buffer_extents
.insert(
109 object_extent
.buffer_extents
.end(),
110 lightweight_object_extent
.buffer_extents
.begin(),
111 lightweight_object_extent
.buffer_extents
.end());
// Core striping routine: translates the file range starting at `offset`
// with length `len` into LightweightObjectExtent records stored in
// *object_extents. New records are inserted at the position found by
// std::upper_bound on the object number, so the container stays ordered by
// object number. Each piece also records where it lands in the caller's
// buffer (see buffer_extents at the end).
// NOTE(review): the loop header, the definition of `x_len`, and the
// statements that advance `cur` are not visible in this listing.
115 void Striper::file_to_extents(
116 CephContext
*cct
, const file_layout_t
*layout
, uint64_t offset
,
117 uint64_t len
, uint64_t trunc_size
, uint64_t buffer_offset
,
118 striper::LightweightObjectExtents
* object_extents
) {
119 ldout(cct
, 10) << "file_to_extents " << offset
<< "~" << len
<< dendl
;
// zero-length requests are rejected by assertion
120 ceph_assert(len
> 0);
123 * we want only one extent per object! this means that each extent
124 * we read may map into different bits of the final read
125 * buffer.. hence buffer_extents
// layout parameters; the object size must be at least one stripe unit
128 __u32 object_size
= layout
->object_size
;
129 __u32 su
= layout
->stripe_unit
;
130 __u32 stripe_count
= layout
->stripe_count
;
131 ceph_assert(object_size
>= su
);
// NOTE(review): per the log message, su is reset to the object size when
// stripe_count is 1; the assignment itself is not visible in this listing.
132 if (stripe_count
== 1) {
133 ldout(cct
, 20) << " sc is one, reset su to os" << dendl
;
// number of stripe units that fit in one object
136 uint64_t stripes_per_object
= object_size
/ su
;
137 ldout(cct
, 20) << " su " << su
<< " sc " << stripe_count
<< " os "
138 << object_size
<< " stripes_per_object " << stripes_per_object
// `cur` is the current file position, starting at the requested offset
141 uint64_t cur
= offset
;
144 // layout into objects
145 uint64_t blockno
= cur
/ su
; // which block
146 // which horizontal stripe (Y)
147 uint64_t stripeno
= blockno
/ stripe_count
;
148 // which object in the object set (X)
149 uint64_t stripepos
= blockno
% stripe_count
;
// which object set: each set holds stripes_per_object stripes
151 uint64_t objectsetno
= stripeno
/ stripes_per_object
;
// object index: stripe_count objects per object set, plus the position
// of this block within the set
153 uint64_t objectno
= objectsetno
* stripe_count
+ stripepos
;
155 // map range into object
// block_start: offset within the object of the stripe unit holding `cur`
156 uint64_t block_start
= (stripeno
% stripes_per_object
) * su
;
// block_off: offset of `cur` within that stripe unit
157 uint64_t block_off
= cur
% su
;
// max: bytes left in the stripe unit from `cur` onward
158 uint64_t max
= su
- block_off
;
// x_offset: offset of `cur` within the object
160 uint64_t x_offset
= block_start
+ block_off
;
// NOTE(review): x_len is logged and used below, but its definition is not
// visible here (presumably bounded by `max`) - confirm.
167 ldout(cct
, 20) << " off " << cur
<< " blockno " << blockno
<< " stripeno "
168 << stripeno
<< " stripepos " << stripepos
<< " objectsetno "
169 << objectsetno
<< " objectno " << objectno
170 << " block_start " << block_start
<< " block_off "
171 << block_off
<< " " << x_offset
<< "~" << x_len
174 striper::LightweightObjectExtent
* ex
= nullptr;
// upper_bound yields the first extent whose object_no is greater than
// objectno; the reverse iterator built from it then refers to the element
// just before that position (the last extent with object_no <= objectno).
175 auto it
= std::upper_bound(object_extents
->begin(), object_extents
->end(),
176 objectno
, OrderByObject());
177 striper::LightweightObjectExtents::reverse_iterator
rev_it(it
);
// start a new extent unless the preceding one belongs to the same object
// and ends exactly at x_offset (i.e. this piece is contiguous with it)
178 if (rev_it
== object_extents
->rend() ||
179 rev_it
->object_no
!= objectno
||
180 rev_it
->offset
+ rev_it
->length
!= x_offset
) {
181 // expect up to "stripe-width - 1" vector shifts in the worst-case
182 ex
= &(*object_extents
->emplace(
183 it
, objectno
, x_offset
, x_len
,
184 object_truncate_size(cct
, layout
, objectno
, trunc_size
)));
185 ldout(cct
, 20) << " added new " << *ex
<< dendl
;
// NOTE(review): the other branch (reusing the existing extent) is only
// partly visible; the lines assigning `ex` and growing its length are
// missing from this listing. The assertion checks contiguity.
188 ceph_assert(ex
->offset
+ ex
->length
== x_offset
);
190 ldout(cct
, 20) << " adding in to " << *ex
<< dendl
;
// record where this piece lands in the caller's buffer: its position
// relative to the start of the request, shifted by buffer_offset
194 ex
->buffer_extents
.emplace_back(cur
- offset
+ buffer_offset
, x_len
);
196 ldout(cct
, 15) << "file_to_extents " << *ex
<< dendl
;
197 // ldout(cct, 0) << "map: ino " << ino << " oid " << ex.oid << " osd "
198 // << ex.osd << " offset " << ex.offset << " len " << ex.len
199 // << " ... left " << left << dendl;
// Reverse mapping: translates a range (off, len) inside object `objectno`
// into (file offset, length) pairs appended to `extents`.
// NOTE(review): the loop header and the statements that advance `off` and
// `len` between pieces are not visible in this listing.
206 void Striper::extent_to_file(CephContext
*cct
, file_layout_t
*layout
,
207 uint64_t objectno
, uint64_t off
, uint64_t len
,
208 std::vector
<pair
<uint64_t, uint64_t> >& extents
)
210 ldout(cct
, 10) << "extent_to_file " << objectno
<< " " << off
<< "~"
// layout parameters; the object size must be at least one stripe unit
213 __u32 object_size
= layout
->object_size
;
214 __u32 su
= layout
->stripe_unit
;
215 __u32 stripe_count
= layout
->stripe_count
;
216 ceph_assert(object_size
>= su
);
217 uint64_t stripes_per_object
= object_size
/ su
;
218 ldout(cct
, 20) << " stripes_per_object " << stripes_per_object
<< dendl
;
// offset of `off` within its stripe unit
220 uint64_t off_in_block
= off
% su
;
// capacity hint: one entry per whole stripe unit in `len`, plus one
222 extents
.reserve(len
/ su
+ 1);
// position of this object within its object set, and which set it is in
225 uint64_t stripepos
= objectno
% stripe_count
;
226 uint64_t objectsetno
= objectno
/ stripe_count
;
// stripe index: stripe within the object plus the stripes of all
// preceding object sets
227 uint64_t stripeno
= off
/ su
+ objectsetno
* stripes_per_object
;
// block (stripe unit) index within the file
228 uint64_t blockno
= stripeno
* stripe_count
+ stripepos
;
229 uint64_t extent_off
= blockno
* su
+ off_in_block
;
// clip the piece at the end of the current stripe unit
230 uint64_t extent_len
= std::min(len
, su
- off_in_block
);
231 extents
.push_back(make_pair(extent_off
, extent_len
));
233 ldout(cct
, 20) << " object " << off
<< "~" << extent_len
234 << " -> file " << extent_off
<< "~" << extent_len
// Derives the truncation size of a single object (`objectno`) from the
// file-level truncation size `trunc_size` and the striping layout, logs it,
// and returns it.
// NOTE(review): several branch bodies and expression tails are missing from
// this listing (marked below); the comments describe only what is visible.
243 uint64_t Striper::object_truncate_size(CephContext
*cct
,
244 const file_layout_t
*layout
,
245 uint64_t objectno
, uint64_t trunc_size
)
247 uint64_t obj_trunc_size
;
// 0 and (uint64_t)-1 are passed through unchanged
248 if (trunc_size
== 0 || trunc_size
== (uint64_t)-1) {
249 obj_trunc_size
= trunc_size
;
// otherwise work from the layout; the object size must be at least one
// stripe unit
251 __u32 object_size
= layout
->object_size
;
252 __u32 su
= layout
->stripe_unit
;
253 __u32 stripe_count
= layout
->stripe_count
;
254 ceph_assert(object_size
>= su
);
255 uint64_t stripes_per_object
= object_size
/ su
;
// object set of this object, and object set containing the truncation point
257 uint64_t objectsetno
= objectno
/ stripe_count
;
258 uint64_t trunc_objectsetno
= trunc_size
/ object_size
/ stripe_count
;
// NOTE(review): the value assigned when this object's set lies after the
// truncation set is not visible in this listing.
259 if (objectsetno
> trunc_objectsetno
)
// objects in an earlier object set get the full object size
261 else if (objectsetno
< trunc_objectsetno
)
262 obj_trunc_size
= object_size
;
// same object set: locate the stripe unit that contains trunc_size
264 uint64_t trunc_blockno
= trunc_size
/ su
;
265 uint64_t trunc_stripeno
= trunc_blockno
/ stripe_count
;
266 uint64_t trunc_stripepos
= trunc_blockno
% stripe_count
;
// NOTE(review): the tail of this expression is not visible here
// (trunc_stripepos is computed above, presumably for use in it) - confirm.
267 uint64_t trunc_objectno
= trunc_objectsetno
* stripe_count
// objects before the truncation object get through the end of the
// truncation stripe's unit
269 if (objectno
< trunc_objectno
)
270 obj_trunc_size
= ((trunc_stripeno
% stripes_per_object
) + 1) * su
;
// objects after it get only up to the start of that stripe's unit
271 else if (objectno
> trunc_objectno
)
272 obj_trunc_size
= (trunc_stripeno
% stripes_per_object
) * su
;
// NOTE(review): the case for the truncation object itself is cut off
// after the stripe-start term; the remainder is not visible.
274 obj_trunc_size
= (trunc_stripeno
% stripes_per_object
) * su
278 ldout(cct
, 20) << "object_truncate_size " << objectno
<< " "
279 << trunc_size
<< "->" << obj_trunc_size
<< dendl
;
280 return obj_trunc_size
;
// Computes an object count from the layout and a byte count `size`:
// the number of periods (rounded up) times stripe_count, minus
// `remainder_objs` when the final period is only partly used.
// NOTE(review): the `size` parameter declaration and the tails of the
// remainder condition and expression are not visible in this listing.
283 uint64_t Striper::get_num_objects(const file_layout_t
& layout
,
286 __u32 stripe_unit
= layout
.stripe_unit
;
287 __u32 stripe_count
= layout
.stripe_count
;
288 uint64_t period
= layout
.get_period();
// ceiling division: number of periods needed to cover `size`
289 uint64_t num_periods
= (size
+ period
- 1) / period
;
// bytes that spill into the last, partially filled period
290 uint64_t remainder_bytes
= size
% period
;
291 uint64_t remainder_objs
= 0;
// NOTE(review): the right-hand side of this comparison is truncated here
292 if ((remainder_bytes
> 0) && (remainder_bytes
< (uint64_t)stripe_count
// NOTE(review): the divisor of this rounded-up division is not visible
294 remainder_objs
= stripe_count
- ((remainder_bytes
+ stripe_unit
- 1)
296 return num_periods
* stripe_count
- remainder_objs
;
// Distributes the front of `bl` across the given buffer extents.
// For each (offset, length) pair, up to `length` bytes are spliced off the
// front of `bl` into partial[offset].first; the intended length is stored in
// .second and added to total_intended_len.
301 void Striper::StripedReadResult::add_partial_result(
302 CephContext
*cct
, bufferlist
& bl
,
303 const std::vector
<pair
<uint64_t,uint64_t> >& buffer_extents
)
305 ldout(cct
, 10) << "add_partial_result(" << this << ") " << bl
.length()
306 << " to " << buffer_extents
<< dendl
;
307 for (auto p
= buffer_extents
.cbegin(); p
!= buffer_extents
.cend(); ++p
) {
// entry for this buffer offset (looked up with operator[])
308 pair
<bufferlist
, uint64_t>& r
= partial
[p
->first
];
// take no more than what is left in bl
309 size_t actual
= std::min
<uint64_t>(bl
.length(), p
->second
);
310 bl
.splice(0, actual
, &r
.first
);
// record the intended length even when fewer bytes were available
311 r
.second
= p
->second
;
312 total_intended_len
+= r
.second
;
// Rvalue overload taking striper::LightweightBufferExtents.
// With exactly one buffer extent the whole bufferlist is moved into
// partial[offset].first; the splice call below is presumably the other arm.
// The intended length is stored in .second and added to total_intended_len.
// NOTE(review): the `else` line between the move and the splice is not
// visible in this listing.
316 void Striper::StripedReadResult::add_partial_result(
317 CephContext
*cct
, bufferlist
&& bl
,
318 const striper::LightweightBufferExtents
& buffer_extents
)
320 ldout(cct
, 10) << "add_partial_result(" << this << ") " << bl
.length()
321 << " to " << buffer_extents
<< dendl
;
322 for (auto& be
: buffer_extents
) {
323 auto& r
= partial
[be
.first
];
// take no more than what is left in bl
324 size_t actual
= std::min
<uint64_t>(bl
.length(), be
.second
);
// single-extent case: move the whole list instead of splicing
325 if (buffer_extents
.size() == 1) {
326 r
.first
= std::move(bl
);
328 bl
.splice(0, actual
, &r
.first
);
// record the intended length even when fewer bytes were available
330 r
.second
= be
.second
;
331 total_intended_len
+= r
.second
;
// Sparse-read overload for a vector of (offset, length) buffer extents.
// Walks the buffer extents and forwards each to the iterator-based overload,
// passing pointers to a cursor `s` into bl_map and to bl_off so both can be
// shared across the calls.
// NOTE(review): the trailing argument(s) of the forwarded call are not
// visible in this listing.
335 void Striper::StripedReadResult::add_partial_sparse_result(
336 CephContext
*cct
, bufferlist
& bl
, const map
<uint64_t, uint64_t>& bl_map
,
337 uint64_t bl_off
, const std::vector
<pair
<uint64_t,uint64_t> >& buffer_extents
)
339 ldout(cct
, 10) << "add_partial_sparse_result(" << this << ") " << bl
.length()
340 << " covering " << bl_map
<< " (offset " << bl_off
<< ")"
341 << " to " << buffer_extents
<< dendl
;
// the cursor starts at the first entry of the extent map
342 auto s
= bl_map
.cbegin();
343 for (auto& be
: buffer_extents
) {
344 add_partial_sparse_result(cct
, bl
, &s
, bl_map
.end(), &bl_off
, be
.first
,
// Sparse-read overload for striper::LightweightBufferExtents.
// Walks the buffer extents and forwards each to the iterator-based overload,
// passing pointers to a cursor `s` into bl_map and to bl_off so both can be
// shared across the calls.
// NOTE(review): the trailing argument(s) of the forwarded call are not
// visible in this listing.
349 void Striper::StripedReadResult::add_partial_sparse_result(
350 CephContext
*cct
, ceph::buffer::list
& bl
,
351 const std::map
<uint64_t, uint64_t>& bl_map
, uint64_t bl_off
,
352 const striper::LightweightBufferExtents
& buffer_extents
) {
353 ldout(cct
, 10) << "add_partial_sparse_result(" << this << ") " << bl
.length()
354 << " covering " << bl_map
<< " (offset " << bl_off
<< ")"
355 << " to " << buffer_extents
<< dendl
;
// the cursor starts at the first entry of the extent map
356 auto s
= bl_map
.cbegin();
357 for (auto& be
: buffer_extents
) {
358 add_partial_sparse_result(cct
, bl
, &s
, bl_map
.cend(), &bl_off
, be
.first
,
// Iterator-based core of the sparse-read handling: processes one target
// range (tofs, tlen) against the sparse extent map, whose cursor is passed
// by pointer in `it` and bounded by `end_it`, and against the running data
// offset *bl_off.
// NOTE(review): the loop header, the definition of `s` (presumably an alias
// for *it), and every statement that advances *bl_off, tofs, tlen or the
// cursor are missing from this listing; comments below rely on the visible
// lines and log messages only.
363 void Striper::StripedReadResult::add_partial_sparse_result(
364 CephContext
*cct
, bufferlist
& bl
,
365 std::map
<uint64_t, uint64_t>::const_iterator
* it
,
366 const std::map
<uint64_t, uint64_t>::const_iterator
& end_it
,
367 uint64_t* bl_off
, uint64_t tofs
, uint64_t tlen
) {
368 ldout(cct
, 30) << " be " << tofs
<< "~" << tlen
<< dendl
;
372 ldout(cct
, 20) << " t " << tofs
<< "~" << tlen
373 << " bl has " << bl
.length()
374 << " off " << *bl_off
<< dendl
;
// extent map exhausted (per the log message): an entry for tofs is
// looked up in `partial` and its .second is added to total_intended_len;
// the assignment to r.second is not visible here
376 ldout(cct
, 20) << " s at end" << dendl
;
377 auto& r
= partial
[tofs
];
379 total_intended_len
+= r
.second
;
383 ldout(cct
, 30) << " s " << s
->first
<< "~" << s
->second
<< dendl
;
385 // skip zero-length extent
386 if (s
->second
== 0) {
387 ldout(cct
, 30) << " s len 0, skipping" << dendl
;
// the current extent starts beyond the data offset: a hole
392 if (s
->first
> *bl_off
) {
393 // gap in sparse read result
394 pair
<bufferlist
, uint64_t>& r
= partial
[tofs
];
// the hole is clipped to the remaining target length
395 size_t gap
= std::min
<size_t>(s
->first
- *bl_off
, tlen
);
396 ldout(cct
, 20) << " s gap " << gap
<< ", skipping" << dendl
;
398 total_intended_len
+= r
.second
;
// from here the current extent must start at or before the data offset
407 ceph_assert(s
->first
<= *bl_off
);
// bytes remaining in the current extent from *bl_off onward
408 size_t left
= (s
->first
+ s
->second
) - *bl_off
;
// clipped to the remaining target length
409 size_t actual
= std::min
<size_t>(left
, tlen
);
412 ldout(cct
, 20) << " s has " << actual
<< ", copying" << dendl
;
413 pair
<bufferlist
, uint64_t>& r
= partial
[tofs
];
// move `actual` bytes from the front of bl into the entry for tofs
414 bl
.splice(0, actual
, &r
.first
);
416 total_intended_len
+= r
.second
;
// the extent was fully consumed; per the log message the cursor advances
421 if (actual
== left
) {
422 ldout(cct
, 30) << " s advancing" << dendl
;
// Assembles the collected partial results into `bl`, iterating `partial`
// and tracking how many zero bytes are owed where an entry delivered fewer
// bytes than intended.
// NOTE(review): the `bl` and `zero_tail` parameter declarations and the
// condition guarding the in-loop append_zero are not visible in this
// listing.
428 void Striper::StripedReadResult::assemble_result(CephContext
*cct
,
432 ldout(cct
, 10) << "assemble_result(" << this << ") zero_tail=" << zero_tail
434 size_t zeros
= 0; // zeros preceding current position
435 for (auto& p
: partial
) {
// got: bytes actually held; expect: intended length of this entry
436 size_t got
= p
.second
.first
.length();
437 size_t expect
= p
.second
.second
;
// emit the zeros owed from earlier short entries
440 bl
.append_zero(zeros
);
// take over this entry's data without copying it
443 bl
.claim_append(p
.second
.first
);
// the shortfall of this entry becomes zeros owed to what follows
445 zeros
+= expect
- got
;
// trailing zeros are appended only when the caller asked for them
447 if (zero_tail
&& zeros
) {
448 bl
.append_zero(zeros
);
// Assembles the collected partial results into a caller-supplied flat
// buffer. `length` must equal total_intended_len. Entries are visited from
// the highest offset downward; each entry's data is copied to its slot and
// any shortfall against the intended length is zero-filled.
// NOTE(review): the early-return body, the update of `end`, the iterator
// increment and the `else` line are not visible in this listing.
453 void Striper::StripedReadResult::assemble_result(CephContext
*cct
, char *buffer
, size_t length
)
456 ceph_assert(buffer
&& length
== total_intended_len
);
458 map
<uint64_t,pair
<bufferlist
,uint64_t> >::reverse_iterator p
= partial
.rbegin();
// nothing collected
459 if (p
== partial
.rend())
// `curr` is the write position, starting at the end of the buffer;
// `end` is the end offset of the last (highest-offset) entry
462 uint64_t curr
= length
;
463 uint64_t end
= p
->first
+ p
->second
.second
;
464 while (p
!= partial
.rend()) {
466 ldout(cct
, 20) << "assemble_result(" << this << ") " << p
->first
<< "~" << p
->second
.second
467 << " " << p
->second
.first
.length() << " bytes"
// this entry must end exactly at `end` (entries are checked for contiguity)
469 ceph_assert(p
->first
== end
- p
->second
.second
);
472 size_t len
= p
->second
.first
.length();
// step the write position back by the intended length of this entry
473 ceph_assert(curr
>= p
->second
.second
);
474 curr
-= p
->second
.second
;
// short entry: copy what is there, then zero-fill the remainder
475 if (len
< p
->second
.second
) {
477 p
->second
.first
.begin().copy(len
, buffer
+ curr
);
478 // FIPS zeroization audit 20191117: this memset is not security related.
479 memset(buffer
+ curr
+ len
, 0, p
->second
.second
- len
);
// full entry: a plain copy is enough
481 p
->second
.first
.begin().copy(len
, buffer
+ curr
);
// every byte of the buffer must have been accounted for
486 ceph_assert(curr
== 0);
// Sparse assembly: for entries of `partial`, records offset -> data length
// in *extent_map and appends the entry's data to *bl via claim_append.
// NOTE(review): the `bl` parameter declaration, any condition guarding the
// two statements inside the loop, and the end of the function are not
// visible in this listing - confirm before relying on this description.
489 void Striper::StripedReadResult::assemble_result(
490 CephContext
*cct
, std::map
<uint64_t, uint64_t> *extent_map
,
493 ldout(cct
, 10) << "assemble_result(" << this << ")" << dendl
;
494 for (auto& p
: partial
) {
// off: buffer offset of the entry; len: bytes actually held by it
495 uint64_t off
= p
.first
;
496 uint64_t len
= p
.second
.first
.length();
498 (*extent_map
)[off
] = len
;
499 bl
->claim_append(p
.second
.first
);