]>
git.proxmox.com Git - ceph.git/blob - ceph/src/osdc/Striper.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2012 Inktank
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
17 #include "include/types.h"
18 #include "include/buffer.h"
19 #include "osd/OSDMap.h"
21 #include "common/config.h"
22 #include "common/debug.h"
24 #define dout_subsys ceph_subsys_striper
26 #define dout_prefix *_dout << "striper "
// Convenience overload: maps a file byte range to a flat vector of
// ObjectExtents. It builds a temporary per-object map, delegates to the
// map-based file_to_extents() overload, and then flattens that map into
// `extents` via assimilate_extents().
// NOTE(review): this text is an extracted web listing of the file; the
// leading numbers are the viewer's line numbers and some original lines
// (32, 35, 40) are absent. `trunc_size` is forwarded below but its
// declaration is not visible here -- presumably a parameter declared on the
// missing line 32; confirm against the real source.
29 void Striper::file_to_extents(CephContext
*cct
, const char *object_format
,
30 const file_layout_t
*layout
,
31 uint64_t offset
, uint64_t len
,
33 vector
<ObjectExtent
>& extents
,
34 uint64_t buffer_offset
)
// Scratch map: extents grouped by the object they belong to.
36 map
<object_t
,vector
<ObjectExtent
> > object_extents
;
// Do the actual mapping into the per-object map.
37 file_to_extents(cct
, object_format
, layout
, offset
, len
, trunc_size
,
38 object_extents
, buffer_offset
);
// Flatten the per-object map into the caller's vector.
39 assimilate_extents(object_extents
, extents
);
// Map the file byte range starting at `offset` (length `len`) onto objects
// using the striping parameters in *layout, collecting the resulting
// ObjectExtents in object_extents keyed by object id.
// NOTE(review): this text is an extracted web listing; the leading numbers
// are the viewer's line numbers and many original lines are absent. In
// particular the loop header and the declarations of `oid` and `x_len` are
// not visible here; the comments below describe only what is visible.
42 void Striper::file_to_extents(
43 CephContext
*cct
, const char *object_format
,
44 const file_layout_t
*layout
,
45 uint64_t offset
, uint64_t len
,
47 map
<object_t
,vector
<ObjectExtent
> >& object_extents
,
48 uint64_t buffer_offset
)
50 ldout(cct
, 10) << "file_to_extents " << offset
<< "~" << len
51 << " format " << object_format
56 * we want only one extent per object! this means that each extent
57 * we read may map into different bits of the final read
58 * buffer.. hence ObjectExtent.buffer_extents
// Striping parameters taken from the layout.
61 __u32 object_size
= layout
->object_size
;
62 __u32 su
= layout
->stripe_unit
;
63 __u32 stripe_count
= layout
->stripe_count
;
// An object must be able to hold at least one stripe unit.
64 ceph_assert(object_size
>= su
);
// With a single stripe the log message says su is reset to the object size;
// the assignment itself is on a line missing from this listing.
65 if (stripe_count
== 1) {
66 ldout(cct
, 20) << " sc is one, reset su to os" << dendl
;
// Number of stripe units that fit in one object.
69 uint64_t stripes_per_object
= object_size
/ su
;
70 ldout(cct
, 20) << " su " << su
<< " sc " << stripe_count
<< " os "
71 << object_size
<< " stripes_per_object " << stripes_per_object
// cur: file offset currently being mapped; starts at the beginning of the
// requested range.
74 uint64_t cur
= offset
;
77 // layout into objects
78 uint64_t blockno
= cur
/ su
; // which block
79 // which horizontal stripe (Y)
80 uint64_t stripeno
= blockno
/ stripe_count
;
81 // which object in the object set (X)
82 uint64_t stripepos
= blockno
% stripe_count
;
// Object set containing this stripe, and the resulting object number.
84 uint64_t objectsetno
= stripeno
/ stripes_per_object
;
86 uint64_t objectno
= objectsetno
* stripe_count
+ stripepos
;
// Format the object name from objectno with the caller-supplied
// printf-style format; the buffer allows 32 bytes beyond the length of the
// format string itself for the expanded number.
89 char buf
[strlen(object_format
) + 32];
90 snprintf(buf
, sizeof(buf
), object_format
, (long long unsigned)objectno
);
93 // map range into object
// block_start: where this stripe unit begins inside the object.
94 uint64_t block_start
= (stripeno
% stripes_per_object
) * su
;
// block_off: offset of cur inside its stripe unit.
95 uint64_t block_off
= cur
% su
;
// max: bytes left in this stripe unit starting at cur.
96 uint64_t max
= su
- block_off
;
// x_offset: resulting offset inside the object.
98 uint64_t x_offset
= block_start
+ block_off
;
105 ldout(cct
, 20) << " off " << cur
<< " blockno " << blockno
<< " stripeno "
106 << stripeno
<< " stripepos " << stripepos
<< " objectsetno "
107 << objectsetno
<< " objectno " << objectno
108 << " block_start " << block_start
<< " block_off "
109 << block_off
<< " " << x_offset
<< "~" << x_len
// Find or create the extent for this object: a new ObjectExtent is appended
// when the object has none yet, or when the last one does not end exactly at
// x_offset (the new piece is not contiguous with it).
112 ObjectExtent
*ex
= 0;
113 vector
<ObjectExtent
>& exv
= object_extents
[oid
];
114 if (exv
.empty() || exv
.back().offset
+ exv
.back().length
!= x_offset
) {
115 exv
.resize(exv
.size() + 1);
// Fill in the freshly created extent (the line that points ex at it is not
// visible in this listing).
118 ex
->objectno
= objectno
;
119 ex
->oloc
= OSDMap::file_to_object_locator(*layout
);
121 ex
->offset
= x_offset
;
123 ex
->truncate_size
= object_truncate_size(cct
, layout
, objectno
,
126 ldout(cct
, 20) << " added new " << *ex
<< dendl
;
130 ldout(cct
, 20) << " adding in to " << *ex
<< dendl
;
// Record where this piece lands in the caller's buffer: its position relative
// to the start of the request, shifted by buffer_offset. The second element
// of the pair is on a line not visible here.
133 ex
->buffer_extents
.push_back(make_pair(cur
- offset
+ buffer_offset
,
136 ldout(cct
, 15) << "file_to_extents " << *ex
<< " in " << ex
->oloc
138 // ldout(cct, 0) << "map: ino " << ino << " oid " << ex.oid << " osd "
139 // << ex.osd << " offset " << ex.offset << " len " << ex.len
140 // << " ... left " << left << dendl;
147 void Striper::assimilate_extents(
148 map
<object_t
,vector
<ObjectExtent
> >& object_extents
,
149 vector
<ObjectExtent
>& extents
)
152 for (map
<object_t
, vector
<ObjectExtent
> >::iterator it
153 = object_extents
.begin();
154 it
!= object_extents
.end();
156 for (vector
<ObjectExtent
>::iterator p
= it
->second
.begin();
157 p
!= it
->second
.end();
159 extents
.push_back(*p
);
// Reverse mapping: translate the byte range off~len inside object `objectno`
// back into file extents, appended to `extents` as (file offset, length)
// pairs.
// NOTE(review): this text is an extracted web listing; the leading numbers
// are the viewer's line numbers and some original lines are absent. The loop
// header and the statements that advance off/len between pieces are not
// visible here.
164 void Striper::extent_to_file(CephContext
*cct
, file_layout_t
*layout
,
165 uint64_t objectno
, uint64_t off
, uint64_t len
,
166 vector
<pair
<uint64_t, uint64_t> >& extents
)
168 ldout(cct
, 10) << "extent_to_file " << objectno
<< " " << off
<< "~"
// Striping parameters taken from the layout.
171 __u32 object_size
= layout
->object_size
;
172 __u32 su
= layout
->stripe_unit
;
173 __u32 stripe_count
= layout
->stripe_count
;
174 ceph_assert(object_size
>= su
);
175 uint64_t stripes_per_object
= object_size
/ su
;
176 ldout(cct
, 20) << " stripes_per_object " << stripes_per_object
<< dendl
;
// Offset of `off` inside its stripe unit.
178 uint64_t off_in_block
= off
% su
;
// Pre-size the output: one entry per full stripe unit in len, plus one.
180 extents
.reserve(len
/ su
+ 1);
// Position of this object within its object set, and which object set.
183 uint64_t stripepos
= objectno
% stripe_count
;
184 uint64_t objectsetno
= objectno
/ stripe_count
;
// Stripe index in the file: the stripe within the object (off / su) plus all
// stripes of the preceding object sets.
185 uint64_t stripeno
= off
/ su
+ objectsetno
* stripes_per_object
;
// File-level block number and byte offset of this piece.
186 uint64_t blockno
= stripeno
* stripe_count
+ stripepos
;
187 uint64_t extent_off
= blockno
* su
+ off_in_block
;
// Clip the piece to the end of the current stripe unit.
188 uint64_t extent_len
= std::min(len
, su
- off_in_block
);
189 extents
.push_back(make_pair(extent_off
, extent_len
));
191 ldout(cct
, 20) << " object " << off
<< "~" << extent_len
192 << " -> file " << extent_off
<< "~" << extent_len
// Derive the truncation size that applies to object `objectno` from the
// file-level truncation size `trunc_size`, using the striping parameters in
// *layout. The result is logged and returned.
// NOTE(review): this text is an extracted web listing; the leading numbers
// are the viewer's line numbers and some original lines are absent (e.g.
// 218, 226, 233), so a few assignments/expressions below are incomplete.
201 uint64_t Striper::object_truncate_size(CephContext
*cct
,
202 const file_layout_t
*layout
,
203 uint64_t objectno
, uint64_t trunc_size
)
205 uint64_t obj_trunc_size
;
// 0 and (uint64_t)-1 are passed through unchanged.
206 if (trunc_size
== 0 || trunc_size
== (uint64_t)-1) {
207 obj_trunc_size
= trunc_size
;
// Otherwise work it out from the striping parameters.
209 __u32 object_size
= layout
->object_size
;
210 __u32 su
= layout
->stripe_unit
;
211 __u32 stripe_count
= layout
->stripe_count
;
212 ceph_assert(object_size
>= su
);
213 uint64_t stripes_per_object
= object_size
/ su
;
// Object set of this object vs. object set containing the truncation point.
215 uint64_t objectsetno
= objectno
/ stripe_count
;
216 uint64_t trunc_objectsetno
= trunc_size
/ object_size
/ stripe_count
;
// Object set after the truncation point: the assigned value is on a line not
// visible in this listing.
217 if (objectsetno
> trunc_objectsetno
)
// Object set entirely before the truncation point: the object keeps its full
// size.
219 else if (objectsetno
< trunc_objectsetno
)
220 obj_trunc_size
= object_size
;
// Same object set: locate the block/stripe/object holding the truncation
// point.
222 uint64_t trunc_blockno
= trunc_size
/ su
;
223 uint64_t trunc_stripeno
= trunc_blockno
/ stripe_count
;
224 uint64_t trunc_stripepos
= trunc_blockno
% stripe_count
;
// (the remainder of this expression is on a line not visible here)
225 uint64_t trunc_objectno
= trunc_objectsetno
* stripe_count
// Objects before the truncation object include the truncation stripe in
// full, hence the +1 stripe unit.
227 if (objectno
< trunc_objectno
)
228 obj_trunc_size
= ((trunc_stripeno
% stripes_per_object
) + 1) * su
;
// Objects after it stop at the start of the truncation stripe.
229 else if (objectno
> trunc_objectno
)
230 obj_trunc_size
= (trunc_stripeno
% stripes_per_object
) * su
;
// Remaining case (presumably objectno == trunc_objectno): start of the
// truncation stripe plus a term on a line not visible here.
232 obj_trunc_size
= (trunc_stripeno
% stripes_per_object
) * su
236 ldout(cct
, 20) << "object_truncate_size " << objectno
<< " "
237 << trunc_size
<< "->" << obj_trunc_size
<< dendl
;
238 return obj_trunc_size
;
// Compute an object count for the given layout from `size`: whole periods
// times stripe_count, minus the objects deducted for a partially filled last
// period.
// NOTE(review): this text is an extracted web listing; the leading numbers
// are the viewer's line numbers and some original lines are absent. `size` is
// used below but its declaration is not visible (presumably a parameter on
// the missing line 242), and two expressions are cut short (missing lines
// 251 and 253).
241 uint64_t Striper::get_num_objects(const file_layout_t
& layout
,
244 __u32 stripe_unit
= layout
.stripe_unit
;
245 __u32 stripe_count
= layout
.stripe_count
;
246 uint64_t period
= layout
.get_period();
// Number of periods needed to cover size, rounding up.
247 uint64_t num_periods
= (size
+ period
- 1) / period
;
// Bytes that spill into the last, partially filled period.
248 uint64_t remainder_bytes
= size
% period
;
// Objects to deduct from the last period; stays 0 unless the condition below
// holds.
249 uint64_t remainder_objs
= 0;
250 if ((remainder_bytes
> 0) && (remainder_bytes
< (uint64_t)stripe_count
// stripe_count minus the remainder rounded up in stripe_unit steps (the
// divisor is on a line not visible here).
252 remainder_objs
= stripe_count
- ((remainder_bytes
+ stripe_unit
- 1)
254 return num_periods
* stripe_count
- remainder_objs
;
259 void Striper::StripedReadResult::add_partial_result(
260 CephContext
*cct
, bufferlist
& bl
,
261 const vector
<pair
<uint64_t,uint64_t> >& buffer_extents
)
263 ldout(cct
, 10) << "add_partial_result(" << this << ") " << bl
.length()
264 << " to " << buffer_extents
<< dendl
;
265 for (vector
<pair
<uint64_t,uint64_t> >::const_iterator p
266 = buffer_extents
.begin();
267 p
!= buffer_extents
.end();
269 pair
<bufferlist
, uint64_t>& r
= partial
[p
->first
];
270 size_t actual
= std::min
<uint64_t>(bl
.length(), p
->second
);
271 bl
.splice(0, actual
, &r
.first
);
272 r
.second
= p
->second
;
273 total_intended_len
+= r
.second
;
// Record the result of a sparse read. bl holds the returned data, bl_map is
// the extent map describing which ranges that data covers (offset -> length),
// and buffer_extents lists the destination (offset, length) pairs. bl_off
// appears to be the offset that the front of bl currently corresponds to in
// bl_map's coordinates -- confirm against the caller.
// NOTE(review): this text is an extracted web listing; the leading numbers
// are the viewer's line numbers and many original lines are absent (loop
// headers, several assignments and the bookkeeping that advances
// tofs/tlen/bl_off), so the comments below describe only what is visible.
277 void Striper::StripedReadResult::add_partial_sparse_result(
278 CephContext
*cct
, bufferlist
& bl
, const map
<uint64_t, uint64_t>& bl_map
,
279 uint64_t bl_off
, const vector
<pair
<uint64_t,uint64_t> >& buffer_extents
)
281 ldout(cct
, 10) << "add_partial_sparse_result(" << this << ") " << bl
.length()
282 << " covering " << bl_map
<< " (offset " << bl_off
<< ")"
283 << " to " << buffer_extents
<< dendl
;
// s walks the sparse extent map alongside the buffer extents.
284 map
<uint64_t, uint64_t>::const_iterator s
= bl_map
.begin();
285 for (vector
<pair
<uint64_t,uint64_t> >::const_iterator p
286 = buffer_extents
.begin();
287 p
!= buffer_extents
.end();
// tofs/tlen: offset and length of the current destination buffer extent.
289 uint64_t tofs
= p
->first
;
290 size_t tlen
= p
->second
;
291 ldout(cct
, 30) << " be " << tofs
<< "~" << tlen
<< dendl
;
293 ldout(cct
, 20) << " t " << tofs
<< "~" << tlen
294 << " bl has " << bl
.length()
// Case 1: the extent map is exhausted. A partial entry for tofs is still
// created and its length counted; no data is spliced in the visible lines
// (the assignment of r.second is on a line not visible here).
297 if (s
== bl_map
.end()) {
298 ldout(cct
, 20) << " s at end" << dendl
;
299 pair
<bufferlist
, uint64_t>& r
= partial
[tofs
];
301 total_intended_len
+= r
.second
;
305 ldout(cct
, 30) << " s " << s
->first
<< "~" << s
->second
<< dendl
;
307 // skip zero-length extent
308 if (s
->second
== 0) {
309 ldout(cct
, 30) << " s len 0, skipping" << dendl
;
// Case 2: the next data extent starts beyond bl_off, i.e. there is a hole.
// The hole is limited to what remains of the current buffer extent.
314 if (s
->first
> bl_off
) {
315 // gap in sparse read result
316 pair
<bufferlist
, uint64_t>& r
= partial
[tofs
];
317 size_t gap
= std::min
<size_t>(s
->first
- bl_off
, tlen
);
318 ldout(cct
, 20) << " s gap " << gap
<< ", skipping" << dendl
;
320 total_intended_len
+= r
.second
;
// Case 3: the data extent covers bl_off. left = bytes of this extent not yet
// consumed; actual = how much of that fits in the current buffer extent.
329 ceph_assert(s
->first
<= bl_off
);
330 size_t left
= (s
->first
+ s
->second
) - bl_off
;
331 size_t actual
= std::min(left
, tlen
);
334 ldout(cct
, 20) << " s has " << actual
<< ", copying" << dendl
;
335 pair
<bufferlist
, uint64_t>& r
= partial
[tofs
];
336 bl
.splice(0, actual
, &r
.first
);
338 total_intended_len
+= r
.second
;
// The data extent was used up completely; per the log message the iterator
// is advanced (the increment itself is on a line not visible here).
343 if (actual
== left
) {
344 ldout(cct
, 30) << " s advancing" << dendl
;
// Assemble the collected partial results into `bl`, iterating `partial` in
// key (buffer offset) order and zero-filling where a piece is shorter than
// its expected length.
// NOTE(review): this text is an extracted web listing; the leading numbers
// are the viewer's line numbers and some original lines are absent. The rest
// of the parameter list (declaring `bl` and `zero_tail`, both used below) and
// the conditions guarding the append calls are not visible here.
351 void Striper::StripedReadResult::assemble_result(CephContext
*cct
,
355 ldout(cct
, 10) << "assemble_result(" << this << ") zero_tail=" << zero_tail
357 size_t zeros
= 0; // zeros preceding current position
358 for (auto& p
: partial
) {
// got: bytes actually held for this piece; expect: its intended length.
359 size_t got
= p
.second
.first
.length();
360 size_t expect
= p
.second
.second
;
// Emit the pending zeros ahead of this piece (the guard and the reset of
// `zeros` are presumably on lines not visible here).
363 bl
.append_zero(zeros
);
366 bl
.claim_append(p
.second
.first
);
// Carry this piece's shortfall forward as pending zeros.
368 zeros
+= expect
- got
;
// Pending zeros at the very end are only emitted when zero_tail is set.
370 if (zero_tail
&& zeros
) {
371 bl
.append_zero(zeros
);
// Assemble the collected partial results into the caller-supplied flat
// buffer. `buffer` must be non-null and `length` must equal
// total_intended_len. Pieces are visited from the highest buffer offset to
// the lowest (reverse iteration over `partial`); `curr` is the write position
// inside buffer and is asserted to end at 0.
// NOTE(review): this text is an extracted web listing; the leading numbers
// are the viewer's line numbers and some original lines are absent (the
// early-return body, the update of `end`, the else keyword, the iterator
// advance and the closing lines are not visible here).
376 void Striper::StripedReadResult::assemble_result(CephContext
*cct
, char *buffer
, size_t length
)
379 ceph_assert(buffer
&& length
== total_intended_len
);
// Start from the piece with the highest buffer offset.
381 map
<uint64_t,pair
<bufferlist
,uint64_t> >::reverse_iterator p
= partial
.rbegin();
// Nothing collected (the statement taken in this case is not visible here).
382 if (p
== partial
.rend())
// curr starts at the end of the buffer; end is the end offset of the last
// piece.
385 uint64_t curr
= length
;
386 uint64_t end
= p
->first
+ p
->second
.second
;
387 while (p
!= partial
.rend()) {
389 ldout(cct
, 20) << "assemble_result(" << this << ") " << p
->first
<< "~" << p
->second
.second
390 << " " << p
->second
.first
.length() << " bytes"
// This piece must end exactly at `end`; presumably `end` is then moved to the
// start of this piece on a line not visible here, which would make this a
// contiguity check between neighbouring pieces.
392 ceph_assert(p
->first
== end
- p
->second
.second
);
// len: bytes actually held; move curr back by the piece's intended length.
395 size_t len
= p
->second
.first
.length();
396 ceph_assert(curr
>= p
->second
.second
);
397 curr
-= p
->second
.second
;
// Short piece: copy what was received, then zero-fill up to the intended
// length.
398 if (len
< p
->second
.second
) {
400 p
->second
.first
.copy(0, len
, buffer
+ curr
);
401 // FIPS zeroization audit 20191117: this memset is not security related.
402 memset(buffer
+ curr
+ len
, 0, p
->second
.second
- len
);
// Presumably the else branch (piece is full length): plain copy.
404 p
->second
.first
.copy(0, len
, buffer
+ curr
);
// Every byte of the buffer must have been accounted for.
409 ceph_assert(curr
== 0);