]> git.proxmox.com Git - ceph.git/blob - ceph/src/osdc/Striper.cc
import 15.2.0 Octopus source
[ceph.git] / ceph / src / osdc / Striper.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2012 Inktank
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #include "Striper.h"
16
17 #include "include/types.h"
18 #include "include/buffer.h"
19 #include "osd/OSDMap.h"
20
21 #include "common/config.h"
22 #include "common/debug.h"
23
24 #define dout_subsys ceph_subsys_striper
25 #undef dout_prefix
26 #define dout_prefix *_dout << "striper "
27
28 using std::make_pair;
29 using std::map;
30 using std::pair;
31
32 using ceph::bufferlist;
33
34 namespace {
35
36 object_t format_oid(const char* object_format, uint64_t object_no) {
37 char buf[strlen(object_format) + 32];
38 snprintf(buf, sizeof(buf), object_format, (long long unsigned)object_no);
39 return object_t(buf);
40 }
41
42 struct OrderByObject {
43 constexpr bool operator()(uint64_t object_no,
44 const striper::LightweightObjectExtent& rhs) const {
45 return object_no < rhs.object_no;
46 }
47 constexpr bool operator()(const striper::LightweightObjectExtent& lhs,
48 uint64_t object_no) const {
49 return lhs.object_no < object_no;
50 }
51 };
52
53 } // anonymous namespace
54
55 void Striper::file_to_extents(CephContext *cct, const char *object_format,
56 const file_layout_t *layout,
57 uint64_t offset, uint64_t len,
58 uint64_t trunc_size,
59 std::vector<ObjectExtent>& extents,
60 uint64_t buffer_offset)
61 {
62 striper::LightweightObjectExtents lightweight_object_extents;
63 file_to_extents(cct, layout, offset, len, trunc_size, buffer_offset,
64 &lightweight_object_extents);
65
66 // convert lightweight object extents to heavyweight version
67 extents.reserve(lightweight_object_extents.size());
68 for (auto& lightweight_object_extent : lightweight_object_extents) {
69 auto& object_extent = extents.emplace_back(
70 object_t(format_oid(object_format, lightweight_object_extent.object_no)),
71 lightweight_object_extent.object_no,
72 lightweight_object_extent.offset, lightweight_object_extent.length,
73 lightweight_object_extent.truncate_size);
74
75 object_extent.oloc = OSDMap::file_to_object_locator(*layout);
76 object_extent.buffer_extents.reserve(
77 lightweight_object_extent.buffer_extents.size());
78 object_extent.buffer_extents.insert(
79 object_extent.buffer_extents.end(),
80 lightweight_object_extent.buffer_extents.begin(),
81 lightweight_object_extent.buffer_extents.end());
82 }
83 }
84
85 void Striper::file_to_extents(
86 CephContext *cct, const char *object_format,
87 const file_layout_t *layout,
88 uint64_t offset, uint64_t len,
89 uint64_t trunc_size,
90 map<object_t,std::vector<ObjectExtent> >& object_extents,
91 uint64_t buffer_offset)
92 {
93 striper::LightweightObjectExtents lightweight_object_extents;
94 file_to_extents(cct, layout, offset, len, trunc_size, buffer_offset,
95 &lightweight_object_extents);
96
97 // convert lightweight object extents to heavyweight version
98 for (auto& lightweight_object_extent : lightweight_object_extents) {
99 auto oid = format_oid(object_format, lightweight_object_extent.object_no);
100 auto& object_extent = object_extents[oid].emplace_back(
101 oid, lightweight_object_extent.object_no,
102 lightweight_object_extent.offset, lightweight_object_extent.length,
103 lightweight_object_extent.truncate_size);
104
105 object_extent.oloc = OSDMap::file_to_object_locator(*layout);
106 object_extent.buffer_extents.reserve(
107 lightweight_object_extent.buffer_extents.size());
108 object_extent.buffer_extents.insert(
109 object_extent.buffer_extents.end(),
110 lightweight_object_extent.buffer_extents.begin(),
111 lightweight_object_extent.buffer_extents.end());
112 }
113 }
114
115 void Striper::file_to_extents(
116 CephContext *cct, const file_layout_t *layout, uint64_t offset,
117 uint64_t len, uint64_t trunc_size, uint64_t buffer_offset,
118 striper::LightweightObjectExtents* object_extents) {
119 ldout(cct, 10) << "file_to_extents " << offset << "~" << len << dendl;
120 ceph_assert(len > 0);
121
122 /*
123 * we want only one extent per object! this means that each extent
124 * we read may map into different bits of the final read
125 * buffer.. hence buffer_extents
126 */
127
128 __u32 object_size = layout->object_size;
129 __u32 su = layout->stripe_unit;
130 __u32 stripe_count = layout->stripe_count;
131 ceph_assert(object_size >= su);
132 if (stripe_count == 1) {
133 ldout(cct, 20) << " sc is one, reset su to os" << dendl;
134 su = object_size;
135 }
136 uint64_t stripes_per_object = object_size / su;
137 ldout(cct, 20) << " su " << su << " sc " << stripe_count << " os "
138 << object_size << " stripes_per_object " << stripes_per_object
139 << dendl;
140
141 uint64_t cur = offset;
142 uint64_t left = len;
143 while (left > 0) {
144 // layout into objects
145 uint64_t blockno = cur / su; // which block
146 // which horizontal stripe (Y)
147 uint64_t stripeno = blockno / stripe_count;
148 // which object in the object set (X)
149 uint64_t stripepos = blockno % stripe_count;
150 // which object set
151 uint64_t objectsetno = stripeno / stripes_per_object;
152 // object id
153 uint64_t objectno = objectsetno * stripe_count + stripepos;
154
155 // map range into object
156 uint64_t block_start = (stripeno % stripes_per_object) * su;
157 uint64_t block_off = cur % su;
158 uint64_t max = su - block_off;
159
160 uint64_t x_offset = block_start + block_off;
161 uint64_t x_len;
162 if (left > max)
163 x_len = max;
164 else
165 x_len = left;
166
167 ldout(cct, 20) << " off " << cur << " blockno " << blockno << " stripeno "
168 << stripeno << " stripepos " << stripepos << " objectsetno "
169 << objectsetno << " objectno " << objectno
170 << " block_start " << block_start << " block_off "
171 << block_off << " " << x_offset << "~" << x_len
172 << dendl;
173
174 striper::LightweightObjectExtent* ex = nullptr;
175 auto it = std::upper_bound(object_extents->begin(), object_extents->end(),
176 objectno, OrderByObject());
177 striper::LightweightObjectExtents::reverse_iterator rev_it(it);
178 if (rev_it == object_extents->rend() ||
179 rev_it->object_no != objectno ||
180 rev_it->offset + rev_it->length != x_offset) {
181 // expect up to "stripe-width - 1" vector shifts in the worst-case
182 ex = &(*object_extents->emplace(
183 it, objectno, x_offset, x_len,
184 object_truncate_size(cct, layout, objectno, trunc_size)));
185 ldout(cct, 20) << " added new " << *ex << dendl;
186 } else {
187 ex = &(*rev_it);
188 ceph_assert(ex->offset + ex->length == x_offset);
189
190 ldout(cct, 20) << " adding in to " << *ex << dendl;
191 ex->length += x_len;
192 }
193
194 ex->buffer_extents.emplace_back(cur - offset + buffer_offset, x_len);
195
196 ldout(cct, 15) << "file_to_extents " << *ex << dendl;
197 // ldout(cct, 0) << "map: ino " << ino << " oid " << ex.oid << " osd "
198 // << ex.osd << " offset " << ex.offset << " len " << ex.len
199 // << " ... left " << left << dendl;
200
201 left -= x_len;
202 cur += x_len;
203 }
204 }
205
206 void Striper::extent_to_file(CephContext *cct, file_layout_t *layout,
207 uint64_t objectno, uint64_t off, uint64_t len,
208 std::vector<pair<uint64_t, uint64_t> >& extents)
209 {
210 ldout(cct, 10) << "extent_to_file " << objectno << " " << off << "~"
211 << len << dendl;
212
213 __u32 object_size = layout->object_size;
214 __u32 su = layout->stripe_unit;
215 __u32 stripe_count = layout->stripe_count;
216 ceph_assert(object_size >= su);
217 uint64_t stripes_per_object = object_size / su;
218 ldout(cct, 20) << " stripes_per_object " << stripes_per_object << dendl;
219
220 uint64_t off_in_block = off % su;
221
222 extents.reserve(len / su + 1);
223
224 while (len > 0) {
225 uint64_t stripepos = objectno % stripe_count;
226 uint64_t objectsetno = objectno / stripe_count;
227 uint64_t stripeno = off / su + objectsetno * stripes_per_object;
228 uint64_t blockno = stripeno * stripe_count + stripepos;
229 uint64_t extent_off = blockno * su + off_in_block;
230 uint64_t extent_len = std::min(len, su - off_in_block);
231 extents.push_back(make_pair(extent_off, extent_len));
232
233 ldout(cct, 20) << " object " << off << "~" << extent_len
234 << " -> file " << extent_off << "~" << extent_len
235 << dendl;
236
237 off_in_block = 0;
238 off += extent_len;
239 len -= extent_len;
240 }
241 }
242
243 uint64_t Striper::object_truncate_size(CephContext *cct,
244 const file_layout_t *layout,
245 uint64_t objectno, uint64_t trunc_size)
246 {
247 uint64_t obj_trunc_size;
248 if (trunc_size == 0 || trunc_size == (uint64_t)-1) {
249 obj_trunc_size = trunc_size;
250 } else {
251 __u32 object_size = layout->object_size;
252 __u32 su = layout->stripe_unit;
253 __u32 stripe_count = layout->stripe_count;
254 ceph_assert(object_size >= su);
255 uint64_t stripes_per_object = object_size / su;
256
257 uint64_t objectsetno = objectno / stripe_count;
258 uint64_t trunc_objectsetno = trunc_size / object_size / stripe_count;
259 if (objectsetno > trunc_objectsetno)
260 obj_trunc_size = 0;
261 else if (objectsetno < trunc_objectsetno)
262 obj_trunc_size = object_size;
263 else {
264 uint64_t trunc_blockno = trunc_size / su;
265 uint64_t trunc_stripeno = trunc_blockno / stripe_count;
266 uint64_t trunc_stripepos = trunc_blockno % stripe_count;
267 uint64_t trunc_objectno = trunc_objectsetno * stripe_count
268 + trunc_stripepos;
269 if (objectno < trunc_objectno)
270 obj_trunc_size = ((trunc_stripeno % stripes_per_object) + 1) * su;
271 else if (objectno > trunc_objectno)
272 obj_trunc_size = (trunc_stripeno % stripes_per_object) * su;
273 else
274 obj_trunc_size = (trunc_stripeno % stripes_per_object) * su
275 + (trunc_size % su);
276 }
277 }
278 ldout(cct, 20) << "object_truncate_size " << objectno << " "
279 << trunc_size << "->" << obj_trunc_size << dendl;
280 return obj_trunc_size;
281 }
282
283 uint64_t Striper::get_num_objects(const file_layout_t& layout,
284 uint64_t size)
285 {
286 __u32 stripe_unit = layout.stripe_unit;
287 __u32 stripe_count = layout.stripe_count;
288 uint64_t period = layout.get_period();
289 uint64_t num_periods = (size + period - 1) / period;
290 uint64_t remainder_bytes = size % period;
291 uint64_t remainder_objs = 0;
292 if ((remainder_bytes > 0) && (remainder_bytes < (uint64_t)stripe_count
293 * stripe_unit))
294 remainder_objs = stripe_count - ((remainder_bytes + stripe_unit - 1)
295 / stripe_unit);
296 return num_periods * stripe_count - remainder_objs;
297 }
298
299 // StripedReadResult
300
301 void Striper::StripedReadResult::add_partial_result(
302 CephContext *cct, bufferlist& bl,
303 const std::vector<pair<uint64_t,uint64_t> >& buffer_extents)
304 {
305 ldout(cct, 10) << "add_partial_result(" << this << ") " << bl.length()
306 << " to " << buffer_extents << dendl;
307 for (auto p = buffer_extents.cbegin(); p != buffer_extents.cend(); ++p) {
308 pair<bufferlist, uint64_t>& r = partial[p->first];
309 size_t actual = std::min<uint64_t>(bl.length(), p->second);
310 bl.splice(0, actual, &r.first);
311 r.second = p->second;
312 total_intended_len += r.second;
313 }
314 }
315
316 void Striper::StripedReadResult::add_partial_result(
317 CephContext *cct, bufferlist&& bl,
318 const striper::LightweightBufferExtents& buffer_extents)
319 {
320 ldout(cct, 10) << "add_partial_result(" << this << ") " << bl.length()
321 << " to " << buffer_extents << dendl;
322 for (auto& be : buffer_extents) {
323 auto& r = partial[be.first];
324 size_t actual = std::min<uint64_t>(bl.length(), be.second);
325 if (buffer_extents.size() == 1) {
326 r.first = std::move(bl);
327 } else {
328 bl.splice(0, actual, &r.first);
329 }
330 r.second = be.second;
331 total_intended_len += r.second;
332 }
333 }
334
335 void Striper::StripedReadResult::add_partial_sparse_result(
336 CephContext *cct, bufferlist& bl, const map<uint64_t, uint64_t>& bl_map,
337 uint64_t bl_off, const std::vector<pair<uint64_t,uint64_t> >& buffer_extents)
338 {
339 ldout(cct, 10) << "add_partial_sparse_result(" << this << ") " << bl.length()
340 << " covering " << bl_map << " (offset " << bl_off << ")"
341 << " to " << buffer_extents << dendl;
342 auto s = bl_map.cbegin();
343 for (auto& be : buffer_extents) {
344 add_partial_sparse_result(cct, bl, &s, bl_map.end(), &bl_off, be.first,
345 be.second);
346 }
347 }
348
349 void Striper::StripedReadResult::add_partial_sparse_result(
350 CephContext *cct, ceph::buffer::list& bl,
351 const std::map<uint64_t, uint64_t>& bl_map, uint64_t bl_off,
352 const striper::LightweightBufferExtents& buffer_extents) {
353 ldout(cct, 10) << "add_partial_sparse_result(" << this << ") " << bl.length()
354 << " covering " << bl_map << " (offset " << bl_off << ")"
355 << " to " << buffer_extents << dendl;
356 auto s = bl_map.cbegin();
357 for (auto& be : buffer_extents) {
358 add_partial_sparse_result(cct, bl, &s, bl_map.cend(), &bl_off, be.first,
359 be.second);
360 }
361 }
362
363 void Striper::StripedReadResult::add_partial_sparse_result(
364 CephContext *cct, bufferlist& bl,
365 std::map<uint64_t, uint64_t>::const_iterator* it,
366 const std::map<uint64_t, uint64_t>::const_iterator& end_it,
367 uint64_t* bl_off, uint64_t tofs, uint64_t tlen) {
368 ldout(cct, 30) << " be " << tofs << "~" << tlen << dendl;
369
370 auto& s = *it;
371 while (tlen > 0) {
372 ldout(cct, 20) << " t " << tofs << "~" << tlen
373 << " bl has " << bl.length()
374 << " off " << *bl_off << dendl;
375 if (s == end_it) {
376 ldout(cct, 20) << " s at end" << dendl;
377 auto& r = partial[tofs];
378 r.second = tlen;
379 total_intended_len += r.second;
380 break;
381 }
382
383 ldout(cct, 30) << " s " << s->first << "~" << s->second << dendl;
384
385 // skip zero-length extent
386 if (s->second == 0) {
387 ldout(cct, 30) << " s len 0, skipping" << dendl;
388 ++s;
389 continue;
390 }
391
392 if (s->first > *bl_off) {
393 // gap in sparse read result
394 pair<bufferlist, uint64_t>& r = partial[tofs];
395 size_t gap = std::min<size_t>(s->first - *bl_off, tlen);
396 ldout(cct, 20) << " s gap " << gap << ", skipping" << dendl;
397 r.second = gap;
398 total_intended_len += r.second;
399 *bl_off += gap;
400 tofs += gap;
401 tlen -= gap;
402 if (tlen == 0) {
403 continue;
404 }
405 }
406
407 ceph_assert(s->first <= *bl_off);
408 size_t left = (s->first + s->second) - *bl_off;
409 size_t actual = std::min<size_t>(left, tlen);
410
411 if (actual > 0) {
412 ldout(cct, 20) << " s has " << actual << ", copying" << dendl;
413 pair<bufferlist, uint64_t>& r = partial[tofs];
414 bl.splice(0, actual, &r.first);
415 r.second = actual;
416 total_intended_len += r.second;
417 *bl_off += actual;
418 tofs += actual;
419 tlen -= actual;
420 }
421 if (actual == left) {
422 ldout(cct, 30) << " s advancing" << dendl;
423 ++s;
424 }
425 }
426 }
427
428 void Striper::StripedReadResult::assemble_result(CephContext *cct,
429 bufferlist& bl,
430 bool zero_tail)
431 {
432 ldout(cct, 10) << "assemble_result(" << this << ") zero_tail=" << zero_tail
433 << dendl;
434 size_t zeros = 0; // zeros preceding current position
435 for (auto& p : partial) {
436 size_t got = p.second.first.length();
437 size_t expect = p.second.second;
438 if (got) {
439 if (zeros) {
440 bl.append_zero(zeros);
441 zeros = 0;
442 }
443 bl.claim_append(p.second.first);
444 }
445 zeros += expect - got;
446 }
447 if (zero_tail && zeros) {
448 bl.append_zero(zeros);
449 }
450 partial.clear();
451 }
452
453 void Striper::StripedReadResult::assemble_result(CephContext *cct, char *buffer, size_t length)
454 {
455
456 ceph_assert(buffer && length == total_intended_len);
457
458 map<uint64_t,pair<bufferlist,uint64_t> >::reverse_iterator p = partial.rbegin();
459 if (p == partial.rend())
460 return;
461
462 uint64_t curr = length;
463 uint64_t end = p->first + p->second.second;
464 while (p != partial.rend()) {
465 // sanity check
466 ldout(cct, 20) << "assemble_result(" << this << ") " << p->first << "~" << p->second.second
467 << " " << p->second.first.length() << " bytes"
468 << dendl;
469 ceph_assert(p->first == end - p->second.second);
470 end = p->first;
471
472 size_t len = p->second.first.length();
473 ceph_assert(curr >= p->second.second);
474 curr -= p->second.second;
475 if (len < p->second.second) {
476 if (len)
477 p->second.first.begin().copy(len, buffer + curr);
478 // FIPS zeroization audit 20191117: this memset is not security related.
479 memset(buffer + curr + len, 0, p->second.second - len);
480 } else {
481 p->second.first.begin().copy(len, buffer + curr);
482 }
483 ++p;
484 }
485 partial.clear();
486 ceph_assert(curr == 0);
487 }
488
489 void Striper::StripedReadResult::assemble_result(
490 CephContext *cct, std::map<uint64_t, uint64_t> *extent_map,
491 bufferlist *bl)
492 {
493 ldout(cct, 10) << "assemble_result(" << this << ")" << dendl;
494 for (auto& p : partial) {
495 uint64_t off = p.first;
496 uint64_t len = p.second.first.length();
497 if (len > 0) {
498 (*extent_map)[off] = len;
499 bl->claim_append(p.second.first);
500 }
501 }
502 partial.clear();
503 }