]> git.proxmox.com Git - ceph.git/blame - ceph/src/osdc/Striper.cc
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / osdc / Striper.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2012 Inktank
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#include "Striper.h"
16
17#include "include/types.h"
18#include "include/buffer.h"
19#include "osd/OSDMap.h"
20
21#include "common/config.h"
22#include "common/debug.h"
23
24#define dout_subsys ceph_subsys_striper
25#undef dout_prefix
26#define dout_prefix *_dout << "striper "
27
28
29void Striper::file_to_extents(CephContext *cct, const char *object_format,
30 const file_layout_t *layout,
31 uint64_t offset, uint64_t len,
32 uint64_t trunc_size,
33 vector<ObjectExtent>& extents,
34 uint64_t buffer_offset)
35{
36 map<object_t,vector<ObjectExtent> > object_extents;
37 file_to_extents(cct, object_format, layout, offset, len, trunc_size,
38 object_extents, buffer_offset);
39 assimilate_extents(object_extents, extents);
40}
41
42void Striper::file_to_extents(
43 CephContext *cct, const char *object_format,
44 const file_layout_t *layout,
45 uint64_t offset, uint64_t len,
46 uint64_t trunc_size,
47 map<object_t,vector<ObjectExtent> >& object_extents,
48 uint64_t buffer_offset)
49{
50 ldout(cct, 10) << "file_to_extents " << offset << "~" << len
51 << " format " << object_format
52 << dendl;
11fdf7f2 53 ceph_assert(len > 0);
7c673cae
FG
54
55 /*
56 * we want only one extent per object! this means that each extent
57 * we read may map into different bits of the final read
58 * buffer.. hence ObjectExtent.buffer_extents
59 */
60
61 __u32 object_size = layout->object_size;
62 __u32 su = layout->stripe_unit;
63 __u32 stripe_count = layout->stripe_count;
11fdf7f2 64 ceph_assert(object_size >= su);
7c673cae
FG
65 if (stripe_count == 1) {
66 ldout(cct, 20) << " sc is one, reset su to os" << dendl;
67 su = object_size;
68 }
69 uint64_t stripes_per_object = object_size / su;
70 ldout(cct, 20) << " su " << su << " sc " << stripe_count << " os "
71 << object_size << " stripes_per_object " << stripes_per_object
72 << dendl;
73
74 uint64_t cur = offset;
75 uint64_t left = len;
76 while (left > 0) {
77 // layout into objects
78 uint64_t blockno = cur / su; // which block
79 // which horizontal stripe (Y)
80 uint64_t stripeno = blockno / stripe_count;
81 // which object in the object set (X)
82 uint64_t stripepos = blockno % stripe_count;
83 // which object set
84 uint64_t objectsetno = stripeno / stripes_per_object;
85 // object id
86 uint64_t objectno = objectsetno * stripe_count + stripepos;
87
88 // find oid, extent
89 char buf[strlen(object_format) + 32];
90 snprintf(buf, sizeof(buf), object_format, (long long unsigned)objectno);
91 object_t oid = buf;
92
93 // map range into object
94 uint64_t block_start = (stripeno % stripes_per_object) * su;
95 uint64_t block_off = cur % su;
96 uint64_t max = su - block_off;
97
98 uint64_t x_offset = block_start + block_off;
99 uint64_t x_len;
100 if (left > max)
101 x_len = max;
102 else
103 x_len = left;
104
105 ldout(cct, 20) << " off " << cur << " blockno " << blockno << " stripeno "
106 << stripeno << " stripepos " << stripepos << " objectsetno "
107 << objectsetno << " objectno " << objectno
108 << " block_start " << block_start << " block_off "
109 << block_off << " " << x_offset << "~" << x_len
110 << dendl;
111
112 ObjectExtent *ex = 0;
113 vector<ObjectExtent>& exv = object_extents[oid];
114 if (exv.empty() || exv.back().offset + exv.back().length != x_offset) {
115 exv.resize(exv.size() + 1);
116 ex = &exv.back();
117 ex->oid = oid;
118 ex->objectno = objectno;
119 ex->oloc = OSDMap::file_to_object_locator(*layout);
120
121 ex->offset = x_offset;
122 ex->length = x_len;
123 ex->truncate_size = object_truncate_size(cct, layout, objectno,
124 trunc_size);
125
126 ldout(cct, 20) << " added new " << *ex << dendl;
127 } else {
128 // add to extent
129 ex = &exv.back();
130 ldout(cct, 20) << " adding in to " << *ex << dendl;
131 ex->length += x_len;
132 }
133 ex->buffer_extents.push_back(make_pair(cur - offset + buffer_offset,
134 x_len));
135
136 ldout(cct, 15) << "file_to_extents " << *ex << " in " << ex->oloc
137 << dendl;
138 // ldout(cct, 0) << "map: ino " << ino << " oid " << ex.oid << " osd "
139 // << ex.osd << " offset " << ex.offset << " len " << ex.len
140 // << " ... left " << left << dendl;
141
142 left -= x_len;
143 cur += x_len;
144 }
145}
146
147void Striper::assimilate_extents(
148 map<object_t,vector<ObjectExtent> >& object_extents,
149 vector<ObjectExtent>& extents)
150{
151 // make final list
152 for (map<object_t, vector<ObjectExtent> >::iterator it
153 = object_extents.begin();
154 it != object_extents.end();
155 ++it) {
156 for (vector<ObjectExtent>::iterator p = it->second.begin();
157 p != it->second.end();
158 ++p) {
159 extents.push_back(*p);
160 }
161 }
162}
163
164void Striper::extent_to_file(CephContext *cct, file_layout_t *layout,
165 uint64_t objectno, uint64_t off, uint64_t len,
166 vector<pair<uint64_t, uint64_t> >& extents)
167{
168 ldout(cct, 10) << "extent_to_file " << objectno << " " << off << "~"
169 << len << dendl;
170
171 __u32 object_size = layout->object_size;
172 __u32 su = layout->stripe_unit;
173 __u32 stripe_count = layout->stripe_count;
11fdf7f2 174 ceph_assert(object_size >= su);
7c673cae
FG
175 uint64_t stripes_per_object = object_size / su;
176 ldout(cct, 20) << " stripes_per_object " << stripes_per_object << dendl;
177
178 uint64_t off_in_block = off % su;
179
180 extents.reserve(len / su + 1);
181
182 while (len > 0) {
183 uint64_t stripepos = objectno % stripe_count;
184 uint64_t objectsetno = objectno / stripe_count;
185 uint64_t stripeno = off / su + objectsetno * stripes_per_object;
186 uint64_t blockno = stripeno * stripe_count + stripepos;
187 uint64_t extent_off = blockno * su + off_in_block;
11fdf7f2 188 uint64_t extent_len = std::min(len, su - off_in_block);
7c673cae
FG
189 extents.push_back(make_pair(extent_off, extent_len));
190
191 ldout(cct, 20) << " object " << off << "~" << extent_len
192 << " -> file " << extent_off << "~" << extent_len
193 << dendl;
194
195 off_in_block = 0;
196 off += extent_len;
197 len -= extent_len;
198 }
199}
200
201uint64_t Striper::object_truncate_size(CephContext *cct,
202 const file_layout_t *layout,
203 uint64_t objectno, uint64_t trunc_size)
204{
205 uint64_t obj_trunc_size;
206 if (trunc_size == 0 || trunc_size == (uint64_t)-1) {
207 obj_trunc_size = trunc_size;
208 } else {
209 __u32 object_size = layout->object_size;
210 __u32 su = layout->stripe_unit;
211 __u32 stripe_count = layout->stripe_count;
11fdf7f2 212 ceph_assert(object_size >= su);
7c673cae
FG
213 uint64_t stripes_per_object = object_size / su;
214
215 uint64_t objectsetno = objectno / stripe_count;
216 uint64_t trunc_objectsetno = trunc_size / object_size / stripe_count;
217 if (objectsetno > trunc_objectsetno)
218 obj_trunc_size = 0;
219 else if (objectsetno < trunc_objectsetno)
220 obj_trunc_size = object_size;
221 else {
222 uint64_t trunc_blockno = trunc_size / su;
223 uint64_t trunc_stripeno = trunc_blockno / stripe_count;
224 uint64_t trunc_stripepos = trunc_blockno % stripe_count;
225 uint64_t trunc_objectno = trunc_objectsetno * stripe_count
226 + trunc_stripepos;
227 if (objectno < trunc_objectno)
228 obj_trunc_size = ((trunc_stripeno % stripes_per_object) + 1) * su;
229 else if (objectno > trunc_objectno)
230 obj_trunc_size = (trunc_stripeno % stripes_per_object) * su;
231 else
232 obj_trunc_size = (trunc_stripeno % stripes_per_object) * su
233 + (trunc_size % su);
234 }
235 }
236 ldout(cct, 20) << "object_truncate_size " << objectno << " "
237 << trunc_size << "->" << obj_trunc_size << dendl;
238 return obj_trunc_size;
239}
240
241uint64_t Striper::get_num_objects(const file_layout_t& layout,
242 uint64_t size)
243{
244 __u32 stripe_unit = layout.stripe_unit;
245 __u32 stripe_count = layout.stripe_count;
246 uint64_t period = layout.get_period();
247 uint64_t num_periods = (size + period - 1) / period;
248 uint64_t remainder_bytes = size % period;
249 uint64_t remainder_objs = 0;
250 if ((remainder_bytes > 0) && (remainder_bytes < (uint64_t)stripe_count
251 * stripe_unit))
252 remainder_objs = stripe_count - ((remainder_bytes + stripe_unit - 1)
253 / stripe_unit);
254 return num_periods * stripe_count - remainder_objs;
255}
256
257// StripedReadResult
258
259void Striper::StripedReadResult::add_partial_result(
260 CephContext *cct, bufferlist& bl,
261 const vector<pair<uint64_t,uint64_t> >& buffer_extents)
262{
263 ldout(cct, 10) << "add_partial_result(" << this << ") " << bl.length()
264 << " to " << buffer_extents << dendl;
265 for (vector<pair<uint64_t,uint64_t> >::const_iterator p
266 = buffer_extents.begin();
267 p != buffer_extents.end();
268 ++p) {
269 pair<bufferlist, uint64_t>& r = partial[p->first];
11fdf7f2 270 size_t actual = std::min<uint64_t>(bl.length(), p->second);
7c673cae
FG
271 bl.splice(0, actual, &r.first);
272 r.second = p->second;
273 total_intended_len += r.second;
274 }
275}
276
277void Striper::StripedReadResult::add_partial_sparse_result(
278 CephContext *cct, bufferlist& bl, const map<uint64_t, uint64_t>& bl_map,
279 uint64_t bl_off, const vector<pair<uint64_t,uint64_t> >& buffer_extents)
280{
281 ldout(cct, 10) << "add_partial_sparse_result(" << this << ") " << bl.length()
282 << " covering " << bl_map << " (offset " << bl_off << ")"
283 << " to " << buffer_extents << dendl;
284 map<uint64_t, uint64_t>::const_iterator s = bl_map.begin();
285 for (vector<pair<uint64_t,uint64_t> >::const_iterator p
286 = buffer_extents.begin();
287 p != buffer_extents.end();
288 ++p) {
289 uint64_t tofs = p->first;
11fdf7f2 290 size_t tlen = p->second;
7c673cae
FG
291 ldout(cct, 30) << " be " << tofs << "~" << tlen << dendl;
292 while (tlen > 0) {
293 ldout(cct, 20) << " t " << tofs << "~" << tlen
294 << " bl has " << bl.length()
295 << " off " << bl_off
296 << dendl;
297 if (s == bl_map.end()) {
298 ldout(cct, 20) << " s at end" << dendl;
299 pair<bufferlist, uint64_t>& r = partial[tofs];
300 r.second = tlen;
301 total_intended_len += r.second;
302 break;
303 }
304
305 ldout(cct, 30) << " s " << s->first << "~" << s->second << dendl;
306
307 // skip zero-length extent
308 if (s->second == 0) {
309 ldout(cct, 30) << " s len 0, skipping" << dendl;
310 ++s;
311 continue;
312 }
313
314 if (s->first > bl_off) {
315 // gap in sparse read result
316 pair<bufferlist, uint64_t>& r = partial[tofs];
11fdf7f2 317 size_t gap = std::min<size_t>(s->first - bl_off, tlen);
7c673cae
FG
318 ldout(cct, 20) << " s gap " << gap << ", skipping" << dendl;
319 r.second = gap;
320 total_intended_len += r.second;
321 bl_off += gap;
322 tofs += gap;
323 tlen -= gap;
324 if (tlen == 0) {
325 continue;
326 }
327 }
328
11fdf7f2 329 ceph_assert(s->first <= bl_off);
7c673cae 330 size_t left = (s->first + s->second) - bl_off;
11fdf7f2 331 size_t actual = std::min(left, tlen);
7c673cae
FG
332
333 if (actual > 0) {
334 ldout(cct, 20) << " s has " << actual << ", copying" << dendl;
335 pair<bufferlist, uint64_t>& r = partial[tofs];
336 bl.splice(0, actual, &r.first);
337 r.second = actual;
338 total_intended_len += r.second;
339 bl_off += actual;
340 tofs += actual;
341 tlen -= actual;
342 }
343 if (actual == left) {
344 ldout(cct, 30) << " s advancing" << dendl;
345 ++s;
346 }
347 }
348 }
349}
350
351void Striper::StripedReadResult::assemble_result(CephContext *cct,
352 bufferlist& bl,
353 bool zero_tail)
354{
355 ldout(cct, 10) << "assemble_result(" << this << ") zero_tail=" << zero_tail
356 << dendl;
11fdf7f2
TL
357 size_t zeros = 0; // zeros preceding current position
358 for (auto& p : partial) {
359 size_t got = p.second.first.length();
360 size_t expect = p.second.second;
361 if (got) {
362 if (zeros) {
363 bl.append_zero(zeros);
364 zeros = 0;
7c673cae 365 }
11fdf7f2 366 bl.claim_append(p.second.first);
7c673cae 367 }
11fdf7f2
TL
368 zeros += expect - got;
369 }
370 if (zero_tail && zeros) {
371 bl.append_zero(zeros);
7c673cae
FG
372 }
373 partial.clear();
374}
375
376void Striper::StripedReadResult::assemble_result(CephContext *cct, char *buffer, size_t length)
377{
378
11fdf7f2 379 ceph_assert(buffer && length == total_intended_len);
7c673cae
FG
380
381 map<uint64_t,pair<bufferlist,uint64_t> >::reverse_iterator p = partial.rbegin();
382 if (p == partial.rend())
383 return;
384
385 uint64_t curr = length;
386 uint64_t end = p->first + p->second.second;
387 while (p != partial.rend()) {
388 // sanity check
389 ldout(cct, 20) << "assemble_result(" << this << ") " << p->first << "~" << p->second.second
390 << " " << p->second.first.length() << " bytes"
391 << dendl;
11fdf7f2 392 ceph_assert(p->first == end - p->second.second);
7c673cae
FG
393 end = p->first;
394
395 size_t len = p->second.first.length();
11fdf7f2 396 ceph_assert(curr >= p->second.second);
7c673cae
FG
397 curr -= p->second.second;
398 if (len < p->second.second) {
399 if (len)
400 p->second.first.copy(0, len, buffer + curr);
401 memset(buffer + curr + len, 0, p->second.second - len);
402 } else {
403 p->second.first.copy(0, len, buffer + curr);
404 }
405 ++p;
406 }
407 partial.clear();
11fdf7f2 408 ceph_assert(curr == 0);
7c673cae
FG
409}
410