1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph distributed storage system
6 * Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
7 * Copyright (C) 2014 Red Hat <contact@redhat.com>
9 * Author: Loic Dachary <loic@dachary.org>
11 * This library is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
21 #include "ErasureCode.h"
23 #include "common/strtol.h"
24 #include "include/buffer.h"
25 #include "crush/CrushWrapper.h"
26 #include "osd/osd_types.h"
28 #define DEFAULT_RULE_ROOT "default"
29 #define DEFAULT_RULE_FAILURE_DOMAIN "host"
39 using ceph::bufferlist
;
42 const unsigned ErasureCode::SIMD_ALIGN
= 32;
44 int ErasureCode::init(
45 ErasureCodeProfile
&profile
,
49 err
|= to_string("crush-root", profile
,
51 DEFAULT_RULE_ROOT
, ss
);
52 err
|= to_string("crush-failure-domain", profile
,
54 DEFAULT_RULE_FAILURE_DOMAIN
, ss
);
55 err
|= to_string("crush-device-class", profile
,
64 int ErasureCode::create_rule(
65 const std::string
&name
,
67 std::ostream
*ss
) const
69 int ruleid
= crush
.add_simple_rule(
75 pg_pool_t::TYPE_ERASURE
,
81 crush
.set_rule_mask_max_size(ruleid
, get_chunk_count());
85 int ErasureCode::sanity_check_k_m(int k
, int m
, ostream
*ss
)
88 *ss
<< "k=" << k
<< " must be >= 2" << std::endl
;
92 *ss
<< "m=" << m
<< " must be >= 1" << std::endl
;
98 int ErasureCode::chunk_index(unsigned int i
) const
100 return chunk_mapping
.size() > i
? chunk_mapping
[i
] : i
;
103 int ErasureCode::_minimum_to_decode(const set
<int> &want_to_read
,
104 const set
<int> &available_chunks
,
107 if (includes(available_chunks
.begin(), available_chunks
.end(),
108 want_to_read
.begin(), want_to_read
.end())) {
109 *minimum
= want_to_read
;
111 unsigned int k
= get_data_chunk_count();
112 if (available_chunks
.size() < (unsigned)k
)
114 set
<int>::iterator i
;
116 for (i
= available_chunks
.begin(), j
= 0; j
< (unsigned)k
; ++i
, j
++)
122 int ErasureCode::minimum_to_decode(const set
<int> &want_to_read
,
123 const set
<int> &available_chunks
,
124 map
<int, vector
<pair
<int, int>>> *minimum
)
126 set
<int> minimum_shard_ids
;
127 int r
= _minimum_to_decode(want_to_read
, available_chunks
, &minimum_shard_ids
);
131 vector
<pair
<int, int>> default_subchunks
;
132 default_subchunks
.push_back(make_pair(0, get_sub_chunk_count()));
133 for (auto &&id
: minimum_shard_ids
) {
134 minimum
->insert(make_pair(id
, default_subchunks
));
139 int ErasureCode::minimum_to_decode_with_cost(const set
<int> &want_to_read
,
140 const map
<int, int> &available
,
143 set
<int> available_chunks
;
144 for (map
<int, int>::const_iterator i
= available
.begin();
145 i
!= available
.end();
147 available_chunks
.insert(i
->first
);
148 return _minimum_to_decode(want_to_read
, available_chunks
, minimum
);
151 int ErasureCode::encode_prepare(const bufferlist
&raw
,
152 map
<int, bufferlist
> &encoded
) const
154 unsigned int k
= get_data_chunk_count();
155 unsigned int m
= get_chunk_count() - k
;
156 unsigned blocksize
= get_chunk_size(raw
.length());
157 unsigned padded_chunks
= k
- raw
.length() / blocksize
;
158 bufferlist prepared
= raw
;
160 for (unsigned int i
= 0; i
< k
- padded_chunks
; i
++) {
161 bufferlist
&chunk
= encoded
[chunk_index(i
)];
162 chunk
.substr_of(prepared
, i
* blocksize
, blocksize
);
163 chunk
.rebuild_aligned_size_and_memory(blocksize
, SIMD_ALIGN
);
164 ceph_assert(chunk
.is_contiguous());
167 unsigned remainder
= raw
.length() - (k
- padded_chunks
) * blocksize
;
168 bufferptr
buf(buffer::create_aligned(blocksize
, SIMD_ALIGN
));
170 raw
.begin((k
- padded_chunks
) * blocksize
).copy(remainder
, buf
.c_str());
171 buf
.zero(remainder
, blocksize
- remainder
);
172 encoded
[chunk_index(k
-padded_chunks
)].push_back(std::move(buf
));
174 for (unsigned int i
= k
- padded_chunks
+ 1; i
< k
; i
++) {
175 bufferptr
buf(buffer::create_aligned(blocksize
, SIMD_ALIGN
));
177 encoded
[chunk_index(i
)].push_back(std::move(buf
));
180 for (unsigned int i
= k
; i
< k
+ m
; i
++) {
181 bufferlist
&chunk
= encoded
[chunk_index(i
)];
182 chunk
.push_back(buffer::create_aligned(blocksize
, SIMD_ALIGN
));
188 int ErasureCode::encode(const set
<int> &want_to_encode
,
189 const bufferlist
&in
,
190 map
<int, bufferlist
> *encoded
)
192 unsigned int k
= get_data_chunk_count();
193 unsigned int m
= get_chunk_count() - k
;
195 int err
= encode_prepare(in
, *encoded
);
198 encode_chunks(want_to_encode
, encoded
);
199 for (unsigned int i
= 0; i
< k
+ m
; i
++) {
200 if (want_to_encode
.count(i
) == 0)
206 int ErasureCode::encode_chunks(const set
<int> &want_to_encode
,
207 map
<int, bufferlist
> *encoded
)
209 ceph_abort_msg("ErasureCode::encode_chunks not implemented");
212 int ErasureCode::_decode(const set
<int> &want_to_read
,
213 const map
<int, bufferlist
> &chunks
,
214 map
<int, bufferlist
> *decoded
)
217 have
.reserve(chunks
.size());
218 for (map
<int, bufferlist
>::const_iterator i
= chunks
.begin();
221 have
.push_back(i
->first
);
224 have
.begin(), have
.end(), want_to_read
.begin(), want_to_read
.end())) {
225 for (set
<int>::iterator i
= want_to_read
.begin();
226 i
!= want_to_read
.end();
228 (*decoded
)[*i
] = chunks
.find(*i
)->second
;
232 unsigned int k
= get_data_chunk_count();
233 unsigned int m
= get_chunk_count() - k
;
234 unsigned blocksize
= (*chunks
.begin()).second
.length();
235 for (unsigned int i
= 0; i
< k
+ m
; i
++) {
236 if (chunks
.find(i
) == chunks
.end()) {
238 bufferptr
ptr(buffer::create_aligned(blocksize
, SIMD_ALIGN
));
240 tmp
.claim_append((*decoded
)[i
]);
241 (*decoded
)[i
].swap(tmp
);
243 (*decoded
)[i
] = chunks
.find(i
)->second
;
244 (*decoded
)[i
].rebuild_aligned(SIMD_ALIGN
);
247 return decode_chunks(want_to_read
, chunks
, decoded
);
250 int ErasureCode::decode(const set
<int> &want_to_read
,
251 const map
<int, bufferlist
> &chunks
,
252 map
<int, bufferlist
> *decoded
, int chunk_size
)
254 return _decode(want_to_read
, chunks
, decoded
);
257 int ErasureCode::decode_chunks(const set
<int> &want_to_read
,
258 const map
<int, bufferlist
> &chunks
,
259 map
<int, bufferlist
> *decoded
)
261 ceph_abort_msg("ErasureCode::decode_chunks not implemented");
264 int ErasureCode::parse(const ErasureCodeProfile
&profile
,
267 return to_mapping(profile
, ss
);
270 const vector
<int> &ErasureCode::get_chunk_mapping() const {
271 return chunk_mapping
;
274 int ErasureCode::to_mapping(const ErasureCodeProfile
&profile
,
277 if (profile
.find("mapping") != profile
.end()) {
278 std::string mapping
= profile
.find("mapping")->second
;
280 vector
<int> coding_chunk_mapping
;
281 for(std::string::iterator it
= mapping
.begin(); it
!= mapping
.end(); ++it
) {
283 chunk_mapping
.push_back(position
);
285 coding_chunk_mapping
.push_back(position
);
288 chunk_mapping
.insert(chunk_mapping
.end(),
289 coding_chunk_mapping
.begin(),
290 coding_chunk_mapping
.end());
295 int ErasureCode::to_int(const std::string
&name
,
296 ErasureCodeProfile
&profile
,
298 const std::string
&default_value
,
301 if (profile
.find(name
) == profile
.end() ||
302 profile
.find(name
)->second
.size() == 0)
303 profile
[name
] = default_value
;
304 std::string p
= profile
.find(name
)->second
;
306 int r
= strict_strtol(p
.c_str(), 10, &err
);
308 *ss
<< "could not convert " << name
<< "=" << p
309 << " to int because " << err
310 << ", set to default " << default_value
<< std::endl
;
311 *value
= strict_strtol(default_value
.c_str(), 10, &err
);
318 int ErasureCode::to_bool(const std::string
&name
,
319 ErasureCodeProfile
&profile
,
321 const std::string
&default_value
,
324 if (profile
.find(name
) == profile
.end() ||
325 profile
.find(name
)->second
.size() == 0)
326 profile
[name
] = default_value
;
327 const std::string p
= profile
.find(name
)->second
;
328 *value
= (p
== "yes") || (p
== "true");
332 int ErasureCode::to_string(const std::string
&name
,
333 ErasureCodeProfile
&profile
,
335 const std::string
&default_value
,
338 if (profile
.find(name
) == profile
.end() ||
339 profile
.find(name
)->second
.size() == 0)
340 profile
[name
] = default_value
;
341 *value
= profile
[name
];
345 int ErasureCode::decode_concat(const map
<int, bufferlist
> &chunks
,
348 set
<int> want_to_read
;
350 for (unsigned int i
= 0; i
< get_data_chunk_count(); i
++) {
351 want_to_read
.insert(chunk_index(i
));
353 map
<int, bufferlist
> decoded_map
;
354 int r
= _decode(want_to_read
, chunks
, &decoded_map
);
356 for (unsigned int i
= 0; i
< get_data_chunk_count(); i
++) {
357 decoded
->claim_append(decoded_map
[chunk_index(i
)]);