1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph distributed storage system
6 * Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
7 * Copyright (C) 2014 Red Hat <contact@redhat.com>
9 * Author: Loic Dachary <loic@dachary.org>
11 * This library is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
21 #include "ErasureCode.h"
23 #include "common/strtol.h"
24 #include "include/buffer.h"
25 #include "crush/CrushWrapper.h"
26 #include "osd/osd_types.h"
30 const unsigned ErasureCode::SIMD_ALIGN
= 32;
32 #define DEFAULT_RULE_ROOT "default"
33 #define DEFAULT_RULE_FAILURE_DOMAIN "host"
// ErasureCode::init(): resolves the CRUSH-related profile keys through
// to_string(), OR-ing the result of every call into `err`.
// to_string() writes the default back into `profile` when the key is absent
// or empty, so `profile` may be modified by this function.
// NOTE(review): this listing omits several original lines (the rest of the
// parameter list, the destination argument of each to_string() call and the
// return path), so only the visible calls are described here.
35 int ErasureCode::init(
36 ErasureCodeProfile
&profile
,
// "crush-root" falls back to DEFAULT_RULE_ROOT.
40 err
|= to_string("crush-root", profile
,
42 DEFAULT_RULE_ROOT
, ss
);
// "crush-failure-domain" falls back to DEFAULT_RULE_FAILURE_DOMAIN.
43 err
|= to_string("crush-failure-domain", profile
,
45 DEFAULT_RULE_FAILURE_DOMAIN
, ss
);
// "crush-device-class": its default value is on a line not shown here.
46 err
|= to_string("crush-device-class", profile
,
// ErasureCode::create_rule(): asks `crush` to add a simple rule of type
// pg_pool_t::TYPE_ERASURE and then sets that rule's mask max size to
// get_chunk_count().
// NOTE(review): the declaration of `crush` and most add_simple_rule()
// arguments are on lines omitted from this listing; `crush` is presumably a
// CrushWrapper parameter (crush/CrushWrapper.h is included) -- confirm.
55 int ErasureCode::create_rule(
56 const std::string
&name
,
58 std::ostream
*ss
) const
// `ruleid` receives whatever add_simple_rule() returns.
60 int ruleid
= crush
.add_simple_rule(
66 pg_pool_t::TYPE_ERASURE
,
// The maximum size recorded for the rule is the total chunk count.
72 crush
.set_rule_mask_max_size(ruleid
, get_chunk_count());
// ErasureCode::sanity_check_k(): validates the `k` parameter and reports a
// violation on *ss as "k=<k> must be >= 2".
// NOTE(review): the guarding condition and both return statements are on
// lines omitted from this listing; presumably the message is emitted when
// k < 2 -- confirm against the full source.
76 int ErasureCode::sanity_check_k(int k
, ostream
*ss
)
79 *ss
<< "k=" << k
<< " must be >= 2" << std::endl
;
86 int ErasureCode::chunk_index(unsigned int i
) const
88 return chunk_mapping
.size() > i
? chunk_mapping
[i
] : i
;
// ErasureCode::minimum_to_decode(): selects the chunks needed to serve
// `want_to_read` given `available_chunks`, storing them through `minimum`.
// NOTE(review): the declaration of `minimum`, the declaration of `j`, the
// else branch, the loop body and the return statements are on lines omitted
// from this listing.
91 int ErasureCode::minimum_to_decode(const set
<int> &want_to_read
,
92 const set
<int> &available_chunks
,
// Fast path: every wanted chunk is available, so the wanted set itself is
// the minimum.
95 if (includes(available_chunks
.begin(), available_chunks
.end(),
96 want_to_read
.begin(), want_to_read
.end())) {
97 *minimum
= want_to_read
;
// Otherwise at least k = get_data_chunk_count() available chunks are needed.
99 unsigned int k
= get_data_chunk_count();
100 if (available_chunks
.size() < (unsigned)k
)
102 set
<int>::iterator i
;
// Walks the first k entries of available_chunks (set iteration order);
// what is done with each entry is on an omitted line.
104 for (i
= available_chunks
.begin(), j
= 0; j
< (unsigned)k
; ++i
, j
++)
110 int ErasureCode::minimum_to_decode_with_cost(const set
<int> &want_to_read
,
111 const map
<int, int> &available
,
114 set
<int> available_chunks
;
115 for (map
<int, int>::const_iterator i
= available
.begin();
116 i
!= available
.end();
118 available_chunks
.insert(i
->first
);
119 return minimum_to_decode(want_to_read
, available_chunks
, minimum
);
// ErasureCode::encode_prepare(): lays out the buffers that encoding works on.
// `raw` is cut into data chunks of `blocksize` bytes stored in
// encoded[chunk_index(i)], a partially filled chunk is copied and zero padded,
// and buffers are allocated for the remaining data chunks and for the
// m coding chunks. All buffers are created/rebuilt with SIMD_ALIGN.
// NOTE(review): some original lines (braces, at least one condition and the
// return statement) are omitted from this listing.
122 int ErasureCode::encode_prepare(const bufferlist
&raw
,
123 map
<int, bufferlist
> &encoded
) const
// k data chunks, m = total - k coding chunks.
125 unsigned int k
= get_data_chunk_count();
126 unsigned int m
= get_chunk_count() - k
;
127 unsigned blocksize
= get_chunk_size(raw
.length());
// Number of data chunks that `raw` does not fill completely.
// NOTE(review): divides by blocksize -- confirm get_chunk_size() cannot
// return 0 (e.g. for an empty `raw`).
128 unsigned padded_chunks
= k
- raw
.length() / blocksize
;
129 bufferlist prepared
= raw
;
// Data chunks fully covered by the input: take a blocksize slice of
// `prepared`, then rebuild it aligned; the result must be one contiguous
// buffer.
131 for (unsigned int i
= 0; i
< k
- padded_chunks
; i
++) {
132 bufferlist
&chunk
= encoded
[chunk_index(i
)];
133 chunk
.substr_of(prepared
, i
* blocksize
, blocksize
);
134 chunk
.rebuild_aligned_size_and_memory(blocksize
, SIMD_ALIGN
);
135 assert(chunk
.is_contiguous());
// Bytes of `raw` left over after the full chunks.
138 unsigned remainder
= raw
.length() - (k
- padded_chunks
) * blocksize
;
139 bufferptr
buf(buffer::create_aligned(blocksize
, SIMD_ALIGN
));
// Copy the leftover bytes into a fresh aligned buffer and zero its tail.
141 raw
.copy((k
- padded_chunks
) * blocksize
, remainder
, buf
.c_str());
142 buf
.zero(remainder
, blocksize
- remainder
);
143 encoded
[chunk_index(k
-padded_chunks
)].push_back(std::move(buf
));
// Data chunks after the partially filled one each get a fresh aligned buffer.
// NOTE(review): no zero-fill of these buffers is visible in this listing
// (original line 147 is omitted) -- confirm in the full source.
145 for (unsigned int i
= k
- padded_chunks
+ 1; i
< k
; i
++) {
146 bufferptr
buf(buffer::create_aligned(blocksize
, SIMD_ALIGN
));
148 encoded
[chunk_index(i
)].push_back(std::move(buf
));
// Coding chunks (positions k .. k+m-1): one aligned buffer each.
151 for (unsigned int i
= k
; i
< k
+ m
; i
++) {
152 bufferlist
&chunk
= encoded
[chunk_index(i
)];
153 chunk
.push_back(buffer::create_aligned(blocksize
, SIMD_ALIGN
));
// ErasureCode::encode(): prepares the chunk buffers for `in` with
// encode_prepare(), runs encode_chunks() on them, then visits every chunk
// position 0 .. k+m-1 and singles out those absent from `want_to_encode`.
// NOTE(review): the handling of `err`, the action taken for unwanted chunks
// and the return statement are on lines omitted from this listing.
159 int ErasureCode::encode(const set
<int> &want_to_encode
,
160 const bufferlist
&in
,
161 map
<int, bufferlist
> *encoded
)
163 unsigned int k
= get_data_chunk_count();
164 unsigned int m
= get_chunk_count() - k
;
166 int err
= encode_prepare(in
, *encoded
);
// NOTE(review): the value returned by encode_chunks() is discarded here.
169 encode_chunks(want_to_encode
, encoded
);
170 for (unsigned int i
= 0; i
< k
+ m
; i
++) {
// True for chunk positions the caller did not ask for.
171 if (want_to_encode
.count(i
) == 0)
177 int ErasureCode::encode_chunks(const set
<int> &want_to_encode
,
178 map
<int, bufferlist
> *encoded
)
180 assert("ErasureCode::encode_chunks not implemented" == 0);
// ErasureCode::decode(): fills `decoded` so that the chunks in
// `want_to_read` can be obtained from the provided `chunks`.
// If every wanted chunk is already present, the wanted chunks are copied
// straight from `chunks`. Otherwise a buffer is set up for every chunk
// position and the work is delegated to decode_chunks().
// NOTE(review): the declaration of `have`, parts of the loops and the
// return on the fast path are on lines omitted from this listing.
183 int ErasureCode::decode(const set
<int> &want_to_read
,
184 const map
<int, bufferlist
> &chunks
,
185 map
<int, bufferlist
> *decoded
)
// `have` collects the ids (map keys) of the chunks that were supplied.
188 have
.reserve(chunks
.size());
189 for (map
<int, bufferlist
>::const_iterator i
= chunks
.begin();
192 have
.push_back(i
->first
);
// Tail of a call whose name is on an omitted line; the ranges passed are
// (have, want_to_read), matching the includes() test in minimum_to_decode().
195 have
.begin(), have
.end(), want_to_read
.begin(), want_to_read
.end())) {
// Fast path: copy each wanted chunk as is.
196 for (set
<int>::iterator i
= want_to_read
.begin();
197 i
!= want_to_read
.end();
199 (*decoded
)[*i
] = chunks
.find(*i
)->second
;
203 unsigned int k
= get_data_chunk_count();
204 unsigned int m
= get_chunk_count() - k
;
// Chunk size is taken from the first supplied chunk.
// NOTE(review): dereferences chunks.begin() -- assumes `chunks` is not empty.
205 unsigned blocksize
= (*chunks
.begin()).second
.length();
206 for (unsigned int i
= 0; i
< k
+ m
; i
++) {
// Missing chunk: give it a fresh SIMD_ALIGN-aligned buffer of blocksize bytes.
207 if (chunks
.find(i
) == chunks
.end()) {
208 bufferptr
ptr(buffer::create_aligned(blocksize
, SIMD_ALIGN
));
209 (*decoded
)[i
].push_front(ptr
);
// Present chunk: copy it and make sure the copy is aligned.
211 (*decoded
)[i
] = chunks
.find(i
)->second
;
212 (*decoded
)[i
].rebuild_aligned(SIMD_ALIGN
);
215 return decode_chunks(want_to_read
, chunks
, decoded
);
218 int ErasureCode::decode_chunks(const set
<int> &want_to_read
,
219 const map
<int, bufferlist
> &chunks
,
220 map
<int, bufferlist
> *decoded
)
222 assert("ErasureCode::decode_chunks not implemented" == 0);
// ErasureCode::parse(): forwards `profile` and `ss` to to_mapping() and
// returns its result unchanged.
// NOTE(review): the declaration of the `ss` parameter is on a line omitted
// from this listing.
225 int ErasureCode::parse(const ErasureCodeProfile
&profile
,
228 return to_mapping(profile
, ss
);
231 const vector
<int> &ErasureCode::get_chunk_mapping() const {
232 return chunk_mapping
;
// ErasureCode::to_mapping(): builds chunk_mapping from the "mapping" entry
// of `profile`, when that entry exists. The mapping string is scanned
// character by character; each position is recorded either directly in
// chunk_mapping or in the temporary coding_chunk_mapping, and the temporary
// list is appended afterwards, so its positions end up last in chunk_mapping.
// NOTE(review): the `ss` parameter, the declaration/increment of `position`,
// the test that picks one list or the other and the return statement are on
// lines omitted from this listing.
235 int ErasureCode::to_mapping(const ErasureCodeProfile
&profile
,
238 if (profile
.find("mapping") != profile
.end()) {
239 std::string mapping
= profile
.find("mapping")->second
;
241 vector
<int> coding_chunk_mapping
;
242 for(std::string::iterator it
= mapping
.begin(); it
!= mapping
.end(); ++it
) {
244 chunk_mapping
.push_back(position
);
246 coding_chunk_mapping
.push_back(position
);
// Append the positions gathered in coding_chunk_mapping after the others.
249 chunk_mapping
.insert(chunk_mapping
.end(),
250 coding_chunk_mapping
.begin(),
251 coding_chunk_mapping
.end());
// ErasureCode::to_int(): reads profile[name] as a base-10 integer.
// When the key is absent or its value is empty, `default_value` is first
// written into `profile` (so `profile` may be modified). The text is parsed
// with strict_strtol(); on the error path a diagnostic is written to *ss and
// *value is set from parsing `default_value` instead.
// NOTE(review): the `value` and `ss` parameters, the declaration of `err`,
// the error test and the return statements are on lines omitted from this
// listing.
256 int ErasureCode::to_int(const std::string
&name
,
257 ErasureCodeProfile
&profile
,
259 const std::string
&default_value
,
// Missing or empty entry: store the default in the profile.
262 if (profile
.find(name
) == profile
.end() ||
263 profile
.find(name
)->second
.size() == 0)
264 profile
[name
] = default_value
;
265 std::string p
= profile
.find(name
)->second
;
267 int r
= strict_strtol(p
.c_str(), 10, &err
);
// Conversion failure: report it and fall back to the default.
269 *ss
<< "could not convert " << name
<< "=" << p
270 << " to int because " << err
271 << ", set to default " << default_value
<< std::endl
;
272 *value
= strict_strtol(default_value
.c_str(), 10, &err
);
// ErasureCode::to_bool(): reads profile[name] as a boolean.
// When the key is absent or its value is empty, `default_value` is first
// written into `profile` (so `profile` may be modified). *value becomes true
// only when the text is exactly "yes" or "true"; any other text yields false.
// NOTE(review): the `value` and `ss` parameters and the return statement are
// on lines omitted from this listing.
279 int ErasureCode::to_bool(const std::string
&name
,
280 ErasureCodeProfile
&profile
,
282 const std::string
&default_value
,
// Missing or empty entry: store the default in the profile.
285 if (profile
.find(name
) == profile
.end() ||
286 profile
.find(name
)->second
.size() == 0)
287 profile
[name
] = default_value
;
288 const std::string p
= profile
.find(name
)->second
;
289 *value
= (p
== "yes") || (p
== "true");
// ErasureCode::to_string(): copies profile[name] into *value.
// When the key is absent or its value is empty, `default_value` is first
// written into `profile` (so `profile` may be modified) and is therefore the
// value that gets copied.
// NOTE(review): the `value` and `ss` parameters and the return statement are
// on lines omitted from this listing.
293 int ErasureCode::to_string(const std::string
&name
,
294 ErasureCodeProfile
&profile
,
296 const std::string
&default_value
,
// Missing or empty entry: store the default in the profile.
299 if (profile
.find(name
) == profile
.end() ||
300 profile
.find(name
)->second
.size() == 0)
301 profile
[name
] = default_value
;
302 *value
= profile
[name
];
306 int ErasureCode::decode_concat(const map
<int, bufferlist
> &chunks
,
309 set
<int> want_to_read
;
311 for (unsigned int i
= 0; i
< get_data_chunk_count(); i
++) {
312 want_to_read
.insert(chunk_index(i
));
314 map
<int, bufferlist
> decoded_map
;
315 int r
= decode(want_to_read
, chunks
, &decoded_map
);
317 for (unsigned int i
= 0; i
< get_data_chunk_count(); i
++) {
318 decoded
->claim_append(decoded_map
[chunk_index(i
)]);