]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph distributed storage system | |
5 | * | |
6 | * Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com> | |
7 | * Copyright (C) 2014 Red Hat <contact@redhat.com> | |
8 | * | |
9 | * Author: Loic Dachary <loic@dachary.org> | |
10 | * | |
11 | * This library is free software; you can redistribute it and/or | |
12 | * modify it under the terms of the GNU Lesser General Public | |
13 | * License as published by the Free Software Foundation; either | |
14 | * version 2.1 of the License, or (at your option) any later version. | |
15 | * | |
16 | */ | |
17 | ||
18 | #include "common/debug.h" | |
19 | #include "ErasureCodeJerasure.h" | |
20 | #include "crush/CrushWrapper.h" | |
21 | #include "osd/osd_types.h" | |
31f18b77 FG |
22 | |
23 | using namespace std; | |
24 | ||
7c673cae FG |
25 | extern "C" { |
26 | #include "jerasure.h" | |
27 | #include "reed_sol.h" | |
28 | #include "galois.h" | |
29 | #include "cauchy.h" | |
30 | #include "liberation.h" | |
31 | } | |
32 | ||
33 | #define LARGEST_VECTOR_WORDSIZE 16 | |
34 | ||
35 | #define dout_context g_ceph_context | |
36 | #define dout_subsys ceph_subsys_osd | |
37 | #undef dout_prefix | |
38 | #define dout_prefix _prefix(_dout) | |
39 | ||
40 | static ostream& _prefix(std::ostream* _dout) | |
41 | { | |
42 | return *_dout << "ErasureCodeJerasure: "; | |
43 | } | |
44 | ||
45 | int ErasureCodeJerasure::create_ruleset(const string &name, | |
46 | CrushWrapper &crush, | |
47 | ostream *ss) const | |
48 | { | |
31f18b77 FG |
49 | int ruleid = crush.add_simple_rule( |
50 | name, ruleset_root, ruleset_failure_domain, | |
51 | "indep", pg_pool_t::TYPE_ERASURE, ss); | |
7c673cae FG |
52 | if (ruleid < 0) |
53 | return ruleid; | |
54 | else { | |
55 | crush.set_rule_mask_max_size(ruleid, get_chunk_count()); | |
56 | return crush.get_rule_mask_ruleset(ruleid); | |
57 | } | |
58 | } | |
59 | ||
60 | int ErasureCodeJerasure::init(ErasureCodeProfile& profile, ostream *ss) | |
61 | { | |
62 | int err = 0; | |
63 | dout(10) << "technique=" << technique << dendl; | |
64 | profile["technique"] = technique; | |
65 | err |= to_string("ruleset-root", profile, | |
66 | &ruleset_root, | |
67 | DEFAULT_RULESET_ROOT, ss); | |
68 | err |= to_string("ruleset-failure-domain", profile, | |
69 | &ruleset_failure_domain, | |
70 | DEFAULT_RULESET_FAILURE_DOMAIN, ss); | |
71 | err |= parse(profile, ss); | |
72 | if (err) | |
73 | return err; | |
74 | prepare(); | |
75 | ErasureCode::init(profile, ss); | |
76 | return err; | |
77 | } | |
78 | ||
79 | int ErasureCodeJerasure::parse(ErasureCodeProfile &profile, | |
80 | ostream *ss) | |
81 | { | |
82 | int err = ErasureCode::parse(profile, ss); | |
83 | err |= to_int("k", profile, &k, DEFAULT_K, ss); | |
84 | err |= to_int("m", profile, &m, DEFAULT_M, ss); | |
85 | err |= to_int("w", profile, &w, DEFAULT_W, ss); | |
86 | if (chunk_mapping.size() > 0 && (int)chunk_mapping.size() != k + m) { | |
87 | *ss << "mapping " << profile.find("mapping")->second | |
88 | << " maps " << chunk_mapping.size() << " chunks instead of" | |
89 | << " the expected " << k + m << " and will be ignored" << std::endl; | |
90 | chunk_mapping.clear(); | |
91 | err = -EINVAL; | |
92 | } | |
93 | err |= sanity_check_k(k, ss); | |
94 | return err; | |
95 | } | |
96 | ||
97 | unsigned int ErasureCodeJerasure::get_chunk_size(unsigned int object_size) const | |
98 | { | |
99 | unsigned alignment = get_alignment(); | |
100 | if (per_chunk_alignment) { | |
101 | unsigned chunk_size = object_size / k; | |
102 | if (object_size % k) | |
103 | chunk_size++; | |
104 | dout(20) << "get_chunk_size: chunk_size " << chunk_size | |
105 | << " must be modulo " << alignment << dendl; | |
106 | assert(alignment <= chunk_size); | |
107 | unsigned modulo = chunk_size % alignment; | |
108 | if (modulo) { | |
109 | dout(10) << "get_chunk_size: " << chunk_size | |
110 | << " padded to " << chunk_size + alignment - modulo << dendl; | |
111 | chunk_size += alignment - modulo; | |
112 | } | |
113 | return chunk_size; | |
114 | } else { | |
115 | unsigned tail = object_size % alignment; | |
116 | unsigned padded_length = object_size + ( tail ? ( alignment - tail ) : 0 ); | |
117 | assert(padded_length % k == 0); | |
118 | return padded_length / k; | |
119 | } | |
120 | } | |
121 | ||
122 | int ErasureCodeJerasure::encode_chunks(const set<int> &want_to_encode, | |
123 | map<int, bufferlist> *encoded) | |
124 | { | |
125 | char *chunks[k + m]; | |
126 | for (int i = 0; i < k + m; i++) | |
127 | chunks[i] = (*encoded)[i].c_str(); | |
128 | jerasure_encode(&chunks[0], &chunks[k], (*encoded)[0].length()); | |
129 | return 0; | |
130 | } | |
131 | ||
132 | int ErasureCodeJerasure::decode_chunks(const set<int> &want_to_read, | |
133 | const map<int, bufferlist> &chunks, | |
134 | map<int, bufferlist> *decoded) | |
135 | { | |
136 | unsigned blocksize = (*chunks.begin()).second.length(); | |
137 | int erasures[k + m + 1]; | |
138 | int erasures_count = 0; | |
139 | char *data[k]; | |
140 | char *coding[m]; | |
141 | for (int i = 0; i < k + m; i++) { | |
142 | if (chunks.find(i) == chunks.end()) { | |
143 | erasures[erasures_count] = i; | |
144 | erasures_count++; | |
145 | } | |
146 | if (i < k) | |
147 | data[i] = (*decoded)[i].c_str(); | |
148 | else | |
149 | coding[i - k] = (*decoded)[i].c_str(); | |
150 | } | |
151 | erasures[erasures_count] = -1; | |
152 | ||
153 | assert(erasures_count > 0); | |
154 | return jerasure_decode(erasures, data, coding, blocksize); | |
155 | } | |
156 | ||
157 | bool ErasureCodeJerasure::is_prime(int value) | |
158 | { | |
159 | int prime55[] = { | |
160 | 2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71, | |
161 | 73,79,83,89,97,101,103,107,109,113,127,131,137,139,149, | |
162 | 151,157,163,167,173,179, | |
163 | 181,191,193,197,199,211,223,227,229,233,239,241,251,257 | |
164 | }; | |
165 | int i; | |
166 | for (i = 0; i < 55; i++) | |
167 | if (value == prime55[i]) | |
168 | return true; | |
169 | return false; | |
170 | } | |
171 | ||
172 | // | |
173 | // ErasureCodeJerasureReedSolomonVandermonde | |
174 | // | |
175 | void ErasureCodeJerasureReedSolomonVandermonde::jerasure_encode(char **data, | |
176 | char **coding, | |
177 | int blocksize) | |
178 | { | |
179 | jerasure_matrix_encode(k, m, w, matrix, data, coding, blocksize); | |
180 | } | |
181 | ||
182 | int ErasureCodeJerasureReedSolomonVandermonde::jerasure_decode(int *erasures, | |
183 | char **data, | |
184 | char **coding, | |
185 | int blocksize) | |
186 | { | |
187 | return jerasure_matrix_decode(k, m, w, matrix, 1, | |
188 | erasures, data, coding, blocksize); | |
189 | } | |
190 | ||
191 | unsigned ErasureCodeJerasureReedSolomonVandermonde::get_alignment() const | |
192 | { | |
193 | if (per_chunk_alignment) { | |
194 | return w * LARGEST_VECTOR_WORDSIZE; | |
195 | } else { | |
196 | unsigned alignment = k*w*sizeof(int); | |
197 | if ( ((w*sizeof(int))%LARGEST_VECTOR_WORDSIZE) ) | |
198 | alignment = k*w*LARGEST_VECTOR_WORDSIZE; | |
199 | return alignment; | |
200 | } | |
201 | } | |
202 | ||
203 | int ErasureCodeJerasureReedSolomonVandermonde::parse(ErasureCodeProfile &profile, | |
204 | ostream *ss) | |
205 | { | |
206 | int err = 0; | |
207 | err |= ErasureCodeJerasure::parse(profile, ss); | |
208 | if (w != 8 && w != 16 && w != 32) { | |
209 | *ss << "ReedSolomonVandermonde: w=" << w | |
210 | << " must be one of {8, 16, 32} : revert to " << DEFAULT_W << std::endl; | |
211 | profile["w"] = "8"; | |
212 | err |= to_int("w", profile, &w, DEFAULT_W, ss); | |
213 | err = -EINVAL; | |
214 | } | |
215 | err |= to_bool("jerasure-per-chunk-alignment", profile, | |
216 | &per_chunk_alignment, "false", ss); | |
217 | return err; | |
218 | } | |
219 | ||
220 | void ErasureCodeJerasureReedSolomonVandermonde::prepare() | |
221 | { | |
222 | matrix = reed_sol_vandermonde_coding_matrix(k, m, w); | |
223 | } | |
224 | ||
225 | // | |
226 | // ErasureCodeJerasureReedSolomonRAID6 | |
227 | // | |
228 | void ErasureCodeJerasureReedSolomonRAID6::jerasure_encode(char **data, | |
229 | char **coding, | |
230 | int blocksize) | |
231 | { | |
232 | reed_sol_r6_encode(k, w, data, coding, blocksize); | |
233 | } | |
234 | ||
235 | int ErasureCodeJerasureReedSolomonRAID6::jerasure_decode(int *erasures, | |
236 | char **data, | |
237 | char **coding, | |
238 | int blocksize) | |
239 | { | |
240 | return jerasure_matrix_decode(k, m, w, matrix, 1, erasures, data, coding, blocksize); | |
241 | } | |
242 | ||
243 | unsigned ErasureCodeJerasureReedSolomonRAID6::get_alignment() const | |
244 | { | |
245 | if (per_chunk_alignment) { | |
246 | return w * LARGEST_VECTOR_WORDSIZE; | |
247 | } else { | |
248 | unsigned alignment = k*w*sizeof(int); | |
249 | if ( ((w*sizeof(int))%LARGEST_VECTOR_WORDSIZE) ) | |
250 | alignment = k*w*LARGEST_VECTOR_WORDSIZE; | |
251 | return alignment; | |
252 | } | |
253 | } | |
254 | ||
255 | int ErasureCodeJerasureReedSolomonRAID6::parse(ErasureCodeProfile &profile, | |
256 | ostream *ss) | |
257 | { | |
258 | int err = ErasureCodeJerasure::parse(profile, ss); | |
259 | profile.erase("m"); | |
260 | m = 2; | |
261 | if (w != 8 && w != 16 && w != 32) { | |
262 | *ss << "ReedSolomonRAID6: w=" << w | |
263 | << " must be one of {8, 16, 32} : revert to 8 " << std::endl; | |
264 | profile["w"] = "8"; | |
265 | err |= to_int("w", profile, &w, DEFAULT_W, ss); | |
266 | err = -EINVAL; | |
267 | } | |
268 | return err; | |
269 | } | |
270 | ||
271 | void ErasureCodeJerasureReedSolomonRAID6::prepare() | |
272 | { | |
273 | matrix = reed_sol_r6_coding_matrix(k, w); | |
274 | } | |
275 | ||
276 | // | |
277 | // ErasureCodeJerasureCauchy | |
278 | // | |
279 | void ErasureCodeJerasureCauchy::jerasure_encode(char **data, | |
280 | char **coding, | |
281 | int blocksize) | |
282 | { | |
283 | jerasure_schedule_encode(k, m, w, schedule, | |
284 | data, coding, blocksize, packetsize); | |
285 | } | |
286 | ||
287 | int ErasureCodeJerasureCauchy::jerasure_decode(int *erasures, | |
288 | char **data, | |
289 | char **coding, | |
290 | int blocksize) | |
291 | { | |
292 | return jerasure_schedule_decode_lazy(k, m, w, bitmatrix, | |
293 | erasures, data, coding, blocksize, packetsize, 1); | |
294 | } | |
295 | ||
296 | unsigned ErasureCodeJerasureCauchy::get_alignment() const | |
297 | { | |
298 | if (per_chunk_alignment) { | |
299 | unsigned alignment = w * packetsize; | |
300 | unsigned modulo = alignment % LARGEST_VECTOR_WORDSIZE; | |
301 | if (modulo) | |
302 | alignment += LARGEST_VECTOR_WORDSIZE - modulo; | |
303 | return alignment; | |
304 | } else { | |
305 | unsigned alignment = k*w*packetsize*sizeof(int); | |
306 | if ( ((w*packetsize*sizeof(int))%LARGEST_VECTOR_WORDSIZE) ) | |
307 | alignment = k*w*packetsize*LARGEST_VECTOR_WORDSIZE; | |
308 | return alignment; | |
309 | } | |
310 | } | |
311 | ||
312 | int ErasureCodeJerasureCauchy::parse(ErasureCodeProfile &profile, | |
313 | ostream *ss) | |
314 | { | |
315 | int err = ErasureCodeJerasure::parse(profile, ss); | |
316 | err |= to_int("packetsize", profile, &packetsize, DEFAULT_PACKETSIZE, ss); | |
317 | err |= to_bool("jerasure-per-chunk-alignment", profile, | |
318 | &per_chunk_alignment, "false", ss); | |
319 | return err; | |
320 | } | |
321 | ||
322 | void ErasureCodeJerasureCauchy::prepare_schedule(int *matrix) | |
323 | { | |
324 | bitmatrix = jerasure_matrix_to_bitmatrix(k, m, w, matrix); | |
325 | schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix); | |
326 | } | |
327 | ||
328 | // | |
329 | // ErasureCodeJerasureCauchyOrig | |
330 | // | |
331 | void ErasureCodeJerasureCauchyOrig::prepare() | |
332 | { | |
333 | int *matrix = cauchy_original_coding_matrix(k, m, w); | |
334 | prepare_schedule(matrix); | |
335 | free(matrix); | |
336 | } | |
337 | ||
338 | // | |
339 | // ErasureCodeJerasureCauchyGood | |
340 | // | |
341 | void ErasureCodeJerasureCauchyGood::prepare() | |
342 | { | |
343 | int *matrix = cauchy_good_general_coding_matrix(k, m, w); | |
344 | prepare_schedule(matrix); | |
345 | free(matrix); | |
346 | } | |
347 | ||
348 | // | |
349 | // ErasureCodeJerasureLiberation | |
350 | // | |
351 | ErasureCodeJerasureLiberation::~ErasureCodeJerasureLiberation() | |
352 | { | |
353 | if (bitmatrix) | |
354 | free(bitmatrix); | |
355 | if (schedule) | |
356 | jerasure_free_schedule(schedule); | |
357 | } | |
358 | ||
359 | void ErasureCodeJerasureLiberation::jerasure_encode(char **data, | |
360 | char **coding, | |
361 | int blocksize) | |
362 | { | |
363 | jerasure_schedule_encode(k, m, w, schedule, data, | |
364 | coding, blocksize, packetsize); | |
365 | } | |
366 | ||
367 | int ErasureCodeJerasureLiberation::jerasure_decode(int *erasures, | |
368 | char **data, | |
369 | char **coding, | |
370 | int blocksize) | |
371 | { | |
372 | return jerasure_schedule_decode_lazy(k, m, w, bitmatrix, erasures, data, | |
373 | coding, blocksize, packetsize, 1); | |
374 | } | |
375 | ||
376 | unsigned ErasureCodeJerasureLiberation::get_alignment() const | |
377 | { | |
378 | unsigned alignment = k*w*packetsize*sizeof(int); | |
379 | if ( ((w*packetsize*sizeof(int))%LARGEST_VECTOR_WORDSIZE) ) | |
380 | alignment = k*w*packetsize*LARGEST_VECTOR_WORDSIZE; | |
381 | return alignment; | |
382 | } | |
383 | ||
384 | bool ErasureCodeJerasureLiberation::check_k(ostream *ss) const | |
385 | { | |
386 | if (k > w) { | |
387 | *ss << "k=" << k << " must be less than or equal to w=" << w << std::endl; | |
388 | return false; | |
389 | } else { | |
390 | return true; | |
391 | } | |
392 | } | |
393 | ||
394 | bool ErasureCodeJerasureLiberation::check_w(ostream *ss) const | |
395 | { | |
396 | if (w <= 2 || !is_prime(w)) { | |
397 | *ss << "w=" << w << " must be greater than two and be prime" << std::endl; | |
398 | return false; | |
399 | } else { | |
400 | return true; | |
401 | } | |
402 | } | |
403 | ||
404 | bool ErasureCodeJerasureLiberation::check_packetsize_set(ostream *ss) const | |
405 | { | |
406 | if (packetsize == 0) { | |
407 | *ss << "packetsize=" << packetsize << " must be set" << std::endl; | |
408 | return false; | |
409 | } else { | |
410 | return true; | |
411 | } | |
412 | } | |
413 | ||
414 | bool ErasureCodeJerasureLiberation::check_packetsize(ostream *ss) const | |
415 | { | |
416 | if ((packetsize%(sizeof(int))) != 0) { | |
417 | *ss << "packetsize=" << packetsize | |
418 | << " must be a multiple of sizeof(int) = " << sizeof(int) << std::endl; | |
419 | return false; | |
420 | } else { | |
421 | return true; | |
422 | } | |
423 | } | |
424 | ||
425 | int ErasureCodeJerasureLiberation::revert_to_default(ErasureCodeProfile &profile, | |
426 | ostream *ss) | |
427 | { | |
428 | int err = 0; | |
429 | *ss << "reverting to k=" << DEFAULT_K << ", w=" | |
430 | << DEFAULT_W << ", packetsize=" << DEFAULT_PACKETSIZE << std::endl; | |
431 | profile["k"] = DEFAULT_K; | |
432 | err |= to_int("k", profile, &k, DEFAULT_K, ss); | |
433 | profile["w"] = DEFAULT_W; | |
434 | err |= to_int("w", profile, &w, DEFAULT_W, ss); | |
435 | profile["packetsize"] = DEFAULT_PACKETSIZE; | |
436 | err |= to_int("packetsize", profile, &packetsize, DEFAULT_PACKETSIZE, ss); | |
437 | return err; | |
438 | } | |
439 | ||
440 | int ErasureCodeJerasureLiberation::parse(ErasureCodeProfile &profile, | |
441 | ostream *ss) | |
442 | { | |
443 | int err = ErasureCodeJerasure::parse(profile, ss); | |
444 | err |= to_int("packetsize", profile, &packetsize, DEFAULT_PACKETSIZE, ss); | |
445 | ||
446 | bool error = false; | |
447 | if (!check_k(ss)) | |
448 | error = true; | |
449 | if (!check_w(ss)) | |
450 | error = true; | |
451 | if (!check_packetsize_set(ss) || !check_packetsize(ss)) | |
452 | error = true; | |
453 | if (error) { | |
454 | revert_to_default(profile, ss); | |
455 | err = -EINVAL; | |
456 | } | |
457 | return err; | |
458 | } | |
459 | ||
460 | void ErasureCodeJerasureLiberation::prepare() | |
461 | { | |
462 | bitmatrix = liberation_coding_bitmatrix(k, w); | |
463 | schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix); | |
464 | } | |
465 | ||
466 | // | |
467 | // ErasureCodeJerasureBlaumRoth | |
468 | // | |
469 | bool ErasureCodeJerasureBlaumRoth::check_w(ostream *ss) const | |
470 | { | |
471 | // back in Firefly, w = 7 was the default and produced useable | |
472 | // chunks. Tolerate this value for backward compatibility. | |
473 | if (w == 7) | |
474 | return true; | |
475 | if (w <= 2 || !is_prime(w+1)) { | |
476 | *ss << "w=" << w << " must be greater than two and " | |
477 | << "w+1 must be prime" << std::endl; | |
478 | return false; | |
479 | } else { | |
480 | return true; | |
481 | } | |
482 | } | |
483 | ||
484 | void ErasureCodeJerasureBlaumRoth::prepare() | |
485 | { | |
486 | bitmatrix = blaum_roth_coding_bitmatrix(k, w); | |
487 | schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix); | |
488 | } | |
489 | ||
490 | // | |
491 | // ErasureCodeJerasureLiber8tion | |
492 | // | |
493 | int ErasureCodeJerasureLiber8tion::parse(ErasureCodeProfile &profile, | |
494 | ostream *ss) | |
495 | { | |
496 | int err = ErasureCodeJerasure::parse(profile, ss); | |
497 | profile.erase("m"); | |
498 | err |= to_int("m", profile, &m, DEFAULT_M, ss); | |
499 | profile.erase("w"); | |
500 | err |= to_int("w", profile, &w, DEFAULT_W, ss); | |
501 | err |= to_int("packetsize", profile, &packetsize, DEFAULT_PACKETSIZE, ss); | |
502 | ||
503 | bool error = false; | |
504 | if (!check_k(ss)) | |
505 | error = true; | |
506 | if (!check_packetsize_set(ss)) | |
507 | error = true; | |
508 | if (error) { | |
509 | revert_to_default(profile, ss); | |
510 | err = -EINVAL; | |
511 | } | |
512 | return err; | |
513 | } | |
514 | ||
515 | void ErasureCodeJerasureLiber8tion::prepare() | |
516 | { | |
517 | bitmatrix = liber8tion_coding_bitmatrix(k); | |
518 | schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix); | |
519 | } |