1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph distributed storage system
6 * Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com>
7 * Copyright (C) 2014 Red Hat <contact@redhat.com>
9 * Author: Loic Dachary <loic@dachary.org>
11 * This library is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
18 #include "common/debug.h"
19 #include "ErasureCodeJerasure.h"
20 #include "crush/CrushWrapper.h"
21 #include "osd/osd_types.h"
27 #include "liberation.h"
30 #define LARGEST_VECTOR_WORDSIZE 16
32 #define dout_context g_ceph_context
33 #define dout_subsys ceph_subsys_osd
35 #define dout_prefix _prefix(_dout)
// Stream-prefix helper referenced by the dout_prefix macro defined above:
// writes the literal tag "ErasureCodeJerasure: " to *_dout and returns the
// resulting stream reference.
// NOTE(review): the embedded line numbers jump from 37 to 39, and nothing is
// shown between 39 and the next definition, so some original lines
// (presumably brace-only ones) are absent from this extract.
37 static ostream
& _prefix(std::ostream
* _dout
)
39 return *_dout
<< "ErasureCodeJerasure: ";
// Creates a ruleset called `name` through the `crush` object.
// Visible steps:
//   - crush.add_simple_ruleset() is called with name, ruleset_root,
//     ruleset_failure_domain, the mode string "indep",
//     pg_pool_t::TYPE_ERASURE and ss; its result is stored in ruleid.
//   - crush.set_rule_mask_max_size(ruleid, get_chunk_count()) is called.
//   - the value of crush.get_rule_mask_ruleset(ruleid) is returned.
// NOTE(review): the embedded numbering skips 43-45 and 48-50. The remainder
// of the parameter list (`crush` and `ss` are used but not declared in the
// visible text) and whatever handling of ruleid sits between the two crush
// calls are not shown -- confirm against the complete file.
42 int ErasureCodeJerasure::create_ruleset(const string
&name
,
46 int ruleid
= crush
.add_simple_ruleset(name
, ruleset_root
, ruleset_failure_domain
,
47 "indep", pg_pool_t::TYPE_ERASURE
, ss
);
51 crush
.set_rule_mask_max_size(ruleid
, get_chunk_count());
52 return crush
.get_rule_mask_ruleset(ruleid
);
// Initializes this erasure code from `profile`; messages go to *ss.
// Visible steps:
//   - logs "technique=<technique>" at debug level 10;
//   - stores technique under profile["technique"];
//   - reads "ruleset-root" and "ruleset-failure-domain" via to_string(),
//     passing DEFAULT_RULESET_ROOT / DEFAULT_RULESET_FAILURE_DOMAIN
//     (presumably the fallback values -- to_string() is defined elsewhere);
//   - ORs the result of parse(profile, ss) into err;
//   - calls ErasureCode::init(profile, ss).
// NOTE(review): the embedded numbering skips 57-58, 62, 68-70 and everything
// after 71. The declaration of err, the destination argument of the first
// to_string() call, anything done between parse() and ErasureCode::init(),
// and the return statement are not visible in this extract.
56 int ErasureCodeJerasure::init(ErasureCodeProfile
& profile
, ostream
*ss
)
59 dout(10) << "technique=" << technique
<< dendl
;
60 profile
["technique"] = technique
;
61 err
|= to_string("ruleset-root", profile
,
63 DEFAULT_RULESET_ROOT
, ss
);
64 err
|= to_string("ruleset-failure-domain", profile
,
65 &ruleset_failure_domain
,
66 DEFAULT_RULESET_FAILURE_DOMAIN
, ss
);
67 err
|= parse(profile
, ss
);
71 ErasureCode::init(profile
, ss
);
// Parses the profile entries handled at the ErasureCodeJerasure level; the
// technique-specific parse() methods further down call this first.
// Visible steps:
//   - err starts as the result of ErasureCode::parse(profile, ss);
//   - "k", "m" and "w" are read with to_int() into k, m and w, passing
//     DEFAULT_K, DEFAULT_M and DEFAULT_W;
//   - when chunk_mapping is non-empty and its size differs from k + m, a
//     "mapping ... will be ignored" message is written to *ss and
//     chunk_mapping is cleared;
//   - the result of sanity_check_k(k, ss) is ORed into err.
// NOTE(review): the embedded numbering skips 76-77, 87-88 and everything
// after 89. The second parameter (`ss` is used but not declared in the
// visible text), any statement following chunk_mapping.clear() inside the
// branch, and the return statement are not shown.
75 int ErasureCodeJerasure::parse(ErasureCodeProfile
&profile
,
78 int err
= ErasureCode::parse(profile
, ss
);
79 err
|= to_int("k", profile
, &k
, DEFAULT_K
, ss
);
80 err
|= to_int("m", profile
, &m
, DEFAULT_M
, ss
);
81 err
|= to_int("w", profile
, &w
, DEFAULT_W
, ss
);
82 if (chunk_mapping
.size() > 0 && (int)chunk_mapping
.size() != k
+ m
) {
83 *ss
<< "mapping " << profile
.find("mapping")->second
84 << " maps " << chunk_mapping
.size() << " chunks instead of"
85 << " the expected " << k
+ m
<< " and will be ignored" << std::endl
;
86 chunk_mapping
.clear();
89 err
|= sanity_check_k(k
, ss
);
// Derives a chunk size from object_size, k and get_alignment().
// Visible logic when per_chunk_alignment is set:
//   - chunk_size starts as object_size / k;
//   - the function asserts alignment <= chunk_size;
//   - modulo = chunk_size % alignment, and chunk_size is increased by
//     (alignment - modulo), with a debug message reporting the padded value.
// Remaining visible logic (lines numbered 111-114):
//   - object_size is rounded up to the next multiple of alignment
//     (padded_length);
//   - the function asserts padded_length % k == 0 and returns
//     padded_length / k.
// NOTE(review): the embedded numbering skips 94, 98-99, 104, 108-110 and
// everything after 114. Any adjustment applied right after the division,
// the condition guarding the padding, the return of the per-chunk path and
// the branch structure separating the two paths are not visible; the
// description above covers only the statements that are shown.
93 unsigned int ErasureCodeJerasure::get_chunk_size(unsigned int object_size
) const
95 unsigned alignment
= get_alignment();
96 if (per_chunk_alignment
) {
97 unsigned chunk_size
= object_size
/ k
;
100 dout(20) << "get_chunk_size: chunk_size " << chunk_size
101 << " must be modulo " << alignment
<< dendl
;
102 assert(alignment
<= chunk_size
);
103 unsigned modulo
= chunk_size
% alignment
;
105 dout(10) << "get_chunk_size: " << chunk_size
106 << " padded to " << chunk_size
+ alignment
- modulo
<< dendl
;
107 chunk_size
+= alignment
- modulo
;
111 unsigned tail
= object_size
% alignment
;
112 unsigned padded_length
= object_size
+ ( tail
? ( alignment
- tail
) : 0 );
113 assert(padded_length
% k
== 0);
114 return padded_length
/ k
;
// Encodes the buffers held in *encoded.
// Visible steps:
//   - for every i in [0, k + m), chunks[i] is set to (*encoded)[i].c_str();
//   - jerasure_encode() is called with &chunks[0], &chunks[k] and the
//     length of (*encoded)[0].
// want_to_encode is not referenced in the visible lines.
// NOTE(review): the embedded numbering skips 120-121 and everything after
// 124, so the declaration of `chunks` and the return statement are not
// shown in this extract.
118 int ErasureCodeJerasure::encode_chunks(const set
<int> &want_to_encode
,
119 map
<int, bufferlist
> *encoded
)
122 for (int i
= 0; i
< k
+ m
; i
++)
123 chunks
[i
] = (*encoded
)[i
].c_str();
124 jerasure_encode(&chunks
[0], &chunks
[k
], (*encoded
)[0].length());
// Decodes using the buffers in *decoded, treating every index that is
// missing from `chunks` as an erasure.
// Visible steps:
//   - blocksize is the length of the first entry of `chunks`;
//   - for every i in [0, k + m): when i is not a key of `chunks`, i is
//     written to erasures[erasures_count]; (*decoded)[i].c_str() is stored
//     in data[i] and in coding[i - k];
//   - erasures[erasures_count] is set to -1 after the loop;
//   - the function asserts erasures_count > 0 and returns
//     jerasure_decode(erasures, data, coding, blocksize).
// want_to_read is not referenced in the visible lines.
// NOTE(review): the embedded numbering skips 131, 135-136, 140-142, 144,
// 146, 148 and everything after 150. The declarations of data and coding,
// the statement that advances erasures_count, and the condition selecting
// between the data[] and coding[] assignments are not visible.
// NOTE(review): `erasures` is sized with the expression k + m + 1; if k and
// m are ordinary int members (their addresses are passed to to_int()
// elsewhere in this file), this is a variable-length array, which is a
// compiler extension rather than standard C++ -- confirm.
128 int ErasureCodeJerasure::decode_chunks(const set
<int> &want_to_read
,
129 const map
<int, bufferlist
> &chunks
,
130 map
<int, bufferlist
> *decoded
)
132 unsigned blocksize
= (*chunks
.begin()).second
.length();
133 int erasures
[k
+ m
+ 1];
134 int erasures_count
= 0;
137 for (int i
= 0; i
< k
+ m
; i
++) {
138 if (chunks
.find(i
) == chunks
.end()) {
139 erasures
[erasures_count
] = i
;
143 data
[i
] = (*decoded
)[i
].c_str();
145 coding
[i
- k
] = (*decoded
)[i
].c_str();
147 erasures
[erasures_count
] = -1;
149 assert(erasures_count
> 0);
150 return jerasure_decode(erasures
, data
, coding
, blocksize
);
// Table-driven primality test: `value` is compared against each entry of
// prime55. The four visible rows list 55 numbers, the primes from 2
// through 257, which matches the loop bound of 55.
// NOTE(review): the embedded numbering skips 154-155, 160-161 and everything
// after 163. The declaration of prime55 and of i, the statement executed on
// a match and the fall-through result are not shown; presumably a match
// yields true and anything else (including primes above 257) yields false
// -- confirm against the complete file.
153 bool ErasureCodeJerasure::is_prime(int value
)
156 2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,
157 73,79,83,89,97,101,103,107,109,113,127,131,137,139,149,
158 151,157,163,167,173,179,
159 181,191,193,197,199,211,223,227,229,233,239,241,251,257
162 for (i
= 0; i
< 55; i
++)
163 if (value
== prime55
[i
])
169 // ErasureCodeJerasureReedSolomonVandermonde
// Encoding hook for the Reed-Solomon Vandermonde technique: the one visible
// statement forwards k, m, w, matrix, data, coding and blocksize to
// jerasure_matrix_encode().
// NOTE(review): the embedded numbering skips 172-174, so the declarations of
// the `coding` and `blocksize` parameters are not shown in this extract.
171 void ErasureCodeJerasureReedSolomonVandermonde::jerasure_encode(char **data
,
175 jerasure_matrix_encode(k
, m
, w
, matrix
, data
, coding
, blocksize
);
// Decoding hook for the Reed-Solomon Vandermonde technique: returns the
// result of jerasure_matrix_decode(k, m, w, matrix, 1, erasures, data,
// coding, blocksize). The meaning of the literal 1 is defined by the
// jerasure library, not by this file.
// NOTE(review): the embedded numbering skips 179-182, so the declarations of
// the `data`, `coding` and `blocksize` parameters are not shown.
178 int ErasureCodeJerasureReedSolomonVandermonde::jerasure_decode(int *erasures
,
183 return jerasure_matrix_decode(k
, m
, w
, matrix
, 1,
184 erasures
, data
, coding
, blocksize
);
// Alignment for the Reed-Solomon Vandermonde technique (presumably the
// value ErasureCodeJerasure::get_chunk_size() obtains via get_alignment()).
//   - per_chunk_alignment set: returns w * LARGEST_VECTOR_WORDSIZE.
//   - otherwise (visible part): alignment starts as k*w*sizeof(int) and is
//     replaced by k*w*LARGEST_VECTOR_WORDSIZE when w*sizeof(int) is not a
//     multiple of LARGEST_VECTOR_WORDSIZE.
// NOTE(review): the embedded numbering skips 188, 191 and everything after
// 194; the branch separator and the final return are not shown.
187 unsigned ErasureCodeJerasureReedSolomonVandermonde::get_alignment() const
189 if (per_chunk_alignment
) {
190 return w
* LARGEST_VECTOR_WORDSIZE
;
192 unsigned alignment
= k
*w
*sizeof(int);
193 if ( ((w
*sizeof(int))%LARGEST_VECTOR_WORDSIZE
) )
194 alignment
= k
*w
*LARGEST_VECTOR_WORDSIZE
;
// Profile parsing for the Reed-Solomon Vandermonde technique.
// Visible steps:
//   - ORs the result of ErasureCodeJerasure::parse(profile, ss) into err;
//   - when w is none of 8, 16 or 32, writes a "must be one of {8, 16, 32}"
//     message to *ss and re-reads "w" through to_int() with DEFAULT_W;
//   - reads "jerasure-per-chunk-alignment" into per_chunk_alignment via
//     to_bool(), passing "false".
// NOTE(review): the embedded numbering skips 200-202, 207, 209-210 and
// everything after 212. The declaration of err, the second parameter, how
// the profile entry for "w" is reset before the re-read, whether an error
// code is assigned in that branch, and the return statement are not shown.
199 int ErasureCodeJerasureReedSolomonVandermonde::parse(ErasureCodeProfile
&profile
,
203 err
|= ErasureCodeJerasure::parse(profile
, ss
);
204 if (w
!= 8 && w
!= 16 && w
!= 32) {
205 *ss
<< "ReedSolomonVandermonde: w=" << w
206 << " must be one of {8, 16, 32} : revert to " << DEFAULT_W
<< std::endl
;
208 err
|= to_int("w", profile
, &w
, DEFAULT_W
, ss
);
211 err
|= to_bool("jerasure-per-chunk-alignment", profile
,
212 &per_chunk_alignment
, "false", ss
);
// Stores the result of reed_sol_vandermonde_coding_matrix(k, m, w) in
// `matrix`, the member that jerasure_encode()/jerasure_decode() of this
// class pass to the jerasure matrix routines.
// NOTE(review): the embedded numbering skips 217 and shows nothing after
// 218; presumably only brace lines are absent.
216 void ErasureCodeJerasureReedSolomonVandermonde::prepare()
218 matrix
= reed_sol_vandermonde_coding_matrix(k
, m
, w
);
222 // ErasureCodeJerasureReedSolomonRAID6
// Encoding hook for the Reed-Solomon RAID6 technique: the one visible
// statement calls reed_sol_r6_encode(k, w, data, coding, blocksize). Unlike
// the Vandermonde variant, neither m nor matrix is passed here.
// NOTE(review): the embedded numbering skips 225-227, so the declarations of
// the `coding` and `blocksize` parameters are not shown.
224 void ErasureCodeJerasureReedSolomonRAID6::jerasure_encode(char **data
,
228 reed_sol_r6_encode(k
, w
, data
, coding
, blocksize
);
// Decoding hook for the Reed-Solomon RAID6 technique: returns the result of
// jerasure_matrix_decode(k, m, w, matrix, 1, erasures, data, coding,
// blocksize), the same call shape used by the Vandermonde variant.
// NOTE(review): the embedded numbering skips 232-235, so the declarations of
// the `data`, `coding` and `blocksize` parameters are not shown.
231 int ErasureCodeJerasureReedSolomonRAID6::jerasure_decode(int *erasures
,
236 return jerasure_matrix_decode(k
, m
, w
, matrix
, 1, erasures
, data
, coding
, blocksize
);
// Alignment for the Reed-Solomon RAID6 technique; the visible statements are
// the same as in ErasureCodeJerasureReedSolomonVandermonde::get_alignment().
//   - per_chunk_alignment set: returns w * LARGEST_VECTOR_WORDSIZE.
//   - otherwise (visible part): alignment starts as k*w*sizeof(int) and is
//     replaced by k*w*LARGEST_VECTOR_WORDSIZE when w*sizeof(int) is not a
//     multiple of LARGEST_VECTOR_WORDSIZE.
// NOTE(review): the embedded numbering skips 240, 243 and everything after
// 246; the branch separator and the final return are not shown.
239 unsigned ErasureCodeJerasureReedSolomonRAID6::get_alignment() const
241 if (per_chunk_alignment
) {
242 return w
* LARGEST_VECTOR_WORDSIZE
;
244 unsigned alignment
= k
*w
*sizeof(int);
245 if ( ((w
*sizeof(int))%LARGEST_VECTOR_WORDSIZE
) )
246 alignment
= k
*w
*LARGEST_VECTOR_WORDSIZE
;
// Profile parsing for the Reed-Solomon RAID6 technique.
// Visible steps:
//   - err starts as the result of ErasureCodeJerasure::parse(profile, ss);
//   - when w is none of 8, 16 or 32, writes a "must be one of {8, 16, 32} :
//     revert to 8" message to *ss and re-reads "w" through to_int() with
//     DEFAULT_W.
// NOTE(review): the embedded numbering skips 252-253, 255-256, 260 and
// everything after 261. Two statements between the base-class parse and the
// w check are absent, as are the profile update preceding the re-read, any
// error code assigned in the branch, and the return statement. The message
// hard-codes "8" while the re-read passes DEFAULT_W; whether the two always
// agree cannot be determined from this extract.
251 int ErasureCodeJerasureReedSolomonRAID6::parse(ErasureCodeProfile
&profile
,
254 int err
= ErasureCodeJerasure::parse(profile
, ss
);
257 if (w
!= 8 && w
!= 16 && w
!= 32) {
258 *ss
<< "ReedSolomonRAID6: w=" << w
259 << " must be one of {8, 16, 32} : revert to 8 " << std::endl
;
261 err
|= to_int("w", profile
, &w
, DEFAULT_W
, ss
);
// Stores the result of reed_sol_r6_coding_matrix(k, w) in `matrix`, the
// member that jerasure_decode() of this class passes to
// jerasure_matrix_decode().
// NOTE(review): the embedded numbering skips 268 and shows nothing after
// 269; presumably only brace lines are absent.
267 void ErasureCodeJerasureReedSolomonRAID6::prepare()
269 matrix
= reed_sol_r6_coding_matrix(k
, w
);
273 // ErasureCodeJerasureCauchy
// Encoding hook for the Cauchy techniques: the one visible statement calls
// jerasure_schedule_encode(k, m, w, schedule, data, coding, blocksize,
// packetsize), i.e. it uses the schedule built by prepare_schedule().
// NOTE(review): the embedded numbering skips 276-278, so the declarations of
// the `coding` and `blocksize` parameters are not shown.
275 void ErasureCodeJerasureCauchy::jerasure_encode(char **data
,
279 jerasure_schedule_encode(k
, m
, w
, schedule
,
280 data
, coding
, blocksize
, packetsize
);
// Decoding hook for the Cauchy techniques: returns the result of
// jerasure_schedule_decode_lazy(k, m, w, bitmatrix, erasures, data, coding,
// blocksize, packetsize, 1). The meaning of the trailing 1 is defined by
// the jerasure library, not by this file.
// NOTE(review): the embedded numbering skips 284-287, so the declarations of
// the `data`, `coding` and `blocksize` parameters are not shown.
283 int ErasureCodeJerasureCauchy::jerasure_decode(int *erasures
,
288 return jerasure_schedule_decode_lazy(k
, m
, w
, bitmatrix
,
289 erasures
, data
, coding
, blocksize
, packetsize
, 1);
// Alignment for the Cauchy techniques.
//   - per_chunk_alignment set (visible part): alignment starts as
//     w * packetsize; modulo = alignment % LARGEST_VECTOR_WORDSIZE; alignment
//     is increased by (LARGEST_VECTOR_WORDSIZE - modulo).
//   - otherwise (visible part): alignment starts as
//     k*w*packetsize*sizeof(int) and is replaced by
//     k*w*packetsize*LARGEST_VECTOR_WORDSIZE when w*packetsize*sizeof(int)
//     is not a multiple of LARGEST_VECTOR_WORDSIZE.
// NOTE(review): the embedded numbering skips 293, 297, 299-300 and
// everything after 303. The condition guarding the increment (presumably a
// non-zero modulo), both return statements and the branch separator are not
// shown in this extract.
292 unsigned ErasureCodeJerasureCauchy::get_alignment() const
294 if (per_chunk_alignment
) {
295 unsigned alignment
= w
* packetsize
;
296 unsigned modulo
= alignment
% LARGEST_VECTOR_WORDSIZE
;
298 alignment
+= LARGEST_VECTOR_WORDSIZE
- modulo
;
301 unsigned alignment
= k
*w
*packetsize
*sizeof(int);
302 if ( ((w
*packetsize
*sizeof(int))%LARGEST_VECTOR_WORDSIZE
) )
303 alignment
= k
*w
*packetsize
*LARGEST_VECTOR_WORDSIZE
;
// Profile parsing for the Cauchy techniques.
// Visible steps:
//   - err starts as the result of ErasureCodeJerasure::parse(profile, ss);
//   - "packetsize" is read into packetsize via to_int() with
//     DEFAULT_PACKETSIZE;
//   - "jerasure-per-chunk-alignment" is read into per_chunk_alignment via
//     to_bool(), passing "false".
// NOTE(review): the embedded numbering skips 309-310 and everything after
// 314; the second parameter (`ss` is used but not declared in the visible
// text) and the return statement are not shown.
308 int ErasureCodeJerasureCauchy::parse(ErasureCodeProfile
&profile
,
311 int err
= ErasureCodeJerasure::parse(profile
, ss
);
312 err
|= to_int("packetsize", profile
, &packetsize
, DEFAULT_PACKETSIZE
, ss
);
313 err
|= to_bool("jerasure-per-chunk-alignment", profile
,
314 &per_chunk_alignment
, "false", ss
);
// Builds the two members used by this class's encode/decode hooks from a
// coding matrix:
//   - bitmatrix = jerasure_matrix_to_bitmatrix(k, m, w, matrix);
//   - schedule  = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix).
// `matrix` is only read here; nothing in the visible lines releases it.
// NOTE(review): the embedded numbering skips 319 and shows nothing after
// 321; presumably only brace lines are absent.
318 void ErasureCodeJerasureCauchy::prepare_schedule(int *matrix
)
320 bitmatrix
= jerasure_matrix_to_bitmatrix(k
, m
, w
, matrix
);
321 schedule
= jerasure_smart_bitmatrix_to_schedule(k
, m
, w
, bitmatrix
);
325 // ErasureCodeJerasureCauchyOrig
// Obtains a coding matrix from cauchy_original_coding_matrix(k, m, w) and
// hands it to prepare_schedule().
// NOTE(review): the embedded numbering skips 328 and shows nothing between
// 330 and the next section comment at 335. Whether the matrix returned by
// the library is released after prepare_schedule() cannot be told from the
// visible lines -- confirm against the complete file.
327 void ErasureCodeJerasureCauchyOrig::prepare()
329 int *matrix
= cauchy_original_coding_matrix(k
, m
, w
);
330 prepare_schedule(matrix
);
335 // ErasureCodeJerasureCauchyGood
// Obtains a coding matrix from cauchy_good_general_coding_matrix(k, m, w)
// and hands it to prepare_schedule().
// NOTE(review): the embedded numbering skips 338 and shows nothing between
// 340 and the next section comment at 345. Whether the matrix returned by
// the library is released after prepare_schedule() cannot be told from the
// visible lines -- confirm against the complete file.
337 void ErasureCodeJerasureCauchyGood::prepare()
339 int *matrix
= cauchy_good_general_coding_matrix(k
, m
, w
);
340 prepare_schedule(matrix
);
345 // ErasureCodeJerasureLiberation
// Destructor. The only visible statement releases `schedule` through
// jerasure_free_schedule().
// NOTE(review): the embedded numbering jumps from 347 to 352, so four
// original lines are absent. Any guard around this call and any cleanup of
// other members (for example bitmatrix, which prepare() assigns) cannot be
// seen in this extract -- confirm against the complete file.
347 ErasureCodeJerasureLiberation::~ErasureCodeJerasureLiberation()
352 jerasure_free_schedule(schedule
);
// Encoding hook for the Liberation family: the one visible statement calls
// jerasure_schedule_encode(k, m, w, schedule, data, coding, blocksize,
// packetsize).
// NOTE(review): the embedded numbering skips 356-358, so the declarations of
// the `coding` and `blocksize` parameters are not shown.
355 void ErasureCodeJerasureLiberation::jerasure_encode(char **data
,
359 jerasure_schedule_encode(k
, m
, w
, schedule
, data
,
360 coding
, blocksize
, packetsize
);
// Decoding hook for the Liberation family: returns the result of
// jerasure_schedule_decode_lazy(k, m, w, bitmatrix, erasures, data, coding,
// blocksize, packetsize, 1).
// NOTE(review): the embedded numbering skips 364-367, so the declarations of
// the `data`, `coding` and `blocksize` parameters are not shown.
363 int ErasureCodeJerasureLiberation::jerasure_decode(int *erasures
,
368 return jerasure_schedule_decode_lazy(k
, m
, w
, bitmatrix
, erasures
, data
,
369 coding
, blocksize
, packetsize
, 1);
// Alignment for the Liberation family. No per_chunk_alignment test appears
// in the visible lines: alignment starts as k*w*packetsize*sizeof(int) and
// is replaced by k*w*packetsize*LARGEST_VECTOR_WORDSIZE when
// w*packetsize*sizeof(int) is not a multiple of LARGEST_VECTOR_WORDSIZE.
// NOTE(review): the embedded numbering skips 373 and everything after 376;
// the return statement is not shown.
372 unsigned ErasureCodeJerasureLiberation::get_alignment() const
374 unsigned alignment
= k
*w
*packetsize
*sizeof(int);
375 if ( ((w
*packetsize
*sizeof(int))%LARGEST_VECTOR_WORDSIZE
) )
376 alignment
= k
*w
*packetsize
*LARGEST_VECTOR_WORDSIZE
;
// Validation helper for k. The only visible statement writes
// "k=<k> must be less than or equal to w=<w>" to *ss.
// NOTE(review): the embedded numbering skips 381-382 and everything after
// 383. The condition that triggers the message (presumably k > w, going by
// the message text) and both return values are not shown -- confirm against
// the complete file.
380 bool ErasureCodeJerasureLiberation::check_k(ostream
*ss
) const
383 *ss
<< "k=" << k
<< " must be less than or equal to w=" << w
<< std::endl
;
// Validation helper for w: when w <= 2 or is_prime(w) is false, writes
// "w=<w> must be greater than two and be prime" to *ss.
// NOTE(review): the embedded numbering skips 391 and everything after 393;
// the return values of both outcomes are not shown (presumably false on the
// message path and true otherwise).
390 bool ErasureCodeJerasureLiberation::check_w(ostream
*ss
) const
392 if (w
<= 2 || !is_prime(w
)) {
393 *ss
<< "w=" << w
<< " must be greater than two and be prime" << std::endl
;
// Validation helper: when packetsize == 0, writes
// "packetsize=<packetsize> must be set" to *ss.
// NOTE(review): the embedded numbering skips 401 and everything after 403;
// the return values of both outcomes are not shown (presumably false on the
// message path and true otherwise).
400 bool ErasureCodeJerasureLiberation::check_packetsize_set(ostream
*ss
) const
402 if (packetsize
== 0) {
403 *ss
<< "packetsize=" << packetsize
<< " must be set" << std::endl
;
// Validation helper: when packetsize is not a multiple of sizeof(int),
// writes "packetsize=<packetsize> must be a multiple of sizeof(int) = <n>"
// to *ss.
// NOTE(review): the embedded numbering skips 411 and everything after 414;
// the return values of both outcomes are not shown (presumably false on the
// message path and true otherwise).
410 bool ErasureCodeJerasureLiberation::check_packetsize(ostream
*ss
) const
412 if ((packetsize
%(sizeof(int))) != 0) {
413 *ss
<< "packetsize=" << packetsize
414 << " must be a multiple of sizeof(int) = " << sizeof(int) << std::endl
;
// Resets k, w and packetsize to their defaults.
// Visible steps:
//   - writes "reverting to k=..., w=..., packetsize=..." to *ss;
//   - for each of "k", "w" and "packetsize": overwrites the profile entry
//     with the matching DEFAULT_* value, then re-reads it through to_int()
//     into the member, ORing the result into err.
// NOTE(review): the embedded numbering skips 422-424 and everything after
// 432; the second parameter (`ss` is used but not declared in the visible
// text), the declaration of err and the return statement are not shown.
421 int ErasureCodeJerasureLiberation::revert_to_default(ErasureCodeProfile
&profile
,
425 *ss
<< "reverting to k=" << DEFAULT_K
<< ", w="
426 << DEFAULT_W
<< ", packetsize=" << DEFAULT_PACKETSIZE
<< std::endl
;
427 profile
["k"] = DEFAULT_K
;
428 err
|= to_int("k", profile
, &k
, DEFAULT_K
, ss
);
429 profile
["w"] = DEFAULT_W
;
430 err
|= to_int("w", profile
, &w
, DEFAULT_W
, ss
);
431 profile
["packetsize"] = DEFAULT_PACKETSIZE
;
432 err
|= to_int("packetsize", profile
, &packetsize
, DEFAULT_PACKETSIZE
, ss
);
// Profile parsing for the Liberation family.
// Visible steps:
//   - err starts as the result of ErasureCodeJerasure::parse(profile, ss);
//   - "packetsize" is read into packetsize via to_int() with
//     DEFAULT_PACKETSIZE;
//   - a test on !check_packetsize_set(ss) || !check_packetsize(ss);
//   - a call to revert_to_default(profile, ss), whose return value is not
//     used in the visible text.
// NOTE(review): the embedded numbering skips 437-438, 441-446, 448-449 and
// everything after 450. Six lines ahead of the packetsize test are absent
// (check_k() and check_w() exist in this class, but their use here is not
// visible), as are the statement the test controls, the condition guarding
// revert_to_default(), any error code assignment and the return statement.
436 int ErasureCodeJerasureLiberation::parse(ErasureCodeProfile
&profile
,
439 int err
= ErasureCodeJerasure::parse(profile
, ss
);
440 err
|= to_int("packetsize", profile
, &packetsize
, DEFAULT_PACKETSIZE
, ss
);
447 if (!check_packetsize_set(ss
) || !check_packetsize(ss
))
450 revert_to_default(profile
, ss
);
// Builds the members used by this class's encode/decode hooks:
//   - bitmatrix = liberation_coding_bitmatrix(k, w);
//   - schedule  = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix).
// NOTE(review): the embedded numbering skips 457 and shows nothing after
// 459; presumably only brace lines are absent.
456 void ErasureCodeJerasureLiberation::prepare()
458 bitmatrix
= liberation_coding_bitmatrix(k
, w
);
459 schedule
= jerasure_smart_bitmatrix_to_schedule(k
, m
, w
, bitmatrix
);
463 // ErasureCodeJerasureBlaumRoth
// Validation helper for w in the Blaum-Roth technique: when w <= 2 or
// is_prime(w+1) is false, writes "w=<w> must be greater than two and w+1
// must be prime" to *ss.
// NOTE(review): the embedded numbering skips 466, 469-470 and everything
// after 473. The original comment below says w = 7 is tolerated for
// backward compatibility, but the statements implementing that exception
// (the two lines right after the comment) are absent from this extract, as
// are the return values of both outcomes.
465 bool ErasureCodeJerasureBlaumRoth::check_w(ostream
*ss
) const
467 // back in Firefly, w = 7 was the default and produced useable
468 // chunks. Tolerate this value for backward compatibility.
471 if (w
<= 2 || !is_prime(w
+1)) {
472 *ss
<< "w=" << w
<< " must be greater than two and "
473 << "w+1 must be prime" << std::endl
;
// Builds bitmatrix from blaum_roth_coding_bitmatrix(k, w) and schedule from
// jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix).
// NOTE(review): the embedded numbering skips 481 and shows nothing after
// 483; presumably only brace lines are absent.
480 void ErasureCodeJerasureBlaumRoth::prepare()
482 bitmatrix
= blaum_roth_coding_bitmatrix(k
, w
);
483 schedule
= jerasure_smart_bitmatrix_to_schedule(k
, m
, w
, bitmatrix
);
487 // ErasureCodeJerasureLiber8tion
// Profile parsing for the Liber8tion technique.
// Visible steps:
//   - err starts as the result of ErasureCodeJerasure::parse(profile, ss);
//   - "m", "w" and "packetsize" are each (re-)read through to_int() with
//     DEFAULT_M, DEFAULT_W and DEFAULT_PACKETSIZE;
//   - a test on !check_packetsize_set(ss);
//   - a call to revert_to_default(profile, ss), whose return value is not
//     used in the visible text.
// NOTE(review): the embedded numbering skips 490-491, 493, 495, 498-501,
// 503-504 and everything after 505. The single lines ahead of the "m" and
// "w" reads are absent (base-class parse already read both keys, so those
// lines presumably alter the profile first), as are any other checks, the
// statement the packetsize test controls, the condition guarding
// revert_to_default(), any error code assignment and the return statement.
489 int ErasureCodeJerasureLiber8tion::parse(ErasureCodeProfile
&profile
,
492 int err
= ErasureCodeJerasure::parse(profile
, ss
);
494 err
|= to_int("m", profile
, &m
, DEFAULT_M
, ss
);
496 err
|= to_int("w", profile
, &w
, DEFAULT_W
, ss
);
497 err
|= to_int("packetsize", profile
, &packetsize
, DEFAULT_PACKETSIZE
, ss
);
502 if (!check_packetsize_set(ss
))
505 revert_to_default(profile
, ss
);
// Builds bitmatrix from liber8tion_coding_bitmatrix(k) -- only k is passed,
// unlike the Liberation and Blaum-Roth variants, which also pass w -- and
// schedule from jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix).
// NOTE(review): the embedded numbering skips 512 and the extract ends at
// 514, so the closing of the definition is not shown.
511 void ErasureCodeJerasureLiber8tion::prepare()
513 bitmatrix
= liber8tion_coding_bitmatrix(k
);
514 schedule
= jerasure_smart_bitmatrix_to_schedule(k
, m
, w
, bitmatrix
);