// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph distributed storage system
 *
 * Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com>
 * Copyright (C) 2014 Red Hat <contact@redhat.com>
 *
 * Author: Loic Dachary <loic@dachary.org>
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 */
18 #include "common/debug.h"
19 #include "ErasureCodeJerasure.h"
20 #include "crush/CrushWrapper.h"
21 #include "osd/osd_types.h"
30 #include "liberation.h"
33 #define LARGEST_VECTOR_WORDSIZE 16
35 #define dout_context g_ceph_context
36 #define dout_subsys ceph_subsys_osd
38 #define dout_prefix _prefix(_dout)
// Writes the "ErasureCodeJerasure: " tag to the given debug stream and
// returns that stream so the caller can keep appending to it. Reached
// through the dout_prefix macro defined in this file.
static std::ostream& _prefix(std::ostream* _dout)
{
  return *_dout << "ErasureCodeJerasure: ";
}
// Adds a simple "indep" rule of type pg_pool_t::TYPE_ERASURE called `name`
// to `crush`, using ruleset_root and ruleset_failure_domain, and forwards
// `ss` to add_simple_rule(). The visible tail then sets the rule mask max
// size to get_chunk_count() and returns crush.get_rule_mask_ruleset(ruleid).
// NOTE(review): this listing jumps over original lines 46-48 (the rest of
// the parameter list) and 52-54 (between add_simple_rule() and
// set_rule_mask_max_size()); the latter presumably handles a negative
// ruleid -- confirm against the complete file.
45 int ErasureCodeJerasure::create_ruleset(const string
&name
,
49 int ruleid
= crush
.add_simple_rule(
50 name
, ruleset_root
, ruleset_failure_domain
,
51 "indep", pg_pool_t::TYPE_ERASURE
, ss
);
55 crush
.set_rule_mask_max_size(ruleid
, get_chunk_count());
56 return crush
.get_rule_mask_ruleset(ruleid
);
// Initializes the plugin from `profile`: logs the technique at debug level
// 10, stores it under profile["technique"], reads "ruleset-root" and
// "ruleset-failure-domain" with to_string() (DEFAULT_RULESET_ROOT and
// DEFAULT_RULESET_FAILURE_DOMAIN are passed along, presumably as the
// fallback values), runs parse() and calls ErasureCode::init(). The
// to_string() and parse() results are OR-ed into err.
// NOTE(review): original lines 61-62, 66, 72-74 and 76-78 are absent from
// this listing. Line 66 is presumably the output pointer for
// "ruleset-root" (the sibling call passes &ruleset_failure_domain) and
// 72-74 presumably act on err before ErasureCode::init() -- confirm.
60 int ErasureCodeJerasure::init(ErasureCodeProfile
& profile
, ostream
*ss
)
63 dout(10) << "technique=" << technique
<< dendl
;
64 profile
["technique"] = technique
;
65 err
|= to_string("ruleset-root", profile
,
67 DEFAULT_RULESET_ROOT
, ss
);
68 err
|= to_string("ruleset-failure-domain", profile
,
69 &ruleset_failure_domain
,
70 DEFAULT_RULESET_FAILURE_DOMAIN
, ss
);
71 err
|= parse(profile
, ss
);
75 ErasureCode::init(profile
, ss
);
// Parses the settings common to every jerasure technique. Starts from the
// result of ErasureCode::parse(), then reads "k", "m" and "w" with
// to_int(), passing DEFAULT_K / DEFAULT_M / DEFAULT_W alongside. When a
// non-empty chunk_mapping does not hold exactly k + m entries, a message
// is written to *ss and chunk_mapping is cleared. The result of
// sanity_check_k() is OR-ed into err last.
// NOTE(review): original lines 80-81, 91-92 and 94-96 are missing from
// this listing; whether the mapping mismatch also sets an error code
// cannot be told from the visible lines.
79 int ErasureCodeJerasure::parse(ErasureCodeProfile
&profile
,
82 int err
= ErasureCode::parse(profile
, ss
);
83 err
|= to_int("k", profile
, &k
, DEFAULT_K
, ss
);
84 err
|= to_int("m", profile
, &m
, DEFAULT_M
, ss
);
85 err
|= to_int("w", profile
, &w
, DEFAULT_W
, ss
);
86 if (chunk_mapping
.size() > 0 && (int)chunk_mapping
.size() != k
+ m
) {
87 *ss
<< "mapping " << profile
.find("mapping")->second
88 << " maps " << chunk_mapping
.size() << " chunks instead of"
89 << " the expected " << k
+ m
<< " and will be ignored" << std::endl
;
90 chunk_mapping
.clear();
93 err
|= sanity_check_k(k
, ss
);
// Computes the chunk size for an object of `object_size`, based on
// get_alignment().
//  - per_chunk_alignment: starts from object_size / k, asserts that the
//    alignment does not exceed the chunk size, and pads the chunk size by
//    alignment - modulo, where modulo = chunk_size % alignment.
//  - remaining visible statements (presumably the else branch): pad
//    object_size up to a multiple of the alignment, assert that the padded
//    length is divisible by k and return padded_length / k.
// NOTE(review): original lines 98, 102-103, 108, 112-114 and 119-121 are
// absent from this listing. They presumably round object_size / k up,
// guard the padding with a test on modulo and return chunk_size --
// confirm against the complete file.
97 unsigned int ErasureCodeJerasure::get_chunk_size(unsigned int object_size
) const
99 unsigned alignment
= get_alignment();
100 if (per_chunk_alignment
) {
101 unsigned chunk_size
= object_size
/ k
;
104 dout(20) << "get_chunk_size: chunk_size " << chunk_size
105 << " must be modulo " << alignment
<< dendl
;
106 assert(alignment
<= chunk_size
);
107 unsigned modulo
= chunk_size
% alignment
;
109 dout(10) << "get_chunk_size: " << chunk_size
110 << " padded to " << chunk_size
+ alignment
- modulo
<< dendl
;
111 chunk_size
+= alignment
- modulo
;
115 unsigned tail
= object_size
% alignment
;
116 unsigned padded_length
= object_size
+ ( tail
? ( alignment
- tail
) : 0 );
117 assert(padded_length
% k
== 0);
118 return padded_length
/ k
;
// Stores the c_str() pointer of each of the k + m buffers in *encoded into
// chunks[] and hands them to jerasure_encode(): &chunks[0] as the first
// (data) argument, &chunks[k] as the second (coding) argument and the
// length of (*encoded)[0] as the block size. want_to_encode is not
// referenced by the visible lines.
// NOTE(review): the declaration of chunks (original lines 124-125) and
// the return statement (129-130) are missing from this listing.
122 int ErasureCodeJerasure::encode_chunks(const set
<int> &want_to_encode
,
123 map
<int, bufferlist
> *encoded
)
126 for (int i
= 0; i
< k
+ m
; i
++)
127 chunks
[i
] = (*encoded
)[i
].c_str();
128 jerasure_encode(&chunks
[0], &chunks
[k
], (*encoded
)[0].length());
// Rebuilds missing chunks. blocksize is the length of the first entry of
// `chunks`. Every index i in [0, k + m) that is absent from `chunks` is
// recorded in erasures[], and the list is terminated with -1. The c_str()
// pointers of (*decoded)[i] are stored in data[i] or coding[i - k].
// Asserts that erasures_count > 0 and returns the result of
// jerasure_decode(). want_to_read is not referenced by the visible lines.
// NOTE(review): original lines 139-140, 144-146, 148 and 150 are missing
// from this listing (presumably the data/coding declarations, the
// erasures_count increment and the test choosing data vs coding) --
// confirm against the complete file.
132 int ErasureCodeJerasure::decode_chunks(const set
<int> &want_to_read
,
133 const map
<int, bufferlist
> &chunks
,
134 map
<int, bufferlist
> *decoded
)
136 unsigned blocksize
= (*chunks
.begin()).second
.length();
137 int erasures
[k
+ m
+ 1];
138 int erasures_count
= 0;
141 for (int i
= 0; i
< k
+ m
; i
++) {
142 if (chunks
.find(i
) == chunks
.end()) {
143 erasures
[erasures_count
] = i
;
147 data
[i
] = (*decoded
)[i
].c_str();
149 coding
[i
- k
] = (*decoded
)[i
].c_str();
151 erasures
[erasures_count
] = -1;
153 assert(erasures_count
> 0);
154 return jerasure_decode(erasures
, data
, coding
, blocksize
);
157 bool ErasureCodeJerasure::is_prime(int value
)
160 2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,
161 73,79,83,89,97,101,103,107,109,113,127,131,137,139,149,
162 151,157,163,167,173,179,
163 181,191,193,197,199,211,223,227,229,233,239,241,251,257
166 for (i
= 0; i
< 55; i
++)
167 if (value
== prime55
[i
])
// ErasureCodeJerasureReedSolomonVandermonde
175 void ErasureCodeJerasureReedSolomonVandermonde::jerasure_encode(char **data
,
179 jerasure_matrix_encode(k
, m
, w
, matrix
, data
, coding
, blocksize
);
182 int ErasureCodeJerasureReedSolomonVandermonde::jerasure_decode(int *erasures
,
187 return jerasure_matrix_decode(k
, m
, w
, matrix
, 1,
188 erasures
, data
, coding
, blocksize
);
191 unsigned ErasureCodeJerasureReedSolomonVandermonde::get_alignment() const
193 if (per_chunk_alignment
) {
194 return w
* LARGEST_VECTOR_WORDSIZE
;
196 unsigned alignment
= k
*w
*sizeof(int);
197 if ( ((w
*sizeof(int))%LARGEST_VECTOR_WORDSIZE
) )
198 alignment
= k
*w
*LARGEST_VECTOR_WORDSIZE
;
// Parses the Reed-Solomon Vandermonde settings on top of
// ErasureCodeJerasure::parse(). If w is not 8, 16 or 32, a message is
// written to *ss and "w" is read again with to_int(). Finally reads
// "jerasure-per-chunk-alignment" into per_chunk_alignment with to_bool(),
// passing "false" alongside. All results are OR-ed into err.
// NOTE(review): original lines 204-206, 211, 213-214 and 217-219 are
// missing from this listing; line 211 presumably resets profile["w"]
// before the second to_int() -- confirm against the complete file.
203 int ErasureCodeJerasureReedSolomonVandermonde::parse(ErasureCodeProfile
&profile
,
207 err
|= ErasureCodeJerasure::parse(profile
, ss
);
208 if (w
!= 8 && w
!= 16 && w
!= 32) {
209 *ss
<< "ReedSolomonVandermonde: w=" << w
210 << " must be one of {8, 16, 32} : revert to " << DEFAULT_W
<< std::endl
;
212 err
|= to_int("w", profile
, &w
, DEFAULT_W
, ss
);
215 err
|= to_bool("jerasure-per-chunk-alignment", profile
,
216 &per_chunk_alignment
, "false", ss
);
220 void ErasureCodeJerasureReedSolomonVandermonde::prepare()
222 matrix
= reed_sol_vandermonde_coding_matrix(k
, m
, w
);
// ErasureCodeJerasureReedSolomonRAID6
228 void ErasureCodeJerasureReedSolomonRAID6::jerasure_encode(char **data
,
232 reed_sol_r6_encode(k
, w
, data
, coding
, blocksize
);
235 int ErasureCodeJerasureReedSolomonRAID6::jerasure_decode(int *erasures
,
240 return jerasure_matrix_decode(k
, m
, w
, matrix
, 1, erasures
, data
, coding
, blocksize
);
243 unsigned ErasureCodeJerasureReedSolomonRAID6::get_alignment() const
245 if (per_chunk_alignment
) {
246 return w
* LARGEST_VECTOR_WORDSIZE
;
248 unsigned alignment
= k
*w
*sizeof(int);
249 if ( ((w
*sizeof(int))%LARGEST_VECTOR_WORDSIZE
) )
250 alignment
= k
*w
*LARGEST_VECTOR_WORDSIZE
;
// Parses the Reed-Solomon RAID6 settings on top of
// ErasureCodeJerasure::parse(). If w is not 8, 16 or 32, a message is
// written to *ss and "w" is read again with to_int(), OR-ing the result
// into err.
// NOTE(review): original lines 256-257, 259-260, 264 and 266-270 are
// missing from this listing; what happens to m and to profile["w"] cannot
// be told from the visible lines -- confirm against the complete file.
255 int ErasureCodeJerasureReedSolomonRAID6::parse(ErasureCodeProfile
&profile
,
258 int err
= ErasureCodeJerasure::parse(profile
, ss
);
261 if (w
!= 8 && w
!= 16 && w
!= 32) {
262 *ss
<< "ReedSolomonRAID6: w=" << w
263 << " must be one of {8, 16, 32} : revert to 8 " << std::endl
;
265 err
|= to_int("w", profile
, &w
, DEFAULT_W
, ss
);
271 void ErasureCodeJerasureReedSolomonRAID6::prepare()
273 matrix
= reed_sol_r6_coding_matrix(k
, w
);
// ErasureCodeJerasureCauchy
279 void ErasureCodeJerasureCauchy::jerasure_encode(char **data
,
283 jerasure_schedule_encode(k
, m
, w
, schedule
,
284 data
, coding
, blocksize
, packetsize
);
287 int ErasureCodeJerasureCauchy::jerasure_decode(int *erasures
,
292 return jerasure_schedule_decode_lazy(k
, m
, w
, bitmatrix
,
293 erasures
, data
, coding
, blocksize
, packetsize
, 1);
// Computes the alignment used by get_chunk_size().
//  - per_chunk_alignment: starts from w * packetsize and adds
//    LARGEST_VECTOR_WORDSIZE - (alignment % LARGEST_VECTOR_WORDSIZE).
//  - remaining visible statements (presumably the else branch):
//    k * w * packetsize * sizeof(int), replaced by
//    k * w * packetsize * LARGEST_VECTOR_WORDSIZE when
//    w * packetsize * sizeof(int) is not a multiple of
//    LARGEST_VECTOR_WORDSIZE.
// NOTE(review): original lines 297, 301, 303-304 and 308-310 are missing
// from this listing. Line 301 presumably guards the += with a test on
// modulo, and the return statements are not visible -- confirm against
// the complete file.
296 unsigned ErasureCodeJerasureCauchy::get_alignment() const
298 if (per_chunk_alignment
) {
299 unsigned alignment
= w
* packetsize
;
300 unsigned modulo
= alignment
% LARGEST_VECTOR_WORDSIZE
;
302 alignment
+= LARGEST_VECTOR_WORDSIZE
- modulo
;
305 unsigned alignment
= k
*w
*packetsize
*sizeof(int);
306 if ( ((w
*packetsize
*sizeof(int))%LARGEST_VECTOR_WORDSIZE
) )
307 alignment
= k
*w
*packetsize
*LARGEST_VECTOR_WORDSIZE
;
312 int ErasureCodeJerasureCauchy::parse(ErasureCodeProfile
&profile
,
315 int err
= ErasureCodeJerasure::parse(profile
, ss
);
316 err
|= to_int("packetsize", profile
, &packetsize
, DEFAULT_PACKETSIZE
, ss
);
317 err
|= to_bool("jerasure-per-chunk-alignment", profile
,
318 &per_chunk_alignment
, "false", ss
);
322 void ErasureCodeJerasureCauchy::prepare_schedule(int *matrix
)
324 bitmatrix
= jerasure_matrix_to_bitmatrix(k
, m
, w
, matrix
);
325 schedule
= jerasure_smart_bitmatrix_to_schedule(k
, m
, w
, bitmatrix
);
// ErasureCodeJerasureCauchyOrig
// Obtains a coding matrix from cauchy_original_coding_matrix(k, m, w) and
// passes it to prepare_schedule().
// NOTE(review): original line 335 is missing from this listing; it
// presumably releases `matrix` after prepare_schedule() -- confirm
// against the complete file.
331 void ErasureCodeJerasureCauchyOrig::prepare()
333 int *matrix
= cauchy_original_coding_matrix(k
, m
, w
);
334 prepare_schedule(matrix
);
// ErasureCodeJerasureCauchyGood
// Obtains a coding matrix from cauchy_good_general_coding_matrix(k, m, w)
// and passes it to prepare_schedule().
// NOTE(review): original line 345 is missing from this listing; it
// presumably releases `matrix` after prepare_schedule() -- confirm
// against the complete file.
341 void ErasureCodeJerasureCauchyGood::prepare()
343 int *matrix
= cauchy_good_general_coding_matrix(k
, m
, w
);
344 prepare_schedule(matrix
);
// ErasureCodeJerasureLiberation
// Destructor: releases the schedule with jerasure_free_schedule().
// NOTE(review): original lines 352-355 are missing from this listing;
// they presumably release bitmatrix and guard against null pointers --
// confirm against the complete file.
351 ErasureCodeJerasureLiberation::~ErasureCodeJerasureLiberation()
356 jerasure_free_schedule(schedule
);
359 void ErasureCodeJerasureLiberation::jerasure_encode(char **data
,
363 jerasure_schedule_encode(k
, m
, w
, schedule
, data
,
364 coding
, blocksize
, packetsize
);
367 int ErasureCodeJerasureLiberation::jerasure_decode(int *erasures
,
372 return jerasure_schedule_decode_lazy(k
, m
, w
, bitmatrix
, erasures
, data
,
373 coding
, blocksize
, packetsize
, 1);
376 unsigned ErasureCodeJerasureLiberation::get_alignment() const
378 unsigned alignment
= k
*w
*packetsize
*sizeof(int);
379 if ( ((w
*packetsize
*sizeof(int))%LARGEST_VECTOR_WORDSIZE
) )
380 alignment
= k
*w
*packetsize
*LARGEST_VECTOR_WORDSIZE
;
// Validates k against w. The only visible statement writes
// "k=<k> must be less than or equal to w=<w>" to *ss.
// NOTE(review): original lines 385-386 and 388-392 are missing from this
// listing; the condition is presumably k > w and the function presumably
// returns false after the message and true otherwise -- confirm against
// the complete file.
384 bool ErasureCodeJerasureLiberation::check_k(ostream
*ss
) const
387 *ss
<< "k=" << k
<< " must be less than or equal to w=" << w
<< std::endl
;
394 bool ErasureCodeJerasureLiberation::check_w(ostream
*ss
) const
396 if (w
<= 2 || !is_prime(w
)) {
397 *ss
<< "w=" << w
<< " must be greater than two and be prime" << std::endl
;
404 bool ErasureCodeJerasureLiberation::check_packetsize_set(ostream
*ss
) const
406 if (packetsize
== 0) {
407 *ss
<< "packetsize=" << packetsize
<< " must be set" << std::endl
;
414 bool ErasureCodeJerasureLiberation::check_packetsize(ostream
*ss
) const
416 if ((packetsize
%(sizeof(int))) != 0) {
417 *ss
<< "packetsize=" << packetsize
418 << " must be a multiple of sizeof(int) = " << sizeof(int) << std::endl
;
425 int ErasureCodeJerasureLiberation::revert_to_default(ErasureCodeProfile
&profile
,
429 *ss
<< "reverting to k=" << DEFAULT_K
<< ", w="
430 << DEFAULT_W
<< ", packetsize=" << DEFAULT_PACKETSIZE
<< std::endl
;
431 profile
["k"] = DEFAULT_K
;
432 err
|= to_int("k", profile
, &k
, DEFAULT_K
, ss
);
433 profile
["w"] = DEFAULT_W
;
434 err
|= to_int("w", profile
, &w
, DEFAULT_W
, ss
);
435 profile
["packetsize"] = DEFAULT_PACKETSIZE
;
436 err
|= to_int("packetsize", profile
, &packetsize
, DEFAULT_PACKETSIZE
, ss
);
// Parses the liberation settings on top of ErasureCodeJerasure::parse()
// and reads "packetsize" with to_int(), OR-ing the result into err. The
// visible lines also test check_packetsize_set() and check_packetsize(),
// and call revert_to_default().
// NOTE(review): original lines 441-442, 445-450, 452-453 and 455-459 are
// missing from this listing, so the k / w checks, the condition guarding
// revert_to_default() and the returned value cannot be told from here --
// confirm against the complete file.
440 int ErasureCodeJerasureLiberation::parse(ErasureCodeProfile
&profile
,
443 int err
= ErasureCodeJerasure::parse(profile
, ss
);
444 err
|= to_int("packetsize", profile
, &packetsize
, DEFAULT_PACKETSIZE
, ss
);
451 if (!check_packetsize_set(ss
) || !check_packetsize(ss
))
454 revert_to_default(profile
, ss
);
460 void ErasureCodeJerasureLiberation::prepare()
462 bitmatrix
= liberation_coding_bitmatrix(k
, w
);
463 schedule
= jerasure_smart_bitmatrix_to_schedule(k
, m
, w
, bitmatrix
);
// ErasureCodeJerasureBlaumRoth
// Validates w for the Blaum-Roth technique: when w <= 2 or w + 1 is not
// prime according to is_prime(), an explanation is written to *ss.
// NOTE(review): original lines 470, 473-474 and 478-482 are missing from
// this listing. Lines 473-474 presumably implement the w = 7 exception
// described in the comment below, and the return statements are not
// visible -- confirm against the complete file.
469 bool ErasureCodeJerasureBlaumRoth::check_w(ostream
*ss
) const
471 // back in Firefly, w = 7 was the default and produced useable
472 // chunks. Tolerate this value for backward compatibility.
475 if (w
<= 2 || !is_prime(w
+1)) {
476 *ss
<< "w=" << w
<< " must be greater than two and "
477 << "w+1 must be prime" << std::endl
;
484 void ErasureCodeJerasureBlaumRoth::prepare()
486 bitmatrix
= blaum_roth_coding_bitmatrix(k
, w
);
487 schedule
= jerasure_smart_bitmatrix_to_schedule(k
, m
, w
, bitmatrix
);
// ErasureCodeJerasureLiber8tion
// Parses the liber8tion settings on top of ErasureCodeJerasure::parse():
// "m", "w" and "packetsize" are each read with to_int() and OR-ed into
// err. The visible lines also test check_packetsize_set() and call
// revert_to_default().
// NOTE(review): original lines 494-495, 497, 499, 502-505, 507-508 and
// 510-514 are missing from this listing, so any profile resets before the
// to_int() calls, the condition guarding revert_to_default() and the
// returned value cannot be told from here -- confirm against the complete
// file.
493 int ErasureCodeJerasureLiber8tion::parse(ErasureCodeProfile
&profile
,
496 int err
= ErasureCodeJerasure::parse(profile
, ss
);
498 err
|= to_int("m", profile
, &m
, DEFAULT_M
, ss
);
500 err
|= to_int("w", profile
, &w
, DEFAULT_W
, ss
);
501 err
|= to_int("packetsize", profile
, &packetsize
, DEFAULT_PACKETSIZE
, ss
);
506 if (!check_packetsize_set(ss
))
509 revert_to_default(profile
, ss
);
515 void ErasureCodeJerasureLiber8tion::prepare()
517 bitmatrix
= liber8tion_coding_bitmatrix(k
);
518 schedule
= jerasure_smart_bitmatrix_to_schedule(k
, m
, w
, bitmatrix
);