]> git.proxmox.com Git - ceph.git/blame - ceph/src/erasure-code/isa/ErasureCodeIsa.cc
update sources to v12.1.1
[ceph.git] / ceph / src / erasure-code / isa / ErasureCodeIsa.cc
CommitLineData
7c673cae
FG
1/*
2 * Ceph - scalable distributed file system
3 *
4 * Copyright (C) 2014 CERN (Switzerland)
5 *
6 * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch>
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 */
14
15// -----------------------------------------------------------------------------
16#include <algorithm>
7c673cae
FG
17#include <errno.h>
18// -----------------------------------------------------------------------------
19#include "common/debug.h"
20#include "ErasureCodeIsa.h"
21#include "xor_op.h"
224ce89b 22#include "include/assert.h"
31f18b77
FG
23using namespace std;
24
7c673cae
FG
25// -----------------------------------------------------------------------------
26extern "C" {
27#include "isa-l/include/erasure_code.h"
28}
29// -----------------------------------------------------------------------------
30#define dout_context g_ceph_context
31#define dout_subsys ceph_subsys_osd
32#undef dout_prefix
33#define dout_prefix _prefix(_dout)
34// -----------------------------------------------------------------------------
35
36// -----------------------------------------------------------------------------
37
38static ostream&
39_prefix(std::ostream* _dout)
40{
41 return *_dout << "ErasureCodeIsa: ";
42}
43// -----------------------------------------------------------------------------
44
45const std::string ErasureCodeIsaDefault::DEFAULT_K("7");
46const std::string ErasureCodeIsaDefault::DEFAULT_M("3");
47
7c673cae
FG
48
49// -----------------------------------------------------------------------------
50
51int
52ErasureCodeIsa::init(ErasureCodeProfile &profile, ostream *ss)
53{
54 int err = 0;
7c673cae
FG
55 err |= parse(profile, ss);
56 if (err)
57 return err;
58 prepare();
224ce89b 59 return ErasureCode::init(profile, ss);
7c673cae
FG
60}
61
62// -----------------------------------------------------------------------------
63
64unsigned int
65ErasureCodeIsa::get_chunk_size(unsigned int object_size) const
66{
67 unsigned alignment = get_alignment();
68 unsigned chunk_size = ( object_size + k - 1 ) / k;
69 dout(20) << "get_chunk_size: chunk_size " << chunk_size
70 << " must be modulo " << alignment << dendl;
71 unsigned modulo = chunk_size % alignment;
72 if (modulo) {
73 dout(10) << "get_chunk_size: " << chunk_size
74 << " padded to " << chunk_size + alignment - modulo << dendl;
75 chunk_size += alignment - modulo;
76 }
77 return chunk_size;
78}
79
80// -----------------------------------------------------------------------------
81
82int ErasureCodeIsa::encode_chunks(const set<int> &want_to_encode,
83 map<int, bufferlist> *encoded)
84{
85 char *chunks[k + m];
86 for (int i = 0; i < k + m; i++)
87 chunks[i] = (*encoded)[i].c_str();
88 isa_encode(&chunks[0], &chunks[k], (*encoded)[0].length());
89 return 0;
90}
91
92int ErasureCodeIsa::decode_chunks(const set<int> &want_to_read,
93 const map<int, bufferlist> &chunks,
94 map<int, bufferlist> *decoded)
95{
96 unsigned blocksize = (*chunks.begin()).second.length();
97 int erasures[k + m + 1];
98 int erasures_count = 0;
99 char *data[k];
100 char *coding[m];
101 for (int i = 0; i < k + m; i++) {
102 if (chunks.find(i) == chunks.end()) {
103 erasures[erasures_count] = i;
104 erasures_count++;
105 }
106 if (i < k)
107 data[i] = (*decoded)[i].c_str();
108 else
109 coding[i - k] = (*decoded)[i].c_str();
110 }
111 erasures[erasures_count] = -1;
112 assert(erasures_count > 0);
113 return isa_decode(erasures, data, coding, blocksize);
114}
115
116// -----------------------------------------------------------------------------
117
118void
119ErasureCodeIsaDefault::isa_encode(char **data,
120 char **coding,
121 int blocksize)
122{
123
124 if (m == 1)
125 // single parity stripe
126 region_xor((unsigned char**) data, (unsigned char*) coding[0], k, blocksize);
127 else
128 ec_encode_data(blocksize, k, m, encode_tbls,
129 (unsigned char**) data, (unsigned char**) coding);
130}
131
132// -----------------------------------------------------------------------------
133
134bool
135ErasureCodeIsaDefault::erasure_contains(int *erasures, int i)
136{
137 for (int l = 0; erasures[l] != -1; l++) {
138 if (erasures[l] == i)
139 return true;
140 }
141 return false;
142}
143
144// -----------------------------------------------------------------------------
145
146
147
148// -----------------------------------------------------------------------------
149
150int
151ErasureCodeIsaDefault::isa_decode(int *erasures,
152 char **data,
153 char **coding,
154 int blocksize)
155{
156 int nerrs = 0;
157 int i, r, s;
158
159 // count the errors
160 for (int l = 0; erasures[l] != -1; l++) {
161 nerrs++;
162 }
163
164 unsigned char *recover_source[k];
165 unsigned char *recover_target[m];
166
167 memset(recover_source, 0, sizeof (recover_source));
168 memset(recover_target, 0, sizeof (recover_target));
169
170 // ---------------------------------------------
171 // Assign source and target buffers
172 // ---------------------------------------------
173 for (i = 0, s = 0, r = 0; ((r < k) || (s < nerrs)) && (i < (k + m)); i++) {
174 if (!erasure_contains(erasures, i)) {
175 if (r < k) {
176 if (i < k) {
177 recover_source[r] = (unsigned char*) data[i];
178 } else {
179 recover_source[r] = (unsigned char*) coding[i - k];
180 }
181 r++;
182 }
183 } else {
184 if (s < m) {
185 if (i < k) {
186 recover_target[s] = (unsigned char*) data[i];
187 } else {
188 recover_target[s] = (unsigned char*) coding[i - k];
189 }
190 s++;
191 }
192 }
193 }
194
195 if (m == 1) {
196 // single parity decoding
197 assert(1 == nerrs);
198 dout(20) << "isa_decode: reconstruct using region xor [" <<
199 erasures[0] << "]" << dendl;
200 region_xor(recover_source, recover_target[0], k, blocksize);
201 return 0;
202 }
203
204
205 if ((matrixtype == kVandermonde) &&
206 (nerrs == 1) &&
207 (erasures[0] < (k + 1))) {
208 // use xor decoding if a data chunk is missing or the first coding chunk
209 dout(20) << "isa_decode: reconstruct using region xor [" <<
210 erasures[0] << "]" << dendl;
211 assert(1 == s);
212 assert(k == r);
213 region_xor(recover_source, recover_target[0], k, blocksize);
214 return 0;
215 }
216
217 unsigned char d[k * (m + k)];
218 unsigned char decode_tbls[k * (m + k)*32];
219 unsigned char *p_tbls = decode_tbls;
220
221 int decode_index[k];
222
223 if (nerrs > m)
224 return -1;
225
226 std::string erasure_signature; // describes a matrix configuration for caching
227
228 // ---------------------------------------------
229 // Construct b by removing error rows
230 // ---------------------------------------------
231
232 for (i = 0, r = 0; i < k; i++, r++) {
233 char id[128];
234 while (erasure_contains(erasures, r))
235 r++;
236
237 decode_index[i] = r;
238
239 snprintf(id, sizeof (id), "+%d", r);
240 erasure_signature += id;
241 }
242
243 for (int p = 0; p < nerrs; p++) {
244 char id[128];
245 snprintf(id, sizeof (id), "-%d", erasures[p]);
246 erasure_signature += id;
247 }
248
249 // ---------------------------------------------
250 // Try to get an already computed matrix
251 // ---------------------------------------------
252 if (!tcache.getDecodingTableFromCache(erasure_signature, p_tbls, matrixtype, k, m)) {
253 int j;
254 unsigned char b[k * (m + k)];
255 unsigned char c[k * (m + k)];
256
257 for (i = 0; i < k; i++) {
258 r = decode_index[i];
259 for (j = 0; j < k; j++)
260 b[k * i + j] = encode_coeff[k * r + j];
261 }
262 // ---------------------------------------------
263 // Compute inverted matrix
264 // ---------------------------------------------
265
266 // --------------------------------------------------------
267 // Remark: this may fail for certain Vandermonde matrices !
268 // There is an advanced way trying to use different
269 // source chunks to get an invertible matrix, however
270 // there are also (k,m) combinations which cannot be
271 // inverted when m chunks are lost and this optimizations
272 // does not help. Therefor we keep the code simpler.
273 // --------------------------------------------------------
274 if (gf_invert_matrix(b, d, k) < 0) {
275 dout(0) << "isa_decode: bad matrix" << dendl;
276 return -1;
277 }
278
279 for (int p = 0; p < nerrs; p++) {
280 if (erasures[p] < k) {
281 // decoding matrix elements for data chunks
282 for (j = 0; j < k; j++) {
283 c[k * p + j] = d[k * erasures[p] + j];
284 }
285 } else {
286 // decoding matrix element for coding chunks
287 for (i = 0; i < k; i++) {
288 int s = 0;
289 for (j = 0; j < k; j++)
290 s ^= gf_mul(d[j * k + i],
291 encode_coeff[k * erasures[p] + j]);
292
293 c[k * p + i] = s;
294 }
295 }
296 }
297
298 // ---------------------------------------------
299 // Initialize Decoding Table
300 // ---------------------------------------------
301 ec_init_tables(k, nerrs, c, decode_tbls);
302 tcache.putDecodingTableToCache(erasure_signature, p_tbls, matrixtype, k, m);
303 }
304 // Recover data sources
305 ec_encode_data(blocksize,
306 k, nerrs, decode_tbls, recover_source, recover_target);
307
308
309 return 0;
310}
311
312// -----------------------------------------------------------------------------
313
314unsigned
315ErasureCodeIsaDefault::get_alignment() const
316{
317 return EC_ISA_ADDRESS_ALIGNMENT;
318}
319
320// -----------------------------------------------------------------------------
321
322int ErasureCodeIsaDefault::parse(ErasureCodeProfile &profile,
323 ostream *ss)
324{
325 int err = ErasureCode::parse(profile, ss);
326 err |= to_int("k", profile, &k, DEFAULT_K, ss);
327 err |= to_int("m", profile, &m, DEFAULT_M, ss);
328 err |= sanity_check_k(k, ss);
329
330 if (matrixtype == kVandermonde) {
331 // these are verified safe values evaluated using the
332 // benchmarktool and 10*(combinatoric for maximum loss) random
333 // full erasures
334 if (k > 32) {
335 *ss << "Vandermonde: m=" << m
336 << " should be less/equal than 32 : revert to k=32" << std::endl;
337 k = 32;
338 err = -EINVAL;
339 }
340
341 if (m > 4) {
342 *ss << "Vandermonde: m=" << m
343 << " should be less than 5 to guarantee an MDS codec:"
344 << " revert to m=4" << std::endl;
345 m = 4;
346 err = -EINVAL;
347 }
348 switch (m) {
349 case 4:
350 if (k > 21) {
351 *ss << "Vandermonde: k=" << k
352 << " should be less than 22 to guarantee an MDS"
353 << " codec with m=4: revert to k=21" << std::endl;
354 k = 21;
355 err = -EINVAL;
356 }
357 break;
358 default:
359 ;
360 }
361 }
362 return err;
363}
364
365// -----------------------------------------------------------------------------
366
367void
368ErasureCodeIsaDefault::prepare()
369{
370 // setup shared encoding table and coefficients
371 unsigned char** p_enc_table =
372 tcache.getEncodingTable(matrixtype, k, m);
373
374 unsigned char** p_enc_coeff =
375 tcache.getEncodingCoefficient(matrixtype, k, m);
376
377 if (!*p_enc_coeff) {
378 dout(10) << "[ cache tables ] creating coeff for k=" <<
379 k << " m=" << m << dendl;
380 // build encoding coefficients which need to be computed once for each (k,m)
381 encode_coeff = (unsigned char*) malloc(k * (m + k));
382
383 if (matrixtype == kVandermonde)
384 gf_gen_rs_matrix(encode_coeff, k + m, k);
385 if (matrixtype == kCauchy)
386 gf_gen_cauchy1_matrix(encode_coeff, k + m, k);
387
388 // either our new created coefficients are stored or if they have been
389 // created in the meanwhile the locally allocated coefficients will be
390 // freed by setEncodingCoefficient
391 encode_coeff = tcache.setEncodingCoefficient(matrixtype, k, m, encode_coeff);
392 } else {
393 encode_coeff = *p_enc_coeff;
394 }
395
396 if (!*p_enc_table) {
397 dout(10) << "[ cache tables ] creating tables for k=" <<
398 k << " m=" << m << dendl;
399 // build encoding table which needs to be computed once for each (k,m)
400 encode_tbls = (unsigned char*) malloc(k * (m + k)*32);
401 ec_init_tables(k, m, &encode_coeff[k * k], encode_tbls);
402
403 // either our new created table is stored or if it has been
404 // created in the meanwhile the locally allocated table will be
405 // freed by setEncodingTable
406 encode_tbls = tcache.setEncodingTable(matrixtype, k, m, encode_tbls);
407 } else {
408 encode_tbls = *p_enc_table;
409 }
410
411 unsigned memory_lru_cache =
412 k * (m + k) * 32 * tcache.decoding_tables_lru_length;
413
414 dout(10) << "[ cache memory ] = " << memory_lru_cache << " bytes" <<
415 " [ matrix ] = " <<
416 ((matrixtype == kVandermonde) ? "Vandermonde" : "Cauchy") << dendl;
417
418 assert((matrixtype == kVandermonde) || (matrixtype == kCauchy));
419
420}
421// -----------------------------------------------------------------------------